Python Dataframe 예제들, dataFrame_pd.Dataframe Python 예제들

예제 #1

0

파일 보기

파일: plots_toolbox.py 프로젝트: puttak/gasifier_analysis

 def getDataframe(self):
     """Split the y column into consecutive samples and return them as a dataframe.

     Each output column holds ``sample_size`` consecutive values of
     ``self.y_col`` and is keyed by the ``self.x_col`` value (converted with
     ``to_datetime()``) found at the index where that sample ends.

     When ``self.ignore_nan`` is set, the data is first passed through
     ``self.data.finite_set`` (presumably dropping non-finite rows -- confirm
     against the Dataframe implementation).

     Returns:
         The populated dataframe, also stored on ``self.output_data``; an
         empty ``df.Dataframe`` when no complete sample could be formed (in
         that case ``self.output_data`` is left untouched).
     """
     if self.ignore_nan:
         working_data = self.data.finite_set(self.y_col, cols = [self.x_col])
     else:
         working_data = self.data

     # End index of every sample: the multiples of sample_size below the
     # number of rows, skipping 0.
     # NOTE(review): because the end index must be < len(index), the trailing
     # sample is not emitted even when the row count is an exact multiple of
     # sample_size -- confirm whether that is intended.
     boundaries = range(0, len(working_data.index), self.sample_size)[1:]

     grouped_data = [working_data[self.y_col][i-self.sample_size:i].values for i in boundaries]
     # x is read at the end boundary of each sample (not its midpoint).
     grouped_x = [working_data[self.x_col][i].to_datetime() for i in boundaries]

     if len(grouped_data) == 0:
         return df.Dataframe()

     # Drop the last group if it is too small -- may want to make this optional.
     if len(grouped_data[-1]) != self.sample_size:
         grouped_data.pop()

     self.output_data = df.Dataframe()
     for group, x in zip(grouped_data, grouped_x):
         self.output_data[x] = group

     return self.output_data

예제 #2

0

파일 보기

    def testUpdateLoad(self):
        """A one-row dataframe uploaded with index_col 'A' should read back from the upload table."""
        # Run the SQL setup script through the mysql client before the test.
        # (Still needs a "safe" database user for this -- talk to Adrian.)
        os.system("mysql -h 192.168.10.20 -u chris -p < dataframe_pd_setup.sql")

        # NOTE(review): connection credentials are hard-coded in this test.
        interface = SQL.db_interface(host="192.168.10.20", user="******", passwd="cmp87ud01")
        interface.connect()
        interface.query(SQL.use_Query("gas_unit_test"))

        # Load the reference table first.
        data = df.Dataframe()
        data.SQL_load_data(interface, table="dataframe_pd_test_table")

        # Upload a single-row frame to the upload table.
        single_row = df.Dataframe({'A':[1.2],'B':[1.0],'C':[2.0],'cheetah':[3.0]})
        single_row.SQL_upload_data(interface, table="dataframe_upload_test_table", index_col='A')

        # Reload the upload table into the first frame and check its first row.
        data.SQL_load_data(interface, table="dataframe_upload_test_table")
        for position, expected in enumerate([1.2, 1.0, 2.0, 3.0]):
            self.assertEqual(expected, data.iloc[0][position])

예제 #3

0

파일 보기

    def testInsertLoadNan(self):
        """The dataframe should successfully add new records to an existing table when NaN values are in the table"""
        # Run the SQL setup script through the mysql client before the test.
        # (Still needs a "safe" database user for this -- talk to Adrian.)
        os.system("mysql -h 192.168.10.20 -u chris -p < dataframe_pd_setup.sql")

        # Load the reference table from the database.
        data = df.Dataframe()
        interface = SQL.db_interface(host = "192.168.10.20", user = "******", passwd = "cmp87ud01")
        interface.connect()
        q = SQL.use_Query("gas_unit_test")
        interface.query(q)
        data.SQL_load_data(interface, table = "dataframe_pd_test_table")

        # Replace a couple of values with NaN.  A single .iloc[row, col]
        # assignment is used instead of chained .iloc[row][col]: the chained
        # form can write into a temporary copy of the row and leave `data`
        # unchanged (assumes df.Dataframe follows pandas indexing semantics).
        data.iloc[0, 2] = np.nan
        data.iloc[1, 1] = np.nan

        # Upload the modified data to the other table ...
        data.SQL_upload_data(interface, table = "dataframe_upload_test_table")
        # ... and download it again into a fresh dataframe.
        data2 = df.Dataframe()
        data2.SQL_load_data(interface, table = "dataframe_upload_test_table")

        # The NaN cells must survive the round trip.
        self.assertTrue(np.isnan(data2.iloc[0][2]))
        self.assertTrue(np.isnan(data2.iloc[1][1]))

예제 #4

0

파일 보기

 def testCorrectGlossaryReplace(self):
     """glossary_replace({'cheetah':'D'}) should yield a frame equal to one built with column 'D'."""
     col_a = np.array([1.2,3.1,1.1])
     col_b = np.array([4.6,7.0,7.3])
     col_c = np.array([3.6,8.0,2.5])
     col_cheetah = np.array([2.6,9.2,1.1])

     # Frame under test: rename 'cheetah' through the glossary.
     renamed = df.Dataframe({'A':col_a,'B':col_b,'C':col_c,'cheetah':col_cheetah})
     renamed.glossary_replace({'cheetah':'D'})

     # Reference frame built directly with the new column name.
     expected = df.Dataframe({'A':col_a, 'B':col_b,'C':col_c,'D':col_cheetah})

     cellwise_match = (renamed==expected).values
     self.assertTrue(np.all(cellwise_match))

예제 #5

0

파일 보기

파일: plots_toolbox.py 프로젝트: puttak/gasifier_analysis

    def plot(self, chart2 = None):
        """Draw the x-bar chart (subplot 211) and, optionally, a second chart (subplot 212).

        Args:
            chart2: Name of the attribute on ``self`` holding the second
                chart's values; it is also used as that chart's column name
                and y label.  When None, only the x-bar chart is drawn.

        For each chart drawn, the UCL / LCL / target attributes
        (``chart<N>_UCL`` etc.) that are not None are added as horizontal
        lines, and the chart is rescaled around the control limits.

        Any exception raised while plotting is caught and reported as a
        printed warning rather than propagated.
        """
        self._calcXBarPoints()
        self._calcControlLimits()
        try:
            # Only look up / plot the second chart when one was requested;
            # getattr(self, None) would raise and abort the whole plot.
            columns = {'x-bar':self.X_bar}
            chart_nums = [1]
            if chart2 is not None:
                columns[chart2] = getattr(self, chart2)
                chart_nums.append(2)
            columns['x'] = self.ord_pts
            data = df.Dataframe(columns)

            self.chart1_plot = XYPlot(data = data, x_label = self.x_label, y_label = 'x-bar', X_col = 'x', Y_cols = ['x-bar'], auto_scale = True, subplot = True, subplot_num = 211, marker = 'o')
            self.chart1_plot.plot()

            if chart2 is not None:
                self.chart2_plot = XYPlot(data = data, x_label = self.x_label, y_label = chart2, X_col = 'x', Y_cols = [chart2], auto_scale = True, subplot = True, subplot_num = 212, marker = 'o')
                self.chart2_plot.plot()

            self.colors = {'UCL':'red', 'LCL':'red', 'target':'blue'}
            for suf in ['UCL', 'LCL', 'target']:
                for ch_num in chart_nums:
                    plt.subplot(210 + ch_num)
                    val = getattr(self, "chart%s_%s" % (ch_num,suf))
                    if val is None:
                        continue

                    # On the second chart a non-positive LCL is not drawn.
                    if suf != 'LCL' or ch_num == 1 or val > 0:
                        plt.hlines(val, min(self.ord_pts), max(self.ord_pts), colors = self.colors[suf])

                        # NOTE(review): the `is not np.nan` identity tests below are
                        # true for almost any value, so the rescale nearly always
                        # runs -- confirm whether an isnan() check was intended.
                        if suf == 'LCL':
                            chart = getattr(self, "chart%s_plot" % ch_num)
                            if val < chart.y_min or chart.y_min is not np.nan:
                                # Leave 5% of the UCL-LCL span below the LCL.
                                chart.rescale(ymin=val-0.05*(getattr(self, "chart%s_UCL" % ch_num) - val), padded = False)

                        elif suf == 'UCL':
                            chart = getattr(self, "chart%s_plot" % ch_num)
                            if val > chart.y_max or chart.y_max is not np.nan:
                                # Leave 5% of the UCL-LCL span above the UCL.
                                chart.rescale(ymax=val+0.05*(val - getattr(self, "chart%s_LCL" % ch_num)), padded = False)

        except Exception as e:
            print("Warning: Could not plot control chart due to empty dataframe: %s" % e)

예제 #6

0

파일 보기

파일: plots_toolbox.py 프로젝트: puttak/gasifier_analysis

    def __init__(self, data = None, x_label = None, y_label = None, X_col = None, Y_cols = None, auto_scale = True, legend = True, marker = '-', y_min = None, y_max = None, **kwargs):
        """Store the data and display options for an XY chart.

        Args:
            data: A ``df.Dataframe``; an empty one is created when None.
            x_label: X axis label; defaults to ``X_col`` when that is given.
            y_label: Y axis label; defaults to ``Y_cols[0]`` when ``Y_cols``
                is given.
            X_col: Name of the x column.
            Y_cols: Names of the y columns.
            auto_scale, legend, marker, y_min, y_max: Stored unchanged on the
                instance.
            **kwargs: Forwarded to ``Plot.__init__``.

        Raises:
            Exception: If ``data`` is not a ``df.Dataframe``.
        """
        Plot.__init__(self, **kwargs)
        # We should have taken care of whether this is a subplot already...
        # just need to put the plotting machinery in place.

        if data is None:
            # data must be a dataframe
            data = df.Dataframe()
        if not isinstance(data, df.Dataframe):
            # Call form of raise: valid on both Python 2 and Python 3.
            raise Exception("XY chart values MUST be in the form of a dataframe")

        self.data = data

        # More error checking may be appropriate later

        self.X_col = X_col
        self.Y_cols = Y_cols

        # Fall back to the column names for any label that was not supplied.
        if x_label is None and X_col is not None:
            x_label = X_col
        if y_label is None and Y_cols is not None:
            y_label = Y_cols[0]

        self.x_label = x_label
        self.y_label = y_label

        self.auto_scale = auto_scale
        self.legend = legend
        self.marker = marker

        self.y_min = y_min
        self.y_max = y_max

예제 #7

0

파일 보기

 def testNonexistentColumn(self):
     """glossary_replace should raise BadGlossaryTagError for a key that is not a column."""
     col_a = np.array([1.2,3.1,1.1])
     col_b = np.array([4.6,7.0,7.3])
     col_c = np.array([3.6,8.0,2.5])
     col_cheetah = np.array([2.6,9.2,1.1])
     columns = {'A':col_a,'B':col_b,'C':col_c,'cheetah':col_cheetah}
     data = df.Dataframe(columns)

     # 'turtle' is not one of the columns above.
     bad_glossary = {'turtle':'BB'}
     self.assertRaises(df.BadGlossaryTagError, data.glossary_replace, bad_glossary)

예제 #8

0

파일 보기

 def testBadAction(self):
     """filter_vals should raise BadArgumentError when given the action 'middle'."""
     col_a = np.array([1.2,3.1,1.1])
     col_b = np.array([4.6,7.0,7.3])
     col_c = np.array([3.6,8.0,2.5])
     col_cheetah = np.array([2.6,9.2,1.1])
     columns = {'A':col_a,'B':col_b,'C':col_c,'cheetah':col_cheetah}
     data = df.Dataframe(columns)

     # Valid column and threshold, unsupported action name.
     filter_args = ('cheetah', 2.0, 'middle')
     self.assertRaises(df.BadArgumentError, data.filter_vals, *filter_args)

예제 #9

0

파일 보기

 def testNonexistentColumn(self):
     """replace_None_w_nan should raise NoColumnError for a column name that is not present."""
     # Column 'A' deliberately contains a None entry.
     col_a = np.array([1.2,3.1,None])
     col_b = np.array([4.6,7.0,7.3])
     col_c = np.array([3.6,8.0,2.5])
     col_cheetah = np.array([2.6,9.2,1.1])
     columns = {'A':col_a,'B':col_b,'C':col_c,'cheetah':col_cheetah}
     data = df.Dataframe(columns)

     missing_column = 'frogs'
     self.assertRaises(df.NoColumnError, data.replace_None_w_nan, missing_column)

예제 #10

0

파일 보기

 def testCorrectOutput(self):
     """val_units('C') should return the column values at index 0 and the unit 'Pa' at index 1."""
     unit_map = {'A':'m/s', 'B':'kg/s', 'C':'Pa', 'cheetah':'s'}
     rows = [Data.a, Data.b, Data.c]
     data = df.Dataframe(rows, units_dict = unit_map)

     result = data.val_units('C')

     expected_values = np.array([3.6,8.0,2.5])
     values_match = (result[0]==expected_values).all()
     self.assertTrue(values_match)
     self.assertEqual(result[1], 'Pa')

예제 #11

0

파일 보기

 def testNonexistentColumn(self):
     """finite_vals should raise NoColumnError for a column name that is not present."""
     # Columns 'A' and 'C' each carry one NaN entry.
     col_a = np.array([1.2,3.1,np.nan])
     col_b = np.array([4.6,7.0,7.3])
     col_c = np.array([np.nan,8.0,2.5])
     col_cheetah = np.array([2.6,9.2,1.1])
     columns = {'A':col_a,'B':col_b,'C':col_c,'cheetah':col_cheetah}
     data = df.Dataframe(columns)

     missing_column = 'foo'
     self.assertRaises(df.NoColumnError, data.finite_vals, missing_column)

예제 #12

0

파일 보기

 def testNonexistentColumn(self):
     """set_units should raise dfException when the units dict names a column that is not present."""
     col_a = np.array([1.2,3.1,1.1])
     col_b = np.array([4.6,7.0,7.3])
     col_c = np.array([3.6,8.0,2.5])
     col_cheetah = np.array([2.6,9.2,1.1])
     columns = {'A':col_a,'B':col_b,'C':col_c,'cheetah':col_cheetah}
     data = df.Dataframe(columns)

     # 'A' exists, 'frog' does not.
     bad_units = {'A':'m/s', 'frog':'m'}
     self.assertRaises(df.dfException, data.set_units, bad_units)

예제 #13

0

파일 보기

 def testDuplicateColumn(self):
     """Passing a column name as a replace value that already exists should raise an error"""
     A = np.array([1.2,3.1,1.1])
     B = np.array([4.6,7.0,7.3])
     C = np.array([3.6,8.0,2.5])
     cheetah = np.array([2.6,9.2,1.1])
     data = df.Dataframe({'A':A,'B':B,'C':C,'cheetah':cheetah})
     # TODO: enable the real check once glossary_replace rejects duplicates:
     #self.assertRaises(df.BadGlossaryTagError, data.glossary_replace, {'cheetah':'A'})
     # Fail deliberately, with a message that says why, until the check
     # above is enabled.
     self.fail("duplicate-column check for glossary_replace is not implemented yet")

예제 #14

0

파일 보기

    def testNonexistentColumn(self):
        """write_csv("test.txt", "new", 'ninja') should raise dfException."""
        col_a = np.array([1.2,3.1,1.1])
        col_b = np.array([4.6,7.0,7.3])
        col_c = np.array([3.6,8.0,2.5])
        col_cheetah = np.array([2.6,9.2,1.1])
        columns = {'A':col_a,'B':col_b,'C':col_c,'cheetah':col_cheetah}
        data = df.Dataframe(columns)

        # 'ninja' is not one of the columns above (presumably the third
        # argument is a column selector -- confirm against write_csv).
        csv_args = ("test.txt", "new", 'ninja')
        self.assertRaises(df.dfException, data.write_csv, *csv_args)

예제 #15

0

파일 보기

 def testCorrectUnitsAdd(self):
     """After set_units, data.units should equal the dictionary that was passed in."""
     col_a = np.array([1.2,3.1,1.1])
     col_b = np.array([4.6,7.0,7.3])
     col_c = np.array([3.6,8.0,2.5])
     col_cheetah = np.array([2.6,9.2,1.1])
     columns = {'A':col_a,'B':col_b,'C':col_c,'cheetah':col_cheetah}
     data = df.Dataframe(columns)

     expected_units = {'A':'m/s', 'B':'m'}
     # Pass a separate copy so the comparison is against an independent dict.
     data.set_units(dict(expected_units))
     self.assertEqual(data.units, expected_units)

예제 #16

0

파일 보기

    def testBadUnitType(self):
        """Passing a non-string unit should raise an error"""
        A = np.array([1.2,3.1,1.1])
        B = np.array([4.6,7.0,7.3])
        C = np.array([3.6,8.0,2.5])
        cheetah = np.array([2.6,9.2,1.1])
        data = df.Dataframe({'A':A,'B':B,'C':C,'cheetah':cheetah})
        # A list and a float are both rejected as unit values.
        # (Indentation normalized to spaces: a tab-indented line inside a
        # space-indented body is a TabError on Python 3.)
        self.assertRaises(df.dfException, data.set_units,{'A':'m/s', 'B':[2.3,5.3]})
        self.assertRaises(df.dfException, data.set_units,{'A':'m/s', 'B':1.2})

예제 #17

0

파일 보기

 def testCorrectReplaceAll(self):
     """replace_None_w_nan_all should turn every None entry in the dataframe into NaN."""
     # None sits at A[2] and C[0].
     col_a = np.array([1.2,3.1,None])
     col_b = np.array([4.6,7.0,7.3])
     col_c = np.array([None,8.0,2.5])
     col_cheetah = np.array([2.6,9.2,1.1])
     columns = {'A':col_a,'B':col_b,'C':col_c,'cheetah':col_cheetah}
     data = df.Dataframe(columns)

     data.replace_None_w_nan_all()

     # Both former None cells must now be NaN.
     for column, row in (('A', 2), ('C', 0)):
         self.assertTrue(np.isnan(data[column][row]))

예제 #18

0

파일 보기

    def testCorrectSQLLoad(self):
        """Loading dataframe_pd_test_table from SQL should give a frame equal to Data.df1."""
        # NOTE(review): connection credentials are hard-coded in this test.
        interface = SQL.db_interface(host="192.168.10.20", user="******", passwd="cmp87ud01")
        interface.connect()
        interface.query(SQL.use_Query("gas_unit_test"))

        loaded = df.Dataframe()
        loaded.SQL_load_data(interface, table="dataframe_pd_test_table")

        cellwise_match = (loaded==Data.df1).values
        self.assertTrue(np.all(cellwise_match))

예제 #19

0

파일 보기

    def testBadType(self):
        """Raise an error when the filter value passed to filter_vals has a bad type"""
        A = np.array([1.2,3.1,1.1])
        B = np.array([4.6,7.0,7.3])
        C = np.array([3.6,8.0,2.5])
        cheetah = np.array([2.6,9.2,1.1])
        data = df.Dataframe({'A':A,'B':B,'C':C,'cheetah':cheetah})
        # A string, None and a list are all rejected as the filter value.
        # (Indentation normalized to spaces: tab-indented lines inside a
        # space-indented body are a TabError on Python 3.)
        self.assertRaises(df.BadArgumentError, data.filter_vals, 'cheetah', 'meep', 'high')
        self.assertRaises(df.BadArgumentError, data.filter_vals, 'cheetah', None, 'high')
        self.assertRaises(df.BadArgumentError, data.filter_vals, 'cheetah', [1.2, 3.4], 'high')

예제 #20

0

파일 보기

 def testUnitConversion(self):
     """val_units('A', 'ft/hr') should return the values converted from m/s and the unit 'ft/hr'."""
     unit_map = {'A':'m/s', 'B':'kg/s', 'C':'Pa', 'cheetah':'s'}
     rows = [Data.a, Data.b, Data.c]
     data = df.Dataframe(rows, units_dict = unit_map)

     result = data.val_units('A', 'ft/hr')

     # Reference conversion done directly with the unit converter.
     raw_values = np.array([1.2,3.1,1.1])
     converter = uc.UnitConverter()
     expected_values = converter.convert_units(raw_values, 'm/s', 'ft/hr')

     values_match = (result[0]==expected_values).all()
     self.assertTrue(values_match)
     self.assertEqual(result[1], 'ft/hr')

예제 #21

0

파일 보기

    def testBadFilename(self):
        """write_csv should raise dfException when the filename is a float or None."""
        col_a = np.array([1.2,3.1,1.1])
        col_b = np.array([4.6,7.0,7.3])
        col_c = np.array([3.6,8.0,2.5])
        col_cheetah = np.array([2.6,9.2,1.1])
        columns = {'A':col_a,'B':col_b,'C':col_c,'cheetah':col_cheetah}
        data = df.Dataframe(columns)

        for bad_filename in (24.0, None):
            self.assertRaises(df.dfException, data.write_csv, bad_filename)

예제 #22

0

파일 보기

    def testBadGlossaryType(self):
        """glossary_replace should raise BadGlossaryTypeError for a float, a list or a string glossary."""
        col_a = np.array([1.2,3.1,1.1])
        col_b = np.array([4.6,7.0,7.3])
        col_c = np.array([3.6,8.0,2.5])
        col_cheetah = np.array([2.6,9.2,1.1])
        columns = {'A':col_a,'B':col_b,'C':col_c,'cheetah':col_cheetah}
        data = df.Dataframe(columns)

        for bad_glossary in (2.0, [2.3,4.2], "fetid"):
            self.assertRaises(df.BadGlossaryTypeError, data.glossary_replace, bad_glossary)

예제 #23

0

파일 보기

 def testCorrectFilterLow(self):
     """filter_vals('A', 3.0, 'low') should return NaN for entries below 3.0 and keep 3.1."""
     col_a = np.array([1.2,3.1,1.1])
     col_b = np.array([4.6,7.0,7.3])
     col_c = np.array([3.6,8.0,2.5])
     col_cheetah = np.array([2.6,9.2,1.1])
     columns = {'A':col_a,'B':col_b,'C':col_c,'cheetah':col_cheetah}
     data = df.Dataframe(columns)

     result = data.filter_vals('A', 3.0, 'low')

     # Positions 0 and 2 (1.2 and 1.1) are filtered out; position 1 survives.
     for filtered_position in (0, 2):
         self.assertTrue(np.isnan(result[filtered_position]))
     self.assertEqual(result[1], 3.1)

예제 #24

0

파일 보기

파일: plots_toolbox.py 프로젝트: puttak/gasifier_analysis

    def __init__(self, data = None, label = None, data_col = None, nbins = 5, useOffset = False, **kwargs):
        """Store the data and display options for a single-column plot.

        Args:
            data: A ``df.Dataframe``; an empty one is created when None.
            label, data_col, nbins, useOffset: Stored unchanged on the
                instance.
            **kwargs: Forwarded to ``Plot.__init__``.

        Raises:
            Exception: If ``data`` is not a ``df.Dataframe``.
        """
        Plot.__init__(self,**kwargs)
        if data is None:
            data = df.Dataframe()
        if not isinstance(data, df.Dataframe):
            # Call form of raise: valid on both Python 2 and Python 3.
            raise Exception("data must be a Dataframe!")

        self.data = data
        self.label = label
        self.nbins = nbins
        self.data_col = data_col
        self.useOffset = useOffset

예제 #25

0

파일 보기

 def testCorrectFiniteValCol(self):
     """finite_vals('A') should return only the two non-NaN entries of column 'A', in order."""
     # Columns 'A' and 'C' each carry one NaN entry.
     col_a = np.array([1.2,3.1,np.nan])
     col_b = np.array([4.6,7.0,7.3])
     col_c = np.array([np.nan,8.0,2.5])
     col_cheetah = np.array([2.6,9.2,1.1])
     columns = {'A':col_a,'B':col_b,'C':col_c,'cheetah':col_cheetah}
     data = df.Dataframe(columns)

     finite = data.finite_vals('A')

     self.assertEqual(len(finite), 2)
     for position, expected in enumerate([1.2, 3.1]):
         self.assertEqual(finite[position], expected)

예제 #26

0

파일 보기

    def testCorrectInitialization(self):
        """A dataframe built from a dict of arrays should equal the reference frame Data.df1."""
        col_a = np.array([1.2,3.1,1.1])
        col_b = np.array([4.6,7.0,7.3])
        col_c = np.array([3.6,8.0,2.5])
        col_cheetah = np.array([2.6,9.2,1.1])
        columns = {'A':col_a,'B':col_b,'C':col_c,'cheetah':col_cheetah}
        data = df.Dataframe(columns)

        # Element-wise comparison stands in for a dataframe equality helper.
        cellwise_match = (data==Data.df1).values
        self.assertTrue(np.all(cellwise_match))

예제 #27

0

파일 보기

파일: plots_toolbox.py 프로젝트: puttak/gasifier_analysis

    def __init__(self, data = None, y_col = None, x_col = None, sample_size = 1, ignore_nan = False):
        """Store the source data and sampling options.

        Args:
            data: A ``df.Dataframe``; an empty one is created when None.
            y_col, x_col, sample_size, ignore_nan: Stored unchanged on the
                instance.

        Raises:
            Exception: If ``data`` is not a ``df.Dataframe``.
        """
        # Data is in the form of a dataframe
        if data is None:
            data = df.Dataframe()

        if not isinstance(data, df.Dataframe):
            # Call form of raise: valid on both Python 2 and Python 3.
            raise Exception("data must be in the form of a Dataframe")

        self.data = data
        self.y_col = y_col
        self.x_col = x_col
        self.sample_size = sample_size
        self.ignore_nan = ignore_nan

예제 #28

0

파일 보기

    def testSQLError(self):
        """SQL_load_data should raise dfSQLError with no database selected, a bad table, or a bad condition."""
        data = df.Dataframe()
        # NOTE(review): connection credentials are hard-coded in this test.
        interface = SQL.db_interface(host="192.168.10.20", user="******", passwd="cmp87ud01")
        interface.connect()

        # Case 1: no database has been selected yet.
        self.assertRaises(df.dfSQLError, data.SQL_load_data, interface)

        # Select the test database for the remaining cases.
        interface.query(SQL.use_Query("gas_unit_test"))

        # Case 2: bad table.
        bad_table = "moat"
        self.assertRaises(df.dfSQLError, data.SQL_load_data, interface, bad_table)

        # Case 3: bad condition on the test table.
        bad_conditions = ["denver>2.0"]
        self.assertRaises(df.dfSQLError, data.SQL_load_data, interface, "dataframe_pd_test_table", bad_conditions)

예제 #29

0

파일 보기

파일: plots_toolbox.py 프로젝트: puttak/gasifier_analysis

    def __init__(self, data = None, x_labels = None, y_labels = None, plot_cols = None, h_plots = 1, auto_scale = True, markers = None, **kwargs):
        """Initialize the XY Plot.

        Args:
            data: A ``df.Dataframe``; an empty one is created when None.
            x_labels: List of x labels; defaults to the x name of each entry
                in ``plot_cols``.
            y_labels: List of y labels; defaults to the first y name of each
                entry in ``plot_cols``.
            plot_cols: List of ``(x, [y, ...])`` string/list pairs, each
                string naming an X column and each list the corresponding Y
                columns to plot.  Defaults to an empty list.
            h_plots: For plots with multiple subplots, the number of
                horizontal subplots.
            auto_scale: Accepted but not stored here.
                NOTE(review): confirm whether it should be kept on the
                instance.
            markers: Stored unchanged on the instance.
            **kwargs: Forwarded to ``Plot.__init__``.

        Raises:
            Exception: If ``data`` is not a ``df.Dataframe`` or if either
                label argument is not a list.
        """
        Plot.__init__(self,**kwargs)
        if data is None:
            # Data must be a dataframe
            data = df.Dataframe()
        if not isinstance(data, df.Dataframe):
            raise Exception("XY chart values MUST be a dataframe")
        self.data = data

        # plot_cols is a list of (string, []), representing (x,Y).
        # Normalize the local name so that the default is what gets stored
        # and what the label defaults below iterate over.
        if plot_cols is None:
            plot_cols = []
        self.plot_cols = plot_cols

        self.h_plots = h_plots

        if x_labels is None:
            x_labels = [row[0] for row in plot_cols]

        if y_labels is None:
            y_labels = [row[1][0] for row in plot_cols]

        if not isinstance(x_labels, list):
            raise Exception("x_labels must be a list of labels, equal to the length of data")
        if not isinstance(y_labels, list):
            raise Exception("y_labels must be a list of labels, equal to the length of data")

        self.markers = markers

        self.x_labels = x_labels
        self.y_labels = y_labels

예제 #30

0

파일 보기

 def testNonexistentColumn(self):
     """val_units should raise NoColumnError for a column name that is not present."""
     unit_map = {'A':'m/s', 'B':'kg/s', 'C':'Pa', 'cheetah':'s'}
     rows = [Data.a, Data.b, Data.c]
     data = df.Dataframe(rows, units_dict = unit_map)

     missing_column = 'mouse'
     self.assertRaises(df.NoColumnError, data.val_units, missing_column)