def getDataframe(self):
    """Group the y column into consecutive samples keyed by an x value.

    When ``self.ignore_nan`` is set, rows are first reduced with
    ``self.data.finite_set``.  The y column is then cut into consecutive
    slices of ``self.sample_size`` values, and each slice is stored in a new
    dataframe as a column whose name is the x value (converted with
    ``to_datetime()``) found at the slice's end boundary.

    Returns the populated dataframe (also kept on ``self.output_data``), or
    a fresh empty dataframe when no group could be formed.
    """
    if self.ignore_nan:
        source = self.data.finite_set(self.y_col, cols=[self.x_col])
    else:
        source = self.data

    # Group boundaries: sample_size, 2*sample_size, ... (strictly below the
    # number of rows).
    # NOTE(review): when the row count is an exact multiple of sample_size,
    # the final full group has no boundary and is left out -- confirm this
    # is intended.
    y_groups = [source[self.y_col][stop - self.sample_size:stop].values
                for stop in range(0, len(source.index), self.sample_size)[1:]]
    # NOTE(review): the original comment calls these "midpoints", but the
    # value is read at the boundary index itself -- confirm.
    x_marks = [source[self.x_col][stop].to_datetime()
               for stop in range(0, len(source.index), self.sample_size)[1:]]

    if not y_groups:
        return df.Dataframe()

    # Drop the last group if it is too small -- may want to make this
    # optional.  (Original note: this should become a loop that pops groups
    # until the sizes are right.)
    if len(y_groups[-1]) != self.sample_size:
        y_groups.pop()

    self.output_data = df.Dataframe()
    for y_group, x_mark in zip(y_groups, x_marks):
        self.output_data[x_mark] = y_group
    return self.output_data
def testUpdateLoad(self):
    """Uploading with an index column should update records in an existing table"""
    # Reset the database fixtures.
    # TODO: need a "safe" user for this -- talk to Adrian about it.
    os.system("mysql -h 192.168.10.20 -u chris -p < dataframe_pd_setup.sql")

    # Fetch the reference data from the database.
    data = df.Dataframe()
    interface = SQL.db_interface(host="192.168.10.20", user="******", passwd="cmp87ud01")
    interface.connect()
    interface.query(SQL.use_Query("gas_unit_test"))
    data.SQL_load_data(interface, table="dataframe_pd_test_table")

    # Upload a single-row dataframe, using column 'A' as the index column.
    single_row = df.Dataframe({'A': [1.2], 'B': [1.0], 'C': [2.0], 'cheetah': [3.0]})
    single_row.SQL_upload_data(interface, table="dataframe_upload_test_table", index_col='A')

    # Read the upload table back and check the first row, column by column.
    data.SQL_load_data(interface, table="dataframe_upload_test_table")
    for position, expected in enumerate([1.2, 1.0, 2.0, 3.0]):
        self.assertEqual(expected, data.iloc[0][position])
def testInsertLoadNan(self):
    """The dataframe should successfully add new records to an existing table when NaN values are in the table"""
    # Set up the database.
    # TODO: need a "safe" user for this -- talk to Adrian about it.
    os.system("mysql -h 192.168.10.20 -u chris -p < dataframe_pd_setup.sql")

    # Go get the data from the database.
    data = df.Dataframe()
    interface = SQL.db_interface(host="192.168.10.20", user="******", passwd="cmp87ud01")
    interface.connect()
    q = SQL.use_Query("gas_unit_test")
    interface.query(q)
    data.SQL_load_data(interface, table="dataframe_pd_test_table")

    # Replace a couple of values with NaN.  A single (row, column) indexer
    # is used instead of chained ``iloc[row][col] = ...`` because the
    # chained form may assign into a temporary copy of the row and leave
    # the dataframe itself untouched.
    data.iloc[0, 2] = np.nan
    data.iloc[1, 1] = np.nan

    # Upload the data to the other table.
    data.SQL_upload_data(interface, table="dataframe_upload_test_table")

    # Download the data into a new dataframe; the NaNs must survive the
    # round trip.
    data2 = df.Dataframe()
    data2.SQL_load_data(interface, table="dataframe_upload_test_table")
    self.assertTrue(np.isnan(data2.iloc[0, 2]))
    self.assertTrue(np.isnan(data2.iloc[1, 1]))
def testCorrectGlossaryReplace(self):
    """A glossary mapping should rename the matching column"""
    col_a = np.array([1.2, 3.1, 1.1])
    col_b = np.array([4.6, 7.0, 7.3])
    col_c = np.array([3.6, 8.0, 2.5])
    col_cheetah = np.array([2.6, 9.2, 1.1])

    renamed = df.Dataframe({'A': col_a, 'B': col_b, 'C': col_c, 'cheetah': col_cheetah})
    renamed.glossary_replace({'cheetah': 'D'})

    # Same data, built directly under the new column name.
    expected = df.Dataframe({'A': col_a, 'B': col_b, 'C': col_c, 'D': col_cheetah})
    cell_matches = (renamed == expected).values
    self.assertTrue(np.all(cell_matches))
def plot(self, chart2=None):
    """Draw the x-bar chart and, optionally, a second chart with limit lines.

    chart2 -- name of the attribute on ``self`` holding the points for the
              second chart; it is also used as that chart's column name and
              y label.  When None (the default) only the x-bar chart is
              drawn.

    The control limits and target stored as ``chart<N>_UCL``,
    ``chart<N>_LCL`` and ``chart<N>_target`` are drawn as horizontal lines
    on subplot 21<N>.  Any exception raised while plotting is caught and
    reported as a printed warning instead of propagating.
    """
    self._calcXBarPoints()
    self._calcControlLimits()
    try:
        columns = {'x-bar': self.X_bar, 'x': self.ord_pts}
        if chart2 is not None:
            # Only look the second series up when one was requested;
            # getattr(self, None) raises TypeError, which made the default
            # call fail before anything was drawn.
            columns[chart2] = getattr(self, chart2)
        data = df.Dataframe(columns)

        self.chart1_plot = XYPlot(data=data, x_label=self.x_label, y_label='x-bar',
                                  X_col='x', Y_cols=['x-bar'], auto_scale=True,
                                  subplot=True, subplot_num=211, marker='o')
        self.chart1_plot.plot()
        if chart2 is not None:
            self.chart2_plot = XYPlot(data=data, x_label=self.x_label, y_label=chart2,
                                      X_col='x', Y_cols=[chart2], auto_scale=True,
                                      subplot=True, subplot_num=212, marker='o')
            self.chart2_plot.plot()

        self.colors = {'UCL': 'red', 'LCL': 'red', 'target': 'blue'}
        # Without a second chart there is no chart2_plot to decorate.
        chart_numbers = [1, 2] if chart2 is not None else [1]
        for suf in ['UCL', 'LCL', 'target']:
            for ch_num in chart_numbers:
                plt.subplot(210 + ch_num)
                val = getattr(self, "chart%s_%s" % (ch_num, suf))
                if val is None:
                    continue
                # The lower limit of the second chart is only drawn when it
                # is positive.  (String compared by value, not identity.)
                if not (suf != 'LCL' or ch_num == 1 or val > 0):
                    continue
                plt.hlines(val, min(self.ord_pts), max(self.ord_pts),
                           colors=self.colors[suf])

                # NOTE(review): the original indentation was lost; the
                # rescaling below is assumed to apply only when the line
                # was drawn -- confirm.
                if suf == 'target':
                    continue
                chart = getattr(self, "chart%s_plot" % ch_num)
                ucl = getattr(self, "chart%s_UCL" % ch_num)
                lcl = getattr(self, "chart%s_LCL" % ch_num)
                # NOTE(review): the ``is not np.nan`` tests are kept as
                # written; they are true for almost any value, so the
                # rescale effectively always runs -- confirm the intent.
                if suf == 'LCL':
                    if lcl < chart.y_min or chart.y_min is not np.nan:
                        # Pad 5% of the limit span below the lower limit.
                        chart.rescale(ymin=lcl - 0.05 * (ucl - lcl), padded=False)
                else:
                    if ucl > chart.y_max or chart.y_max is not np.nan:
                        # Pad 5% of the limit span above the upper limit.
                        chart.rescale(ymax=ucl + 0.05 * (ucl - lcl), padded=False)
    except Exception as e:
        print("Warning: Could not plot control chart due to empty dataframe: %s" % e)
def __init__(self, data=None, x_label=None, y_label=None, X_col=None, Y_cols=None, auto_scale=True, legend=True, marker='-', y_min=None, y_max=None, **kwargs):
    """Store the dataframe, column selection and display options.

    Extra keyword arguments are forwarded to ``Plot.__init__``.  ``data``
    defaults to an empty dataframe and must be a ``df.Dataframe``;
    anything else raises ``Exception``.  A missing ``x_label`` falls back
    to ``X_col`` and a missing ``y_label`` to the first entry of
    ``Y_cols``.
    """
    # Subplot handling is expected to be done by Plot.__init__; only the
    # plotting machinery is set up here.
    Plot.__init__(self, **kwargs)

    frame = df.Dataframe() if data is None else data
    if not isinstance(frame, df.Dataframe):
        raise Exception("XY chart values MUST be in the form of a dataframe")
    self.data = frame

    # More error checking may be appropriate later.
    self.X_col = X_col
    self.Y_cols = Y_cols

    # Use column names as axis labels when none were supplied.
    self.x_label = X_col if (x_label is None and X_col is not None) else x_label
    self.y_label = Y_cols[0] if (y_label is None and Y_cols is not None) else y_label

    self.auto_scale = auto_scale
    self.legend = legend
    self.marker = marker
    self.y_min = y_min
    self.y_max = y_max
def testNonexistentColumn(self):
    """A glossary entry for a column that is not present should raise BadGlossaryTagError"""
    columns = {
        'A': np.array([1.2, 3.1, 1.1]),
        'B': np.array([4.6, 7.0, 7.3]),
        'C': np.array([3.6, 8.0, 2.5]),
        'cheetah': np.array([2.6, 9.2, 1.1]),
    }
    frame = df.Dataframe(columns)
    # 'turtle' is not one of the columns above.
    self.assertRaises(df.BadGlossaryTagError, frame.glossary_replace, {'turtle': 'BB'})
def testBadAction(self):
    """An action outside the action list should raise BadArgumentError"""
    columns = {
        'A': np.array([1.2, 3.1, 1.1]),
        'B': np.array([4.6, 7.0, 7.3]),
        'C': np.array([3.6, 8.0, 2.5]),
        'cheetah': np.array([2.6, 9.2, 1.1]),
    }
    frame = df.Dataframe(columns)
    # 'middle' is the invalid action being rejected.
    self.assertRaises(df.BadArgumentError, frame.filter_vals, 'cheetah', 2.0, 'middle')
def testNonexistentColumn(self):
    """replace_None_w_nan on a column that is not present should raise NoColumnError"""
    columns = {
        'A': np.array([1.2, 3.1, None]),
        'B': np.array([4.6, 7.0, 7.3]),
        'C': np.array([3.6, 8.0, 2.5]),
        'cheetah': np.array([2.6, 9.2, 1.1]),
    }
    frame = df.Dataframe(columns)
    # 'frogs' is not one of the columns above.
    self.assertRaises(df.NoColumnError, frame.replace_None_w_nan, 'frogs')
def testCorrectOutput(self):
    """val_units should hand back the column values together with their unit"""
    unit_map = {'A': 'm/s', 'B': 'kg/s', 'C': 'Pa', 'cheetah': 's'}
    frame = df.Dataframe([Data.a, Data.b, Data.c], units_dict=unit_map)

    result = frame.val_units('C')

    expected_values = np.array([3.6, 8.0, 2.5])
    values_match = (result[0] == expected_values)
    self.assertTrue(values_match.all())
    self.assertEqual(result[1], 'Pa')
def testNonexistentColumn(self):
    """finite_vals on a column that is not present should raise NoColumnError"""
    columns = {
        'A': np.array([1.2, 3.1, np.nan]),
        'B': np.array([4.6, 7.0, 7.3]),
        'C': np.array([np.nan, 8.0, 2.5]),
        'cheetah': np.array([2.6, 9.2, 1.1]),
    }
    frame = df.Dataframe(columns)
    # 'foo' is not one of the columns above.
    self.assertRaises(df.NoColumnError, frame.finite_vals, 'foo')
def testNonexistentColumn(self):
    """set_units with a key that is not a column should raise dfException"""
    columns = {
        'A': np.array([1.2, 3.1, 1.1]),
        'B': np.array([4.6, 7.0, 7.3]),
        'C': np.array([3.6, 8.0, 2.5]),
        'cheetah': np.array([2.6, 9.2, 1.1]),
    }
    frame = df.Dataframe(columns)
    # 'frog' is not one of the columns above.
    units_with_unknown_column = {'A': 'm/s', 'frog': 'm'}
    self.assertRaises(df.dfException, frame.set_units, units_with_unknown_column)
def testDuplicateColumn(self):
    """Renaming a column to a name that already exists should raise an error"""
    columns = {
        'A': np.array([1.2, 3.1, 1.1]),
        'B': np.array([4.6, 7.0, 7.3]),
        'C': np.array([3.6, 8.0, 2.5]),
        'cheetah': np.array([2.6, 9.2, 1.1]),
    }
    data = df.Dataframe(columns)
    # TODO: the intended check is disabled and replaced by a placeholder
    # that always fails:
    #   self.assertRaises(df.BadGlossaryTagError, data.glossary_replace, {'cheetah':'A'})
    self.assertEqual(1, 0)
def testNonexistentColumn(self):
    """write_csv given a name that is not a column ('ninja') should raise dfException"""
    columns = {
        'A': np.array([1.2, 3.1, 1.1]),
        'B': np.array([4.6, 7.0, 7.3]),
        'C': np.array([3.6, 8.0, 2.5]),
        'cheetah': np.array([2.6, 9.2, 1.1]),
    }
    frame = df.Dataframe(columns)
    self.assertRaises(df.dfException, frame.write_csv, "test.txt", "new", 'ninja')
def testCorrectUnitsAdd(self):
    """set_units should store the given unit dictionary on the dataframe"""
    columns = {
        'A': np.array([1.2, 3.1, 1.1]),
        'B': np.array([4.6, 7.0, 7.3]),
        'C': np.array([3.6, 8.0, 2.5]),
        'cheetah': np.array([2.6, 9.2, 1.1]),
    }
    frame = df.Dataframe(columns)

    frame.set_units({'A': 'm/s', 'B': 'm'})

    self.assertEqual(frame.units, {'A': 'm/s', 'B': 'm'})
def testBadUnitType(self):
    """A unit that is not a string should raise dfException"""
    columns = {
        'A': np.array([1.2, 3.1, 1.1]),
        'B': np.array([4.6, 7.0, 7.3]),
        'C': np.array([3.6, 8.0, 2.5]),
        'cheetah': np.array([2.6, 9.2, 1.1]),
    }
    frame = df.Dataframe(columns)
    # Try a list and then a float as the unit for column 'B'.
    for bad_unit in ([2.3, 5.3], 1.2):
        self.assertRaises(df.dfException, frame.set_units, {'A': 'm/s', 'B': bad_unit})
def testCorrectReplaceAll(self):
    """replace_None_w_nan_all should turn every None in the dataframe into NaN"""
    columns = {
        'A': np.array([1.2, 3.1, None]),
        'B': np.array([4.6, 7.0, 7.3]),
        'C': np.array([None, 8.0, 2.5]),
        'cheetah': np.array([2.6, 9.2, 1.1]),
    }
    frame = df.Dataframe(columns)

    frame.replace_None_w_nan_all()

    # The two cells that started out as None.
    self.assertTrue(np.isnan(frame['A'][2]))
    self.assertTrue(np.isnan(frame['C'][0]))
def testCorrectSQLLoad(self):
    """Loading the test table over SQL should reproduce the reference dataframe"""
    loaded = df.Dataframe()
    interface = SQL.db_interface(host="192.168.10.20", user="******", passwd="cmp87ud01")
    interface.connect()
    interface.query(SQL.use_Query("gas_unit_test"))

    loaded.SQL_load_data(interface, table="dataframe_pd_test_table")

    cell_matches = (loaded == Data.df1).values
    self.assertTrue(np.all(cell_matches))
def testBadType(self):
    """filter_vals should raise BadArgumentError for non-numeric comparison values"""
    columns = {
        'A': np.array([1.2, 3.1, 1.1]),
        'B': np.array([4.6, 7.0, 7.3]),
        'C': np.array([3.6, 8.0, 2.5]),
        'cheetah': np.array([2.6, 9.2, 1.1]),
    }
    frame = df.Dataframe(columns)
    # A string, None and a list are each rejected in turn.
    for bad_value in ('meep', None, [1.2, 3.4]):
        self.assertRaises(df.BadArgumentError, frame.filter_vals, 'cheetah', bad_value, 'high')
def testUnitConversion(self):
    """val_units should convert the values when a target unit is requested"""
    unit_map = {'A': 'm/s', 'B': 'kg/s', 'C': 'Pa', 'cheetah': 's'}
    frame = df.Dataframe([Data.a, Data.b, Data.c], units_dict=unit_map)

    result = frame.val_units('A', 'ft/hr')

    # Build the expected values with the unit converter directly.
    raw_values = np.array([1.2, 3.1, 1.1])
    converter = uc.UnitConverter()
    expected_values = converter.convert_units(raw_values, 'm/s', 'ft/hr')

    values_match = (result[0] == expected_values)
    self.assertTrue(values_match.all())
    self.assertEqual(result[1], 'ft/hr')
def testBadFilename(self):
    """write_csv should raise dfException when the filename has a bad type"""
    columns = {
        'A': np.array([1.2, 3.1, 1.1]),
        'B': np.array([4.6, 7.0, 7.3]),
        'C': np.array([3.6, 8.0, 2.5]),
        'cheetah': np.array([2.6, 9.2, 1.1]),
    }
    frame = df.Dataframe(columns)
    # A float and None are each rejected in turn.
    for bad_filename in (24.0, None):
        self.assertRaises(df.dfException, frame.write_csv, bad_filename)
def testBadGlossaryType(self):
    """glossary_replace should raise BadGlossaryTypeError for a bad glossary type"""
    columns = {
        'A': np.array([1.2, 3.1, 1.1]),
        'B': np.array([4.6, 7.0, 7.3]),
        'C': np.array([3.6, 8.0, 2.5]),
        'cheetah': np.array([2.6, 9.2, 1.1]),
    }
    frame = df.Dataframe(columns)
    # A float, a list and a string are each rejected in turn.
    for bad_glossary in (2.0, [2.3, 4.2], "fetid"):
        self.assertRaises(df.BadGlossaryTypeError, frame.glossary_replace, bad_glossary)
def testCorrectFilterLow(self):
    """filter_vals in 'low' mode should blank out values below the threshold"""
    columns = {
        'A': np.array([1.2, 3.1, 1.1]),
        'B': np.array([4.6, 7.0, 7.3]),
        'C': np.array([3.6, 8.0, 2.5]),
        'cheetah': np.array([2.6, 9.2, 1.1]),
    }
    frame = df.Dataframe(columns)

    filtered = frame.filter_vals('A', 3.0, 'low')

    # 1.2 and 1.1 are below 3.0 and come back as NaN; 3.1 is kept.
    for blanked in (0, 2):
        self.assertTrue(np.isnan(filtered[blanked]))
    self.assertEqual(filtered[1], 3.1)
def __init__(self, data=None, label=None, data_col=None, nbins=5, useOffset=False, **kwargs):
    """Store the dataframe and display options.

    Extra keyword arguments are forwarded to ``Plot.__init__``.  ``data``
    defaults to an empty dataframe and must be a ``df.Dataframe``;
    anything else raises ``Exception``.
    """
    Plot.__init__(self, **kwargs)

    frame = df.Dataframe() if data is None else data
    if not isinstance(frame, df.Dataframe):
        raise Exception("data must be a Dataframe!")

    self.data = frame
    self.label = label
    self.nbins = nbins
    self.data_col = data_col
    self.useOffset = useOffset
def testCorrectFiniteValCol(self):
    """finite_vals should return only the finite values of a column"""
    columns = {
        'A': np.array([1.2, 3.1, np.nan]),
        'B': np.array([4.6, 7.0, 7.3]),
        'C': np.array([np.nan, 8.0, 2.5]),
        'cheetah': np.array([2.6, 9.2, 1.1]),
    }
    frame = df.Dataframe(columns)

    finite = frame.finite_vals('A')

    # The trailing NaN of column 'A' is dropped.
    self.assertEqual(len(finite), 2)
    self.assertEqual(finite[0], 1.2)
    self.assertEqual(finite[1], 3.1)
def testCorrectInitialization(self):
    """A dataframe built from a dict of arrays should equal the reference dataframe"""
    columns = {
        'A': np.array([1.2, 3.1, 1.1]),
        'B': np.array([4.6, 7.0, 7.3]),
        'C': np.array([3.6, 8.0, 2.5]),
        'cheetah': np.array([2.6, 9.2, 1.1]),
    }
    frame = df.Dataframe(columns)
    # TODO: need a proper way to compare one dataframe to another.
    cell_matches = (frame == Data.df1).values
    self.assertTrue(np.all(cell_matches))
def __init__(self, data=None, y_col=None, x_col=None, sample_size=1, ignore_nan=False):
    """Store the dataframe, the x/y column names and the sampling options.

    ``data`` defaults to an empty dataframe and must be a ``df.Dataframe``;
    anything else raises ``Exception``.
    """
    frame = df.Dataframe() if data is None else data
    if not isinstance(frame, df.Dataframe):
        raise Exception("data must be in the form of a Dataframe")

    self.data = frame
    self.y_col = y_col
    self.x_col = x_col
    self.sample_size = sample_size
    self.ignore_nan = ignore_nan
def testSQLError(self):
    """SQL_load_data should raise dfSQLError when the SQL library reports an error"""
    frame = df.Dataframe()
    interface = SQL.db_interface(host="192.168.10.20", user="******", passwd="cmp87ud01")
    interface.connect()

    # Error case 1: no database has been selected yet.
    self.assertRaises(df.dfSQLError, frame.SQL_load_data, interface)

    interface.query(SQL.use_Query("gas_unit_test"))

    # Error case 2: bad table.
    self.assertRaises(df.dfSQLError, frame.SQL_load_data, interface, "moat")

    # Error case 3: bad condition.
    self.assertRaises(df.dfSQLError, frame.SQL_load_data, interface,
                      "dataframe_pd_test_table", ["denver>2.0"])
def __init__(self, data=None, x_labels=None, y_labels=None, plot_cols=None, h_plots=1, auto_scale=True, markers=None, **kwargs):
    """Initialize the XY Plot.

    data      -- must be a dataframe; defaults to an empty one.
    plot_cols -- list of (string, list) tuples, each string naming an X
                 column and each list the corresponding Y columns to plot;
                 defaults to an empty list.
    x_labels  -- list of x-axis labels; defaults to the X column of each
                 plot_cols entry.
    y_labels  -- list of y-axis labels; defaults to the first Y column of
                 each plot_cols entry.
    h_plots   -- for plots with multiple subplots, the number of
                 horizontal subplots.
    markers   -- stored unchanged on the instance.

    Remaining keyword arguments are forwarded to ``Plot.__init__``.
    Raises ``Exception`` when ``data`` is not a dataframe or a label
    argument is not a list.
    """
    Plot.__init__(self, **kwargs)

    if data is None:
        data = df.Dataframe()
    if not isinstance(data, df.Dataframe):
        raise Exception("XY chart values MUST be a dataframe")
    self.data = data

    # Normalize the local name so the default really is an empty list.
    # Previously only the attribute was defaulted and it was immediately
    # overwritten with None, which then failed in the label loops below.
    if plot_cols is None:
        plot_cols = []
    self.plot_cols = plot_cols

    self.h_plots = h_plots

    if x_labels is None:
        x_labels = [row[0] for row in plot_cols]
    if y_labels is None:
        y_labels = [row[1][0] for row in plot_cols]

    if not isinstance(x_labels, list):
        raise Exception("x_labels must be a list of labels, equal to the length of data")
    if not isinstance(y_labels, list):
        raise Exception("y_labels must be a list of labels, equal to the length of data")

    # NOTE(review): auto_scale is accepted but never stored here -- confirm
    # whether it should be kept on the instance.
    self.markers = markers
    self.x_labels = x_labels
    self.y_labels = y_labels
def testNonexistentColumn(self):
    """val_units on a column that is not present should raise NoColumnError"""
    unit_map = {'A': 'm/s', 'B': 'kg/s', 'C': 'Pa', 'cheetah': 's'}
    frame = df.Dataframe([Data.a, Data.b, Data.c], units_dict=unit_map)
    # 'mouse' is not a key of the units dictionary above.
    self.assertRaises(df.NoColumnError, frame.val_units, 'mouse')