def test_quartile_numeric_object(self):
    """Flag outliers when numeric values live in an object-typed column.

    Nine values of 1.0 followed by a single 2.0 are stored with
    ``dtype='object'``. Only the last row (label 9) is expected back,
    flagged ``'extreme'`` by the ``'quartile'`` method, and every column of
    the result is expected to have object dtype.
    """
    df = pandas.DataFrame({'a': [1.0] * 9 + [2.0]}, dtype='object')
    # ``.ix`` was removed in pandas 1.0. On the default integer index it was
    # label-based, so ``.loc`` selects the same rows. Copy so the column
    # assignments below do not write into a slice of ``df``.
    expected = df.loc[9:, :].copy()
    expected.loc[:, 'meas_flag'] = ['extreme']
    expected.loc[:, 'meas_method'] = ['quartile']
    result = outliers(df, 'a', 'quartile')
    self.assertEqual(expected.to_string(), result.to_string())
    self.assertTrue(all(['object', 'object', 'object'] == result.dtypes))
def test_quartile_none(self):
    """Flag outliers in a data frame that contains a missing value.

    Eight values of 1.0, one ``None`` and a final 2.0 are supplied. Only the
    last row (label 9) is expected back, flagged ``'extreme'`` by the
    ``'quartile'`` method.
    """
    df = pandas.DataFrame({'a': [1.0] * 8 + [None, 2.0]})
    # ``.ix`` was removed in pandas 1.0. On the default integer index it was
    # label-based, so ``.loc`` selects the same rows. Copy so the column
    # assignments below do not write into a slice of ``df``.
    expected = df.loc[9:, :].copy()
    expected.loc[:, 'meas_flag'] = ['extreme']
    expected.loc[:, 'meas_method'] = ['quartile']
    self.assertEqual(
        expected.to_string(),
        outliers(df, 'a', 'quartile').to_string())
def test_quartile_empty(self):
    """Quartile outlier detection on an empty DataFrame returns no rows."""
    # No data for Station-NONE, so the query is expected to come back empty.
    selection = {
        'company': 'Company-1',
        'site': 'Site-1',
        'device_group': '1000',
        'tester': 'Station-NONE',
    }
    window_start = ts[0]
    window_end = ts[14] + timedelta(seconds=0.5)
    measurements = ec.query_measurements_original(
        selection, window_start, window_end)
    df = measurements.toPandas()
    flagged = outliers(df, 'meas_value', 'quartile')
    self.assertEqual(0, flagged.shape[0])
def test_quartile(self):
    """Flag mild and extreme outliers in the Station-2 measurements.

    Rows labelled 10 through 14 of the queried frame are expected back with
    alternating ``'mild'`` / ``'extreme'`` flags and ``'quartile'`` as the
    method.
    """
    df = ec.query_measurements_original(
        {
            'company': 'Company-1',
            'site': 'Site-1',
            'device_group': '1000',
            'tester': 'Station-2',
        },
        ts[0],
        ts[14] + timedelta(seconds=0.5)).toPandas()
    # ``.ix`` was removed in pandas 1.0. On an integer index it was
    # label-based (end label inclusive), which ``.loc`` reproduces. Copy so
    # the column assignments below do not write into a slice of ``df``.
    expected = df.loc[10:14, :].copy()
    expected.loc[:, 'meas_flag'] = ['mild', 'extreme', 'mild', 'extreme']
    # A scalar broadcasts to every selected row; the former one-element list
    # relied on length-1 broadcasting across several rows.
    expected.loc[:, 'meas_method'] = 'quartile'
    self.assertEqual(
        expected.to_string(),
        outliers(df, 'meas_value', 'quartile').to_string())
def test_invalid_method(self):
    """An unrecognised method name raises ``ValueError``."""
    df = pandas.DataFrame({'a': [1.0]})
    with self.assertRaises(ValueError):
        # Use the column that exists in ``df`` so the invalid method name is
        # the only thing wrong with this call.
        outliers(df, 'a', 'INVALID_METHOD')
def test_quartile_invalid_column(self):
    """A column name that is absent from the frame raises ``KeyError``."""
    frame = pandas.DataFrame({'a': [1.0]})
    self.assertRaises(
        KeyError, outliers, frame, 'INVALID_COLUMN_NAME', 'quartile')
def test_quartile_string(self):
    """A column mixing floats with a string yields no outlier rows."""
    values = [1.0] * 8 + ['A STRING']
    frame = pandas.DataFrame({'a': values})
    flagged = outliers(frame, 'a', 'quartile')
    self.assertEqual(0, flagged.shape[0])