class TestDataSet(unittest.TestCase): def setUp(self): self.dataSet = DataManager() def tearDown(self): pass def test_read_and_write_data_frame(self): # TODO: Better way to test this print 'Read and Write Data Frame to HDF5 file' hdf5_file = '/tmp/test.h5' node_name = 'SomeNodeName' df = pandas.DataFrame(np.random.randn(100, 4), columns=list('ABCD')) self.dataSet.write_df_to_store(node_name, df, hdf5_file) self.assertTrue(self.dataSet.check_if_node_already_exists(node_name, hdf5_file)) read_df = self.dataSet.read_df_from_store(node_name, hdf5_file) assert_frame_equal(df.sort(axis=1), read_df.sort(axis=1), check_names=True) def test_fetch_data_from_vertica_to_df(self): # TODO: Better way to test this print "Get data from Vertica and save it to Data Frame" query = """SELECT client_id, position_id, visited_domains FROM train.base_training_data WHERE log_time_hour = '2015-04-14 00:00:00' LIMIT 100""" df = self.dataSet.fetch_from_vertica_to_df(query) self.assertEquals(len(df), 100)
def setUp(self): self.dataSet = DataManager()