def test_equals(self):
    """Check DataManager.equals() together with the == and != operators.

    Two managers built from the same DataFrame must compare equal; a
    manager built from a second, independently generated random DataFrame
    must compare unequal.
    """
    shared_frame = create_random_dataframe()
    left = DataManager(shared_frame)
    right = DataManager(shared_frame)

    # same underlying data -> equal through both the method and the operator
    self.assertTrue(left.equals(right))
    self.assertTrue(left == right)

    other_frame = create_random_dataframe()
    different = DataManager(other_frame)

    # different underlying data -> unequal through equals(), == and !=
    self.assertFalse(left.equals(different))
    self.assertFalse(left == different)
    self.assertTrue(left != different)
def test_deepcopy(self):
    """Check that copy.deepcopy() of a DataManager yields an equal, distinct object."""
    source_frame = create_random_dataframe()
    original = DataManager(source_frame)
    duplicate = copy.deepcopy(original)

    # the copy must carry equal content ...
    self.assertTrue(original.equals(duplicate))
    # ... while being a separate instance
    self.assertIsNot(original, duplicate)
def test_get_data(self):
    """A simple test case of the DataManager.get_data method.

    Builds a DataManager from a random DataFrame and checks that the frame
    returned by get_data() is equal to the input frame but is not the very
    same object.
    """
    # create a DataManager from a DataFrame with random data
    data_df = create_random_dataframe()
    data_manager = DataManager(data_df)

    # get data from the DataManager
    results_df = data_manager.get_data()

    # assert equality and identity on the SAME returned frame, so that both
    # properties are verified for one get_data() result
    pd.testing.assert_frame_equal(results_df, data_df)
    self.assertIsNot(results_df, data_df)
def test_to_hdf_path(self):
    """Round-trip a DataManager through to_hdf()/read_hdf() using a file path.

    The manager is written to ``self.temp_hdf_path`` under a fixed key and
    read back; the restored manager must equal the one that was saved.
    """
    hdf_key = '/dm'
    frame = create_random_dataframe()
    origin = DataManager.create_data_origin(frame, 'test')
    saved = DataManager(frame, origin)

    # write to disk by path, then load from the same path and key
    saved.to_hdf(self.temp_hdf_path, hdf_key)
    restored = DataManager.read_hdf(self.temp_hdf_path, hdf_key)

    self.assertTrue(saved.equals(restored))
def test_add_data_manager_simple(self):
    """Check that adding two halves of a data set reproduces the whole.

    A 50-row random DataFrame is wrapped in one DataManager and also split
    into rows [0, 25) and [25, 50), each wrapped separately. The result of
    add_data_manager() on the two halves must equal the full manager.
    """
    full_frame = create_random_dataframe(number_of_rows=50)
    whole = DataManager(full_frame)

    # wrap the first and second 25 rows in their own managers
    first_half = DataManager(full_frame.iloc[:25])
    second_half = DataManager(full_frame.iloc[25:])
    recombined = first_half.add_data_manager(second_half)

    # the recombined manager must match the one built from the full frame
    self.assertTrue(whole.equals(recombined))
def test_init_without_origin(self):
    """Test the initialization method of DataManager with no origin DataFrame.

    A DataManager is constructed from a random DataFrame only. The test
    expects get_data() to return a frame equal to the input, and
    get_origin() to return a frame with one row per column of the input
    whose 'origin' value is NaN. The returned frames must not be the same
    objects as the frames held by the test.
    """
    data_df = create_random_dataframe()

    # expected origin table: one [variable, NaN] row per column of the data
    origin_data = [[var, np.nan] for var in data_df.keys()]
    nan_origin_df = pd.DataFrame(data=origin_data, columns=['variable', 'origin'])

    data_manager_without_origin = DataManager(data_df)

    # test that data is being stored correctly
    pd.testing.assert_frame_equal(data_manager_without_origin.get_data(), data_df)
    pd.testing.assert_frame_equal(data_manager_without_origin.get_origin(), nan_origin_df)

    # test that DataFrames aren't the same instance; assertIsNot reports
    # both operands on failure, unlike assertFalse(a is b)
    self.assertIsNot(data_manager_without_origin.get_data(), data_df)
    # NOTE(review): nan_origin_df is built locally and never handed to the
    # DataManager, so this check cannot detect aliasing - kept for parity
    self.assertIsNot(data_manager_without_origin.get_origin(), nan_origin_df)
def test_to_hdf_buf(self):
    """Round-trip a DataManager through to_hdf()/read_hdf() using a pd.HDFStore.

    The manager is written into a store opened on ``self.temp_hdf_path``,
    the store is closed, and a freshly opened store on the same path is
    used to read it back. The restored manager must equal the saved one.
    """
    hdf_key = '/dm/'
    frame = create_random_dataframe()
    origin = DataManager.create_data_origin(frame, 'test')
    saved = DataManager(frame, origin)

    # write through an open store; leaving the block closes it
    with pd.HDFStore(self.temp_hdf_path) as out_store:
        saved.to_hdf(out_store, hdf_key)

    # reopen the file and read the manager back through the store handle
    with pd.HDFStore(self.temp_hdf_path) as in_store:
        restored = DataManager.read_hdf(in_store, hdf_key)

    self.assertTrue(saved.equals(restored))