def test__create_extension(self, impute_mock, fit_mock, flatten_mock):
    """Tests that the create extension method returns correct parameters.

    The three mock arguments are presumably injected by patch decorators that sit
    outside this block (TODO confirm which Modeler attributes they replace). The test
    checks how ``_create_extension`` wires them together: the rows of ``table`` selected
    by ``foreign.index``, without the foreign key column, go to the impute mock exactly
    once; its return value goes to the fit mock; and the fitted model plus the child
    name go to the flatten mock, whose return value is the final result.
    """
    # Setup
    data_navigator = MagicMock()
    modeler = Modeler(data_navigator)
    table = pd.DataFrame({
        'foreign': [0, 1, 0, 1, 0, 1],
        'a': [0, 1, 0, 1, 0, 1],
        'b': [1, 2, 3, 4, 5, 6]
    })
    foreign = table[table.a == 0]
    table_info = ('foreign', 'child')

    impute_mock.return_value = pd.DataFrame({'A': [1, 2], 'B': [3, 4]})
    fit_mock.return_value = 'fitted model'
    flatten_mock.return_value = pd.Series({
        'covariance__0__0': 0.0,
        'covariance__1__0': 0.0,
        'covariance__1__1': 1.4999999999999991,
        'distribs__a__mean': 0.0,
        'distribs__a__std': 0.001,
        'distribs__b__mean': 3.0,
        'distribs__b__std': 1.632993161855452
    })

    # Run
    result = modeler._create_extension(foreign, table, table_info)

    # Check
    assert result.equals(flatten_mock.return_value)

    # Expected argument of the impute mock: the selected rows minus the 'foreign' column.
    df = pd.DataFrame({'a': [0, 1, 0, 1, 0, 1], 'b': [1, 2, 3, 4, 5, 6]})
    df = df.loc[foreign.index]

    # DataFrames cannot be compared through ``assert_called_once_with`` (``==`` is
    # element-wise), so the single call is unpacked and checked by hand. The call count
    # is pinned to exactly one; a bare ``assert len(...)`` would accept any non-zero count.
    assert len(impute_mock.call_args_list) == 1
    call_args = impute_mock.call_args_list[0]
    assert len(call_args[0]) == 1
    assert call_args[0][0].equals(df)
    assert call_args[1] == {}

    fit_mock.assert_called_once_with(impute_mock.return_value)
    flatten_mock.assert_called_once_with('fitted model', 'child')
def test__create_extension_wrong_index_return_none(self):
    """_create_extension gives back None when df's index is absent from the child table."""
    # Setup: the child table is indexed 0..2 while the foreign frame is indexed 5..9,
    # so the two indexes share no labels.
    foreign_rows = pd.DataFrame(index=range(5, 10))
    child_table = pd.DataFrame(np.eye(3), columns=['A', 'B', 'C'])
    names = ('', '')
    modeler = Modeler(MagicMock())

    # Run
    extension = modeler._create_extension(foreign_rows, child_table, names)

    # Check
    assert extension is None
def test__create_extension_without_num_child_rows(self):
    """_create_extension returns None for an empty foreign frame and an empty child table."""
    # Setup
    # The method is called unbound on a Mock standing in for ``self``, so no real
    # Modeler instance has to be built.
    fake_modeler = Mock()
    empty_foreign = pd.DataFrame({'foreign_key': []})
    empty_child_table = pd.DataFrame()
    names = ('foreign_key', 'child_name')

    # Run
    extension = Modeler._create_extension(
        fake_modeler, empty_foreign, empty_child_table, names)

    # Asserts
    assert extension is None
class ModelerTest(TestCase):
    """Tests for Modeler that run on the dataset described by tests/data/meta.json."""

    def setUp(self):
        """Set up test fixtures, if any."""
        dl = CSVDataLoader('tests/data/meta.json')
        self.dn = dl.load_data()
        self.dn.transform_data()
        self.modeler = Modeler(self.dn)

    def test__create_extension(self):
        """Tests that the create extension method returns correct parameters."""
        # Setup
        child_table = self.dn.get_data('DEMO_ORDERS')
        user = child_table[child_table['CUSTOMER_ID'] == 50]
        expected = pd.Series([
            1.500000e+00, 0.000000e+00, -1.269991e+00, 0.000000e+00, 0.000000e+00,
            0.000000e+00, -1.269991e+00, 0.000000e+00, 1.500000e+00, 0.000000e+00,
            0.000000e+00, -7.401487e-17, 1.000000e+00, 7.000000e+00, 2.449490e+00,
            4.000000e+00, 5.000000e+01, 5.000000e+01, 1.000000e-03, 5.000000e+01,
            7.300000e+02, 2.380000e+03, 7.618545e+02, 1.806667e+03
        ])

        # Run
        parameters = self.modeler._create_extension(user, child_table)

        # Check
        # The tolerance is applied to every absolute difference before reducing with
        # ``all()``. Reducing first would collapse the differences into a single bool
        # and compare that bool with the tolerance instead.
        assert (expected.subtract(parameters).abs() < 10E-3).all()

    def test__get_extensions(self):
        """_get_extensions returns one extension for a table with a child."""
        # Setup
        pk = 'ORDER_ID'
        table = 'DEMO_ORDERS'
        children = self.dn.get_children(table)

        # Run
        result = self.modeler._get_extensions(pk, children, table)

        # Check
        assert len(result) == 1
        assert result[0].shape == (10, 35)

    def test_get_extensions_no_children(self):
        """Tests that get extensions works for table with no children."""
        # Setup
        pk = 'ORDER_ITEM_ID'
        table = 'DEMO_ORDER_ITEMS'
        children = self.dn.get_children(table)
        expected_result = []

        # Run
        result = self.modeler._get_extensions(pk, children, table)

        # Check
        assert result == expected_result

    def test_CPA(self):
        """CPA leaves each entry of Modeler.tables as a DataFrame aligned with its raw table."""
        # Setup
        self.modeler.model_database()
        table_name = 'DEMO_CUSTOMERS'

        # Run
        self.modeler.CPA(table_name)

        # Check
        for name, table in self.modeler.tables.items():
            with self.subTest(table=name):
                raw_table = self.modeler.dn.tables[name].data

                # When we run Conditional Parameter Aggregation we add a key on
                # Modeler.tables for each table. It contains a not null pandas DataFrame
                # with the computed extension.
                assert isinstance(table, pd.DataFrame)
                assert raw_table.shape[0] == table.shape[0]
                assert (raw_table.index == table.index).all()
                assert all(column in table.columns for column in raw_table.columns)

    def test_flatten_model(self):
        """flatten_model returns a pandas.Series with all the params to recreate a model."""
        # Setup
        for data in self.dn.transformed_data.values():
            num_columns = data.shape[1]
            model = self.modeler.model()
            model.fit(data)

            # We generate it this way because RDT behavior is not fully deterministic
            # and transformed data can change between test runs.
            distribs_values = np.array(
                [[col_model.std, col_model.mean] for col_model in model.distribs.values()]
            ).flatten()
            expected_result = pd.Series(
                list(model.covariance.flatten()) + list(distribs_values))

            # Run
            result = self.modeler.flatten_model(model)

            # Check
            assert (result == expected_result).all()
            assert len(result) == num_columns**2 + (2 * num_columns)

    def test_impute_table(self):
        """impute_table fills all NaN values with 0 or the mean of values."""
        # Setup
        table = pd.DataFrame([
            {'A': np.nan, 'B': 10., 'C': 20.},
            {'A': 5., 'B': np.nan, 'C': 20.},
            {'A': 5., 'B': 10., 'C': np.nan},
        ])
        expected_result = pd.DataFrame([
            {'A': 5., 'B': 10., 'C': 20.},
            {'A': 5., 'B': 10., 'C': 20.},
            {'A': 5., 'B': 10., 'C': 20.},
        ])

        # Run
        result = self.modeler.impute_table(table)

        # Check
        assert result.equals(expected_result)

        # No null values are left: ``any`` (not ``all``) so a single remaining NaN fails.
        assert not result.isnull().any().any()

        # Averages are computed on every column
        for column in result:
            assert 0 not in result[column].values

    def test_model_database(self):
        """model_database computes conditions between tables and models them."""
        # Run
        self.modeler.model_database()

        # Check
        assert self.modeler.tables.keys() == self.modeler.models.keys()

    def test_get_foreign_key(self):
        """get_foreign_key returns the foreign key from a metadata and a primary key."""
        # Setup
        fields = self.modeler.dn.get_meta_data('DEMO_ORDERS')['fields']
        primary = 'CUSTOMER_ID'
        expected_result = 'CUSTOMER_ID'

        # Run
        result = self.modeler.get_foreign_key(fields, primary)

        # Check
        assert result == expected_result