def test_allow_1_to_N_relationship(self):
    """Fitting accepts a mapping from one column to several columns."""

    class C(r.ColumnTransformer):
        def get_column_mapping(self):
            # "color" fans out to two columns, "price" maps to itself.
            mapping = {"color": ["color1", "color2"]}
            mapping["price"] = "price"
            return mapping

    transformer = C("color", "price")
    frame = create_df_all()
    # No explicit assertion: the test passes if fit() does not raise.
    transformer.fit(frame)
def test_transform_columns(self):
    """fit_transform() forwards the frame and columns to _transform_columns."""
    transformer = r.ColumnsTransformer("color", "price")
    frame = create_df_all()
    hook = MagicMock(return_value=frame)
    transformer._transform_columns = hook
    transformer.fit_transform(frame)
    # TODO: Columns order does not really matter.
    expected_cols = ["color", "price"]
    hook.assert_called_once_with(frame, expected_cols)
def _test_transform_column_calls(self, col_format):
    """Check _transform_column is called for each column, in any order."""
    transformer = r.ColumnTransformer(
        "color", "price", col_format=col_format)
    hook = MagicMock(return_value=[1, 1, 1])
    transformer._transform_column = hook
    frame = create_df_all()
    transformer.fit_transform(frame)
    expected_calls = [call(frame, "price"), call(frame, "color")]
    hook.assert_has_calls(expected_calls, any_order=True)
def test_inverse_transform(self):
    """A select cannot be undone, so inverse_transform acts as identity.

    Returning the input unchanged avoids errors when it is called.
    """
    selector = r.SelectTransformer()
    original = create_df_all()
    restored = selector.inverse_transform(original)
    self.assertTrue(original.equals(restored))
def test_select_one_column(self):
    """Selecting a single column keeps only that column."""
    frame = create_df_all()
    selector = r.SelectTransformer()
    selector.cols = ["color"]
    selected = selector.transform(frame)
    remaining = list(selected.columns)
    self.assertEqual(remaining, ["color"])
def test_drop_one_column(self):
    """Dropping a single column leaves the other columns in place."""
    frame = create_df_all()
    dropper = r.DropTransformer()
    dropper.cols = ["color"]
    reduced = dropper.transform(frame)
    remaining = list(reduced.columns)
    self.assertEqual(remaining, ["price", "amount"])
def test_fit(self):
    """fit() hands the wrapped estimator both columns as one column vector.

    The array passed to the wrapped ``fit`` is expected to have shape
    (6, 1) and to hold the "price" and "amount" values interleaved row by
    row.
    """
    sk = SklearnTransformerMock()
    sk.fit = MagicMock()
    t = r.SklearnFitOneWrapper(sk, "price", "amount")
    # The return value of fit() is not checked here, so it is not bound.
    t.fit(create_df_all())
    a = np.array([1.5, 1, 2.5, 2, 3.5, 3]).reshape(-1, 1)
    # First positional argument of the first recorded call to sk.fit.
    a_out = sk.fit.call_args_list[0][0][0]
    self.assertEqual(a.shape, a_out.shape)
    # flatten() is not really needed.
    self.assertListEqual(list(a.flatten()), list(a_out.flatten()))
def test_init(self):
    """fit_transform() downcasts every column to the expected dtype."""
    reducer = r.ReduceMemoryTransformer(verbose=True)
    frame = create_df_all()
    reducer.fit_transform(frame)
    expected_dtypes = {
        "color": pd.CategoricalDtype(["blue", "red"]),
        "amount": np.dtype("int8"),
        "price": np.dtype("float32"),
    }
    # This transformer modifies df in place, so the input frame is checked.
    actual_dtypes = frame.dtypes.to_dict()
    self.assertDictEqual(actual_dtypes, expected_dtypes)
def test_inverse_transform(self):
    """Dropped columns cannot be recreated, so the inverse is identity.

    A pipeline would fail on a missing inverse transform method, so check
    that inverse_transform simply returns a frame equal to its input.
    """
    dropper = r.DropTransformer()
    original = create_df_all()
    restored = dropper.inverse_transform(original)
    self.assertTrue(original.equals(restored))
def test_fit_transform(self):
    """fit_transform() replaces "color" with int8 category codes."""
    frame = create_df_all()
    encoder = r.CategoryEncoder("color")
    encoded = encoder.fit_transform(frame)
    expected = pd.DataFrame({
        "color": [1, 0, 1],
        "price": [1.5, 2.5, 3.5],
        "amount": [1, 2, 3]
    })
    # Only the encoded column is narrowed to int8.
    expected = expected.astype({"color": "int8"})
    self.assertTrue(encoded.equals(expected))
def test_transform_columns_inverse(self):
    """inverse_transform() passes the formatted column names to the hook."""
    transformer = r.ColumnsTransformer(
        "color", "price", col_format="{}_out")
    frame = create_df_all()
    inverse_result = frame[["color", "price"]].copy()
    inverse_result.columns = ["color", "price"]
    hook = MagicMock(return_value=inverse_result)
    transformer._inverse_transform_columns = hook
    transformer.fit(frame)
    transformed = transformer.transform(frame)
    transformer.inverse_transform(transformed)
    # TODO: Columns order does not matter.
    hook.assert_called_once_with(transformed, ["color_out", "price_out"])
def test_not_N_to_1_relationship(self):
    """fit() raises ValueError when the mapping has a multi-column key."""

    class C(r.ColumnTransformer):
        def get_column_mapping(self):
            # 2 input cols and 3 output cols, but still N to N.
            many_to_one = {("color", "price"): "color_price"}
            one_to_many = {"color": ("color1", "color2")}
            return {**many_to_one, **one_to_many}

    transformer = C("color", "price")
    pattern = "Only 1 to N relationships.*"
    with self.assertRaisesRegex(ValueError, pattern):
        transformer.fit(create_df_all())
def _test_transform_column_values(self, col_format):
    """Check transformed values land in columns named via col_format."""
    transformer = r.ColumnTransformer(
        "color", "price", col_format=col_format)
    transformer._transform_column = MagicMock(return_value=[1, 1, 1])
    frame = create_df_all()
    result = transformer.fit_transform(frame)
    color_name = col_format.format("color")
    price_name = col_format.format("price")
    expected = pd.DataFrame({
        color_name: [1, 1, 1],
        price_name: [1, 1, 1],
        "amount": [1, 2, 3]
    })
    self.assertTrue(result.equals(expected))
    # No changes on input df.
    self.assertFalse(frame.equals(expected))
def test_transform(self):
    """The query keeps only matching rows and their original index."""
    frame = create_df_all()
    query = r.QueryTransformer("color == 'red'")
    filtered = query.fit_transform(frame)
    rows = pd.DataFrame({
        "color": ["red", "red"],
        "price": [1.5, 3.5],
        "amount": [1, 3],
        "index": [0, 2]
    })
    # Move the "index" column into the frame index (labels 0 and 2).
    expected = rows.set_index("index")
    self.assertTrue(expected.equals(filtered))
def test_inverse_transform_column_values(self):
    """inverse_transform() restores the original names with inverse values."""
    transformer = r.ColumnTransformer(
        "color", "price", col_format="{}_out")
    transformer._transform_column = MagicMock(return_value=[1, 1, 1])
    transformer._inverse_transform_column = MagicMock(
        return_value=[2, 2, 2])
    frame = create_df_all()
    forward = transformer.fit_transform(frame)
    restored = transformer.inverse_transform(forward)
    expected = pd.DataFrame({
        "color": [2, 2, 2],
        "price": [2, 2, 2],
        "amount": [1, 2, 3]
    })
    self.assertTrue(restored.equals(expected))
    # No changes on input df.
    self.assertFalse(frame.equals(expected))
def test_fit_transform_features_name_result(self):
    """Output columns are named from the wrapped estimator's features."""
    sk = SklearnTransformerMock()
    sk.get_feature_names = MagicMock(return_value=["0_blue", "0_red"])
    # Two feature columns, three rows once transposed.
    transformed = np.array([[1, 2, 3], [3, 2, 1]]).T
    sk.transform = MagicMock(return_value=transformed)
    wrapper = r.SklearnColumnsWrapper(sk, "color")
    result = wrapper.fit_transform(create_df_all())
    expected = pd.DataFrame({
        "color=blue": [1, 2, 3],
        "color=red": [3, 2, 1],
        "price": [1.5, 2.5, 3.5],
        "amount": [1, 2, 3]
    })
    sk.transform.assert_called_once()
    self.assertTrue(result.equals(expected))
def test_inverse_transform_unknown_default(self):
    """inverse_transform() maps code 0 back to the configured unknown_value."""
    encoder = r.CategoryEncoder("color", unknown_value="UNKNOWN")
    encoder.fit(create_df_all())
    encoded = pd.DataFrame({
        "color": [2, 0],
        "price": [1.5, 2.5],
        "amount": [1, 2]
    })
    decoded = encoder.inverse_transform(encoded)
    expected = pd.DataFrame({
        "color": ["red", "UNKNOWN"],
        "price": [1.5, 2.5],
        "amount": [1, 2]
    })
    self.assertTrue(decoded.equals(expected))
def test_fit_transform_unknown_default(self):
    """With unknown_value set, "yellow" (not in the fit data) encodes as 0."""
    encoder = r.CategoryEncoder("color", unknown_value="UNKNOWN")
    encoder.fit(create_df_all())
    raw = pd.DataFrame({
        "color": ["red", "yellow"],
        "price": [1.5, 2.5],
        "amount": [1, 2]
    })
    encoded = encoder.transform(raw)
    expected = pd.DataFrame({
        "color": [2, 0],
        "price": [1.5, 2.5],
        "amount": [1, 2]
    })
    # Only the encoded column is narrowed to int8.
    expected = expected.astype({"color": "int8"})
    self.assertTrue(encoded.equals(expected))
def test_transform_column_1_N(self):
    """A 1-to-N mapping spreads the per-row values over the output columns.

    ``_transform_column`` is mocked to return one ``[1, 2]`` pair per row;
    the result is expected to hold ``color1`` and ``color2`` in place of
    ``color``, followed by the untouched columns.
    """
    t = r.ColumnTransformer("color")
    t.get_column_mapping = MagicMock(
        return_value={"color": ["color1", "color2"]})
    t._transform_column = MagicMock(return_value=[[1, 2], [1, 2], [1, 2]])
    df = create_df_all()
    df_out = t.fit_transform(df)
    df_expected = pd.DataFrame({
        "color1": [1, 1, 1],
        "color2": [2, 2, 2],
        "price": [1.5, 2.5, 3.5],
        "amount": [1, 2, 3]
    })
    calls = [call(df, "color")]
    t._transform_column.assert_has_calls(calls, any_order=True)
    # Show both frames only on failure instead of printing on every run.
    self.assertTrue(
        df_out.equals(df_expected),
        "got:\n{}\nexpected:\n{}".format(df_out, df_expected))
def _test_fit_transform_unknown_none(self, error_unknown):
    """Check "yellow" (not in the fit data) encodes as -1.

    No ``unknown_value`` is configured here.

    Args:
        error_unknown: Value forwarded to ``CategoryEncoder`` as its
            ``error_unknown`` argument.
    """
    df = create_df_all()
    t = r.CategoryEncoder("color", error_unknown=error_unknown)
    t.fit(df)
    df_in = pd.DataFrame({
        "color": ["red", "yellow"],
        "price": [1.5, 2.5],
        "amount": [1, 2]
    })
    df_out = t.transform(df_in)
    df_expected = pd.DataFrame({
        "color": [1, -1],
        "price": [1.5, 2.5],
        "amount": [1, 2]
    })
    df_expected["color"] = df_expected["color"].astype("int8")
    # Show both frames only on failure instead of printing on every run.
    self.assertTrue(
        df_out.equals(df_expected),
        "got:\n{}\nexpected:\n{}".format(df_out, df_expected))
def _test_select_non_existing(self, error):
    """Check selecting a missing column keeps only the existing one."""
    frame = create_df_all()
    selector = r.SelectTransformer(cols_not_found_error=error)
    # "colorize" is the non-existing column.
    selector.cols = ["color", "colorize"]
    selected = selector.transform(frame)
    remaining = list(selected.columns)
    self.assertEqual(remaining, ["color"])
def test_init_no_target_in_df(self):
    """fit() raises ValueError when the target column is not in the frame."""
    encoder = r.TargetEncoderTransformer(target="label")
    message = "Target must be in the fitted DataFrame"
    with self.assertRaisesRegex(ValueError, message):
        encoder.fit(create_df_all())
def _test_drop_non_existing(self, error):
    """Check dropping a missing column still drops the existing one."""
    frame = create_df_all()
    dropper = r.DropTransformer(cols_not_found_error=error)
    # "colorize" is the non-existing column.
    dropper.cols = ["color", "colorize"]
    reduced = dropper.transform(frame)
    remaining = list(reduced.columns)
    self.assertEqual(remaining, ["price", "amount"])