def test_inverse_transform_cols_map_str_and_tuples(self):
    """Round-trip with 1:1 and n:1 entries in the same column map.

    The mapping sends ``"c1"`` to ``"c1"`` (1:1) and ``("c1", "t1")`` to
    ``"c1t1"`` (n:1). After fit, transform and inverse_transform the
    frame is expected to expose the columns ``c1``, ``t1``, ``c2``.
    """

    class C(r.RecipipeTransformer):

        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)

        def get_column_mapping(self):
            return {"c1": "c1", ("c1", "t1"): "c1t1"}

        def _transform(self, df):
            selected = df[["c1", "t1"]]
            selected.columns = ["c1", "c1t1"]
            return selected

        def _inverse_transform(self, df):
            restored = df[["c1", "c1t1"]]
            restored.columns = ["c1", "t1"]
            return restored

    transformer = C("c1", "t1")
    frame = create_df_3dtypes()
    transformer.fit(frame)
    transformed = transformer.transform(frame)
    round_trip = transformer.inverse_transform(transformed)
    expected_cols = ["c1", "t1", "c2"]
    self.assertListEqual(list(round_trip.columns), expected_cols)
def test_fit_cols_all(self):
    """When no cols are specified, every column must be fitted."""
    transformer = RecipipeTransformerMock()
    frame = create_df_3dtypes()
    transformer.fit(frame)
    # All three columns of the fixture are expected, after a single fit call.
    self.assertListEqual(transformer.cols, ["c1", "c2", "t1"])
    self.assertEqual(transformer.n_fit, 1)
def _test_fit_check_column_mapping(self):
    """Fitting with the mocked column mapping should raise ValueError.

    NOTE(review): the leading underscore presumably keeps test runners
    from collecting this method -- confirm whether it is disabled on
    purpose.
    """
    transformer = r.RecipipeTransformer()
    mocked_mapping = {
        "c1": ["c1", "c2"],
        "c2": [],
    }
    transformer.get_column_mapping = MagicMock(return_value=mocked_mapping)
    with self.assertRaises(ValueError):
        transformer.fit(create_df_3dtypes())
def test_transform_all_columns(self):
    """Transforming a frame should return the same column names."""
    transformer = RecipipeTransformerMock()
    frame = create_df_3dtypes()
    transformer.fit(frame)
    result = transformer.transform(frame)
    self.assertListEqual(list(result.columns), ["c1", "c2", "t1"])
def test_cols_taken_from_col_map(self):
    """Without explicit cols, they should be obtained from the col map."""

    class C(r.RecipipeTransformer):

        def get_column_mapping(self):
            return {"c1": ["hi", "bye"]}

    transformer = C()
    frame = create_df_3dtypes()
    transformer.fit(frame)
    # Only the key of the mapping is expected as a fitted column.
    self.assertListEqual(transformer.cols, ["c1"])
def test_transform_some_columns(self):
    """Transform only ``c1`` and ``c2`` and check the output columns.

    ``_transform`` returns just the fitted columns; the resulting frame
    is still expected to contain ``t1`` as well.
    """

    class C(r.RecipipeTransformer):

        def _transform(self, df):
            return df[self.cols]

    transformer = C("c1", "c2")
    frame = create_df_3dtypes()
    transformer.fit(frame)
    result = transformer.transform(frame)
    expected_cols = ["c1", "c2", "t1"]
    self.assertListEqual(list(result.columns), expected_cols)
def test_transform_keep_original_false_and_format(self):
    """Check output names with ``keep_original=False`` and a col format.

    ``_transform`` renames the fitted columns through ``col_format``; the
    result is expected to hold ``c1_out``, ``c2_out`` and ``t1``.
    """

    class C(r.RecipipeTransformer):

        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)

        def _transform(self, df):
            selected = df[self.cols]
            renamed = []
            for col_name in selected.columns:
                renamed.append(self.col_format.format(col_name))
            selected.columns = renamed
            return selected

    transformer = C("c1", "c2", keep_original=False, col_format="{}_out")
    frame = create_df_3dtypes()
    transformer.fit(frame)
    result = transformer.transform(frame)
    expected_cols = ["c1_out", "c2_out", "t1"]
    self.assertListEqual(list(result.columns), expected_cols)
def test_transform_cols_map_tuples(self):
    """Transform with tuples on both sides of the column map.

    ``"c1"`` maps to the tuple ``("c1_1", "c1_2")`` and the tuple
    ``("c1", "t1")`` maps to ``"c1t1"``. The transformed frame is
    expected to hold ``c1_1``, ``c1_2``, ``c1t1`` and ``c2``.
    """

    class C(r.RecipipeTransformer):

        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)

        def get_column_mapping(self):
            return {"c1": ("c1_1", "c1_2"), ("c1", "t1"): "c1t1"}

        def _transform(self, df):
            # "c1" is selected twice so it can feed both output columns.
            selected = df[["c1", "c1", "t1"]]
            selected.columns = ["c1_1", "c1_2", "c1t1"]
            return selected

    transformer = C("c1", "t1")
    frame = create_df_3dtypes()
    transformer.fit(frame)
    result = transformer.transform(frame)
    expected_cols = ["c1_1", "c1_2", "c1t1", "c2"]
    self.assertListEqual(list(result.columns), expected_cols)
def test_inverse_transform_keep_original_without_original(self):
    """Inverse-transform after the original columns were dropped.

    The transformer is built with ``keep_original=True``; ``c1`` and
    ``c2`` are removed from the transformed frame before calling
    ``inverse_transform``, which is expected to give ``c1``, ``c2``, ``t1``.
    """

    class C(r.RecipipeTransformer):

        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)

        def _transform(self, df):
            selected = df[self.cols]
            selected.columns = ["c1_out", "c2_out"]
            return selected

        def _inverse_transform(self, df):
            restored = df[["c1_out", "c2_out"]]
            restored.columns = ["c1", "c2"]
            return restored

    transformer = C("c*", keep_original=True, col_format="{}_out")
    frame = create_df_3dtypes()
    transformer.fit(frame)
    transformed = transformer.transform(frame)
    without_originals = transformed.drop(["c1", "c2"], axis=1)
    restored_frame = transformer.inverse_transform(without_originals)
    expected_cols = ["c1", "c2", "t1"]
    self.assertListEqual(list(restored_frame.columns), expected_cols)
def test_fit_columns_no_match(self):
    """A pattern matching no column should make ``fit_columns`` raise.

    Raises:
        AssertionError: if ``fit_columns`` does not raise ``ValueError``
            for the pattern ``"r*"``.
    """
    with self.assertRaises(ValueError):
        # Only the exception matters here, so the result is not bound.
        fit_columns(create_df_3dtypes(), ["r*"])
def test_fit_columns_cols_dtype(self):
    """Pattern ``"c*"`` filtered by ``int`` is expected to yield ``c1``."""
    frame = create_df_3dtypes()
    matched = fit_columns(frame, ["c*"], int)
    self.assertListEqual(matched, ["c1"])
def test_fit_columns_cols_empty(self):
    """Empty cols and dtype lists are expected to select every column."""
    frame = create_df_3dtypes()
    matched = fit_columns(frame, [], [])
    self.assertListEqual(matched, ["c1", "c2", "t1"])
def test_fit_exclude(self):
    """Excluded names and nested patterns should leave only ``t1``."""
    excluded = ["c1", ["c*"]]
    transformer = RecipipeTransformerMock(exclude=excluded)
    transformer.fit(create_df_3dtypes())
    self.assertListEqual(transformer.cols, ["t1"])
    self.assertEqual(transformer.n_fit, 1)
def test_fit_columns_no_match_no_error(self):
    """With ``raise_error=False`` an unmatched pattern gives an empty list."""
    frame = create_df_3dtypes()
    matched = fit_columns(frame, ["r*"], raise_error=False)
    self.assertListEqual(matched, [])
def test_fit_cols_and_dtype_exclude(self):
    """Pattern ``"c*"`` with ``int`` excluded is expected to fit ``c2``."""
    dtype_filter = {"exclude": int}
    transformer = RecipipeTransformerMock("c*", dtype=dtype_filter)
    transformer.fit(create_df_3dtypes())
    self.assertListEqual(transformer.cols, ["c2"])
    self.assertEqual(transformer.n_fit, 1)
def test_fit_columns_duplicates_drop(self):
    """A column matched by two specs should appear only once."""
    frame = create_df_3dtypes()
    # "c1" is matched by both the pattern and the explicit name.
    matched = fit_columns(frame, cols=["c*", "c1"])
    self.assertListEqual(matched, ["c1", "c2"])
def test_fit_columns_no_dtype_in_df(self):
    """Filtering by ``float`` on the fixture is expected to match nothing."""
    frame = create_df_3dtypes()
    matched = fit_columns(frame, dtype=float)
    self.assertListEqual(matched, [])
def test_fit_columns_no_cols_dtype(self):
    """Without cols or dtype, every column is expected to be returned."""
    frame = create_df_3dtypes()
    matched = fit_columns(frame)
    self.assertListEqual(matched, ["c1", "c2", "t1"])
def test_fit_cols_keep_original_collision(self):
    """``keep_original`` only works when no name collisions exist."""
    transformer = RecipipeTransformerMock(keep_original=True)
    frame = create_df_3dtypes()
    with self.assertRaises(ValueError):
        transformer.fit(frame)
def test_fit_columns_cols(self):
    """Pattern ``"c*"`` is expected to match ``c1`` and ``c2``."""
    frame = create_df_3dtypes()
    matched = fit_columns(frame, ["c*"])
    self.assertListEqual(matched, ["c1", "c2"])