Exemple #1
0
    def test_inverse_transform_cols_map_str_and_tuples(self):
        """Round-trip a mapping that mixes a 1:1 entry with an n:1 entry.

        The column mapping declares ``"c1" -> "c1"`` (1:1) together with
        ``("c1", "t1") -> "c1t1"`` (n:1). After transforming and then
        inverting, the frame is expected to expose ``c1``, ``t1``, ``c2``.
        """
        class C(r.RecipipeTransformer):
            def __init__(self, *args, **kwargs):
                super().__init__(*args, **kwargs)

            def get_column_mapping(self):
                # One plain string key and one tuple key in the same map.
                mapping = {"c1": "c1"}
                mapping[("c1", "t1")] = "c1t1"
                return mapping

            def _transform(self, df):
                forward = df[["c1", "t1"]]
                forward.columns = ["c1", "c1t1"]
                return forward

            def _inverse_transform(self, df):
                backward = df[["c1", "c1t1"]]
                backward.columns = ["c1", "t1"]
                return backward

        transformer = C("c1", "t1")
        frame = create_df_3dtypes()
        transformer.fit(frame)
        transformed = transformer.transform(frame)
        restored = transformer.inverse_transform(transformed)
        self.assertListEqual(list(restored.columns), ["c1", "t1", "c2"])
Exemple #2
0
    def test_fit_cols_all(self):
        """When no cols are specified, every column must be fitted.

        Also checks that the mock's ``n_fit`` attribute reads 1 after a
        single ``fit`` call.
        """
        frame = create_df_3dtypes()
        transformer = RecipipeTransformerMock()
        transformer.fit(frame)
        self.assertListEqual(transformer.cols, ["c1", "c2", "t1"])
        self.assertEqual(transformer.n_fit, 1)
Exemple #3
0
 def _test_fit_check_column_mapping(self):
     """Fitting with the stubbed column mapping below must raise ValueError.

     NOTE(review): the leading underscore means this is not named
     ``test_*``; presumably it is disabled on purpose -- confirm.
     """
     transformer = r.RecipipeTransformer()
     stub_mapping = {
         "c1": ["c1", "c2"],
         "c2": [],
     }
     # Replace the instance's mapping provider with a canned answer.
     transformer.get_column_mapping = MagicMock(return_value=stub_mapping)
     with self.assertRaises(ValueError):
         transformer.fit(create_df_3dtypes())
Exemple #4
0
    def test_transform_all_columns(self):
        """Transforming with the mock must leave the column list unchanged."""
        transformer = RecipipeTransformerMock()
        frame = create_df_3dtypes()
        transformer.fit(frame)
        transformed = transformer.transform(frame)
        result_cols = list(transformed.columns)
        self.assertListEqual(result_cols, ["c1", "c2", "t1"])
Exemple #5
0
    def test_cols_taken_from_col_map(self):
        """Without explicit cols, the fitted cols come from the column map.

        The mapping's only key is ``"c1"``, so ``cols`` must be ``["c1"]``.
        """
        class C(r.RecipipeTransformer):
            def get_column_mapping(self):
                outputs = ["hi", "bye"]
                return {"c1": outputs}

        transformer = C()
        frame = create_df_3dtypes()
        transformer.fit(frame)
        self.assertListEqual(transformer.cols, ["c1"])
Exemple #6
0
    def test_transform_some_columns(self):
        """Transform only ``c1`` and ``c2``; ``t1`` must still be in the output.

        ``_transform`` returns just the fitted columns, yet the final frame
        is expected to list ``c1``, ``c2`` and ``t1``.
        """
        class C(r.RecipipeTransformer):
            def _transform(self, df):
                fitted_only = df[self.cols]
                return fitted_only

        transformer = C("c1", "c2")
        frame = create_df_3dtypes()
        transformer.fit(frame)
        transformed = transformer.transform(frame)
        self.assertListEqual(list(transformed.columns), ["c1", "c2", "t1"])
Exemple #7
0
    def test_transform_keep_original_false_and_format(self):
        """Renamed outputs replace the originals when keep_original is False.

        With ``col_format="{}_out"`` the expected columns are ``c1_out``,
        ``c2_out`` and the untouched ``t1``.
        """
        class C(r.RecipipeTransformer):
            def __init__(self, *args, **kwargs):
                super().__init__(*args, **kwargs)

            def _transform(self, df):
                renamed = df[self.cols]
                # Apply the configured format string to every column label.
                renamed.columns = list(
                    map(self.col_format.format, renamed.columns))
                return renamed

        transformer = C("c1", "c2", keep_original=False, col_format="{}_out")
        frame = create_df_3dtypes()
        transformer.fit(frame)
        transformed = transformer.transform(frame)
        self.assertListEqual(
            list(transformed.columns), ["c1_out", "c2_out", "t1"])
Exemple #8
0
    def test_transform_cols_map_tuples(self):
        """Transform with tuples on both sides of the column mapping.

        ``"c1"`` maps to the tuple ``("c1_1", "c1_2")`` and the tuple
        ``("c1", "t1")`` maps to ``"c1t1"``. The expected output columns are
        ``c1_1``, ``c1_2``, ``c1t1`` and ``c2``.
        """
        class C(r.RecipipeTransformer):
            def __init__(self, *args, **kwargs):
                super().__init__(*args, **kwargs)

            def get_column_mapping(self):
                mapping = {"c1": ("c1_1", "c1_2")}
                mapping[("c1", "t1")] = "c1t1"
                return mapping

            def _transform(self, df):
                # "c1" is selected twice because it feeds two output columns.
                expanded = df[["c1", "c1", "t1"]]
                expanded.columns = ["c1_1", "c1_2", "c1t1"]
                return expanded

        transformer = C("c1", "t1")
        frame = create_df_3dtypes()
        transformer.fit(frame)
        transformed = transformer.transform(frame)
        self.assertListEqual(
            list(transformed.columns), ["c1_1", "c1_2", "c1t1", "c2"])
Exemple #9
0
    def test_inverse_transform_keep_original_without_original(self):
        """Invert after the kept originals were dropped from the frame.

        The transformer is built with ``keep_original=True``; ``c1`` and
        ``c2`` are removed by hand before ``inverse_transform`` runs. The
        result is still expected to list ``c1``, ``c2``, ``t1``.
        """
        class C(r.RecipipeTransformer):
            def __init__(self, *args, **kwargs):
                super().__init__(*args, **kwargs)

            def _transform(self, df):
                forward = df[self.cols]
                forward.columns = ["c1_out", "c2_out"]
                return forward

            def _inverse_transform(self, df):
                backward = df[["c1_out", "c2_out"]]
                backward.columns = ["c1", "c2"]
                return backward

        transformer = C("c*", keep_original=True, col_format="{}_out")
        frame = create_df_3dtypes()
        transformer.fit(frame)
        transformed = transformer.transform(frame)
        # Simulate a frame that no longer carries the original columns.
        without_originals = transformed.drop(["c1", "c2"], axis=1)
        restored = transformer.inverse_transform(without_originals)
        self.assertListEqual(list(restored.columns), ["c1", "c2", "t1"])
Exemple #10
0
    def test_fit_columns_no_match(self):
        """A pattern that matches no column must raise ValueError."""
        with self.assertRaises(ValueError):
            # The return value is irrelevant: the call is expected to raise.
            fit_columns(create_df_3dtypes(), ["r*"])
Exemple #11
0
    def test_fit_columns_cols_dtype(self):
        """Pattern ``c*`` combined with dtype ``int`` must yield only ``c1``."""
        frame = create_df_3dtypes()
        selected = fit_columns(frame, ["c*"], int)
        self.assertListEqual(selected, ["c1"])
Exemple #12
0
    def test_fit_columns_cols_empty(self):
        """Empty cols and dtype lists must select every column."""
        frame = create_df_3dtypes()
        selected = fit_columns(frame, [], [])
        self.assertListEqual(selected, ["c1", "c2", "t1"])
Exemple #13
0
 def test_fit_exclude(self):
     """Excluding ``c1`` and the nested ``c*`` pattern must leave ``t1``."""
     excluded = ["c1", ["c*"]]
     transformer = RecipipeTransformerMock(exclude=excluded)
     frame = create_df_3dtypes()
     transformer.fit(frame)
     self.assertListEqual(transformer.cols, ["t1"])
     self.assertEqual(transformer.n_fit, 1)
Exemple #14
0
    def test_fit_columns_no_match_no_error(self):
        """With ``raise_error=False`` an unmatched pattern gives an empty list."""
        frame = create_df_3dtypes()
        selected = fit_columns(frame, ["r*"], raise_error=False)
        self.assertListEqual(selected, [])
Exemple #15
0
 def test_fit_cols_and_dtype_exclude(self):
     """Pattern ``c*`` with ``int`` excluded by dtype must leave only ``c2``."""
     dtype_spec = {"exclude": int}
     transformer = RecipipeTransformerMock("c*", dtype=dtype_spec)
     frame = create_df_3dtypes()
     transformer.fit(frame)
     self.assertListEqual(transformer.cols, ["c2"])
     self.assertEqual(transformer.n_fit, 1)
Exemple #16
0
    def test_fit_columns_duplicates_drop(self):
        """A column matched by two specs must appear only once in the result."""
        frame = create_df_3dtypes()
        # "c1" is matched both by the wildcard and by its literal name.
        selected = fit_columns(frame, cols=["c*", "c1"])
        self.assertListEqual(selected, ["c1", "c2"])
Exemple #17
0
    def test_fit_columns_no_dtype_in_df(self):
        """Filtering by ``float`` on the fixture frame must give an empty list."""
        frame = create_df_3dtypes()
        selected = fit_columns(frame, dtype=float)
        self.assertListEqual(selected, [])
Exemple #18
0
    def test_fit_columns_no_cols_dtype(self):
        """With neither cols nor dtype given, every column must be returned."""
        frame = create_df_3dtypes()
        selected = fit_columns(frame)
        self.assertListEqual(selected, ["c1", "c2", "t1"])
Exemple #19
0
    def test_fit_cols_keep_original_collision(self):
        """Fitting the mock with ``keep_original=True`` must raise ValueError.

        Keeping the originals only works when output names do not collide
        with input names.
        """
        transformer = RecipipeTransformerMock(keep_original=True)
        with self.assertRaises(ValueError):
            frame = create_df_3dtypes()
            transformer.fit(frame)
Exemple #20
0
    def test_fit_columns_cols(self):
        """Pattern ``c*`` must select ``c1`` and ``c2``."""
        frame = create_df_3dtypes()
        selected = fit_columns(frame, ["c*"])
        self.assertListEqual(selected, ["c1", "c2"])