コード例 #1
0
    def test_allow_1_to_N_relationship(self):
        """Fit with a mapping that sends one input column to two outputs.

        There is no explicit assertion: the test passes as long as ``fit``
        does not raise.
        """

        class OneToManyTransformer(r.ColumnTransformer):
            def get_column_mapping(self):
                mapping = {"color": ["color1", "color2"], "price": "price"}
                return mapping

        transformer = OneToManyTransformer("color", "price")
        transformer.fit(create_df_all())
コード例 #2
0
 def test_transform_columns(self):
     """Check fit_transform calls _transform_columns once with df and cols."""
     frame = create_df_all()
     transformer = r.ColumnsTransformer("color", "price")
     # Replace the hook with a stub so only its received arguments matter.
     stub = MagicMock(return_value=frame)
     transformer._transform_columns = stub
     transformer.fit_transform(frame)
     # TODO: Columns order does not really matter.
     expected_cols = ["color", "price"]
     stub.assert_called_once_with(frame, expected_cols)
コード例 #3
0
 def _test_transform_column_calls(self, col_format):
     """Check _transform_column is called for both "price" and "color".

     The order of the calls is not checked.
     """
     stub = MagicMock(return_value=[1, 1, 1])
     transformer = r.ColumnTransformer(
         "color", "price", col_format=col_format)
     transformer._transform_column = stub
     frame = create_df_all()
     transformer.fit_transform(frame)
     expected_calls = [call(frame, name) for name in ("price", "color")]
     stub.assert_has_calls(expected_calls, any_order=True)
コード例 #4
0
    def test_inverse_transform(self):
        """Check inverse_transform returns a frame equal to its input.

        A selection cannot be undone, so the inverse acts as the identity
        to avoid errors.
        """

        original = create_df_all()
        selector = r.SelectTransformer()
        restored = selector.inverse_transform(original)
        self.assertTrue(original.equals(restored))
コード例 #5
0
    def test_select_one_column(self):
        """Check selecting "color" leaves only that column in the output."""

        selector = r.SelectTransformer()
        selector.cols = ["color"]
        selected = selector.transform(create_df_all())
        remaining = list(selected.columns)
        self.assertEqual(remaining, ["color"])
コード例 #6
0
    def test_drop_one_column(self):
        """Check dropping "color" leaves "price" and "amount" in order."""

        dropper = r.DropTransformer()
        dropper.cols = ["color"]
        reduced = dropper.transform(create_df_all())
        remaining = list(reduced.columns)
        self.assertEqual(remaining, ["price", "amount"])
コード例 #7
0
 def test_fit(self):
     """Check the array that fit hands to the wrapped sklearn transformer.

     The mocked ``sk.fit`` must receive a single-column array of six values.
     NOTE(review): the expected values look like "price" and "amount"
     interleaved row by row -- confirm against SklearnFitOneWrapper.
     """
     sk = SklearnTransformerMock()
     sk.fit = MagicMock()
     t = r.SklearnFitOneWrapper(sk, "price", "amount")
     # The return value of fit is not needed; only the mocked call is checked.
     t.fit(create_df_all())
     a = np.array([1.5, 1, 2.5, 2, 3.5, 3]).reshape(-1, 1)
     # First positional argument of the first call made to the mocked fit.
     a_out = sk.fit.call_args_list[0][0][0]
     self.assertEqual(a.shape, a_out.shape)
     # flatten() is not really needed.
     self.assertListEqual(list(a.flatten()), list(a_out.flatten()))
コード例 #8
0
 def test_init(self):
     t = r.ReduceMemoryTransformer(verbose=True)
     df = create_df_all()
     t.fit_transform(df)
     dtypes_expected = {
         "color": pd.CategoricalDtype(["blue", "red"]),
         "amount": np.dtype("int8"),
         "price": np.dtype("float32"),
     }
     # This transformer modifies df in place!
     self.assertDictEqual(df.dtypes.to_dict(), dtypes_expected)
コード例 #9
0
    def test_inverse_transform(self):
        """Check inverse_transform returns a frame equal to its input.

        Dropped columns cannot be recreated, so a real inverse is impossible.
        The method must still exist so that a pipeline calling it does not
        fail; it is expected to return df.
        """

        original = create_df_all()
        dropper = r.DropTransformer()
        restored = dropper.inverse_transform(original)
        self.assertTrue(original.equals(restored))
コード例 #10
0
 def test_fit_transform(self):
     """Check "color" becomes int8 codes and the other columns are kept."""
     encoder = r.CategoryEncoder("color")
     encoded = encoder.fit_transform(create_df_all())
     expected = pd.DataFrame({
         "color": [1, 0, 1],
         "price": [1.5, 2.5, 3.5],
         "amount": [1, 2, 3]
     })
     # The encoded column is expected as int8, not the default integer dtype.
     color_codes = expected["color"].astype("int8")
     expected["color"] = color_codes
     self.assertTrue(encoded.equals(expected))
コード例 #11
0
 def test_transform_columns_inverse(self):
     """Check the arguments inverse_transform passes to its columns hook.

     The hook must be called once with the transformed frame and the
     "{}_out"-formatted column names.
     """
     transformer = r.ColumnsTransformer(
         "color", "price", col_format="{}_out")
     source = create_df_all()
     inverse_result = source[["color", "price"]].copy()
     inverse_result.columns = ["color", "price"]
     stub = MagicMock(return_value=inverse_result)
     transformer._inverse_transform_columns = stub
     transformer.fit(source)
     transformed = transformer.transform(source)
     transformer.inverse_transform(transformed)
     # TODO: Columns order does not matter.
     stub.assert_called_once_with(transformed, ["color_out", "price_out"])
コード例 #12
0
    def test_not_N_to_1_relationship(self):
        """Check fit raises ValueError for a mapping with a two-column key."""

        class ManyToOneTransformer(r.ColumnTransformer):
            def get_column_mapping(self):
                # 2 input cols and 3 output cols, but still N to N.
                mapping = {
                    ("color", "price"): "color_price",
                    "color": ("color1", "color2"),
                }
                return mapping

        transformer = ManyToOneTransformer("color", "price")
        expected_message = "Only 1 to N relationships.*"
        with self.assertRaisesRegex(ValueError, expected_message):
            transformer.fit(create_df_all())
コード例 #13
0
 def _test_transform_column_values(self, col_format):
     """Check stubbed column values land in the col_format-named columns.

     Also checks that the input frame does not end up equal to the output.
     """
     transformer = r.ColumnTransformer(
         "color", "price", col_format=col_format)
     transformer._transform_column = MagicMock(return_value=[1, 1, 1])
     source = create_df_all()
     result = transformer.fit_transform(source)
     color_name = col_format.format("color")
     price_name = col_format.format("price")
     expected = pd.DataFrame({
         color_name: [1, 1, 1],
         price_name: [1, 1, 1],
         "amount": [1, 2, 3]
     })
     self.assertTrue(result.equals(expected))
     # No changes on input df.
     self.assertFalse(source.equals(expected))
コード例 #14
0
 def test_transform(self):
     """Check the query keeps only the red rows and their index labels."""
     query = r.QueryTransformer("color == 'red'")
     filtered = query.fit_transform(create_df_all())
     rows = pd.DataFrame({
         "color": ["red", "red"],
         "price": [1.5, 3.5],
         "amount": [1, 3],
         "index": [0, 2]
     })
     # The surviving rows are expected to keep index labels 0 and 2.
     expected = rows.set_index("index")
     self.assertTrue(expected.equals(filtered))
コード例 #15
0
 def test_inverse_transform_column_values(self):
     """Check inverse_transform writes stubbed values to the original names.

     Also checks that the input frame does not end up equal to the output.
     """
     transformer = r.ColumnTransformer("color", "price", col_format="{}_out")
     transformer._transform_column = MagicMock(return_value=[1, 1, 1])
     transformer._inverse_transform_column = MagicMock(
         return_value=[2, 2, 2])
     source = create_df_all()
     transformed = transformer.fit_transform(source)
     restored = transformer.inverse_transform(transformed)
     expected = pd.DataFrame({
         "color": [2, 2, 2],
         "price": [2, 2, 2],
         "amount": [1, 2, 3]
     })
     self.assertTrue(restored.equals(expected))
     # No changes on input df.
     self.assertFalse(source.equals(expected))
コード例 #16
0
 def test_fit_transform_features_name_result(self):
     """Check output columns are named after the mocked feature names.

     The mock reports "0_blue" and "0_red"; the result is expected to hold
     "color=blue" and "color=red" followed by the untouched columns.
     """
     sk = SklearnTransformerMock()
     sk.get_feature_names = MagicMock(return_value=["0_blue", "0_red"])
     # One row per feature, transposed so each feature becomes a column.
     transformed = np.transpose(np.array([[1, 2, 3], [3, 2, 1]]))
     sk.transform = MagicMock(return_value=transformed)
     wrapper = r.SklearnColumnsWrapper(sk, "color")
     result = wrapper.fit_transform(create_df_all())
     expected = pd.DataFrame({
         "color=blue": [1, 2, 3],
         "color=red": [3, 2, 1],
         "price": [1.5, 2.5, 3.5],
         "amount": [1, 2, 3]
     })
     sk.transform.assert_called_once()
     self.assertTrue(result.equals(expected))
コード例 #17
0
 def test_inverse_transform_unknown_default(self):
     """Check code 2 decodes to "red" and code 0 to the unknown value."""
     encoder = r.CategoryEncoder("color", unknown_value="UNKNOWN")
     encoder.fit(create_df_all())
     encoded = pd.DataFrame({
         "color": [2, 0],
         "price": [1.5, 2.5],
         "amount": [1, 2]
     })
     decoded = encoder.inverse_transform(encoded)
     expected = pd.DataFrame({
         "color": ["red", "UNKNOWN"],
         "price": [1.5, 2.5],
         "amount": [1, 2]
     })
     self.assertTrue(decoded.equals(expected))
コード例 #18
0
 def test_fit_transform_unknown_default(self):
     """Check "red" encodes to 2 and the unseen "yellow" to 0, as int8."""
     encoder = r.CategoryEncoder("color", unknown_value="UNKNOWN")
     encoder.fit(create_df_all())
     unseen = pd.DataFrame({
         "color": ["red", "yellow"],
         "price": [1.5, 2.5],
         "amount": [1, 2]
     })
     encoded = encoder.transform(unseen)
     expected = pd.DataFrame({
         "color": [2, 0],
         "price": [1.5, 2.5],
         "amount": [1, 2]
     })
     # The encoded column is expected as int8, not the default integer dtype.
     color_codes = expected["color"].astype("int8")
     expected["color"] = color_codes
     self.assertTrue(encoded.equals(expected))
コード例 #19
0
 def test_transform_column_1_N(self):
     """Check a 1-to-N mapping expands "color" into "color1" and "color2".

     ``_transform_column`` is stubbed to return one [1, 2] pair per row; the
     first value of each pair is expected in "color1", the second in "color2".
     """
     t = r.ColumnTransformer("color")
     t.get_column_mapping = MagicMock(
         return_value={"color": ["color1", "color2"]})
     t._transform_column = MagicMock(return_value=[[1, 2], [1, 2], [1, 2]])
     df = create_df_all()
     df_out = t.fit_transform(df)
     df_expected = pd.DataFrame({
         "color1": [1, 1, 1],
         "color2": [2, 2, 2],
         "price": [1.5, 2.5, 3.5],
         "amount": [1, 2, 3]
     })
     calls = [call(df, "color")]
     t._transform_column.assert_has_calls(calls, any_order=True)
     # Report both frames only when the check fails, instead of printing
     # them on every run.
     self.assertTrue(
         df_out.equals(df_expected),
         "Unexpected output:\n{}\nExpected:\n{}".format(df_out, df_expected))
コード例 #20
0
 def _test_fit_transform_unknown_none(self, error_unknown):
     """Check encoding of an unseen category when no unknown_value is set.

     After fitting on the default frame, "red" is expected to encode to 1 and
     the unseen "yellow" to -1, with the column stored as int8.

     Args:
         error_unknown: Forwarded to ``CategoryEncoder(error_unknown=...)``.
     """
     df = create_df_all()
     t = r.CategoryEncoder("color", error_unknown=error_unknown)
     t.fit(df)
     df_in = pd.DataFrame({
         "color": ["red", "yellow"],
         "price": [1.5, 2.5],
         "amount": [1, 2]
     })
     df_out = t.transform(df_in)
     df_expected = pd.DataFrame({
         "color": [1, -1],
         "price": [1.5, 2.5],
         "amount": [1, 2]
     })
     df_expected["color"] = df_expected["color"].astype("int8")
     # Report both frames only when the check fails, instead of printing
     # them on every run.
     self.assertTrue(
         df_out.equals(df_expected),
         "Unexpected output:\n{}\nExpected:\n{}".format(df_out, df_expected))
コード例 #21
0
 def _test_select_non_existing(self, error):
     """Select "color" plus a missing column and expect only "color" back.

     Args:
         error: Forwarded to ``SelectTransformer(cols_not_found_error=...)``.
     """
     selector = r.SelectTransformer(cols_not_found_error=error)
     selector.cols = ["color", "colorize"]
     selected = selector.transform(create_df_all())
     remaining = list(selected.columns)
     self.assertEqual(remaining, ["color"])
コード例 #22
0
 def test_init_no_target_in_df(self):
     """Check fit raises ValueError when the target is not in the frame."""
     encoder = r.TargetEncoderTransformer(target="label")
     expected_message = "Target must be in the fitted DataFrame"
     with self.assertRaisesRegex(ValueError, expected_message):
         encoder.fit(create_df_all())
コード例 #23
0
 def _test_drop_non_existing(self, error):
     """Drop "color" plus a missing column; expect "price" and "amount".

     Args:
         error: Forwarded to ``DropTransformer(cols_not_found_error=...)``.
     """
     dropper = r.DropTransformer(cols_not_found_error=error)
     dropper.cols = ["color", "colorize"]
     reduced = dropper.transform(create_df_all())
     remaining = list(reduced.columns)
     self.assertEqual(remaining, ["price", "amount"])