Example #1
0
    def test_one_hot_encoder_fit_called(self, mocker):
        """Test that fit calls OneHotEncoder.fit.

        OneHotEncoder.fit is patched with a mock, so the call is only recorded.
        The recorded call is expected to have the transformer as its single
        positional argument (self), with X - limited to column "b" - and y
        passed as keyword arguments.
        """

        expected_keyword_args = {"X": d.create_df_1()[["b"]], "y": None}

        df = d.create_df_1()

        x = OneHotEncodingTransformer(columns="b")

        # keep the mock returned by patch instead of looking it up on the class again
        fit_mock = mocker.patch("sklearn.preprocessing.OneHotEncoder.fit")

        x.fit(df)

        assert (
            fit_mock.call_count == 1
        ), f"Unexpected number of calls to OneHotEncoder.fit -\n  Expected: 1\n  Actual: {fit_mock.call_count}"

        call_args = fit_mock.call_args_list[0]
        call_pos_args = call_args[0]
        call_kwargs = call_args[1]

        h.assert_equal_dispatch(
            expected=expected_keyword_args,
            actual=call_kwargs,
            msg="kwargs for OneHotEncoder.fit in OneHotEncodingTransformer.fit",
        )

        assert (
            len(call_pos_args) == 1
        ), f"Unexpected number of positional args in OneHotEncoder.fit call -\n  Expected: 1\n  Actual: {len(call_pos_args)}"

        assert (
            call_pos_args[0] is x
        ), f"Unexpected positional arg (self) in OneHotEncoder.fit call -\n  Expected: self\n  Actual: {call_pos_args[0]}"
Example #2
0
    def test_super_fit_call(self, mocker):
        """Check the call OutOfRangeNullTransformer.fit makes to CappingTransformer.fit.

        A spy is attached to CappingTransformer.fit. The recorded call must
        happen exactly once, with the transformer as the only positional
        argument and X / y supplied as keyword arguments.
        """

        fit_spy = mocker.spy(tubular.capping.CappingTransformer, "fit")

        data = d.create_df_9()

        transformer = OutOfRangeNullTransformer(
            quantiles={"a": [0.1, 1], "b": [0.5, None]},
            weights_column="c",
        )

        transformer.fit(data)

        assert (
            fit_spy.call_count == 1
        ), "unexpected number of calls to CappingTransformer.fit"

        # a recorded call unpacks into (positional args, keyword args)
        positional, keywords = fit_spy.call_args_list[0]

        assert positional == (
            transformer,
        ), "unexpected positional args in CappingTransformer.fit call"

        h.assert_equal_dispatch(
            expected={"X": d.create_df_9(), "y": None},
            actual=keywords,
            msg="unexpected kwargs in CappingTransformer.fit call",
        )
Example #3
0
    def test_mappings_unchanged(self):
        """Test that mappings is unchanged in transform.

        The expected value is an independent deep copy taken before the
        transformer is built. Comparing against the dict that was passed in
        would not catch an in-place change if the transformer keeps a reference
        to that same object.
        """
        import copy

        df = d.create_df_1()

        mapping = {
            "b": {
                "a": 1.1,
                "b": 1.2,
                "c": 1.3,
                "d": 1.4,
                "e": 1.5,
                "f": 1.6,
            }
        }

        # snapshot that cannot be affected by anything the transformer does
        expected_mapping = copy.deepcopy(mapping)

        x = CrossColumnAddTransformer(mappings=mapping, adjust_column="a")

        x.transform(df)

        h.assert_equal_dispatch(
            expected=expected_mapping,
            actual=x.mappings,
            msg="CrossColumnAddTransformer.transform has changed self.mappings unexpectedly",
        )
    def test_mappings_unchanged(self):
        """Test that mappings is unchanged in transform.

        The expected value is an independent deep copy taken before the
        transformer is built. Comparing against the dict that was passed in
        would not catch an in-place change if the transformer keeps a reference
        to that same object.
        """
        import copy

        df = d.create_df_1()

        mapping = {
            "a": {
                1: "a",
                2: "b",
                3: "c",
                4: "d",
                5: "e",
                6: "f",
            },
            "b": {
                "a": 1,
                "b": 2,
                "c": 3,
                "d": 4,
                "e": 5,
                "f": 6,
            },
        }

        # snapshot that cannot be affected by anything the transformer does
        expected_mapping = copy.deepcopy(mapping)

        x = BaseMappingTransformer(mappings=mapping)

        x.transform(df)

        h.assert_equal_dispatch(
            expected=expected_mapping,
            actual=x.mappings,
            msg="BaseMappingTransformer.transform has changed self.mappings unexpectedly",
        )
Example #5
0
    def test_base_nominal_transformer_fit_called(self, mocker):
        """Test that fit calls BaseNominalTransformer.fit.

        BaseNominalTransformer.fit is patched with a mock, so the call is only
        recorded. The recorded call is expected to have the transformer as its
        single positional argument (self), with X and y passed as keyword
        arguments.
        """

        expected_keyword_args = {"X": d.create_df_1(), "y": None}

        df = d.create_df_1()

        x = OneHotEncodingTransformer(columns="b")

        # keep the mock returned by patch instead of looking it up on the class again
        fit_mock = mocker.patch("tubular.nominal.BaseNominalTransformer.fit")

        x.fit(df)

        assert (
            fit_mock.call_count == 1
        ), f"Unexpected number of calls to BaseNominalTransformer.fit -\n  Expected: 1\n  Actual: {fit_mock.call_count}"

        call_args = fit_mock.call_args_list[0]
        call_pos_args = call_args[0]
        call_kwargs = call_args[1]

        h.assert_equal_dispatch(
            expected=expected_keyword_args,
            actual=call_kwargs,
            msg="kwargs for BaseNominalTransformer.fit in OneHotEncodingTransformer.fit",
        )

        assert (
            len(call_pos_args) == 1
        ), f"Unexpected number of positional args in BaseNominalTransformer.fit call -\n  Expected: 1\n  Actual: {len(call_pos_args)}"

        assert (
            call_pos_args[0] is x
        ), f"Unexpected positional arg (self) in BaseNominalTransformer.fit call -\n  Expected: self\n  Actual: {call_pos_args[0]}"
Example #6
0
    def test_one_hot_encoder_init_called(self, mocker):
        """Test that init calls OneHotEncoder.init.

        Again not using h.assert_function_call for this as it does not handle self being passed to OneHotEncoder.init

        OneHotEncoder.__init__ is patched with a mock before the transformer is
        created, so the call made during construction is recorded and can be
        inspected afterwards.
        """

        expected_keyword_args = {"sparse": False, "handle_unknown": "ignore"}

        # keep the mock returned by patch instead of looking it up on the class again
        init_mock = mocker.patch("sklearn.preprocessing.OneHotEncoder.__init__")

        x = OneHotEncodingTransformer(
            columns=None, verbose=True, copy=True, separator="x", drop_original=True
        )

        assert (
            init_mock.call_count == 1
        ), f"Unexpected number of calls to OneHotEncoder.__init__ -\n  Expected: 1\n  Actual: {init_mock.call_count}"

        call_args = init_mock.call_args_list[0]
        call_pos_args = call_args[0]
        call_kwargs = call_args[1]

        h.assert_equal_dispatch(
            expected=expected_keyword_args,
            actual=call_kwargs,
            msg="kwargs for OneHotEncoder.__init__ in OneHotEncodingTransformer.init",
        )

        assert (
            len(call_pos_args) == 1
        ), f"Unexpected number of positional args in OneHotEncoder.__init__ call -\n  Expected: 1\n  Actual: {len(call_pos_args)}"

        assert (
            call_pos_args[0] is x
        ), f"Unexpected positional arg (self) in OneHotEncoder.__init__ call -\n  Expected: self\n  Actual: {call_pos_args[0]}"
    def test_X_returned(self, df, expected):
        """Compare the output of BaseMappingTransformer.transform with the expected frame."""

        letters = "abcdef"

        column_mappings = {
            # 1 -> "a", 2 -> "b", ... 6 -> "f"
            "a": dict(enumerate(letters, start=1)),
            # "a" -> 1, "b" -> 2, ... "f" -> 6
            "b": {letter: number for number, letter in enumerate(letters, start=1)},
        }

        transformer = BaseMappingTransformer(mappings=column_mappings)

        h.assert_equal_dispatch(
            expected=expected,
            actual=transformer.transform(df),
            msg="Check X returned from transform",
        )
    def test_scaler_transform_call(self, mocker, scaler, scaler_type_str):
        """Test the call made to the scaler's transform method.

        The transformer is fitted first, then the transform method of the
        sklearn scaler class named by scaler_type_str is patched so that the
        call made from ScalingTransformer.transform can be inspected: one call,
        with columns "b" and "c" of the input as the only positional argument
        and no keyword arguments.
        """

        df = d.create_df_3()

        x = ScalingTransformer(
            columns=["b", "c"], scaler=scaler, scaler_kwargs={"copy": True}
        )

        x.fit(df)

        mocked = mocker.patch(
            f"sklearn.preprocessing.{scaler_type_str}.transform",
            return_value=df[["b", "c"]],
        )

        x.transform(df)

        assert mocked.call_count == 1, "unexpected number of calls to scaler transform"

        call_args = mocked.call_args_list[0]
        call_pos_args = call_args[0]
        call_kwargs = call_args[1]

        expected_positional_args = (df[["b", "c"]],)

        h.assert_equal_dispatch(
            expected=expected_positional_args,
            actual=call_pos_args,
            msg=f"unexpected positional args in {scaler_type_str} transform call",
        )

        assert (
            call_kwargs == {}
        ), f"unexpected kwargs in {scaler_type_str} transform call"
    def test_output_from_scaler_transform_set_to_columns(
        self, mocker, scaler, scaler_type_str
    ):
        """Check that the scaler's transform output is assigned to the scaled columns.

        The sklearn scaler's transform is patched to return a known frame; the
        "b" and "c" columns of the transformer output must equal that frame.
        """

        data = d.create_df_3()

        transformer = ScalingTransformer(
            columns=["b", "c"], scaler=scaler, scaler_kwargs={"copy": True}
        )
        transformer.fit(data)

        # known values the patched scaler hands back: b counts up, c counts down
        patched_output = pd.DataFrame(
            {"b": list(range(1, 8)), "c": list(range(7, 0, -1))}
        )

        mocker.patch(
            f"sklearn.preprocessing.{scaler_type_str}.transform",
            return_value=patched_output,
        )

        result = transformer.transform(data)

        h.assert_equal_dispatch(
            expected=patched_output,
            actual=result[["b", "c"]],
            msg=f"output from {scaler_type_str} transform not assigned to columns",
        )
Example #10
0
    def test_categories_not_modified(self):
        """Check that transform leaves the categories_ learnt in fit as they were.

        Two identically configured transformers are fitted on the same data;
        only one of them runs transform, and its categories_ are then compared
        with those of the other.
        """

        train = d.create_df_1()
        test = d.create_df_7()

        transformed = OneHotEncodingTransformer(columns=["a", "b"], verbose=False)
        reference = OneHotEncodingTransformer(columns=["a", "b"], verbose=False)

        transformed.fit(train)
        reference.fit(train)

        transformed.transform(test)

        checks = (
            (0, "categories_ (index 0) modified during transform"),
            (1, "categories_ (index 1) modified during transform"),
        )

        for position, failure_msg in checks:
            h.assert_equal_dispatch(
                expected=list(reference.categories_[position]),
                actual=list(transformed.categories_[position]),
                msg=failure_msg,
            )
Example #11
0
    def test_super_init_called(self, mocker):
        """Test that init calls BaseNominalTransformer.init.

        Note, not using h.assert_function_call for this as it does not handle self being passed to BaseNominalTransformer.init.

        BaseNominalTransformer.__init__ is patched with a mock before the
        transformer is created, so the call made during construction is
        recorded and can be inspected afterwards.
        """

        expected_keyword_args = {"columns": None, "verbose": True, "copy": True}

        # keep the mock returned by patch instead of looking it up on the class again
        init_mock = mocker.patch("tubular.nominal.BaseNominalTransformer.__init__")

        x = OneHotEncodingTransformer(columns=None, verbose=True, copy=True)

        assert (
            init_mock.call_count == 1
        ), f"Unexpected number of calls to BaseNominalTransformer.__init__ -\n  Expected: 1\n  Actual: {init_mock.call_count}"

        call_args = init_mock.call_args_list[0]
        call_pos_args = call_args[0]
        call_kwargs = call_args[1]

        h.assert_equal_dispatch(
            expected=expected_keyword_args,
            actual=call_kwargs,
            msg="kwargs for BaseNominalTransformer.__init__ in OneHotEncodingTransformer.init",
        )

        assert (
            len(call_pos_args) == 1
        ), f"Unexpected number of positional args in BaseNominalTransformer.__init__ call -\n  Expected: 1\n  Actual: {len(call_pos_args)}"

        assert (
            call_pos_args[0] is x
        ), f"Unexpected positional arg (self) in BaseNominalTransformer.__init__ call -\n  Expected: self\n  Actual: {call_pos_args[0]}"
    def test_value_set_in_transform(self, df, expected):
        """Compare SetValueTransformer.transform output (columns "a" and "b", value "a") with the expected frame."""

        transformer = SetValueTransformer(columns=["a", "b"], value="a")

        result = transformer.transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=result,
            msg="incorrect value after SetValueTransformer transform",
        )
    def test_X_returned(self, df, expected):
        """Compare the output of BaseTransformer.transform with the expected frame."""

        transformer = BaseTransformer(columns="a", copy=True)

        h.assert_equal_dispatch(
            expected=expected,
            actual=transformer.transform(X=df),
            msg="Check X returned from transform",
        )
    def test_null_indicator_columns_correct(self, df, expected):
        """Compare NullIndicator.transform output for columns "b" and "c" with the expected frame."""

        indicator = NullIndicator(columns=["b", "c"])

        result = indicator.transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=result,
            msg="Check null indicator columns created correctly in transform.",
        )
    def test_expected_output_1(self, df, expected):
        """Test that transform is giving the expected output when applied to float column.

        columns and impute_values_ are set directly on the BaseImputer instance
        rather than through a fit, so only transform is exercised.
        """

        x1 = BaseImputer()
        x1.columns = ["a"]
        x1.impute_values_ = {"a": 7}

        df_transformed = x1.transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            # name the class actually under test so a failure points at the right transformer
            msg="BaseImputer transform col a",
        )
    def test_expected_output_3(self, df, expected):
        """Test that transform is giving the expected output when applied to object and categorical columns.

        columns and impute_values_ are set directly on the BaseImputer instance
        rather than through a fit, so only transform is exercised.
        """

        x1 = BaseImputer()
        x1.columns = ["b", "c"]
        x1.impute_values_ = {"b": "g", "c": "f"}

        df_transformed = x1.transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            # name the class actually under test so a failure points at the right transformer
            msg="BaseImputer transform col b, c",
        )
Example #17
0
    def test_fit_not_changing_data(self):
        """Check that OrdinalEncoderTransformer.fit leaves the input frame equal to a fresh copy."""

        fit_input = d.create_OrdinalEncoderTransformer_test_df()

        transformer = OrdinalEncoderTransformer(response_column="a", columns="b")
        transformer.fit(fit_input)

        # rebuild the same data from scratch to compare against
        fresh_copy = d.create_OrdinalEncoderTransformer_test_df()

        h.assert_equal_dispatch(
            expected=fresh_copy,
            actual=fit_input,
            msg="Check X not changing during fit",
        )
    def test_fit_not_changing_data(self):
        """Check that ModeImputer.fit leaves the input frame equal to a fresh copy."""

        fit_input = d.create_df_1()

        imputer = ModeImputer(columns="a")
        imputer.fit(fit_input)

        # rebuild the same data from scratch to compare against
        fresh_copy = d.create_df_1()

        h.assert_equal_dispatch(
            expected=fresh_copy,
            actual=fit_input,
            msg="Check X not changing during fit",
        )
Example #19
0
    def test_fit_not_changing_data(self):
        """Check that OneHotEncodingTransformer.fit leaves the input frame equal to a fresh copy."""

        fit_input = d.create_df_1()

        encoder = OneHotEncodingTransformer(columns="b")
        encoder.fit(fit_input)

        # rebuild the same data from scratch to compare against
        fresh_copy = d.create_df_1()

        h.assert_equal_dispatch(
            expected=fresh_copy,
            actual=fit_input,
            msg="Check X not changing during fit",
        )
    def test_columns_set_to_all_columns_when_none(self):
        """Check that columns_set_or_check sets self.columns to every column of X when it starts as None."""

        data = d.create_df_1()

        transformer = BaseTransformer(columns=None)
        transformer.columns_set_or_check(X=data)

        all_columns = list(data.columns.values)

        h.assert_equal_dispatch(
            expected=all_columns,
            actual=transformer.columns,
            msg="x.columns set when None",
        )
    def test_X_returned(self):
        """Test that the input X is returned from the method.

        check_numeric_columns is called directly on a ScalingTransformer and
        its return value is compared with the frame that was passed in using
        h.assert_equal_dispatch.
        """

        df = d.create_df_2()

        x = ScalingTransformer(columns=["a"], scaler="standard")

        df_returned = x.check_numeric_columns(df)

        h.assert_equal_dispatch(
            expected=df,
            actual=df_returned,
            msg="unexpected object returned from check_numeric_columns",
        )
Example #22
0
    def test_fit_not_changing_data(self):
        """Check that GroupRareLevelsTransformer.fit leaves the input frame equal to a fresh copy."""

        fit_input = d.create_df_5()

        grouper = GroupRareLevelsTransformer(columns=["a", "b", "c"])
        grouper.fit(fit_input)

        # rebuild the same data from scratch to compare against
        fresh_copy = d.create_df_5()

        h.assert_equal_dispatch(
            expected=fresh_copy,
            actual=fit_input,
            msg="Check X not changing during fit",
        )
    def test_columns_none_get_cat_columns(self):
        """Check that columns_set_or_check sets self.columns to ["b", "c"] for create_df_4 when it starts as None."""

        data = d.create_df_4()

        transformer = BaseNominalTransformer()
        # force the "no columns given" state before the check runs
        transformer.columns = None

        transformer.columns_set_or_check(data)

        h.assert_equal_dispatch(
            expected=["b", "c"],
            actual=transformer.columns,
            msg="nominal columns getting",
        )
    def test_fit_not_changing_data(self):
        """Check that NominalToIntegerTransformer.fit leaves the input frame equal to a fresh copy."""

        fit_input = d.create_df_1()

        encoder = NominalToIntegerTransformer(columns=["a", "b"])
        encoder.fit(fit_input)

        # rebuild the same data from scratch to compare against
        fresh_copy = d.create_df_1()

        h.assert_equal_dispatch(
            expected=fresh_copy,
            actual=fit_input,
            msg="Check X not changing during fit",
        )
    def test_nulls_imputed_correctly(self, df, expected):
        """Compare MedianImputer.transform output with the expected frame, using hand-set impute values."""

        imputer = MedianImputer(columns=["a", "b", "c"])

        # impute values are assigned by hand instead of fitting on df, so the test works with the helpers
        imputer.impute_values_ = {"a": 3.5, "b": 5.0, "c": -1.5}

        result = imputer.transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=result,
            msg="Check nulls filled correctly in transform",
        )
Example #26
0
    def test_unseen_categories_encoded_as_all_zeroes(self, df_test, expected):
        """Compare the transform output of an encoder fitted on create_df_7 with the expected frame for df_test."""

        # fitting happens on the full dataset, separately from the input df, so the test works with the decorators
        encoder = OneHotEncodingTransformer(columns=["a", "b", "c"], verbose=False)
        encoder.fit(d.create_df_7())

        result = encoder.transform(df_test)

        h.assert_equal_dispatch(
            expected=expected,
            actual=result,
            msg="unseen category rows not encoded as 0s",
        )
    def test_nulls_imputed_correctly_2(self, df, expected):
        """Compare ModeImputer.transform output for column "a" with the expected frame, using a hand-set impute value."""

        imputer = ModeImputer(columns=["a"])

        # impute value is assigned by hand instead of fitting on df, so the test works with the helpers
        imputer.impute_values_ = {"a": 1.0}

        result = imputer.transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=result,
            msg="Check nulls filled correctly in transform",
        )
Example #28
0
    def test_mappings_unchanged(self):
        """Test that mappings is unchanged in transform.

        The expected value is an independent deep copy taken before the
        transformer is built. Comparing against the dict that was passed in
        would not catch an in-place change if the transformer keeps a reference
        to that same object.
        """
        import copy

        df = d.create_df_1()

        mapping = {"a": {1: "aa", 2: "bb", 3: "cc", 4: "dd", 5: "ee", 6: "ff"}}

        # snapshot that cannot be affected by anything the transformer does
        expected_mapping = copy.deepcopy(mapping)

        x = CrossColumnMappingTransformer(mappings=mapping, adjust_column="b")

        x.transform(df)

        h.assert_equal_dispatch(
            expected=expected_mapping,
            actual=x.mappings,
            msg="CrossColumnMappingTransformer.transform has changed self.mappings unexpectedly",
        )
Example #29
0
    def test_original_columns_kept_when_specified(self):
        """Check that columns "a", "b" and "c" are all still present after transform with drop_original=False."""

        data = d.create_df_7()

        encoder = OneHotEncodingTransformer(drop_original=False)
        encoder.fit(data)

        result = encoder.transform(data)

        # any original column absent from the output ends up in this list
        missing_originals = list({"a", "b", "c"} - set(result.columns))

        h.assert_equal_dispatch(
            expected=[],
            actual=missing_originals,
            msg="original columns not kept",
        )
    def test_expected_output_4(self, df, expected):
        """Compare LogTransformer.transform output (add_1=True, drop=False, suffix "new_col") with the expected frame."""

        log_settings = {
            "columns": ["a", "b"],
            "add_1": True,
            "drop": False,
            "suffix": "new_col",
        }

        transformer = LogTransformer(**log_settings)

        result = transformer.transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=result,
            msg="LogTransformer transform not adding 1 and dropping original columns",
        )