def expected_df_5():
        """Build the frame that test_expected_output_5 compares against.

        Starts from ``d.create_df_4()`` and appends ``a_new_col`` holding the
        base-5 logarithm of ``a + 1``. No column is removed.
        """

        expected = d.create_df_4()

        # Change of base: log_5(v) = ln(v) / ln(5).
        shifted = expected["a"] + 1
        expected["a_new_col"] = np.log(shifted) / np.log(5)

        return expected
# Example no. 2
# 0
    def test_response_column_nulls_error(self):
        """Fitting must raise ValueError when response_column contains nulls.

        Column "a" of ``d.create_df_4()`` is used as the response column;
        presumably that is the column holding the null values (confirm
        against the fixture).
        """

        data = d.create_df_4()
        encoder = OrdinalEncoderTransformer(response_column="a", columns=["b"])

        with pytest.raises(ValueError):
            encoder.fit(data)
    def expected_df_6():
        """Build the frame that test_expected_output_6 compares against.

        Starts from ``d.create_df_4()``, adds ``a_new_col`` with the base-7
        logarithm of ``a`` (nothing is added first), then removes ``a``.
        """

        expected = d.create_df_4()

        # Change of base: log_7(v) = ln(v) / ln(7).
        natural_log = np.log(expected["a"])
        expected["a_new_col"] = natural_log / np.log(7)

        expected.drop(columns=["a"], inplace=True)

        return expected
    def test_columns_none_get_cat_columns(self):
        """With columns set to None, object and categorical columns become self.columns.

        For ``d.create_df_4()`` the columns attribute is expected to end up
        as ``["b", "c"]`` after ``columns_set_or_check`` runs.
        """

        data = d.create_df_4()

        transformer = BaseNominalTransformer()
        transformer.columns = None
        transformer.columns_set_or_check(data)

        h.assert_equal_dispatch(
            expected=["b", "c"],
            actual=transformer.columns,
            msg="nominal columns getting",
        )
class TestTransform(object):
    """Tests for LogTransformer.transform()."""
    def expected_df_1():
        """Build the frame that test_expected_output_1 compares against.

        Takes no ``self``: it is called directly in the class body by the
        ``parametrize`` decorator of that test. Natural logs of ``a`` and
        ``b`` go into ``a_new_col`` / ``b_new_col`` and the source columns
        are dropped.
        """

        expected = d.create_df_3()

        for source, target in (("a", "a_new_col"), ("b", "b_new_col")):
            expected[target] = np.log(expected[source])

        expected.drop(columns=["a", "b"], inplace=True)

        return expected

    def expected_df_2():
        """Build the frame that test_expected_output_2 compares against.

        Takes no ``self``: it is called directly in the class body by the
        ``parametrize`` decorator of that test. Natural logs of ``a + 1`` and
        ``b + 1`` go into ``a_new_col`` / ``b_new_col`` and the source
        columns are dropped.
        """

        expected = d.create_df_3()

        for source, target in (("a", "a_new_col"), ("b", "b_new_col")):
            expected[target] = np.log(expected[source] + 1)

        expected.drop(columns=["a", "b"], inplace=True)

        return expected

    def expected_df_3():
        """Build the frame that test_expected_output_3 compares against.

        Takes no ``self``: it is called directly in the class body by the
        ``parametrize`` decorator of that test. Natural logs of ``a`` and
        ``b`` are appended as ``a_new_col`` / ``b_new_col``; the source
        columns are kept.
        """

        expected = d.create_df_3()

        for source, target in (("a", "a_new_col"), ("b", "b_new_col")):
            expected[target] = np.log(expected[source])

        return expected

    def expected_df_4():
        """Build the frame that test_expected_output_4 compares against.

        Takes no ``self``: it is called directly in the class body by the
        ``parametrize`` decorator of that test. Natural logs of ``a + 1`` and
        ``b + 1`` are appended as ``a_new_col`` / ``b_new_col``; the source
        columns are kept.
        """

        expected = d.create_df_3()

        for source, target in (("a", "a_new_col"), ("b", "b_new_col")):
            expected[target] = np.log(expected[source] + 1)

        return expected

    def expected_df_5():
        """Build the frame that test_expected_output_5 compares against.

        Takes no ``self``: it is called directly in the class body by the
        ``parametrize`` decorator of that test. ``a_new_col`` holds the
        base-5 logarithm of ``a + 1``; no column is removed.
        """

        expected = d.create_df_4()

        # Change of base: log_5(v) = ln(v) / ln(5).
        shifted = expected["a"] + 1
        expected["a_new_col"] = np.log(shifted) / np.log(5)

        return expected

    def expected_df_6():
        """Build the frame that test_expected_output_6 compares against.

        Takes no ``self``: it is called directly in the class body by the
        ``parametrize`` decorator of that test. ``a_new_col`` holds the
        base-7 logarithm of ``a`` (nothing is added first) and ``a`` itself
        is removed.
        """

        expected = d.create_df_4()

        # Change of base: log_7(v) = ln(v) / ln(7).
        natural_log = np.log(expected["a"])
        expected["a_new_col"] = natural_log / np.log(7)

        expected.drop(columns=["a"], inplace=True)

        return expected

    def test_arguments(self):
        """Check the argument names of LogTransformer.transform against ["self", "X"]."""

        expected_signature = ["self", "X"]

        h.test_function_arguments(
            func=LogTransformer.transform,
            expected_arguments=expected_signature,
        )

    def test_super_transform_called(self, mocker):
        """Check that LogTransformer.transform calls BaseTransformer.transform.

        ``h.assert_function_call`` is given the expected arguments for call
        index 0 (a fresh ``create_df_3`` frame, no keyword arguments) and a
        fresh ``create_df_3`` frame as ``return_value``.
        """

        data = d.create_df_3()

        transformer = LogTransformer(columns=["a", "b"])

        first_call = {"args": (d.create_df_3(),), "kwargs": {}}
        expected_call_args = {0: first_call}

        with h.assert_function_call(
            mocker,
            tubular.base.BaseTransformer,
            "transform",
            expected_call_args,
            return_value=d.create_df_3(),
        ):
            transformer.transform(data)

    def test_error_with_non_numeric_columns(self):
        """Check transform raises TypeError naming the non-numeric columns.

        Columns "a", "b" and "c" of ``d.create_df_5()`` are requested; the
        error message is expected to list "b" and "c".
        """

        data = d.create_df_5()

        transformer = LogTransformer(columns=["a", "b", "c"])

        expected_message = r"The following columns are not numeric in X; \['b', 'c'\]"

        with pytest.raises(TypeError, match=expected_message):
            transformer.transform(data)

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_3(), expected_df_1())
        + h.index_preserved_params(d.create_df_3(), expected_df_1()),
    )
    def test_expected_output_1(self, df, expected):
        """Check transform output with add_1=False and drop=True.

        Parametrized over the cases that ``h.row_by_row_params`` and
        ``h.index_preserved_params`` produce from ``create_df_3`` and
        ``expected_df_1``.
        """

        transformer = LogTransformer(
            columns=["a", "b"],
            add_1=False,
            drop=True,
            suffix="new_col",
        )

        actual = transformer.transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=actual,
            msg="LogTransformer transform not adding 1 and dropping original columns",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_3(), expected_df_2())
        + h.index_preserved_params(d.create_df_3(), expected_df_2()),
    )
    def test_expected_output_2(self, df, expected):
        """Check transform output with add_1=True and drop=True.

        Parametrized over the cases that ``h.row_by_row_params`` and
        ``h.index_preserved_params`` produce from ``create_df_3`` and
        ``expected_df_2``.
        """

        transformer = LogTransformer(
            columns=["a", "b"],
            add_1=True,
            drop=True,
            suffix="new_col",
        )

        actual = transformer.transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=actual,
            msg="LogTransformer transform adding 1 and dropping original columns",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_3(), expected_df_3()) +
        h.index_preserved_params(d.create_df_3(), expected_df_3()),
    )
    def test_expected_output_3(self, df, expected):
        """Test that transform is giving the expected output when not adding one and not dropping original columns.

        Parametrized over the cases that ``h.row_by_row_params`` and
        ``h.index_preserved_params`` produce from ``create_df_3`` and
        ``expected_df_3``.
        """

        x1 = LogTransformer(columns=["a", "b"],
                            add_1=False,
                            drop=False,
                            suffix="new_col")

        df_transformed = x1.transform(df)

        # The failure message must describe this configuration (drop=False);
        # it previously claimed the original columns were dropped.
        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg=
            "LogTransformer transform not adding 1 and not dropping original columns",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_3(), expected_df_4()) +
        h.index_preserved_params(d.create_df_3(), expected_df_4()),
    )
    def test_expected_output_4(self, df, expected):
        """Test that transform is giving the expected output when adding one and not dropping original columns.

        Parametrized over the cases that ``h.row_by_row_params`` and
        ``h.index_preserved_params`` produce from ``create_df_3`` and
        ``expected_df_4``.
        """

        x1 = LogTransformer(columns=["a", "b"],
                            add_1=True,
                            drop=False,
                            suffix="new_col")

        df_transformed = x1.transform(df)

        # The failure message must describe this configuration (add_1=True,
        # drop=False); it previously described the opposite settings.
        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg=
            "LogTransformer transform adding 1 and not dropping original columns",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_4(), expected_df_5()) +
        h.index_preserved_params(d.create_df_4(), expected_df_5()),
    )
    def test_expected_output_5(self, df, expected):
        """Test that transform is giving the expected output when adding one and not dropping
        original columns and using base.

        Parametrized over the cases that ``h.row_by_row_params`` and
        ``h.index_preserved_params`` produce from ``create_df_4`` and
        ``expected_df_5``.
        """

        x1 = LogTransformer(columns=["a"],
                            base=5,
                            add_1=True,
                            drop=False,
                            suffix="new_col")

        df_transformed = x1.transform(df)

        # The failure message must describe this configuration (base=5,
        # add_1=True, drop=False); it previously was a copy of another test's.
        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg=
            "LogTransformer transform using base, adding 1 and not dropping original columns",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_4(), expected_df_6()) +
        h.index_preserved_params(d.create_df_4(), expected_df_6()),
    )
    def test_expected_output_6(self, df, expected):
        """Test that transform is giving the expected output when not adding one and dropping
        original columns and using base.

        Parametrized over the cases that ``h.row_by_row_params`` and
        ``h.index_preserved_params`` produce from ``create_df_4`` and
        ``expected_df_6``.
        """

        x1 = LogTransformer(columns=["a"],
                            base=7,
                            add_1=False,
                            drop=True,
                            suffix="new_col")

        df_transformed = x1.transform(df)

        # The failure message must describe this configuration (drop=True);
        # it previously claimed the original columns were not dropped.
        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg=
            "LogTransformer transform should be using base, not adding 1, and dropping original columns",
        )

    @pytest.mark.parametrize(
        "df, columns, add_1, extra_exception_text",
        [
            # Integer column containing 0, add_1 off.
            (pd.DataFrame({"a": [1, 2, 0]}), ["a"], False, ""),
            (
                pd.DataFrame({"a": [1, 2, 0], "b": [1, 2, 3]}),
                ["a", "b"],
                False,
                "",
            ),
            # Integer column containing -1, add_1 on.
            (
                pd.DataFrame({"a": [1, 2, -1]}),
                ["a"],
                True,
                r" \(after adding 1\)",
            ),
            (
                pd.DataFrame({"a": [1, 2, -1], "b": [1, 2, 3]}),
                ["a", "b"],
                True,
                r" \(after adding 1\)",
            ),
            # Float column slightly below 0, add_1 off.
            (pd.DataFrame({"b": [1, 2, -0.001]}), ["b"], False, ""),
            (
                pd.DataFrame({"b": [1, 2, -0.001], "a": [1, 2, 3]}),
                ["a", "b"],
                False,
                "",
            ),
            # Float column slightly below -1, add_1 on.
            (
                pd.DataFrame({"b": [1, 2, -1.001]}),
                ["b"],
                True,
                r" \(after adding 1\)",
            ),
            (
                pd.DataFrame({"b": [1, 2, -1.001], "a": [1, 2, 3]}),
                ["a", "b"],
                True,
                r" \(after adding 1\)",
            ),
        ],
    )
    def test_negative_values_raise_exception(self, df, columns, add_1,
                                             extra_exception_text):
        """Check transform raises ValueError for values that are <= 0 at log time.

        ``extra_exception_text`` is the regex fragment expected in the message
        when ``add_1`` is True, and empty otherwise.
        """

        transformer = LogTransformer(columns=columns, add_1=add_1, drop=True)

        expected_message = f"values less than or equal to 0 in columns{extra_exception_text}, make greater than 0 before using transform"

        with pytest.raises(ValueError, match=expected_message):
            transformer.transform(df)
# Example no. 6
# 0
class TestTransform(object):
    """Tests for CappingTransformer.transform()."""

    def expected_df_1():
        """Expected output from test_expected_output_min_and_max_combinations.

        Takes no ``self``: it is called directly in the class body by the
        ``parametrize`` decorator of that test.

        Returns
        -------
        pd.DataFrame
            Seven rows with columns "a", "b" and "c", each containing one
            missing value.
        """

        # np.nan is the canonical spelling; the np.NaN alias was removed in
        # NumPy 2.0 and raises AttributeError there.
        df = pd.DataFrame(
            {
                "a": [2, 2, 3, 4, 5, 5, np.nan],
                "b": [1, 2, 3, np.nan, 7, 7, 7],
                "c": [np.nan, 1, 2, 3, 0, 0, 0],
            }
        )

        return df

    def expected_df_2():
        """Expected output from test_non_cap_column_left_untouched.

        Takes no ``self``: it is called directly in the class body by the
        ``parametrize`` decorator of that test.

        Returns
        -------
        pd.DataFrame
            Eight rows with a numeric column "a", a string column "b" and a
            categorical column "c"; the last row is missing in every column.
        """

        # np.nan is the canonical spelling; the np.NaN alias was removed in
        # NumPy 2.0 and raises AttributeError there.
        df = pd.DataFrame(
            {
                "a": [2, 2, 3, 4, 5, 6, 7, np.nan],
                "b": ["a", "b", "c", "d", "e", "f", "g", np.nan],
                "c": ["a", "b", "c", "d", "e", "f", "g", np.nan],
            }
        )

        df["c"] = df["c"].astype("category")

        return df

    def test_arguments(self):
        """Check the argument names of CappingTransformer.transform against ["self", "X"]."""

        expected_signature = ["self", "X"]

        h.test_function_arguments(
            func=CappingTransformer.transform,
            expected_arguments=expected_signature,
        )

    def test_check_is_fitted_call_count(self, mocker):
        """Check BaseTransformer.check_is_fitted is called twice during transform."""

        data = d.create_df_3()

        capper = CappingTransformer(capping_values={"a": [2, 5], "b": [-1, 8]})

        expected_calls = 2

        with h.assert_function_call_count(
            mocker,
            tubular.base.BaseTransformer,
            "check_is_fitted",
            expected_calls,
        ):
            capper.transform(data)

    def test_check_is_fitted_call_1(self, mocker):
        """Check the arguments of the BaseTransformer.check_is_fitted calls in transform.

        Expected arguments are supplied for call index 0
        (``["capping_values"]``) and call index 1
        (``["_replacement_values"]``), both without keyword arguments.
        """

        data = d.create_df_3()

        capper = CappingTransformer(capping_values={"a": [2, 5], "b": [-1, 8]})

        capping_values_call = {"args": (["capping_values"],), "kwargs": {}}
        replacement_values_call = {"args": (["_replacement_values"],), "kwargs": {}}
        expected_call_args = {0: capping_values_call, 1: replacement_values_call}

        with h.assert_function_call(
            mocker,
            tubular.base.BaseTransformer,
            "check_is_fitted",
            expected_call_args,
        ):
            capper.transform(data)

    def test_super_transform_called(self, mocker):
        """Check that CappingTransformer.transform calls BaseTransformer.transform.

        ``h.assert_function_call`` is given the expected arguments for call
        index 0 (a fresh ``create_df_3`` frame, no keyword arguments) and a
        fresh ``create_df_3`` frame as ``return_value``.
        """

        data = d.create_df_3()

        capper = CappingTransformer(capping_values={"a": [2, 5], "b": [-1, 8]})

        first_call = {"args": (d.create_df_3(),), "kwargs": {}}
        expected_call_args = {0: first_call}

        with h.assert_function_call(
            mocker,
            tubular.base.BaseTransformer,
            "transform",
            expected_call_args,
            return_value=d.create_df_3(),
        ):
            capper.transform(data)

    def test_learnt_values_not_modified(self):
        """Test that the capping_values given at init are not changed in transform.

        The expected values are snapshotted into fresh containers before the
        transformer receives the dict. If the expectation were the very object
        handed to CappingTransformer, an in-place mutation during transform
        would change both sides of the comparison and go unnoticed.
        """

        capping_values_dict = {"a": [2, 5], "b": [-1, 8]}

        # Independent copy (new dict, new lists) to compare against afterwards.
        expected_capping_values = {
            column: list(caps) for column, caps in capping_values_dict.items()
        }

        df = d.create_df_3()

        x = CappingTransformer(capping_values_dict)

        x.transform(df)

        h.test_object_attributes(
            obj=x,
            expected_attributes={"capping_values": expected_capping_values},
            msg="Attributes for CappingTransformer set in init",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_3(), expected_df_1()) +
        h.index_preserved_params(d.create_df_3(), expected_df_1()),
    )
    def test_expected_output_min_and_max_combinations(self, df, expected):
        """Check the capped output for three different capping_values entries.

        "a" is given two values, "b" has None in the first position and "c"
        has None in the second position.
        """

        capping_values = {"a": [2, 5], "b": [None, 7], "c": [0, None]}
        capper = CappingTransformer(capping_values=capping_values)

        result = capper.transform(df)

        h.assert_frame_equal_msg(
            actual=result,
            expected=expected,
            msg_tag="Unexpected values in CappingTransformer.transform",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_4(), expected_df_2()) +
        h.index_preserved_params(d.create_df_4(), expected_df_2()),
    )
    def test_non_cap_column_left_untouched(self, df, expected):
        """Check that only the column named in capping_values is capped.

        Only "a" is configured; the result is compared with ``expected_df_2``
        based cases.
        """

        capper = CappingTransformer(capping_values={"a": [2, 10]})

        result = capper.transform(df)

        h.assert_frame_equal_msg(
            actual=result,
            expected=expected,
            msg_tag="Unexpected values in CappingTransformer.transform, with columns meant to not be transformed",
        )

    def test_non_numeric_column_error(self):
        """Check transform raises TypeError naming the non-numeric columns.

        Capping is configured for "a", "b" and "c" of ``d.create_df_5()``;
        the error message is expected to list "b" and "c".
        """

        data = d.create_df_5()

        capping_values = {"a": [2, 5], "b": [-1, 8], "c": [-1, 8]}
        capper = CappingTransformer(capping_values=capping_values)

        expected_message = r"The following columns are not numeric in X; \['b', 'c'\]"

        with pytest.raises(TypeError, match=expected_message):
            capper.transform(data)

    def test_quantile_not_fit_error(self):
        """Check transform raises ValueError when quantiles were given at init but fit was never run."""

        data = d.create_df_9()

        capper = CappingTransformer(quantiles={"a": [0.2, 1], "b": [0, 1]})

        expected_message = "capping_values attribute is an empty dict - perhaps the fit method has not been run yet"

        with pytest.raises(ValueError, match=expected_message):
            capper.transform(data)

    def test_replacement_values_dict_not_set_error(self):
        """Check transform raises ValueError when _replacement_values is an empty dict."""

        data = d.create_df_9()

        capper = CappingTransformer(quantiles={"a": [0.2, 1], "b": [0, 1]})

        # Set capping_values by hand so transform gets past the
        # "capping_values attribute is an empty dict" exception.
        capper.capping_values = {"a": [1, 4]}

        expected_message = "_replacement_values attribute is an empty dict - perhaps the fit method has not been run yet"

        with pytest.raises(ValueError, match=expected_message):
            capper.transform(data)

    def test_attributes_unchanged_from_transform(self):
        """Check that running transform leaves the fitted attributes unchanged.

        Two identically configured transformers are fitted on the same frame;
        only the second one also runs transform. Their attributes are then
        compared one by one.
        """

        data = d.create_df_9()

        fitted_only = CappingTransformer(quantiles={"a": [0.2, 1], "b": [0, 1]})
        fitted_only.fit(data)

        fitted_and_used = CappingTransformer(quantiles={"a": [0.2, 1], "b": [0, 1]})
        fitted_and_used.fit(data)
        fitted_and_used.transform(data)

        assert (
            fitted_only.capping_values == fitted_and_used.capping_values
        ), "capping_values attribute modified in transform"

        assert (
            fitted_only._replacement_values == fitted_and_used._replacement_values
        ), "_replacement_values attribute modified in transform"

        assert (
            fitted_only.weights_column == fitted_and_used.weights_column
        ), "weights_column attribute modified in transform"

        assert (
            fitted_only.quantiles == fitted_and_used.quantiles
        ), "quantiles attribute modified in transform"