Python create_df_3の例、tubular.testing.test_data.create_df_3 Pythonの例

コード例 #1

0

ファイルを表示

ファイル: test_LogTransformer.py プロジェクト: mylonabusiness28/tubular

    def expected_df_1():
        """Build the frame that test_expected_output_1 compares against.

        Starts from ``d.create_df_3()``, appends the natural log of columns
        "a" and "b" under an ``_new_col`` suffix, then removes the originals.
        """

        expected = d.create_df_3()

        source_columns = ["a", "b"]

        for name in source_columns:
            expected[f"{name}_new_col"] = np.log(expected[name])

        expected.drop(columns=source_columns, inplace=True)

        return expected

コード例 #2

0

ファイルを表示

    def test_check_is_fitted_call_count(self, mocker):
        """Check transform calls BaseTransformer.check_is_fitted exactly twice."""

        data = d.create_df_3()

        capper = CappingTransformer(capping_values={"a": [2, 5], "b": [-1, 8]})

        expected_number_of_calls = 2

        with h.assert_function_call_count(
            mocker,
            tubular.base.BaseTransformer,
            "check_is_fitted",
            expected_number_of_calls,
        ):
            capper.transform(data)

コード例 #3

0

ファイルを表示

    def test_quantiles_none_error(self):
        """Check fit emits a UserWarning when the transformer was built without quantiles."""

        expected_warning = "quantiles not set so no fitting done in CappingTransformer"

        # setup stays inside the warns block, exactly as in the original layout
        with pytest.warns(UserWarning, match=expected_warning):

            data = d.create_df_3()

            capper = CappingTransformer(capping_values={"a": [2, 5], "b": [-1, 8]})

            capper.fit(data)

コード例 #4

0

ファイルを表示

ファイル: test_ScalingTransformer.py プロジェクト: mylonabusiness28/tubular

    def test_return_type(self, scaler, scaler_type_str, columns):
        """Check the object returned by transform is exactly a pd.DataFrame."""

        data = d.create_df_3()

        transformer = ScalingTransformer(
            columns=columns, scaler=scaler, scaler_kwargs={"copy": True}
        )
        transformer.fit(data)

        output = transformer.transform(data)

        # exact type comparison (not isinstance), so subclasses are rejected too
        output_is_frame = type(output) is pd.DataFrame

        assert output_is_frame, "unexpected output type from transform"

コード例 #5

0

ファイルを表示

ファイル: test_DataFrameMethodTransformer.py プロジェクト: mylonabusiness28/tubular

    def test_super_transform_called(self, mocker):
        """Check transform delegates to BaseTransformer.transform with the input frame."""

        data = d.create_df_3()

        transformer = DataFrameMethodTransformer(
            new_column_name="d", pd_method_name="sum", columns=["b", "c"]
        )

        # the expectation holds a copy of the input frame, taken before transform runs
        first_call = {"args": (data.copy(),), "kwargs": {}}
        expected_call_args = {0: first_call}

        with h.assert_function_call(
            mocker, tubular.base.BaseTransformer, "transform", expected_call_args
        ):
            transformer.transform(data)

コード例 #6

0

ファイルを表示

    def test_learnt_values_not_modified(self):
        """Test that the capping values given at init are not changed in transform.

        The expectation is built from its own literal rather than from the dict
        handed to the transformer, so an in-place mutation of the transformer's
        dict during transform cannot also alter the expected value.
        """

        expected_capping_values = {"a": [2, 5], "b": [-1, 8]}

        df = d.create_df_3()

        # separate (equal) literal - deliberately not the same object as the expectation
        x = CappingTransformer({"a": [2, 5], "b": [-1, 8]})

        x.transform(df)

        h.test_object_attributes(
            obj=x,
            expected_attributes={"capping_values": expected_capping_values},
            msg="Attributes for CappingTransformer set in init",
        )

コード例 #7

0

ファイルを表示

    def test_check_is_fitted_call_1(self, mocker):
        """Check the arguments of the check_is_fitted calls made during transform.

        Call 0 is expected to receive ["capping_values"] and call 1
        ["_replacement_values"], both without keyword arguments.
        """

        data = d.create_df_3()

        capper = CappingTransformer(capping_values={"a": [2, 5], "b": [-1, 8]})

        checked_attributes = ["capping_values", "_replacement_values"]

        expected_call_args = {
            position: {"args": ([attribute],), "kwargs": {}}
            for position, attribute in enumerate(checked_attributes)
        }

        with h.assert_function_call(
            mocker, tubular.base.BaseTransformer, "check_is_fitted", expected_call_args
        ):
            capper.transform(data)

コード例 #8

0

ファイルを表示

ファイル: test_ModeImputer.py プロジェクト: mylonabusiness28/tubular

    def test_learnt_values_not_modified(self):
        """Check impute_values_ after fit_transform equal those after fit alone."""

        data = d.create_df_3()

        imputed_columns = ["a", "b", "c"]

        fitted_only = ModeImputer(columns=["a", "b", "c"])
        fitted_only.fit(data)

        fitted_and_transformed = ModeImputer(columns=imputed_columns)
        fitted_and_transformed.fit_transform(data)

        h.assert_equal_dispatch(
            expected=fitted_only.impute_values_,
            actual=fitted_and_transformed.impute_values_,
            msg="Impute values not changed in transform",
        )

コード例 #9

0

ファイルを表示

ファイル: test_ModeImputer.py プロジェクト: mylonabusiness28/tubular

    def test_learnt_values(self):
        """Check fit stores the first mode of each column in impute_values_."""

        data = d.create_df_3()

        imputer = ModeImputer(columns=["a", "b", "c"])
        imputer.fit(data)

        # first entry of Series.mode() for every imputed column
        expected_modes = {name: data[name].mode()[0] for name in ["a", "b", "c"]}

        h.test_object_attributes(
            obj=imputer,
            expected_attributes={"impute_values_": expected_modes},
            msg="impute_values_ attribute",
        )

コード例 #10

0

ファイルを表示

ファイル: test_ModeImputer.py プロジェクト: mylonabusiness28/tubular

class TestTransform(object):
    """Tests for ModeImputer.transform()."""

    # NOTE: the expected_df_* helpers deliberately take no ``self``. They are
    # called as plain functions while the class body executes, to build the
    # parametrize arguments used further down, so they must be defined first.
    def expected_df_1():
        """Expected output for test_nulls_imputed_correctly.

        Nulls in columns a, b and c are replaced with the first mode of the
        respective column.
        """

        df = pd.DataFrame({
            "a": [1, 2, 3, 4, 5, 6, np.nan],
            "b": [1, 2, 3, np.nan, 7, 8, 9],
            "c": [np.nan, 1, 2, 3, -4, -5, -6],
        })

        for col in ["a", "b", "c"]:

            # one .loc call on the frame itself: the chained form
            # df[col].loc[mask] = value is not guaranteed to write back to df
            df.loc[df[col].isnull(), col] = df[col].mode()[0]

        return df

    def expected_df_2():
        """Expected output for test_nulls_imputed_correctly_2.

        Only column a has its nulls replaced (with its first mode); b and c
        keep their nulls.
        """

        df = pd.DataFrame({
            "a": [1, 2, 3, 4, 5, 6, np.nan],
            "b": [1, 2, 3, np.nan, 7, 8, 9],
            "c": [np.nan, 1, 2, 3, -4, -5, -6],
        })

        for col in ["a"]:

            # see expected_df_1: assign through the frame, not a column selection
            df.loc[df[col].isnull(), col] = df[col].mode()[0]

        return df

    def test_arguments(self):
        """Test that transform has expected arguments."""

        h.test_function_arguments(func=ModeImputer.transform,
                                  expected_arguments=["self", "X"])

    def test_check_is_fitted_called(self, mocker):
        """Test that BaseTransformer check_is_fitted called."""

        df = d.create_df_1()

        x = ModeImputer(columns="a")

        x.fit(df)

        expected_call_args = {
            0: {
                "args": (["impute_values_"], ),
                "kwargs": {}
            }
        }

        with h.assert_function_call(mocker, tubular.base.BaseTransformer,
                                    "check_is_fitted", expected_call_args):

            x.transform(df)

    def test_super_transform_called(self, mocker):
        """Test that BaseTransformer.transform called."""

        df = d.create_df_1()

        x = ModeImputer(columns="a")

        x.fit(df)

        # a freshly created frame is used for the expected argument
        expected_call_args = {0: {"args": (d.create_df_1(), ), "kwargs": {}}}

        with h.assert_function_call(mocker, tubular.base.BaseTransformer,
                                    "transform", expected_call_args):

            x.transform(df)

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_3(), expected_df_1()) +
        h.index_preserved_params(d.create_df_3(), expected_df_1()),
    )
    def test_nulls_imputed_correctly(self, df, expected):
        """Test missing values are filled with the correct values."""

        x = ModeImputer(columns=["a", "b", "c"])

        # set the impute values dict directly rather than fitting x on df so test works with helpers
        x.impute_values_ = {"a": 1.0, "b": 1.0, "c": -6.0}

        df_transformed = x.transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg="Check nulls filled correctly in transform",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_3(), expected_df_2()) +
        h.index_preserved_params(d.create_df_3(), expected_df_2()),
    )
    def test_nulls_imputed_correctly_2(self, df, expected):
        """Test missing values are filled with the correct values - and unrelated columns are not changed."""

        x = ModeImputer(columns=["a"])

        # set the impute values dict directly rather than fitting x on df so test works with helpers
        x.impute_values_ = {"a": 1.0}

        df_transformed = x.transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg="Check nulls filled correctly in transform",
        )

    def test_learnt_values_not_modified(self):
        """Test that the impute_values_ from fit are not changed in transform."""

        df = d.create_df_3()

        x = ModeImputer(columns=["a", "b", "c"])

        x.fit(df)

        x2 = ModeImputer(columns=["a", "b", "c"])

        # x2 additionally runs transform; its learnt values must match x's
        x2.fit_transform(df)

        h.assert_equal_dispatch(
            expected=x.impute_values_,
            actual=x2.impute_values_,
            msg="Impute values not changed in transform",
        )

コード例 #11

0

ファイルを表示

ファイル: test_DataFrameMethodTransformer.py プロジェクト: mylonabusiness28/tubular

class TestTransform(object):
    """Tests for DataFrameMethodTransformer.transform()."""

    # NOTE: the expected_df_* helpers deliberately take no ``self``. They are
    # called as plain functions while the class body executes, to build the
    # parametrize arguments used further down, so they must be defined first.
    def expected_df_1():
        """Expected output of test_expected_output_single_columns_assignment."""

        df = pd.DataFrame({
            "a": [1, 2, 3, 4, 5, 6, np.nan],
            "b": [1, 2, 3, np.nan, 7, 8, 9],
            "c": [np.nan, 1, 2, 3, -4, -5, -6],
            "d": [1.0, 3.0, 5.0, 3.0, 3.0, 3.0, 3.0],
        })

        return df

    def expected_df_2():
        """Expected output of test_expected_output_multi_columns_assignment."""

        df = pd.DataFrame({
            "a": [1, 2, 3, 4, 5, 6, np.nan],
            "b": [1, 2, 3, np.nan, 7, 8, 9],
            "c": [np.nan, 1, 2, 3, -4, -5, -6],
            "d": [0.5, 1.0, 1.5, np.nan, 3.5, 4.0, 4.5],
            "e": [np.nan, 0.5, 1.0, 1.5, -2.0, -2.5, -3.0],
        })

        return df

    def test_arguments(self):
        """Test that transform has expected arguments."""

        h.test_function_arguments(func=DataFrameMethodTransformer.transform,
                                  expected_arguments=["self", "X"])

    def test_super_transform_called(self, mocker):
        """Test that BaseTransformer.transform called."""

        df = d.create_df_3()

        x = DataFrameMethodTransformer(new_column_name="d",
                                       pd_method_name="sum",
                                       columns=["b", "c"])

        # the expectation holds a copy of the input, taken before transform runs
        expected_call_args = {0: {"args": (df.copy(), ), "kwargs": {}}}

        with h.assert_function_call(mocker, tubular.base.BaseTransformer,
                                    "transform", expected_call_args):

            x.transform(df)

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_3(), expected_df_1()) +
        h.index_preserved_params(d.create_df_3(), expected_df_1()),
    )
    def test_expected_output_single_columns_assignment(self, df, expected):
        """Test a single column output from transform gives expected results."""

        x = DataFrameMethodTransformer(
            new_column_name="d",
            pd_method_name="sum",
            columns=["b", "c"],
            pd_method_kwargs={"axis": 1},
        )

        df_transformed = x.transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg="DataFrameMethodTransformer sum columns b and c",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_3(), expected_df_2()) +
        h.index_preserved_params(d.create_df_3(), expected_df_2()),
    )
    def test_expected_output_multi_columns_assignment(self, df, expected):
        """Test a multiple column output from transform gives expected results."""

        x = DataFrameMethodTransformer(
            new_column_name=["d", "e"],
            pd_method_name="div",
            columns=["b", "c"],
            pd_method_kwargs={"other": 2},
        )

        df_transformed = x.transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg="DataFrameMethodTransformer divide by 2 columns b and c",
        )

    @pytest.mark.parametrize(
        "df, new_column_name, pd_method_name, columns, pd_method_kwargs",
        [
            (d.create_df_3(), ["d", "e"], "div", ["b", "c"], {
                "other": 2
            }),
            (d.create_df_3(), "d", "sum", ["b", "c"], {
                "axis": 1
            }),
            (d.create_df_3(), ["d", "e"], "cumprod", ["b", "c"], {
                "axis": 1
            }),
            (d.create_df_3(), ["d", "e", "f"], "mod", ["a", "b", "c"], {
                "other": 2
            }),
            (d.create_df_3(), ["d", "e", "f"], "le", ["a", "b", "c"], {
                "other": 0
            }),
            (d.create_df_3(), ["d", "e"], "abs", ["a", "b"], {}),
        ],
    )
    def test_pandas_method_called(self, mocker, df, new_column_name,
                                  pd_method_name, columns, pd_method_kwargs):
        """Test that the pandas method is called as expected (with kwargs passed) during transform."""

        # spy on the pd.DataFrame method so its call arguments can be inspected
        spy = mocker.spy(pd.DataFrame, pd_method_name)

        x = DataFrameMethodTransformer(
            new_column_name=new_column_name,
            pd_method_name=pd_method_name,
            columns=columns,
            pd_method_kwargs=pd_method_kwargs,
        )

        x.transform(df)

        # pull out positional and keyword args to target the call
        call_args = spy.call_args_list[0]
        call_pos_args = call_args[0]
        call_kwargs = call_args[1]

        # test keyword are as expected
        h.assert_dict_equal_msg(
            actual=call_kwargs,
            expected=pd_method_kwargs,
            msg_tag=f"""Keyword arg assert for {pd_method_name}""",
        )

        # test positional args are as expected
        h.assert_list_tuple_equal_msg(
            actual=call_pos_args,
            expected=(df[columns], ),
            msg_tag=f"""Positional arg assert for {pd_method_name}""",
        )

コード例 #12

0

ファイルを表示

ファイル: test_LogTransformer.py プロジェクト: mylonabusiness28/tubular

class TestTransform(object):
    """Tests for LogTransformer.transform()."""

    # NOTE: the expected_df_* helpers deliberately take no ``self``. They are
    # called as plain functions while the class body executes, to build the
    # parametrize arguments used further down, so they must be defined first.
    def expected_df_1():
        """Expected output of test_expected_output_1 (log of a and b, originals dropped)."""

        df = d.create_df_3()

        df["a_new_col"] = np.log(df["a"])
        df["b_new_col"] = np.log(df["b"])

        df.drop(columns=["a", "b"], inplace=True)

        return df

    def expected_df_2():
        """Expected output of test_expected_output_2 (log of a + 1 and b + 1, originals dropped)."""

        df = d.create_df_3()

        df["a_new_col"] = np.log(df["a"] + 1)
        df["b_new_col"] = np.log(df["b"] + 1)

        df.drop(columns=["a", "b"], inplace=True)

        return df

    def expected_df_3():
        """Expected output of test_expected_output_3 (log of a and b, originals kept)."""

        df = d.create_df_3()

        df["a_new_col"] = np.log(df["a"])
        df["b_new_col"] = np.log(df["b"])

        return df

    def expected_df_4():
        """Expected output of test_expected_output_4 (log of a + 1 and b + 1, originals kept)."""

        df = d.create_df_3()

        df["a_new_col"] = np.log(df["a"] + 1)
        df["b_new_col"] = np.log(df["b"] + 1)

        return df

    def expected_df_5():
        """Expected output of test_expected_output_5 (base 5 log of a + 1, original kept)."""

        df = d.create_df_4()

        # change of base: log_5(v) = ln(v) / ln(5)
        df["a_new_col"] = np.log(df["a"] + 1) / np.log(5)

        return df

    def expected_df_6():
        """Expected output of test_expected_output_6 (base 7 log of a, original dropped)."""

        df = d.create_df_4()

        # change of base: log_7(v) = ln(v) / ln(7)
        df["a_new_col"] = np.log(df["a"]) / np.log(7)

        df.drop("a", axis=1, inplace=True)

        return df

    def test_arguments(self):
        """Test that transform has expected arguments."""

        h.test_function_arguments(func=LogTransformer.transform,
                                  expected_arguments=["self", "X"])

    def test_super_transform_called(self, mocker):
        """Test that BaseTransformer.transform called."""

        df = d.create_df_3()

        x = LogTransformer(columns=["a", "b"])

        expected_call_args = {0: {"args": (d.create_df_3(), ), "kwargs": {}}}

        with h.assert_function_call(
                mocker,
                tubular.base.BaseTransformer,
                "transform",
                expected_call_args,
                return_value=d.create_df_3(),
        ):

            x.transform(df)

    def test_error_with_non_numeric_columns(self):
        """Test an exception is raised if transform is applied to non-numeric columns."""

        df = d.create_df_5()

        x = LogTransformer(columns=["a", "b", "c"])

        with pytest.raises(
                TypeError,
                match=
                r"The following columns are not numeric in X; \['b', 'c'\]"):

            x.transform(df)

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_3(), expected_df_1()) +
        h.index_preserved_params(d.create_df_3(), expected_df_1()),
    )
    def test_expected_output_1(self, df, expected):
        """Test that transform is giving the expected output when not adding one and dropping original columns."""

        x1 = LogTransformer(columns=["a", "b"],
                            add_1=False,
                            drop=True,
                            suffix="new_col")

        df_transformed = x1.transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg=
            "LogTransformer transform not adding 1 and dropping original columns",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_3(), expected_df_2()) +
        h.index_preserved_params(d.create_df_3(), expected_df_2()),
    )
    def test_expected_output_2(self, df, expected):
        """Test that transform is giving the expected output when adding one and dropping original columns."""

        x1 = LogTransformer(columns=["a", "b"],
                            add_1=True,
                            drop=True,
                            suffix="new_col")

        df_transformed = x1.transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg=
            "LogTransformer transform adding 1 and dropping original columns",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_3(), expected_df_3()) +
        h.index_preserved_params(d.create_df_3(), expected_df_3()),
    )
    def test_expected_output_3(self, df, expected):
        """Test that transform is giving the expected output when not adding one and not dropping original columns."""

        x1 = LogTransformer(columns=["a", "b"],
                            add_1=False,
                            drop=False,
                            suffix="new_col")

        df_transformed = x1.transform(df)

        # message corrected to match drop=False
        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg=
            "LogTransformer transform not adding 1 and not dropping original columns",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_3(), expected_df_4()) +
        h.index_preserved_params(d.create_df_3(), expected_df_4()),
    )
    def test_expected_output_4(self, df, expected):
        """Test that transform is giving the expected output when adding one and not dropping original columns."""

        x1 = LogTransformer(columns=["a", "b"],
                            add_1=True,
                            drop=False,
                            suffix="new_col")

        df_transformed = x1.transform(df)

        # message corrected to match add_1=True, drop=False
        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg=
            "LogTransformer transform adding 1 and not dropping original columns",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_4(), expected_df_5()) +
        h.index_preserved_params(d.create_df_4(), expected_df_5()),
    )
    def test_expected_output_5(self, df, expected):
        """Test that transform is giving the expected output when adding one and not dropping
        original columns and using base."""

        x1 = LogTransformer(columns=["a"],
                            base=5,
                            add_1=True,
                            drop=False,
                            suffix="new_col")

        df_transformed = x1.transform(df)

        # message corrected to match base=5, add_1=True, drop=False
        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg=
            "LogTransformer transform using base, adding 1 and not dropping original columns",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_4(), expected_df_6()) +
        h.index_preserved_params(d.create_df_4(), expected_df_6()),
    )
    def test_expected_output_6(self, df, expected):
        """Test that transform is giving the expected output when not adding one and dropping
        original columns and using base."""

        x1 = LogTransformer(columns=["a"],
                            base=7,
                            add_1=False,
                            drop=True,
                            suffix="new_col")

        df_transformed = x1.transform(df)

        # message corrected to match drop=True
        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg=
            "LogTransformer transform should be using base, not adding 1, and dropping original columns",
        )

    @pytest.mark.parametrize(
        "df, columns, add_1, extra_exception_text",
        (
            [pd.DataFrame({"a": [1, 2, 0]}), ["a"], False, ""],
            [
                pd.DataFrame({
                    "a": [1, 2, 0],
                    "b": [1, 2, 3]
                }), ["a", "b"], False, ""
            ],
            [
                pd.DataFrame({"a": [1, 2, -1]}), ["a"], True,
                r" \(after adding 1\)"
            ],
            [
                pd.DataFrame({
                    "a": [1, 2, -1],
                    "b": [1, 2, 3]
                }),
                ["a", "b"],
                True,
                r" \(after adding 1\)",
            ],
            [pd.DataFrame({"b": [1, 2, -0.001]}), ["b"], False, ""],
            [
                pd.DataFrame({
                    "b": [1, 2, -0.001],
                    "a": [1, 2, 3]
                }),
                ["a", "b"],
                False,
                "",
            ],
            [
                pd.DataFrame({"b": [1, 2, -1.001]}), ["b"], True,
                r" \(after adding 1\)"
            ],
            [
                pd.DataFrame({
                    "b": [1, 2, -1.001],
                    "a": [1, 2, 3]
                }),
                ["a", "b"],
                True,
                r" \(after adding 1\)",
            ],
        ),
    )
    def test_negative_values_raise_exception(self, df, columns, add_1,
                                             extra_exception_text):
        """Test that an exception is raised if zero or negative values (after any add_1 shift) are passed in transform."""

        x = LogTransformer(columns=columns, add_1=add_1, drop=True)

        # extra_exception_text is a pre-escaped regex fragment spliced into the match pattern
        with pytest.raises(
                ValueError,
                match=
                f"values less than or equal to 0 in columns{extra_exception_text}, make greater than 0 before using transform",
        ):

            x.transform(df)

コード例 #13

0

ファイルを表示

class TestTransform(object):
    """Tests for CappingTransformer.transform()."""

    # NOTE: the expected_df_* helpers deliberately take no ``self``. They are
    # called as plain functions while the class body executes, to build the
    # parametrize arguments used further down, so they must be defined first.
    def expected_df_1():
        """Expected output from test_expected_output_min_and_max_combinations."""

        df = pd.DataFrame(
            {
                "a": [2, 2, 3, 4, 5, 5, np.nan],
                "b": [1, 2, 3, np.nan, 7, 7, 7],
                "c": [np.nan, 1, 2, 3, 0, 0, 0],
            }
        )

        return df

    def expected_df_2():
        """Expected output from test_non_cap_column_left_untouched."""

        df = pd.DataFrame(
            {
                "a": [2, 2, 3, 4, 5, 6, 7, np.nan],
                "b": ["a", "b", "c", "d", "e", "f", "g", np.nan],
                "c": ["a", "b", "c", "d", "e", "f", "g", np.nan],
            }
        )

        df["c"] = df["c"].astype("category")

        return df

    def test_arguments(self):
        """Test that transform has expected arguments."""

        h.test_function_arguments(
            func=CappingTransformer.transform, expected_arguments=["self", "X"]
        )

    def test_check_is_fitted_call_count(self, mocker):
        """Test there are 2 calls to BaseTransformer check_is_fitted in transform."""

        df = d.create_df_3()

        x = CappingTransformer(capping_values={"a": [2, 5], "b": [-1, 8]})

        with h.assert_function_call_count(
            mocker, tubular.base.BaseTransformer, "check_is_fitted", 2
        ):

            x.transform(df)

    def test_check_is_fitted_call_1(self, mocker):
        """Test the arguments of both calls to BaseTransformer check_is_fitted in transform."""

        df = d.create_df_3()

        x = CappingTransformer(capping_values={"a": [2, 5], "b": [-1, 8]})

        expected_call_args = {
            0: {"args": (["capping_values"],), "kwargs": {}},
            1: {"args": (["_replacement_values"],), "kwargs": {}},
        }

        with h.assert_function_call(
            mocker, tubular.base.BaseTransformer, "check_is_fitted", expected_call_args
        ):

            x.transform(df)

    def test_super_transform_called(self, mocker):
        """Test that BaseTransformer.transform called."""

        df = d.create_df_3()

        x = CappingTransformer(capping_values={"a": [2, 5], "b": [-1, 8]})

        expected_call_args = {0: {"args": (d.create_df_3(),), "kwargs": {}}}

        with h.assert_function_call(
            mocker,
            tubular.base.BaseTransformer,
            "transform",
            expected_call_args,
            return_value=d.create_df_3(),
        ):

            x.transform(df)

    def test_learnt_values_not_modified(self):
        """Test that the capping values given at init are not changed in transform.

        The expectation is built from its own literal rather than from the dict
        handed to the transformer, so an in-place mutation of the transformer's
        dict during transform cannot also alter the expected value.
        """

        expected_capping_values = {"a": [2, 5], "b": [-1, 8]}

        df = d.create_df_3()

        # separate (equal) literal - deliberately not the same object as the expectation
        x = CappingTransformer({"a": [2, 5], "b": [-1, 8]})

        x.transform(df)

        h.test_object_attributes(
            obj=x,
            expected_attributes={"capping_values": expected_capping_values},
            msg="Attributes for CappingTransformer set in init",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_3(), expected_df_1())
        + h.index_preserved_params(d.create_df_3(), expected_df_1()),
    )
    def test_expected_output_min_and_max_combinations(self, df, expected):
        """Test that capping is applied correctly in transform."""

        # a: both bounds, b: upper bound only, c: lower bound only
        x = CappingTransformer(
            capping_values={"a": [2, 5], "b": [None, 7], "c": [0, None]}
        )

        df_transformed = x.transform(df)

        h.assert_frame_equal_msg(
            actual=df_transformed,
            expected=expected,
            msg_tag="Unexpected values in CappingTransformer.transform",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_4(), expected_df_2())
        + h.index_preserved_params(d.create_df_4(), expected_df_2()),
    )
    def test_non_cap_column_left_untouched(self, df, expected):
        """Test that capping is applied only to specific columns, others remain the same."""

        x = CappingTransformer(capping_values={"a": [2, 10]})

        df_transformed = x.transform(df)

        h.assert_frame_equal_msg(
            actual=df_transformed,
            expected=expected,
            msg_tag="Unexpected values in CappingTransformer.transform, with columns meant to not be transformed",
        )

    def test_non_numeric_column_error(self):
        """Test that transform will raise an error if a column to transform is not numeric."""

        df = d.create_df_5()

        x = CappingTransformer(capping_values={"a": [2, 5], "b": [-1, 8], "c": [-1, 8]})

        with pytest.raises(
            TypeError, match=r"The following columns are not numeric in X; \['b', 'c'\]"
        ):

            x.transform(df)

    def test_quantile_not_fit_error(self):
        """Test that transform will raise an error if quantiles are specified in init but fit is not run before calling transform."""

        df = d.create_df_9()

        x = CappingTransformer(quantiles={"a": [0.2, 1], "b": [0, 1]})

        with pytest.raises(
            ValueError,
            match="capping_values attribute is an empty dict - perhaps the fit method has not been run yet",
        ):

            x.transform(df)

    def test_replacement_values_dict_not_set_error(self):
        """Test that transform will raise an error if _replacement_values is an empty dict."""

        df = d.create_df_9()

        x = CappingTransformer(quantiles={"a": [0.2, 1], "b": [0, 1]})

        # manually set attribute to get past the capping_values attribute is an empty dict exception
        x.capping_values = {"a": [1, 4]}

        with pytest.raises(
            ValueError,
            match="_replacement_values attribute is an empty dict - perhaps the fit method has not been run yet",
        ):

            x.transform(df)

    def test_attributes_unchanged_from_transform(self):
        """Test that attributes are unchanged after transform is run."""

        df = d.create_df_9()

        # x is only fitted; x2 is fitted and then also runs transform
        x = CappingTransformer(quantiles={"a": [0.2, 1], "b": [0, 1]})

        x.fit(df)

        x2 = CappingTransformer(quantiles={"a": [0.2, 1], "b": [0, 1]})

        x2.fit(df)

        x2.transform(df)

        assert (
            x.capping_values == x2.capping_values
        ), "capping_values attribute modified in transform"
        assert (
            x._replacement_values == x2._replacement_values
        ), "_replacement_values attribute modified in transform"
        assert (
            x.weights_column == x2.weights_column
        ), "weights_column attribute modified in transform"
        assert x.quantiles == x2.quantiles, "quantiles attribute modified in transform"