class TestTransform(object):
    """Checks on the behaviour of NullIndicator.transform()."""

    def expected_df_1():
        """Build the frame expected by test_null_indicator_columns_correct.

        Deliberately takes no ``self``: it is invoked as a plain function in
        the class body when the parametrize arguments are built.
        """

        data = {
            "a": [1, 2, np.nan, 4, np.nan, 6],
            "b": [np.nan, 5, 4, 3, 2, 1],
            "c": [3, 2, 1, 4, 5, 6],
            "b_nulls": [1, 0, 0, 0, 0, 0],
            "c_nulls": [0, 0, 0, 0, 0, 0],
        }
        expected = pd.DataFrame(data)

        # cast the two indicator columns to int32
        indicator_cols = ["b_nulls", "c_nulls"]
        expected[indicator_cols] = expected[indicator_cols].astype("int32")

        return expected

    def test_arguments(self):
        """transform should accept exactly the arguments ``self`` and ``X``."""

        h.test_function_arguments(
            func=NullIndicator.transform,
            expected_arguments=["self", "X"],
        )

    def test_super_transform_called(self, mocker):
        """BaseTransformer.transform should be called with the input frame."""

        input_df = d.create_df_1()
        transformer = NullIndicator(columns="a")

        # one expected call: a separately created df_1 frame passed positionally
        call_spec = {0: {"args": (d.create_df_1(), ), "kwargs": {}}}

        with h.assert_function_call(
            mocker, tubular.base.BaseTransformer, "transform", call_spec
        ):
            transformer.transform(input_df)

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_9(), expected_df_1())
        + h.index_preserved_params(d.create_df_9(), expected_df_1()),
    )
    def test_null_indicator_columns_correct(self, df, expected):
        """Indicator columns are added for b and c; the other columns are untouched."""

        transformer = NullIndicator(columns=["b", "c"])
        result = transformer.transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=result,
            msg="Check null indicator columns created correctly in transform.",
        )
def test__check_dfs_passed_call():
    """row_by_row_params should call _check_dfs_passed once with both frames."""

    shared_index = [7, 8, 9]
    df1 = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, index=shared_index)
    df2 = pd.DataFrame({"a": [2, 3, 4], "b": [5, 6, 7]}, index=shared_index)

    with mock.patch.object(
        tubular.testing.helpers, "_check_dfs_passed"
    ) as mocked:
        h.row_by_row_params(df1, df2)

    assert mocked.call_count == 1, "unexpected number of calls to _check_dfs_passed"

    # a recorded call unpacks as (positional args, keyword args)
    positional, keywords = mocked.call_args_list[0]

    assert keywords == {}, "unexpected kwargs in _check_dfs_passed call"

    assert positional == (
        df1,
        df2,
    ), "unexpected positional args in _check_dfs_passed call"
class TestTransform:
    """Checks on the behaviour of SetValueTransformer.transform."""

    def expected_df_1():
        """Build the frame expected by test_value_set_in_transform.

        Takes no ``self`` because it is called as a plain function in the
        class body while the parametrize arguments are built.
        """

        expected = d.create_df_2()

        # both target columns are overwritten with the constant "a"
        expected["a"] = "a"
        expected["b"] = "a"

        return expected

    def test_arguments(self):
        """transform should accept ``self`` and ``X`` with no default values."""

        h.test_function_arguments(
            func=SetValueTransformer.transform,
            expected_arguments=["self", "X"],
            expected_default_values=None,
        )

    def test_super_transform_called(self, mocker):
        """BaseTransformer.transform should be called with the input frame."""

        input_df = d.create_df_7()
        transformer = SetValueTransformer(columns=["a", "b"], value=1)

        # one expected call: a separately created df_7 frame passed positionally
        call_spec = {0: {"args": (d.create_df_7(), ), "kwargs": {}}}

        with h.assert_function_call(
            mocker, tubular.base.BaseTransformer, "transform", call_spec
        ):
            transformer.transform(input_df)

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_2(), expected_df_1())
        + h.index_preserved_params(d.create_df_2(), expected_df_1()),
    )
    def test_value_set_in_transform(self, df, expected):
        """Columns a and b should hold the configured value after transform."""

        transformer = SetValueTransformer(columns=["a", "b"], value="a")
        result = transformer.transform(df)

        h.assert_equal_dispatch(
            actual=result,
            expected=expected,
            msg="incorrect value after SetValueTransformer transform",
        )
def test_returned_object():
    """row_by_row_params should return one ParameterSet per row plus one for all rows."""

    # single-row frames, keyed by the index labels 7, 8 and 9
    df1_1 = pd.DataFrame({"a": [1], "b": [4]}, index=[7])
    df1_2 = pd.DataFrame({"a": [2], "b": [5]}, index=[8])
    df1_3 = pd.DataFrame({"a": [3], "b": [6]}, index=[9])

    df2_1 = pd.DataFrame({"c": [10], "d": [13]}, index=[7])
    df2_2 = pd.DataFrame({"c": [11], "d": [14]}, index=[8])
    df2_3 = pd.DataFrame({"c": [12], "d": [15]}, index=[9])

    # the full inputs are the single-row frames stacked vertically
    df1 = pd.concat([df1_1, df1_2, df1_3], axis=0)
    df2 = pd.concat([df2_1, df2_2, df2_3], axis=0)

    expected_df_pairs = [
        (df1_1, df2_1),
        (df1_2, df2_2),
        (df1_3, df2_3),
        (df1, df2),
    ]
    expected_ids = ["index 7", "index 8", "index 9", "all rows (3)"]

    results = h.row_by_row_params(df1, df2)

    assert (
        type(results) is list
    ), "unexpected type for object returned from row_by_row_params"
    assert len(results) == len(
        expected_df_pairs
    ), "unexpected len of object returned from row_by_row_params"

    # lengths are known to match here, so zipping visits every item
    for i, (param_set, expected_pair, expected_id) in enumerate(
        zip(results, expected_df_pairs, expected_ids)
    ):

        assert (
            type(param_set) is ParameterSet
        ), f"unexpected type for {i}th item in returned list"

        h.assert_equal_dispatch(
            param_set.values,
            expected_pair,
            f"unexpected values for {i}th item in returned list",
        )

        assert (
            param_set.marks == ()
        ), f"unexpected marks for {i}th item in returned list"
        assert (
            param_set.id == expected_id
        ), f"unexpected id for {i}th item in returned list"
Example #5
0
class TestTransform(object):
    """Tests for DateDiffLeapYearTransformer.transform().

    The ``expected_df_*`` helpers take no ``self``: they are called as plain
    functions in the class body to build the parametrize arguments.
    """

    def expected_df_1():
        """Expected output for test_expected_output_drop_cols_true."""

        # only the new column "c" is expected when drop_cols is True
        df = pd.DataFrame({"c": [
            26,
            19,
            0,
            0,
            0,
            -2,
            -3,
            30,
        ]})

        return df

    def expected_df_2():
        """Expected output for test_expected_output_drop_cols_false."""

        df = pd.DataFrame({
            "a": [
                datetime.date(1993, 9, 27),  # day/month greater than
                datetime.date(2000, 3, 19),  # day/month less than
                datetime.date(2018, 11, 10),  # same day
                datetime.date(2018, 10,
                              10),  # same year day/month greater than
                datetime.date(2018, 10, 10),  # same year day/month less than
                datetime.date(2018, 10, 10),  # negative day/month less than
                datetime.date(2018, 12, 10),  # negative day/month greater than
                datetime.date(
                    1985, 7, 23
                ),  # large gap, this is incorrect with timedelta64 solutions
            ],
            "b": [
                datetime.date(2020, 5, 1),
                datetime.date(2019, 12, 25),
                datetime.date(2018, 11, 10),
                datetime.date(2018, 11, 10),
                datetime.date(2018, 9, 10),
                datetime.date(2015, 11, 10),
                datetime.date(2015, 11, 10),
                datetime.date(2015, 7, 23),
            ],
            "c": [
                26,
                19,
                0,
                0,
                0,
                -2,
                -3,
                30,
            ],
        })

        return df

    def expected_df_3():
        """Expected output for test_expected_output_nulls."""

        # np.nan rather than the np.NaN alias, which NumPy 2.0 removed
        df = pd.DataFrame({
            "a": [
                np.nan,
            ],
            "b": [
                np.nan,
            ],
            "c": [None],
        })

        return df

    def test_arguments(self):
        """Test that transform has expected arguments."""

        h.test_function_arguments(func=DateDiffLeapYearTransformer.transform,
                                  expected_arguments=["self", "X"])

    def test_super_transform_called(self, mocker):
        """Test that BaseTransformer.transform called."""

        df = d.create_date_test_df()

        x = DateDiffLeapYearTransformer(column_lower="a",
                                        column_upper="b",
                                        new_column_name="c",
                                        drop_cols=True)

        # one expected call: a separately created test frame passed positionally
        expected_call_args = {
            0: {
                "args": (d.create_date_test_df(), ),
                "kwargs": {}
            }
        }

        # return_value is also passed to the helper, set to another test frame
        with h.assert_function_call(
                mocker,
                tubular.base.BaseTransformer,
                "transform",
                expected_call_args,
                return_value=d.create_date_test_df(),
        ):

            x.transform(df)

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_date_test_df(), expected_df_1()) +
        h.index_preserved_params(d.create_date_test_df(), expected_df_1()),
    )
    def test_expected_output_drop_cols_true(self, df, expected):
        """Test that the output is expected from transform, when drop_cols is True.

        This tests positive year gaps, negative year gaps, and missing values.

        """

        x = DateDiffLeapYearTransformer(column_lower="a",
                                        column_upper="b",
                                        new_column_name="c",
                                        drop_cols=True)

        df_transformed = x.transform(df)

        h.assert_frame_equal_msg(
            actual=df_transformed,
            expected=expected,
            msg_tag=
            "Unexpected values in DateDiffLeapYearTransformer.transform (with drop_cols)",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_date_test_df(), expected_df_2()) +
        h.index_preserved_params(d.create_date_test_df(), expected_df_2()),
    )
    def test_expected_output_drop_cols_false(self, df, expected):
        """Test that the output is expected from transform, when drop_cols is False.

        This tests positive year gaps, negative year gaps, and missing values.

        """

        x = DateDiffLeapYearTransformer(column_lower="a",
                                        column_upper="b",
                                        new_column_name="c",
                                        drop_cols=False)

        df_transformed = x.transform(df)

        h.assert_frame_equal_msg(
            actual=df_transformed,
            expected=expected,
            msg_tag=
            "Unexpected values in DateDiffLeapYearTransformer.transform (without drop_cols)",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_date_test_nulls_df(), expected_df_3()) +
        h.index_preserved_params(d.create_date_test_nulls_df(),
                                 expected_df_3()),
    )
    def test_expected_output_nulls(self, df, expected):
        """Test that the output is expected from transform, when columns are nulls."""

        x = DateDiffLeapYearTransformer(column_lower="a",
                                        column_upper="b",
                                        new_column_name="c",
                                        drop_cols=False)

        df_transformed = x.transform(df)

        h.assert_frame_equal_msg(
            actual=df_transformed,
            expected=expected,
            msg_tag=
            "Unexpected values in DateDiffLeapYearTransformer.transform (nulls)",
        )
class TestTransform(object):
    """Tests for BetweenDatesTransformer.transform.

    The ``expected_df_*`` helpers take no ``self``: they are called as plain
    functions in the class body to build the parametrize arguments.
    """

    def expected_df_1():
        """Expected output from transform in test_output."""

        df = d.create_is_between_dates_df_1()

        df["d"] = [True, False]

        return df

    def expected_df_2():
        """Expected output from transform in test_output_both_exclusive."""

        df = d.create_is_between_dates_df_2()

        df["e"] = [False, False, True, True, False, False]

        return df

    def expected_df_3():
        """Expected output from transform in test_output_lower_exclusive."""

        df = d.create_is_between_dates_df_2()

        df["e"] = [False, False, True, True, True, False]

        return df

    def expected_df_4():
        """Expected output from transform in test_output_upper_exclusive."""

        df = d.create_is_between_dates_df_2()

        df["e"] = [False, True, True, True, False, False]

        return df

    def expected_df_5():
        """Expected output from transform in test_output_both_inclusive."""

        df = d.create_is_between_dates_df_2()

        df["e"] = [False, True, True, True, True, False]

        return df

    def test_arguments(self):
        """Test that transform has expected arguments."""

        h.test_function_arguments(
            func=BetweenDatesTransformer.transform,
            expected_arguments=["self", "X"],
            expected_default_values=None,
        )

    def test_super_transform_call(self, mocker):
        """Test that call the BaseTransformer.transform() is as expected."""

        df = d.create_is_between_dates_df_1()

        x = BetweenDatesTransformer(column_lower="a",
                                    column_between="b",
                                    column_upper="c",
                                    new_column_name="d")

        # one expected call: a separately created test frame passed positionally
        expected_call_args = {
            0: {
                "args": (d.create_is_between_dates_df_1(), ),
                "kwargs": {}
            }
        }

        # return_value is also passed to the helper, set to another test frame
        with h.assert_function_call(
                mocker,
                tubular.base.BaseTransformer,
                "transform",
                expected_call_args,
                return_value=d.create_is_between_dates_df_1(),
        ):

            x.transform(df)

    def test_cols_not_datetime(self):
        """Test that an exception is raised if cols not datetime."""

        # column "a" holds ints, so it is the one named in the expected error
        df = pd.DataFrame({
            "a": [2, 1],
            "b":
            pd.date_range(start="1/3/2016", end="27/09/2017", periods=2),
            "c":
            pd.date_range(start="1/2/2016", end="27/04/2017", periods=2),
        })

        x = BetweenDatesTransformer(column_lower="a",
                                    column_between="b",
                                    column_upper="c",
                                    new_column_name="d")

        with pytest.raises(
                TypeError,
                match=r"a should be datetime64\[ns\] type but got int64"):

            x.transform(df)

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_is_between_dates_df_1(), expected_df_1())
        + h.index_preserved_params(d.create_is_between_dates_df_1(),
                                   expected_df_1()),
    )
    def test_output(self, df, expected):
        """Test the output of transform is as expected."""

        x = BetweenDatesTransformer(
            column_lower="a",
            column_between="b",
            column_upper="c",
            new_column_name="d",
            lower_inclusive=False,
            upper_inclusive=False,
        )

        df_transformed = x.transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg="BetweenDatesTransformer.transform results not as expected",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_is_between_dates_df_2(), expected_df_2())
        + h.index_preserved_params(d.create_is_between_dates_df_2(),
                                   expected_df_2()),
    )
    def test_output_both_exclusive(self, df, expected):
        """Test the output of transform is as expected if both limits are exclusive."""

        x = BetweenDatesTransformer(
            column_lower="a",
            column_between="b",
            column_upper="c",
            new_column_name="e",
            lower_inclusive=False,
            upper_inclusive=False,
        )

        df_transformed = x.transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg="BetweenDatesTransformer.transform results not as expected",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_is_between_dates_df_2(), expected_df_3())
        + h.index_preserved_params(d.create_is_between_dates_df_2(),
                                   expected_df_3()),
    )
    def test_output_lower_exclusive(self, df, expected):
        """Test the output of transform is as expected if the lower limits are exclusive only."""

        x = BetweenDatesTransformer(
            column_lower="a",
            column_between="b",
            column_upper="c",
            new_column_name="e",
            lower_inclusive=False,
            upper_inclusive=True,
        )

        df_transformed = x.transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg="BetweenDatesTransformer.transform results not as expected",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_is_between_dates_df_2(), expected_df_4())
        + h.index_preserved_params(d.create_is_between_dates_df_2(),
                                   expected_df_4()),
    )
    def test_output_upper_exclusive(self, df, expected):
        """Test the output of transform is as expected if the upper limits are exclusive only."""

        x = BetweenDatesTransformer(
            column_lower="a",
            column_between="b",
            column_upper="c",
            new_column_name="e",
            lower_inclusive=True,
            upper_inclusive=False,
        )

        df_transformed = x.transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg="BetweenDatesTransformer.transform results not as expected",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_is_between_dates_df_2(), expected_df_5())
        + h.index_preserved_params(d.create_is_between_dates_df_2(),
                                   expected_df_5()),
    )
    def test_output_both_inclusive(self, df, expected):
        """Test the output of transform is as expected if the both limits are inclusive."""

        x = BetweenDatesTransformer(
            column_lower="a",
            column_between="b",
            column_upper="c",
            new_column_name="e",
            lower_inclusive=True,
            upper_inclusive=True,
        )

        df_transformed = x.transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg="BetweenDatesTransformer.transform results not as expected",
        )

    def test_warning_message(self):
        """Test a warning is generated if not all the values in column_upper are greater than or equal to column_lower."""

        x = BetweenDatesTransformer(
            column_lower="a",
            column_between="b",
            column_upper="c",
            new_column_name="e",
            lower_inclusive=True,
            upper_inclusive=True,
        )

        df = d.create_is_between_dates_df_2()

        # Overwrite row 0 of "c" to set up the warning case. A single .loc
        # write is used because chained df["c"][0] = ... may assign to a
        # temporary copy and leave df unchanged.
        df.loc[0, "c"] = datetime.datetime(1989, 3, 1)

        with pytest.warns(Warning,
                          match="not all c are greater than or equal to a"):

            x.transform(df)
Example #7
0
class TestTransform(object):
    """Tests for SeriesStrMethodTransformer.transform().

    The ``expected_df_*`` helpers take no ``self``: they are called as plain
    functions in the class body to build the parametrize arguments.
    """

    def expected_df_1():
        """Expected output of test_expected_output_no_overwrite."""

        df = d.create_df_7()

        df["b_new"] = df["b"].str.find(sub="a")

        return df

    def expected_df_2():
        """Expected output of test_expected_output_overwrite."""

        df = d.create_df_7()

        df["b"] = df["b"].str.pad(width=10)

        return df

    def test_arguments(self):
        """Test that transform has expected arguments."""

        h.test_function_arguments(func=SeriesStrMethodTransformer.transform,
                                  expected_arguments=["self", "X"])

    def test_super_transform_called(self, mocker):
        """Test that BaseTransformer.transform called."""

        df = d.create_df_7()

        x = SeriesStrMethodTransformer(new_column_name="cc",
                                       pd_method_name="find",
                                       columns=["c"])

        # one expected call: a separately created df_7 frame passed positionally
        expected_call_args = {0: {"args": (d.create_df_7(), ), "kwargs": {}}}

        with h.assert_function_call(mocker, tubular.base.BaseTransformer,
                                    "transform", expected_call_args):

            x.transform(df)

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_7(), expected_df_1()) +
        h.index_preserved_params(d.create_df_7(), expected_df_1()),
    )
    def test_expected_output_no_overwrite(self, df, expected):
        """Test a single column output from transform gives expected results, when not overwriting the original column."""

        x = SeriesStrMethodTransformer(
            new_column_name="b_new",
            pd_method_name="find",
            columns=["b"],
            pd_method_kwargs={"sub": "a"},
        )

        df_transformed = x.transform(df)

        h.assert_frame_equal_msg(
            actual=df_transformed,
            expected=expected,
            msg_tag=
            "Unexpected values in SeriesStrMethodTransformer.transform with find, not overwriting original column",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_7(), expected_df_2()) +
        h.index_preserved_params(d.create_df_7(), expected_df_2()),
    )
    def test_expected_output_overwrite(self, df, expected):
        """Test a single column output from transform gives expected results, when overwriting the original column."""

        x = SeriesStrMethodTransformer(
            new_column_name="b",
            pd_method_name="pad",
            columns=["b"],
            pd_method_kwargs={"width": 10},
        )

        df_transformed = x.transform(df)

        h.assert_frame_equal_msg(
            actual=df_transformed,
            expected=expected,
            msg_tag=
            "Unexpected values in SeriesStrMethodTransformer.transform with pad, overwriting original column",
        )

    @pytest.mark.parametrize(
        "df, new_column_name, pd_method_name, columns, pd_method_kwargs",
        [
            (d.create_df_7(), "b_new", "find", ["b"], {
                "sub": "a"
            }),
            (
                d.create_df_7(),
                "c_slice",
                "slice",
                ["c"],
                {
                    "start": 0,
                    "stop": 1,
                    "step": 1
                },
            ),
            (d.create_df_7(), "b_upper", "upper", ["b"], {}),
        ],
    )
    def test_pandas_method_called(self, mocker, df, new_column_name,
                                  pd_method_name, columns, pd_method_kwargs):
        """Test that the pandas.Series.str method is called as expected (with kwargs passed) during transform."""

        spy = mocker.spy(pd.Series.str, pd_method_name)

        x = SeriesStrMethodTransformer(
            new_column_name=new_column_name,
            pd_method_name=pd_method_name,
            columns=columns,
            pd_method_kwargs=pd_method_kwargs,
        )

        x.transform(df)

        # take the first recorded call; index 1 of a call holds its keyword args
        call_args = spy.call_args_list[0]
        call_kwargs = call_args[1]

        # test keyword are as expected
        h.assert_dict_equal_msg(
            actual=call_kwargs,
            expected=pd_method_kwargs,
            msg_tag=f"""Keyword arg assert for {pd_method_name}""",
        )

    def test_attributes_unchanged_by_transform(self):
        """Test that attributes set in init are unchanged by the transform method."""

        df = d.create_df_7()

        x = SeriesStrMethodTransformer(
            new_column_name="b",
            pd_method_name="pad",
            columns=["b"],
            pd_method_kwargs={"width": 10},
        )

        # identically configured instance that is never transformed, used as the baseline
        x2 = SeriesStrMethodTransformer(
            new_column_name="b",
            pd_method_name="pad",
            columns=["b"],
            pd_method_kwargs={"width": 10},
        )

        x.transform(df)

        assert (
            x.new_column_name == x2.new_column_name
        ), "new_column_name changed by SeriesStrMethodTransformer.transform"
        assert (
            x.pd_method_name == x2.pd_method_name
        ), "pd_method_name changed by SeriesStrMethodTransformer.transform"
        assert (x.columns == x2.columns
                ), "columns changed by SeriesStrMethodTransformer.transform"
        assert (
            x.pd_method_kwargs == x2.pd_method_kwargs
        ), "pd_method_kwargs changed by SeriesStrMethodTransformer.transform"
class TestTransform(object):
    """Tests for SeriesDtMethodTransformer.transform().

    The ``expected_df_*`` helpers take no ``self``: they are called as plain
    functions in the class body to build the parametrize arguments.
    """

    def expected_df_1():
        """Expected output of test_expected_output_no_overwrite."""

        df = d.create_datediff_test_df()

        df["a_year"] = [1993, 2000, 2018, 2018, 2018, 2018, 2018, 1985]

        return df

    def expected_df_2():
        """Expected output of test_expected_output_overwrite."""

        df = d.create_datediff_test_df()

        df["a"] = [1993, 2000, 2018, 2018, 2018, 2018, 2018, 1985]

        return df

    def expected_df_3():
        """Expected output of test_expected_output_callable."""

        df = d.create_datediff_test_df()

        df["b_new"] = df["b"].dt.to_period("M")

        return df

    def test_arguments(self):
        """Test that transform has expected arguments."""

        h.test_function_arguments(func=SeriesDtMethodTransformer.transform,
                                  expected_arguments=["self", "X"])

    def test_super_transform_called(self, mocker):
        """Test that BaseTransformer.transform called."""

        df = d.create_datediff_test_df()

        x = SeriesDtMethodTransformer(new_column_name="a2",
                                      pd_method_name="year",
                                      column="a")

        # one expected call: a separately created test frame passed positionally
        expected_call_args = {
            0: {
                "args": (d.create_datediff_test_df(), ),
                "kwargs": {}
            }
        }

        with h.assert_function_call(mocker, tubular.base.BaseTransformer,
                                    "transform", expected_call_args):

            x.transform(df)

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_datediff_test_df(), expected_df_1()) +
        h.index_preserved_params(d.create_datediff_test_df(), expected_df_1()),
    )
    def test_expected_output_no_overwrite(self, df, expected):
        """Test a single column output from transform gives expected results, when not overwriting the original column."""

        x = SeriesDtMethodTransformer(
            new_column_name="a_year",
            pd_method_name="year",
            column="a",
            pd_method_kwargs={},
        )

        df_transformed = x.transform(df)

        # the message names "year", the dt attribute this test actually uses
        h.assert_frame_equal_msg(
            actual=df_transformed,
            expected=expected,
            msg_tag=
            "Unexpected values in SeriesDtMethodTransformer.transform with year, not overwriting original column",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_datediff_test_df(), expected_df_2()) +
        h.index_preserved_params(d.create_datediff_test_df(), expected_df_2()),
    )
    def test_expected_output_overwrite(self, df, expected):
        """Test a single column output from transform gives expected results, when overwriting the original column."""

        x = SeriesDtMethodTransformer(
            new_column_name="a",
            pd_method_name="year",
            column="a",
            pd_method_kwargs={},
        )

        df_transformed = x.transform(df)

        # the message names "year", the dt attribute this test actually uses
        h.assert_frame_equal_msg(
            actual=df_transformed,
            expected=expected,
            msg_tag=
            "Unexpected values in SeriesDtMethodTransformer.transform with year, overwriting original column",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_datediff_test_df(), expected_df_3()) +
        h.index_preserved_params(d.create_datediff_test_df(), expected_df_3()),
    )
    def test_expected_output_callable(self, df, expected):
        """Test transform gives expected results, when pd_method_name is a callable."""

        x = SeriesDtMethodTransformer(
            new_column_name="b_new",
            pd_method_name="to_period",
            column="b",
            pd_method_kwargs={"freq": "M"},
        )

        df_transformed = x.transform(df)

        h.assert_frame_equal_msg(
            actual=df_transformed,
            expected=expected,
            msg_tag=
            "Unexpected values in SeriesDtMethodTransformer.transform with to_period",
        )

    def test_attributes_unchanged_by_transform(self):
        """Test that attributes set in init are unchanged by the transform method."""

        df = d.create_datediff_test_df()

        x = SeriesDtMethodTransformer(
            new_column_name="b_new",
            pd_method_name="to_period",
            column="b",
            pd_method_kwargs={"freq": "M"},
        )

        # identically configured instance that is never transformed, used as the baseline
        x2 = SeriesDtMethodTransformer(
            new_column_name="b_new",
            pd_method_name="to_period",
            column="b",
            pd_method_kwargs={"freq": "M"},
        )

        x.transform(df)

        assert (
            x.new_column_name == x2.new_column_name
        ), "new_column_name changed by SeriesDtMethodTransformer.transform"
        assert (
            x.pd_method_name == x2.pd_method_name
        ), "pd_method_name changed by SeriesDtMethodTransformer.transform"
        assert (x.columns == x2.columns
                ), "columns changed by SeriesDtMethodTransformer.transform"
        assert (
            x.pd_method_kwargs == x2.pd_method_kwargs
        ), "pd_method_kwargs changed by SeriesDtMethodTransformer.transform"
def _datediff_expected_frame(column, values):
    """Build an expected frame for the DateDifferenceTransformer unit tests.

    The "a" and "b" datetime columns are identical in every per-unit expected
    output, so they are written once here; ``values`` is appended as the
    column named ``column`` (column order: a, b, ``column``).
    """

    return pd.DataFrame({
        "a": [
            datetime.datetime(1993, 9, 27, 11, 58, 58),
            datetime.datetime(2000, 3, 19, 12, 59, 59),
            datetime.datetime(2018, 11, 10, 11, 59, 59),
            datetime.datetime(2018, 10, 10, 11, 59, 59),
            datetime.datetime(2018, 10, 10, 11, 59, 59),
            datetime.datetime(2018, 10, 10, 10, 59, 59),
            datetime.datetime(2018, 12, 10, 11, 59, 59),
            datetime.datetime(1985, 7, 23, 11, 59, 59),
        ],
        "b": [
            datetime.datetime(2020, 5, 1, 12, 59, 59),
            datetime.datetime(2019, 12, 25, 11, 58, 58),
            datetime.datetime(2018, 11, 10, 11, 59, 59),
            datetime.datetime(2018, 11, 10, 11, 59, 59),
            datetime.datetime(2018, 9, 10, 9, 59, 59),
            datetime.datetime(2015, 11, 10, 11, 59, 59),
            datetime.datetime(2015, 11, 10, 12, 59, 59),
            datetime.datetime(2015, 7, 23, 11, 59, 59),
        ],
        column: values,
    })


def _datediff_transformer(units):
    """Return the DateDifferenceTransformer configuration shared by these tests.

    The lower/upper columns are always "a"/"b" and the new column is named
    after the ``units`` code itself.
    """

    return DateDifferenceTransformer(
        column_lower="a",
        column_upper="b",
        new_column_name=units,
        units=units,
        copy=True,
        verbose=False,
    )


class TestTransform(object):
    """Tests for DateDifferenceTransformer.transform()."""

    def expected_df_1():
        """Expected output for test_expected_output_units_Y."""

        return _datediff_expected_frame("Y", [
            26.59340677135105,
            19.76757257798535,
            0.0,
            0.08487511721664373,
            -0.08236536912690427,
            -2.915756882984136,
            -3.082769210410435,
            29.999247075573077,
        ])

    def expected_df_2():
        """Expected output for test_expected_output_units_M."""

        return _datediff_expected_frame("M", [
            319.12088125621256,
            237.21087093582423,
            0.0,
            1.0185014065997249,
            -0.9883844295228512,
            -34.989082595809634,
            -36.993230524925224,
            359.9909649068769,
        ])

    def expected_df_3():
        """Expected output for test_expected_output_units_D."""

        return _datediff_expected_frame("D", [
            9713.042372685186,
            7219.957627314815,
            0.0,
            31.0,
            -30.083333333333332,
            -1064.9583333333333,
            -1125.9583333333333,
            10957.0,
        ])

    def expected_df_4():
        """Expected output for test_expected_output_units_h."""

        return _datediff_expected_frame("h", [
            233113.01694444445,
            173278.98305555555,
            0.0,
            744.0,
            -722.0,
            -25559.0,
            -27023.0,
            262968.0,
        ])

    def expected_df_5():
        """Expected output for test_expected_output_units_m."""

        return _datediff_expected_frame("m", [
            13986781.016666668,
            10396738.983333332,
            0.0,
            44640.0,
            -43320.0,
            -1533540.0,
            -1621380.0,
            15778080.0,
        ])

    def expected_df_6():
        """Expected output for test_expected_output_units_s."""

        return _datediff_expected_frame("s", [
            839206861.0,
            623804339.0,
            0.0,
            2678400.0,
            -2599200.0,
            -92012400.0,
            -97282800.0,
            946684800.0,
        ])

    def expected_df_7():
        """Expected output for test_expected_output_nulls.

        Each row has one missing date, so the "Y" difference is null in both.
        """

        # np.nan rather than the np.NaN alias, which NumPy 2.0 removed.
        return pd.DataFrame(
            {
                "a": [
                    datetime.datetime(1993, 9, 27, 11, 58, 58),
                    np.nan,
                ],
                "b": [
                    np.nan,
                    datetime.datetime(2019, 12, 25, 11, 58, 58),
                ],
                "Y": [
                    np.nan,
                    np.nan,
                ],
            },
            index=[0, 1],
        )

    def test_arguments(self):
        """Test that transform has expected arguments."""

        h.test_function_arguments(func=DateDifferenceTransformer.transform,
                                  expected_arguments=["self", "X"])

    def test_super_transform_called(self, mocker):
        """Test that BaseTransformer.transform is called with the input frame."""

        df = d.create_datediff_test_df()

        x = _datediff_transformer("Y")

        expected_call_args = {
            0: {
                "args": (d.create_datediff_test_df(), ),
                "kwargs": {}
            }
        }

        with h.assert_function_call(
                mocker,
                tubular.base.BaseTransformer,
                "transform",
                expected_call_args,
                return_value=d.create_datediff_test_df(),
        ):

            x.transform(df)

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_datediff_test_df(), expected_df_1()) +
        h.index_preserved_params(d.create_datediff_test_df(), expected_df_1()),
    )
    def test_expected_output_units_Y(self, df, expected):
        """Test that the output is expected from transform, when units is Y.

        The expected values cover positive, zero and negative year gaps.

        """

        df_transformed = _datediff_transformer("Y").transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg="Unexpected values in DateDifferenceTransformer.transform",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_datediff_test_df(), expected_df_2()) +
        h.index_preserved_params(d.create_datediff_test_df(), expected_df_2()),
    )
    def test_expected_output_units_M(self, df, expected):
        """Test that the output is expected from transform, when units is M.

        The expected values cover positive, zero and negative month gaps.

        """

        df_transformed = _datediff_transformer("M").transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg="Unexpected values in DateDifferenceTransformer.transform",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_datediff_test_df(), expected_df_3()) +
        h.index_preserved_params(d.create_datediff_test_df(), expected_df_3()),
    )
    def test_expected_output_units_D(self, df, expected):
        """Test that the output is expected from transform, when units is D.

        The expected values cover positive, zero and negative day gaps.

        """

        df_transformed = _datediff_transformer("D").transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg="Unexpected values in DateDifferenceTransformer.transform",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_datediff_test_df(), expected_df_4()) +
        h.index_preserved_params(d.create_datediff_test_df(), expected_df_4()),
    )
    def test_expected_output_units_h(self, df, expected):
        """Test that the output is expected from transform, when units is h.

        The expected values cover positive, zero and negative hour gaps.

        """

        df_transformed = _datediff_transformer("h").transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg="Unexpected values in DateDifferenceTransformer.transform",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_datediff_test_df(), expected_df_5()) +
        h.index_preserved_params(d.create_datediff_test_df(), expected_df_5()),
    )
    def test_expected_output_units_m(self, df, expected):
        """Test that the output is expected from transform, when units is m.

        The expected values cover positive, zero and negative minute gaps.

        """

        df_transformed = _datediff_transformer("m").transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg="Unexpected values in DateDifferenceTransformer.transform",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_datediff_test_df(), expected_df_6()) +
        h.index_preserved_params(d.create_datediff_test_df(), expected_df_6()),
    )
    def test_expected_output_units_s(self, df, expected):
        """Test that the output is expected from transform, when units is s.

        The expected values cover positive, zero and negative second gaps.

        """

        df_transformed = _datediff_transformer("s").transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg="Unexpected values in DateDifferenceTransformer.transform",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_datediff_test_nulls_df(), expected_df_7())
        + h.index_preserved_params(d.create_datediff_test_nulls_df(),
                                   expected_df_7()),
    )
    def test_expected_output_nulls(self, df, expected):
        """Test that the output is expected from transform, when columns are nulls."""

        df_transformed = _datediff_transformer("Y").transform(df)

        h.assert_frame_equal_msg(
            actual=df_transformed,
            expected=expected,
            msg_tag=
            "Unexpected values in DateDifferenceTransformer.transform (nulls)",
        )
def _base_mapping_example():
    """Return a freshly built mappings dict for the BaseMappingTransformer tests.

    A new dict (with new inner dicts) is created on every call, so callers
    never share mutable state with each other or with a transformer.
    """

    return {
        "a": {
            1: "a",
            2: "b",
            3: "c",
            4: "d",
            5: "e",
            6: "f"
        },
        "b": {
            "a": 1,
            "b": 2,
            "c": 3,
            "d": 4,
            "e": 5,
            "f": 6
        },
    }


class TestTransform(object):
    """Tests for the transform method on BaseMappingTransformer."""

    def test_arguments(self):
        """Test that transform has expected arguments."""

        h.test_function_arguments(
            func=BaseMappingTransformer.transform,
            expected_arguments=["self", "X"],
            expected_default_values=None,
        )

    def test_check_is_fitted_call(self, mocker):
        """Test the call to check_is_fitted."""

        df = d.create_df_1()

        x = BaseMappingTransformer(mappings=_base_mapping_example())

        expected_call_args = {0: {"args": (["mappings"], ), "kwargs": {}}}

        with h.assert_function_call(mocker, tubular.base.BaseTransformer,
                                    "check_is_fitted", expected_call_args):

            x.transform(df)

    def test_super_transform_call(self, mocker):
        """Test the call to BaseTransformer.transform."""

        df = d.create_df_1()

        x = BaseMappingTransformer(mappings=_base_mapping_example())

        expected_call_args = {0: {"args": (d.create_df_1(), ), "kwargs": {}}}

        with h.assert_function_call(mocker, tubular.base.BaseTransformer,
                                    "transform", expected_call_args):

            x.transform(df)

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_1(), d.create_df_1()) +
        h.index_preserved_params(d.create_df_1(), d.create_df_1()),
    )
    def test_X_returned(self, df, expected):
        """Test that X is returned from transform."""

        x = BaseMappingTransformer(mappings=_base_mapping_example())

        df_transformed = x.transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg="Check X returned from transform",
        )

    def test_mappings_unchanged(self):
        """Test that mappings is unchanged in transform."""

        df = d.create_df_1()

        x = BaseMappingTransformer(mappings=_base_mapping_example())

        x.transform(df)

        # Compare against a freshly built dict rather than the object handed to
        # the transformer: if the transformer keeps a reference to its argument,
        # an in-place mutation would change both sides and go unnoticed.
        h.assert_equal_dispatch(
            expected=_base_mapping_example(),
            actual=x.mappings,
            msg=
            "BaseMappingTransformer.transform has changed self.mappings unexpectedly",
        )
# Example #11
# 0
class TestTransform(object):
    """Tests for ToDatetimeTransformer.transform()."""

    def expected_df_1():
        """Expected output for test_expected_output.

        "a_Y" holds column "a" parsed with format "%Y" and "b_m" holds column
        "b" parsed with format "%m"; missing inputs become pd.NaT.
        """

        # np.nan rather than the np.NaN alias, which NumPy 2.0 removed.
        df = pd.DataFrame({
            "a": [1950, 1960, 2000, 2001, np.nan, 2010],
            "b": [1, 2, 3, 4, 5, np.nan],
            "a_Y": [
                datetime.datetime(1950, 1, 1),
                datetime.datetime(1960, 1, 1),
                datetime.datetime(2000, 1, 1),
                datetime.datetime(2001, 1, 1),
                pd.NaT,
                datetime.datetime(2010, 1, 1),
            ],
            "b_m": [
                datetime.datetime(1900, 1, 1),
                datetime.datetime(1900, 2, 1),
                datetime.datetime(1900, 3, 1),
                datetime.datetime(1900, 4, 1),
                datetime.datetime(1900, 5, 1),
                pd.NaT,
            ],
        })

        return df

    def test_arguments(self):
        """Test that transform has expected arguments."""

        h.test_function_arguments(func=ToDatetimeTransformer.transform,
                                  expected_arguments=["self", "X"])

    def test_super_transform_call(self, mocker):
        """Test the call to BaseTransformer.transform is as expected."""

        df = d.create_datediff_test_df()

        to_dt = ToDatetimeTransformer(column="a", new_column_name="Y")

        expected_call_args = {
            0: {
                "args": (d.create_datediff_test_df(), ),
                "kwargs": {}
            }
        }

        with h.assert_function_call(
                mocker,
                tubular.base.BaseTransformer,
                "transform",
                expected_call_args,
                return_value=d.create_datediff_test_df(),
        ):

            to_dt.transform(df)

    def test_to_datetime_call(self, mocker):
        """Test the call to pd.to_datetime is as expected.

        The expected call receives column "a" positionally and the configured
        to_datetime_kwargs as keyword arguments.
        """

        df = d.create_to_datetime_test_df()

        to_dt = ToDatetimeTransformer(column="a",
                                      new_column_name="a_Y",
                                      to_datetime_kwargs={"format": "%Y"})

        expected_call_args = {
            0: {
                "args": (d.create_to_datetime_test_df()["a"], ),
                "kwargs": {
                    "format": "%Y"
                },
            }
        }

        with h.assert_function_call(
                mocker,
                pandas,
                "to_datetime",
                expected_call_args,
                return_value=pd.to_datetime(
                    d.create_to_datetime_test_df()["a"]),
        ):

            to_dt.transform(df)

    def test_output_from_to_datetime_assigned_to_column(self, mocker):
        """Test that the output from pd.to_datetime is assigned to column with name new_column_name."""

        df = d.create_to_datetime_test_df()

        to_dt = ToDatetimeTransformer(column="a",
                                      new_column_name="a_new",
                                      to_datetime_kwargs={"format": "%Y"})

        # Sentinel values returned by the patched pandas.to_datetime; they must
        # appear unchanged in the new column.
        to_datetime_output = [1, 2, 3, 4, 5, 6]

        mocker.patch("pandas.to_datetime", return_value=to_datetime_output)

        df_transformed = to_dt.transform(df)

        assert (df_transformed["a_new"].tolist() == to_datetime_output
                ), "unexpected values assigned to a_new column"

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_to_datetime_test_df(), expected_df_1()) +
        h.index_preserved_params(d.create_to_datetime_test_df(),
                                 expected_df_1()),
    )
    def test_expected_output(self, df, expected):
        """Test input data is transformed as expected.

        Two transformers are applied in sequence: "a" -> "a_Y" with format
        "%Y", then "b" -> "b_m" with format "%m".
        """

        to_dt_1 = ToDatetimeTransformer(column="a",
                                        new_column_name="a_Y",
                                        to_datetime_kwargs={"format": "%Y"})

        to_dt_2 = ToDatetimeTransformer(column="b",
                                        new_column_name="b_m",
                                        to_datetime_kwargs={"format": "%m"})

        df_transformed = to_dt_1.transform(df)
        df_transformed = to_dt_2.transform(df_transformed)

        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg="ToDatetimeTransformer.transform output",
        )
# Example #12
# 0
class TestTransform(object):
    """Tests for GroupRareLevelsTransformer.transform()."""

    def expected_df_1():
        """Expected output for test_expected_output_no_weight.

        Column "b" is object dtype and column "c" is categorical with "rare"
        included in its categories.
        """

        # np.nan rather than the np.NaN alias, which NumPy 2.0 removed.
        df = pd.DataFrame({"a": [1, 2, 3, 4, 5, 6, 7, 8, 9, np.nan]})

        df["b"] = pd.Series([
            "a", "a", "a", "rare", "rare", "rare", "rare", np.nan, np.nan,
            np.nan
        ])

        df["c"] = pd.Series(
            ["a", "a", "c", "c", "e", "e", "rare", "rare", "rare", "rare"],
            dtype=pd.CategoricalDtype(
                categories=["a", "c", "e", "f", "g", "h", "rare"],
                ordered=False),
        )

        return df

    def expected_df_2():
        """Expected output for test_expected_output_weight.

        Only column "b" has levels replaced by "rare"; "c" is simply cast to
        category.
        """

        df = pd.DataFrame({
            "a": [2, 2, 2, 2, np.nan, 2, 2, 2, 3, 3],
            "b": ["a", "a", "a", "d", "e", "f", "g", np.nan, np.nan, np.nan],
            "c": ["a", "b", "c", "d", "f", "f", "f", "g", "g", np.nan],
        })

        df["c"] = df["c"].astype("category")

        df["b"] = pd.Series([
            "a", "a", "a", "rare", "rare", "rare", "rare", np.nan, np.nan,
            np.nan
        ])

        return df

    def test_arguments(self):
        """Test that transform has expected arguments."""

        h.test_function_arguments(func=GroupRareLevelsTransformer.transform,
                                  expected_arguments=["self", "X"])

    def test_check_is_fitted_called(self, mocker):
        """Test that BaseTransformer check_is_fitted called."""

        df = d.create_df_5()

        x = GroupRareLevelsTransformer(columns=["a", "b", "c"])

        x.fit(df)

        expected_call_args = {0: {"args": (["mapping_"], ), "kwargs": {}}}

        with h.assert_function_call(mocker, tubular.base.BaseTransformer,
                                    "check_is_fitted", expected_call_args):

            x.transform(df)

    def test_super_transform_called(self, mocker):
        """Test that BaseTransformer.transform called."""

        df = d.create_df_5()

        x = GroupRareLevelsTransformer(columns=["a", "b", "c"])

        x.fit(df)

        expected_call_args = {0: {"args": (d.create_df_5(), ), "kwargs": {}}}

        with h.assert_function_call(
                mocker,
                tubular.base.BaseTransformer,
                "transform",
                expected_call_args,
                return_value=d.create_df_5(),
        ):

            x.transform(df)

    def test_learnt_values_not_modified(self):
        """Test that the mapping_ from fit are not changed in transform.

        ``x`` is only fitted while ``x2`` is fitted and then used to transform;
        their mapping_ attributes must still match.
        """

        df = d.create_df_5()

        x = GroupRareLevelsTransformer(columns=["a", "b", "c"])

        x.fit(df)

        x2 = GroupRareLevelsTransformer(columns=["a", "b", "c"])

        x2.fit(df)

        x2.transform(df)

        h.assert_equal_dispatch(
            expected=x.mapping_,
            actual=x2.mapping_,
            msg="Non rare levels not changed in transform",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_5(), expected_df_1()) +
        h.index_preserved_params(d.create_df_5(), expected_df_1()),
    )
    def test_expected_output_no_weight(self, df, expected):
        """Test that the output is expected from transform."""

        x = GroupRareLevelsTransformer(columns=["b", "c"], cut_off_percent=0.2)

        # set the mapping dict directly rather than fitting x on df so test works with decorators
        x.mapping_ = {"b": ["a", np.nan], "c": ["e", "c", "a"]}

        df_transformed = x.transform(df)

        h.assert_frame_equal_msg(
            actual=df_transformed,
            expected=expected,
            msg_tag="Unexpected values in GroupRareLevelsTransformer.transform",
        )

    def test_expected_output_no_weight_single_row_na(self):
        """Test output from a single row transform with np.nan value remains the same.

        The type is preserved if using an existing dataframe, so a new
        dataframe needs to be created.
        """

        one_row_df = pd.DataFrame({"b": [np.nan], "c": [np.nan]})
        x = GroupRareLevelsTransformer(columns=["b", "c"], cut_off_percent=0.2)

        # set the mapping dict directly rather than fitting x on df so test works with decorators
        x.mapping_ = {"b": ["a", np.nan], "c": ["e", "c", "a", np.nan]}

        one_row_df_transformed = x.transform(one_row_df)

        h.assert_frame_equal_msg(
            actual=one_row_df_transformed,
            expected=one_row_df,
            msg_tag="Unexpected values in GroupRareLevelsTransformer.transform",
        )

    def test_expected_output_no_weight_single_row_na_category_column(self):
        """Test output from a single row transform with np.nan value remains the same, when column is type category.

        The type is preserved if using an existing dataframe, so a new
        dataframe needs to be created.
        """

        one_row_df = pd.DataFrame({"b": [np.nan], "c": [np.nan]})
        one_row_df["c"] = one_row_df["c"].astype("category")

        # add rare as a category in dataframe; assign the result back because
        # the inplace= argument of add_categories was removed in pandas 2.0
        one_row_df["c"] = one_row_df["c"].cat.add_categories("rare")

        x = GroupRareLevelsTransformer(columns=["b", "c"], cut_off_percent=0.2)

        # set the mapping dict directly rather than fitting x on df so test works with decorators
        x.mapping_ = {"b": ["a", np.nan], "c": ["e", "c", "a", np.nan]}

        one_row_df_transformed = x.transform(one_row_df)

        h.assert_frame_equal_msg(
            actual=one_row_df_transformed,
            expected=one_row_df,
            msg_tag="Unexpected values in GroupRareLevelsTransformer.transform",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_6(), expected_df_2()) +
        h.index_preserved_params(d.create_df_6(), expected_df_2()),
    )
    def test_expected_output_weight(self, df, expected):
        """Test that the output is expected from transform, when weights are used."""

        x = GroupRareLevelsTransformer(columns=["b"],
                                       cut_off_percent=0.3,
                                       weight="a")

        # set the mapping dict directly rather than fitting x on df so test works with decorators
        x.mapping_ = {"b": ["a", np.nan]}

        df_transformed = x.transform(df)

        h.assert_frame_equal_msg(
            actual=df_transformed,
            expected=expected,
            msg_tag=
            "Unexpected values in GroupRareLevelsTransformer.transform (with weights)",
        )
class TestTransform(object):
    """Tests for LogTransformer.transform().

    The expected_df_* helpers take no ``self`` because they are called
    directly in the class body, inside the parametrize decorators below.
    """
    def expected_df_1():
        """Expected output of test_expected_output_1 (natural log, originals dropped)."""

        df = d.create_df_3()

        df["a_new_col"] = np.log(df["a"])
        df["b_new_col"] = np.log(df["b"])

        df.drop(columns=["a", "b"], inplace=True)

        return df

    def expected_df_2():
        """Expected output of test_expected_output_2 (log of value + 1, originals dropped)."""

        df = d.create_df_3()

        df["a_new_col"] = np.log(df["a"] + 1)
        df["b_new_col"] = np.log(df["b"] + 1)

        df.drop(columns=["a", "b"], inplace=True)

        return df

    def expected_df_3():
        """Expected output of test_expected_output_3 (natural log, originals kept)."""

        df = d.create_df_3()

        df["a_new_col"] = np.log(df["a"])
        df["b_new_col"] = np.log(df["b"])

        return df

    def expected_df_4():
        """Expected output of test_expected_output_4 (log of value + 1, originals kept)."""

        df = d.create_df_3()

        df["a_new_col"] = np.log(df["a"] + 1)
        df["b_new_col"] = np.log(df["b"] + 1)

        return df

    def expected_df_5():
        """Expected output of test_expected_output_5 (base 5 log of value + 1, original kept)."""

        df = d.create_df_4()

        # change of base: log_5(v) = ln(v) / ln(5)
        df["a_new_col"] = np.log(df["a"] + 1) / np.log(5)

        return df

    def expected_df_6():
        """Expected output of test_expected_output_6 (base 7 log, original dropped)."""

        df = d.create_df_4()

        # change of base: log_7(v) = ln(v) / ln(7)
        df["a_new_col"] = np.log(df["a"]) / np.log(7)

        df.drop("a", axis=1, inplace=True)

        return df

    def test_arguments(self):
        """Test that transform has expected arguments."""

        h.test_function_arguments(func=LogTransformer.transform,
                                  expected_arguments=["self", "X"])

    def test_super_transform_called(self, mocker):
        """Test that BaseTransformer.transform called."""

        df = d.create_df_3()

        x = LogTransformer(columns=["a", "b"])

        expected_call_args = {0: {"args": (d.create_df_3(), ), "kwargs": {}}}

        # the patched BaseTransformer.transform returns a fresh copy of the input data
        with h.assert_function_call(
                mocker,
                tubular.base.BaseTransformer,
                "transform",
                expected_call_args,
                return_value=d.create_df_3(),
        ):

            x.transform(df)

    def test_error_with_non_numeric_columns(self):
        """Test an exception is raised if transform is applied to non-numeric columns."""

        df = d.create_df_5()

        x = LogTransformer(columns=["a", "b", "c"])

        with pytest.raises(
                TypeError,
                match=
                r"The following columns are not numeric in X; \['b', 'c'\]"):

            x.transform(df)

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_3(), expected_df_1()) +
        h.index_preserved_params(d.create_df_3(), expected_df_1()),
    )
    def test_expected_output_1(self, df, expected):
        """Test that transform is giving the expected output when not adding one and dropping original columns."""

        x1 = LogTransformer(columns=["a", "b"],
                            add_1=False,
                            drop=True,
                            suffix="new_col")

        df_transformed = x1.transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg=
            "LogTransformer transform not adding 1 and dropping original columns",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_3(), expected_df_2()) +
        h.index_preserved_params(d.create_df_3(), expected_df_2()),
    )
    def test_expected_output_2(self, df, expected):
        """Test that transform is giving the expected output when adding one and dropping original columns."""

        x1 = LogTransformer(columns=["a", "b"],
                            add_1=True,
                            drop=True,
                            suffix="new_col")

        df_transformed = x1.transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg=
            "LogTransformer transform adding 1 and dropping original columns",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_3(), expected_df_3()) +
        h.index_preserved_params(d.create_df_3(), expected_df_3()),
    )
    def test_expected_output_3(self, df, expected):
        """Test that transform is giving the expected output when not adding one and not dropping original columns."""

        x1 = LogTransformer(columns=["a", "b"],
                            add_1=False,
                            drop=False,
                            suffix="new_col")

        df_transformed = x1.transform(df)

        # message describes this configuration (add_1=False, drop=False)
        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg=
            "LogTransformer transform not adding 1 and not dropping original columns",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_3(), expected_df_4()) +
        h.index_preserved_params(d.create_df_3(), expected_df_4()),
    )
    def test_expected_output_4(self, df, expected):
        """Test that transform is giving the expected output when adding one and not dropping original columns."""

        x1 = LogTransformer(columns=["a", "b"],
                            add_1=True,
                            drop=False,
                            suffix="new_col")

        df_transformed = x1.transform(df)

        # message describes this configuration (add_1=True, drop=False)
        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg=
            "LogTransformer transform adding 1 and not dropping original columns",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_4(), expected_df_5()) +
        h.index_preserved_params(d.create_df_4(), expected_df_5()),
    )
    def test_expected_output_5(self, df, expected):
        """Test that transform is giving the expected output when adding one and not dropping
        original columns and using base."""

        x1 = LogTransformer(columns=["a"],
                            base=5,
                            add_1=True,
                            drop=False,
                            suffix="new_col")

        df_transformed = x1.transform(df)

        # message describes this configuration (base=5, add_1=True, drop=False)
        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg=
            "LogTransformer transform using base, adding 1 and not dropping original columns",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_4(), expected_df_6()) +
        h.index_preserved_params(d.create_df_4(), expected_df_6()),
    )
    def test_expected_output_6(self, df, expected):
        """Test that transform is giving the expected output when not adding one and dropping
        original columns and using base."""

        x1 = LogTransformer(columns=["a"],
                            base=7,
                            add_1=False,
                            drop=True,
                            suffix="new_col")

        df_transformed = x1.transform(df)

        # message describes this configuration (base=7, add_1=False, drop=True)
        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg=
            "LogTransformer transform using base, not adding 1 and dropping original columns",
        )

    @pytest.mark.parametrize(
        "df, columns, add_1, extra_exception_text",
        (
            [pd.DataFrame({"a": [1, 2, 0]}), ["a"], False, ""],
            [
                pd.DataFrame({
                    "a": [1, 2, 0],
                    "b": [1, 2, 3]
                }), ["a", "b"], False, ""
            ],
            [
                pd.DataFrame({"a": [1, 2, -1]}), ["a"], True,
                r" \(after adding 1\)"
            ],
            [
                pd.DataFrame({
                    "a": [1, 2, -1],
                    "b": [1, 2, 3]
                }),
                ["a", "b"],
                True,
                r" \(after adding 1\)",
            ],
            [pd.DataFrame({"b": [1, 2, -0.001]}), ["b"], False, ""],
            [
                pd.DataFrame({
                    "b": [1, 2, -0.001],
                    "a": [1, 2, 3]
                }),
                ["a", "b"],
                False,
                "",
            ],
            [
                pd.DataFrame({"b": [1, 2, -1.001]}), ["b"], True,
                r" \(after adding 1\)"
            ],
            [
                pd.DataFrame({
                    "b": [1, 2, -1.001],
                    "a": [1, 2, 3]
                }),
                ["a", "b"],
                True,
                r" \(after adding 1\)",
            ],
        ),
    )
    def test_negative_values_raise_exception(self, df, columns, add_1,
                                             extra_exception_text):
        """Test that an exception is raised if zero or negative values are passed in transform.

        ``extra_exception_text`` is the regex fragment expected in the error
        message for the ``add_1=True`` cases and is empty otherwise.
        """

        x = LogTransformer(columns=columns, add_1=add_1, drop=True)

        with pytest.raises(
                ValueError,
                match=
                f"values less than or equal to 0 in columns{extra_exception_text}, make greater than 0 before using transform",
        ):

            x.transform(df)
class TestTransform:
    """Tests for BaseImputer.transform.

    The expected_df_* helpers take no ``self`` because they are called
    directly in the class body, inside the parametrize decorators below.
    """
    def expected_df_1():
        """Expected output of test_expected_output_1 (column a imputed with 7)."""

        # np.nan is used throughout; the np.NaN alias was removed in NumPy 2.0
        df = pd.DataFrame({
            "a": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0],
            "b": ["a", "b", "c", "d", "e", "f", np.nan],
            "c": ["a", "b", "c", "d", "e", "f", np.nan],
        })

        df["c"] = df["c"].astype("category")

        return df

    def expected_df_2():
        """Expected output of test_expected_output_2 (column b imputed with "g")."""

        df2 = pd.DataFrame({
            "a": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, np.nan],
            "b": ["a", "b", "c", "d", "e", "f", "g"],
            "c": ["a", "b", "c", "d", "e", "f", np.nan],
        })

        df2["c"] = df2["c"].astype("category")

        return df2

    def expected_df_3():
        """Expected output of test_expected_output_3 (b imputed with "g", c with "f")."""

        df3 = pd.DataFrame({
            "a": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, np.nan],
            "b": ["a", "b", "c", "d", "e", "f", "g"],
            "c": ["a", "b", "c", "d", "e", "f", "f"],
        })

        df3["c"] = df3["c"].astype("category")

        return df3

    def test_arguments(self):
        """Test that transform has expected arguments."""

        h.test_function_arguments(func=BaseImputer.transform,
                                  expected_arguments=["self", "X"])

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_2(), expected_df_1()) +
        h.index_preserved_params(d.create_df_2(), expected_df_1()),
    )
    def test_expected_output_1(self, df, expected):
        """Test that transform is giving the expected output when applied to float column."""

        # attributes are set by hand rather than through fit
        x1 = BaseImputer()
        x1.columns = ["a"]
        x1.impute_values_ = {"a": 7}

        df_transformed = x1.transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg="BaseImputer transform col a",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_2(), expected_df_2()) +
        h.index_preserved_params(d.create_df_2(), expected_df_2()),
    )
    def test_expected_output_2(self, df, expected):
        """Test that transform is giving the expected output when applied to object column."""

        x1 = BaseImputer()
        x1.columns = ["b"]
        x1.impute_values_ = {"b": "g"}

        df_transformed = x1.transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg="BaseImputer transform col b",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_2(), expected_df_3()) +
        h.index_preserved_params(d.create_df_2(), expected_df_3()),
    )
    def test_expected_output_3(self, df, expected):
        """Test that transform is giving the expected output when applied to object and categorical columns."""

        x1 = BaseImputer()
        x1.columns = ["b", "c"]
        x1.impute_values_ = {"b": "g", "c": "f"}

        df_transformed = x1.transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg="BaseImputer transform col b, c",
        )

    def test_check_is_fitted_called(self, mocker):
        """Test that BaseTransformer check_is_fitted called."""

        df = d.create_df_1()

        x = BaseImputer()
        x.columns = []

        expected_call_args = {
            0: {
                "args": (["impute_values_"], ),
                "kwargs": {}
            }
        }

        with h.assert_function_call(mocker, tubular.base.BaseTransformer,
                                    "check_is_fitted", expected_call_args):

            x.transform(df)

    def test_super_transform_called(self, mocker):
        """Test that BaseTransformer.transform called."""

        df = d.create_df_2()

        x = BaseImputer()
        x.columns = []
        x.impute_values_ = {}

        expected_call_args = {0: {"args": (d.create_df_2(), ), "kwargs": {}}}

        with h.assert_function_call(mocker, tubular.base.BaseTransformer,
                                    "transform", expected_call_args):

            x.transform(df)
class TestTransform(object):
    """Tests for ModeImputer.transform().

    The expected_df_* helpers take no ``self`` because they are called
    directly in the class body, inside the parametrize decorators below.
    """
    def expected_df_1():
        """Expected output for test_nulls_imputed_correctly."""

        df = pd.DataFrame({
            "a": [1, 2, 3, 4, 5, 6, np.nan],
            "b": [1, 2, 3, np.nan, 7, 8, 9],
            "c": [np.nan, 1, 2, 3, -4, -5, -6],
        })

        for col in ["a", "b", "c"]:

            # single .loc call on the frame; chained df[col].loc[...] assignment
            # is not guaranteed to write back to df
            df.loc[df[col].isnull(), col] = df[col].mode()[0]

        return df

    def expected_df_2():
        """Expected output for test_nulls_imputed_correctly_2."""

        df = pd.DataFrame({
            "a": [1, 2, 3, 4, 5, 6, np.nan],
            "b": [1, 2, 3, np.nan, 7, 8, 9],
            "c": [np.nan, 1, 2, 3, -4, -5, -6],
        })

        # only column a is imputed; b and c keep their nulls
        df.loc[df["a"].isnull(), "a"] = df["a"].mode()[0]

        return df

    def test_arguments(self):
        """Test that transform has expected arguments."""

        h.test_function_arguments(func=ModeImputer.transform,
                                  expected_arguments=["self", "X"])

    def test_check_is_fitted_called(self, mocker):
        """Test that BaseTransformer check_is_fitted called."""

        df = d.create_df_1()

        x = ModeImputer(columns="a")

        x.fit(df)

        expected_call_args = {
            0: {
                "args": (["impute_values_"], ),
                "kwargs": {}
            }
        }

        with h.assert_function_call(mocker, tubular.base.BaseTransformer,
                                    "check_is_fitted", expected_call_args):

            x.transform(df)

    def test_super_transform_called(self, mocker):
        """Test that BaseTransformer.transform called."""

        df = d.create_df_1()

        x = ModeImputer(columns="a")

        x.fit(df)

        expected_call_args = {0: {"args": (d.create_df_1(), ), "kwargs": {}}}

        with h.assert_function_call(mocker, tubular.base.BaseTransformer,
                                    "transform", expected_call_args):

            x.transform(df)

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_3(), expected_df_1()) +
        h.index_preserved_params(d.create_df_3(), expected_df_1()),
    )
    def test_nulls_imputed_correctly(self, df, expected):
        """Test missing values are filled with the correct values."""

        x = ModeImputer(columns=["a", "b", "c"])

        # set the impute values dict directly rather than fitting x on df so test works with helpers
        x.impute_values_ = {"a": 1.0, "b": 1.0, "c": -6.0}

        df_transformed = x.transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg="Check nulls filled correctly in transform",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_3(), expected_df_2()) +
        h.index_preserved_params(d.create_df_3(), expected_df_2()),
    )
    def test_nulls_imputed_correctly_2(self, df, expected):
        """Test missing values are filled with the correct values - and unrelated columns are not changed."""

        x = ModeImputer(columns=["a"])

        # set the impute values dict directly rather than fitting x on df so test works with helpers
        x.impute_values_ = {"a": 1.0}

        df_transformed = x.transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg="Check nulls filled correctly in transform",
        )

    def test_learnt_values_not_modified(self):
        """Test that the impute_values_ from fit are not changed in transform."""

        df = d.create_df_3()

        # x is only fitted; x2 is fitted and then also runs transform
        x = ModeImputer(columns=["a", "b", "c"])

        x.fit(df)

        x2 = ModeImputer(columns=["a", "b", "c"])

        x2.fit_transform(df)

        h.assert_equal_dispatch(
            expected=x.impute_values_,
            actual=x2.impute_values_,
            msg="Impute values not changed in transform",
        )
class TestTransform(object):
    """Tests for DataFrameMethodTransformer.transform().

    The expected_df_* helpers take no ``self`` because they are called
    directly in the class body, inside the parametrize decorators below.
    """
    def expected_df_1():
        """Expected output of test_expected_output_single_columns_assignment."""

        # np.nan is used throughout; the np.NaN alias was removed in NumPy 2.0
        df = pd.DataFrame({
            "a": [1, 2, 3, 4, 5, 6, np.nan],
            "b": [1, 2, 3, np.nan, 7, 8, 9],
            "c": [np.nan, 1, 2, 3, -4, -5, -6],
            "d": [1.0, 3.0, 5.0, 3.0, 3.0, 3.0, 3.0],
        })

        return df

    def expected_df_2():
        """Expected output of test_expected_output_multi_columns_assignment."""

        df = pd.DataFrame({
            "a": [1, 2, 3, 4, 5, 6, np.nan],
            "b": [1, 2, 3, np.nan, 7, 8, 9],
            "c": [np.nan, 1, 2, 3, -4, -5, -6],
            "d": [0.5, 1.0, 1.5, np.nan, 3.5, 4.0, 4.5],
            "e": [np.nan, 0.5, 1.0, 1.5, -2.0, -2.5, -3.0],
        })

        return df

    def test_arguments(self):
        """Test that transform has expected arguments."""

        h.test_function_arguments(func=DataFrameMethodTransformer.transform,
                                  expected_arguments=["self", "X"])

    def test_super_transform_called(self, mocker):
        """Test that BaseTransformer.transform called."""

        df = d.create_df_3()

        x = DataFrameMethodTransformer(new_column_name="d",
                                       pd_method_name="sum",
                                       columns=["b", "c"])

        # a copy is taken so the expected argument is a separate object from df
        expected_call_args = {0: {"args": (df.copy(), ), "kwargs": {}}}

        with h.assert_function_call(mocker, tubular.base.BaseTransformer,
                                    "transform", expected_call_args):

            x.transform(df)

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_3(), expected_df_1()) +
        h.index_preserved_params(d.create_df_3(), expected_df_1()),
    )
    def test_expected_output_single_columns_assignment(self, df, expected):
        """Test a single column output from transform gives expected results."""

        x = DataFrameMethodTransformer(
            new_column_name="d",
            pd_method_name="sum",
            columns=["b", "c"],
            pd_method_kwargs={"axis": 1},
        )

        df_transformed = x.transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg="DataFrameMethodTransformer sum columns b and c",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_3(), expected_df_2()) +
        h.index_preserved_params(d.create_df_3(), expected_df_2()),
    )
    def test_expected_output_multi_columns_assignment(self, df, expected):
        """Test a multiple column output from transform gives expected results."""

        x = DataFrameMethodTransformer(
            new_column_name=["d", "e"],
            pd_method_name="div",
            columns=["b", "c"],
            pd_method_kwargs={"other": 2},
        )

        df_transformed = x.transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg="DataFrameMethodTransformer divide by 2 columns b and c",
        )

    @pytest.mark.parametrize(
        "df, new_column_name, pd_method_name, columns, pd_method_kwargs",
        [
            (d.create_df_3(), ["d", "e"], "div", ["b", "c"], {
                "other": 2
            }),
            (d.create_df_3(), "d", "sum", ["b", "c"], {
                "axis": 1
            }),
            (d.create_df_3(), ["d", "e"], "cumprod", ["b", "c"], {
                "axis": 1
            }),
            (d.create_df_3(), ["d", "e", "f"], "mod", ["a", "b", "c"], {
                "other": 2
            }),
            (d.create_df_3(), ["d", "e", "f"], "le", ["a", "b", "c"], {
                "other": 0
            }),
            (d.create_df_3(), ["d", "e"], "abs", ["a", "b"], {}),
        ],
    )
    def test_pandas_method_called(self, mocker, df, new_column_name,
                                  pd_method_name, columns, pd_method_kwargs):
        """Test that the pandas method is called as expected (with kwargs passed) during transform."""

        # spy on the named pd.DataFrame method so its calls are recorded
        spy = mocker.spy(pd.DataFrame, pd_method_name)

        x = DataFrameMethodTransformer(
            new_column_name=new_column_name,
            pd_method_name=pd_method_name,
            columns=columns,
            pd_method_kwargs=pd_method_kwargs,
        )

        x.transform(df)

        # pull out positional and keyword args to target the call
        call_args = spy.call_args_list[0]
        call_pos_args = call_args[0]
        call_kwargs = call_args[1]

        # test keyword are as expected
        h.assert_dict_equal_msg(
            actual=call_kwargs,
            expected=pd_method_kwargs,
            msg_tag=f"""Keyword arg assert for {pd_method_name}""",
        )

        # test positional args are as expected; the only positional arg should be
        # the sub-frame of the configured columns
        h.assert_list_tuple_equal_msg(
            actual=call_pos_args,
            expected=(df[columns], ),
            msg_tag=f"""Positional arg assert for {pd_method_name}""",
        )
class TestTransform(object):
    """Checks on the behaviour of BaseTransformer.transform()."""

    def test_arguments(self):
        """The signature of transform should be exactly (self, X)."""

        h.test_function_arguments(
            func=BaseTransformer.transform,
            expected_arguments=["self", "X"],
        )

    def test_columns_check_called(self, mocker):
        """transform should call self.columns_check with the input frame."""

        input_df = d.create_df_1()

        transformer = BaseTransformer(columns="a")

        columns_check_calls = {0: {"args": (input_df,), "kwargs": {}}}

        with h.assert_function_call(
            mocker,
            tubular.base.BaseTransformer,
            "columns_check",
            columns_check_calls,
        ):

            transformer.transform(X=input_df)

    def test_non_pd_type_error(self):
        """A ValueError should be raised when X is a list rather than a pd.DataFrame."""

        transformer = BaseTransformer(columns="a")

        not_a_frame = [1, 2, 3, 4, 5, 6]

        with pytest.raises(ValueError):

            transformer.transform(X=not_a_frame)

    def test_df_copy_called(self, mocker):
        """pd.DataFrame.copy should be called, without arguments, when copy is True."""

        input_df = d.create_df_1()

        transformer = BaseTransformer(columns="a", copy=True)

        copy_calls = {0: {"args": (), "kwargs": {}}}

        # the patched copy hands back the original frame
        with h.assert_function_call(
            mocker,
            pandas.DataFrame,
            "copy",
            copy_calls,
            return_value=input_df,
        ):

            transformer.transform(X=input_df)

    def test_no_rows_error(self):
        """A ValueError reporting the (0, 1) shape should be raised for a frame with no rows."""

        transformer = BaseTransformer(columns="a")

        empty_df = pandas.DataFrame(columns=["a"])

        # the message contains regex metacharacters, so it is escaped before matching
        error_pattern = re.escape("X has no rows; (0, 1)")

        with pytest.raises(ValueError, match=error_pattern):

            transformer.transform(empty_df)

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_1(), d.create_df_1())
        + h.index_preserved_params(d.create_df_1(), d.create_df_1()),
    )
    def test_X_returned(self, df, expected):
        """transform should hand back data equal to the X that was passed in."""

        transformer = BaseTransformer(columns="a", copy=True)

        returned_df = transformer.transform(X=df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=returned_df,
            msg="Check X returned from transform",
        )
Example #18
0
class TestTransform(object):
    """Tests for the transform method on MappingTransformer.

    The expected_df_* helpers take no ``self`` because they are called
    directly in the class body, inside the parametrize decorators below.
    """

    def expected_df_1():
        """Expected output for test_expected_output."""

        df = pd.DataFrame(
            {"a": ["a", "b", "c", "d", "e", "f"], "b": [1, 2, 3, 4, 5, 6]}
        )

        return df

    def expected_df_2():
        """Expected output for test_non_specified_values_unchanged."""

        df = pd.DataFrame(
            {"a": [5, 6, 7, 4, 5, 6], "b": ["z", "y", "x", "d", "e", "f"]}
        )

        return df

    def test_arguments(self):
        """Test that transform has expected arguments."""

        h.test_function_arguments(
            func=MappingTransformer.transform,
            expected_arguments=["self", "X"],
            expected_default_values=None,
        )

    def test_super_transform_call(self, mocker):
        """Test the call to BaseMappingTransformMixin.transform."""

        df = d.create_df_1()

        mapping = {
            "a": {1: "a", 2: "b", 3: "c", 4: "d", 5: "e", 6: "f"},
            "b": {"a": 1, "b": 2, "c": 3, "d": 4, "e": 5, "f": 6},
        }

        x = MappingTransformer(mappings=mapping)

        spy = mocker.spy(tubular.mapping.BaseMappingTransformMixin, "transform")

        x.transform(df)

        assert (
            spy.call_count == 1
        ), "unexpected number of calls to BaseMappingTransformMixin.transform"

        # split the recorded call into positional and keyword arguments
        call_args = spy.call_args_list[0]
        call_pos_args = call_args[0]
        call_kwargs = call_args[1]

        expected_kwargs = {}

        assert (
            call_kwargs == expected_kwargs
        ), "unexpected kwargs in BaseMappingTransformMixin.transform call"

        # the spy is on the class attribute, so the first positional arg is the instance
        expected_pos_args = (x, d.create_df_1())

        assert (
            expected_pos_args[0] == call_pos_args[0]
        ), "unexpected 1st positional arg in BaseMappingTransformMixin.transform call"

        h.assert_equal_dispatch(
            expected_pos_args[1],
            call_pos_args[1],
            "unexpected 2nd positional arg in BaseMappingTransformMixin.transform call",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_1(), expected_df_1())
        + h.index_preserved_params(d.create_df_1(), expected_df_1()),
    )
    def test_expected_output(self, df, expected):
        """Test that transform is giving the expected output."""

        mapping = {
            "a": {1: "a", 2: "b", 3: "c", 4: "d", 5: "e", 6: "f"},
            "b": {"a": 1, "b": 2, "c": 3, "d": 4, "e": 5, "f": 6},
        }

        x = MappingTransformer(mappings=mapping)

        df_transformed = x.transform(df)

        h.assert_frame_equal_msg(
            actual=df_transformed,
            expected=expected,
            msg_tag="expected output from mapping transformer",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_1(), expected_df_2())
        + h.index_preserved_params(d.create_df_1(), expected_df_2()),
    )
    def test_non_specified_values_unchanged(self, df, expected):
        """Test that values not specified in mappings are left unchanged in transform."""

        # only three of the values in each column have a mapping
        mapping = {"a": {1: 5, 2: 6, 3: 7}, "b": {"a": "z", "b": "y", "c": "x"}}

        x = MappingTransformer(mappings=mapping)

        df_transformed = x.transform(df)

        h.assert_frame_equal_msg(
            actual=df_transformed,
            expected=expected,
            msg_tag="expected output from mapping transformer",
        )

    def test_mappings_unchanged(self):
        """Test that mappings is unchanged in transform."""

        df = d.create_df_1()

        mapping = {
            "a": {1: "a", 2: "b", 3: "c", 4: "d", 5: "e", 6: "f"},
            "b": {"a": 1, "b": 2, "c": 3, "d": 4, "e": 5, "f": 6},
        }

        # x.mappings is compared against ReturnKeyDict-wrapped copies of the input mapping
        preserve_original_value_mapping = {
            "a": ReturnKeyDict(mapping["a"]),
            "b": ReturnKeyDict(mapping["b"]),
        }

        x = MappingTransformer(mappings=mapping)

        x.transform(df)

        h.assert_equal_dispatch(
            actual=x.mappings,
            expected=preserve_original_value_mapping,
            msg="MappingTransformer.transform has changed self.mappings unexpectedly",
        )
class TestTransform(object):
    """Checks on the behaviour of CutTransformer.transform()."""
    def expected_df_1():
        """Build the frame test_expected_output should produce.

        Takes no ``self`` because it is called directly in the class body,
        inside the parametrize decorator of test_expected_output.
        """

        expected = d.create_df_9()

        labels = pd.Series(["c", "b", "a", "d", "e", "f"], dtype="category")
        expected["d"] = labels

        return expected

    def test_arguments(self):
        """The signature of transform should be exactly (self, X)."""

        h.test_function_arguments(
            func=CutTransformer.transform,
            expected_arguments=["self", "X"],
        )

    def test_super_transform_call(self, mocker):
        """BaseTransformer.transform should be called with the input frame."""

        input_df = d.create_df_9()

        transformer = CutTransformer(
            column="a",
            new_column_name="Y",
            cut_kwargs={"bins": 3},
        )

        super_transform_calls = {
            0: {"args": (d.create_df_9(), ), "kwargs": {}}
        }

        # the patched BaseTransformer.transform returns a fresh copy of the data
        with h.assert_function_call(
                mocker,
                tubular.base.BaseTransformer,
                "transform",
                super_transform_calls,
                return_value=d.create_df_9(),
        ):

            transformer.transform(input_df)

    def test_pd_cut_call(self, mocker):
        """pd.cut should receive the column to cut plus the configured cut_kwargs."""

        input_df = d.create_df_9()

        transformer = CutTransformer(
            column="a",
            new_column_name="a_cut",
            cut_kwargs={"bins": 3, "right": False, "precision": 2},
        )

        cut_calls = {
            0: {
                "args": (d.create_df_9()["a"], ),
                "kwargs": {"bins": 3, "right": False, "precision": 2},
            }
        }

        with h.assert_function_call(
                mocker,
                pandas,
                "cut",
                cut_calls,
                return_value=[1, 2, 3, 4, 5, 6],
        ):

            transformer.transform(input_df)

    def test_output_from_cut_assigned_to_column(self, mocker):
        """Whatever pd.cut returns should end up in the new_column_name column."""

        input_df = d.create_df_9()

        transformer = CutTransformer(
            column="c",
            new_column_name="c_new",
            cut_kwargs={"bins": 2},
        )

        cut_output = [1, 2, 3, 4, 5, 6]

        # replace pandas.cut so its return value is known
        mocker.patch("pandas.cut", return_value=cut_output)

        result = transformer.transform(input_df)

        new_column_values = result["c_new"].tolist()

        assert (new_column_values == cut_output
                ), "unexpected values assigned to c_new column"

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_9(), expected_df_1()) +
        h.index_preserved_params(d.create_df_9(), expected_df_1()),
    )
    def test_expected_output(self, df, expected):
        """Cutting column c with explicit bins and labels should give the expected frame."""

        cut_kwargs = {
            "bins": [0, 1, 2, 3, 4, 5, 6],
            "ordered": False,
            "labels": ["a", "b", "c", "d", "e", "f"],
        }

        transformer = CutTransformer(
            column="c",
            new_column_name="d",
            cut_kwargs=cut_kwargs,
        )

        result = transformer.transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=result,
            msg="CutTransformer.transform output",
        )

    def test_non_numeric_column_error(self):
        """A TypeError should be raised when the column to discretise is not numeric."""

        input_df = d.create_df_8()

        transformer = CutTransformer(column="b", new_column_name="d")

        with pytest.raises(
                TypeError,
                match="b should be a numeric dtype but got object",
        ):

            transformer.transform(input_df)
Example #20
0
class TestTransform(object):
    """Tests for CappingTransformer.transform()."""

    def expected_df_1():
        """Expected output for test_expected_output_min_and_max_combinations.

        Called without an instance while the class body is evaluated (inside
        the parametrize decorator below), hence no ``self`` parameter.
        """

        # np.nan rather than the np.NaN alias, which was removed in NumPy 2.0
        df = pd.DataFrame(
            {
                "a": [2, 2, 3, 4, 5, 5, np.nan],
                "b": [1, 2, 3, np.nan, 7, 7, 7],
                "c": [np.nan, 1, 2, 3, 0, 0, 0],
            }
        )

        return df

    def expected_df_2():
        """Expected output for test_non_cap_column_left_untouched.

        Called without an instance while the class body is evaluated (inside
        the parametrize decorator below), hence no ``self`` parameter.
        """

        # np.nan rather than the np.NaN alias, which was removed in NumPy 2.0
        df = pd.DataFrame(
            {
                "a": [2, 2, 3, 4, 5, 6, 7, np.nan],
                "b": ["a", "b", "c", "d", "e", "f", "g", np.nan],
                "c": ["a", "b", "c", "d", "e", "f", "g", np.nan],
            }
        )

        df["c"] = df["c"].astype("category")

        return df

    def test_arguments(self):
        """Test that transform has expected arguments."""

        h.test_function_arguments(
            func=CappingTransformer.transform, expected_arguments=["self", "X"]
        )

    def test_check_is_fitted_call_count(self, mocker):
        """Test there are 2 calls to BaseTransformer check_is_fitted in transform."""

        df = d.create_df_3()

        x = CappingTransformer(capping_values={"a": [2, 5], "b": [-1, 8]})

        with h.assert_function_call_count(
            mocker, tubular.base.BaseTransformer, "check_is_fitted", 2
        ):

            x.transform(df)

    def test_check_is_fitted_call_1(self, mocker):
        """Test the arguments of both calls to BaseTransformer check_is_fitted in transform."""

        df = d.create_df_3()

        x = CappingTransformer(capping_values={"a": [2, 5], "b": [-1, 8]})

        # keyed by call index: call 0 checks capping_values, call 1 checks _replacement_values
        expected_call_args = {
            0: {"args": (["capping_values"],), "kwargs": {}},
            1: {"args": (["_replacement_values"],), "kwargs": {}},
        }

        with h.assert_function_call(
            mocker, tubular.base.BaseTransformer, "check_is_fitted", expected_call_args
        ):

            x.transform(df)

    def test_super_transform_called(self, mocker):
        """Test that BaseTransformer.transform called."""

        df = d.create_df_3()

        x = CappingTransformer(capping_values={"a": [2, 5], "b": [-1, 8]})

        # a fresh frame is used for the expected args so it is independent of df
        expected_call_args = {0: {"args": (d.create_df_3(),), "kwargs": {}}}

        with h.assert_function_call(
            mocker,
            tubular.base.BaseTransformer,
            "transform",
            expected_call_args,
            return_value=d.create_df_3(),
        ):

            x.transform(df)

    def test_learnt_values_not_modified(self):
        """Test that the replacements from fit are not changed in transform."""

        capping_values_dict = {"a": [2, 5], "b": [-1, 8]}

        df = d.create_df_3()

        # Give the transformer its own equal-valued dict: if it were handed
        # capping_values_dict itself, an in-place mutation during transform
        # would alter the expected value too and the check could never fail.
        x = CappingTransformer({"a": [2, 5], "b": [-1, 8]})

        x.transform(df)

        h.test_object_attributes(
            obj=x,
            expected_attributes={"capping_values": capping_values_dict},
            msg="Attributes for CappingTransformer set in init",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_3(), expected_df_1())
        + h.index_preserved_params(d.create_df_3(), expected_df_1()),
    )
    def test_expected_output_min_and_max_combinations(self, df, expected):
        """Test that capping is applied correctly in transform."""

        # "a" has both bounds, "b" only an upper bound, "c" only a lower bound
        x = CappingTransformer(
            capping_values={"a": [2, 5], "b": [None, 7], "c": [0, None]}
        )

        df_transformed = x.transform(df)

        h.assert_frame_equal_msg(
            actual=df_transformed,
            expected=expected,
            msg_tag="Unexpected values in CappingTransformer.transform",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_4(), expected_df_2())
        + h.index_preserved_params(d.create_df_4(), expected_df_2()),
    )
    def test_non_cap_column_left_untouched(self, df, expected):
        """Test that capping is applied only to specific columns, others remain the same."""

        x = CappingTransformer(capping_values={"a": [2, 10]})

        df_transformed = x.transform(df)

        h.assert_frame_equal_msg(
            actual=df_transformed,
            expected=expected,
            msg_tag="Unexpected values in CappingTransformer.transform, with columns meant to not be transformed",
        )

    def test_non_numeric_column_error(self):
        """Test that transform will raise an error if a column to transform is not numeric."""

        df = d.create_df_5()

        x = CappingTransformer(capping_values={"a": [2, 5], "b": [-1, 8], "c": [-1, 8]})

        with pytest.raises(
            TypeError, match=r"The following columns are not numeric in X; \['b', 'c'\]"
        ):

            x.transform(df)

    def test_quantile_not_fit_error(self):
        """Test that transform will raise an error if quantiles are specified in init but fit is not run before calling transform."""

        df = d.create_df_9()

        x = CappingTransformer(quantiles={"a": [0.2, 1], "b": [0, 1]})

        with pytest.raises(
            ValueError,
            match="capping_values attribute is an empty dict - perhaps the fit method has not been run yet",
        ):

            x.transform(df)

    def test_replacement_values_dict_not_set_error(self):
        """Test that transform will raise an error if _replacement_values is an empty dict."""

        df = d.create_df_9()

        x = CappingTransformer(quantiles={"a": [0.2, 1], "b": [0, 1]})

        # manually set attribute to get past the capping_values attribute is an empty dict exception
        x.capping_values = {"a": [1, 4]}

        with pytest.raises(
            ValueError,
            match="_replacement_values attribute is an empty dict - perhaps the fit method has not been run yet",
        ):

            x.transform(df)

    def test_attributes_unchanged_from_transform(self):
        """Test that attributes are unchanged after transform is run."""

        df = d.create_df_9()

        # x is fitted only; x2 is fitted and then transformed, so any
        # difference between them is attributable to transform
        x = CappingTransformer(quantiles={"a": [0.2, 1], "b": [0, 1]})

        x.fit(df)

        x2 = CappingTransformer(quantiles={"a": [0.2, 1], "b": [0, 1]})

        x2.fit(df)

        x2.transform(df)

        assert (
            x.capping_values == x2.capping_values
        ), "capping_values attribute modified in transform"
        assert (
            x._replacement_values == x2._replacement_values
        ), "_replacement_values attribute modified in transform"
        assert (
            x.weights_column == x2.weights_column
        ), "weights_column attribute modified in transform"
        assert x.quantiles == x2.quantiles, "quantiles attribute modified in transform"
Example #21
0
class TestTransform(object):
    """Tests for the transform method on CrossColumnMappingTransformer."""

    def expected_df_1():
        """Expected output for test_expected_output.

        Called without an instance while the class body is evaluated (inside
        the parametrize decorator below), hence no ``self`` parameter.
        """

        df = pd.DataFrame(
            {"a": [1, 2, 3, 4, 5, 6], "b": ["aa", "bb", "cc", "dd", "ee", "ff"]}
        )

        return df

    def expected_df_2():
        """Expected output for test_non_specified_values_unchanged."""

        df = pd.DataFrame(
            {"a": [1, 2, 3, 4, 5, 6], "b": ["aa", "bb", "cc", "d", "e", "f"]}
        )

        return df

    def expected_df_3():
        """Expected output for test_multiple_mappings_ordered_dict."""

        df = pd.DataFrame(
            {
                "a": [4, 2, 2, 1, 3],
                "b": ["x", "z", "y", "x", "x"],
                "c": ["cc", "dd", "bb", "cc", "cc"],
            }
        )

        return df

    def test_arguments(self):
        """Test that transform has expected arguments."""

        h.test_function_arguments(
            func=CrossColumnMappingTransformer.transform,
            expected_arguments=["self", "X"],
            expected_default_values=None,
        )

    def test_check_is_fitted_call(self, mocker):
        """Test the call to check_is_fitted."""

        df = d.create_df_1()

        mapping = {"a": {1: "a", 2: "b", 3: "c", 4: "d", 5: "e", 6: "f"}}

        x = CrossColumnMappingTransformer(mappings=mapping, adjust_column="b")

        expected_call_args = {0: {"args": (["adjust_column"],), "kwargs": {}}}

        with h.assert_function_call(
            mocker, tubular.base.BaseTransformer, "check_is_fitted", expected_call_args
        ):

            x.transform(df)

    def test_super_transform_call(self, mocker):
        """Test the call to BaseMappingTransformer.transform."""

        df = d.create_df_1()

        mapping = {"a": {1: "aa", 2: "bb", 3: "cc", 4: "dd", 5: "ee", 6: "ff"}}

        x = CrossColumnMappingTransformer(mappings=mapping, adjust_column="b")

        # a fresh frame is used for the expected args so it is independent of df
        expected_call_args = {0: {"args": (d.create_df_1(),), "kwargs": {}}}

        with h.assert_function_call(
            mocker,
            tubular.mapping.BaseMappingTransformer,
            "transform",
            expected_call_args,
            return_value=d.create_df_1(),
        ):

            x.transform(df)

    def test_adjust_col_not_in_x_error(self):
        """Test that an exception is raised if the adjust_column is not present in the dataframe."""

        df = d.create_df_1()

        mapping = {"a": {1: "aa", 2: "bb", 3: "cc", 4: "dd", 5: "ee", 6: "ff"}}

        x = CrossColumnMappingTransformer(mappings=mapping, adjust_column="c")

        with pytest.raises(ValueError, match="variable c is not in X"):

            x.transform(df)

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_1(), expected_df_1())
        + h.index_preserved_params(d.create_df_1(), expected_df_1()),
    )
    def test_expected_output(self, df, expected):
        """Test that transform is giving the expected output."""

        mapping = {"a": {1: "aa", 2: "bb", 3: "cc", 4: "dd", 5: "ee", 6: "ff"}}

        x = CrossColumnMappingTransformer(mappings=mapping, adjust_column="b")

        df_transformed = x.transform(df)

        h.assert_frame_equal_msg(
            actual=df_transformed,
            expected=expected,
            msg_tag="expected output from cross column mapping transformer",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_1(), expected_df_2())
        + h.index_preserved_params(d.create_df_1(), expected_df_2()),
    )
    def test_non_specified_values_unchanged(self, df, expected):
        """Test that values not specified in mappings are left unchanged in transform."""

        # only values 1-3 of column "a" are mapped; the other rows of "b" must stay as they were
        mapping = {"a": {1: "aa", 2: "bb", 3: "cc"}}

        x = CrossColumnMappingTransformer(mappings=mapping, adjust_column="b")

        df_transformed = x.transform(df)

        h.assert_frame_equal_msg(
            actual=df_transformed,
            expected=expected,
            msg_tag="expected output from cross column mapping transformer",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_7(), expected_df_3())
        + h.index_preserved_params(d.create_df_7(), expected_df_3()),
    )
    def test_multiple_mappings_ordered_dict(self, df, expected):
        """Test that mappings by multiple columns using an ordered dict gives the expected output in transform"""

        mapping = OrderedDict()

        mapping["a"] = {1: "aa", 2: "bb"}
        mapping["b"] = {"x": "cc", "z": "dd"}

        x = CrossColumnMappingTransformer(mappings=mapping, adjust_column="c")

        df_transformed = x.transform(df)

        h.assert_frame_equal_msg(
            actual=df_transformed,
            expected=expected,
            msg_tag="expected output from cross column mapping transformer",
        )

    def test_mappings_unchanged(self):
        """Test that mappings is unchanged in transform."""

        df = d.create_df_1()

        mapping = {"a": {1: "aa", 2: "bb", 3: "cc", 4: "dd", 5: "ee", 6: "ff"}}

        # Give the transformer its own equal-valued dict: if it were handed
        # `mapping` itself, an in-place mutation during transform would alter
        # the expected value too and the comparison could never fail.
        x = CrossColumnMappingTransformer(
            mappings={"a": {1: "aa", 2: "bb", 3: "cc", 4: "dd", 5: "ee", 6: "ff"}},
            adjust_column="b",
        )

        x.transform(df)

        h.assert_equal_dispatch(
            expected=mapping,
            actual=x.mappings,
            msg="CrossColumnMappingTransformer.transform has changed self.mappings unexpectedly",
        )
Example #22
0
class TestTransform(object):
    """Tests for OrdinalEncoderTransformer.transform()."""
    def expected_df_1():
        """Expected output for test_expected_output.

        Called without an instance while the class body is evaluated (inside
        the parametrize decorator below), hence no ``self`` parameter.
        """

        df = pd.DataFrame({
            "a": [1, 2, 3, 4, 5, 6],
            "b": [1, 2, 3, 4, 5, 6],
            "c": ["a", "b", "c", "d", "e", "f"],
            "d": [1, 2, 3, 4, 5, 6],
            "e": [3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
            "f": [1, 1, 1, 2, 2, 2],
        })

        df["c"] = df["c"].astype("category")

        return df

    def test_arguments(self):
        """Test that transform has expected arguments."""

        h.test_function_arguments(func=OrdinalEncoderTransformer.transform,
                                  expected_arguments=["self", "X"])

    def test_check_is_fitted_called(self, mocker):
        """Test that BaseNominalTransformer.check_mappable_rows is called with the input df."""

        df = d.create_OrdinalEncoderTransformer_test_df()

        x = OrdinalEncoderTransformer(response_column="a", columns="b")

        x.fit(df)

        expected_call_args = {0: {"args": (df, ), "kwargs": {}}}

        with h.assert_function_call(
                mocker,
                tubular.nominal.BaseNominalTransformer,
                "check_mappable_rows",
                expected_call_args,
        ):

            x.transform(df)

    def test_super_transform_called(self, mocker):
        """Test that BaseMappingTransformMixin.transform called."""

        df = d.create_OrdinalEncoderTransformer_test_df()

        x = OrdinalEncoderTransformer(response_column="a", columns="b")

        x.fit(df)

        # the transformer instance itself is expected as the first positional
        # argument, followed by a frame equal to the input
        expected_call_args = {
            0: {
                "args": (
                    x,
                    d.create_OrdinalEncoderTransformer_test_df(),
                ),
                "kwargs": {},
            }
        }

        with h.assert_function_call(
                mocker,
                tubular.mapping.BaseMappingTransformMixin,
                "transform",
                expected_call_args,
                return_value=d.create_OrdinalEncoderTransformer_test_df(),
        ):

            x.transform(df)

    def test_learnt_values_not_modified(self):
        """Test that the mappings from fit are not changed in transform."""

        df = d.create_OrdinalEncoderTransformer_test_df()

        # x is fitted only; x2 is fitted and then transformed, so any
        # difference in mappings is attributable to transform
        x = OrdinalEncoderTransformer(response_column="a", columns="b")

        x.fit(df)

        x2 = OrdinalEncoderTransformer(response_column="a", columns="b")

        x2.fit(df)

        x2.transform(df)

        # message corrected: it previously referred to "mean response values",
        # which is not what this transformer learns or what is compared here
        h.assert_equal_dispatch(
            expected=x.mappings,
            actual=x2.mappings,
            msg="OrdinalEncoderTransformer.transform has changed self.mappings unexpectedly",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_OrdinalEncoderTransformer_test_df(),
                            expected_df_1()) +
        h.index_preserved_params(d.create_OrdinalEncoderTransformer_test_df(),
                                 expected_df_1()),
    )
    def test_expected_output(self, df, expected):
        """Test that the output is expected from transform."""

        x = OrdinalEncoderTransformer(response_column="a",
                                      columns=["b", "d", "f"])

        # set the mappings dict directly rather than fitting x on df so test works with helpers
        x.mappings = {
            "b": {
                "a": 1,
                "b": 2,
                "c": 3,
                "d": 4,
                "e": 5,
                "f": 6
            },
            "d": {
                1: 1,
                2: 2,
                3: 3,
                4: 4,
                5: 5,
                6: 6
            },
            "f": {
                False: 1,
                True: 2
            },
        }

        df_transformed = x.transform(df)

        h.assert_frame_equal_msg(
            actual=df_transformed,
            expected=expected,
            msg_tag="Unexpected values in OrdinalEncoderTransformer.transform",
        )

    def test_nulls_introduced_in_transform_error(self):
        """Test that transform will raise an error if nulls are introduced."""

        df = d.create_OrdinalEncoderTransformer_test_df()

        x = OrdinalEncoderTransformer(response_column="a",
                                      columns=["b", "d", "f"])

        x.fit(df)

        # overwrite column b with a constant set after fitting, so transform
        # meets a value it has no mapping for
        df["b"] = "z"

        with pytest.raises(
                ValueError,
                match=
                "nulls would be introduced into column b from levels not present in mapping",
        ):

            x.transform(df)
Example #23
0
class TestTransform(object):
    """Tests for the transform method on CrossColumnAddTransformer."""
    def expected_df_1():
        """Expected output from test_expected_output.

        Called without an instance while the class body is evaluated (inside
        the parametrize decorator below), hence no ``self`` parameter.
        """

        df = pd.DataFrame({
            "a": [2.1, 3.2, 4.3, 5.4, 6.5, 7.6],
            "b": ["a", "b", "c", "d", "e", "f"]
        })

        return df

    def expected_df_2():
        """Expected output from test_non_specified_values_unchanged."""

        df = pd.DataFrame({
            "a": [2.1, 3.2, 3, 4, 5, 6],
            "b": ["a", "b", "c", "d", "e", "f"]
        })

        return df

    def expected_df_3():
        """Expected output from test_multiple_mappings_expected_output."""

        # np.nan rather than the np.NaN alias, which was removed in NumPy 2.0
        df = pd.DataFrame({
            "a": [4.1, 5.1, 4.1, 4, 8, 10.2, 7, 8, 9, np.nan],
            "b": ["a", "a", "a", "d", "e", "f", "g", np.nan, np.nan, np.nan],
            "c": ["a", "a", "c", "c", "e", "e", "f", "g", "h", np.nan],
        })

        df["c"] = df["c"].astype("category")

        return df

    def test_arguments(self):
        """Test that transform has expected arguments."""

        h.test_function_arguments(
            func=CrossColumnAddTransformer.transform,
            expected_arguments=["self", "X"],
            expected_default_values=None,
        )

    def test_check_is_fitted_call(self, mocker):
        """Test the call to check_is_fitted."""

        df = d.create_df_1()

        mapping = {
            "b": {
                "a": 1.1,
                "b": 1.2,
                "c": 1.3,
                "d": 1.4,
                "e": 1.5,
                "f": 1.6
            }
        }

        x = CrossColumnAddTransformer(mappings=mapping, adjust_column="a")

        expected_call_args = {0: {"args": (["adjust_column"], ), "kwargs": {}}}

        with h.assert_function_call(mocker, tubular.base.BaseTransformer,
                                    "check_is_fitted", expected_call_args):

            x.transform(df)

    def test_super_transform_call(self, mocker):
        """Test the call to BaseTransformer.transform."""

        df = d.create_df_1()

        mapping = {
            "b": {
                "a": 1.1,
                "b": 1.2,
                "c": 1.3,
                "d": 1.4,
                "e": 1.5,
                "f": 1.6
            }
        }

        x = CrossColumnAddTransformer(mappings=mapping, adjust_column="a")

        # a fresh frame is used for the expected args so it is independent of df
        expected_call_args = {0: {"args": (d.create_df_1(), ), "kwargs": {}}}

        with h.assert_function_call(
                mocker,
                tubular.base.BaseTransformer,
                "transform",
                expected_call_args,
                return_value=d.create_df_1(),
        ):

            x.transform(df)

    def test_adjust_col_not_in_x_error(self):
        """Test that an exception is raised if the adjust_column is not present in the dataframe."""

        df = d.create_df_1()

        mapping = {
            "b": {
                "a": 1.1,
                "b": 1.2,
                "c": 1.3,
                "d": 1.4,
                "e": 1.5,
                "f": 1.6
            }
        }

        x = CrossColumnAddTransformer(mappings=mapping, adjust_column="c")

        with pytest.raises(ValueError, match="variable c is not in X"):

            x.transform(df)

    def test_adjust_col_not_numeric_error(self):
        """Test that an exception is raised if the adjust_column is not numeric."""

        df = d.create_df_2()

        mapping = {
            "b": {
                "a": 1.1,
                "b": 1.2,
                "c": 1.3,
                "d": 1.4,
                "e": 1.5,
                "f": 1.6
            }
        }

        x = CrossColumnAddTransformer(mappings=mapping, adjust_column="c")

        with pytest.raises(TypeError,
                           match="variable c must have numeric dtype."):

            x.transform(df)

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_1(), expected_df_1()) +
        h.index_preserved_params(d.create_df_1(), expected_df_1()),
    )
    def test_expected_output(self, df, expected):
        """Test that transform is giving the expected output."""

        mapping = {
            "b": {
                "a": 1.1,
                "b": 1.2,
                "c": 1.3,
                "d": 1.4,
                "e": 1.5,
                "f": 1.6
            }
        }

        x = CrossColumnAddTransformer(mappings=mapping, adjust_column="a")

        df_transformed = x.transform(df)

        h.assert_frame_equal_msg(
            actual=df_transformed,
            expected=expected,
            msg_tag="expected output from cross column add transformer",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_1(), expected_df_2()) +
        h.index_preserved_params(d.create_df_1(), expected_df_2()),
    )
    def test_non_specified_values_unchanged(self, df, expected):
        """Test that values not specified in mappings are left unchanged in transform."""

        # only levels "a" and "b" of column b are mapped; other rows of "a" must stay as they were
        mapping = {"b": {"a": 1.1, "b": 1.2}}

        x = CrossColumnAddTransformer(mappings=mapping, adjust_column="a")

        df_transformed = x.transform(df)

        h.assert_frame_equal_msg(
            actual=df_transformed,
            expected=expected,
            msg_tag="expected output from cross column add transformer",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_5(), expected_df_3()) +
        h.index_preserved_params(d.create_df_5(), expected_df_3()),
    )
    def test_multiple_mappings_expected_output(self, df, expected):
        """Test that mappings by multiple columns are both applied in transform"""

        mapping = {"b": {"a": 1.1, "f": 1.2}, "c": {"a": 2, "e": 3}}

        x = CrossColumnAddTransformer(mappings=mapping, adjust_column="a")

        df_transformed = x.transform(df)

        h.assert_frame_equal_msg(
            actual=df_transformed,
            expected=expected,
            msg_tag="expected output from cross column add transformer",
        )

    def test_mappings_unchanged(self):
        """Test that mappings is unchanged in transform."""

        df = d.create_df_1()

        mapping = {
            "b": {
                "a": 1.1,
                "b": 1.2,
                "c": 1.3,
                "d": 1.4,
                "e": 1.5,
                "f": 1.6
            }
        }

        # Give the transformer its own equal-valued dict: if it were handed
        # `mapping` itself, an in-place mutation during transform would alter
        # the expected value too and the comparison could never fail.
        x = CrossColumnAddTransformer(
            mappings={
                "b": {
                    "a": 1.1,
                    "b": 1.2,
                    "c": 1.3,
                    "d": 1.4,
                    "e": 1.5,
                    "f": 1.6
                }
            },
            adjust_column="a",
        )

        x.transform(df)

        h.assert_equal_dispatch(
            expected=mapping,
            actual=x.mappings,
            msg=
            "CrossColumnAddTransformer.transform has changed self.mappings unexpectedly",
        )
class TestTransform(object):
    """Tests for NominalToIntegerTransformer.transform()."""
    def expected_df_1():
        """Expected output for test_expected_output.

        Each column is replaced by the position of each value in that
        column's order of first appearance. Called without an instance while
        the class body is evaluated (inside the parametrize decorator below),
        hence no ``self`` parameter.
        """

        df = pd.DataFrame({
            "a": [1, 2, 3, 4, 5, 6],
            "b": ["a", "b", "c", "d", "e", "f"]
        })

        # Series.map is used instead of Series.replace: every value has a key
        # in the dict, so the values are the same, but map yields an integer
        # column directly rather than relying on replace's deprecated
        # downcasting of the object column "b".
        df["a"] = df["a"].map(
            {k: i
             for i, k in enumerate(df["a"].unique())})

        df["b"] = df["b"].map(
            {k: i
             for i, k in enumerate(df["b"].unique())})

        return df

    def test_arguments(self):
        """Test that transform has expected arguments."""

        h.test_function_arguments(func=NominalToIntegerTransformer.transform,
                                  expected_arguments=["self", "X"])

    def test_check_is_fitted_called(self, mocker):
        """Test that BaseTransformer check_is_fitted called."""

        df = d.create_df_1()

        x = NominalToIntegerTransformer(columns=["a", "b"])

        x.fit(df)

        expected_call_args = {0: {"args": (["mappings"], ), "kwargs": {}}}

        with h.assert_function_call(mocker, tubular.base.BaseTransformer,
                                    "check_is_fitted", expected_call_args):

            x.transform(df)

    def test_super_transform_called(self, mocker):
        """Test that BaseTransformer.transform called."""

        df = d.create_df_1()

        x = NominalToIntegerTransformer(columns="a")

        x.fit(df)

        # a fresh frame is used for the expected args so it is independent of df
        expected_call_args = {0: {"args": (d.create_df_1(), ), "kwargs": {}}}

        with h.assert_function_call(
                mocker,
                tubular.base.BaseTransformer,
                "transform",
                expected_call_args,
                return_value=d.create_df_1(),
        ):

            x.transform(df)

    def test_learnt_values_not_modified(self):
        """Test that the mappings from fit are not changed in transform."""

        df = d.create_df_1()

        # x is fitted only; x2 goes through fit_transform, so any difference
        # in mappings is attributable to the transform step
        x = NominalToIntegerTransformer(columns=["a", "b"])

        x.fit(df)

        x2 = NominalToIntegerTransformer(columns=["a", "b"])

        x2.fit_transform(df)

        # message corrected: it previously referred to "impute values",
        # whereas the attribute compared here is mappings
        h.assert_equal_dispatch(
            expected=x.mappings,
            actual=x2.mappings,
            msg="NominalToIntegerTransformer.transform has changed self.mappings unexpectedly",
        )

    def test_non_mappable_rows_raises_error(self):
        """Test that rows that cannot be mapped result in an exception."""

        df = d.create_df_1()

        x = NominalToIntegerTransformer(columns=["a", "b"])

        x.fit(df)

        # shift column a after fitting so that it contains a value with no learnt mapping
        df["a"] = df["a"] + 1

        with pytest.raises(
                ValueError,
                match=
                "nulls would be introduced into column a from levels not present in mapping",
        ):

            x.transform(df)

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_1(), expected_df_1()) +
        h.index_preserved_params(d.create_df_1(), expected_df_1()),
    )
    def test_expected_output(self, df, expected):
        """Test that the output is expected from transform."""

        x = NominalToIntegerTransformer(columns=["a", "b"])

        # set the mapping dict directly rather than fitting x on df so test works with helpers
        x.mappings = {
            "a": {
                1: 0,
                2: 1,
                3: 2,
                4: 3,
                5: 4,
                6: 5
            },
            "b": {
                "a": 0,
                "b": 1,
                "c": 2,
                "d": 3,
                "e": 4,
                "f": 5
            },
        }

        df_transformed = x.transform(df)

        h.assert_frame_equal_msg(
            actual=df_transformed,
            expected=expected,
            msg_tag=
            "Unexpected values in NominalToIntegerTransformer.transform",
        )
class TestInverseTransform(object):
    """Tests for NominalToIntegerTransformer.inverse_transform()."""
    def test_arguments(self):
        """Test that inverse_transform has expected arguments."""

        h.test_function_arguments(
            func=NominalToIntegerTransformer.inverse_transform,
            expected_arguments=["self", "X"],
        )

    def test_check_is_fitted_called(self, mocker):
        """Test that BaseTransformer check_is_fitted called."""

        df = d.create_df_1()

        x = NominalToIntegerTransformer(columns=["a", "b"])

        x.fit(df)

        # transform runs before the mock is installed, so only the call made
        # by inverse_transform is checked
        df_transformed = x.transform(df)

        expected_call_args = {0: {"args": (["mappings"], ), "kwargs": {}}}

        with h.assert_function_call(mocker, tubular.base.BaseTransformer,
                                    "check_is_fitted", expected_call_args):

            x.inverse_transform(df_transformed)

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_1(), d.create_df_1()) +
        h.index_preserved_params(d.create_df_1(), d.create_df_1()),
    )
    def test_expected_output(self, df, expected):
        """Test that transform then inverse_transform gets back to the original df."""

        x = NominalToIntegerTransformer(columns=["a", "b"])

        # set the mapping dict directly rather than fitting x on df so test works with helpers
        x.mappings = {
            "a": {
                1: 0,
                2: 1,
                3: 2,
                4: 3,
                5: 4,
                6: 5
            },
            "b": {
                "a": 0,
                "b": 1,
                "c": 2,
                "d": 3,
                "e": 4,
                "f": 5
            },
        }

        df_transformed = x.transform(df)

        df_transformed_back = x.inverse_transform(df_transformed)

        h.assert_frame_equal_msg(
            actual=df_transformed_back,
            expected=expected,
            msg_tag="transform reverse does not get back to original",
        )

    def test_non_mappable_rows_raises_error(self):
        """Test that rows that cannot be mapped result in an exception."""

        x = NominalToIntegerTransformer(columns=["a", "b"])

        df = d.create_df_1()

        x.fit(df)

        df_transformed = x.transform(df)

        # shift the encoded column so that it contains an integer with no learnt mapping
        df_transformed["b"] = df_transformed["b"] + 1

        with pytest.raises(
                ValueError,
                match=
                "nulls introduced from levels not present in mapping for column: b",
        ):

            x.inverse_transform(df_transformed)

    def test_learnt_values_not_modified(self):
        """Test that the mappings from fit are not changed in inverse_transform."""

        df = d.create_df_1()

        # x is fitted only; x2 is fitted, transformed and inverse-transformed,
        # so any difference in mappings comes from those later steps
        x = NominalToIntegerTransformer(columns=["a", "b"])

        x.fit(df)

        x2 = NominalToIntegerTransformer(columns=["a", "b"])

        x2.fit(df)

        df_transformed = x2.transform(df)

        x2.inverse_transform(df_transformed)

        # message corrected: it previously referred to "impute values",
        # whereas the attribute compared here is mappings
        h.assert_equal_dispatch(
            expected=x.mappings,
            actual=x2.mappings,
            msg="NominalToIntegerTransformer.inverse_transform has changed self.mappings unexpectedly",
        )
Example #26
0
class TestTransform(object):
    """Tests for OneHotEncodingTransformer.transform()."""

    def expected_df_1():
        """Build the expected output DataFrame for test_expected_output.

        Columns a, b and c hold the base data (c as a category dtype) and
        b_x, b_y, b_z hold float indicator values for the levels of b.
        """

        base_columns = {
            "a": [4, 2, 2, 1, 3],
            "b": ["x", "z", "y", "x", "x"],
            "c": ["c", "a", "a", "c", "b"],
        }

        # indicator columns are appended after the base columns, in this order
        indicator_columns = {
            "b_x": [1.0, 0.0, 0.0, 1.0, 1.0],
            "b_y": [0.0, 0.0, 1.0, 0.0, 0.0],
            "b_z": [0.0, 1.0, 0.0, 0.0, 0.0],
        }

        expected = pd.DataFrame(base_columns)
        expected["c"] = expected["c"].astype("category")

        for column_name, values in indicator_columns.items():
            expected[column_name] = values

        return expected

    def expected_df_2():
        """Build the expected output DataFrame for test_unseen_categories_encoded_as_all_zeroes.

        Columns a, b and c hold the base data (c as a category dtype) on a
        non-default index, followed by float indicator columns for a, b and c.
        """

        row_index = [10, 15, 200, 251, 59]

        expected = pd.DataFrame(
            {
                "a": [1, 5, 2, 3, 3],
                "b": ["w", "w", "z", "y", "x"],
                "c": ["a", "a", "c", "b", "a"],
            },
            index=row_index,
        )
        expected["c"] = expected["c"].astype("category")

        # indicator columns are appended after the base columns, in this order
        indicator_columns = {
            "a_1": [1.0, 0.0, 0.0, 0.0, 0.0],
            "a_2": [0.0, 0.0, 1.0, 0.0, 0.0],
            "a_3": [0.0, 0.0, 0.0, 1.0, 1.0],
            "a_4": [0.0, 0.0, 0.0, 0.0, 0.0],
            "b_x": [0.0, 0.0, 0.0, 0.0, 1.0],
            "b_y": [0.0, 0.0, 0.0, 1.0, 0.0],
            "b_z": [0.0, 0.0, 1.0, 0.0, 0.0],
            "c_a": [1.0, 1.0, 0.0, 0.0, 1.0],
            "c_b": [0.0, 0.0, 0.0, 1.0, 0.0],
            "c_c": [0.0, 0.0, 1.0, 0.0, 0.0],
        }

        for column_name, values in indicator_columns.items():
            expected[column_name] = values

        return expected

    def test_arguments(self):
        """Test that OneHotEncodingTransformer.transform has the expected arguments (self, X)."""

        expected_arguments = ["self", "X"]

        h.test_function_arguments(
            func=OneHotEncodingTransformer.transform,
            expected_arguments=expected_arguments,
        )

    def test_columns_check_call(self, mocker):
        """Test that transform's first call to BaseTransformer.columns_check receives the input df."""

        df = d.create_df_1()

        encoder = OneHotEncodingTransformer(columns="b")
        encoder.fit(df)

        # keyed by call index; a fresh copy of the data is used for the
        # expected positional argument rather than the df passed to transform
        first_call = {"args": (d.create_df_1(),), "kwargs": {}}
        expected_call_args = {0: first_call}

        call_checker = h.assert_function_call(
            mocker, tubular.base.BaseTransformer, "columns_check", expected_call_args
        )

        with call_checker:
            encoder.transform(df)

    def test_check_is_fitted_first_call(self, mocker):
        """Test the first two calls transform makes to BaseTransformer.check_is_fitted."""

        df = d.create_df_1()

        encoder = OneHotEncodingTransformer(columns="b")
        encoder.fit(df)

        # keyed by call index: call 0 checks "separator", call 1 checks "drop_original"
        checked_attributes = ["separator", "drop_original"]
        expected_call_args = {
            call_index: {"args": ([attribute],), "kwargs": {}}
            for call_index, attribute in enumerate(checked_attributes)
        }

        call_checker = h.assert_function_call(
            mocker, tubular.base.BaseTransformer, "check_is_fitted", expected_call_args
        )

        with call_checker:
            encoder.transform(df)

    def test_non_numeric_column_error_1(self):
        """Test that transform raises a ValueError if a column to transform has nulls."""

        encoder = OneHotEncodingTransformer(columns=["b"])

        # fit on create_df_1, then transform create_df_2, which is expected
        # to trigger the nulls error for column b
        encoder.fit(d.create_df_1())

        df_test = d.create_df_2()

        expected_message = "column b has nulls - replace before proceeding"

        with pytest.raises(ValueError, match=expected_message):
            encoder.transform(df_test)

    def test_base_nominal_transformer_transform_called(self, mocker):
        """Test that BaseNominalTransformer.transform is called by transform.

        Checks that the patched BaseNominalTransformer.transform is called
        exactly once, with no keyword arguments and with two positional
        arguments: the transformer itself and the input DataFrame.
        """

        df = d.create_df_1()

        x = OneHotEncodingTransformer(columns="b")

        x.fit(df)

        # patch after fitting so only calls made during transform are recorded
        mocker.patch(
            "tubular.nominal.BaseNominalTransformer.transform",
            return_value=d.create_df_1(),
        )

        x.transform(df)

        mocked_transform = tubular.nominal.BaseNominalTransformer.transform

        assert (
            mocked_transform.call_count == 1
        ), f"Not enough calls to BaseNominalTransformer.transform -\n  Expected: 1\n  Actual: {mocked_transform.call_count}"

        # each recorded call unpacks as (positional args, keyword args)
        call_args = mocked_transform.call_args_list[0]
        call_pos_args = call_args[0]
        call_kwargs = call_args[1]

        h.assert_equal_dispatch(
            expected={},
            actual=call_kwargs,
            msg="kwargs for BaseNominalTransformer.transform in OneHotEncodingTransformer.transform",
        )

        expected_pos_args = (x, d.create_df_1())

        assert (
            len(call_pos_args) == 2
        ), f"Unexpected number of positional args in BaseNominalTransformer.transform call -\n  Expected: 2\n  Actual: {len(call_pos_args)}"

        h.assert_frame_equal_msg(
            expected=expected_pos_args[1],
            actual=call_pos_args[1],
            msg_tag="X positional arg in BaseNominalTransformer.transform call",
        )

        # identity check: the first positional arg must be the transformer itself
        assert (
            call_pos_args[0] is expected_pos_args[0]
        ), "self positional arg in BaseNominalTransformer.transform call"

    def test_one_hot_encoder_transform_called(self, mocker):
        """Test that sklearn's OneHotEncoder.transform is called by transform.

        Checks that the patched OneHotEncoder.transform is called exactly
        once, with no keyword arguments and with two positional arguments:
        the transformer itself and the "b" column of the input DataFrame.
        """

        df = d.create_df_1()

        x = OneHotEncodingTransformer(columns="b")

        x.fit(df)

        # patch after fitting so only calls made during transform are recorded
        mocker.patch("sklearn.preprocessing.OneHotEncoder.transform")

        x.transform(df)

        mocked_transform = sklearn.preprocessing.OneHotEncoder.transform

        assert (
            mocked_transform.call_count == 1
        ), f"Not enough calls to OneHotEncoder.transform -\n  Expected: 1\n  Actual: {mocked_transform.call_count}"

        # each recorded call unpacks as (positional args, keyword args)
        call_args = mocked_transform.call_args_list[0]
        call_pos_args = call_args[0]
        call_kwargs = call_args[1]

        h.assert_equal_dispatch(
            expected={},
            actual=call_kwargs,
            msg="kwargs for OneHotEncoder.transform in OneHotEncodingTransformer.transform",
        )

        assert (
            len(call_pos_args) == 2
        ), f"Unexpected number of positional args in OneHotEncoder.transform call -\n  Expected: 2\n  Actual: {len(call_pos_args)}"

        assert (
            call_pos_args[0] is x
        ), f"Unexpected positional arg (self, index 0) in OneHotEncoder.transform call -\n  Expected: self\n  Actual: {call_pos_args[0]}"

        # the second positional arg is expected to be just the "b" column
        h.assert_frame_equal_msg(
            expected=d.create_df_1()[["b"]],
            actual=call_pos_args[1],
            msg_tag="X positional arg in OneHotEncoder.transform call",
        )

    @pytest.mark.parametrize(
        "df_test, expected",
        h.row_by_row_params(d.create_df_7(), expected_df_1())
        + h.index_preserved_params(d.create_df_7(), expected_df_1()),
    )
    def test_expected_output(self, df_test, expected):
        """Test that OneHotEncodingTransformer.transform gives the expected output.

        The expected frame contains both the encoded columns for b and the
        original columns, so unrelated columns being modified also fails the test.
        """

        # fit on the full create_df_7 data rather than on df_test, so the
        # same fitted state is used for every parametrised input
        encoder = OneHotEncodingTransformer(columns="b")
        encoder.fit(d.create_df_7())

        actual = encoder.transform(df_test)

        h.assert_frame_equal_msg(
            expected=expected,
            actual=actual,
            msg_tag="Unspecified columns changed in transform",
        )

    def test_categories_not_modified(self):
        """Test that categories_ learnt in fit are left unchanged by transform."""

        df_train = d.create_df_1()
        df_test = d.create_df_7()

        transformed_encoder = OneHotEncodingTransformer(
            columns=["a", "b"], verbose=False
        )
        reference_encoder = OneHotEncodingTransformer(
            columns=["a", "b"], verbose=False
        )

        transformed_encoder.fit(df_train)
        reference_encoder.fit(df_train)

        # only one of the two identically fitted encoders is used to transform
        transformed_encoder.transform(df_test)

        # compare categories_ entry by entry (one entry per column, a then b)
        for position in (0, 1):
            h.assert_equal_dispatch(
                expected=list(reference_encoder.categories_[position]),
                actual=list(transformed_encoder.categories_[position]),
                msg=f"categories_ (index {position}) modified during transform",
            )

    def test_renaming_feature_works_as_expected(self):
        """Test that transform names the encoded columns using the given separator."""

        # restrict the data to the two columns being encoded
        df = d.create_df_7()[["b", "c"]]

        encoder = OneHotEncodingTransformer(
            columns=["b", "c"], separator="|", drop_original=True
        )
        encoder.fit(df)

        df_transformed = encoder.transform(df)

        actual_columns = list(df_transformed.columns.values)
        expected_columns = ["b|x", "b|y", "b|z", "c|a", "c|b", "c|c"]

        h.assert_equal_dispatch(
            expected=expected_columns,
            actual=actual_columns,
            msg="renaming columns feature in OneHotEncodingTransformer.transform",
        )

    def test_warning_generated_by_unseen_categories(self):
        """Test that transform emits a warning when the data has unseen categories."""

        encoder = OneHotEncodingTransformer(verbose=True)

        # fit on create_df_7, then transform create_df_8
        encoder.fit(d.create_df_7())

        df_test = d.create_df_8()

        with pytest.warns(Warning):
            encoder.transform(df_test)

    @pytest.mark.parametrize(
        "df_test, expected",
        h.row_by_row_params(d.create_df_8(), expected_df_2())
        + h.index_preserved_params(d.create_df_8(), expected_df_2()),
    )
    def test_unseen_categories_encoded_as_all_zeroes(self, df_test, expected):
        """Test that transform encodes categories not seen in fit as all 0s."""

        # fit on the full create_df_7 data rather than on df_test, so the
        # same fitted state is used for every parametrised input
        encoder = OneHotEncodingTransformer(columns=["a", "b", "c"], verbose=False)
        encoder.fit(d.create_df_7())

        actual = encoder.transform(df_test)

        h.assert_equal_dispatch(
            expected=expected,
            actual=actual,
            msg="unseen category rows not encoded as 0s",
        )

    def test_original_columns_dropped_when_specified(self):
        """Test that transform drops the original columns when drop_original=True."""

        df = d.create_df_7()

        encoder = OneHotEncodingTransformer(columns=["a", "b", "c"], drop_original=True)
        encoder.fit(df)

        df_transformed = encoder.transform(df)

        # input columns that no longer appear in the transformed output
        output_columns = df_transformed.columns.values
        dropped_columns = [
            column for column in df.columns.values if column not in output_columns
        ]

        h.assert_equal_dispatch(
            expected=["a", "b", "c"],
            actual=dropped_columns,
            msg="original columns not dropped",
        )

    def test_original_columns_kept_when_specified(self):
        """Test that transform keeps the original columns when drop_original=False."""

        df = d.create_df_7()

        encoder = OneHotEncodingTransformer(drop_original=False)
        encoder.fit(df)

        df_transformed = encoder.transform(df)

        # original columns absent from the output; expected to be none
        missing_columns = list({"a", "b", "c"} - set(df_transformed.columns))

        h.assert_equal_dispatch(
            expected=[],
            actual=missing_columns,
            msg="original columns not kept",
        )