Example #1
0
    def test_super_fit_call(self, mocker):
        """Check that fit delegates to CappingTransformer.fit once, with the expected arguments."""

        fit_spy = mocker.spy(tubular.capping.CappingTransformer, "fit")

        data = d.create_df_9()

        transformer = OutOfRangeNullTransformer(
            quantiles={"a": [0.1, 1], "b": [0.5, None]},
            weights_column="c",
        )

        transformer.fit(data)

        assert (
            fit_spy.call_count == 1
        ), "unexpected number of calls to CappingTransformer.fit"

        # a recorded call unpacks into (positional args, keyword args)
        positional, keywords = fit_spy.call_args_list[0]

        # the only positional argument should be the transformer itself
        assert positional == (
            transformer,
        ), "unexpected positional args in CappingTransformer.fit call"

        h.assert_equal_dispatch(
            expected={"X": d.create_df_9(), "y": None},
            actual=keywords,
            msg="unexpected kwargs in CappingTransformer.fit call",
        )
    def test_pd_cut_call(self, mocker):
        """Check that transform hands column "a" and the cut_kwargs to pd.cut."""

        data = d.create_df_9()

        cutter = CutTransformer(
            column="a",
            new_column_name="a_cut",
            cut_kwargs={"bins": 3, "right": False, "precision": 2},
        )

        # the single expected call: the column positionally, cut_kwargs as keywords
        first_call = {
            "args": (d.create_df_9()["a"],),
            "kwargs": {"bins": 3, "right": False, "precision": 2},
        }

        with h.assert_function_call(
            mocker,
            pandas,
            "cut",
            {0: first_call},
            return_value=[1, 2, 3, 4, 5, 6],
        ):
            cutter.transform(data)
Example #3
0
    def test_prepare_quantiles_call_weight(self, mocker):
        """Check the arguments passed to prepare_quantiles when weights_column is set."""

        data = d.create_df_9()

        capper = CappingTransformer(
            quantiles={"a": [0.1, 1], "b": [0.5, None]},
            weights_column="c",
        )

        # one expected call per column, in order: (column values, quantiles, weights)
        column_quantiles = [("a", [0.1, 1]), ("b", [0.5, None])]

        expected_call_args = {
            call_number: {
                "args": (
                    d.create_df_9()[column],
                    quantile_pair,
                    d.create_df_9()["c"],
                ),
                "kwargs": {},
            }
            for call_number, (column, quantile_pair) in enumerate(column_quantiles)
        }

        with h.assert_function_call(
            mocker,
            tubular.capping.CappingTransformer,
            "prepare_quantiles",
            expected_call_args,
        ):
            capper.fit(data)
class TestTransform(object):
    """Test suite covering NullIndicator.transform()."""

    def expected_df_1():
        """Build the frame expected from test_null_indicator_columns_correct.

        Takes no ``self``: it is called directly in the class body by the
        parametrize decorator further down.
        """

        expected = pd.DataFrame(
            {
                "a": [1, 2, np.nan, 4, np.nan, 6],
                "b": [np.nan, 5, 4, 3, 2, 1],
                "c": [3, 2, 1, 4, 5, 6],
                "b_nulls": [1, 0, 0, 0, 0, 0],
                "c_nulls": [0, 0, 0, 0, 0, 0],
            }
        )

        # the indicator columns are cast to int32
        indicator_columns = ["b_nulls", "c_nulls"]
        expected[indicator_columns] = expected[indicator_columns].astype("int32")

        return expected

    def test_arguments(self):
        """Check the argument names accepted by transform."""

        h.test_function_arguments(
            func=NullIndicator.transform,
            expected_arguments=["self", "X"],
        )

    def test_super_transform_called(self, mocker):
        """Check that transform calls BaseTransformer.transform with the input frame."""

        data = d.create_df_1()

        indicator = NullIndicator(columns="a")

        only_call = {"args": (d.create_df_1(),), "kwargs": {}}

        with h.assert_function_call(
            mocker,
            tubular.base.BaseTransformer,
            "transform",
            {0: only_call},
        ):
            indicator.transform(data)

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_9(), expected_df_1())
        + h.index_preserved_params(d.create_df_9(), expected_df_1()),
    )
    def test_null_indicator_columns_correct(self, df, expected):
        """Check the indicator columns are created correctly and other columns are left unchanged."""

        indicator = NullIndicator(columns=["b", "c"])

        actual = indicator.transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=actual,
            msg="Check null indicator columns created correctly in transform.",
        )
Example #5
0
    def test_prepare_quantiles_output_set_attributes(self, mocker, weights_column):
        """Test the output of prepare_quantiles is set to the capping_values and _replacement_values attributes.

        prepare_quantiles is patched with one canned return value per call; after
        fit, both attributes are expected to map "a" to the first value and "b"
        to the second.

        NOTE(review): weights_column is supplied from outside this block
        (presumably a parametrize decorator or fixture) - confirm.
        """

        df = d.create_df_9()

        x = CappingTransformer(
            quantiles={"a": [0.1, 1], "b": [0.5, None]}, weights_column=weights_column
        )

        # side_effect returns one element of this list per successive call
        mocked_return_values = [["aaaa", "bbbb"], [1234, None]]

        mocker.patch(
            "tubular.capping.CappingTransformer.prepare_quantiles",
            side_effect=mocked_return_values,
        )

        x.fit(df)

        h.test_object_attributes(
            obj=x,
            expected_attributes={
                "capping_values": {
                    "a": mocked_return_values[0],
                    "b": mocked_return_values[1],
                },
                "_replacement_values": {
                    "a": mocked_return_values[0],
                    "b": mocked_return_values[1],
                },
            },
            # the patched method is prepare_quantiles, so name it in the failure message
            msg="prepare_quantiles output set to capping_values, _replacement_values attributes",
        )
Example #6
0
    def test_super_fit_call(self, mocker):
        """Check that fit forwards the input frame and None to BaseTransformer.fit."""

        data = d.create_df_9()

        capper = CappingTransformer(
            quantiles={"a": [0.1, 1], "b": [0.5, None]},
            weights_column="c",
        )

        # a single call is expected: (frame, None) positionally, no keywords
        only_call = {"args": (d.create_df_9(), None), "kwargs": {}}

        with h.assert_function_call(
            mocker,
            tubular.base.BaseTransformer,
            "fit",
            {0: only_call},
        ):
            capper.fit(data)
Example #7
0
    def test_attributes_unchanged_from_transform(self):
        """Check that running transform leaves the transformer's attributes as they were after fit.

        Two identically configured transformers are fitted on the same data; only
        the second also runs transform, and their attributes are then compared.
        """

        data = d.create_df_9()

        fitted_only = CappingTransformer(quantiles={"a": [0.2, 1], "b": [0, 1]})
        fitted_only.fit(data)

        fitted_and_run = CappingTransformer(quantiles={"a": [0.2, 1], "b": [0, 1]})
        fitted_and_run.fit(data)
        fitted_and_run.transform(data)

        assert (
            fitted_only.capping_values == fitted_and_run.capping_values
        ), "capping_values attribute modified in transform"

        assert (
            fitted_only._replacement_values == fitted_and_run._replacement_values
        ), "_replacement_values attribute modified in transform"

        assert (
            fitted_only.weights_column == fitted_and_run.weights_column
        ), "weights_column attribute modified in transform"

        assert (
            fitted_only.quantiles == fitted_and_run.quantiles
        ), "quantiles attribute modified in transform"
    def expected_df_1():
        """Build the frame expected from test_expected_output."""

        expected = d.create_df_9()

        # add the categorical "d" column on top of the base frame
        labels = pd.Series(["c", "b", "a", "d", "e", "f"], dtype="category")
        expected["d"] = labels

        return expected
    def test_super_transform_call(self, mocker):
        """Check that transform calls BaseTransformer.transform with the input frame."""

        data = d.create_df_9()

        cutter = CutTransformer(
            column="a",
            new_column_name="Y",
            cut_kwargs={"bins": 3},
        )

        only_call = {"args": (d.create_df_9(),), "kwargs": {}}

        # the patched parent transform returns a fresh frame for the rest of transform to use
        with h.assert_function_call(
            mocker,
            tubular.base.BaseTransformer,
            "transform",
            {0: only_call},
            return_value=d.create_df_9(),
        ):
            cutter.transform(data)
Example #10
0
    def test_quantile_not_fit_error(self):
        """Check that transform raises ValueError when quantiles were given at init but fit was never run."""

        data = d.create_df_9()

        unfitted = CappingTransformer(quantiles={"a": [0.2, 1], "b": [0, 1]})

        expected_message = "capping_values attribute is an empty dict - perhaps the fit method has not been run yet"

        with pytest.raises(ValueError, match=expected_message):
            unfitted.transform(data)
Example #11
0
    def test_fit_returns_self(self):
        """Check that fit returns the very same transformer instance it was called on."""

        data = d.create_df_9()

        transformer = OutOfRangeNullTransformer(
            quantiles={"a": [0.1, 1], "b": [0.5, None]},
            weights_column="c",
        )

        returned = transformer.fit(data)

        # identity, not equality: fit must hand back the same object
        assert (
            returned is transformer
        ), "Returned value from OutOfRangeNullTransformer.fit not as expected."
Example #12
0
    def test_replacement_values_dict_not_set_error(self):
        """Check that transform raises ValueError when _replacement_values is an empty dict."""

        data = d.create_df_9()

        capper = CappingTransformer(quantiles={"a": [0.2, 1], "b": [0, 1]})

        # populate capping_values by hand so the empty-capping_values check is passed
        # and the _replacement_values check is the one that fires
        capper.capping_values = {"a": [1, 4]}

        expected_message = "_replacement_values attribute is an empty dict - perhaps the fit method has not been run yet"

        with pytest.raises(ValueError, match=expected_message):
            capper.transform(data)
    def test_output_from_cut_assigned_to_column(self, mocker):
        """Check that whatever pd.cut returns ends up in the column named by new_column_name."""

        data = d.create_df_9()

        cutter = CutTransformer(
            column="c",
            new_column_name="c_new",
            cut_kwargs={"bins": 2},
        )

        # canned pd.cut result that should appear verbatim in the new column
        cut_output = [1, 2, 3, 4, 5, 6]
        mocker.patch("pandas.cut", return_value=cut_output)

        result = cutter.transform(data)

        assert (
            result["c_new"].tolist() == cut_output
        ), "unexpected values assigned to c_new column"
Example #14
0
    def test_quantile_combinations_handled(self, quantiles, weights_column):
        """Check that fit completes without raising for the given mix of None and non-None quantiles."""

        data = d.create_df_9()

        capper = CappingTransformer(
            quantiles={"a": quantiles},
            weights_column=weights_column,
        )

        # any exception from fit is reported as a test failure with the offending quantiles
        try:
            capper.fit(data)
        except Exception as err:
            pytest.fail(
                f"unexpected exception when calling fit with quantiles {quantiles} - {err}"
            )
Example #15
0
    def test_set_replacement_values_called(self, mocker):
        """Check that fit calls OutOfRangeNullTransformer.set_replacement_values once, with no arguments."""

        data = d.create_df_9()

        transformer = OutOfRangeNullTransformer(
            quantiles={"a": [0.1, 1], "b": [0.5, None]},
            weights_column="c",
        )

        # a single call with neither positional nor keyword arguments
        only_call = {"args": (), "kwargs": {}}

        with h.assert_function_call(
            mocker,
            tubular.capping.OutOfRangeNullTransformer,
            "set_replacement_values",
            {0: only_call},
        ):
            transformer.fit(data)
class TestTransform(object):
    """Test suite covering CutTransformer.transform()."""

    def expected_df_1():
        """Build the frame expected from test_expected_output.

        Takes no ``self``: it is called directly in the class body by the
        parametrize decorator on test_expected_output.
        """

        expected = d.create_df_9()

        # add the categorical "d" column on top of the base frame
        labels = pd.Series(["c", "b", "a", "d", "e", "f"], dtype="category")
        expected["d"] = labels

        return expected

    def test_arguments(self):
        """Check the argument names accepted by transform."""

        h.test_function_arguments(
            func=CutTransformer.transform,
            expected_arguments=["self", "X"],
        )

    def test_super_transform_call(self, mocker):
        """Check that transform calls BaseTransformer.transform with the input frame."""

        data = d.create_df_9()

        cutter = CutTransformer(
            column="a",
            new_column_name="Y",
            cut_kwargs={"bins": 3},
        )

        only_call = {"args": (d.create_df_9(),), "kwargs": {}}

        # the patched parent transform returns a fresh frame for the rest of transform to use
        with h.assert_function_call(
            mocker,
            tubular.base.BaseTransformer,
            "transform",
            {0: only_call},
            return_value=d.create_df_9(),
        ):
            cutter.transform(data)

    def test_pd_cut_call(self, mocker):
        """Check that transform hands column "a" and the cut_kwargs to pd.cut."""

        data = d.create_df_9()

        cutter = CutTransformer(
            column="a",
            new_column_name="a_cut",
            cut_kwargs={"bins": 3, "right": False, "precision": 2},
        )

        # the single expected call: the column positionally, cut_kwargs as keywords
        first_call = {
            "args": (d.create_df_9()["a"],),
            "kwargs": {"bins": 3, "right": False, "precision": 2},
        }

        with h.assert_function_call(
            mocker,
            pandas,
            "cut",
            {0: first_call},
            return_value=[1, 2, 3, 4, 5, 6],
        ):
            cutter.transform(data)

    def test_output_from_cut_assigned_to_column(self, mocker):
        """Check that whatever pd.cut returns ends up in the column named by new_column_name."""

        data = d.create_df_9()

        cutter = CutTransformer(
            column="c",
            new_column_name="c_new",
            cut_kwargs={"bins": 2},
        )

        # canned pd.cut result that should appear verbatim in the new column
        cut_output = [1, 2, 3, 4, 5, 6]
        mocker.patch("pandas.cut", return_value=cut_output)

        result = cutter.transform(data)

        assert (
            result["c_new"].tolist() == cut_output
        ), "unexpected values assigned to c_new column"

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_9(), expected_df_1())
        + h.index_preserved_params(d.create_df_9(), expected_df_1()),
    )
    def test_expected_output(self, df, expected):
        """Check that transform produces the expected frame for the given input."""

        bin_edges = [0, 1, 2, 3, 4, 5, 6]
        bin_labels = ["a", "b", "c", "d", "e", "f"]

        cutter = CutTransformer(
            column="c",
            new_column_name="d",
            cut_kwargs={"bins": bin_edges, "ordered": False, "labels": bin_labels},
        )

        actual = cutter.transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=actual,
            msg="CutTransformer.transform output",
        )

    def test_non_numeric_column_error(self):
        """Check that transform raises TypeError when the column to discretise is not numeric."""

        data = d.create_df_8()

        cutter = CutTransformer(column="b", new_column_name="d")

        expected_message = "b should be a numeric dtype but got object"

        with pytest.raises(TypeError, match=expected_message):
            cutter.transform(data)
Example #17
0
class TestPrepareQuantiles(object):
    """Tests for the CappingTransformer.prepare_quantiles method."""

    def test_arguments(self):
        """Test that prepare_quantiles has expected arguments."""

        h.test_function_arguments(
            func=CappingTransformer.prepare_quantiles,
            expected_arguments=["self", "values", "quantiles", "sample_weight"],
            expected_default_values=(None,),
        )

    @pytest.mark.parametrize(
        "values, quantiles, sample_weight, expected_quantiles",
        [
            (d.create_df_9()["a"], [0.1, 0.6], d.create_df_9()["c"], [0.1, 0.6]),
            (d.create_df_9()["b"], [0.1, None], d.create_df_9()["c"], [0.1]),
            (d.create_df_9()["a"], [None, 0.6], d.create_df_9()["c"], [0.6]),
            (d.create_df_9()["b"], [0.1, 0.6], None, [0.1, 0.6]),
            (d.create_df_9()["a"], [0.1, None], None, [0.1]),
            (d.create_df_9()["b"], [None, 0.6], None, [0.6]),
        ],
    )
    def test_weighted_quantile_call(
        self, mocker, values, quantiles, sample_weight, expected_quantiles
    ):
        """Test the call to weighted_quantile, including the filtering out of None values.

        weighted_quantile is patched, so only the arguments it receives are
        checked: the values, the quantiles with None entries removed, and the
        sample weights, all passed positionally.
        """

        x = CappingTransformer(quantiles={"a": [0.1, 1], "b": [0.5, None]})

        mocked = mocker.patch("tubular.capping.CappingTransformer.weighted_quantile")

        x.prepare_quantiles(values, quantiles, sample_weight)

        assert (
            mocked.call_count == 1
        ), f"unexpected number of calls to weighted_quantile, expecting 1 but got {mocked.call_count}"

        # a recorded call unpacks into (positional args, keyword args)
        call_pos_args, call_kwargs = mocked.call_args_list[0]

        expected_pos_args = (values, expected_quantiles, sample_weight)

        assert (
            call_pos_args == expected_pos_args
        ), f"unexpected positional args in call to weighted_quantile, expecting {expected_pos_args} but got {call_pos_args}"

        # the expectation is an empty kwargs dict, so say so rather than "None"
        assert (
            call_kwargs == {}
        ), f"unexpected kwargs in call to weighted_quantile, expecting no kwargs but got {call_kwargs}"

    @pytest.mark.parametrize(
        "values, quantiles, sample_weight, expected_results",
        [
            (d.create_df_9()["a"], [0.1, 0.6], d.create_df_9()["c"], ["aaaa"]),
            (d.create_df_9()["b"], [0.1, None], d.create_df_9()["c"], ["aaaa", None]),
            (d.create_df_9()["a"], [None, 0.6], d.create_df_9()["c"], [None, "aaaa"]),
            (d.create_df_9()["b"], [0.1, 0.6], None, ["aaaa"]),
            (d.create_df_9()["a"], [0.1, None], None, ["aaaa", None]),
            (d.create_df_9()["b"], [None, 0.6], None, [None, "aaaa"]),
        ],
    )
    def test_output_from_weighted_quantile_returned(
        self, mocker, values, quantiles, sample_weight, expected_results
    ):
        """Test the output from weighted_quantile is returned from the function, including None values added back in.

        weighted_quantile is patched to always return ["aaaa"]; the expected
        results place None wherever the requested quantile was None.
        """

        x = CappingTransformer(quantiles={"a": [0.1, 1], "b": [0.5, None]})

        mocker.patch(
            "tubular.capping.CappingTransformer.weighted_quantile",
            return_value=["aaaa"],
        )

        results = x.prepare_quantiles(values, quantiles, sample_weight)

        # report the expected value first and the actual value second
        assert (
            results == expected_results
        ), f"unexpected value returned from prepare_quantiles, expecting {expected_results} but got {results}"