Exemple #1
0
    def test_prepare_quantiles_call_weight(self, mocker):
        """Check the arguments passed to prepare_quantiles when weights_column is set.

        For each of columns a and b the expected positional arguments are the
        column itself, its quantiles list and the weights column c.
        """
        transformer = CappingTransformer(
            quantiles={"a": [0.1, 1], "b": [0.5, None]}, weights_column="c"
        )

        # expected arguments come from a fresh frame, independent of the one
        # that is handed to fit below
        reference = d.create_df_9()
        columns_and_quantiles = [("a", [0.1, 1]), ("b", [0.5, None])]

        expected_call_args = {
            position: {
                "args": (reference[column], column_quantiles, reference["c"]),
                "kwargs": {},
            }
            for position, (column, column_quantiles) in enumerate(
                columns_and_quantiles
            )
        }

        frame = d.create_df_9()

        with h.assert_function_call(
            mocker,
            tubular.capping.CappingTransformer,
            "prepare_quantiles",
            expected_call_args,
        ):
            transformer.fit(frame)
Exemple #2
0
    def test_prepare_quantiles_output_set_attributes(self, mocker, weights_column):
        """Check fit stores the prepare_quantiles output in capping_values and _replacement_values.

        prepare_quantiles is patched to return one canned value per call; after
        fit both attributes are expected to map a and b to those values.
        """
        frame = d.create_df_9()

        transformer = CappingTransformer(
            quantiles={"a": [0.1, 1], "b": [0.5, None]}, weights_column=weights_column
        )

        # first patched call returns the value for a, second the value for b
        returned_for_a = ["aaaa", "bbbb"]
        returned_for_b = [1234, None]

        mocker.patch(
            "tubular.capping.CappingTransformer.prepare_quantiles",
            side_effect=[returned_for_a, returned_for_b],
        )

        transformer.fit(frame)

        expected_attributes = {
            attribute: {"a": returned_for_a, "b": returned_for_b}
            for attribute in ("capping_values", "_replacement_values")
        }

        h.test_object_attributes(
            obj=transformer,
            expected_attributes=expected_attributes,
            msg="weighted_quantile output set to capping_values, _replacement_values attributes",
        )
Exemple #3
0
    def test_attributes_unchanged_from_transform(self):
        """Check that running transform leaves the fitted attributes as they were.

        Two identically configured transformers are fitted on the same data;
        only the second also runs transform, then their attributes are compared.
        """
        frame = d.create_df_9()

        fitted_only = CappingTransformer(quantiles={"a": [0.2, 1], "b": [0, 1]})
        fitted_only.fit(frame)

        fitted_and_transformed = CappingTransformer(
            quantiles={"a": [0.2, 1], "b": [0, 1]}
        )
        fitted_and_transformed.fit(frame)
        fitted_and_transformed.transform(frame)

        compared_attributes = (
            "capping_values",
            "_replacement_values",
            "weights_column",
            "quantiles",
        )

        for attribute in compared_attributes:
            before = getattr(fitted_only, attribute)
            after = getattr(fitted_and_transformed, attribute)

            assert before == after, f"{attribute} attribute modified in transform"
Exemple #4
0
    def test_weighted_quantile_call(
        self, mocker, values, quantiles, sample_weight, expected_quantiles
    ):
        """Test the call to weighted_quantile, including the filtering out of None values.

        weighted_quantile is patched, prepare_quantiles is run once, and the
        single recorded call is expected to receive
        (values, expected_quantiles, sample_weight) positionally and no kwargs.
        """

        x = CappingTransformer(quantiles={"a": [0.1, 1], "b": [0.5, None]})

        mocked = mocker.patch("tubular.capping.CappingTransformer.weighted_quantile")

        x.prepare_quantiles(values, quantiles, sample_weight)

        assert (
            mocked.call_count == 1
        ), f"unexpected number of calls to weighted_quantile, expecting 1 but got {mocked.call_count}"

        # each recorded call is a (positional args, keyword args) pair
        call_pos_args, call_kwargs = mocked.call_args_list[0]

        expected_pos_args = (values, expected_quantiles, sample_weight)

        assert (
            call_pos_args == expected_pos_args
        ), f"unexpected positional args in call to weighted_quantile, expecting {expected_pos_args} but got {call_pos_args}"

        # the message names the value actually compared against (an empty dict)
        assert (
            call_kwargs == {}
        ), f"unexpected kwargs in call to weighted_quantile, expecting {{}} but got {call_kwargs}"
Exemple #5
0
    def test_negative_values_in_weights_error(self):
        """Check weighted_quantile raises a ValueError when sample_weight holds a negative value."""
        transformer = CappingTransformer(capping_values={"a": [2, 10]})

        values = [2, 3, 4, 5]
        quantiles = [0, 1]
        weights_with_negative = [2, -0.01]

        with pytest.raises(ValueError, match="negative weights in sample weights"):
            transformer.weighted_quantile(values, quantiles, weights_with_negative)
Exemple #6
0
    def test_capping_values_both_null_error(self):
        """Check check_capping_values_dict raises a ValueError when both values for a key are None."""
        transformer = CappingTransformer(capping_values={"a": [1, 3], "b": [None, -1]})

        # key a carries no capping value at all
        both_none_for_a = {"a": [None, None], "b": [None, 1]}

        with pytest.raises(ValueError, match="both values are None for key a"):
            transformer.check_capping_values_dict(
                capping_values_dict=both_none_for_a, dict_name="eee"
            )
Exemple #7
0
    def test_zero_total_weight_error(self):
        """Check weighted_quantile raises a ValueError when the sample weights are all zero."""
        transformer = CappingTransformer(capping_values={"a": [2, 10]})

        expected_message = "total sample weights are not greater than 0"
        all_zero_weights = [0, 0]

        with pytest.raises(ValueError, match=expected_message):
            transformer.weighted_quantile([2, 3, 4, 5], [0, 1], all_zero_weights)
Exemple #8
0
    def test_non_cap_column_left_untouched(self, df, expected):
        """Check the transform output matches expected when only column a has capping values."""
        transformer = CappingTransformer(capping_values={"a": [2, 10]})

        h.assert_frame_equal_msg(
            actual=transformer.transform(df),
            expected=expected,
            msg_tag="Unexpected values in CappingTransformer.transform, with columns meant to not be transformed",
        )
Exemple #9
0
    def test_check_is_fitted_call_count(self, mocker):
        """Check that transform calls BaseTransformer.check_is_fitted twice."""
        frame = d.create_df_3()
        transformer = CappingTransformer(capping_values={"a": [2, 5], "b": [-1, 8]})

        expected_number_of_calls = 2

        with h.assert_function_call_count(
            mocker,
            tubular.base.BaseTransformer,
            "check_is_fitted",
            expected_number_of_calls,
        ):
            transformer.transform(frame)
Exemple #10
0
    def test_non_numeric_column_error(self):
        """Check transform raises a TypeError naming the non-numeric columns it was asked to cap."""
        frame = d.create_df_5()

        transformer = CappingTransformer(
            capping_values={"a": [2, 5], "b": [-1, 8], "c": [-1, 8]}
        )

        expected_message = r"The following columns are not numeric in X; \['b', 'c'\]"

        with pytest.raises(TypeError, match=expected_message):
            transformer.transform(frame)
Exemple #11
0
    def test_quantiles_none_error(self):
        """Test that fit emits a UserWarning when the transformer was built without quantiles.

        The transformer is created from capping_values only, so there is nothing
        for fit to learn; a warning (not an exception) is expected.
        """

        df = d.create_df_3()

        x = CappingTransformer(capping_values={"a": [2, 5], "b": [-1, 8]})

        # only the fit call sits inside the context, so a warning emitted during
        # setup or construction cannot satisfy the check by accident
        with pytest.warns(
            UserWarning,
            match="quantiles not set so no fitting done in CappingTransformer",
        ):

            x.fit(df)
Exemple #12
0
    def test_capping_values_non_str_key_error(self):
        """Check check_capping_values_dict raises a TypeError when a key is not a str."""
        transformer = CappingTransformer(capping_values={"a": [1, 3], "b": [None, -1]})

        # the second key is an int rather than a column name
        dict_with_int_key = {"a": [1, 3], 1: [None, -1]}
        expected_message = r"all keys in bbb should be str, but got \<class 'int'\>"

        with pytest.raises(TypeError, match=expected_message):
            transformer.check_capping_values_dict(
                capping_values_dict=dict_with_int_key, dict_name="bbb"
            )
Exemple #13
0
    def test_capping_values_non_list_item_error(self):
        """Check check_capping_values_dict raises a TypeError when an item is not a list."""
        transformer = CappingTransformer(capping_values={"a": [1, 3], "b": [None, -1]})

        # key b holds a tuple instead of a list
        dict_with_tuple_item = {"a": [1, 3], "b": (None, -1)}
        expected_message = (
            r"each item in ccc should be a list, but got \<class 'tuple'\> for key b"
        )

        with pytest.raises(TypeError, match=expected_message):
            transformer.check_capping_values_dict(
                capping_values_dict=dict_with_tuple_item, dict_name="ccc"
            )
Exemple #14
0
    def test_capping_values_non_length_2_list_item_error(self):
        """Check check_capping_values_dict raises a ValueError when a list item is not of length 2."""
        transformer = CappingTransformer(capping_values={"a": [1, 3], "b": [None, -1]})

        # key b holds a single-element list
        dict_with_short_item = {"a": [1, 3], "b": [None]}
        expected_message = "each item in ddd should be length 2, but got 1 for key b"

        with pytest.raises(ValueError, match=expected_message):
            transformer.check_capping_values_dict(
                capping_values_dict=dict_with_short_item, dict_name="ddd"
            )
Exemple #15
0
    def test_capping_values_non_numeric_error(self):
        """Check check_capping_values_dict raises a TypeError for a value that is neither numeric nor None."""
        transformer = CappingTransformer(capping_values={"a": [1, 3], "b": [None, -1]})

        # key a holds a str where a number or None is required
        dict_with_str_value = {"b": [1, 3], "a": [None, "a"]}
        expected_message = r"each item in eee lists must contain numeric values or None, got \<class 'str'\> for key a"

        with pytest.raises(TypeError, match=expected_message):
            transformer.check_capping_values_dict(
                capping_values_dict=dict_with_str_value, dict_name="eee"
            )
Exemple #16
0
    def test_lower_value_gte_upper_value_error(self):
        """Check check_capping_values_dict raises a ValueError when the first value exceeds the second."""
        transformer = CappingTransformer(capping_values={"a": [1, 2], "b": [None, -1]})

        # for key a the lower value (4) is above the upper value (3)
        dict_with_inverted_bounds = {"a": [4, 3], "b": [None, -1]}
        expected_message = (
            "lower value is greater than or equal to upper value for key a"
        )

        with pytest.raises(ValueError, match=expected_message):
            transformer.check_capping_values_dict(
                capping_values_dict=dict_with_inverted_bounds, dict_name="eee"
            )
Exemple #17
0
    def test_capping_value_nan_inf_error(self, value):
        """Check check_capping_values_dict raises a ValueError when given the supplied NaN/Inf value."""
        transformer = CappingTransformer(capping_values={"a": [1, 3], "b": [None, 1]})

        # the value under test is placed as the second entry for key a
        dict_with_bad_value = {"b": [1, 3], "a": [None, value]}
        expected_message = "item in eee lists contains numpy NaN or Inf values"

        with pytest.raises(ValueError, match=expected_message):
            transformer.check_capping_values_dict(
                capping_values_dict=dict_with_bad_value, dict_name="eee"
            )
Exemple #18
0
    def test_capping_values_not_dict_error(self):
        """Check check_capping_values_dict raises a TypeError when given something other than a dict."""
        transformer = CappingTransformer(capping_values={"a": [1, 3], "b": [None, -1]})

        # a flat tuple of names and values rather than a dict
        not_a_dict = ("a", [1, 3], "b", [None, -1])
        expected_message = "aaa should be dict of columns and capping values"

        with pytest.raises(TypeError, match=expected_message):
            transformer.check_capping_values_dict(
                capping_values_dict=not_a_dict, dict_name="aaa"
            )
Exemple #19
0
    def test_quantile_not_fit_error(self):
        """Check transform raises a ValueError when quantiles were given in init but fit was never run."""
        frame = d.create_df_9()

        unfitted = CappingTransformer(quantiles={"a": [0.2, 1], "b": [0, 1]})

        expected_message = "capping_values attribute is an empty dict - perhaps the fit method has not been run yet"

        with pytest.raises(ValueError, match=expected_message):
            unfitted.transform(frame)
Exemple #20
0
    def test_expected_output_min_and_max_combinations(self, df, expected):
        """Check the transform output for columns with both values, only the second, and only the first set."""
        capping_values = {"a": [2, 5], "b": [None, 7], "c": [0, None]}
        transformer = CappingTransformer(capping_values=capping_values)

        h.assert_frame_equal_msg(
            actual=transformer.transform(df),
            expected=expected,
            msg_tag="Unexpected values in CappingTransformer.transform",
        )
Exemple #21
0
    def test_super_fit_call(self, mocker):
        """Check the arguments passed to BaseTransformer.fit when CappingTransformer.fit is run."""
        frame = d.create_df_9()

        transformer = CappingTransformer(
            quantiles={"a": [0.1, 1], "b": [0.5, None]}, weights_column="c"
        )

        # the expected call receives a frame equal to the input plus None
        expected_first_call = {"args": (d.create_df_9(), None), "kwargs": {}}
        expected_call_args = {0: expected_first_call}

        with h.assert_function_call(
            mocker, tubular.base.BaseTransformer, "fit", expected_call_args
        ):
            transformer.fit(frame)
Exemple #22
0
    def test_replacement_values_dict_not_set_error(self):
        """Check transform raises a ValueError when _replacement_values is an empty dict."""
        frame = d.create_df_9()

        transformer = CappingTransformer(quantiles={"a": [0.2, 1], "b": [0, 1]})

        # populate capping_values by hand so transform gets beyond the
        # "capping_values attribute is an empty dict" exception
        transformer.capping_values = {"a": [1, 4]}

        expected_message = "_replacement_values attribute is an empty dict - perhaps the fit method has not been run yet"

        with pytest.raises(ValueError, match=expected_message):
            transformer.transform(frame)
Exemple #23
0
    def test_learnt_values_not_modified(self):
        """Test that the capping values given at init are not changed in transform.

        The expected values are built independently of the dict handed to the
        transformer, so they cannot change along with it.
        """

        df = d.create_df_3()

        x = CappingTransformer({"a": [2, 5], "b": [-1, 8]})

        x.transform(df)

        # NOTE(review): if the transformer keeps a reference to the dict it is
        # given (not visible from here), comparing against that same object
        # would hide an in-place mutation - hence a separate literal
        expected_capping_values = {"a": [2, 5], "b": [-1, 8]}

        h.test_object_attributes(
            obj=x,
            expected_attributes={"capping_values": expected_capping_values},
            msg="Attributes for CappingTransformer set in init",
        )
Exemple #24
0
    def test_output_from_weighted_quantile_returned(
        self, mocker, values, quantiles, sample_weight, expected_results
    ):
        """Test the output from weighted_quantile is returned from the function, including None values added back in.

        weighted_quantile is patched to return ["aaaa"], and the value returned
        by prepare_quantiles is compared with expected_results.
        """

        x = CappingTransformer(quantiles={"a": [0.1, 1], "b": [0.5, None]})

        mocker.patch(
            "tubular.capping.CappingTransformer.weighted_quantile",
            return_value=["aaaa"],
        )

        results = x.prepare_quantiles(values, quantiles, sample_weight)

        # message reports the expected value first, then the actual one
        assert (
            results == expected_results
        ), f"unexpected value returned from prepare_quantiles, expecting {expected_results} but got {results}"
Exemple #25
0
    def test_expected_output(
        self, values, sample_weight, quantiles, expected_quantiles
    ):
        """Check weighted_quantile returns the expected quantiles, compared after rounding to 1dp."""
        transformer = CappingTransformer(capping_values={"a": [2, 10]})

        values_as_series = pd.Series(values)

        computed = transformer.weighted_quantile(
            values_as_series, quantiles, sample_weight
        )

        # rounding to one decimal place guards against mismatches caused by
        # numerical precision
        computed_1_dp = list(np.round(computed, decimals=1))

        assert (
            computed_1_dp == expected_quantiles
        ), "unexpected weighted quantiles calculated"
Exemple #26
0
    def test_check_is_fitted_call_1(self, mocker):
        """Check the arguments of the calls to BaseTransformer.check_is_fitted made in transform.

        Two calls are expected: the first with ["capping_values"], the second
        with ["_replacement_values"].
        """
        frame = d.create_df_3()
        transformer = CappingTransformer(capping_values={"a": [2, 5], "b": [-1, 8]})

        checked_attributes = ("capping_values", "_replacement_values")

        expected_call_args = {
            position: {"args": ([attribute],), "kwargs": {}}
            for position, attribute in enumerate(checked_attributes)
        }

        with h.assert_function_call(
            mocker, tubular.base.BaseTransformer, "check_is_fitted", expected_call_args
        ):
            transformer.transform(frame)
Exemple #27
0
    def test_quantile_combinations_handled(self, quantiles, weights_column):
        """Check fit completes without raising for the given quantiles / weights_column combination."""
        frame = d.create_df_9()

        transformer = CappingTransformer(
            quantiles={"a": quantiles}, weights_column=weights_column
        )

        # any exception from fit is turned into an explicit test failure that
        # reports the quantiles being tried
        try:
            transformer.fit(frame)
        except Exception as error:
            pytest.fail(
                f"unexpected exception when calling fit with quantiles {quantiles} - {error}"
            )
Exemple #28
0
    def test_super_transform_called(self, mocker):
        """Check the arguments passed to BaseTransformer.transform when CappingTransformer.transform is run."""
        frame = d.create_df_3()
        transformer = CappingTransformer(capping_values={"a": [2, 5], "b": [-1, 8]})

        # a single call is expected, receiving a frame equal to the input
        expected_first_call = {"args": (d.create_df_3(),), "kwargs": {}}
        expected_call_args = {0: expected_first_call}

        with h.assert_function_call(
            mocker,
            tubular.base.BaseTransformer,
            "transform",
            expected_call_args,
            return_value=d.create_df_3(),
        ):
            transformer.transform(frame)
Exemple #29
0
    def test_capping_values_quantiles_both_none_error(self):
        """Check init raises a ValueError when capping_values and quantiles are both None."""
        expected_message = (
            "both capping_values and quantiles are None, either supply capping values in the "
            "capping_values argument or supply quantiles that can be learnt in the fit method"
        )

        with pytest.raises(ValueError, match=expected_message):
            CappingTransformer(capping_values=None, quantiles=None)
Exemple #30
0
    def test_quantiles_outside_range_error(self, out_range_value):
        """Check init raises a ValueError when a quantile value lies outside the [0, 1] range."""
        # the out-of-range value is placed first in the list for key f
        quantiles_with_bad_value = {"e": [0.1, 0.9], "f": [out_range_value, None]}

        expected_message = rf"quantile values must be in the range \[0, 1\] but got {out_range_value} for key f"

        with pytest.raises(ValueError, match=expected_message):
            CappingTransformer(quantiles=quantiles_with_bad_value)