def test_expected_output(self, df, expected):
        """Check that transform followed by inverse_transform yields the expected frame."""

        transformer = NominalToIntegerTransformer(columns=["a", "b"])

        # assign the mappings attribute by hand instead of fitting on df, so test works with helpers
        transformer.mappings = {
            "a": dict(zip(range(1, 7), range(6))),
            "b": dict(zip("abcdef", range(6))),
        }

        # forward pass, then immediately reverse it
        round_tripped = transformer.inverse_transform(transformer.transform(df))

        h.assert_frame_equal_msg(
            msg_tag="transform reverse does not get back to original",
            expected=expected,
            actual=round_tripped,
        )
    def test_expected_output(self, df, expected):
        """Check that transform produces the expected frame for hand-set integer mappings."""

        encoder = NominalToIntegerTransformer(columns=["a", "b"])

        # mappings are assigned by hand (no fit on df) so test works with helpers
        encoder.mappings = {
            "a": {level: code for code, level in enumerate(range(1, 7))},
            "b": {level: code for code, level in enumerate("abcdef")},
        }

        actual = encoder.transform(df)

        h.assert_frame_equal_msg(
            msg_tag="Unexpected values in NominalToIntegerTransformer.transform",
            expected=expected,
            actual=actual,
        )
Example #3
0
    def test_non_cap_column_left_untouched(self, df, expected):
        """Check transform output against expected when capping values are given for column a only."""

        capper = CappingTransformer(capping_values={"a": [2, 10]})

        h.assert_frame_equal_msg(
            msg_tag="Unexpected values in CappingTransformer.transform, with columns meant to not be transformed",
            expected=expected,
            actual=capper.transform(df),
        )
Example #4
0
def test_exception_no_print():
    """Check that assert_frame_equal_msg raises an AssertionError matching the msg_tag when the
    frames differ and print_actual_and_expected is False.
    """

    # the two frames differ only in the last value of column a
    expected_frame = pd.DataFrame({"a": [1, 2, 3]})
    actual_frame = pd.DataFrame({"a": [1, 2, 4]})

    with pytest.raises(AssertionError, match="a"):
        h.assert_frame_equal_msg(
            expected=expected_frame,
            actual=actual_frame,
            msg_tag="a",
            print_actual_and_expected=False,
        )
Example #5
0
    def test_expected_output(self, df, expected):
        """Check CrossColumnMappingTransformer.transform output for a mapping keyed on column a with adjust_column b."""

        mapping_by_a = {
            "a": {
                1: "aa",
                2: "bb",
                3: "cc",
                4: "dd",
                5: "ee",
                6: "ff",
            },
        }

        transformer = CrossColumnMappingTransformer(
            adjust_column="b",
            mappings=mapping_by_a,
        )

        result = transformer.transform(df)

        h.assert_frame_equal_msg(
            msg_tag="expected output from cross column mapping transformer",
            expected=expected,
            actual=result,
        )
Example #6
0
    def test_expected_output(self, df, expected):
        """Check CrossColumnMultiplyTransformer.transform output for a mapping keyed on column b with adjust_column a."""

        multipliers_by_b = {
            "b": {
                "a": 1.1,
                "b": 1.2,
                "c": 1.3,
                "d": 1.4,
                "e": 1.5,
                "f": 1.6,
            },
        }

        transformer = CrossColumnMultiplyTransformer(
            adjust_column="a",
            mappings=multipliers_by_b,
        )

        h.assert_frame_equal_msg(
            msg_tag="expected output from cross column multiply transformer",
            expected=expected,
            actual=transformer.transform(df),
        )
Example #7
0
    def test_non_specified_values_unchanged(self, df, expected):
        """Check transform output against expected when the mappings cover only some values of a and b."""

        partial_mappings = {
            "a": {1: 5, 2: 6, 3: 7},
            "b": {"a": "z", "b": "y", "c": "x"},
        }

        mapper = MappingTransformer(mappings=partial_mappings)

        result = mapper.transform(df)

        h.assert_frame_equal_msg(
            msg_tag="expected output from mapping transformer",
            expected=expected,
            actual=result,
        )
Example #8
0
    def test_expected_output_min_and_max_combinations(self, df, expected):
        """Check transform output when columns are given both caps, only an upper cap, or only a lower cap."""

        # each value is a [min, max] pair; None leaves that side unset
        caps = {
            "a": [2, 5],
            "b": [None, 7],
            "c": [0, None],
        }

        capper = CappingTransformer(capping_values=caps)

        h.assert_frame_equal_msg(
            msg_tag="Unexpected values in CappingTransformer.transform",
            expected=expected,
            actual=capper.transform(df),
        )
Example #9
0
    def test_non_specified_values_unchanged(self, df, expected):
        """Check transform output against expected when the mapping covers only two values of column b."""

        partial_mapping = {
            "b": {
                "a": 1.1,
                "b": 1.2,
            },
        }

        adder = CrossColumnAddTransformer(
            adjust_column="a",
            mappings=partial_mapping,
        )

        result = adder.transform(df)

        h.assert_frame_equal_msg(
            msg_tag="expected output from cross column add transformer",
            expected=expected,
            actual=result,
        )
Example #10
0
    def test_multiple_mappings_expected_output(self, df, expected):
        """Check transform output when mappings are supplied for two columns (b and c) at once."""

        two_column_mapping = {
            "b": {"a": 1.1, "f": 1.2},
            "c": {"a": 2, "e": 3},
        }

        adder = CrossColumnAddTransformer(
            adjust_column="a",
            mappings=two_column_mapping,
        )

        h.assert_frame_equal_msg(
            msg_tag="expected output from cross column add transformer",
            expected=expected,
            actual=adder.transform(df),
        )
Example #11
0
    def test_expected_output_no_weight(self, df, expected):
        """Test that the output is expected from transform.

        The transformer is created for columns b and c and its mapping_ attribute is assigned
        directly, so fit is not called before transform.
        """

        x = GroupRareLevelsTransformer(columns=["b", "c"], cut_off_percent=0.2)

        # set the mapping dict directly rather than fitting x on df so test works with decorators
        # np.nan is used because the np.NaN alias was removed in NumPy 2.0
        x.mapping_ = {"b": ["a", np.nan], "c": ["e", "c", "a"]}

        df_transformed = x.transform(df)

        h.assert_frame_equal_msg(
            actual=df_transformed,
            expected=expected,
            msg_tag="Unexpected values in GroupRareLevelsTransformer.transform",
        )
Example #12
0
    def test_expected_output_nulls(self, df, expected):
        """Check DateDiffLeapYearTransformer.transform output for the null-column case."""

        date_diff = DateDiffLeapYearTransformer(
            column_lower="a",
            column_upper="b",
            new_column_name="c",
            drop_cols=False,
        )

        result = date_diff.transform(df)

        h.assert_frame_equal_msg(
            msg_tag="Unexpected values in DateDiffLeapYearTransformer.transform (nulls)",
            expected=expected,
            actual=result,
        )
Example #13
0
    def test_multiple_mappings_ordered_dict(self, df, expected):
        """Check transform output when the multi-column mappings are passed as an OrderedDict."""

        # key order is a then b, the same order the entries are listed in
        ordered_mapping = OrderedDict(
            [
                ("a", {1: "aa", 2: "bb"}),
                ("b", {"x": "cc", "z": "dd"}),
            ]
        )

        transformer = CrossColumnMappingTransformer(
            adjust_column="c",
            mappings=ordered_mapping,
        )

        h.assert_frame_equal_msg(
            msg_tag="expected output from cross column mapping transformer",
            expected=expected,
            actual=transformer.transform(df),
        )
Example #14
0
    def test_expected_output_no_weight_single_row_na(self):
        """Test output from a single row transform with a NaN value remains the same.

        The type is preserved if using an existing dataframe, so a new dataframe is created here.
        The transformed frame is compared against the untransformed single-row input.
        """

        # np.nan is used throughout because the np.NaN alias was removed in NumPy 2.0
        one_row_df = pd.DataFrame({"b": [np.nan], "c": [np.nan]})
        x = GroupRareLevelsTransformer(columns=["b", "c"], cut_off_percent=0.2)

        # set the mapping dict directly rather than fitting x on df so test works with decorators
        x.mapping_ = {"b": ["a", np.nan], "c": ["e", "c", "a", np.nan]}

        one_row_df_transformed = x.transform(one_row_df)

        h.assert_frame_equal_msg(
            actual=one_row_df_transformed,
            expected=one_row_df,
            msg_tag="Unexpected values in GroupRareLevelsTransformer.transform",
        )
Example #15
0
    def test_expected_output(self, df, expected):
        """Check MappingTransformer.transform output for full mappings on columns a and b."""

        letters = "abcdef"
        numbers = range(1, 7)

        # a maps 1..6 onto the letters, b maps the letters back onto 1..6
        full_mappings = {
            "a": dict(zip(numbers, letters)),
            "b": dict(zip(letters, numbers)),
        }

        mapper = MappingTransformer(mappings=full_mappings)

        result = mapper.transform(df)

        h.assert_frame_equal_msg(
            msg_tag="expected output from mapping transformer",
            expected=expected,
            actual=result,
        )
Example #16
0
    def test_base_nominal_transformer_transform_called(self, mocker):
        """Test that BaseNominalTransformer.transform is called by OneHotEncodingTransformer.transform.

        BaseNominalTransformer.transform is patched to return d.create_df_1(), then the recorded
        call is checked for: a single call, no keyword args, exactly two positional args, the
        transformer itself as the first positional arg and the input frame as the second.
        """

        df = d.create_df_1()

        x = OneHotEncodingTransformer(columns="b")

        x.fit(df)

        # patched after fit, so only calls made from the transform call below are recorded
        mocker.patch(
            "tubular.nominal.BaseNominalTransformer.transform",
            return_value=d.create_df_1(),
        )

        x.transform(df)

        assert (
            tubular.nominal.BaseNominalTransformer.transform.call_count == 1
        ), f"Not enough calls to BaseNominalTransformer.transform -\n  Expected: 1\n  Actual: {tubular.nominal.BaseNominalTransformer.transform.call_count}"

        # each call_args entry unpacks to (positional args, keyword args)
        call_args = tubular.nominal.BaseNominalTransformer.transform.call_args_list[0]
        call_pos_args = call_args[0]
        call_kwargs = call_args[1]

        h.assert_equal_dispatch(
            expected={},
            actual=call_kwargs,
            msg="kwargs for BaseNominalTransformer.transform in OneHotEncodingTransformer.transform",
        )

        expected_pos_args = (x, d.create_df_1())

        assert (
            len(call_pos_args) == 2
        ), f"Unexpected number of positional args in BaseNominalTransformer.transform call -\n  Expected: 2\n  Actual: {len(call_pos_args)}"

        h.assert_frame_equal_msg(
            expected=expected_pos_args[1],
            actual=call_pos_args[1],
            msg_tag="X positional arg in BaseNominalTransformer.transform call",
        )

        assert (
            expected_pos_args[0] == call_pos_args[0]
        ), "self positional arg in BaseNominalTransformer.transform call"
Example #17
0
    def test_expected_output(self, df_test, expected):
        """Check that OneHotEncodingTransformer.transform gives the expected frame for df_test.

        The transformer is fit on d.create_df_7() rather than on df_test before transform is called.
        """

        encoder = OneHotEncodingTransformer(columns="b")

        # fit on the whole dataset, separately from the input df, to work with the decorators
        training_frame = d.create_df_7()
        encoder.fit(training_frame)

        result = encoder.transform(df_test)

        h.assert_frame_equal_msg(
            msg_tag="Unspecified columns changed in transform",
            actual=result,
            expected=expected,
        )
    def test_expected_output_no_overwrite(self, df, expected):
        """Test a single column output from transform gives expected results, when not overwriting the original column.

        The year method is applied to column a and the result is written to the new column a_year.
        """

        x = SeriesDtMethodTransformer(
            new_column_name="a_year",
            pd_method_name="year",
            column="a",
            pd_method_kwargs={},
        )

        df_transformed = x.transform(df)

        # the tag names the method under test (year); it previously said "find"
        h.assert_frame_equal_msg(
            actual=df_transformed,
            expected=expected,
            msg_tag="Unexpected values in SeriesDtMethodTransformer.transform with year, not overwriting original column",
        )
    def test_expected_output_callable(self, df, expected):
        """Check transform output when pd_method_name is to_period, invoked with keyword arguments."""

        settings = {
            "new_column_name": "b_new",
            "pd_method_name": "to_period",
            "column": "b",
            "pd_method_kwargs": {"freq": "M"},
        }

        transformer = SeriesDtMethodTransformer(**settings)

        result = transformer.transform(df)

        h.assert_frame_equal_msg(
            msg_tag="Unexpected values in SeriesDtMethodTransformer.transform with to_period",
            expected=expected,
            actual=result,
        )
Example #20
0
    def test_expected_output_overwrite(self, df, expected):
        """Check transform output when the new column name is the same as the input column (b)."""

        padder = SeriesStrMethodTransformer(
            columns=["b"],
            new_column_name="b",
            pd_method_name="pad",
            pd_method_kwargs={"width": 10},
        )

        h.assert_frame_equal_msg(
            msg_tag="Unexpected values in SeriesStrMethodTransformer.transform with pad, overwriting original column",
            expected=expected,
            actual=padder.transform(df),
        )
    def test_expected_output_nulls(self, df, expected):
        """Check DateDifferenceTransformer.transform output for the null-column case."""

        settings = {
            "column_lower": "a",
            "column_upper": "b",
            "new_column_name": "Y",
            "units": "Y",
            "copy": True,
            "verbose": False,
        }

        date_diff = DateDifferenceTransformer(**settings)

        result = date_diff.transform(df)

        h.assert_frame_equal_msg(
            msg_tag="Unexpected values in DateDifferenceTransformer.transform (nulls)",
            expected=expected,
            actual=result,
        )
Example #22
0
    def test_expected_output_drop_cols_false(self, df, expected):
        """Check transform output when the transformer is created with drop_cols=False.

        Per the original author's note, the data covers positive year gaps, negative year gaps
        and missing values.
        """

        date_diff = DateDiffLeapYearTransformer(
            column_lower="a",
            column_upper="b",
            new_column_name="c",
            drop_cols=False,
        )

        h.assert_frame_equal_msg(
            msg_tag="Unexpected values in DateDiffLeapYearTransformer.transform (without drop_cols)",
            expected=expected,
            actual=date_diff.transform(df),
        )
Example #23
0
    def test_one_hot_encoder_transform_called(self, mocker):
        """Test that sklearn's OneHotEncoder.transform is called by OneHotEncodingTransformer.transform.

        OneHotEncoder.transform is patched after the transformer is fit, then the recorded call is
        checked for: a single call, no keyword args, exactly two positional args, the transformer
        itself as the first positional arg and the b column of the input frame as the second.
        """

        df = d.create_df_1()

        x = OneHotEncodingTransformer(columns="b")

        x.fit(df)

        # patched after fit, so only calls made from the transform call below are recorded
        mocker.patch("sklearn.preprocessing.OneHotEncoder.transform")

        x.transform(df)

        assert (
            sklearn.preprocessing.OneHotEncoder.transform.call_count == 1
        ), f"Not enough calls to OneHotEncoder.transform -\n  Expected: 1\n  Actual: {sklearn.preprocessing.OneHotEncoder.transform.call_count}"

        # each call_args entry unpacks to (positional args, keyword args)
        call_args = sklearn.preprocessing.OneHotEncoder.transform.call_args_list[0]
        call_pos_args = call_args[0]
        call_kwargs = call_args[1]

        h.assert_equal_dispatch(
            expected={},
            actual=call_kwargs,
            msg="kwargs for OneHotEncoder.transform in OneHotEncodingTransformer.transform",
        )

        assert (
            len(call_pos_args) == 2
        ), f"Unexpected number of positional args in OneHotEncoder.transform call -\n  Expected: 2\n  Actual: {len(call_pos_args)}"

        assert (
            call_pos_args[0] is x
        ), f"Unexpected positional arg (self, index 0) in OneHotEncoder.transform call -\n  Expected: self\n  Actual: {call_pos_args[0]}"

        h.assert_frame_equal_msg(
            expected=d.create_df_1()[["b"]],
            actual=call_pos_args[1],
            msg_tag="X positional arg in OneHotEncoder.transform call",
        )
Example #24
0
def test_pandas_assert_frame_called(mocker):
    """Test the call to pandas.testing.assert_frame_equal.

    pandas.testing.assert_frame_equal is spied on while h.assert_frame_equal_msg is called with two
    equal frames and check_dtype=True. The recorded call is checked for: a single call, one keyword
    arg (check_dtype, passed through unchanged) and two positional args equal to the expected and
    actual frames, in that order.
    """

    df = pd.DataFrame({"a": [1, 2, 3]})
    df2 = pd.DataFrame({"a": [1, 2, 3]})

    spy = mocker.spy(pandas.testing, "assert_frame_equal")

    h.assert_frame_equal_msg(expected=df, actual=df2, msg_tag="a", check_dtype=True)

    assert (
        spy.call_count == 1
    ), f"Unexpected number of calls to pd.testing.assert_frame_equal -\n  Expected: 1\n  Actual: {spy.call_count}"

    # each call_args entry unpacks to (positional args, keyword args)
    call_1_args = spy.call_args_list[0]
    call_1_pos_args = call_1_args[0]
    call_1_kwargs = call_1_args[1]

    call_1_expected_kwargs = {"check_dtype": True}

    call_1_expected_pos_args = (df, df2)

    assert len(call_1_expected_kwargs) == len(
        call_1_kwargs
    ), f"Unexpected number of kwargs -\n  Expected: {len(call_1_expected_kwargs)}\n  Actual: {len(call_1_kwargs)}"

    assert (
        call_1_expected_kwargs["check_dtype"] == call_1_kwargs["check_dtype"]
    ), f"""check_dtype kwarg unexpected -\n  Expected {call_1_expected_kwargs['check_dtype']}\n  Actual: {call_1_kwargs['check_dtype']}"""

    assert len(call_1_expected_pos_args) == len(
        call_1_pos_args
    ), f"Unexpected number of positional args -\n  Expected: {len(call_1_expected_pos_args)}\n  Actual: {len(call_1_pos_args)}"

    pd.testing.assert_frame_equal(call_1_expected_pos_args[0], call_1_pos_args[0])

    pd.testing.assert_frame_equal(call_1_expected_pos_args[1], call_1_pos_args[1])
Example #25
0
    def test_expected_output(self, df, expected):
        """Check OrdinalEncoderTransformer.transform output for hand-set mappings on columns b, d and f."""

        encoder = OrdinalEncoderTransformer(
            columns=["b", "d", "f"],
            response_column="a",
        )

        # assign the mappings dict by hand rather than fitting on df, so test works with helpers
        encoder.mappings = {
            "b": dict(zip("abcdef", range(1, 7))),
            "d": {level: level for level in range(1, 7)},
            "f": {False: 1, True: 2},
        }

        result = encoder.transform(df)

        h.assert_frame_equal_msg(
            msg_tag="Unexpected values in OrdinalEncoderTransformer.transform",
            expected=expected,
            actual=result,
        )