def test_super_transform_called(self, mocker):
        """Check that DateDifferenceTransformer.transform calls BaseTransformer.transform.

        The recorded call is expected to carry the input frame as its only
        positional argument and no keyword arguments.
        """

        input_df = d.create_datediff_test_df()

        transformer = DateDifferenceTransformer(
            column_lower="a",
            column_upper="b",
            new_column_name="Y",
            units="Y",
            copy=True,
            verbose=False,
        )

        # Presumably keyed by call index (0 = first call) -- confirm against
        # the assert_function_call helper.
        first_call = {"args": (d.create_datediff_test_df(), ), "kwargs": {}}
        expected_call_args = {0: first_call}

        # The patched parent transform hands back a fresh copy of the fixture.
        patched_call = h.assert_function_call(
            mocker,
            tubular.base.BaseTransformer,
            "transform",
            expected_call_args,
            return_value=d.create_datediff_test_df(),
        )

        with patched_call:
            transformer.transform(input_df)
    def test_attributes_unchanged_by_transform(self):
        """Check that transform leaves the attributes set in init untouched.

        Two identically configured transformers are built; only the first is
        used to transform, then the attributes of both are compared.
        """

        def build_transformer():
            # A fresh kwargs dict is created on every call so the two
            # transformers never share a mutable object.
            return SeriesDtMethodTransformer(
                new_column_name="b_new",
                pd_method_name="to_period",
                column="b",
                pd_method_kwargs={"freq": "M"},
            )

        input_df = d.create_datediff_test_df()

        used = build_transformer()
        untouched = build_transformer()

        used.transform(input_df)

        checked_attributes = (
            "new_column_name",
            "pd_method_name",
            "columns",
            "pd_method_kwargs",
        )

        for attribute in checked_attributes:
            assert getattr(used, attribute) == getattr(untouched, attribute), (
                attribute + " changed by SeriesDtMethodTransformer.transform"
            )
    def expected_df_3():
        """Build the expected frame for test_expected_output_callable."""

        expected = d.create_datediff_test_df()

        # "b_new" is column "b" passed through .dt.to_period with freq "M".
        monthly_periods = expected["b"].dt.to_period("M")
        expected["b_new"] = monthly_periods

        return expected
    def expected_df_2():
        """Build the expected frame for test_expected_output_overwrite."""

        # Values that replace the original contents of column "a".
        overwritten_a = [1993, 2000, 2018, 2018, 2018, 2018, 2018, 1985]

        expected = d.create_datediff_test_df()
        expected["a"] = overwritten_a

        return expected
    def test_super_transform_called(self, mocker):
        """Test that SeriesDtMethodTransformer.transform calls BaseTransformer.transform.

        The recorded call is expected to carry the input frame as its only
        positional argument and no keyword arguments.
        """

        df = d.create_datediff_test_df()

        x = SeriesDtMethodTransformer(new_column_name="a2",
                                      pd_method_name="year",
                                      column="a")

        expected_call_args = {
            0: {
                "args": (d.create_datediff_test_df(), ),
                "kwargs": {}
            }
        }

        # Supply a real frame as the patched return value, as the other
        # super-transform tests in this file do; otherwise the remainder of
        # transform presumably operates on a mock instead of a DataFrame.
        with h.assert_function_call(
                mocker,
                tubular.base.BaseTransformer,
                "transform",
                expected_call_args,
                return_value=d.create_datediff_test_df(),
        ):

            x.transform(df)
# Example no. 6
# 0
    def test_super_transform_call(self, mocker):
        """Check the call ToDatetimeTransformer.transform makes to BaseTransformer.transform.

        The recorded call is expected to carry the input frame as its only
        positional argument and no keyword arguments.
        """

        input_df = d.create_datediff_test_df()

        transformer = ToDatetimeTransformer(column="a", new_column_name="Y")

        # Presumably keyed by call index (0 = first call) -- confirm against
        # the assert_function_call helper.
        first_call = {"args": (d.create_datediff_test_df(), ), "kwargs": {}}
        expected_call_args = {0: first_call}

        # The patched parent transform hands back a fresh copy of the fixture.
        patched_call = h.assert_function_call(
            mocker,
            tubular.base.BaseTransformer,
            "transform",
            expected_call_args,
            return_value=d.create_datediff_test_df(),
        )

        with patched_call:
            transformer.transform(input_df)
def _datediff_expected_df(diff_column, diff_values):
    """Build an expected frame: the shared "a"/"b" date columns plus one difference column.

    Parameters
    ----------
    diff_column : str
        Name of the column holding the expected differences.

    diff_values : list
        Expected difference for each of the eight rows.

    Returns
    -------
    pd.DataFrame
        Frame with columns "a", "b" and ``diff_column``, in that order.

    """

    return pd.DataFrame({
        "a": [
            datetime.datetime(1993, 9, 27, 11, 58, 58),
            datetime.datetime(2000, 3, 19, 12, 59, 59),
            datetime.datetime(2018, 11, 10, 11, 59, 59),
            datetime.datetime(2018, 10, 10, 11, 59, 59),
            datetime.datetime(2018, 10, 10, 11, 59, 59),
            datetime.datetime(2018, 10, 10, 10, 59, 59),
            datetime.datetime(2018, 12, 10, 11, 59, 59),
            datetime.datetime(1985, 7, 23, 11, 59, 59),
        ],
        "b": [
            datetime.datetime(2020, 5, 1, 12, 59, 59),
            datetime.datetime(2019, 12, 25, 11, 58, 58),
            datetime.datetime(2018, 11, 10, 11, 59, 59),
            datetime.datetime(2018, 11, 10, 11, 59, 59),
            datetime.datetime(2018, 9, 10, 9, 59, 59),
            datetime.datetime(2015, 11, 10, 11, 59, 59),
            datetime.datetime(2015, 11, 10, 12, 59, 59),
            datetime.datetime(2015, 7, 23, 11, 59, 59),
        ],
        diff_column: diff_values,
    })


class TestTransform(object):
    """Tests for DateDifferenceTransformer.transform()."""

    # NOTE: the expected_df_* builders take no ``self`` on purpose; they are
    # called while the class body executes, inside the parametrize decorators.

    def expected_df_1():
        """Expected output for test_expected_output_units_Y."""

        return _datediff_expected_df(
            "Y",
            [
                26.59340677135105,
                19.76757257798535,
                0.0,
                0.08487511721664373,
                -0.08236536912690427,
                -2.915756882984136,
                -3.082769210410435,
                29.999247075573077,
            ],
        )

    def expected_df_2():
        """Expected output for test_expected_output_units_M."""

        return _datediff_expected_df(
            "M",
            [
                319.12088125621256,
                237.21087093582423,
                0.0,
                1.0185014065997249,
                -0.9883844295228512,
                -34.989082595809634,
                -36.993230524925224,
                359.9909649068769,
            ],
        )

    def expected_df_3():
        """Expected output for test_expected_output_units_D."""

        return _datediff_expected_df(
            "D",
            [
                9713.042372685186,
                7219.957627314815,
                0.0,
                31.0,
                -30.083333333333332,
                -1064.9583333333333,
                -1125.9583333333333,
                10957.0,
            ],
        )

    def expected_df_4():
        """Expected output for test_expected_output_units_h."""

        return _datediff_expected_df(
            "h",
            [
                233113.01694444445,
                173278.98305555555,
                0.0,
                744.0,
                -722.0,
                -25559.0,
                -27023.0,
                262968.0,
            ],
        )

    def expected_df_5():
        """Expected output for test_expected_output_units_m."""

        return _datediff_expected_df(
            "m",
            [
                13986781.016666668,
                10396738.983333332,
                0.0,
                44640.0,
                -43320.0,
                -1533540.0,
                -1621380.0,
                15778080.0,
            ],
        )

    def expected_df_6():
        """Expected output for test_expected_output_units_s."""

        return _datediff_expected_df(
            "s",
            [
                839206861.0,
                623804339.0,
                0.0,
                2678400.0,
                -2599200.0,
                -92012400.0,
                -97282800.0,
                946684800.0,
            ],
        )

    def expected_df_7():
        """Expected output for test_expected_output_nulls."""

        # np.nan rather than np.NaN: the capitalised alias was removed in
        # NumPy 2.0.
        df = pd.DataFrame(
            {
                "a": [
                    datetime.datetime(1993, 9, 27, 11, 58, 58),
                    np.nan,
                ],
                "b": [
                    np.nan,
                    datetime.datetime(2019, 12, 25, 11, 58, 58),
                ],
                "Y": [
                    np.nan,
                    np.nan,
                ],
            },
            index=[0, 1],
        )

        return df

    def _assert_expected_output_for_units(self, df, expected, units):
        """Transform ``df`` with the given units and compare against ``expected``.

        The new column is named after the units code, matching the column
        names used by the expected_df_* builders.
        """

        x = DateDifferenceTransformer(
            column_lower="a",
            column_upper="b",
            new_column_name=units,
            units=units,
            copy=True,
            verbose=False,
        )

        df_transformed = x.transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg="Unexpected values in DateDifferenceTransformer.transform",
        )

    def test_arguments(self):
        """Test that transform has expected arguments."""

        h.test_function_arguments(func=DateDifferenceTransformer.transform,
                                  expected_arguments=["self", "X"])

    def test_super_transform_called(self, mocker):
        """Test that BaseTransformer.transform called."""

        df = d.create_datediff_test_df()

        x = DateDifferenceTransformer(
            column_lower="a",
            column_upper="b",
            new_column_name="Y",
            units="Y",
            copy=True,
            verbose=False,
        )

        expected_call_args = {
            0: {
                "args": (d.create_datediff_test_df(), ),
                "kwargs": {}
            }
        }

        with h.assert_function_call(
                mocker,
                tubular.base.BaseTransformer,
                "transform",
                expected_call_args,
                return_value=d.create_datediff_test_df(),
        ):

            x.transform(df)

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_datediff_test_df(), expected_df_1()) +
        h.index_preserved_params(d.create_datediff_test_df(), expected_df_1()),
    )
    def test_expected_output_units_Y(self, df, expected):
        """Test that the output is expected from transform, when units is Y.

        The expected values cover positive, zero and negative gaps.

        """

        self._assert_expected_output_for_units(df, expected, "Y")

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_datediff_test_df(), expected_df_2()) +
        h.index_preserved_params(d.create_datediff_test_df(), expected_df_2()),
    )
    def test_expected_output_units_M(self, df, expected):
        """Test that the output is expected from transform, when units is M.

        The expected values cover positive, zero and negative gaps.

        """

        self._assert_expected_output_for_units(df, expected, "M")

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_datediff_test_df(), expected_df_3()) +
        h.index_preserved_params(d.create_datediff_test_df(), expected_df_3()),
    )
    def test_expected_output_units_D(self, df, expected):
        """Test that the output is expected from transform, when units is D.

        The expected values cover positive, zero and negative gaps.

        """

        self._assert_expected_output_for_units(df, expected, "D")

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_datediff_test_df(), expected_df_4()) +
        h.index_preserved_params(d.create_datediff_test_df(), expected_df_4()),
    )
    def test_expected_output_units_h(self, df, expected):
        """Test that the output is expected from transform, when units is h.

        The expected values cover positive, zero and negative gaps.

        """

        self._assert_expected_output_for_units(df, expected, "h")

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_datediff_test_df(), expected_df_5()) +
        h.index_preserved_params(d.create_datediff_test_df(), expected_df_5()),
    )
    def test_expected_output_units_m(self, df, expected):
        """Test that the output is expected from transform, when units is m.

        The expected values cover positive, zero and negative gaps.

        """

        self._assert_expected_output_for_units(df, expected, "m")

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_datediff_test_df(), expected_df_6()) +
        h.index_preserved_params(d.create_datediff_test_df(), expected_df_6()),
    )
    def test_expected_output_units_s(self, df, expected):
        """Test that the output is expected from transform, when units is s.

        The expected values cover positive, zero and negative gaps.

        """

        self._assert_expected_output_for_units(df, expected, "s")

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_datediff_test_nulls_df(), expected_df_7())
        + h.index_preserved_params(d.create_datediff_test_nulls_df(),
                                   expected_df_7()),
    )
    def test_expected_output_nulls(self, df, expected):
        """Test that the output is expected from transform, when columns are nulls."""

        x = DateDifferenceTransformer(
            column_lower="a",
            column_upper="b",
            new_column_name="Y",
            units="Y",
            copy=True,
            verbose=False,
        )

        df_transformed = x.transform(df)

        h.assert_frame_equal_msg(
            actual=df_transformed,
            expected=expected,
            msg_tag=
            "Unexpected values in DateDifferenceTransformer.transform (nulls)",
        )
class TestTransform(object):
    """Tests for SeriesDtMethodTransformer.transform()."""

    # NOTE: the expected_df_* builders take no ``self`` on purpose; they are
    # called while the class body executes, inside the parametrize decorators.

    def expected_df_1():
        """Expected output of test_expected_output_no_overwrite."""

        df = d.create_datediff_test_df()

        df["a_year"] = [1993, 2000, 2018, 2018, 2018, 2018, 2018, 1985]

        return df

    def expected_df_2():
        """Expected output of test_expected_output_overwrite."""

        df = d.create_datediff_test_df()

        df["a"] = [1993, 2000, 2018, 2018, 2018, 2018, 2018, 1985]

        return df

    def expected_df_3():
        """Expected output of test_expected_output_callable."""

        df = d.create_datediff_test_df()

        df["b_new"] = df["b"].dt.to_period("M")

        return df

    def test_arguments(self):
        """Test that transform has expected arguments."""

        h.test_function_arguments(func=SeriesDtMethodTransformer.transform,
                                  expected_arguments=["self", "X"])

    def test_super_transform_called(self, mocker):
        """Test that BaseTransformer.transform called."""

        df = d.create_datediff_test_df()

        x = SeriesDtMethodTransformer(new_column_name="a2",
                                      pd_method_name="year",
                                      column="a")

        expected_call_args = {
            0: {
                "args": (d.create_datediff_test_df(), ),
                "kwargs": {}
            }
        }

        # Supply a real frame as the patched return value so the remainder of
        # transform works on a DataFrame; without it the patched call
        # presumably returns a mock object.
        with h.assert_function_call(
                mocker,
                tubular.base.BaseTransformer,
                "transform",
                expected_call_args,
                return_value=d.create_datediff_test_df(),
        ):

            x.transform(df)

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_datediff_test_df(), expected_df_1()) +
        h.index_preserved_params(d.create_datediff_test_df(), expected_df_1()),
    )
    def test_expected_output_no_overwrite(self, df, expected):
        """Test a single column output from transform gives expected results, when not overwriting the original column."""

        x = SeriesDtMethodTransformer(
            new_column_name="a_year",
            pd_method_name="year",
            column="a",
            pd_method_kwargs={},
        )

        df_transformed = x.transform(df)

        h.assert_frame_equal_msg(
            actual=df_transformed,
            expected=expected,
            msg_tag=
            "Unexpected values in SeriesDtMethodTransformer.transform with year, not overwriting original column",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_datediff_test_df(), expected_df_2()) +
        h.index_preserved_params(d.create_datediff_test_df(), expected_df_2()),
    )
    def test_expected_output_overwrite(self, df, expected):
        """Test a single column output from transform gives expected results, when overwriting the original column."""

        x = SeriesDtMethodTransformer(
            new_column_name="a",
            pd_method_name="year",
            column="a",
            pd_method_kwargs={},
        )

        df_transformed = x.transform(df)

        h.assert_frame_equal_msg(
            actual=df_transformed,
            expected=expected,
            msg_tag=
            "Unexpected values in SeriesDtMethodTransformer.transform with year, overwriting original column",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_datediff_test_df(), expected_df_3()) +
        h.index_preserved_params(d.create_datediff_test_df(), expected_df_3()),
    )
    def test_expected_output_callable(self, df, expected):
        """Test transform gives expected results, when pd_method_name names a callable .dt method (to_period)."""

        x = SeriesDtMethodTransformer(
            new_column_name="b_new",
            pd_method_name="to_period",
            column="b",
            pd_method_kwargs={"freq": "M"},
        )

        df_transformed = x.transform(df)

        h.assert_frame_equal_msg(
            actual=df_transformed,
            expected=expected,
            msg_tag=
            "Unexpected values in SeriesDtMethodTransformer.transform with to_period",
        )

    def test_attributes_unchanged_by_transform(self):
        """Test that attributes set in init are unchanged by the transform method."""

        df = d.create_datediff_test_df()

        x = SeriesDtMethodTransformer(
            new_column_name="b_new",
            pd_method_name="to_period",
            column="b",
            pd_method_kwargs={"freq": "M"},
        )

        # Identically configured reference that is never used to transform.
        x2 = SeriesDtMethodTransformer(
            new_column_name="b_new",
            pd_method_name="to_period",
            column="b",
            pd_method_kwargs={"freq": "M"},
        )

        x.transform(df)

        for attribute in (
                "new_column_name",
                "pd_method_name",
                "columns",
                "pd_method_kwargs",
        ):
            assert getattr(x, attribute) == getattr(x2, attribute), (
                attribute + " changed by SeriesDtMethodTransformer.transform"
            )