Esempio n. 1
0
    def it_can_apply_ordinal_encoder(
        self,
        request,
        columns,
        derived_columns,
        expected_new_columns,
    ):
        """Check the result of ``_apply`` and how it calls ``fit_transform``.

        The CSV loader and ``sk_preproc.OrdinalEncoder.fit_transform`` are
        both patched, so the test only looks at the returned Dataset and at
        the calls recorded by the ``fit_transform`` mock.
        """
        source_df = DataFrameMock.df_generic(sample_size=100)
        load_csv_mock = function_mock(
            request, "trousse.dataset.get_df_from_csv"
        )
        load_csv_mock.return_value = source_df
        original = Dataset(data_file="fake/path0")
        fit_transform_mock = method_mock(
            request, sk_preproc.OrdinalEncoder, "fit_transform"
        )
        fit_transform_mock.return_value = pd.Series(range(100))
        feat_op = fop.OrdinalEncoder(
            columns=columns, derived_columns=derived_columns
        )

        result = feat_op._apply(original)

        # The operation must hand back a Dataset distinct from its input.
        assert result is not None
        assert result is not original
        assert isinstance(result, Dataset)
        missing = [
            name
            for name in expected_new_columns
            if name not in result.data.columns
        ]
        assert not missing
        load_csv_mock.assert_called_once_with("fake/path0")
        recorded_calls = fit_transform_mock.call_args_list
        assert len(recorded_calls) == len(columns)
        # Positional args of the first recorded call; index 1 is compared
        # with the single-column frame (index 0 is presumably the encoder
        # instance captured by the method mock -- confirm in method_mock).
        first_call_positional = recorded_calls[0][0]
        pd.testing.assert_frame_equal(
            first_call_positional[1], source_df[[columns[0]]]
        )
Esempio n. 2
0
    def it_knows_its_encoder(self):
        """Check that ``encoder`` is an ``sk_preproc.OrdinalEncoder``."""
        feat_op = fop.OrdinalEncoder(
            columns=["col0"],
            derived_columns=["col1"],
        )

        assert isinstance(feat_op.encoder, sk_preproc.OrdinalEncoder)
Esempio n. 3
0
def test_ordinal_encoder(csv, columns, derived_columns, expected_csv):
    """Encode the dataset loaded from ``csv`` and compare with the expectation.

    The operation is invoked through its call interface and the resulting
    ``data`` frame must equal the frame loaded from ``expected_csv``.
    """
    source = Dataset(data_file=csv)
    expected_frame = load_expectation(expected_csv, type_="csv")
    feat_op = fop.OrdinalEncoder(
        columns=columns,
        derived_columns=derived_columns,
    )

    result = feat_op(source)

    pd.testing.assert_frame_equal(result.data, expected_frame)
Esempio n. 4
0
    def it_knows_if_equal(self, other, expected_equal):
        """Check that ``==`` returns a real ``bool`` with the expected value.

        Args:
            other: Object compared against a reference ``OrdinalEncoder``
                built on ``exam_num_col_0`` -> ``encoded_exam_num_col_0``.
            expected_equal: Expected outcome of the comparison.
        """
        feat_op = fop.OrdinalEncoder(
            columns=["exam_num_col_0"],
            derived_columns=["encoded_exam_num_col_0"],
        )

        equal = feat_op == other

        # isinstance is the idiomatic type check; bool cannot be subclassed,
        # so this is exactly as strict as comparing type objects.
        assert isinstance(equal, bool)
        assert equal == expected_equal
Esempio n. 5
0
    def it_construct_from_args(self, request):
        """Check that construction forwards its keyword arguments to ``__init__``."""
        init_mock = initializer_mock(request, fop.OrdinalEncoder)
        init_kwargs = {"columns": ["col0"], "derived_columns": ["col1"]}

        instance = fop.OrdinalEncoder(**init_kwargs)

        # ANY stands in for the instance passed as ``self``.
        init_mock.assert_called_once_with(ANY, **init_kwargs)
        assert isinstance(instance, fop.OrdinalEncoder)
Esempio n. 6
0
    def and_it_validates_its_arguments(self, request):
        """Check that both single-element validators run once on construction."""
        columns_validator = method_mock(
            request,
            fop.OrdinalEncoder,
            "_validate_single_element_columns",
        )
        derived_columns_validator = method_mock(
            request,
            fop.OrdinalEncoder,
            "_validate_single_element_derived_columns",
        )

        feat_op = fop.OrdinalEncoder(
            columns=["col0"],
            derived_columns=["col1"],
        )

        # Each validator must have received the instance and its own list.
        expectations = (
            (columns_validator, ["col0"]),
            (derived_columns_validator, ["col1"]),
        )
        for validator, expected_arg in expectations:
            validator.assert_called_once_with(feat_op, expected_arg)
Esempio n. 7
0
    def it_can_encode_with_template_call(self, request):
        """Check that calling the operation delegates to ``_apply``.

        ``_apply`` and ``Dataset.track_history`` are patched; the call must
        return whatever ``_apply`` returns and record the operation on it.
        """
        apply_mock = method_mock(request, fop.OrdinalEncoder, "_apply")
        history_mock = method_mock(request, Dataset, "track_history")
        frame = DataFrameMock.df_generic(sample_size=100)
        load_csv_mock = function_mock(
            request, "trousse.dataset.get_df_from_csv"
        )
        load_csv_mock.return_value = frame
        given = Dataset(data_file="fake/path0")
        produced = Dataset(data_file="fake/path0")
        apply_mock.return_value = produced
        feat_op = fop.OrdinalEncoder(
            columns=["exam_num_col_0"],
            derived_columns=["exam_str_col_0"],
        )

        outcome = feat_op(given)

        apply_mock.assert_called_once_with(feat_op, given)
        history_mock.assert_called_once_with(outcome, feat_op)
        assert outcome is produced
Esempio n. 8
0
class DescribeOrdinalEncoder:
    """Unit tests for ``fop.OrdinalEncoder``.

    Collaborators (CSV loading, the ``sk_preproc`` encoder, Dataset history
    tracking) are replaced with mocks where a test needs to observe calls.
    """

    def it_construct_from_args(self, request):
        """Check that construction forwards its keyword arguments to ``__init__``."""
        _init_ = initializer_mock(request, fop.OrdinalEncoder)

        ordinal_encoder = fop.OrdinalEncoder(columns=["col0"],
                                             derived_columns=["col1"])

        # ANY stands in for the instance passed as ``self``.
        _init_.assert_called_once_with(
            ANY,
            columns=["col0"],
            derived_columns=["col1"],
        )
        assert isinstance(ordinal_encoder, fop.OrdinalEncoder)

    def and_it_validates_its_arguments(self, request):
        """Check that both single-element validators run once on construction."""
        validate_columns_ = method_mock(request, fop.OrdinalEncoder,
                                        "_validate_single_element_columns")
        validate_derived_columns_ = method_mock(
            request, fop.OrdinalEncoder,
            "_validate_single_element_derived_columns")

        ordinal_encoder = fop.OrdinalEncoder(columns=["col0"],
                                             derived_columns=["col1"])

        validate_columns_.assert_called_once_with(ordinal_encoder, ["col0"])
        validate_derived_columns_.assert_called_once_with(
            ordinal_encoder, ["col1"])

    def it_knows_its_encoder(self):
        """Check that ``encoder`` is an ``sk_preproc.OrdinalEncoder``."""
        ordinal_encoder = fop.OrdinalEncoder(columns=["col0"],
                                             derived_columns=["col1"])

        encoder_attr = ordinal_encoder.encoder

        assert isinstance(encoder_attr, sk_preproc.OrdinalEncoder)

    @pytest.mark.parametrize(
        "columns, derived_columns, expected_new_columns",
        [
            (
                ["exam_str_col_0"],
                ["col1"],
                ["col1"],
            ),
            (
                ["exam_str_col_0"],
                None,
                [],
            ),
        ],
    )
    def it_can_apply_ordinal_encoder(
        self,
        request,
        columns,
        derived_columns,
        expected_new_columns,
    ):
        """Check the result of ``_apply`` and how it calls ``fit_transform``.

        The CSV loader and ``sk_preproc.OrdinalEncoder.fit_transform`` are
        both patched, so the test only looks at the returned Dataset and at
        the calls recorded by the ``fit_transform`` mock.
        """
        df = DataFrameMock.df_generic(sample_size=100)
        get_df_from_csv_ = function_mock(request,
                                         "trousse.dataset.get_df_from_csv")
        get_df_from_csv_.return_value = df
        dataset = Dataset(data_file="fake/path0")
        sk_fit_transform_ = method_mock(request, sk_preproc.OrdinalEncoder,
                                        "fit_transform")
        sk_fit_transform_.return_value = pd.Series(range(100))
        ordinal_encoder = fop.OrdinalEncoder(
            columns=columns,
            derived_columns=derived_columns,
        )

        encoded_dataset = ordinal_encoder._apply(dataset)

        # The operation must hand back a Dataset distinct from its input.
        assert encoded_dataset is not None
        assert encoded_dataset is not dataset
        assert isinstance(encoded_dataset, Dataset)
        for col in expected_new_columns:
            assert col in encoded_dataset.data.columns
        get_df_from_csv_.assert_called_once_with("fake/path0")
        assert len(sk_fit_transform_.call_args_list) == len(columns)
        # Second positional argument of the first recorded call is compared
        # with the single-column frame selected from the source data.
        pd.testing.assert_frame_equal(
            sk_fit_transform_.call_args_list[0][0][1], df[[columns[0]]])

    def it_can_encode_with_template_call(self, request):
        """Check that calling the operation delegates to ``_apply``.

        ``_apply`` and ``Dataset.track_history`` are patched; the call must
        return whatever ``_apply`` returns and record the operation on it.
        """
        _apply_ = method_mock(request, fop.OrdinalEncoder, "_apply")
        track_history_ = method_mock(request, Dataset, "track_history")
        df = DataFrameMock.df_generic(sample_size=100)
        get_df_from_csv_ = function_mock(request,
                                         "trousse.dataset.get_df_from_csv")
        get_df_from_csv_.return_value = df
        dataset_in = Dataset(data_file="fake/path0")
        dataset_out = Dataset(data_file="fake/path0")
        _apply_.return_value = dataset_out
        ordinal_encoder = fop.OrdinalEncoder(
            columns=["exam_num_col_0"],
            derived_columns=["exam_str_col_0"],
        )

        replaced_dataset = ordinal_encoder(dataset_in)

        _apply_.assert_called_once_with(ordinal_encoder, dataset_in)
        track_history_.assert_called_once_with(replaced_dataset,
                                               ordinal_encoder)
        assert replaced_dataset is dataset_out

    @pytest.mark.parametrize(
        "other, expected_equal",
        [
            (
                fop.OrdinalEncoder(
                    columns=["exam_num_col_0"],
                    derived_columns=["encoded_exam_num_col_0"],
                ),
                True,
            ),
            (
                fop.OrdinalEncoder(
                    columns=["exam_num_col_1"],
                    derived_columns=["encoded_exam_num_col_0"],
                ),
                False,
            ),
            (
                fop.OrdinalEncoder(
                    columns=["exam_num_col_0"],
                    derived_columns=["encoded_exam_num_col_1"],
                ),
                False,
            ),
            (
                fop.OrdinalEncoder(
                    columns=["exam_num_col_1"],
                    derived_columns=["encoded_exam_num_col_1"],
                ),
                False,
            ),
            (dict(), False),
        ],
    )
    def it_knows_if_equal(self, other, expected_equal):
        """Check that ``==`` returns a real ``bool`` with the expected value.

        Args:
            other: Object compared against a reference ``OrdinalEncoder``
                built on ``exam_num_col_0`` -> ``encoded_exam_num_col_0``.
            expected_equal: Expected outcome of the comparison.
        """
        feat_op = fop.OrdinalEncoder(
            columns=["exam_num_col_0"],
            derived_columns=["encoded_exam_num_col_0"],
        )

        equal = feat_op == other

        # isinstance is the idiomatic type check; bool cannot be subclassed,
        # so this is exactly as strict as comparing type objects.
        assert isinstance(equal, bool)
        assert equal == expected_equal