Beispiel #1
0
    def it_can_apply_replace_strings(self, request, columns, derived_columns,
                                     expected_new_columns, expected_inplace):
        """Check that ``_apply`` returns a new Dataset and calls ``Series.replace``.

        ``trousse.dataset.get_df_from_csv`` is patched to return a generic
        mock DataFrame, and ``pd.Series.replace`` is patched to return a
        Series of 100 zeros, so only the wiring of ``_apply`` is exercised.

        Args:
            request: pytest request fixture handed to the mock helpers.
            columns: Columns the operation is applied to.
            derived_columns: Columns expected to hold the result, or ``None``.
            expected_new_columns: Column names that must exist in the
                returned dataset.
            expected_inplace: Expected value of the ``inplace`` keyword
                passed to ``pd.Series.replace``.
        """
        df = DataFrameMock.df_generic(sample_size=100)
        get_df_from_csv_ = function_mock(request,
                                         "trousse.dataset.get_df_from_csv")
        get_df_from_csv_.return_value = df
        dataset = Dataset(data_file="fake/path0")
        pd_replace_ = method_mock(request, pd.Series, "replace")
        pd_replace_.return_value = pd.Series([0] * 100)
        replace_strings = fop.ReplaceStrings(
            columns=columns,
            derived_columns=derived_columns,
            replacement_map={"a": "b"})

        replaced_dataset = replace_strings._apply(dataset)

        # The operation must hand back a distinct Dataset instance.
        assert replaced_dataset is not None
        assert replaced_dataset is not dataset
        assert isinstance(replaced_dataset, Dataset)
        for col in expected_new_columns:
            assert col in replaced_dataset.data.columns
        get_df_from_csv_.assert_called_once_with("fake/path0")
        # One ``replace`` call is expected per input column.
        assert len(pd_replace_.call_args_list) == len(columns)
        # First positional argument of the first call is compared with the
        # source column (presumably the Series ``replace`` was invoked on).
        pd.testing.assert_series_equal(pd_replace_.call_args_list[0][0][0],
                                       df[columns[0]])
        assert pd_replace_.call_args_list[0][1] == {
            "inplace": expected_inplace,
            "to_replace": {
                "a": "b"
            },
        }
Beispiel #2
0
def test_replace_strings(csv, columns, derived_columns, expected_csv):
    """Apply ``ReplaceStrings`` to a CSV-backed dataset and compare the result.

    The operation maps ``"d"`` to ``"a"``; the resulting data must match the
    expectation loaded from ``expected_csv``.
    """
    source_dataset = Dataset(data_file=csv)
    expected_frame = load_expectation(expected_csv, type_="csv")
    operation = fop.ReplaceStrings(
        columns=columns,
        derived_columns=derived_columns,
        replacement_map={"d": "a"},
    )

    result = operation(source_dataset)

    pd.testing.assert_frame_equal(result.data, expected_frame)
Beispiel #3
0
    def it_construct_from_args(self, request):
        """The constructor receives exactly the keyword arguments supplied."""
        init_mock = initializer_mock(request, fop.ReplaceStrings)
        ctor_kwargs = {
            "columns": ["col0"],
            "derived_columns": ["col1"],
            "replacement_map": {"a": "b"},
        }

        operation = fop.ReplaceStrings(**ctor_kwargs)

        # ``ANY`` stands in for the instance passed as ``self``.
        init_mock.assert_called_once_with(ANY, **ctor_kwargs)
        assert isinstance(operation, fop.ReplaceStrings)
Beispiel #4
0
    def it_knows_if_equal(self, other, expected_equal):
        """Check that ``==`` returns a plain bool with the expected value.

        Args:
            other: Object compared against a reference ``ReplaceStrings``.
            expected_equal: Whether ``other`` should compare equal to it.
        """
        feat_op = fop.ReplaceStrings(
            columns=["exam_num_col_0"],
            derived_columns=["replaced_exam_num_col_0"],
            replacement_map={
                "a": "b",
                "c": "d"
            },
        )

        equal = feat_op == other

        # ``bool`` cannot be subclassed, so this is as strict as an exact
        # type comparison while using the idiomatic check.
        assert isinstance(equal, bool)
        assert equal == expected_equal
Beispiel #5
0
    def it_knows_its_str(self):
        """Check the exact string rendering of a ``ReplaceStrings`` operation."""
        feat_op = fop.ReplaceStrings(
            columns=["exam_num_col_0"],
            derived_columns=["replaced_exam_num_col_0"],
            replacement_map={
                "a": "b",
                "c": "d"
            },
        )

        _str = str(feat_op)

        assert isinstance(_str, str)
        # The equality check below pins the full content, including the
        # tab/newline layout.
        assert _str == (
            "ReplaceStrings(\n\tcolumns=['exam_num_col_0'],\n\treplacement_map="
            "{'a': 'b', 'c': 'd'},\n\tderived_columns=['replaced_exam_num_col_0'],\n)"
        )
Beispiel #6
0
    def and_it_validates_its_arguments(self, request):
        """Each validation hook is called once with its constructor argument."""
        # Validator method name -> the argument it is expected to receive.
        expected_args = {
            "_validate_single_element_columns": ["col0"],
            "_validate_single_element_derived_columns": ["col1"],
            "_validate_replacement_map": {"a": "b"},
        }
        validator_mocks = {
            name: method_mock(request, fop.ReplaceStrings, name)
            for name in expected_args
        }

        operation = fop.ReplaceStrings(
            columns=["col0"],
            derived_columns=["col1"],
            replacement_map={"a": "b"},
        )

        for name, argument in expected_args.items():
            validator_mocks[name].assert_called_once_with(operation, argument)
Beispiel #7
0
    def it_can_replace_with_template_call(self, request):
        """Calling the operation delegates to ``_apply`` and tracks history."""
        apply_mock = method_mock(request, fop.ReplaceStrings, "_apply")
        history_mock = method_mock(request, Dataset, "track_history")
        generic_frame = DataFrameMock.df_generic(sample_size=100)
        csv_loader_mock = function_mock(
            request, "trousse.dataset.get_df_from_csv"
        )
        csv_loader_mock.return_value = generic_frame
        dataset_in = Dataset(data_file="fake/path0")
        dataset_out = Dataset(data_file="fake/path0")
        apply_mock.return_value = dataset_out
        operation = fop.ReplaceStrings(
            columns=["exam_num_col_0"],
            derived_columns=["replaced_exam_num_col_0"],
            replacement_map={"a": "b"},
        )

        result = operation(dataset_in)

        apply_mock.assert_called_once_with(operation, dataset_in)
        history_mock.assert_called_once_with(result, operation)
        # The object returned by ``_apply`` is passed through unchanged.
        assert result is dataset_out
Beispiel #8
0
def replacestrings_exam_num_col_0_replaced_exam_num_col_0_a_b():
    """Build a ``ReplaceStrings`` mapping "a" to "b".

    Reads ``exam_num_col_0`` and writes to ``replaced_exam_num_col_0``.
    """
    source_columns = ["exam_num_col_0"]
    target_columns = ["replaced_exam_num_col_0"]
    mapping = {"a": "b"}
    return fop.ReplaceStrings(
        columns=source_columns,
        derived_columns=target_columns,
        replacement_map=mapping,
    )
Beispiel #9
0
class DescribeReplaceStrings:
    """Unit tests for ``fop.ReplaceStrings``.

    Covers construction, argument validation, ``_apply``, the call
    protocol, equality and string rendering.
    """

    def it_construct_from_args(self, request):
        """The constructor receives exactly the keyword arguments supplied."""
        _init_ = initializer_mock(request, fop.ReplaceStrings)

        replace_strings = fop.ReplaceStrings(columns=["col0"],
                                             derived_columns=["col1"],
                                             replacement_map={"a": "b"})

        # ``ANY`` stands in for the instance passed as ``self``.
        _init_.assert_called_once_with(ANY,
                                       columns=["col0"],
                                       derived_columns=["col1"],
                                       replacement_map={"a": "b"})
        assert isinstance(replace_strings, fop.ReplaceStrings)

    def and_it_validates_its_arguments(self, request):
        """Each validation hook is called once with its constructor argument."""
        validate_columns_ = method_mock(request, fop.ReplaceStrings,
                                        "_validate_single_element_columns")
        validate_derived_columns_ = method_mock(
            request, fop.ReplaceStrings,
            "_validate_single_element_derived_columns")
        validate_replacement_map_ = method_mock(request, fop.ReplaceStrings,
                                                "_validate_replacement_map")

        replace_strings = fop.ReplaceStrings(columns=["col0"],
                                             derived_columns=["col1"],
                                             replacement_map={"a": "b"})

        validate_columns_.assert_called_once_with(replace_strings, ["col0"])
        validate_derived_columns_.assert_called_once_with(
            replace_strings, ["col1"])
        validate_replacement_map_.assert_called_once_with(
            replace_strings, {"a": "b"})

    @pytest.mark.parametrize(
        "columns, derived_columns, expected_new_columns, expected_inplace",
        [
            (["exam_num_col_0"], ["col1"], ["col1"], False),
            (["exam_num_col_0"], None, [], True),
        ],
    )
    def it_can_apply_replace_strings(self, request, columns, derived_columns,
                                     expected_new_columns, expected_inplace):
        """Check that ``_apply`` returns a new Dataset and calls ``Series.replace``.

        ``trousse.dataset.get_df_from_csv`` is patched to return a generic
        mock DataFrame, and ``pd.Series.replace`` is patched to return a
        Series of 100 zeros, so only the wiring of ``_apply`` is exercised.

        Args:
            request: pytest request fixture handed to the mock helpers.
            columns: Columns the operation is applied to.
            derived_columns: Columns expected to hold the result, or ``None``.
            expected_new_columns: Column names that must exist in the
                returned dataset.
            expected_inplace: Expected value of the ``inplace`` keyword
                passed to ``pd.Series.replace``.
        """
        df = DataFrameMock.df_generic(sample_size=100)
        get_df_from_csv_ = function_mock(request,
                                         "trousse.dataset.get_df_from_csv")
        get_df_from_csv_.return_value = df
        dataset = Dataset(data_file="fake/path0")
        pd_replace_ = method_mock(request, pd.Series, "replace")
        pd_replace_.return_value = pd.Series([0] * 100)
        replace_strings = fop.ReplaceStrings(
            columns=columns,
            derived_columns=derived_columns,
            replacement_map={"a": "b"})

        replaced_dataset = replace_strings._apply(dataset)

        # The operation must hand back a distinct Dataset instance.
        assert replaced_dataset is not None
        assert replaced_dataset is not dataset
        assert isinstance(replaced_dataset, Dataset)
        for col in expected_new_columns:
            assert col in replaced_dataset.data.columns
        get_df_from_csv_.assert_called_once_with("fake/path0")
        # One ``replace`` call is expected per input column.
        assert len(pd_replace_.call_args_list) == len(columns)
        # First positional argument of the first call is compared with the
        # source column (presumably the Series ``replace`` was invoked on).
        pd.testing.assert_series_equal(pd_replace_.call_args_list[0][0][0],
                                       df[columns[0]])
        assert pd_replace_.call_args_list[0][1] == {
            "inplace": expected_inplace,
            "to_replace": {
                "a": "b"
            },
        }

    def it_can_replace_with_template_call(self, request):
        """Calling the operation delegates to ``_apply`` and tracks history."""
        _apply_ = method_mock(request, fop.ReplaceStrings, "_apply")
        track_history_ = method_mock(request, Dataset, "track_history")
        df = DataFrameMock.df_generic(sample_size=100)
        get_df_from_csv_ = function_mock(request,
                                         "trousse.dataset.get_df_from_csv")
        get_df_from_csv_.return_value = df
        dataset_in = Dataset(data_file="fake/path0")
        dataset_out = Dataset(data_file="fake/path0")
        _apply_.return_value = dataset_out
        replace_strings = fop.ReplaceStrings(
            columns=["exam_num_col_0"],
            derived_columns=["replaced_exam_num_col_0"],
            replacement_map={"a": "b"},
        )

        replaced_dataset = replace_strings(dataset_in)

        _apply_.assert_called_once_with(replace_strings, dataset_in)
        track_history_.assert_called_once_with(replaced_dataset,
                                               replace_strings)
        # The object returned by ``_apply`` is passed through unchanged.
        assert replaced_dataset is dataset_out

    @pytest.mark.parametrize(
        "other, expected_equal",
        [
            # Identical arguments.
            (
                fop.ReplaceStrings(
                    columns=["exam_num_col_0"],
                    derived_columns=["replaced_exam_num_col_0"],
                    replacement_map={
                        "a": "b",
                        "c": "d"
                    },
                ),
                True,
            ),
            # Same replacement map, different key order.
            (
                fop.ReplaceStrings(
                    columns=["exam_num_col_0"],
                    derived_columns=["replaced_exam_num_col_0"],
                    replacement_map={
                        "c": "d",
                        "a": "b"
                    },
                ),
                True,
            ),
            # Different ``columns``.
            (
                fop.ReplaceStrings(
                    columns=["exam_num_col_1"],
                    derived_columns=["replaced_exam_num_col_0"],
                    replacement_map={
                        "a": "b",
                        "c": "d"
                    },
                ),
                False,
            ),
            # Different ``derived_columns``.
            (
                fop.ReplaceStrings(
                    columns=["exam_num_col_0"],
                    derived_columns=["replaced_exam_num_col_1"],
                    replacement_map={
                        "a": "b",
                        "c": "d"
                    },
                ),
                False,
            ),
            # Replacement map missing an entry.
            (
                fop.ReplaceStrings(
                    columns=["exam_num_col_0"],
                    derived_columns=["replaced_exam_num_col_0"],
                    replacement_map={
                        "a": "b",
                    },
                ),
                False,
            ),
            # Replacement map with a different entry.
            (
                fop.ReplaceStrings(
                    columns=["exam_num_col_0"],
                    derived_columns=["replaced_exam_num_col_0"],
                    replacement_map={
                        "c": "b",
                    },
                ),
                False,
            ),
            # Object of an unrelated type.
            (dict(), False),
        ],
    )
    def it_knows_if_equal(self, other, expected_equal):
        """Check that ``==`` returns a plain bool with the expected value.

        Args:
            other: Object compared against a reference ``ReplaceStrings``.
            expected_equal: Whether ``other`` should compare equal to it.
        """
        feat_op = fop.ReplaceStrings(
            columns=["exam_num_col_0"],
            derived_columns=["replaced_exam_num_col_0"],
            replacement_map={
                "a": "b",
                "c": "d"
            },
        )

        equal = feat_op == other

        # ``bool`` cannot be subclassed, so this is as strict as an exact
        # type comparison while using the idiomatic check.
        assert isinstance(equal, bool)
        assert equal == expected_equal

    def it_knows_its_str(self):
        """Check the exact string rendering of a ``ReplaceStrings`` operation."""
        feat_op = fop.ReplaceStrings(
            columns=["exam_num_col_0"],
            derived_columns=["replaced_exam_num_col_0"],
            replacement_map={
                "a": "b",
                "c": "d"
            },
        )

        _str = str(feat_op)

        assert isinstance(_str, str)
        assert _str == (
            "ReplaceStrings(\n\tcolumns=['exam_num_col_0'],\n\treplacement_map="
            "{'a': 'b', 'c': 'd'},\n\tderived_columns=['replaced_exam_num_col_0'],\n)"
        )
Beispiel #10
0
class DescribeTrousse:
    """Unit tests for ``fop.Trousse``, a container of feature operations."""

    @pytest.mark.parametrize(
        "operations",
        [
            (),
            (fop.FillNA(
                columns=["nan"],
                value=0,
            ), ),
            (
                fop.ReplaceStrings(
                    columns=["exam_num_col_0"],
                    derived_columns=["replaced_exam_num_col_0"],
                    replacement_map={"a": "b"},
                ),
                fop.FillNA(columns=["nan"], value=0),
            ),
        ],
    )
    def it_contructs_from_args(self, request, operations):
        """The constructor receives the operations as positional arguments.

        Args:
            request: pytest request fixture handed to the mock helper.
            operations: Tuple of zero or more feature operations.
        """
        _init_ = initializer_mock(request, fop.Trousse)

        trousse = fop.Trousse(*operations)

        # ``ANY`` stands in for the instance passed as ``self``.
        _init_.assert_called_once_with(ANY, *operations)
        assert isinstance(trousse, fop.Trousse)

    def it_knows_its_operations(
        self,
        replacestrings_exam_num_col_0_replaced_exam_num_col_0_a_b,
        fillna_col0_col1,
    ):
        """``operations`` exposes the operations as a tuple, in order."""
        trousse = fop.Trousse(
            replacestrings_exam_num_col_0_replaced_exam_num_col_0_a_b,
            fillna_col0_col1)

        operations = trousse.operations

        assert isinstance(operations, tuple)
        assert operations == (
            replacestrings_exam_num_col_0_replaced_exam_num_col_0_a_b,
            fillna_col0_col1,
        )

    def it_knows_how_to_call(
        self,
        request,
        replacestrings_exam_num_col_0_replaced_exam_num_col_0_a_b,
        fillna_col0_col1,
    ):
        """Calling a Trousse chains its operations, feeding each the previous output."""
        dataset_in = instance_mock(request, Dataset, "in")
        dataset_out_1 = instance_mock(request, Dataset, "1")
        dataset_out_2 = instance_mock(request, Dataset, "2")
        _call_replacestrings = method_mock(request, fop.ReplaceStrings,
                                           "__call__")
        _call_replacestrings.return_value = dataset_out_1
        _call_fillna = method_mock(request, fop.FillNA, "__call__")
        _call_fillna.return_value = dataset_out_2
        trousse = fop.Trousse(
            replacestrings_exam_num_col_0_replaced_exam_num_col_0_a_b,
            fillna_col0_col1)

        new_dataset = trousse(dataset_in)

        # The first operation sees the input dataset; the second sees the
        # first operation's output.
        _call_replacestrings.assert_called_once_with(
            replacestrings_exam_num_col_0_replaced_exam_num_col_0_a_b,
            dataset_in)
        _call_fillna.assert_called_once_with(fillna_col0_col1, dataset_out_1)
        assert isinstance(new_dataset, Dataset)
        assert new_dataset == dataset_out_2

    def it_knows_its_str(self, fillna_col0_col1, fillna_col1_col4):
        """Check the exact string rendering of a Trousse of two ``FillNA``."""
        trousse = fop.Trousse(fillna_col0_col1, fillna_col1_col4)

        _str = str(trousse)

        assert isinstance(_str, str)
        assert _str == (
            "Trousse: (FillNA(\n\tcolumns=['col0'],\n\tvalue=0,\n\t"
            "derived_columns=['col1'],\n), FillNA(\n\tcolumns=['col1']"
            ",\n\tvalue=0,\n\tderived_columns=['col4'],\n))")