Example #1
0
    def test_with_dict_2_values(self, todf_cls, df):
        """Dict output with two keys converts to a frame equal to ``df``.

        The selectors are registered as 'b' then 'a'; the converted
        result is still expected to compare equal to the input frame.
        """
        selectors = [
            ('b', ItemSelector('b')),
            ('a', ItemSelector('a')),
        ]
        steps = [
            ('union', DictFeatureUnion(selectors)),
            ('to_df', todf_cls()),
        ]

        transformed = Pipeline(steps).fit_transform(df)

        assert transformed.equals(df)
Example #2
0
    def test_with_dict_column_name_raises(self, todf_cls, df):
        """Explicit column names combined with dict input raise ValueError."""
        union = DictFeatureUnion([
            ('b', ItemSelector('b')),
            ('a', ItemSelector('a')),
        ])
        to_df = todf_cls(columns=['c', 'd'])
        pipeline = Pipeline([('union', union), ('to_df', to_df)])

        with pytest.raises(ValueError) as raised:
            pipeline.fit_transform(df)

        # The message is compared verbatim rather than pattern-matched.
        message = str(raised.value)
        assert message == (
            "ToDataFrame with explicit column names cannot "
            "transform a dictionary because the dictionary's "
            "keys already determine the column names."
        )
Example #3
0
    def test_with_df(self, todf_cls, df):
        """A frame selected via ['a', 'b'] converts to one equal to ``df``."""
        select = ItemSelector(['a', 'b'])
        steps = [('select', select), ('to_df', todf_cls())]

        transformed = Pipeline(steps).fit_transform(df)

        assert transformed.equals(df)
Example #4
0
    def test_with_series(self, todf_cls, df):
        """Selecting 'a' alone yields a frame equal to ``df[['a']]``."""
        select = ItemSelector('a')
        pipeline = Pipeline([('select', select), ('to_df', todf_cls())])

        transformed = pipeline.fit_transform(df)

        single_column = df[['a']]
        assert transformed.equals(single_column)
Example #5
0
    def test_with_series_column_name(self, todf_cls, df):
        """Selecting 'a' with ``columns=['c']`` yields a frame with column 'c'."""
        select = ItemSelector('a')
        to_df = todf_cls(columns=['c'])
        pipeline = Pipeline([('select', select), ('to_df', to_df)])

        transformed = pipeline.fit_transform(df)

        # Build the expectation by renaming 'a' to 'c' and keeping only 'c'.
        renamed = df.rename(columns={'a': 'c'})
        assert transformed.equals(renamed[['c']])
Example #6
0
    def test_with_dict_1_value(self, todf_cls, df):
        """Dict output with the single key 'a' converts to ``df[['a']]``."""
        union = DictFeatureUnion([('a', ItemSelector('a'))])
        steps = [
            ('union', union),
            ('to_df', todf_cls()),
        ]

        transformed = Pipeline(steps).fit_transform(df)

        single_column = df[['a']]
        assert transformed.equals(single_column)
Example #7
0
    def test_with_2d_array(self, todf_cls, df):
        """The ``.values`` of columns 'a' and 'b' convert to columns 0 and 1."""

        def to_values(frame):
            # Hand the next step the selection's ``.values`` attribute.
            return frame.values

        steps = [
            ('select', ItemSelector(['a', 'b'])),
            ('values', FunctionTransformer(to_values, validate=False)),
            ('to_df', todf_cls()),
        ]

        transformed = Pipeline(steps).fit_transform(df)

        relabelled = df.rename(columns={'a': 0, 'b': 1})
        assert transformed.equals(relabelled)
Example #8
0
    def test_with_list(self, todf_cls, df):
        """A plain list built from column 'a' converts to a frame with column 0."""

        def to_list(selected):
            # Turn the selection into a list via ``.values.tolist()``.
            return selected.values.tolist()

        steps = [
            ('select', ItemSelector('a')),
            ('values', FunctionTransformer(to_list, validate=False)),
            ('to_df', todf_cls()),
        ]

        transformed = Pipeline(steps).fit_transform(df)

        relabelled = df.rename(columns={'a': 0})
        assert transformed.equals(relabelled[[0]])
Example #9
0
    def test_with_series_wrong_number_of_column_names_raises(
            self, todf_cls, df):
        """Two column names with the single selection 'a' raise ValueError."""
        select = ItemSelector('a')
        to_df = todf_cls(columns=['c', 'd'])
        pipeline = Pipeline([('select', select), ('to_df', to_df)])

        with pytest.raises(ValueError) as raised:
            pipeline.fit_transform(df)

        # The message is compared verbatim rather than pattern-matched.
        message = str(raised.value)
        assert message == (
            "ToDataFrame with more than one column name cannot "
            "transform a Series object."
        )
Example #10
0
    def test_with_df_column_name_raises(self, todf_cls, df):
        """Explicit column names with a ['a', 'b'] selection raise ValueError."""
        select = ItemSelector(['a', 'b'])
        to_df = todf_cls(columns=['c', 'd'])
        pipeline = Pipeline([('select', select), ('to_df', to_df)])

        with pytest.raises(ValueError) as raised:
            pipeline.fit_transform(df)

        # The message is compared verbatim rather than pattern-matched.
        message = str(raised.value)
        assert message == (
            "ToDataFrame with explicit column names cannot "
            "transform a DataFrame because the DataFrame's "
            "columns already determine the column names."
        )
Example #11
0
    def test_with_list_wrong_number_of_column_names_raises(self, todf_cls, df):
        """Two column names combined with list input raise ValueError."""

        def to_list(selected):
            # Turn the selection into a list via ``.values.tolist()``.
            return selected.values.tolist()

        steps = [
            ('select', ItemSelector('a')),
            ('values', FunctionTransformer(to_list, validate=False)),
            ('to_df', todf_cls(columns=['c', 'd'])),
        ]
        pipeline = Pipeline(steps)

        with pytest.raises(ValueError) as raised:
            pipeline.fit_transform(df)

        # The message is compared verbatim rather than pattern-matched.
        message = str(raised.value)
        assert message == (
            "ToDataFrame with more than one column name cannot "
            "transform a list."
        )
Example #12
0
    def test_with_array_wrong_number_of_column_names_raises(
            self, todf_cls, df):
        """Three column names for the values of two columns raise ValueError."""

        def to_values(frame):
            # Hand the next step the selection's ``.values`` attribute.
            return frame.values

        steps = [
            ('select', ItemSelector(['a', 'b'])),
            ('values', FunctionTransformer(to_values, validate=False)),
            ('to_df', todf_cls(columns=['c', 'd', 'e'])),
        ]
        pipeline = Pipeline(steps)

        with pytest.raises(ValueError) as raised:
            pipeline.fit_transform(df)

        # The message is compared verbatim rather than pattern-matched.
        message = str(raised.value)
        assert message == (
            "ToDataFrame was given data with 2 columns but "
            "was initialized with 3 column names."
        )
Example #13
0
    def test_with_dict_columns_sorted(self, todf_cls):
        """Columns come out sorted when dict keys are registered z..a.

        Builds a 26-column frame (one column per lowercase ASCII letter),
        feeds the selectors to DictFeatureUnion in reverse alphabetical
        order and checks that the resulting column list is sorted.
        """
        import string

        letters = string.ascii_lowercase
        df = pd.DataFrame({letter: np.arange(3) for letter in letters})

        # Register the selectors in reverse alphabetical order.
        reversed_selectors = [
            (letter, ItemSelector(letter)) for letter in reversed(letters)
        ]
        steps = [
            ('union', DictFeatureUnion(reversed_selectors)),
            ('to_df', todf_cls()),
        ]

        transformed = Pipeline(steps).fit_transform(df)

        column_names = transformed.columns.tolist()
        assert column_names == sorted(column_names)