def test_with_dict_2_values(self, todf_cls, df):
    """Check that a two-entry DictFeatureUnion converts back to ``df``.

    The selectors are listed as 'b' then 'a'; the converted result is
    still expected to equal the ``df`` fixture.
    """
    selectors = [
        ('b', ItemSelector('b')),
        ('a', ItemSelector('a')),
    ]
    steps = [
        ('union', DictFeatureUnion(selectors)),
        ('to_df', todf_cls()),
    ]
    converted = Pipeline(steps).fit_transform(df)
    assert converted.equals(df)
def test_with_dict_column_name_raises(self, todf_cls, df):
    """Check that explicit column names plus DictFeatureUnion input raise.

    The pipeline is built outside the ``pytest.raises`` block so that only
    ``fit_transform`` is expected to raise the ValueError.
    """
    message = ("ToDataFrame with explicit column names cannot "
               "transform a dictionary because the dictionary's "
               "keys already determine the column names.")
    selectors = [
        ('b', ItemSelector('b')),
        ('a', ItemSelector('a')),
    ]
    pipeline = Pipeline([
        ('union', DictFeatureUnion(selectors)),
        ('to_df', todf_cls(columns=['c', 'd'])),
    ])

    with pytest.raises(ValueError) as excinfo:
        pipeline.fit_transform(df)

    assert str(excinfo.value) == message
def test_with_df(self, todf_cls, df):
    """Check that selecting columns 'a' and 'b' and converting equals ``df``."""
    steps = [
        ('select', ItemSelector(['a', 'b'])),
        ('to_df', todf_cls()),
    ]
    converted = Pipeline(steps).fit_transform(df)
    assert converted.equals(df)
def test_with_series(self, todf_cls, df):
    """Check that selecting the single column 'a' converts to ``df[['a']]``."""
    steps = [
        ('select', ItemSelector('a')),
        ('to_df', todf_cls()),
    ]
    converted = Pipeline(steps).fit_transform(df)
    single_column = df[['a']]
    assert converted.equals(single_column)
def test_with_series_column_name(self, todf_cls, df):
    """Check that ``columns=['c']`` names the column selected as 'a'.

    The expected frame is ``df`` with 'a' renamed to 'c', restricted to
    that one column.
    """
    steps = [
        ('select', ItemSelector('a')),
        ('to_df', todf_cls(columns=['c'])),
    ]
    converted = Pipeline(steps).fit_transform(df)

    renamed = df.rename(columns={'a': 'c'})
    wanted = renamed[['c']]
    assert converted.equals(wanted)
def test_with_dict_1_value(self, todf_cls, df):
    """Check that a one-entry DictFeatureUnion converts to ``df[['a']]``."""
    union = DictFeatureUnion([
        ('a', ItemSelector('a')),
    ])
    steps = [
        ('union', union),
        ('to_df', todf_cls()),
    ]
    converted = Pipeline(steps).fit_transform(df)
    single_column = df[['a']]
    assert converted.equals(single_column)
def test_with_2d_array(self, todf_cls, df):
    """Check that the ``.values`` of columns 'a' and 'b' get names 0 and 1.

    The expected frame is ``df`` with 'a' renamed to 0 and 'b' renamed to 1.
    """
    def to_values(frame):
        # Drop the pandas wrapper and hand on only the underlying values.
        return frame.values

    steps = [
        ('select', ItemSelector(['a', 'b'])),
        ('values', FunctionTransformer(to_values, validate=False)),
        ('to_df', todf_cls()),
    ]
    converted = Pipeline(steps).fit_transform(df)
    wanted = df.rename(columns={'a': 0, 'b': 1})
    assert converted.equals(wanted)
def test_with_list(self, todf_cls, df):
    """Check that a plain list of column 'a' values converts to a column 0.

    The expected frame is ``df`` with 'a' renamed to 0, restricted to
    that column.
    """
    def to_list(selected):
        # Turn the selected column into a plain Python list.
        return selected.values.tolist()

    steps = [
        ('select', ItemSelector('a')),
        ('values', FunctionTransformer(to_list, validate=False)),
        ('to_df', todf_cls()),
    ]
    converted = Pipeline(steps).fit_transform(df)

    renamed = df.rename(columns={'a': 0})
    wanted = renamed[[0]]
    assert converted.equals(wanted)
def test_with_series_wrong_number_of_column_names_raises(
        self, todf_cls, df):
    """Check that two column names for the single column 'a' raise.

    The pipeline is built outside the ``pytest.raises`` block so that only
    ``fit_transform`` is expected to raise the ValueError.
    """
    message = ("ToDataFrame with more than one column name cannot "
               "transform a Series object.")
    pipeline = Pipeline([
        ('select', ItemSelector('a')),
        ('to_df', todf_cls(columns=['c', 'd'])),
    ])

    with pytest.raises(ValueError) as excinfo:
        pipeline.fit_transform(df)

    assert str(excinfo.value) == message
def test_with_df_column_name_raises(self, todf_cls, df):
    """Check that explicit column names plus a two-column selection raise.

    The pipeline is built outside the ``pytest.raises`` block so that only
    ``fit_transform`` is expected to raise the ValueError.
    """
    message = ("ToDataFrame with explicit column names cannot "
               "transform a DataFrame because the DataFrame's "
               "columns already determine the column names.")
    pipeline = Pipeline([
        ('select', ItemSelector(['a', 'b'])),
        ('to_df', todf_cls(columns=['c', 'd'])),
    ])

    with pytest.raises(ValueError) as excinfo:
        pipeline.fit_transform(df)

    assert str(excinfo.value) == message
def test_with_list_wrong_number_of_column_names_raises(self, todf_cls, df):
    """Check that two column names for a plain-list input raise.

    The pipeline is built outside the ``pytest.raises`` block so that only
    ``fit_transform`` is expected to raise the ValueError.
    """
    def to_list(selected):
        # Turn the selected column into a plain Python list.
        return selected.values.tolist()

    message = ("ToDataFrame with more than one column name cannot "
               "transform a list.")
    pipeline = Pipeline([
        ('select', ItemSelector('a')),
        ('values', FunctionTransformer(to_list, validate=False)),
        ('to_df', todf_cls(columns=['c', 'd'])),
    ])

    with pytest.raises(ValueError) as excinfo:
        pipeline.fit_transform(df)

    assert str(excinfo.value) == message
def test_with_array_wrong_number_of_column_names_raises(
        self, todf_cls, df):
    """Check that three column names for two-column ``.values`` input raise.

    The pipeline is built outside the ``pytest.raises`` block so that only
    ``fit_transform`` is expected to raise the ValueError.
    """
    def to_values(frame):
        # Drop the pandas wrapper and hand on only the underlying values.
        return frame.values

    message = ("ToDataFrame was given data with 2 columns but "
               "was initialized with 3 column names.")
    pipeline = Pipeline([
        ('select', ItemSelector(['a', 'b'])),
        ('values', FunctionTransformer(to_values, validate=False)),
        ('to_df', todf_cls(columns=['c', 'd', 'e'])),
    ])

    with pytest.raises(ValueError) as excinfo:
        pipeline.fit_transform(df)

    assert str(excinfo.value) == message
def test_with_dict_columns_sorted(self, todf_cls):
    """Check that columns come out sorted when the union is built z-to-a.

    A frame with one column per lowercase ASCII letter is created, the
    selectors are registered in reverse alphabetical order, and the
    resulting column list is compared with its sorted form.
    """
    import string

    letters = string.ascii_lowercase
    frame = pd.DataFrame({letter: np.arange(3) for letter in letters})

    # Register the selectors from 'z' down to 'a'.
    selectors = [
        (letter, ItemSelector(letter)) for letter in reversed(letters)
    ]
    pipeline = Pipeline([
        ('union', DictFeatureUnion(selectors)),
        ('to_df', todf_cls()),
    ])

    converted = pipeline.fit_transform(frame)
    column_names = converted.columns.tolist()
    assert column_names == sorted(column_names)