def test_feature_union_weights(): # test feature union with transformer weights X = iris.data y = iris.target pca = PCA(n_components=2, svd_solver='randomized', random_state=0) select = SelectKBest(k=1) # test using fit followed by transform fs = FeatureUnion([("pca", pca), ("select", select)], transformer_weights={"pca": 10}) fs.fit(X, y) X_transformed = fs.transform(X) # test using fit_transform fs = FeatureUnion([("pca", pca), ("select", select)], transformer_weights={"pca": 10}) X_fit_transformed = fs.fit_transform(X, y) # test it works with transformers missing fit_transform fs = FeatureUnion([("mock", Transf()), ("pca", pca), ("select", select)], transformer_weights={"mock": 10}) X_fit_transformed_wo_method = fs.fit_transform(X, y) # check against expected result # We use a different pca object to control the random_state stream assert_array_almost_equal(X_transformed[:, :-1], 10 * pca.fit_transform(X)) assert_array_equal(X_transformed[:, -1], select.fit_transform(X, y).ravel()) assert_array_almost_equal(X_fit_transformed[:, :-1], 10 * pca.fit_transform(X)) assert_array_equal(X_fit_transformed[:, -1], select.fit_transform(X, y).ravel()) assert X_fit_transformed_wo_method.shape == (X.shape[0], 7)
def test_feature_union_warns_with_none(): msg = (r"Using None as a transformer is deprecated in version 0\.22 and " r"will be removed in version 0\.24\. Please use 'drop' instead\.") with pytest.warns(FutureWarning, match=msg): union = FeatureUnion([('multi1', None), ('multi2', Mult())]) X = [[1, 2, 3], [4, 5, 6]] with pytest.warns(FutureWarning, match=msg): union.fit_transform(X)
def test_feature_union(): # basic sanity check for feature union X = iris.data X -= X.mean(axis=0) y = iris.target svd = TruncatedSVD(n_components=2, random_state=0) select = SelectKBest(k=1) fs = FeatureUnion([("svd", svd), ("select", select)]) fs.fit(X, y) X_transformed = fs.transform(X) assert X_transformed.shape == (X.shape[0], 3) # check if it does the expected thing assert_array_almost_equal(X_transformed[:, :-1], svd.fit_transform(X)) assert_array_equal(X_transformed[:, -1], select.fit_transform(X, y).ravel()) # test if it also works for sparse input # We use a different svd object to control the random_state stream fs = FeatureUnion([("svd", svd), ("select", select)]) X_sp = sparse.csr_matrix(X) X_sp_transformed = fs.fit_transform(X_sp, y) assert_array_almost_equal(X_transformed, X_sp_transformed.toarray()) # Test clone fs2 = assert_no_warnings(clone, fs) assert fs.transformer_list[0][1] is not fs2.transformer_list[0][1] # test setting parameters fs.set_params(select__k=2) assert fs.fit_transform(X, y).shape == (X.shape[0], 4) # test it works with transformers missing fit_transform fs = FeatureUnion([("mock", Transf()), ("svd", svd), ("select", select)]) X_transformed = fs.fit_transform(X, y) assert X_transformed.shape == (X.shape[0], 8) # test error if some elements do not support transform assert_raises_regex( TypeError, 'All estimators should implement fit and ' 'transform.*\\bNoTrans\\b', FeatureUnion, [("transform", Transf()), ("no_transform", NoTrans())]) # test that init accepts tuples fs = FeatureUnion((("svd", svd), ("select", select))) fs.fit(X, y)
def test_feature_union_fit_params(): # Regression test for issue: #15117 class Dummy(TransformerMixin, BaseEstimator): def fit(self, X, y=None, **fit_params): if fit_params != {'a': 0}: raise ValueError return self def transform(self, X, y=None): return X X, y = iris.data, iris.target t = FeatureUnion([('dummy0', Dummy()), ('dummy1', Dummy())]) with pytest.raises(ValueError): t.fit(X, y) with pytest.raises(ValueError): t.fit_transform(X, y) t.fit(X, y, a=0) t.fit_transform(X, y, a=0)
def test_set_feature_union_step_drop(drop): mult2 = Mult(2) mult2.get_feature_names = lambda: ['x2'] mult3 = Mult(3) mult3.get_feature_names = lambda: ['x3'] X = np.asarray([[1]]) ft = FeatureUnion([('m2', mult2), ('m3', mult3)]) assert_array_equal([[2, 3]], ft.fit(X).transform(X)) assert_array_equal([[2, 3]], ft.fit_transform(X)) assert ['m2__x2', 'm3__x3'] == ft.get_feature_names() with pytest.warns(None) as record: ft.set_params(m2=drop) assert_array_equal([[3]], ft.fit(X).transform(X)) assert_array_equal([[3]], ft.fit_transform(X)) assert ['m3__x3'] == ft.get_feature_names() assert record if drop is None else not record with pytest.warns(None) as record: ft.set_params(m3=drop) assert_array_equal([[]], ft.fit(X).transform(X)) assert_array_equal([[]], ft.fit_transform(X)) assert [] == ft.get_feature_names() assert record if drop is None else not record with pytest.warns(None) as record: # check we can change back ft.set_params(m3=mult3) assert_array_equal([[3]], ft.fit(X).transform(X)) assert record if drop is None else not record with pytest.warns(None) as record: # Check 'drop' step at construction time ft = FeatureUnion([('m2', drop), ('m3', mult3)]) assert_array_equal([[3]], ft.fit(X).transform(X)) assert_array_equal([[3]], ft.fit_transform(X)) assert ['m3__x3'] == ft.get_feature_names() assert record if drop is None else not record
def test_feature_union_parallel(): # test that n_jobs work for FeatureUnion X = JUNK_FOOD_DOCS fs = FeatureUnion([ ("words", CountVectorizer(analyzer='word')), ("chars", CountVectorizer(analyzer='char')), ]) fs_parallel = FeatureUnion([ ("words", CountVectorizer(analyzer='word')), ("chars", CountVectorizer(analyzer='char')), ], n_jobs=2) fs_parallel2 = FeatureUnion([ ("words", CountVectorizer(analyzer='word')), ("chars", CountVectorizer(analyzer='char')), ], n_jobs=2) fs.fit(X) X_transformed = fs.transform(X) assert X_transformed.shape[0] == len(X) fs_parallel.fit(X) X_transformed_parallel = fs_parallel.transform(X) assert X_transformed.shape == X_transformed_parallel.shape assert_array_equal(X_transformed.toarray(), X_transformed_parallel.toarray()) # fit_transform should behave the same X_transformed_parallel2 = fs_parallel2.fit_transform(X) assert_array_equal(X_transformed.toarray(), X_transformed_parallel2.toarray()) # transformers should stay fit after fit_transform X_transformed_parallel2 = fs_parallel2.transform(X) assert_array_equal(X_transformed.toarray(), X_transformed_parallel2.toarray())