Example #1
0
def test_feature_union_warns_with_none():
    msg = (r"Using None as a transformer is deprecated in version 0\.22 and "
           r"will be removed in version 0\.24\. Please use 'drop' instead\.")
    with pytest.warns(FutureWarning, match=msg):
        union = FeatureUnion([('multi1', None), ('multi2', Mult())])

    X = [[1, 2, 3], [4, 5, 6]]

    with pytest.warns(FutureWarning, match=msg):
        union.fit_transform(X)
Example #2
0
def test_feature_union_fit_params():
    # Regression test for issue: #15117
    class Dummy(TransformerMixin, BaseEstimator):
        def fit(self, X, y=None, **fit_params):
            if fit_params != {'a': 0}:
                raise ValueError
            return self

        def transform(self, X, y=None):
            return X

    X, y = iris.data, iris.target
    t = FeatureUnion([('dummy0', Dummy()), ('dummy1', Dummy())])
    with pytest.raises(ValueError):
        t.fit(X, y)

    with pytest.raises(ValueError):
        t.fit_transform(X, y)

    t.fit(X, y, a=0)
    t.fit_transform(X, y, a=0)
Example #3
0
def test_feature_union_feature_names():
    word_vect = CountVectorizer(analyzer="word")
    char_vect = CountVectorizer(analyzer="char_wb", ngram_range=(3, 3))
    ft = FeatureUnion([("chars", char_vect), ("words", word_vect)])
    ft.fit(JUNK_FOOD_DOCS)
    feature_names = ft.get_feature_names()
    for feat in feature_names:
        assert "chars__" in feat or "words__" in feat
    assert len(feature_names) == 35

    ft = FeatureUnion([("tr1", Transf())]).fit([[1]])
    assert_raise_message(
        AttributeError, 'Transformer tr1 (type Transf) does not provide '
        'get_feature_names', ft.get_feature_names)
Example #4
0
def test_set_feature_union_step_drop(drop):
    mult2 = Mult(2)
    mult2.get_feature_names = lambda: ['x2']
    mult3 = Mult(3)
    mult3.get_feature_names = lambda: ['x3']
    X = np.asarray([[1]])

    ft = FeatureUnion([('m2', mult2), ('m3', mult3)])
    assert_array_equal([[2, 3]], ft.fit(X).transform(X))
    assert_array_equal([[2, 3]], ft.fit_transform(X))
    assert ['m2__x2', 'm3__x3'] == ft.get_feature_names()

    with pytest.warns(None) as record:
        ft.set_params(m2=drop)
        assert_array_equal([[3]], ft.fit(X).transform(X))
        assert_array_equal([[3]], ft.fit_transform(X))
    assert ['m3__x3'] == ft.get_feature_names()
    assert record if drop is None else not record

    with pytest.warns(None) as record:
        ft.set_params(m3=drop)
        assert_array_equal([[]], ft.fit(X).transform(X))
        assert_array_equal([[]], ft.fit_transform(X))
    assert [] == ft.get_feature_names()
    assert record if drop is None else not record

    with pytest.warns(None) as record:
        # check we can change back
        ft.set_params(m3=mult3)
        assert_array_equal([[3]], ft.fit(X).transform(X))
    assert record if drop is None else not record

    with pytest.warns(None) as record:
        # Check 'drop' step at construction time
        ft = FeatureUnion([('m2', drop), ('m3', mult3)])
        assert_array_equal([[3]], ft.fit(X).transform(X))
        assert_array_equal([[3]], ft.fit_transform(X))
    assert ['m3__x3'] == ft.get_feature_names()
    assert record if drop is None else not record
Example #5
0
def test_set_feature_union_steps():
    mult2 = Mult(2)
    mult2.get_feature_names = lambda: ['x2']
    mult3 = Mult(3)
    mult3.get_feature_names = lambda: ['x3']
    mult5 = Mult(5)
    mult5.get_feature_names = lambda: ['x5']

    ft = FeatureUnion([('m2', mult2), ('m3', mult3)])
    assert_array_equal([[2, 3]], ft.transform(np.asarray([[1]])))
    assert ['m2__x2', 'm3__x3'] == ft.get_feature_names()

    # Directly setting attr
    ft.transformer_list = [('m5', mult5)]
    assert_array_equal([[5]], ft.transform(np.asarray([[1]])))
    assert ['m5__x5'] == ft.get_feature_names()

    # Using set_params
    ft.set_params(transformer_list=[('mock', mult3)])
    assert_array_equal([[3]], ft.transform(np.asarray([[1]])))
    assert ['mock__x3'] == ft.get_feature_names()

    # Using set_params to replace single step
    ft.set_params(mock=mult5)
    assert_array_equal([[5]], ft.transform(np.asarray([[1]])))
    assert ['mock__x5'] == ft.get_feature_names()
Example #6
0
def test_feature_union_parallel():
    # test that n_jobs work for FeatureUnion
    X = JUNK_FOOD_DOCS

    fs = FeatureUnion([
        ("words", CountVectorizer(analyzer='word')),
        ("chars", CountVectorizer(analyzer='char')),
    ])

    fs_parallel = FeatureUnion([
        ("words", CountVectorizer(analyzer='word')),
        ("chars", CountVectorizer(analyzer='char')),
    ],
                               n_jobs=2)

    fs_parallel2 = FeatureUnion([
        ("words", CountVectorizer(analyzer='word')),
        ("chars", CountVectorizer(analyzer='char')),
    ],
                                n_jobs=2)

    fs.fit(X)
    X_transformed = fs.transform(X)
    assert X_transformed.shape[0] == len(X)

    fs_parallel.fit(X)
    X_transformed_parallel = fs_parallel.transform(X)
    assert X_transformed.shape == X_transformed_parallel.shape
    assert_array_equal(X_transformed.toarray(),
                       X_transformed_parallel.toarray())

    # fit_transform should behave the same
    X_transformed_parallel2 = fs_parallel2.fit_transform(X)
    assert_array_equal(X_transformed.toarray(),
                       X_transformed_parallel2.toarray())

    # transformers should stay fit after fit_transform
    X_transformed_parallel2 = fs_parallel2.transform(X)
    assert_array_equal(X_transformed.toarray(),
                       X_transformed_parallel2.toarray())
Example #7
0
def test_feature_union_weights():
    # test feature union with transformer weights
    X = iris.data
    y = iris.target
    pca = PCA(n_components=2, svd_solver='randomized', random_state=0)
    select = SelectKBest(k=1)
    # test using fit followed by transform
    fs = FeatureUnion([("pca", pca), ("select", select)],
                      transformer_weights={"pca": 10})
    fs.fit(X, y)
    X_transformed = fs.transform(X)
    # test using fit_transform
    fs = FeatureUnion([("pca", pca), ("select", select)],
                      transformer_weights={"pca": 10})
    X_fit_transformed = fs.fit_transform(X, y)
    # test it works with transformers missing fit_transform
    fs = FeatureUnion([("mock", Transf()), ("pca", pca), ("select", select)],
                      transformer_weights={"mock": 10})
    X_fit_transformed_wo_method = fs.fit_transform(X, y)
    # check against expected result

    # We use a different pca object to control the random_state stream
    assert_array_almost_equal(X_transformed[:, :-1], 10 * pca.fit_transform(X))
    assert_array_equal(X_transformed[:, -1],
                       select.fit_transform(X, y).ravel())
    assert_array_almost_equal(X_fit_transformed[:, :-1],
                              10 * pca.fit_transform(X))
    assert_array_equal(X_fit_transformed[:, -1],
                       select.fit_transform(X, y).ravel())
    assert X_fit_transformed_wo_method.shape == (X.shape[0], 7)
Example #8
0
def test_feature_union():
    # basic sanity check for feature union
    X = iris.data
    X -= X.mean(axis=0)
    y = iris.target
    svd = TruncatedSVD(n_components=2, random_state=0)
    select = SelectKBest(k=1)
    fs = FeatureUnion([("svd", svd), ("select", select)])
    fs.fit(X, y)
    X_transformed = fs.transform(X)
    assert X_transformed.shape == (X.shape[0], 3)

    # check if it does the expected thing
    assert_array_almost_equal(X_transformed[:, :-1], svd.fit_transform(X))
    assert_array_equal(X_transformed[:, -1],
                       select.fit_transform(X, y).ravel())

    # test if it also works for sparse input
    # We use a different svd object to control the random_state stream
    fs = FeatureUnion([("svd", svd), ("select", select)])
    X_sp = sparse.csr_matrix(X)
    X_sp_transformed = fs.fit_transform(X_sp, y)
    assert_array_almost_equal(X_transformed, X_sp_transformed.toarray())

    # Test clone
    fs2 = assert_no_warnings(clone, fs)
    assert fs.transformer_list[0][1] is not fs2.transformer_list[0][1]

    # test setting parameters
    fs.set_params(select__k=2)
    assert fs.fit_transform(X, y).shape == (X.shape[0], 4)

    # test it works with transformers missing fit_transform
    fs = FeatureUnion([("mock", Transf()), ("svd", svd), ("select", select)])
    X_transformed = fs.fit_transform(X, y)
    assert X_transformed.shape == (X.shape[0], 8)

    # test error if some elements do not support transform
    assert_raises_regex(
        TypeError, 'All estimators should implement fit and '
        'transform.*\\bNoTrans\\b', FeatureUnion,
        [("transform", Transf()), ("no_transform", NoTrans())])

    # test that init accepts tuples
    fs = FeatureUnion((("svd", svd), ("select", select)))
    fs.fit(X, y)
Example #9
0
          (Pipeline([('transf', Transf()), ('noop',
                                            'passthrough'), ('clf',
                                                             FitParamT())]),
           r'\[Pipeline\].*\(step 1 of 3\) Processing transf.* total=.*\n'
           r'\[Pipeline\].*\(step 2 of 3\) Processing noop.* total=.*\n'
           r'\[Pipeline\].*\(step 3 of 3\) Processing clf.* total=.*\n$'),
          (Pipeline([('transf', Transf()), ('clf', None)]),
           r'\[Pipeline\].*\(step 1 of 2\) Processing transf.* total=.*\n'
           r'\[Pipeline\].*\(step 2 of 2\) Processing clf.* total=.*\n$'),
          (Pipeline([('transf', None), ('mult', Mult())]),
           r'\[Pipeline\].*\(step 1 of 2\) Processing transf.* total=.*\n'
           r'\[Pipeline\].*\(step 2 of 2\) Processing mult.* total=.*\n$'),
          (Pipeline([('transf', 'passthrough'), ('mult', Mult())]),
           r'\[Pipeline\].*\(step 1 of 2\) Processing transf.* total=.*\n'
           r'\[Pipeline\].*\(step 2 of 2\) Processing mult.* total=.*\n$'),
          (FeatureUnion([('mult1', Mult()), ('mult2', Mult())]),
           r'\[FeatureUnion\].*\(step 1 of 2\) Processing mult1.* total=.*\n'
           r'\[FeatureUnion\].*\(step 2 of 2\) Processing mult2.* total=.*\n$'
           ),
          (FeatureUnion([('mult1', 'drop'), ('mult2', Mult()), ('mult3',
                                                                'drop')]),
           r'\[FeatureUnion\].*\(step 1 of 1\) Processing mult2.* total=.*\n$'
           )], ['fit', 'fit_transform', 'fit_predict'])
    if hasattr(est, method) and not (method == 'fit_transform' and hasattr(
        est, 'steps') and isinstance(est.steps[-1][1], FitParamT)))


@pytest.mark.parametrize('est, pattern, method', parameter_grid_test_verbose)
def test_verbose(est, method, pattern, capsys):
    func = getattr(est, method)