def test_pipeline_ducktyping():
    pipeline = make_pipeline(Mult(5))
    # bare attribute access is the assertion here: it raises AttributeError
    # if the pipeline does not expose the method
    pipeline.predict
    pipeline.transform
    pipeline.inverse_transform

    pipeline = make_pipeline(Transf())
    assert not hasattr(pipeline, 'predict')
    pipeline.transform
    pipeline.inverse_transform

    pipeline = make_pipeline('passthrough')
    assert pipeline.steps[0] == ('passthrough', 'passthrough')
    assert not hasattr(pipeline, 'predict')
    pipeline.transform
    pipeline.inverse_transform

    pipeline = make_pipeline(Transf(), NoInvTransf())
    assert not hasattr(pipeline, 'predict')
    pipeline.transform
    assert not hasattr(pipeline, 'inverse_transform')

    pipeline = make_pipeline(NoInvTransf(), Transf())
    assert not hasattr(pipeline, 'predict')
    pipeline.transform
    assert not hasattr(pipeline, 'inverse_transform')
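
# A minimal sketch of the mechanism the test above exercises, assuming
# sklearn's conditional method exposure: a Pipeline only grows `predict`,
# `transform`, etc. when the relevant step supports them, so `hasattr`
# checks fail cleanly instead of raising at call time.
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

pipe = make_pipeline(StandardScaler())  # transformer only: no final predictor
assert not hasattr(pipe, 'predict')
pipe = make_pipeline(StandardScaler(), LogisticRegression())
assert hasattr(pipe, 'predict')  # exposed because the final step predicts
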
def test_classes_property():
    X = iris.data
    y = iris.target

    reg = make_pipeline(SelectKBest(k=1), LinearRegression())
    reg.fit(X, y)
    assert_raises(AttributeError, getattr, reg, "classes_")

    clf = make_pipeline(SelectKBest(k=1), LogisticRegression(random_state=0))
    assert_raises(AttributeError, getattr, clf, "classes_")
    clf.fit(X, y)
    assert_array_equal(clf.classes_, np.unique(y))
def test_make_pipeline_memory():
    cachedir = mkdtemp()
    if LooseVersion(joblib.__version__) < LooseVersion('0.12'):
        # joblib < 0.12 used `cachedir`; it was renamed to `location` in 0.12
        memory = joblib.Memory(cachedir=cachedir, verbose=10)
    else:
        memory = joblib.Memory(location=cachedir, verbose=10)
    pipeline = make_pipeline(DummyTransf(), SVC(), memory=memory)
    assert pipeline.memory is memory
    pipeline = make_pipeline(DummyTransf(), SVC())
    assert pipeline.memory is None
    assert len(pipeline) == 2

    shutil.rmtree(cachedir)
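
# Caching sketch, assuming joblib >= 0.12: with `memory` set, fitted
# transformers are memoized on disk, so refitting with identical data and
# parameters reuses the cache instead of recomputing.
import tempfile
import joblib
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

with tempfile.TemporaryDirectory() as tmp:
    cached = make_pipeline(StandardScaler(), SVC(),
                           memory=joblib.Memory(location=tmp, verbose=0))
    cached.fit([[0.], [1.], [2.]], [0, 1, 1])  # first fit populates the cache
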
def test_noise_sim(sim):
    p = Pipeline([('s', sim), ('c', DecisionTreeClassifier())])
    p.fit(X, y)
    p.predict(X)

    p = make_pipeline(sim, DecisionTreeClassifier())
    p.fit(X, y)
    p.predict(X)
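
# `sim` above is supplied by pytest parametrization. A hedged, standalone
# sketch of that wiring (the parametrized list and test name here are
# assumptions; UniformNoise mirrors its usage elsewhere in this file):
import numpy as np
import pytest
from sklearn.tree import DecisionTreeClassifier
from skclean.pipeline import make_pipeline
from skclean.simulate_noise import UniformNoise

@pytest.mark.parametrize('sim', [UniformNoise(.3, random_state=0)])
def test_noise_sim_sketch(sim):
    X = np.arange(20).reshape(10, 2)
    y = np.array([0, 1] * 5)
    make_pipeline(sim, DecisionTreeClassifier()).fit(X, y).predict(X)
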
def test_make_pipeline():
    t1 = Transf()
    t2 = Transf()
    pipe = make_pipeline(t1, t2)
    assert isinstance(pipe, Pipeline)
    assert pipe.steps[0][0] == "transf-1"
    assert pipe.steps[1][0] == "transf-2"

    pipe = make_pipeline(t1, t2, FitParamT())
    assert isinstance(pipe, Pipeline)
    assert pipe.steps[0][0] == "transf-1"
    assert pipe.steps[1][0] == "transf-2"
    assert pipe.steps[2][0] == "fitparamt"

    assert_raise_message(TypeError,
                         'Unknown keyword arguments: "random_parameter"',
                         make_pipeline,
                         t1,
                         t2,
                         random_parameter='rnd')
def test_score_samples_on_pipeline_without_score_samples():
    X = np.array([[1], [2]])
    y = np.array([1, 2])
    # Test that a pipeline does not have score_samples method when the final
    # step of the pipeline does not have score_samples defined.
    pipe = make_pipeline(LogisticRegression())
    pipe.fit(X, y)
    with pytest.raises(AttributeError,
                       match="'LogisticRegression' object has no attribute "
                       "'score_samples'"):
        pipe.score_samples(X)
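
# Converse sketch: when the final step does define `score_samples` (e.g.
# IsolationForest), the pipeline exposes it and delegates the call.
from sklearn.ensemble import IsolationForest
from sklearn.pipeline import make_pipeline

pipe = make_pipeline(IsolationForest(random_state=0)).fit([[1.], [2.], [3.]])
pipe.score_samples([[1.5]])  # delegated to IsolationForest.score_samples
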
def test_n_features_in_pipeline():
    # make sure pipelines delegate n_features_in_ to the first step

    X = [[1, 2], [3, 4], [5, 6]]
    y = [0, 1, 2]

    ss = StandardScaler()
    gbdt = HistGradientBoostingClassifier()
    pipe = make_pipeline(ss, gbdt)
    assert not hasattr(pipe, 'n_features_in_')
    pipe.fit(X, y)
    assert pipe.n_features_in_ == ss.n_features_in_ == 2

    # if the first step has the n_features_in_ attribute then the pipeline
    # also has it, even though the pipeline itself isn't fitted
    ss = StandardScaler()
    gbdt = HistGradientBoostingClassifier()
    pipe = make_pipeline(ss, gbdt)
    ss.fit(X, y)
    assert pipe.n_features_in_ == ss.n_features_in_ == 2
    assert not hasattr(gbdt, 'n_features_in_')
def test_example_two():
    from skclean.simulate_noise import UniformNoise
    from skclean.detectors import KDN
    from skclean.handlers import Filter
    from skclean.pipeline import Pipeline, make_pipeline  # Importing from skclean, not sklearn
    from skclean.utils import load_data

    X, y = load_data('breast_cancer')

    clf = Pipeline([
        ('scale', StandardScaler()),  # Scale features
        ('feat_sel', VarianceThreshold(.2)),  # Feature selection
        ('detector', KDN()),  # Detect mislabeled samples
        ('handler', Filter(SVC())),  # Filter out likely mislabeled samples, then train an SVM
    ])

    clf_g = GridSearchCV(clf, {'detector__n_neighbors': [2, 5, 10]})
    n_clf_g = make_pipeline(UniformNoise(.3), clf_g)  # Inject label noise at the very first step

    print(cross_val_score(n_clf_g, X, y, cv=5).mean())  # 5-fold cross-validation
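
# Parameter-routing note for the grid above: grids and set_params address
# steps by name, and each nesting level adds another `stepname__` prefix.
# A minimal standalone sketch (names follow sklearn's auto-lowercasing):
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import make_pipeline
from sklearn.tree import DecisionTreeClassifier

inner = GridSearchCV(DecisionTreeClassifier(), {'max_depth': [2, 3]})
outer = make_pipeline(inner)
outer.set_params(gridsearchcv__cv=3)  # one prefix per nesting level
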
def test_pipeline_param_error():
    clf = make_pipeline(LogisticRegression())
    with pytest.raises(ValueError,
                       match="Pipeline.fit does not accept "
                       "the sample_weight parameter"):
        clf.fit([[0], [0]], [0, 1], sample_weight=[1, 1])
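
# For contrast, fit parameters *are* accepted with the `<stepname>__<param>`
# spelling, which routes them to the named step:
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline

clf = make_pipeline(LogisticRegression())
clf.fit([[0], [1]], [0, 1], logisticregression__sample_weight=[1, 1])
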
# (reconstructed opening -- only steps 'd' and 'e' below are from the
# original listing; `dummy` is assumed to be a plain classifier defined
# earlier in the file)
PIPELINE = Pipeline([
    ('d', KDN()),
    ('e', skclean.handlers.SampleWeight(dummy))
])


# Inside Pipeline
tmp_Handlers = []
for h in NOISE_HANDLERS:
    if h.iterative:  # Exclude iterative handlers
        continue
    ch = clone(h)
    ch.detector = None
    tmp_Handlers.append(ch)
preli_steps = [UniformNoise(.2, random_state=SEED), StandardScaler()]  # preliminary, shared steps
all_comb = product(NOISE_DETECTORS, tmp_Handlers)
INSIDE_PIPE = [make_pipeline(*(preli_steps + list(comb))) for comb in all_comb]

# Outside Pipeline
OUTSIDE_PIPE = []
for h in NOISE_HANDLERS:
    for d in NOISE_DETECTORS:
        ch, d = clone(h), clone(d)
        ch.detector = d
        if 'random_state' in ch.get_params():  # trying to avoid flaky tests
            ch.set_params(random_state=42)
        OUTSIDE_PIPE.append(ch)

ALL_COMBS = INSIDE_PIPE + OUTSIDE_PIPE

ALL_ESTIMATORS = NOISE_SIMULATORS + NOISE_DETECTORS + ALL_COMBS + [PIPELINE]
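
# Hypothetical consumer of the list above (assumed; not in the source):
# every collected estimator gets cloned and smoke-tested against the
# module-level X, y.
@pytest.mark.parametrize('est', ALL_ESTIMATORS)
def test_all_estimators_smoke(est):
    clone(est).fit(X, y)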