Ejemplo n.º 1
0
def test_pipeline_ducktyping():
    """Check which methods a pipeline exposes depending on its steps.

    Attributes expected to exist are simply looked up (a missing one raises
    ``AttributeError`` and fails the test); attributes expected to be absent
    are checked with ``hasattr``.
    """

    def lookup(obj, *attr_names):
        # Resolve each attribute in order; the values are deliberately unused.
        for attr_name in attr_names:
            getattr(obj, attr_name)

    # A single Mult step: predict, transform and inverse_transform resolve.
    pipe = make_pipeline(Mult(5))
    lookup(pipe, 'predict', 'transform', 'inverse_transform')

    # A single Transf step: no predict, both transform directions resolve.
    pipe = make_pipeline(Transf())
    assert not hasattr(pipe, 'predict')
    lookup(pipe, 'transform', 'inverse_transform')

    # A single None step exposes the same attributes as the Transf case.
    pipe = make_pipeline(None)
    assert not hasattr(pipe, 'predict')
    lookup(pipe, 'transform', 'inverse_transform')

    # A NoInvTransf step in last position: inverse_transform is not exposed.
    pipe = make_pipeline(Transf(), NoInvTransf())
    assert not hasattr(pipe, 'predict')
    lookup(pipe, 'transform')
    assert not hasattr(pipe, 'inverse_transform')

    # A NoInvTransf step in first position: inverse_transform is not exposed.
    pipe = make_pipeline(NoInvTransf(), Transf())
    assert not hasattr(pipe, 'predict')
    lookup(pipe, 'transform')
    assert not hasattr(pipe, 'inverse_transform')
Ejemplo n.º 2
0
def test_make_pipeline():
    """Check the type and the generated step names of ``make_pipeline``.

    Two ``Transf`` instances are expected to be named ``transf-1`` and
    ``transf-2``; an additional ``FitParamT`` step is expected to be named
    ``fitparamt``.
    """
    first, second = Transf(), Transf()

    two_steps = make_pipeline(first, second)
    assert isinstance(two_steps, Pipeline)
    for position, expected_name in enumerate(("transf-1", "transf-2")):
        assert two_steps.steps[position][0] == expected_name

    three_steps = make_pipeline(first, second, FitParamT())
    assert isinstance(three_steps, Pipeline)
    expected_names = ("transf-1", "transf-2", "fitparamt")
    for position, expected_name in enumerate(expected_names):
        assert three_steps.steps[position][0] == expected_name
Ejemplo n.º 3
0
def test_classes_property():
    """Check when ``classes_`` is available on a pipeline.

    Accessing ``classes_`` must raise ``AttributeError`` on a fitted pipeline
    ending in ``LinearRegression`` and on an unfitted pipeline ending in
    ``LogisticRegression``; once the latter is fitted, ``classes_`` must equal
    the unique target values.
    """
    dataset = load_iris()
    features, labels = dataset.data, dataset.target

    regressor = make_pipeline(SelectKBest(k=1), LinearRegression())
    regressor.fit(features, labels)
    with raises(AttributeError):
        regressor.classes_

    classifier = make_pipeline(
        SelectKBest(k=1),
        LogisticRegression(random_state=0),
    )
    # Before fitting, the attribute must not be reachable.
    with raises(AttributeError):
        classifier.classes_
    classifier.fit(features, labels)
    assert_array_equal(classifier.classes_, np.unique(labels))
Ejemplo n.º 4
0
def test_pipeline_fit_then_sample_3_samplers_with_sampler_last_estimator():
    """Check ``fit_sample`` agrees with ``fit`` followed by ``sample``.

    Both pipelines are built from the same three steps (the same
    ``RandomUnderSampler`` instance is used as first and last step, with an
    ``ENN`` in between) and must return identical resampled arrays.
    """
    dataset_options = dict(
        n_classes=2,
        class_sep=2,
        weights=[0.1, 0.9],
        n_informative=3,
        n_redundant=1,
        flip_y=0,
        n_features=20,
        n_clusters_per_class=1,
        n_samples=50000,
        random_state=0,
    )
    X, y = make_classification(**dataset_options)

    under_sampler = RandomUnderSampler(random_state=42)
    cleaner = ENN()

    # Path 1: fit and resample in a single call.
    one_shot = make_pipeline(under_sampler, cleaner, under_sampler)
    X_one_shot, y_one_shot = one_shot.fit_sample(X, y)

    # Path 2: fit first, then resample the same data.
    two_stage = make_pipeline(under_sampler, cleaner, under_sampler)
    two_stage.fit(X, y)
    X_two_stage, y_two_stage = two_stage.sample(X, y)

    assert_array_equal(X_one_shot, X_two_stage)
    assert_array_equal(y_one_shot, y_two_stage)
Ejemplo n.º 5
0
def test_single_estimator():
    """Check singleton ensembles.

    A ``SMOTEBagging`` ensemble with a single estimator and no bootstrapping
    must predict the same labels as a ``SMOTE`` + ``KNeighborsClassifier``
    pipeline whose ``SMOTE`` reuses the ``random_state`` found on the first
    step of the ensemble's first fitted estimator.
    """
    split = train_test_split(imb_iris.data, imb_iris.target, random_state=0)
    X_train, X_test, y_train, y_test = split

    bagging = SMOTEBagging(
        base_estimator=KNeighborsClassifier(),
        n_estimators=1,
        bootstrap=False,
        bootstrap_features=False,
        random_state=0,
    )
    ensemble = bagging.fit(X_train, y_train)

    # Reuse the random_state stored on the ensemble's inner first step so
    # that both models resample with the same seed.
    inner_random_state = ensemble.estimators_[0].steps[0][1].random_state
    reference_pipeline = make_pipeline(
        SMOTE(random_state=inner_random_state),
        KNeighborsClassifier(),
    )
    reference = reference_pipeline.fit(X_train, y_train)

    ensemble_pred = ensemble.predict(X_test)
    reference_pred = reference.predict(X_test)
    assert_array_equal(ensemble_pred, reference_pred)
Ejemplo n.º 6
0
def test_pipeline_none_sampler_sample():
    """Test a pipeline made of a ``None`` step followed by a sampler.

    Smoke test only: ``fit`` and ``sample`` are called and their results are
    not inspected.
    """
    dataset_options = dict(
        n_classes=2,
        class_sep=2,
        weights=[0.1, 0.9],
        n_informative=3,
        n_redundant=1,
        flip_y=0,
        n_features=20,
        n_clusters_per_class=1,
        n_samples=5000,
        random_state=0,
    )
    X, y = make_classification(**dataset_options)

    under_sampler = RandomUnderSampler(random_state=0)
    pipeline = make_pipeline(None, under_sampler)
    pipeline.fit(X, y)
    pipeline.sample(X, y)
Ejemplo n.º 7
0
def test_pipeline_none_classifier():
    """Test a pipeline using ``None`` as preprocessing step and a classifier.

    Smoke test only: after fitting, ``predict``, ``predict_proba``,
    ``decision_function`` and ``score`` are called and their results are not
    inspected.
    """
    dataset_options = dict(
        n_classes=2,
        class_sep=2,
        weights=[0.1, 0.9],
        n_informative=3,
        n_redundant=1,
        flip_y=0,
        n_features=20,
        n_clusters_per_class=1,
        n_samples=5000,
        random_state=0,
    )
    X, y = make_classification(**dataset_options)

    classifier = LogisticRegression(random_state=0)
    pipeline = make_pipeline(None, classifier)
    pipeline.fit(X, y)

    # Methods that only take the feature matrix, called in a fixed order.
    for method_name in ('predict', 'predict_proba', 'decision_function'):
        getattr(pipeline, method_name)(X)
    pipeline.score(X, y)
Ejemplo n.º 8
0
def test_pipeline_none_transformer():
    """Test a pipeline using ``None`` and an invertible transformer.

    The transformer (``PCA`` with whitening) implements both ``transform`` and
    ``inverse_transform``; transforming the data and inverting the result
    must give back the input up to ``assert_array_almost_equal`` tolerance.
    """
    dataset_options = dict(
        n_classes=2,
        class_sep=2,
        weights=[0.1, 0.9],
        n_informative=3,
        n_redundant=1,
        flip_y=0,
        n_features=20,
        n_clusters_per_class=1,
        n_samples=5000,
        random_state=0,
    )
    X, y = make_classification(**dataset_options)

    whitening_pca = PCA(whiten=True)
    pipeline = make_pipeline(None, whitening_pca)
    pipeline.fit(X, y)

    projected = pipeline.transform(X)
    recovered = pipeline.inverse_transform(projected)
    assert_array_almost_equal(X, recovered)
Ejemplo n.º 9
0
def test_bagging_with_pipeline():
    """Smoke-test ``SMOTEBagging`` with a pipeline as its first argument.

    The ensemble is fitted on ``imb_iris`` and asked to predict on the same
    data; the predictions are not inspected.
    """
    base_pipeline = make_pipeline(SelectKBest(k=1), DecisionTreeClassifier())
    estimator = SMOTEBagging(base_pipeline, max_features=2)
    fitted = estimator.fit(imb_iris.data, imb_iris.target)
    fitted.predict(imb_iris.data)