def test_pipeline_ducktyping():
    """Check which methods a pipeline exposes depending on its steps.

    A bare attribute access raises ``AttributeError`` when the attribute is
    missing, so those expression statements act as presence checks.
    """
    # Single ``Mult`` step: predict, transform and inverse_transform exist.
    pipe = make_pipeline(Mult(5))
    pipe.predict
    pipe.transform
    pipe.inverse_transform

    # Single ``Transf`` step: no predict, but both transform directions.
    pipe = make_pipeline(Transf())
    assert not hasattr(pipe, 'predict')
    pipe.transform
    pipe.inverse_transform

    # A lone ``None`` step exposes the same methods as the ``Transf`` case.
    pipe = make_pipeline(None)
    assert not hasattr(pipe, 'predict')
    pipe.transform
    pipe.inverse_transform

    # Whatever its position, a ``NoInvTransf`` step removes
    # inverse_transform from the pipeline.
    for first_cls, second_cls in ((Transf, NoInvTransf),
                                  (NoInvTransf, Transf)):
        pipe = make_pipeline(first_cls(), second_cls())
        assert not hasattr(pipe, 'predict')
        pipe.transform
        assert not hasattr(pipe, 'inverse_transform')
def test_make_pipeline():
    """Check the type and the step names produced by ``make_pipeline``."""
    first, second = Transf(), Transf()

    # Two steps of the same class get numbered names.
    two_steps = make_pipeline(first, second)
    assert isinstance(two_steps, Pipeline)
    for position, expected_name in enumerate(("transf-1", "transf-2")):
        assert two_steps.steps[position][0] == expected_name

    # A third step of a different class keeps an unnumbered name.
    three_steps = make_pipeline(first, second, FitParamT())
    assert isinstance(three_steps, Pipeline)
    for position, expected_name in enumerate(
            ("transf-1", "transf-2", "fitparamt")):
        assert three_steps.steps[position][0] == expected_name
def test_classes_property():
    """Check when the ``classes_`` attribute is available on a pipeline."""
    dataset = load_iris()
    features, labels = dataset.data, dataset.target

    # Pipeline ending in a regressor: no ``classes_`` even after fitting.
    regressor = make_pipeline(SelectKBest(k=1), LinearRegression())
    regressor.fit(features, labels)
    with raises(AttributeError):
        getattr(regressor, "classes_")

    # Pipeline ending in a classifier: ``classes_`` is missing before fit...
    classifier = make_pipeline(SelectKBest(k=1),
                               LogisticRegression(random_state=0))
    with raises(AttributeError):
        getattr(classifier, "classes_")

    # ...and equals the unique target values after fit.
    classifier.fit(features, labels)
    assert_array_equal(classifier.classes_, np.unique(labels))
def test_pipeline_fit_then_sample_3_samplers_with_sampler_last_estimator():
    """Compare ``fit_sample`` with ``fit`` followed by ``sample``.

    Both paths run on a three-sampler pipeline and must return equal
    resampled arrays.
    """
    X, y = make_classification(
        n_classes=2,
        class_sep=2,
        weights=[0.1, 0.9],
        n_informative=3,
        n_redundant=1,
        flip_y=0,
        n_features=20,
        n_clusters_per_class=1,
        n_samples=50000,
        random_state=0)

    rus = RandomUnderSampler(random_state=42)
    enn = ENN()

    # Path 1: fit and resample in a single call.
    one_shot = make_pipeline(rus, enn, rus)
    X_one_shot, y_one_shot = one_shot.fit_sample(X, y)

    # Path 2: fit first, then resample. The same sampler instances are
    # reused on purpose, as in path 1.
    two_stage = make_pipeline(rus, enn, rus)
    two_stage.fit(X, y)
    X_two_stage, y_two_stage = two_stage.sample(X, y)

    assert_array_equal(X_one_shot, X_two_stage)
    assert_array_equal(y_one_shot, y_two_stage)
def test_single_estimator():
    """Check singleton ensembles.

    A one-estimator ``SMOTEBagging`` without bootstrapping must predict the
    same labels as an explicit SMOTE + k-NN pipeline using the same seed.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        imb_iris.data, imb_iris.target, random_state=0)

    ensemble = SMOTEBagging(
        base_estimator=KNeighborsClassifier(),
        n_estimators=1,
        bootstrap=False,
        bootstrap_features=False,
        random_state=0,
    ).fit(X_train, y_train)

    # Reuse the seed of the first step of the ensemble's only estimator.
    smote_seed = ensemble.estimators_[0].steps[0][1].random_state
    reference = make_pipeline(
        SMOTE(random_state=smote_seed),
        KNeighborsClassifier(),
    ).fit(X_train, y_train)

    assert_array_equal(ensemble.predict(X_test), reference.predict(X_test))
def test_pipeline_none_sampler_sample():
    """Test pipeline using None step and a sampler.

    The test only checks that ``fit`` and ``sample`` run without raising.
    """
    X, y = make_classification(
        n_classes=2,
        class_sep=2,
        weights=[0.1, 0.9],
        n_informative=3,
        n_redundant=1,
        flip_y=0,
        n_features=20,
        n_clusters_per_class=1,
        n_samples=5000,
        random_state=0)

    sampler = RandomUnderSampler(random_state=0)
    none_then_sampler = make_pipeline(None, sampler)
    none_then_sampler.fit(X, y)
    none_then_sampler.sample(X, y)
def test_pipeline_none_classifier():
    """Test pipeline using None as preprocessing step and a classifier.

    The test only checks that fitting and each prediction/scoring method
    run without raising.
    """
    X, y = make_classification(
        n_classes=2,
        class_sep=2,
        weights=[0.1, 0.9],
        n_informative=3,
        n_redundant=1,
        flip_y=0,
        n_features=20,
        n_clusters_per_class=1,
        n_samples=5000,
        random_state=0)

    classifier = LogisticRegression(random_state=0)
    none_then_clf = make_pipeline(None, classifier)
    none_then_clf.fit(X, y)

    # Return values are deliberately discarded.
    none_then_clf.predict(X)
    none_then_clf.predict_proba(X)
    none_then_clf.decision_function(X)
    none_then_clf.score(X, y)
def test_pipeline_none_transformer():
    """Test pipeline using None and a transformer.

    The transformer implements both ``transform`` and ``inverse_transform``;
    transforming and then inverting must give back the input data.
    """
    X, y = make_classification(
        n_classes=2,
        class_sep=2,
        weights=[0.1, 0.9],
        n_informative=3,
        n_redundant=1,
        flip_y=0,
        n_features=20,
        n_clusters_per_class=1,
        n_samples=5000,
        random_state=0)

    whitening_pca = PCA(whiten=True)
    none_then_pca = make_pipeline(None, whitening_pca)
    none_then_pca.fit(X, y)

    projected = none_then_pca.transform(X)
    restored = none_then_pca.inverse_transform(projected)
    assert_array_almost_equal(X, restored)
def test_bagging_with_pipeline():
    """Fit and predict with ``SMOTEBagging`` wrapping a pipeline.

    The test only checks that ``fit`` followed by ``predict`` runs without
    raising.
    """
    base_pipeline = make_pipeline(SelectKBest(k=1), DecisionTreeClassifier())
    bagging = SMOTEBagging(base_pipeline, max_features=2)

    fitted = bagging.fit(imb_iris.data, imb_iris.target)
    fitted.predict(imb_iris.data)