def test_trained_pipeline(self):
    """score() of a trained pipeline must match the accuracy of predict()."""
    pipeline = StandardScaler() >> LogisticRegression()
    trained = pipeline.fit(self.X_train, self.y_train)
    score = trained.score(self.X_test, self.y_test)
    predictions = trained.predict(self.X_test)
    from sklearn.metrics import accuracy_score
    accuracy = accuracy_score(self.y_test, predictions)
    self.assertEqual(accuracy, score)
def test_multiple_calls_with_classes(self):
    """partial_fit can be invoked repeatedly when classes are passed each time."""
    scaler = StandardScaler().fit(self.X_train, self.y_train)
    pipeline = scaler.freeze_trained() >> SGDClassifier()
    labels = [0, 1, 2]
    trained = pipeline.partial_fit(self.X_train, self.y_train, classes=labels)
    trained = trained.partial_fit(self.X_test, self.y_test, classes=labels)
    _ = trained.predict(self.X_test)
def test_call_on_trainable(self):
    """Calling partial_fit on the trainable caches the latest trained result."""
    scaler = StandardScaler().fit(self.X_train, self.y_train)
    pipeline = scaler.freeze_trained() >> SGDClassifier()
    pipeline.partial_fit(self.X_train, self.y_train, classes=[0, 1, 2])
    pipeline.pretty_print()
    trained = pipeline.partial_fit(self.X_test, self.y_test, classes=[0, 1, 2])
    # The trainable keeps a reference to its most recent trained counterpart.
    self.assertEqual(trained, pipeline._trained)
    _ = trained.predict(self.X_test)
    pipeline.partial_fit(self.X_train, self.y_train, classes=[0, 1, 2])
def test_second_call_with_different_classes_trainable(self):
    """A second partial_fit restricted to a subset of the labels still works."""
    scaler = StandardScaler().fit(self.X_train, self.y_train)
    pipeline = scaler.freeze_trained() >> SGDClassifier()
    pipeline.partial_fit(self.X_train, self.y_train, classes=[0, 1, 2])
    # Once SGDClassifier is trained, it has a classes_ attribute.
    self.assertTrue(self._last_impl_has(pipeline._trained, "classes_"))
    subset_labels = self.y_test[np.where(self.y_test != 0)]
    subset_X = self.X_test[0:len(subset_labels)]
    trained = pipeline.partial_fit(subset_X, subset_labels)
    _ = trained.predict(self.X_test)
def test_second_call_without_classes(self):
    """After the first partial_fit, later calls may omit the classes argument."""
    scaler = StandardScaler().fit(self.X_train, self.y_train)
    pipeline = scaler.freeze_trained() >> SGDClassifier()
    trained = pipeline.partial_fit(self.X_train, self.y_train, classes=[0, 1, 2])
    # Once SGDClassifier is trained, it has a classes_ attribute.
    self.assertTrue(self._last_impl_has(trained, "classes_"))
    trained = trained.partial_fit(self.X_test, self.y_test)
    _ = trained.predict(self.X_test)
def test_remove_last2(self):
    """remove_last raises ValueError when the pipeline ends in parallel steps."""
    pipeline = (
        StandardScaler()
        >> (PCA() & Nystroem() & PassiveAggressiveClassifier())
        >> ConcatFeatures()
        >> NoOp()
        >> (PassiveAggressiveClassifier() & LogisticRegression())
    )
    with self.assertRaises(ValueError):
        pipeline.remove_last()
def test_remove_last4(self):
    """remove_last(inplace=True) mutates the pipeline and returns it."""
    pipeline = (
        StandardScaler()
        >> (PCA() & Nystroem() & PassiveAggressiveClassifier())
        >> ConcatFeatures()
        >> NoOp()
        >> PassiveAggressiveClassifier()
    )
    result = pipeline.remove_last(inplace=True)
    # Both handles see the shortened pipeline because the edit was in place.
    self.assertEqual(len(result._steps), 6)
    self.assertEqual(len(pipeline._steps), 6)
def test_two_estimators_predict1(self):
    """A pipeline with an embedded classifier branch can fit and predict."""
    pipeline = (
        StandardScaler()
        >> (PCA() & Nystroem() & PassiveAggressiveClassifier())
        >> ConcatFeatures()
        >> NoOp()
        >> PassiveAggressiveClassifier()
    )
    trained = pipeline.fit(self.X_train, self.y_train)
    trained.predict(self.X_test)
def test_multiple_estimators_predict_predict_proba(self):
    """predict and predict_proba both work on a multi-stage stacked pipeline."""
    pipeline = (
        StandardScaler()
        >> (LogisticRegression() & PCA())
        >> ConcatFeatures()
        >> (NoOp() & LinearSVC())
        >> ConcatFeatures()
        >> KNeighborsClassifier()
    )
    pipeline.fit(self.X_train, self.y_train)
    _ = pipeline.predict_proba(self.X_test)
    _ = pipeline.predict(self.X_test)
def test_remove_last5(self):
    """The in-place remove_last result supports further chaining."""
    pipeline = (
        StandardScaler()
        >> (PCA() & Nystroem() & PassiveAggressiveClassifier())
        >> ConcatFeatures()
        >> NoOp()
        >> PassiveAggressiveClassifier()
    )
    pipeline.remove_last(inplace=True).freeze_trainable()
def test_two_estimators_predict_proba1(self):
    """predict_proba works when the final estimator supports probabilities."""
    pipeline = (
        StandardScaler()
        >> (PCA() & Nystroem() & GaussianNB())
        >> ConcatFeatures()
        >> NoOp()
        >> GaussianNB()
    )
    pipeline.fit(self.X_train, self.y_train)
    pipeline.predict_proba(self.X_test)
def test_two_estimators_predict_proba(self):
    """predict_proba on the trained result of a LogisticRegression pipeline."""
    pipeline = (
        StandardScaler()
        >> (PCA() & Nystroem() & LogisticRegression())
        >> ConcatFeatures()
        >> NoOp()
        >> LogisticRegression()
    )
    trained = pipeline.fit(self.X_train, self.y_train)
    trained.predict_proba(self.X_test)
def test_two_estimators_predict_proba1(self):
    """predict_proba raises ValueError when the last step cannot provide it."""
    pipeline = (
        StandardScaler()
        >> (PCA() & Nystroem() & PassiveAggressiveClassifier())
        >> ConcatFeatures()
        >> NoOp()
        >> PassiveAggressiveClassifier()
    )
    pipeline.fit(self.X_train, self.y_train)
    with self.assertRaises(ValueError):
        pipeline.predict_proba(self.X_test)
def test_schema_validation(self):
    """Schema checking accepts with_mean=False but rejects with_mean=True here."""
    good = StandardScaler(with_mean=False) >> LogisticRegression()
    _ = good.fit(self.train_X, self.train_y)
    bad = StandardScaler(with_mean=True) >> LogisticRegression()
    # Fitting the invalid configuration must fail schema validation.
    with self.assertRaises(jsonschema.ValidationError):
        _ = bad.fit(self.train_X, self.train_y)
def test_with_incompatible_estimator(self):
    """score_samples on an estimator without it raises AttributeError."""
    pipeline = StandardScaler() >> LogisticRegression()
    trained = pipeline.fit(self.X_train, self.y_train)
    with self.assertRaises(AttributeError):
        _ = trained.score_samples(self.X_test)
def test_trained_pipeline(self):
    """score_samples succeeds on a trained IsolationForest pipeline."""
    pipeline = StandardScaler() >> IsolationForest()
    trained = pipeline.fit(self.X_train, self.y_train)
    _ = trained.score_samples(self.X_test)
def test_trainable_pipeline(self):
    """score_samples on the still-trainable pipeline warns of deprecation."""
    pipeline = StandardScaler() >> IsolationForest()
    pipeline.fit(self.X_train, self.y_train)
    with self.assertWarns(DeprecationWarning):
        _ = pipeline.score_samples(self.X_test)
def test_trained_pipeline(self):
    """predict_log_proba succeeds on a trained AdaBoostClassifier pipeline."""
    pipeline = StandardScaler() >> AdaBoostClassifier()
    trained = pipeline.fit(self.X_train, self.y_train)
    _ = trained.predict_log_proba(self.X_test)
def test_with_incompatible_estimator(self):
    """predict_log_proba on an estimator without it raises AttributeError."""
    pipeline = StandardScaler() >> IsolationForest()
    trained = pipeline.fit(self.X_train, self.y_train)
    with self.assertRaises(AttributeError):
        _ = trained.predict_log_proba(self.X_test)
def test_trainable_pipeline(self):
    """predict_log_proba on the still-trainable pipeline warns of deprecation."""
    pipeline = StandardScaler() >> AdaBoostClassifier()
    pipeline.fit(self.X_train, self.y_train)
    with self.assertWarns(DeprecationWarning):
        _ = pipeline.predict_log_proba(self.X_test)