def test_cv_folds(self):
    """An explicit KFold(2) splitter must yield exactly two fold scores."""
    from sklearn.model_selection import KFold

    from lale.helpers import cross_val_score

    classifier = LogisticRegression(n_jobs=1)
    iris = sklearn.datasets.load_iris()
    # Two folds requested -> two scores returned.
    fold_scores = cross_val_score(classifier, iris.data, iris.target, cv=KFold(2))
    self.assertEqual(len(fold_scores), 2)
def f_min(op, X, y, num_folds=5):
    """Objective for a minimizer: one minus the mean cross-validation score of `op`."""
    import numpy as np

    from lale.helpers import cross_val_score

    fold_scores = cross_val_score(op, X, y, cv=num_folds)
    mean_score = np.mean(fold_scores)
    # Optimizers minimize, so invert the (higher-is-better) score.
    return 1 - mean_score  # Minimize!
def test_f_min(op, X, y, num_folds=5):
    """Objective for a minimizer: one minus the mean cross-validation score of `op`.

    Duplicates `f_min`; kept under a `test_`-prefixed name for optimizer tests.

    Fixes: removed an unused `from sklearn import datasets` import and a
    leftover commented-out `# try:` line.
    """
    import numpy as np

    from lale.helpers import cross_val_score

    scores = cross_val_score(op, X, y, cv=num_folds)
    return 1 - np.mean(scores)  # Minimize!
def test_cv_scoring(self):
    """A custom scoring callable is accepted; the default cv produces 5 results."""
    from sklearn.metrics import confusion_matrix

    from lale.helpers import cross_val_score

    estimator = LogisticRegression(n_jobs=1)
    iris = sklearn.datasets.load_iris()
    results = cross_val_score(
        estimator, iris.data, iris.target, scoring=confusion_matrix
    )
    # Default number of folds is 5, so five per-fold results come back.
    self.assertEqual(len(results), 5)
def test_comparison_with_scikit(self):
    """Lale's cross_val_score must match scikit-learn's on equivalent pipelines."""
    import warnings

    warnings.filterwarnings("ignore")
    import sklearn.datasets
    import sklearn.utils

    from lale.helpers import cross_val_score
    from lale.lib.sklearn import PCA

    digits = sklearn.datasets.load_digits()
    X, y = sklearn.utils.shuffle(digits.data, digits.target, random_state=42)

    # --- lale pipeline: (PCA & Nystroem) >> ConcatFeatures >> LogisticRegression
    pca = PCA(n_components=3, random_state=42, svd_solver="arpack")
    nys = Nystroem(n_components=10, random_state=42)
    concat = ConcatFeatures()
    lr = LogisticRegression(random_state=42, C=0.1)
    trainable = (pca & nys) >> concat >> lr
    cv_results = cross_val_score(trainable, X, y)
    cv_results = ["{0:.1%}".format(score) for score in cv_results]

    # --- equivalent pure scikit-learn pipeline (this import intentionally
    # shadows lale's cross_val_score from here on).
    from sklearn.decomposition import PCA as SklearnPCA
    from sklearn.kernel_approximation import Nystroem as SklearnNystroem
    from sklearn.linear_model import LogisticRegression as SklearnLR
    from sklearn.model_selection import cross_val_score
    from sklearn.pipeline import FeatureUnion, make_pipeline

    union = FeatureUnion(
        [
            (
                "pca",
                SklearnPCA(n_components=3, random_state=42, svd_solver="arpack"),
            ),
            ("nys", SklearnNystroem(n_components=10, random_state=42)),
        ]
    )
    lr = SklearnLR(random_state=42, C=0.1)
    pipeline = make_pipeline(union, lr)
    scikit_cv_results = cross_val_score(pipeline, X, y, cv=5)
    scikit_cv_results = ["{0:.1%}".format(score) for score in scikit_cv_results]

    self.assertEqual(cv_results, scikit_cv_results)
    warnings.resetwarnings()
def test_clone_with_scikit2(self):
    """sklearn.base.clone of a lale pipeline must score identically under CV."""
    from sklearn.base import clone
    from sklearn.datasets import load_iris
    from sklearn.metrics import accuracy_score, make_scorer
    from sklearn.model_selection import cross_val_score

    iris = load_iris()
    X, y = iris.data, iris.target
    scorer = make_scorer(accuracy_score)

    def check_clone_scores_match(pipe):
        # The clone must produce the same score on every fold.
        cloned = clone(pipe)
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            original_scores = cross_val_score(pipe, X, y, scoring=scorer, cv=2)
            cloned_scores = cross_val_score(cloned, X, y, scoring=scorer, cv=2)
        for orig_score, clone_score in zip(original_scores, cloned_scores):
            self.assertEqual(orig_score, clone_score)

    pipeline = PCA() >> LogisticRegression()
    check_clone_scores_match(pipeline)
    # Also exercise cloning of a nested linear pipeline.
    check_clone_scores_match(PCA() >> pipeline)
def test_clone_operator_choice(self):
    """A make_sklearn_compat wrapper must survive sklearn.base.clone unchanged."""
    from sklearn.base import clone
    from sklearn.datasets import load_iris
    from sklearn.metrics import accuracy_score, make_scorer
    from sklearn.model_selection import cross_val_score

    iris = load_iris()
    X, y = iris.data, iris.target
    scorer = make_scorer(accuracy_score)

    wrapped = make_sklearn_compat(PCA() >> LogisticRegression())
    cloned = clone(wrapped)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        scores = cross_val_score(wrapped, X, y, scoring=scorer, cv=2)
        scores_cloned = cross_val_score(cloned, X, y, scoring=scorer, cv=2)
    # Fold-by-fold equality between the wrapper and its clone.
    for first, second in zip(scores, scores_cloned):
        self.assertEqual(first, second)
def test_cv_folds_scikit(self):
    """scikit-learn's own cross_val_score accepts a lale trainable with KFold(2)."""
    from sklearn.metrics import accuracy_score, make_scorer
    from sklearn.model_selection import KFold, cross_val_score

    estimator = LogisticRegression(n_jobs=1)
    iris = sklearn.datasets.load_iris()
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        scores = cross_val_score(
            estimator,
            iris.data,
            iris.target,
            cv=KFold(2),
            scoring=make_scorer(accuracy_score),
        )
    self.assertEqual(len(scores), 2)
def test_resampler(self):
    """Exercise the resampler operator named by the free variable `res_name`.

    Covers: schema validity, init/fit/predict inside pipelines, Hyperopt
    search over several pipeline shapes, cross-validation, and JSON export.
    NOTE(review): `res_name` is not defined in this block — presumably bound
    by a test-generation loop in the enclosing module; verify against caller.
    """
    from lale.lib.sklearn import PCA, Nystroem, LogisticRegression, RandomForestClassifier
    from lale.lib.lale import NoOp, ConcatFeatures
    X_train, y_train = self.X_train, self.y_train
    X_test, y_test = self.X_test, self.y_test
    import importlib
    # Resolve the operator class from its dotted path: everything before the
    # last '.' is the module, the last component is the class name.
    module_name = ".".join(res_name.split('.')[0:-1])
    class_name = res_name.split('.')[-1]
    module = importlib.import_module(module_name)
    class_ = getattr(module, class_name)
    # Constructing the resampler without an `operator` argument must fail.
    with self.assertRaises(ValueError):
        res = class_()
    #test_schemas_are_schemas
    lale.type_checking.validate_is_schema(class_.input_schema_fit())
    lale.type_checking.validate_is_schema(class_.input_schema_predict())
    lale.type_checking.validate_is_schema(class_.output_schema_predict())
    lale.type_checking.validate_is_schema(class_.hyperparam_schema())
    #test_init_fit_predict
    from lale.operators import make_pipeline
    # Resampler wrapping a downstream estimator, preceded by PCA.
    pipeline1 = PCA() >> class_(operator=make_pipeline(LogisticRegression()))
    trained = pipeline1.fit(X_train, y_train)
    predictions = trained.predict(X_test)
    # Resampler wrapping a whole (PCA -> LR) pipeline.
    pipeline2 = class_(operator=make_pipeline(PCA(), LogisticRegression()))
    trained = pipeline2.fit(X_train, y_train)
    predictions = trained.predict(X_test)
    #test_with_hyperopt
    from lale.lib.lale import Hyperopt
    # Note: `PCA >>` (class, not instance) leaves PCA's hyperparameters free
    # for Hyperopt to search.
    optimizer = Hyperopt(estimator=PCA >> class_(operator=make_pipeline(LogisticRegression())), max_evals = 1, show_progressbar=False)
    trained_optimizer = optimizer.fit(X_train, y_train)
    predictions = trained_optimizer.predict(X_test)
    # Resampler around a diamond-shaped pipeline (Nystroem & NoOp branches).
    pipeline3 = class_(operator= PCA() >> (Nystroem & NoOp) >> ConcatFeatures >> LogisticRegression())
    optimizer = Hyperopt(estimator=pipeline3, max_evals = 1, show_progressbar=False)
    trained_optimizer = optimizer.fit(X_train, y_train)
    predictions = trained_optimizer.predict(X_test)
    # Two resampler instances in parallel branches, concatenated.
    pipeline4 = (PCA >> class_(operator=make_pipeline(Nystroem())) & class_(operator=make_pipeline(Nystroem()))) >> ConcatFeatures >> LogisticRegression()
    optimizer = Hyperopt(estimator=pipeline4, max_evals = 1, scoring='roc_auc', show_progressbar=False)
    trained_optimizer = optimizer.fit(X_train, y_train)
    predictions = trained_optimizer.predict(X_test)
    #test_cross_validation
    from lale.helpers import cross_val_score
    cv_results = cross_val_score(pipeline1, X_train, y_train, cv = 2)
    self.assertEqual(len(cv_results), 2)
    #test_to_json
    pipeline1.to_json()
def test_classifier(self):
    """Exercise the classifier operator named by the free variable `clf_name`.

    Covers: schema validity, init/fit/predict, Hyperopt, cross-validation,
    auto-wrapped GridSearchCV, predict-on-trainable, JSON export, and use
    inside a pipeline.
    NOTE(review): `clf_name` is not defined in this block — presumably bound
    by a test-generation loop in the enclosing module; verify against caller.
    """
    X_train, y_train = self.X_train, self.y_train
    X_test, y_test = self.X_test, self.y_test
    import importlib
    # Resolve the operator class from its dotted path: everything before the
    # last '.' is the module, the last component is the class name.
    module_name = ".".join(clf_name.split('.')[0:-1])
    class_name = clf_name.split('.')[-1]
    module = importlib.import_module(module_name)
    class_ = getattr(module, class_name)
    clf = class_()
    #test_schemas_are_schemas
    lale.type_checking.validate_is_schema(clf.input_schema_fit())
    lale.type_checking.validate_is_schema(clf.input_schema_predict())
    lale.type_checking.validate_is_schema(clf.output_schema_predict())
    lale.type_checking.validate_is_schema(clf.hyperparam_schema())
    #test_init_fit_predict
    trained = clf.fit(self.X_train, self.y_train)
    predictions = trained.predict(self.X_test)
    #test_with_hyperopt
    from lale.lib.lale import Hyperopt
    hyperopt = Hyperopt(estimator=clf, max_evals=1)
    trained = hyperopt.fit(self.X_train, self.y_train)
    predictions = trained.predict(self.X_test)
    #test_cross_validation
    from lale.helpers import cross_val_score
    cv_results = cross_val_score(clf, X_train, y_train, cv=2)
    self.assertEqual(len(cv_results), 2)
    #test_with_gridsearchcv_auto_wrapped
    from sklearn.metrics import accuracy_score, make_scorer
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        from lale.lib.sklearn.gradient_boosting_classifier import GradientBoostingClassifierImpl
        from lale.lib.sklearn.mlp_classifier import MLPClassifierImpl
        if clf._impl_class() == GradientBoostingClassifierImpl:
            #because exponential loss does not work with iris dataset as it is not binary classification
            import lale.schemas as schemas
            clf = clf.customize_schema(
                loss=schemas.Enum(default='deviance', values=['deviance']))
        grid_search = lale.lib.lale.GridSearchCV(
            estimator=clf, lale_num_samples=1, lale_num_grids=1,
            cv=2, scoring=make_scorer(accuracy_score))
        grid_search.fit(X_train, y_train)
    #test_predict_on_trainable
    trained = clf.fit(X_train, y_train)
    clf.predict(X_train)
    #test_to_json
    clf.to_json()
    #test_in_a_pipeline
    pipeline = NoOp() >> clf
    trained = pipeline.fit(self.X_train, self.y_train)
    predictions = trained.predict(self.X_test)