def test_fit1(self):
    """Compare a batched lale pipeline against plain scikit-learn.

    Trains ``MinMaxScaler >> MLPClassifier`` wrapped in ``Batching`` with
    ``batch_size=112``, trains the equivalent scikit-learn estimators with
    one ``partial_fit`` call each, and asserts that both reach the same
    accuracy on ``self.X_test`` / ``self.y_test``.
    """
    import warnings

    import numpy as np
    from sklearn.neural_network import MLPClassifier as SkMLPClassifier
    from sklearn.preprocessing import MinMaxScaler as SkMinMaxScaler

    from lale.lib.sklearn import MinMaxScaler, MLPClassifier

    # Scope the blanket "ignore" filter to this test: a bare
    # filterwarnings() call would stay installed in the process-wide
    # filter list and silence warnings in every test that runs afterwards.
    with warnings.catch_warnings():
        warnings.filterwarnings(action="ignore")

        # lale side: scaler and classifier trained through Batching.
        pipeline = Batching(
            operator=MinMaxScaler() >> MLPClassifier(random_state=42),
            batch_size=112,
        )
        trained = pipeline.fit(self.X_train, self.y_train)
        lale_predictions = trained.predict(self.X_test)
        lale_accuracy = accuracy_score(self.y_test, lale_predictions)

        # scikit-learn side: same two steps, fitted via partial_fit.
        prep = SkMinMaxScaler()
        trained_prep = prep.partial_fit(self.X_train, self.y_train)
        X_transformed = trained_prep.transform(self.X_train)

        clf = SkMLPClassifier(random_state=42)
        trained_clf = clf.partial_fit(
            X_transformed, self.y_train, classes=np.unique(self.y_train)
        )
        sklearn_predictions = trained_clf.predict(
            trained_prep.transform(self.X_test)
        )
        sklearn_accuracy = accuracy_score(self.y_test, sklearn_predictions)

    self.assertEqual(lale_accuracy, sklearn_accuracy)
def test_get_named_pipeline(self):
    """Fetch named pipelines from an NSGA2-based ``OptimizeLast`` result.

    Pipeline ``'p0'`` is always fetched; ``'p1'`` is fetched only when the
    result summary has more than one row. Each fetched pipeline must have
    the same type as the trained pipeline handed to ``OptimizeLast``.
    """
    base_pipeline = MinMaxScaler() >> KNeighborsClassifier()
    trained_pipeline = base_pipeline.fit(self.X_train, self.y_train)
    expected_type = type(trained_pipeline)

    fpr_scorer = make_scorer(compute_fpr, greater_is_better=False)
    optimizer = OptimizeLast(
        estimator=trained_pipeline,
        last_optimizer=NSGA2,
        optimizer_args=dict(
            scoring=['accuracy', fpr_scorer],
            best_score=[1, 0],
            cv=3,
            max_evals=20,
            population_size=10,
        ),
    )
    result = optimizer.fit(self.X_train, self.y_train)
    summary = result.summary()

    first = result.get_pipeline(pipeline_name='p0')
    self.assertEqual(expected_type, type(first))

    # A second named pipeline is only looked up when the summary lists one.
    if summary.shape[0] <= 1:
        return
    second = result.get_pipeline(pipeline_name='p1')
    self.assertEqual(expected_type, type(second))
def test_planned_pipeline_3(self):
    """Build a planned pipeline with two feature unions and a classifier
    choice, then pass it to ``run_hyperopt_on_planned_pipeline``.
    """
    # Stage 1: scaled features alongside the untouched features.
    plan = MinMaxScaler() & NoOp()
    plan = plan >> ConcatFeatures()
    # Stage 2: StandardScaler in parallel with a NoOp/MinMaxScaler choice.
    plan = plan >> (StandardScaler & (NoOp() | MinMaxScaler()))
    plan = plan >> ConcatFeatures()
    # Stage 3: the final estimator is left as a choice for the optimizer.
    plan = plan >> (LogisticRegression | KNeighborsClassifier)

    run_hyperopt_on_planned_pipeline(plan)
def test_pipeline_freeze_trained(self):
    """``freeze_trained`` yields a frozen pipeline and leaves the source
    trained pipeline unfrozen.
    """
    from lale.lib.sklearn import MinMaxScaler, LogisticRegression

    trainable = MinMaxScaler() >> LogisticRegression()
    features = [[0.0], [1.0], [2.0]]
    labels = [0.0, 0.0, 1.0]

    unfrozen = trainable.fit(features, labels)
    frozen = unfrozen.freeze_trained()

    # The receiver must be unaffected; only the returned pipeline is frozen.
    self.assertFalse(unfrozen.is_frozen_trained())
    self.assertTrue(frozen.is_frozen_trained())
def test_trained_pipeline_freeze_trainable(self):
    """``freeze_trainable`` on a trained pipeline returns a frozen
    ``TrainedPipeline`` and leaves the receiver unfrozen.
    """
    from lale.lib.sklearn import MinMaxScaler, LogisticRegression
    from lale.operators import TrainedPipeline

    trainable = MinMaxScaler() >> LogisticRegression()
    features = [[0.0], [1.0], [2.0]]
    labels = [0.0, 0.0, 1.0]

    unfrozen = trainable.fit(features, labels)
    self.assertIsInstance(unfrozen, TrainedPipeline)
    self.assertFalse(unfrozen.is_frozen_trainable())

    frozen = unfrozen.freeze_trainable()

    # Freezing must not mutate the pipeline it was called on.
    self.assertFalse(unfrozen.is_frozen_trainable())
    self.assertTrue(frozen.is_frozen_trainable())
    self.assertIsInstance(frozen, TrainedPipeline)
def test_fit2(self):
    """Compare batched lale transforms against plain scikit-learn.

    Trains ``MinMaxScaler >> MinMaxScaler`` wrapped in ``Batching`` with
    ``batch_size=112``, trains two chained scikit-learn ``MinMaxScaler``
    instances with one ``partial_fit`` call each, and checks that the
    first 5 rows and 2 columns of both transformed test sets agree.
    """
    import warnings

    from sklearn.preprocessing import MinMaxScaler as SkMinMaxScaler

    from lale.lib.sklearn import MinMaxScaler

    # Scope the blanket "ignore" filter to this test: a bare
    # filterwarnings() call would stay installed in the process-wide
    # filter list and silence warnings in every test that runs afterwards.
    with warnings.catch_warnings():
        warnings.filterwarnings(action="ignore")

        # lale side: two scalers trained through Batching.
        pipeline = Batching(
            operator=MinMaxScaler() >> MinMaxScaler(), batch_size=112
        )
        trained = pipeline.fit(self.X_train, self.y_train)
        lale_transforms = trained.transform(self.X_test)

        # scikit-learn side: the second scaler is fitted on the output of
        # the first one.
        first_scaler = SkMinMaxScaler()
        trained_first = first_scaler.partial_fit(self.X_train, self.y_train)
        X_transformed = trained_first.transform(self.X_train)

        second_scaler = SkMinMaxScaler()
        trained_second = second_scaler.partial_fit(X_transformed, self.y_train)
        sklearn_transforms = trained_second.transform(
            trained_first.transform(self.X_test)
        )

    # Only the top-left 5x2 corner is compared, element by element.
    for i in range(5):
        for j in range(2):
            self.assertAlmostEqual(
                lale_transforms[i, j], sklearn_transforms[i, j]
            )
def test_fit3(self):
    """Smoke test: a ``Batching`` operator used as a step after ``PCA``
    can be fitted and used for prediction. No result is asserted.
    """
    from lale.lib.sklearn import MinMaxScaler, MLPClassifier, PCA

    reducer = PCA()
    batched_classifier = Batching(
        operator=MinMaxScaler() >> MLPClassifier(random_state=42),
        batch_size=10,
    )
    pipeline = reducer >> batched_classifier

    trained = pipeline.fit(self.X_train, self.y_train)
    _ = trained.predict(self.X_test)
def test_operator_choice(self):
    """Serialize a planned ``PCA | Scl`` choice to JSON and round-trip it.

    The expected label of the second step is ``"Scl"``, i.e. the alias
    under which ``MinMaxScaler`` is imported in this method, so the alias
    and the direct ``to_json`` call below are kept exactly as they are.
    """
    self.maxDiff = None
    from lale.json_operator import from_json, to_json
    from lale.lib.sklearn import PCA
    from lale.lib.sklearn import MinMaxScaler as Scl

    operator = PCA | Scl

    pca_step = {
        "class": PCA.class_name(),
        "state": "planned",
        "operator": "PCA",
        "label": "PCA",
        "documentation_url": "https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.pca.html",
    }
    scl_step = {
        "class": Scl.class_name(),
        "state": "planned",
        "operator": "MinMaxScaler",
        "label": "Scl",
        "documentation_url": "https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.min_max_scaler.html",
    }
    json_expected = {
        "class": "lale.operators.OperatorChoice",
        "operator": "OperatorChoice",
        "state": "planned",
        "steps": {"pca": pca_step, "scl": scl_step},
    }

    # First pass: operator -> JSON.
    json_actual = to_json(operator)
    self.assertEqual(json_actual, json_expected)

    # Second pass: JSON -> operator -> JSON must give the same document.
    json_roundtrip = to_json(from_json(json_actual))
    self.assertEqual(json_roundtrip, json_expected)
def test_astype_sklearn(self):
    """Pretty-print a lale pipeline with ``astype="sklearn"`` and hand the
    expected and printed code to ``self._roundtrip``.
    """
    from lale.lib.lale import ConcatFeatures
    from lale.lib.sklearn import PCA, LogisticRegression, MinMaxScaler, Nystroem

    pca = PCA(copy=False)
    logistic_regression = LogisticRegression(solver="saga", C=0.9)

    # Scaler, then PCA and Nystroem side by side, concatenated and fed to
    # the classifier.
    pipeline = MinMaxScaler() >> (pca & Nystroem())
    pipeline = pipeline >> ConcatFeatures >> logistic_regression

    # NOTE(review): this literal is reproduced exactly as it appears in the
    # reviewed source, where its statements are separated by single spaces.
    # It presumably spans several lines in the real file - confirm against
    # the pretty-printer output before relying on it.
    expected = """from sklearn.preprocessing import MinMaxScaler from sklearn.decomposition import PCA from sklearn.kernel_approximation import Nystroem from sklearn.pipeline import make_union from sklearn.linear_model import LogisticRegression from sklearn.pipeline import make_pipeline pca = PCA(copy=False) union = make_union(pca, Nystroem()) logistic_regression = LogisticRegression(solver="saga", C=0.9) pipeline = make_pipeline(MinMaxScaler(), union, logistic_regression)"""

    printed = lale.pretty_print.to_string(pipeline, astype="sklearn")
    self._roundtrip(expected, printed)
def test_invalid_args(self):
    """``NSGA2`` rejects a missing scorer list, a single scorer, and a
    lale pipeline passed as estimator.
    """
    import jsonschema

    base_args = dict(
        estimator=LGBMClassifier(),
        cv=3,
        max_evals=50,
        population_size=10,
    )

    # Omitting `scoring` altogether fails schema validation.
    with self.assertRaises(jsonschema.exceptions.ValidationError):
        NSGA2(**base_args)

    # Supplying only one scorer is rejected with an assertion.
    with self.assertRaises(AssertionError):
        NSGA2(scoring=['accuracy'], **base_args)

    # A lale pipeline as estimator should raise AssertionError because
    # multi-objective optimization over pipelines is not supported.
    pipeline = MinMaxScaler() >> KNeighborsClassifier()
    fpr_scorer = make_scorer(compute_fpr, greater_is_better=False)
    with self.assertRaises(AssertionError):
        NSGA2(estimator=pipeline, scoring=['accuracy', fpr_scorer])
def test_get_named_pipeline(self):
    """Fetch pipeline ``"p1"`` from a Hyperopt-based ``OptimizeLast``
    result; when it exists, refit it, predict with it, and compare its
    trained type with that of the original trained pipeline.
    """
    from lale.lib.lale import Hyperopt, OptimizeLast

    trained_pipeline = (MinMaxScaler() >> KNeighborsClassifier()).fit(
        self.X_train, self.y_train
    )

    optimizer = OptimizeLast(
        estimator=trained_pipeline,
        last_optimizer=Hyperopt,
        optimizer_args={"cv": 3, "max_evals": 2},
    )
    result = optimizer.fit(self.X_train, self.y_train)

    named = result.get_pipeline(pipeline_name="p1")
    if named is None:
        # Nothing to check when no pipeline with that name is returned.
        return

    retrained = named.fit(self.X_train, self.y_train)
    _ = retrained.predict(self.X_test)
    self.assertEqual(type(trained_pipeline), type(retrained))
def test_batching_with_hyperopt(self):
    """Smoke test: a ``Batching``-wrapped pipeline can be auto-configured
    with Hyperopt (one evaluation) and then used for prediction. No result
    is asserted.
    """
    from lale.lib.sklearn import MinMaxScaler, SGDClassifier
    from lale.lib.lale import Hyperopt, Batching
    from sklearn.metrics import accuracy_score

    batched = Batching(operator=MinMaxScaler() >> SGDClassifier())
    trained = batched.auto_configure(
        self.X_train,
        self.y_train,
        optimizer=Hyperopt,
        max_evals=1,
    )
    _ = trained.predict(self.X_test)
def test_decision_function_2(self):
    """Run Hyperopt with a custom scorer over ``MinMaxScaler >>
    IsolationForest``, fitted on features only, and call
    ``decision_function`` on the pipeline it returns.
    """

    def my_scorer(estimator, X, y=None):
        # Constant score: every candidate is rated the same.
        return 1

    from lale.lib.lale import Hyperopt
    from lale.lib.sklearn import MinMaxScaler

    estimator = MinMaxScaler() >> IsolationForest(
        max_features=1.0, max_samples=1.0
    )
    optimizer = Hyperopt(
        estimator=estimator,
        max_evals=5,
        verbose=True,
        scoring=my_scorer,
    )

    # No labels are passed to fit.
    trained = optimizer.fit(self.X_train)
    best = trained.get_pipeline()
    assert best is not None
    _ = best.decision_function(self.X_test)
def test_pipeline_digits_scaler_j48(self):
    """Smoke test: fit ``MinMaxScaler >> J48`` on the shuffled digits
    dataset, render the trained pipeline with graphviz, and predict on the
    200 held-out samples. No result is asserted.
    """
    import sklearn.datasets
    import sklearn.utils

    digits = sklearn.datasets.load_digits()
    X_all, y_all = sklearn.utils.shuffle(
        digits.data, digits.target, random_state=42
    )

    # The first `holdout_size` shuffled samples form the test split.
    holdout_size = 200
    X_test, y_test = X_all[:holdout_size], y_all[:holdout_size]
    X_train, y_train = X_all[holdout_size:], y_all[holdout_size:]

    from lale.lib.sklearn import MinMaxScaler
    import lale.helpers

    scaler = MinMaxScaler()
    tree = J48()
    trainable = scaler >> tree

    print('before calling fit on pipeline')
    fitted = trainable.fit(X_train, y_train)
    print('after calling fit on pipeline')

    lale.helpers.to_graphviz(fitted)
    _ = fitted.predict(X_test)