def test_export_to_sklearn_pipeline_with_noop_3(self):
    """Export a pipeline whose *last* step is NoOp; must not raise."""
    # This test is probably unnecessary, but doesn't harm at this point
    pipeline = PCA(n_components=3) >> KNeighborsClassifier() >> NoOp()
    trained = pipeline.fit(self.X_train, self.y_train)
    _ = trained.export_to_sklearn_pipeline()
def test_export_to_sklearn_pipeline_with_noop_4(self):
    """Export a pipeline whose *first* step is NoOp; the exported sklearn
    pipeline must predict the same labels as the trained lale pipeline."""
    pipeline = NoOp() >> KNeighborsClassifier()
    trained = pipeline.fit(self.X_train, self.y_train)
    exported = trained.export_to_sklearn_pipeline()
    self.assert_equal_predictions(exported, trained)
def test_classifier(self):
    """Run the standard battery of checks against the operator named by the
    module-level ``clf_name`` (dotted path, e.g. ``lale.lib.sklearn.KNeighborsClassifier``)."""
    X_train, y_train = self.X_train, self.y_train
    X_test, y_test = self.X_test, self.y_test
    import importlib

    # Resolve the operator class from its dotted path and instantiate it.
    module_path = ".".join(clf_name.split(".")[:-1])
    cls_name = clf_name.split(".")[-1]
    operator_cls = getattr(importlib.import_module(module_path), cls_name)
    clf = operator_cls()

    # test_schemas_are_schemas: every advertised schema must be valid JSON Schema.
    from lale.helpers import validate_is_schema
    for schema in (
        clf.input_schema_fit(),
        clf.input_schema_predict(),
        clf.output_schema(),
        clf.hyperparam_schema(),
    ):
        validate_is_schema(schema)

    # test_init_fit_predict
    trained = clf.fit(self.X_train, self.y_train)
    _ = trained.predict(self.X_test)

    # test_with_hyperopt
    from lale.lib.lale import HyperoptClassifier
    hyperopt = HyperoptClassifier(model=clf, max_evals=1)
    trained = hyperopt.fit(self.X_train, self.y_train)
    _ = trained.predict(self.X_test)

    # test_cross_validation
    from lale.helpers import cross_val_score
    cv_results = cross_val_score(clf, X_train, y_train, cv=2)
    self.assertEqual(len(cv_results), 2)

    # test_with_gridsearchcv_auto_wrapped
    from sklearn.metrics import accuracy_score, make_scorer
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        from lale.lib.sklearn.gradient_boosting_classifier import GradientBoostingClassifierImpl
        from lale.lib.sklearn.mlp_classifier import MLPClassifierImpl
        if isinstance(clf._impl, GradientBoostingClassifierImpl):
            # because exponential loss does not work with iris dataset as it
            # is not binary classification
            import lale.schemas as schemas
            clf = clf.customize_schema(
                loss=schemas.Enum(default='deviance', values=['deviance']))
        if not isinstance(clf._impl, MLPClassifierImpl):
            # mlp fails due to issue #164
            grid_search = LaleGridSearchCV(
                clf, lale_num_samples=1, lale_num_grids=1, cv=2,
                scoring=make_scorer(accuracy_score))
            grid_search.fit(X_train, y_train)

    # test_predict_on_trainable: predicting via the trainable (not the
    # returned trained operator) is itself part of the contract under test.
    trained = clf.fit(X_train, y_train)
    clf.predict(X_train)

    # test_to_json
    clf.to_json()

    # test_in_a_pipeline
    pipeline = NoOp() >> clf
    trained = pipeline.fit(self.X_train, self.y_train)
    _ = trained.predict(self.X_test)
def test_no_partial_fit(self):
    """Batching over a pipeline that presumably lacks partial_fit support
    must raise AttributeError at fit time.

    Fix: drop the unused ``trained =`` binding inside ``assertRaises`` —
    the return value can never be reached when the expected error fires.
    """
    pipeline = Batching(operator=NoOp() >> LogisticRegression())
    with self.assertRaises(AttributeError):
        pipeline.fit(self.X_train, self.y_train)
def test_classifier(self):
    """Run the standard battery of checks against the operator named by the
    module-level ``clf_name`` (dotted path to a lale operator class)."""
    X_train, y_train = self.X_train, self.y_train
    import importlib

    # Resolve the operator class from its dotted path and instantiate it.
    module_path = ".".join(clf_name.split(".")[:-1])
    cls_name = clf_name.split(".")[-1]
    operator_cls = getattr(importlib.import_module(module_path), cls_name)
    clf = operator_cls()

    # test_schemas_are_schemas: every advertised schema must be valid JSON Schema.
    for schema in (
        clf.input_schema_fit(),
        clf.input_schema_predict(),
        clf.output_schema_predict(),
        clf.hyperparam_schema(),
    ):
        lale.type_checking.validate_is_schema(schema)

    # test_init_fit_predict
    trained = clf.fit(self.X_train, self.y_train)
    _ = trained.predict(self.X_test)

    # test score
    _ = trained.score(self.X_test, self.y_test)

    from lale.lib.sklearn.gradient_boosting_classifier import (
        GradientBoostingClassifier,
    )

    if isinstance(clf, GradientBoostingClassifier):  # type: ignore
        # because exponential loss does not work with iris dataset as it is
        # not binary classification
        import lale.schemas as schemas

        clf = clf.customize_schema(
            loss=schemas.Enum(default="deviance", values=["deviance"]))

    # test_with_hyperopt
    from lale.lib.lale import Hyperopt

    hyperopt = Hyperopt(estimator=clf, max_evals=1, verbose=True)
    trained = hyperopt.fit(self.X_train, self.y_train)
    _ = trained.predict(self.X_test)

    # test_cross_validation
    from lale.helpers import cross_val_score

    cv_results = cross_val_score(clf, X_train, y_train, cv=2)
    self.assertEqual(len(cv_results), 2)

    # test_with_gridsearchcv_auto_wrapped
    from sklearn.metrics import accuracy_score, make_scorer

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        grid_search = lale.lib.lale.GridSearchCV(
            estimator=clf,
            lale_num_samples=1,
            lale_num_grids=1,
            cv=2,
            scoring=make_scorer(accuracy_score),
        )
        grid_search.fit(X_train, y_train)

    # test_predict_on_trainable: calling predict on the trainable (not the
    # returned trained operator) is itself part of the contract under test.
    trained = clf.fit(X_train, y_train)
    clf.predict(X_train)

    # test_to_json
    clf.to_json()

    # test_in_a_pipeline
    pipeline = NoOp() >> clf
    trained = pipeline.fit(self.X_train, self.y_train)
    _ = trained.predict(self.X_test)
def test_planned_pipeline_3(self):
    """Hyperopt over a plan with two concat stages and a classifier choice."""
    # >> composition is associative, so building the plan in named stages
    # yields the same planned pipeline as one long expression.
    stage_one = (MinMaxScaler() & NoOp()) >> ConcatFeatures()
    stage_two = (StandardScaler & (NoOp() | MinMaxScaler())) >> ConcatFeatures()
    estimator_choice = LogisticRegression | KNeighborsClassifier
    plan = stage_one >> stage_two >> estimator_choice
    run_hyperopt_on_planned_pipeline(plan)
def test_remove_last5(self):
    """remove_last(inplace=True) returns the mutated pipeline, so the result
    of freezing the remainder can be chained directly."""
    branches = PCA() & Nystroem() & PassiveAggressiveClassifier()
    pipeline = (
        StandardScaler()
        >> branches
        >> ConcatFeatures()
        >> NoOp()
        >> PassiveAggressiveClassifier()
    )
    pipeline.remove_last(inplace=True).freeze_trainable()
def test_transform_schema_NoOp(self):
    """NoOp.transform_schema must return the input schema object itself
    (identity, not just equality) for every fixture dataset."""
    from lale.datasets.data_schemas import to_schema
    datasets = (
        self._irisArr,
        self._irisDf,
        self._digits,
        self._housing,
        self._creditG,
        self._movies,
        self._drugRev,
    )
    for dataset in datasets:
        schema_in = to_schema(dataset['X'])
        schema_out = NoOp.transform_schema(schema_in)
        self.assertIs(schema_in, schema_out)
def test_two_estimators_predict_proba1(self):
    """predict_proba through a pipeline containing two GaussianNB estimators
    (one in a branch, one final), called on the pipeline after fit."""
    branches = PCA() & Nystroem() & GaussianNB()
    pipeline = (
        StandardScaler()
        >> branches
        >> ConcatFeatures()
        >> NoOp()
        >> GaussianNB()
    )
    pipeline.fit(self.X_train, self.y_train)
    pipeline.predict_proba(self.X_test)
def test_two_estimators_predict_proba(self):
    """predict_proba through a pipeline containing two LogisticRegression
    estimators, called on the trained result of fit."""
    branches = PCA() & Nystroem() & LogisticRegression()
    pipeline = (
        StandardScaler()
        >> branches
        >> ConcatFeatures()
        >> NoOp()
        >> LogisticRegression()
    )
    trained = pipeline.fit(self.X_train, self.y_train)
    trained.predict_proba(self.X_test)
def test_nested(self):
    """JSON round-trip of a planned pipeline with a nested operator choice.

    Checks to_json against an exact expected dict, then from_json followed by
    to_json again must reproduce the same JSON (round-trip stability).
    """
    self.maxDiff = None  # show full dict diffs on failure
    from lale.json_operator import from_json, to_json
    from lale.lib.lale import NoOp
    from lale.lib.sklearn import PCA
    from lale.lib.sklearn import LogisticRegression as LR
    # PCA feeds a choice between a configured LR and a NoOp >> LR sub-pipeline.
    operator = PCA >> (LR(C=0.09) | NoOp >> LR(C=0.19))
    json_expected = {
        "class": "lale.operators.PlannedPipeline",
        "state": "planned",
        "edges": [["pca", "choice"]],
        "steps": {
            "pca": {
                "class": PCA.class_name(),
                "state": "planned",
                "operator": "PCA",
                "label": "PCA",
                "documentation_url": "https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.pca.html",
            },
            "choice": {
                "class": "lale.operators.OperatorChoice",
                "state": "planned",
                "operator": "OperatorChoice",
                "steps": {
                    # first branch: LR with an explicit hyperparameter
                    "lr_0": {
                        "class": LR.class_name(),
                        "state": "trainable",
                        "operator": "LogisticRegression",
                        "label": "LR",
                        "documentation_url": "https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.logistic_regression.html",
                        "hyperparams": {
                            "C": 0.09
                        },
                        "is_frozen_trainable": False,
                    },
                    # second branch: nested trainable sub-pipeline NoOp >> LR
                    "pipeline_1": {
                        "class": "lale.operators.TrainablePipeline",
                        "state": "trainable",
                        "edges": [["no_op", "lr_1"]],
                        "steps": {
                            "no_op": {
                                "class": NoOp.class_name(),
                                "state": "trained",
                                "operator": "NoOp",
                                "label": "NoOp",
                                "documentation_url": "https://lale.readthedocs.io/en/latest/modules/lale.lib.lale.no_op.html",
                                "hyperparams": None,
                                "coefs": None,
                                "is_frozen_trainable": True,
                                "is_frozen_trained": True,
                            },
                            "lr_1": {
                                "class": LR.class_name(),
                                "state": "trainable",
                                "operator": "LogisticRegression",
                                "label": "LR",
                                "documentation_url": "https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.logistic_regression.html",
                                "hyperparams": {
                                    "C": 0.19
                                },
                                "is_frozen_trainable": False,
                            },
                        },
                    },
                },
            },
        },
    }
    json = to_json(operator)
    self.assertEqual(json, json_expected)
    # round-trip: deserialize and re-serialize must be a fixed point
    operator_2 = from_json(json)
    json_2 = to_json(operator_2)
    self.assertEqual(json, json_2)
def test_pipeline_1(self):
    """JSON round-trip of a planned DAG pipeline with a fan-in.

    (PCA & NoOp) >> ConcatFeatures >> LR yields two edges into
    concat_features; checks to_json against an exact expected dict, then
    from_json followed by to_json must reproduce the same JSON.
    """
    self.maxDiff = None  # show full dict diffs on failure
    from lale.json_operator import from_json, to_json
    from lale.lib.lale import ConcatFeatures, NoOp
    from lale.lib.sklearn import PCA
    from lale.lib.sklearn import LogisticRegression as LR
    operator = (PCA & NoOp) >> ConcatFeatures >> LR
    json_expected = {
        "class": "lale.operators.PlannedPipeline",
        "state": "planned",
        "edges": [
            ["pca", "concat_features"],
            ["no_op", "concat_features"],
            ["concat_features", "lr"],
        ],
        "steps": {
            "pca": {
                "class": PCA.class_name(),
                "state": "planned",
                "operator": "PCA",
                "label": "PCA",
                "documentation_url": "https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.pca.html",
            },
            # NoOp serializes as already trained and doubly frozen
            "no_op": {
                "class": NoOp.class_name(),
                "state": "trained",
                "operator": "NoOp",
                "label": "NoOp",
                "documentation_url": "https://lale.readthedocs.io/en/latest/modules/lale.lib.lale.no_op.html",
                "hyperparams": None,
                "coefs": None,
                "is_frozen_trainable": True,
                "is_frozen_trained": True,
            },
            "concat_features": {
                "class": ConcatFeatures.class_name(),
                "state": "trained",
                "operator": "ConcatFeatures",
                "label": "ConcatFeatures",
                "documentation_url": "https://lale.readthedocs.io/en/latest/modules/lale.lib.lale.concat_features.html",
                "hyperparams": None,
                "coefs": None,
                "is_frozen_trainable": True,
                "is_frozen_trained": True,
            },
            "lr": {
                "class": LR.class_name(),
                "state": "planned",
                "operator": "LogisticRegression",
                "label": "LR",
                "documentation_url": "https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.logistic_regression.html",
            },
        },
    }
    json = to_json(operator)
    self.assertEqual(json, json_expected)
    # round-trip: deserialize and re-serialize must be a fixed point
    operator_2 = from_json(json)
    json_2 = to_json(operator_2)
    self.assertEqual(json, json_2)
def test_two_estimators_predict_proba1(self):
    """predict_proba through a pipeline containing two
    PassiveAggressiveClassifier estimators, called on the pipeline after fit."""
    branches = PCA() & Nystroem() & PassiveAggressiveClassifier()
    pipeline = (
        StandardScaler()
        >> branches
        >> ConcatFeatures()
        >> NoOp()
        >> PassiveAggressiveClassifier()
    )
    pipeline.fit(self.X_train, self.y_train)
    pipeline.predict_proba(self.X_test)