def test_classifier(self):
    X_train, y_train = self.X_train, self.y_train
    X_test, y_test = self.X_test, self.y_test
    import importlib

    module_name = ".".join(clf_name.split(".")[0:-1])
    class_name = clf_name.split(".")[-1]
    module = importlib.import_module(module_name)
    class_ = getattr(module, class_name)
    clf = class_()

    # test_schemas_are_schemas
    lale.type_checking.validate_is_schema(clf.input_schema_fit())
    lale.type_checking.validate_is_schema(clf.input_schema_predict())
    lale.type_checking.validate_is_schema(clf.output_schema_predict())
    lale.type_checking.validate_is_schema(clf.hyperparam_schema())

    # test_init_fit_predict
    trained = clf.fit(self.X_train, self.y_train)
    predictions = trained.predict(self.X_test)

    # test_with_hyperopt
    from lale.lib.lale import Hyperopt

    hyperopt = Hyperopt(estimator=clf, max_evals=1)
    trained = hyperopt.fit(self.X_train, self.y_train)
    predictions = trained.predict(self.X_test)

    # test_cross_validation
    from lale.helpers import cross_val_score

    cv_results = cross_val_score(clf, X_train, y_train, cv=2)
    self.assertEqual(len(cv_results), 2)

    # test_with_gridsearchcv_auto_wrapped
    from sklearn.metrics import accuracy_score, make_scorer

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        from lale.lib.sklearn.gradient_boosting_classifier import (
            GradientBoostingClassifierImpl,
        )

        if clf._impl_class() == GradientBoostingClassifierImpl:
            # exponential loss does not work with the iris dataset, since it is
            # not a binary classification problem
            import lale.schemas as schemas

            clf = clf.customize_schema(
                loss=schemas.Enum(default="deviance", values=["deviance"])
            )
        grid_search = lale.lib.lale.GridSearchCV(
            estimator=clf,
            lale_num_samples=1,
            lale_num_grids=1,
            cv=2,
            scoring=make_scorer(accuracy_score),
        )
        grid_search.fit(X_train, y_train)

    # test_predict_on_trainable
    trained = clf.fit(X_train, y_train)
    clf.predict(X_train)

    # test_to_json
    clf.to_json()

    # test_in_a_pipeline
    pipeline = NoOp() >> clf
    trained = pipeline.fit(self.X_train, self.y_train)
    predictions = trained.predict(self.X_test)
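# `test_classifier` above refers to `clf_name`, which is not defined inside the
# function, so it is presumably generated by a factory that binds one operator
# name per test.  A minimal sketch of that pattern, under the assumption of a
# hypothetical factory name (`create_function_test_classifier`) and a shortened
# body; the real suite would place the full body above inside the closure:
def create_function_test_classifier(clf_name):
    def test_classifier(self):
        import importlib

        # resolve the operator class from its dotted path, as in the body above
        module_name = ".".join(clf_name.split(".")[0:-1])
        class_name = clf_name.split(".")[-1]
        module = importlib.import_module(module_name)
        clf = getattr(module, class_name)()
        trained = clf.fit(self.X_train, self.y_train)
        trained.predict(self.X_test)

    test_classifier.__name__ = "test_{}".format(clf_name.split(".")[-1])
    return test_classifier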
def test_resampler(self):
    from lale.lib.sklearn import PCA, LogisticRegression

    X_train, y_train = self.X_train, self.y_train
    X_test = self.X_test
    import importlib

    module_name = ".".join(res_name.split(".")[0:-1])
    class_name = res_name.split(".")[-1]
    module = importlib.import_module(module_name)
    class_ = getattr(module, class_name)
    with self.assertRaises(ValidationError):
        _ = class_()

    # test_schemas_are_schemas
    lale.type_checking.validate_is_schema(class_.input_schema_fit())
    lale.type_checking.validate_is_schema(class_.input_schema_predict())
    lale.type_checking.validate_is_schema(class_.output_schema_predict())
    lale.type_checking.validate_is_schema(class_.hyperparam_schema())

    # test_init_fit_predict
    from lale.operators import make_pipeline

    pipeline1 = PCA() >> class_(operator=make_pipeline(LogisticRegression()))
    trained = pipeline1.fit(X_train, y_train)
    _ = trained.predict(X_test)

    pipeline2 = class_(operator=make_pipeline(PCA(), LogisticRegression()))
    trained = pipeline2.fit(X_train, y_train)
    _ = trained.predict(X_test)

    # test_with_hyperopt
    from lale.lib.lale import Hyperopt

    optimizer = Hyperopt(
        estimator=PCA >> class_(operator=make_pipeline(LogisticRegression())),
        max_evals=1,
        show_progressbar=False,
    )
    trained_optimizer = optimizer.fit(X_train, y_train)
    _ = trained_optimizer.predict(X_test)

    pipeline3 = class_(
        operator=PCA() >> (Nystroem & NoOp) >> ConcatFeatures >> LogisticRegression()
    )
    optimizer = Hyperopt(estimator=pipeline3, max_evals=1, show_progressbar=False)
    trained_optimizer = optimizer.fit(X_train, y_train)
    _ = trained_optimizer.predict(X_test)

    pipeline4 = (
        (
            PCA >> class_(operator=make_pipeline(Nystroem()))
            & class_(operator=make_pipeline(Nystroem()))
        )
        >> ConcatFeatures
        >> LogisticRegression()
    )
    optimizer = Hyperopt(
        estimator=pipeline4, max_evals=1, scoring="roc_auc", show_progressbar=False
    )
    trained_optimizer = optimizer.fit(X_train, y_train)
    _ = trained_optimizer.predict(X_test)

    # test_cross_validation
    from lale.helpers import cross_val_score

    cv_results = cross_val_score(pipeline1, X_train, y_train, cv=2)
    self.assertEqual(len(cv_results), 2)

    # test_to_json
    pipeline1.to_json()
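# The tests in this excerpt also rely on names presumably imported at module
# level but not shown here: `lale`, `warnings`, `NoOp`, `Nystroem`,
# `ConcatFeatures`, `ValidationError`, `res_name` (bound by a factory analogous
# to the classifier one sketched above), and the relational operators used by
# `test_with_hyperopt2` below.  A sketch of the presumed imports; the exact
# module paths are assumptions and may differ in the actual test file:
import warnings

from jsonschema import ValidationError

import lale.lib.lale
import lale.type_checking
from lale import wrap_imported_operators
from lale.lib.lale import (
    Aggregate,
    ConcatFeatures,
    Join,
    Map,
    NoOp,
    Relational,
    Scan,
)
from lale.lib.sklearn import KNeighborsClassifier, LogisticRegression, Nystroem
from lale.operators import make_pipeline_graph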
def test_with_hyperopt2(self):
    from lale.expressions import (
        count,
        it,
        max,
        mean,
        min,
        string_indexer,
        sum,
        variance,
    )

    wrap_imported_operators()
    scan = Scan(table=it["main"])
    scan_0 = Scan(table=it["customers"])
    join = Join(
        pred=[
            (
                it["main"]["group_customer_id"]
                == it["customers"]["group_customer_id"]
            )
        ]
    )
    map = Map(
        columns={
            "[main](group_customer_id)[customers]|number_children|identity": it[
                "number_children"
            ],
            "[main](group_customer_id)[customers]|name|identity": it["name"],
            "[main](group_customer_id)[customers]|income|identity": it["income"],
            "[main](group_customer_id)[customers]|address|identity": it["address"],
            "[main](group_customer_id)[customers]|age|identity": it["age"],
        },
        remainder="drop",
    )
    pipeline_4 = join >> map
    scan_1 = Scan(table=it["purchase"])
    join_0 = Join(
        pred=[(it["main"]["group_id"] == it["purchase"]["group_id"])],
        join_limit=50.0,
    )
    aggregate = Aggregate(
        columns={
            "[main](group_id)[purchase]|price|variance": variance(it["price"]),
            "[main](group_id)[purchase]|time|sum": sum(it["time"]),
            "[main](group_id)[purchase]|time|mean": mean(it["time"]),
            "[main](group_id)[purchase]|time|min": min(it["time"]),
            "[main](group_id)[purchase]|price|sum": sum(it["price"]),
            "[main](group_id)[purchase]|price|count": count(it["price"]),
            "[main](group_id)[purchase]|price|mean": mean(it["price"]),
            "[main](group_id)[purchase]|price|min": min(it["price"]),
            "[main](group_id)[purchase]|price|max": max(it["price"]),
            "[main](group_id)[purchase]|time|max": max(it["time"]),
            "[main](group_id)[purchase]|time|variance": variance(it["time"]),
        },
        group_by=it["row_id"],
    )
    pipeline_5 = join_0 >> aggregate
    map_0 = Map(
        columns={
            "[main]|group_customer_id|identity": it["group_customer_id"],
            "[main]|transaction_id|identity": it["transaction_id"],
            "[main]|group_id|identity": it["group_id"],
            "[main]|comments|identity": it["comments"],
            "[main]|id|identity": it["id"],
            "prefix_0_id": it["prefix_0_id"],
            "next_purchase": it["next_purchase"],
            "[main]|time|identity": it["time"],
        },
        remainder="drop",
    )
    scan_2 = Scan(table=it["transactions"])
    scan_3 = Scan(table=it["products"])
    join_1 = Join(
        pred=[
            (
                it["main"]["transaction_id"]
                == it["transactions"]["transaction_id"]
            ),
            (it["transactions"]["product_id"] == it["products"]["product_id"]),
        ]
    )
    map_1 = Map(
        columns={
            "[main](transaction_id)[transactions](product_id)[products]|price|identity": it[
                "price"
            ],
            "[main](transaction_id)[transactions](product_id)[products]|type|identity": it[
                "type"
            ],
        },
        remainder="drop",
    )
    pipeline_6 = join_1 >> map_1
    join_2 = Join(
        pred=[
            (
                it["main"]["transaction_id"]
                == it["transactions"]["transaction_id"]
            )
        ]
    )
    map_2 = Map(
        columns={
            "[main](transaction_id)[transactions]|description|identity": it[
                "description"
            ],
            "[main](transaction_id)[transactions]|product_id|identity": it[
                "product_id"
            ],
        },
        remainder="drop",
    )
    pipeline_7 = join_2 >> map_2
    map_3 = Map(
        columns=[
            string_indexer(it["[main]|comments|identity"]),
            string_indexer(
                it["[main](transaction_id)[transactions]|description|identity"]
            ),
            string_indexer(
                it[
                    "[main](transaction_id)[transactions](product_id)[products]|type|identity"
                ]
            ),
            string_indexer(it["[main](group_customer_id)[customers]|name|identity"]),
            string_indexer(
                it["[main](group_customer_id)[customers]|address|identity"]
            ),
        ]
    )
    pipeline_8 = ConcatFeatures() >> map_3
    relational = Relational(
        operator=make_pipeline_graph(
            steps=[
                scan,
                scan_0,
                pipeline_4,
                scan_1,
                pipeline_5,
                map_0,
                scan_2,
                scan_3,
                pipeline_6,
                pipeline_7,
                pipeline_8,
            ],
            edges=[
                (scan, pipeline_4),
                (scan, pipeline_5),
                (scan, map_0),
                (scan, pipeline_6),
                (scan, pipeline_7),
                (scan_0, pipeline_4),
                (pipeline_4, pipeline_8),
                (scan_1, pipeline_5),
                (pipeline_5, pipeline_8),
                (map_0, pipeline_8),
                (scan_2, pipeline_6),
                (scan_2, pipeline_7),
                (scan_3, pipeline_6),
                (pipeline_6, pipeline_8),
                (pipeline_7, pipeline_8),
            ],
        )
    )
    pipeline = relational >> (KNeighborsClassifier | LogisticRegression)
    from sklearn.datasets import load_iris

    X, y = load_iris(return_X_y=True)
    from lale.lib.lale import Hyperopt

    opt = Hyperopt(estimator=pipeline, max_evals=2)
    opt.fit(X, y)
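# A minimal sketch of how these tests could be hosted: a TestCase whose setUp
# provides the self.X_train / self.X_test / self.y_train / self.y_test fixtures
# used above.  The class name, the Iris dataset, and the split parameters are
# assumptions for illustration (the GradientBoosting comment in test_classifier
# suggests an Iris-like multiclass fixture), not taken from this file:
import unittest

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split


class TestGeneratedTests(unittest.TestCase):
    def setUp(self):
        X, y = load_iris(return_X_y=True)
        # small fixed split so the generated tests run quickly and deterministically
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
            X, y, test_size=0.2, random_state=42
        )


# One generated test per operator name could then be attached, for example:
# setattr(
#     TestGeneratedTests,
#     "test_LogisticRegression",
#     create_function_test_classifier("lale.lib.sklearn.LogisticRegression"),
# )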