def test_08_refine_model_with_lale(self):
    """Refine the AutoAI prefix model by appending a Lale operator choice
    and re-optimizing the combined pipeline with Hyperopt.

    Stores the best found pipeline on the class as ``refined_model``.
    Any exception raised during refinement is reported as a test failure.
    """
    from lale import wrap_imported_operators
    from lale.lib.lale import Hyperopt

    wrap_imported_operators()
    try:
        println_pos(
            f"type(prefix_model) {type(TestAutoAIOutputConsumption.prefix_model)}"
        )
        println_pos(f"type(LR) {type(LR)}")
        # This choice is for classifiers; regressors would need different
        # operators and a different scoring metric (e.g. 'r2').
        new_model = TestAutoAIOutputConsumption.prefix_model >> (LR | Tree | KNN)
        train_X = TestAutoAIOutputConsumption.training_df.drop(
            ["Risk"], axis=1
        ).values
        train_y = TestAutoAIOutputConsumption.training_df["Risk"].values
        # Small search budget: this is a smoke test, not a real tuning run.
        hyperopt = Hyperopt(
            estimator=new_model, cv=2, max_evals=3, scoring="roc_auc"
        )
        hyperopt_pipelines = hyperopt.fit(train_X, train_y)
        TestAutoAIOutputConsumption.refined_model = (
            hyperopt_pipelines.get_pipeline()
        )
    except Exception as e:
        # Use self.fail instead of `assert False`: asserts are stripped
        # under `python -O`, which would silently swallow the failure.
        self.fail(f"Exception was thrown during model refinery: {e}")
def test_wrap_imported_operators(self):
    """After lale.wrap_imported_operators(), the module-level aliases
    foo/bar/baz must carry the schemas of the corresponding Lale operators."""
    from lale.lib.lightgbm import LGBMClassifier
    from lale.lib.sklearn import PCA
    from lale.lib.xgboost import XGBClassifier

    lale.wrap_imported_operators()
    # Each wrapped alias should share its schema with the canonical wrapper.
    pairs = ((foo, PCA), (bar, XGBClassifier), (baz, LGBMClassifier))
    for alias, operator in pairs:
        self.assertEqual(alias._schemas, operator._schemas)
def test_wrapped_from_import(self):
    """UnknownOp must stay unwrapped: wrap_imported_operators() should not
    turn it into a planned Lale operator, neither before nor after the call."""
    saved_globals = dict(globals())
    try:
        from lale.operators import PlannedIndividualOp

        self.assertFalse(isinstance(UnknownOp, PlannedIndividualOp))
        lale.wrap_imported_operators()
        self.assertFalse(isinstance(UnknownOp, PlannedIndividualOp))
    finally:
        # Put back any module globals that wrapping may have replaced.
        globals().update(saved_globals)
def test_manual_grid(self):
    """Drive lale's GridSearchCV by hand with an explicit parameter grid,
    then fit and predict on iris as a smoke test."""
    from sklearn.datasets import load_iris

    from lale import wrap_imported_operators
    from lale.lib.lale import GridSearchCV
    from lale.lib.sklearn import SVC

    warnings.simplefilter("ignore")
    wrap_imported_operators()

    iris = load_iris()
    grid = {'kernel': ('linear', 'rbf'), 'C': [1, 10]}
    search = GridSearchCV(estimator=SVC(), param_grid=grid)
    search.fit(iris.data, iris.target)
    search.predict(iris.data)
def test_wrapped_from_import(self):
    """wrap_imported_operators() must turn UnknownOp into a planned Lale
    operator carrying the expected hyperparameter schema, and restore the
    module globals afterwards."""
    old_globals = {**globals()}
    try:
        # `make_operator` was imported here but never used; dropped.
        from lale.operators import PlannedIndividualOp

        # Before wrapping, UnknownOp is still a plain class.
        self.assertFalse(isinstance(UnknownOp, PlannedIndividualOp))
        lale.wrap_imported_operators()
        # After wrapping, it is a planned operator with the expected schema.
        self.assertTrue(isinstance(UnknownOp, PlannedIndividualOp))
        self.assertEqual(UnknownOp.hyperparam_schema(), self.expected_schema)
        instance = UnknownOp(n_neighbors=3)
        self.assertEqual(instance.hyperparams(), {'n_neighbors': 3})
    finally:
        # Restore module globals that wrapping replaced.
        for sym, obj in old_globals.items():
            globals()[sym] = obj
def test_wrap_imported_operators(self):
    """Wrapped aliases foo/bar/baz/foobar must share schemas with the
    canonical Lale operators; module globals are restored afterwards."""
    saved_globals = dict(globals())
    try:
        from lale.lib.autogen import Lars
        from lale.lib.lightgbm import LGBMClassifier
        from lale.lib.sklearn import PCA
        from lale.lib.xgboost import XGBClassifier

        lale.wrap_imported_operators()
        expectations = [
            (foo, PCA),
            (bar, XGBClassifier),
            (baz, LGBMClassifier),
            (foobar, Lars),
        ]
        for alias, operator in expectations:
            self.assertEqual(alias._schemas, operator._schemas)
    finally:
        # Put back any module globals that wrapping replaced.
        globals().update(saved_globals)
def test_manual_grid(self):
    """Drive lale's HalvingGridSearchCV by hand with an explicit parameter
    grid, then fit and predict on iris as a smoke test."""
    from sklearn.datasets import load_iris

    from lale import wrap_imported_operators
    from lale.lib.lale import HalvingGridSearchCV
    from lale.lib.sklearn import SVC

    warnings.simplefilter("ignore")
    wrap_imported_operators()

    data = load_iris()
    grid = {"kernel": ("linear", "rbf"), "C": [1, 10]}
    search = HalvingGridSearchCV(estimator=SVC(), param_grid=grid)
    search.fit(data.data, data.target)
    search.predict(data.data)
def test_wrap_imported_operators(self):
    """With exclude_classes=["foo"], the foo alias must stay a raw sklearn
    PCA while the other aliases are wrapped into Lale operators."""
    saved_globals = dict(globals())
    try:
        from lale.lib.autogen import Lars
        from lale.lib.lightgbm import LGBMClassifier
        from lale.lib.xgboost import XGBClassifier

        lale.wrap_imported_operators(exclude_classes=["foo"])

        # foo was excluded, so instantiating it yields a plain sklearn PCA.
        from sklearn.decomposition import PCA as sklearn_pca

        self.assertIsInstance(foo(), sklearn_pca)
        # The remaining aliases were wrapped and share the Lale schemas.
        self.assertEqual(bar._schemas, XGBClassifier._schemas)  # type: ignore
        self.assertEqual(baz._schemas, LGBMClassifier._schemas)  # type: ignore
        self.assertEqual(foobar._schemas, Lars._schemas)
    finally:
        # Put back any module globals that wrapping replaced.
        globals().update(saved_globals)
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
from sklearn.metrics import mean_squared_error

# Turn iris into a small regression problem: predict the 4th feature
# (petal width) from the first three features.
data = load_iris()
X, y = data.data, data.target
y = X[:, 3]
X = X[:, 0:3]
# NOTE(review): this iris split appears unused by the pipelines below,
# which train on the California-housing data — possibly used further down
# (see the trailing "# iris data" marker); confirm before removing.
X_train, X_test, y_train, y_test = train_test_split(X, y)

# load data
(train_X, train_y), (test_X, test_y) = dt.california_housing_df()
pd.concat([train_X.head(), train_y.head()], axis=1)
lale.wrap_imported_operators()

# pipeline 1: explicit sklearn-style Pipeline of PCA followed by a tree,
# trained as-is (no hyperparameter search).
pca_tree_planned = Pipeline(steps=[("tfm", PCA()), ("estim", Tree())])
pca_tree_planned.fit(train_X, train_y)
predicted = pca_tree_planned.predict(test_X)
print(f'R2 score {sklearn.metrics.r2_score(test_y, predicted):.2f}')

# pipeline 2: the same PCA >> Tree shape expressed as a Lale planned
# pipeline, with hyperparameters chosen by Hyperopt via auto_configure.
pca_tree_planned = PCA() >> Tree()
pca_tree_trained = pca_tree_planned.auto_configure(
    train_X, train_y, optimizer=Hyperopt, cv=3, max_evals=10, verbose=True)
predicted = pca_tree_trained.predict(test_X)
print(f'R2 score {sklearn.metrics.r2_score(test_y, predicted):.2f}')

# iris data
def test_with_hyperopt2(self):
    """Build a multi-table relational feature-extraction pipeline (scans,
    joins, per-group aggregates, string indexing), wrap it in a Relational
    operator, append a classifier choice, and run a short Hyperopt search
    on iris to verify the whole pipeline is trainable.
    """
    from lale.expressions import (
        count,
        it,
        max,
        mean,
        min,
        string_indexer,
        sum,
        variance,
    )

    wrap_imported_operators()

    # Source tables. The "main" table is joined against the others below.
    scan = Scan(table=it["main"])
    scan_0 = Scan(table=it["customers"])

    # main <-> customers on group_customer_id, keeping identity columns.
    join = Join(pred=[(it["main"]["group_customer_id"] == it["customers"]
                       ["group_customer_id"])])
    map = Map(
        columns={
            "[main](group_customer_id)[customers]|number_children|identity":
                it["number_children"],
            "[main](group_customer_id)[customers]|name|identity": it["name"],
            "[main](group_customer_id)[customers]|income|identity":
                it["income"],
            "[main](group_customer_id)[customers]|address|identity":
                it["address"],
            "[main](group_customer_id)[customers]|age|identity": it["age"],
        },
        remainder="drop",
    )
    pipeline_4 = join >> map

    # main <-> purchase on group_id, then per-row_id aggregate statistics
    # over price and time.
    scan_1 = Scan(table=it["purchase"])
    join_0 = Join(
        pred=[(it["main"]["group_id"] == it["purchase"]["group_id"])],
        join_limit=50.0,
    )
    aggregate = Aggregate(
        columns={
            "[main](group_id)[purchase]|price|variance":
                variance(it["price"]),
            "[main](group_id)[purchase]|time|sum": sum(it["time"]),
            "[main](group_id)[purchase]|time|mean": mean(it["time"]),
            "[main](group_id)[purchase]|time|min": min(it["time"]),
            "[main](group_id)[purchase]|price|sum": sum(it["price"]),
            "[main](group_id)[purchase]|price|count": count(it["price"]),
            "[main](group_id)[purchase]|price|mean": mean(it["price"]),
            "[main](group_id)[purchase]|price|min": min(it["price"]),
            "[main](group_id)[purchase]|price|max": max(it["price"]),
            "[main](group_id)[purchase]|time|max": max(it["time"]),
            "[main](group_id)[purchase]|time|variance": variance(it["time"]),
        },
        group_by=it["row_id"],
    )
    pipeline_5 = join_0 >> aggregate

    # Pass-through projection of the main table's own columns.
    map_0 = Map(
        columns={
            "[main]|group_customer_id|identity": it["group_customer_id"],
            "[main]|transaction_id|identity": it["transaction_id"],
            "[main]|group_id|identity": it["group_id"],
            "[main]|comments|identity": it["comments"],
            "[main]|id|identity": it["id"],
            "prefix_0_id": it["prefix_0_id"],
            "next_purchase": it["next_purchase"],
            "[main]|time|identity": it["time"],
        },
        remainder="drop",
    )

    # main <-> transactions <-> products (two-hop join) for price/type.
    scan_2 = Scan(table=it["transactions"])
    scan_3 = Scan(table=it["products"])
    join_1 = Join(pred=[
        (it["main"]["transaction_id"] == it["transactions"]
         ["transaction_id"]),
        (it["transactions"]["product_id"] == it["products"]["product_id"]),
    ])
    map_1 = Map(
        columns={
            "[main](transaction_id)[transactions](product_id)[products]|price|identity":
                it["price"],
            "[main](transaction_id)[transactions](product_id)[products]|type|identity":
                it["type"],
        },
        remainder="drop",
    )
    pipeline_6 = join_1 >> map_1

    # main <-> transactions (single hop) for description/product_id.
    join_2 = Join(pred=[(it["main"]["transaction_id"] == it["transactions"]
                         ["transaction_id"])])
    map_2 = Map(
        columns={
            "[main](transaction_id)[transactions]|description|identity":
                it["description"],
            "[main](transaction_id)[transactions]|product_id|identity":
                it["product_id"],
        },
        remainder="drop",
    )
    pipeline_7 = join_2 >> map_2

    # Encode the textual columns produced upstream as numeric indices.
    map_3 = Map(columns=[
        string_indexer(it["[main]|comments|identity"]),
        string_indexer(
            it["[main](transaction_id)[transactions]|description|identity"]
        ),
        string_indexer(it[
            "[main](transaction_id)[transactions](product_id)[products]|type|identity"]
        ),
        string_indexer(
            it["[main](group_customer_id)[customers]|name|identity"]),
        string_indexer(
            it["[main](group_customer_id)[customers]|address|identity"]),
    ])
    pipeline_8 = ConcatFeatures() >> map_3

    # Assemble the full feature-extraction DAG; edges wire each scan into
    # the join pipelines and fan everything into the final concat/index step.
    relational = Relational(operator=make_pipeline_graph(
        steps=[
            scan,
            scan_0,
            pipeline_4,
            scan_1,
            pipeline_5,
            map_0,
            scan_2,
            scan_3,
            pipeline_6,
            pipeline_7,
            pipeline_8,
        ],
        edges=[
            (scan, pipeline_4),
            (scan, pipeline_5),
            (scan, map_0),
            (scan, pipeline_6),
            (scan, pipeline_7),
            (scan_0, pipeline_4),
            (pipeline_4, pipeline_8),
            (scan_1, pipeline_5),
            (pipeline_5, pipeline_8),
            (map_0, pipeline_8),
            (scan_2, pipeline_6),
            (scan_2, pipeline_7),
            (scan_3, pipeline_6),
            (pipeline_6, pipeline_8),
            (pipeline_7, pipeline_8),
        ],
    ))

    # Append a classifier choice and run a tiny Hyperopt search on iris
    # as a smoke test of trainability.
    pipeline = relational >> (KNeighborsClassifier | LogisticRegression)
    from sklearn.datasets import load_iris
    X, y = load_iris(return_X_y=True)
    from lale.lib.lale import Hyperopt
    opt = Hyperopt(estimator=pipeline, max_evals=2)
    opt.fit(X, y)