def test_08_refine_model_with_lale(self):
    """Refine a stored AutoAI prefix pipeline with a lale classifier choice.

    Appends a classifier choice (LR | Tree | KNN) to the class-level
    ``prefix_model``, runs a small Hyperopt search (cv=2, 3 evaluations,
    roc_auc scoring) on the class-level ``training_df``, and stores the
    best pipeline as ``TestAutoAIOutputConsumption.refined_model``.

    NOTE(review): ``println_pos``, ``LR``, ``Tree``, ``KNN`` and
    ``TestAutoAIOutputConsumption`` are defined elsewhere in this file.
    """
    from lale import wrap_imported_operators
    from lale.lib.lale import Hyperopt

    wrap_imported_operators()
    try:
        println_pos(
            f"type(prefix_model) {type(TestAutoAIOutputConsumption.prefix_model)}"
        )
        println_pos(f"type(LR) {type(LR)}")
        # This is for classifiers; regressors need different operators and
        # different scoring metrics (e.g. 'r2').
        new_model = TestAutoAIOutputConsumption.prefix_model >> (LR | Tree | KNN)
        train_X = TestAutoAIOutputConsumption.training_df.drop(
            ["Risk"], axis=1).values
        train_y = TestAutoAIOutputConsumption.training_df["Risk"].values
        hyperopt = Hyperopt(estimator=new_model,
                            cv=2,
                            max_evals=3,
                            scoring="roc_auc")
        hyperopt_pipelines = hyperopt.fit(train_X, train_y)
        TestAutoAIOutputConsumption.refined_model = (
            hyperopt_pipelines.get_pipeline())
    except Exception as e:
        # ``assert False`` is stripped under ``python -O``; an explicit
        # raise always fires and ``from e`` preserves the original traceback.
        raise AssertionError(
            f"Exception was thrown during model refinery: {e}"
        ) from e
Exemple #2
0
 def test_wrap_imported_operators(self):
     """Wrapped globals must share their schemas with the lale operators."""
     from lale.lib.sklearn import PCA
     from lale.lib.xgboost import XGBClassifier
     from lale.lib.lightgbm import LGBMClassifier
     lale.wrap_imported_operators()
     # Each wrapped symbol should mirror the corresponding lale operator.
     pairs = ((foo, PCA), (bar, XGBClassifier), (baz, LGBMClassifier))
     for wrapped, operator in pairs:
         self.assertEqual(wrapped._schemas, operator._schemas)
Exemple #3
0
    def test_wrapped_from_import(self):
        """wrap_imported_operators should leave ``UnknownOp`` unwrapped.

        NOTE(review): asserting *False* both before and after wrapping looks
        deliberate (``UnknownOp`` is presumably not wrappable) — confirm
        against the sibling variant that asserts True after wrapping.
        """
        old_globals = {**globals()}
        try:
            from lale.operators import PlannedIndividualOp

            self.assertFalse(isinstance(UnknownOp, PlannedIndividualOp))
            lale.wrap_imported_operators()
            self.assertFalse(isinstance(UnknownOp, PlannedIndividualOp))
        finally:
            # Fully restore the module namespace: reassign prior values AND
            # delete names added during the test — the original loop only
            # reassigned, leaving imported symbols behind.
            for sym in list(globals()):
                if sym in old_globals:
                    globals()[sym] = old_globals[sym]
                else:
                    del globals()[sym]
Exemple #4
0
    def test_manual_grid(self):
        """Manual grid search over a wrapped SVC on the iris dataset."""
        from lale.lib.sklearn import SVC
        from sklearn.datasets import load_iris
        from lale.lib.lale import GridSearchCV
        warnings.simplefilter("ignore")

        from lale import wrap_imported_operators
        wrap_imported_operators()
        bunch = load_iris()
        # Explicit grid: both kernels crossed with two regularization values.
        grid = {'kernel': ('linear', 'rbf'), 'C': [1, 10]}
        base = SVC()
        search = GridSearchCV(estimator=base, param_grid=grid)
        search.fit(bunch.data, bunch.target)
        search.predict(bunch.data)
Exemple #5
0
 def test_wrapped_from_import(self):
     """wrap_imported_operators should turn ``UnknownOp`` into a planned op.

     After wrapping, ``UnknownOp`` gains the expected hyperparameter schema
     and accepts hyperparameters at construction time.
     """
     old_globals = {**globals()}
     try:
         from lale.operators import make_operator, PlannedIndividualOp
         self.assertFalse(isinstance(UnknownOp, PlannedIndividualOp))
         lale.wrap_imported_operators()
         self.assertTrue(isinstance(UnknownOp, PlannedIndividualOp))
         self.assertEqual(UnknownOp.hyperparam_schema(),
                          self.expected_schema)
         instance = UnknownOp(n_neighbors=3)
         self.assertEqual(instance.hyperparams(), {'n_neighbors': 3})
     finally:
         # Fully restore the module namespace: reassign prior values AND
         # delete names added during the test — the original loop only
         # reassigned, leaving imported symbols behind.
         for sym in list(globals()):
             if sym in old_globals:
                 globals()[sym] = old_globals[sym]
             else:
                 del globals()[sym]
 def test_wrap_imported_operators(self):
     """Wrapped globals must share their schemas with the lale operators."""
     old_globals = {**globals()}
     try:
         from lale.lib.sklearn import PCA
         from lale.lib.xgboost import XGBClassifier
         from lale.lib.lightgbm import LGBMClassifier
         from lale.lib.autogen import Lars
         lale.wrap_imported_operators()
         self.assertEqual(foo._schemas, PCA._schemas)
         self.assertEqual(bar._schemas, XGBClassifier._schemas)
         self.assertEqual(baz._schemas, LGBMClassifier._schemas)
         self.assertEqual(foobar._schemas, Lars._schemas)
     finally:
         # Fully restore the module namespace: reassign prior values AND
         # delete names added during the test — the original loop only
         # reassigned, leaving imported symbols behind.
         for sym in list(globals()):
             if sym in old_globals:
                 globals()[sym] = old_globals[sym]
             else:
                 del globals()[sym]
Exemple #7
0
    def test_manual_grid(self):
        """Successive-halving grid search over a wrapped SVC on iris."""
        from sklearn.datasets import load_iris

        from lale.lib.lale import HalvingGridSearchCV
        from lale.lib.sklearn import SVC

        warnings.simplefilter("ignore")

        from lale import wrap_imported_operators

        wrap_imported_operators()
        bunch = load_iris()
        # Explicit grid: both kernels crossed with two regularization values.
        grid = {"kernel": ("linear", "rbf"), "C": [1, 10]}
        base = SVC()
        search = HalvingGridSearchCV(estimator=base, param_grid=grid)
        search.fit(bunch.data, bunch.target)
        search.predict(bunch.data)
Exemple #8
0
    def test_wrap_imported_operators(self):
        """``exclude_classes`` should leave ``foo`` as a raw sklearn PCA.

        Everything not excluded (``bar``, ``baz``, ``foobar``) is wrapped
        and shares schemas with the corresponding lale operators.
        """
        old_globals = {**globals()}
        try:
            from lale.lib.autogen import Lars
            from lale.lib.lightgbm import LGBMClassifier
            from lale.lib.xgboost import XGBClassifier

            lale.wrap_imported_operators(exclude_classes=["foo"])
            from sklearn.decomposition import PCA as sklearn_pca

            # ``foo`` was excluded, so instantiating it yields the plain
            # sklearn estimator rather than a lale operator.
            op_obj = foo()
            self.assertIsInstance(op_obj, sklearn_pca)
            self.assertEqual(bar._schemas, XGBClassifier._schemas)  # type: ignore
            self.assertEqual(baz._schemas, LGBMClassifier._schemas)  # type: ignore
            self.assertEqual(foobar._schemas, Lars._schemas)
        finally:
            # Fully restore the module namespace: reassign prior values AND
            # delete names added during the test — the original loop only
            # reassigned, leaving imported symbols behind.
            for sym in list(globals()):
                if sym in old_globals:
                    globals()[sym] = old_globals[sym]
                else:
                    del globals()[sym]
Exemple #9
0

# NOTE(review): this chunk glues together two unrelated script snippets — an
# iris train/test split whose results are never used below, and a
# california-housing lale pipeline demo; confirm against the original source.
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
from sklearn.metrics import mean_squared_error  # unused in the visible chunk
data = load_iris()
X, y = data.data, data.target
# Re-purpose iris as a regression problem: feature column 3 becomes the
# target and the remaining three columns are the inputs.
y=X[:, 3]
X=X[:, 0:3]
X_train, X_test, y_train, y_test = train_test_split(X, y)


# load data
# NOTE(review): `dt`, `pd`, `lale`, `Pipeline`, `PCA`, `Tree`, `sklearn` and
# `Hyperopt` are not defined in this chunk — presumably imported elsewhere.
(train_X, train_y), (test_X, test_y) = dt.california_housing_df()
pd.concat([train_X.head(), train_y.head()], axis=1)
lale.wrap_imported_operators()

# pipeline 1
# Manually configured sklearn-style Pipeline: PCA transform followed by a
# `Tree` estimator, trained as-is (no hyperparameter search).
pca_tree_planned = Pipeline(steps=[("tfm", PCA()), ("estim", Tree())])
pca_tree_planned.fit(train_X, train_y)
predicted = pca_tree_planned.predict(test_X)
print(f'R2 score {sklearn.metrics.r2_score(test_y, predicted):.2f}')

# pipeline 2
# Same shape expressed as a lale planned pipeline (>>), then auto-tuned
# with Hyperopt: 3-fold CV, 10 evaluations.
pca_tree_planned = PCA() >> Tree()
pca_tree_trained = pca_tree_planned.auto_configure(
    train_X, train_y, optimizer=Hyperopt, cv=3, max_evals=10, verbose=True)
predicted = pca_tree_trained.predict(test_X)
print(f'R2 score {sklearn.metrics.r2_score(test_y, predicted):.2f}')

# iris data
Exemple #10
0
    def test_with_hyperopt2(self):
        """Build a multi-table relational feature pipeline and tune it.

        Joins the ``main`` table against ``customers``, ``purchase``,
        ``transactions`` and ``products``, aggregates and string-indexes
        the joined columns, concatenates the features, feeds them to a
        classifier choice, and runs a 2-evaluation Hyperopt search on iris.

        NOTE(review): ``Scan``, ``Join``, ``Map``, ``Aggregate``,
        ``ConcatFeatures``, ``Relational``, ``make_pipeline_graph``,
        ``KNeighborsClassifier`` and ``LogisticRegression`` are not imported
        here — presumably module-level imports elsewhere in this file.
        """
        from lale.expressions import (
            count,
            it,
            max,
            mean,
            min,
            string_indexer,
            sum,
            variance,
        )

        # NOTE: ``max``, ``min`` and ``sum`` shadow the builtins inside this
        # function — within this body they are lale expression constructors.
        wrap_imported_operators()
        # Source tables.
        scan = Scan(table=it["main"])
        scan_0 = Scan(table=it["customers"])
        # main joined to customers on group_customer_id, projecting a fixed
        # set of customer columns (remainder="drop").
        join = Join(pred=[(it["main"]["group_customer_id"] == it["customers"]
                           ["group_customer_id"])])
        map = Map(
            columns={
                "[main](group_customer_id)[customers]|number_children|identity":
                it["number_children"],
                "[main](group_customer_id)[customers]|name|identity":
                it["name"],
                "[main](group_customer_id)[customers]|income|identity":
                it["income"],
                "[main](group_customer_id)[customers]|address|identity":
                it["address"],
                "[main](group_customer_id)[customers]|age|identity":
                it["age"],
            },
            remainder="drop",
        )
        pipeline_4 = join >> map
        # main joined to purchase (join_limit=50.0), then per-row_id
        # aggregates over price and time.
        scan_1 = Scan(table=it["purchase"])
        join_0 = Join(
            pred=[(it["main"]["group_id"] == it["purchase"]["group_id"])],
            join_limit=50.0,
        )
        aggregate = Aggregate(
            columns={
                "[main](group_id)[purchase]|price|variance":
                variance(it["price"]),
                "[main](group_id)[purchase]|time|sum": sum(it["time"]),
                "[main](group_id)[purchase]|time|mean": mean(it["time"]),
                "[main](group_id)[purchase]|time|min": min(it["time"]),
                "[main](group_id)[purchase]|price|sum": sum(it["price"]),
                "[main](group_id)[purchase]|price|count": count(it["price"]),
                "[main](group_id)[purchase]|price|mean": mean(it["price"]),
                "[main](group_id)[purchase]|price|min": min(it["price"]),
                "[main](group_id)[purchase]|price|max": max(it["price"]),
                "[main](group_id)[purchase]|time|max": max(it["time"]),
                "[main](group_id)[purchase]|time|variance":
                variance(it["time"]),
            },
            group_by=it["row_id"],
        )
        pipeline_5 = join_0 >> aggregate
        # Columns taken directly from main (pass-through projection).
        map_0 = Map(
            columns={
                "[main]|group_customer_id|identity": it["group_customer_id"],
                "[main]|transaction_id|identity": it["transaction_id"],
                "[main]|group_id|identity": it["group_id"],
                "[main]|comments|identity": it["comments"],
                "[main]|id|identity": it["id"],
                "prefix_0_id": it["prefix_0_id"],
                "next_purchase": it["next_purchase"],
                "[main]|time|identity": it["time"],
            },
            remainder="drop",
        )
        # main joined through transactions to products (two join predicates),
        # projecting product price and type.
        scan_2 = Scan(table=it["transactions"])
        scan_3 = Scan(table=it["products"])
        join_1 = Join(pred=[
            (it["main"]["transaction_id"] == it["transactions"]
             ["transaction_id"]),
            (it["transactions"]["product_id"] == it["products"]["product_id"]),
        ])
        map_1 = Map(
            columns={
                "[main](transaction_id)[transactions](product_id)[products]|price|identity":
                it["price"],
                "[main](transaction_id)[transactions](product_id)[products]|type|identity":
                it["type"],
            },
            remainder="drop",
        )
        pipeline_6 = join_1 >> map_1
        # main joined to transactions only, projecting description and
        # product_id.
        join_2 = Join(pred=[(it["main"]["transaction_id"] == it["transactions"]
                             ["transaction_id"])])
        map_2 = Map(
            columns={
                "[main](transaction_id)[transactions]|description|identity":
                it["description"],
                "[main](transaction_id)[transactions]|product_id|identity":
                it["product_id"],
            },
            remainder="drop",
        )
        pipeline_7 = join_2 >> map_2
        # String-index the textual columns produced by the branches above.
        map_3 = Map(columns=[
            string_indexer(it["[main]|comments|identity"]),
            string_indexer(
                it["[main](transaction_id)[transactions]|description|identity"]
            ),
            string_indexer(it[
                "[main](transaction_id)[transactions](product_id)[products]|type|identity"]
                           ),
            string_indexer(
                it["[main](group_customer_id)[customers]|name|identity"]),
            string_indexer(
                it["[main](group_customer_id)[customers]|address|identity"]),
        ])
        pipeline_8 = ConcatFeatures() >> map_3
        # Assemble the dataflow DAG: scans feed the join branches, all
        # branches feed the concat/string-index stage (pipeline_8).
        relational = Relational(operator=make_pipeline_graph(
            steps=[
                scan,
                scan_0,
                pipeline_4,
                scan_1,
                pipeline_5,
                map_0,
                scan_2,
                scan_3,
                pipeline_6,
                pipeline_7,
                pipeline_8,
            ],
            edges=[
                (scan, pipeline_4),
                (scan, pipeline_5),
                (scan, map_0),
                (scan, pipeline_6),
                (scan, pipeline_7),
                (scan_0, pipeline_4),
                (pipeline_4, pipeline_8),
                (scan_1, pipeline_5),
                (pipeline_5, pipeline_8),
                (map_0, pipeline_8),
                (scan_2, pipeline_6),
                (scan_2, pipeline_7),
                (scan_3, pipeline_6),
                (pipeline_6, pipeline_8),
                (pipeline_7, pipeline_8),
            ],
        ))
        # Classifier choice on top of the relational features, then a small
        # Hyperopt search (2 evaluations) on the iris dataset.
        pipeline = relational >> (KNeighborsClassifier | LogisticRegression)
        from sklearn.datasets import load_iris

        X, y = load_iris(return_X_y=True)
        from lale.lib.lale import Hyperopt

        opt = Hyperopt(estimator=pipeline, max_evals=2)
        opt.fit(X, y)