Beispiel #1
0
    def test_higher_order_2(self):
        """Serialize a VotingClassifier whose estimators are operators and round-trip it.

        The voting classifier holds a planned KNN and a ``PCA() >> LR``
        pipeline; its JSON form must match the expected document, and
        deserializing then serializing again must give the same JSON.
        """
        # Show the complete diff when the nested comparison fails.
        self.maxDiff = None
        from lale.json_operator import from_json
        from lale.lib.sklearn import PCA
        from lale.lib.sklearn import KNeighborsClassifier as KNN
        from lale.lib.sklearn import LogisticRegression as LR
        from lale.lib.sklearn import VotingClassifier as Vote

        voter = Vote(
            estimators=[("knn", KNN), ("pipeline", PCA() >> LR)], voting="soft"
        )

        # Expected JSON for the two nested estimators, referenced via "$ref".
        knn_step = {
            "class": "lale.lib.sklearn.k_neighbors_classifier.KNeighborsClassifierImpl",
            "state": "planned",
            "operator": "KNeighborsClassifier",
            "label": "KNN",
            "documentation_url": "https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.k_neighbors_classifier.html",
        }
        pca_step = {
            "class": "lale.lib.sklearn.pca.PCAImpl",
            "state": "trainable",
            "operator": "PCA",
            "label": "PCA",
            "documentation_url": "https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.pca.html",
            "hyperparams": {},
            "is_frozen_trainable": False,
        }
        lr_step = {
            "class": "lale.lib.sklearn.logistic_regression.LogisticRegressionImpl",
            "state": "planned",
            "operator": "LogisticRegression",
            "label": "LR",
            "documentation_url": "https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.logistic_regression.html",
        }
        pipeline_step = {
            "class": "lale.operators.PlannedPipeline",
            "state": "planned",
            "edges": [["pca", "lr"]],
            "steps": {"pca": pca_step, "lr": lr_step},
        }

        json_expected = {
            "class": "lale.lib.sklearn.voting_classifier.VotingClassifierImpl",
            "state": "trainable",
            "operator": "VotingClassifier",
            "is_frozen_trainable": True,
            "label": "Vote",
            "documentation_url": "https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.voting_classifier.html",
            "hyperparams": {
                "estimators": [
                    ("knn", {"$ref": "../steps/knn"}),
                    ("pipeline", {"$ref": "../steps/pipeline"}),
                ],
                "voting": "soft",
            },
            "steps": {"knn": knn_step, "pipeline": pipeline_step},
        }

        serialized = voter.to_json()
        self.assertEqual(serialized, json_expected)

        # Deserialize and serialize again; nothing may be lost on the way.
        restored = from_json(serialized)
        reserialized = restored.to_json()
        self.assertEqual(serialized, reserialized)
Beispiel #2
0
    def test_predict_log_proba_trainable(self):
        """Calling predict_log_proba on a BaggingClassifier that was never fit must raise ValueError."""
        from lale.lib.sklearn import BaggingClassifier

        base_pipeline = PCA() >> LogisticRegression()
        unfitted = BaggingClassifier(base_estimator=base_pipeline)
        with self.assertRaises(ValueError):
            unfitted.predict_log_proba(self.X_test)
Beispiel #3
0
 def test_remove_last5(self):
     """Remove the last step of a pipeline in place, then freeze what remains."""
     scaler = StandardScaler()
     branches = PCA() & Nystroem() & PassiveAggressiveClassifier()
     pipeline = (
         scaler
         >> branches
         >> ConcatFeatures()
         >> NoOp()
         >> PassiveAggressiveClassifier()
     )
     shortened = pipeline.remove_last(inplace=True)
     shortened.freeze_trainable()
Beispiel #4
0
 def test_hyperparam_overriding_with_hyperopt(self):
     """A PCA with n_components set must yield a different search space than a default PCA."""
     bound_pca = PCA(n_components=3)
     default_pca = PCA()
     bound_space = hyperopt_search_space(bound_pca)
     default_space = hyperopt_search_space(default_pca)
     self.assertNotEqual(bound_space, default_space)
Beispiel #5
0
    def test_feature_preprocessor(self):
        """Smoke-test one feature preprocessor end to end.

        The operator under test is named by ``fproc_name``, a dotted
        ``module.ClassName`` string defined outside this method. The test
        validates the operator's schemas, fits and transforms with it,
        serializes it to JSON, uses it in front of LogisticRegression, and
        tunes that pipeline with HyperoptClassifier. It only checks that
        none of these steps raises; no outputs are asserted.
        """
        import importlib

        X_train, y_train = self.X_train, self.y_train
        X_test = self.X_test

        # Resolve "package.module.ClassName" into the operator class.
        module_name, _, class_name = fproc_name.rpartition(".")
        module = importlib.import_module(module_name)
        fproc = getattr(module, class_name)()

        from lale.lib.sklearn.one_hot_encoder import OneHotEncoderImpl
        if isinstance(fproc._impl, OneHotEncoderImpl):
            # HACK: OneHotEncoder is replaced by PCA for the rest of the test.
            # The intended operator was OneHotEncoder(handle_unknown='ignore');
            # remove the hack when this is fixed.
            fproc = PCA()

        # test_schemas_are_schemas
        from lale.helpers import validate_is_schema
        for get_schema in (
            fproc.input_schema_fit,
            fproc.input_schema_transform,
            fproc.output_schema,
            fproc.hyperparam_schema,
        ):
            validate_is_schema(get_schema())

        # test_init_fit_transform
        trained = fproc.fit(X_train, y_train)
        trained.transform(X_test)

        # test_predict_on_trainable: transform is called on fproc itself,
        # not on the trained operator returned by fit().
        fproc.fit(X_train, y_train)
        fproc.transform(X_train)

        # test_to_json
        fproc.to_json()

        # test_in_a_pipeline
        # This test assumes that the output of feature processing is
        # compatible with LogisticRegression.
        from lale.lib.sklearn import LogisticRegression
        pipeline = fproc >> LogisticRegression()
        trained = pipeline.fit(X_train, y_train)
        trained.predict(X_test)

        # Tune the pipeline with LR using HyperoptClassifier
        from lale.lib.lale import HyperoptClassifier
        hyperopt = HyperoptClassifier(model=pipeline, max_evals=1)
        trained = hyperopt.fit(X_train, y_train)
        trained.predict(X_test)
    def test_with_lale_pipeline(self):
        """Fit and predict with a BaggingClassifier whose base estimator is a lale pipeline."""
        from lale.lib.sklearn import BaggingClassifier

        base_pipeline = PCA() >> LogisticRegression()
        bagging = BaggingClassifier(base_estimator=base_pipeline)
        fitted = bagging.fit(self.X_train, self.y_train)
        fitted.predict(self.X_test)
Beispiel #7
0
 def test_PCA(self):
     """Fit a default PCA on self.X, passing an empty list as labels."""
     PCA().fit(self.X, [])
Beispiel #8
0
    def test_pipeline_with_hyperopt(self):
        """Run auto_configure with Hyperopt on a BaggingClassifier wrapping a lale pipeline."""
        from lale.lib.lale import Hyperopt
        from lale.lib.sklearn import BaggingClassifier

        base_pipeline = PCA() >> LogisticRegression()
        bagging = BaggingClassifier(base_estimator=base_pipeline)
        # Only checks that a single-evaluation search completes.
        bagging.auto_configure(self.X_train, self.y_train, Hyperopt, max_evals=1)
Beispiel #9
0
 def test_export_to_sklearn_pipeline5(self):
     """Exporting a pipeline that contains an operator choice must raise ValueError."""
     with_choice = PCA() >> (XGBClassifier() | SGDClassifier())
     with self.assertRaises(ValueError):
         with_choice.export_to_sklearn_pipeline()
Beispiel #10
0
    def test_pipeline_1(self):
        """Check the JSON form of ``(PCA & NoOp) >> ConcatFeatures >> LR`` and its round trip."""
        # Show the complete diff when the nested comparison fails.
        self.maxDiff = None
        from lale.json_operator import from_json, to_json
        from lale.lib.lale import ConcatFeatures, NoOp
        from lale.lib.sklearn import PCA
        from lale.lib.sklearn import LogisticRegression as LR

        planned = (PCA & NoOp) >> ConcatFeatures >> LR
        json_expected = {
            "class": "lale.operators.PlannedPipeline",
            "state": "planned",
            "edges": [
                ["pca", "concat_features"],
                ["no_op", "concat_features"],
                ["concat_features", "lr"],
            ],
            "steps": {
                "pca": {
                    "class": PCA.class_name(),
                    "state": "planned",
                    "operator": "PCA",
                    "label": "PCA",
                    "documentation_url": "https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.pca.html",
                },
                "no_op": {
                    "class": NoOp.class_name(),
                    "state": "trained",
                    "operator": "NoOp",
                    "label": "NoOp",
                    "documentation_url": "https://lale.readthedocs.io/en/latest/modules/lale.lib.lale.no_op.html",
                    "hyperparams": None,
                    "coefs": None,
                    "is_frozen_trainable": True,
                    "is_frozen_trained": True,
                },
                "concat_features": {
                    "class": ConcatFeatures.class_name(),
                    "state": "trained",
                    "operator": "ConcatFeatures",
                    "label": "ConcatFeatures",
                    "documentation_url": "https://lale.readthedocs.io/en/latest/modules/lale.lib.lale.concat_features.html",
                    "hyperparams": None,
                    "coefs": None,
                    "is_frozen_trainable": True,
                    "is_frozen_trained": True,
                },
                "lr": {
                    "class": LR.class_name(),
                    "state": "planned",
                    "operator": "LogisticRegression",
                    "label": "LR",
                    "documentation_url": "https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.logistic_regression.html",
                },
            },
        }

        serialized = to_json(planned)
        self.assertEqual(serialized, json_expected)

        # Deserialize and serialize again; nothing may be lost on the way.
        restored = from_json(serialized)
        reserialized = to_json(restored)
        self.assertEqual(serialized, reserialized)
Beispiel #11
0
    def test_nested(self):
        """Check the JSON form of a pipeline with a nested choice and its round trip.

        The operator is ``PCA >> (LR(C=0.09) | NoOp >> LR(C=0.19))``: a choice
        between a configured LR and a two-step pipeline.
        """
        # Show the complete diff when the nested comparison fails.
        self.maxDiff = None
        from lale.json_operator import from_json, to_json
        from lale.lib.lale import NoOp
        from lale.lib.sklearn import PCA
        from lale.lib.sklearn import LogisticRegression as LR

        planned = PCA >> (LR(C=0.09) | NoOp >> LR(C=0.19))
        json_expected = {
            "class": "lale.operators.PlannedPipeline",
            "state": "planned",
            "edges": [["pca", "choice"]],
            "steps": {
                "pca": {
                    "class": PCA.class_name(),
                    "state": "planned",
                    "operator": "PCA",
                    "label": "PCA",
                    "documentation_url": "https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.pca.html",
                },
                "choice": {
                    "class": "lale.operators.OperatorChoice",
                    "state": "planned",
                    "operator": "OperatorChoice",
                    "steps": {
                        "lr_0": {
                            "class": LR.class_name(),
                            "state": "trainable",
                            "operator": "LogisticRegression",
                            "label": "LR",
                            "documentation_url": "https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.logistic_regression.html",
                            "hyperparams": {"C": 0.09},
                            "is_frozen_trainable": False,
                        },
                        "pipeline_1": {
                            "class": "lale.operators.TrainablePipeline",
                            "state": "trainable",
                            "edges": [["no_op", "lr_1"]],
                            "steps": {
                                "no_op": {
                                    "class": NoOp.class_name(),
                                    "state": "trained",
                                    "operator": "NoOp",
                                    "label": "NoOp",
                                    "documentation_url": "https://lale.readthedocs.io/en/latest/modules/lale.lib.lale.no_op.html",
                                    "hyperparams": None,
                                    "coefs": None,
                                    "is_frozen_trainable": True,
                                    "is_frozen_trained": True,
                                },
                                "lr_1": {
                                    "class": LR.class_name(),
                                    "state": "trainable",
                                    "operator": "LogisticRegression",
                                    "label": "LR",
                                    "documentation_url": "https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.logistic_regression.html",
                                    "hyperparams": {"C": 0.19},
                                    "is_frozen_trainable": False,
                                },
                            },
                        },
                    },
                },
            },
        }

        serialized = to_json(planned)
        self.assertEqual(serialized, json_expected)

        # Deserialize and serialize again; nothing may be lost on the way.
        restored = from_json(serialized)
        reserialized = to_json(restored)
        self.assertEqual(serialized, reserialized)
Beispiel #12
0
 def test_two_estimators_predict_proba1(self):
     """Fit a pipeline with PassiveAggressiveClassifier in a branch and as the last step, then call predict_proba."""
     scaler = StandardScaler()
     branches = PCA() & Nystroem() & PassiveAggressiveClassifier()
     pipeline = (
         scaler
         >> branches
         >> ConcatFeatures()
         >> NoOp()
         >> PassiveAggressiveClassifier()
     )
     # The result of fit() is discarded; predict_proba is called on the
     # same pipeline object.
     pipeline.fit(self.X_train, self.y_train)
     pipeline.predict_proba(self.X_test)
Beispiel #13
0
 def test_pipeline_create(self):
     """Build a Pipeline from named (name, operator) steps, fit it and score its predictions."""
     from lale.operators import Pipeline

     named_steps = [('pca1', PCA()), ('lr1', LogisticRegression())]
     fitted = Pipeline(named_steps).fit(self.X_train, self.y_train)
     predicted = fitted.predict(self.X_test)
     # The score itself is not asserted; the call only has to succeed.
     accuracy_score(self.y_test, predicted)
Beispiel #14
0
            logger.warning(
                "ValueError in predicting using classifier:{}, the error is:{}"
                .format(reg, e))
            predictions = None

        return predictions

    def get_trials(self):
        """Return the ``trials`` attribute stored on this instance."""
        return self.trials


if __name__ == '__main__':
    # Demo: build a HyperoptRegressor around a
    # (PCA & Nystroem) >> ConcatFeatures >> RandomForestRegressor pipeline
    # and load a shuffled copy of the diabetes dataset.
    import sklearn.datasets
    # Imported explicitly: shuffle() below lives in sklearn.utils, and
    # importing sklearn.datasets alone does not promise to expose it.
    import sklearn.utils
    # NOTE(review): cross_val_score and r2_score are imported but not used
    # in the visible part of this script.
    from lale.helpers import cross_val_score
    from lale.lib.lale import ConcatFeatures
    from lale.lib.sklearn import Nystroem, PCA, RandomForestRegressor
    from sklearn.metrics import r2_score

    pca = PCA(n_components=3)
    nys = Nystroem(n_components=3)
    concat = ConcatFeatures()
    rf = RandomForestRegressor()

    # Both transformers receive the same input; their outputs are
    # concatenated before the regressor.
    trainable = (pca & nys) >> concat >> rf

    diabetes = sklearn.datasets.load_diabetes()
    X, y = sklearn.utils.shuffle(diabetes.data,
                                 diabetes.target,
                                 random_state=42)

    hp_n = HyperoptRegressor(model=trainable, max_evals=20)
Beispiel #15
0
    def create_pipeline(self):
        """Return a new, unfitted scikit-learn pipeline of PCA followed by LogisticRegression."""
        from sklearn.decomposition import PCA
        from sklearn.pipeline import make_pipeline

        return make_pipeline(PCA(), LogisticRegression())
Beispiel #16
0
 def test_export_to_sklearn_pipeline_with_noop_2(self):
     """Export a fitted pipeline with a NoOp in the middle and compare its predictions with the original."""
     trainable = PCA(n_components=3) >> NoOp() >> KNeighborsClassifier()
     fitted = trainable.fit(self.X_train, self.y_train)
     exported = fitted.export_to_sklearn_pipeline()
     self.assert_equal_predictions(exported, fitted)
Beispiel #17
0
    def test_resampler(self):
        """Smoke-test one resampling operator in several settings.

        The operator under test is named by ``res_name``, a dotted
        ``module.ClassName`` string defined outside this method. The test
        validates the operator's schemas, fits and predicts with it inside
        and around pipelines, runs it under Hyperopt, cross-validates it and
        serializes it. Apart from the ValidationError check and the number of
        cross-validation results, it only checks that no step raises.
        """
        import importlib

        from lale.lib.lale import ConcatFeatures, NoOp
        from lale.lib.sklearn import PCA, LogisticRegression, Nystroem

        X_train, y_train = self.X_train, self.y_train
        X_test = self.X_test

        # Resolve "package.module.ClassName" into the operator class.
        module_name, _, class_name = res_name.rpartition(".")
        module = importlib.import_module(module_name)
        class_ = getattr(module, class_name)

        # Constructing with no arguments must fail validation; every later
        # use passes operator=...
        with self.assertRaises(ValidationError):
            class_()

        # test_schemas_are_schemas
        lale.type_checking.validate_is_schema(class_.input_schema_fit())
        lale.type_checking.validate_is_schema(class_.input_schema_predict())
        lale.type_checking.validate_is_schema(class_.output_schema_predict())
        lale.type_checking.validate_is_schema(class_.hyperparam_schema())

        # test_init_fit_predict
        from lale.operators import make_pipeline

        pipeline1 = PCA() >> class_(operator=make_pipeline(LogisticRegression()))
        trained = pipeline1.fit(X_train, y_train)
        trained.predict(X_test)

        pipeline2 = class_(operator=make_pipeline(PCA(), LogisticRegression()))
        trained = pipeline2.fit(X_train, y_train)
        trained.predict(X_test)

        # test_with_hyperopt
        from lale.lib.lale import Hyperopt

        optimizer = Hyperopt(
            estimator=PCA >> class_(operator=make_pipeline(LogisticRegression())),
            max_evals=1,
            show_progressbar=False,
        )
        trained_optimizer = optimizer.fit(X_train, y_train)
        trained_optimizer.predict(X_test)

        pipeline3 = class_(
            operator=PCA()
            >> (Nystroem & NoOp)
            >> ConcatFeatures
            >> LogisticRegression()
        )
        optimizer = Hyperopt(estimator=pipeline3, max_evals=1, show_progressbar=False)
        trained_optimizer = optimizer.fit(X_train, y_train)
        trained_optimizer.predict(X_test)

        # `>>` binds tighter than `&`, so the first branch below is
        # (PCA >> class_(...)) and the second is the bare class_(...).
        pipeline4 = (
            (
                PCA >> class_(operator=make_pipeline(Nystroem()))
                & class_(operator=make_pipeline(Nystroem()))
            )
            >> ConcatFeatures
            >> LogisticRegression()
        )
        optimizer = Hyperopt(
            estimator=pipeline4, max_evals=1, scoring="roc_auc", show_progressbar=False
        )
        trained_optimizer = optimizer.fit(X_train, y_train)
        trained_optimizer.predict(X_test)

        # test_cross_validation
        from lale.helpers import cross_val_score

        cv_results = cross_val_score(pipeline1, X_train, y_train, cv=2)
        self.assertEqual(len(cv_results), 2)

        # test_to_json
        pipeline1.to_json()
Beispiel #18
0
 def test_export_to_sklearn_pipeline_with_noop_3(self):
     """Export a fitted pipeline whose last step is a NoOp; only checks that export succeeds."""
     # This test is probably unnecessary, but doesn't harm at this point
     trainable = PCA(n_components=3) >> KNeighborsClassifier() >> NoOp()
     fitted = trainable.fit(self.X_train, self.y_train)
     fitted.export_to_sklearn_pipeline()
Beispiel #19
0
    def test_feature_preprocessor(self):
        """Smoke-test one feature preprocessor end to end.

        The operator under test is named by ``fproc_name``, a dotted
        ``module.ClassName`` string defined outside this method. The test
        validates its schemas, fits and transforms with it, serializes it,
        puts it in front of LogisticRegression and tunes that pipeline with
        Hyperopt. No outputs are asserted.
        """
        import importlib

        X_train, y_train = self.X_train, self.y_train

        # Resolve "package.module.ClassName" into the operator class.
        module_name, _, class_name = fproc_name.rpartition(".")
        operator_class = getattr(importlib.import_module(module_name), class_name)
        fproc = operator_class()

        from lale.lib.sklearn.one_hot_encoder import OneHotEncoderImpl

        if fproc._impl_class() == OneHotEncoderImpl:
            # HACK: OneHotEncoder is replaced by PCA for the rest of the test.
            # The intended operator was OneHotEncoder(handle_unknown = 'ignore');
            # remove the hack when this is fixed.
            fproc = PCA()

        # test_schemas_are_schemas
        for get_schema in (
            fproc.input_schema_fit,
            fproc.input_schema_transform,
            fproc.output_schema_transform,
            fproc.hyperparam_schema,
        ):
            lale.type_checking.validate_is_schema(get_schema())

        # test_init_fit_transform
        fitted = fproc.fit(X_train, y_train)
        fitted.transform(self.X_test)

        # test_predict_on_trainable: transform is called on fproc itself,
        # not on the trained operator returned by fit().
        fproc.fit(X_train, y_train)
        fproc.transform(X_train)

        # test_to_json
        fproc.to_json()

        # test_in_a_pipeline
        # This test assumes that the output of feature processing is compatible with LogisticRegression
        from lale.lib.sklearn import LogisticRegression

        pipeline = fproc >> LogisticRegression()
        fitted = pipeline.fit(X_train, y_train)
        fitted.predict(self.X_test)

        # Tune the pipeline with LR using Hyperopt
        from lale.lib.lale import Hyperopt

        tuner = Hyperopt(estimator=pipeline, max_evals=1, verbose=True, cv=3)
        fitted = tuner.fit(X_train, y_train)
        fitted.predict(self.X_test)
Beispiel #20
0
 def test_two_estimators_predict_proba(self):
     """Fit a pipeline with LogisticRegression in a branch and as the last step, then call predict_proba."""
     scaler = StandardScaler()
     branches = PCA() & Nystroem() & LogisticRegression()
     pipeline = (
         scaler
         >> branches
         >> ConcatFeatures()
         >> NoOp()
         >> LogisticRegression()
     )
     fitted = pipeline.fit(self.X_train, self.y_train)
     fitted.predict_proba(self.X_test)
Beispiel #21
0
    def test_pipeline_parameters(self):
        """Generate grid-search parameter grids for PCA >> LogisticRegression using a loaded PGO file."""
        loaded_pgo = PGO.load_pgo_file(example_pgo_fp)
        pipeline = PCA() >> LogisticRegression()
        # The grids are not inspected; the call only has to succeed.
        get_grid_search_parameter_grids(pipeline, num_samples=2, pgo=loaded_pgo)
Beispiel #22
0
 def test_two_estimators_predict_proba1(self):
     """Fit a pipeline with GaussianNB in a branch and as the last step, then call predict_proba."""
     scaler = StandardScaler()
     branches = PCA() & Nystroem() & GaussianNB()
     pipeline = scaler >> branches >> ConcatFeatures() >> NoOp() >> GaussianNB()
     # The result of fit() is discarded; predict_proba is called on the
     # same pipeline object.
     pipeline.fit(self.X_train, self.y_train)
     pipeline.predict_proba(self.X_test)
Beispiel #23
0
from sklearn.datasets import load_iris
from sklearn.metrics import mean_squared_error
# Iris data, turned into a regression problem below.
data = load_iris()
X, y = data.data, data.target
# The class labels in y are replaced: the target becomes feature column 3 ...
y=X[:, 3]
# ... and the predictors are feature columns 0-2.
X=X[:, 0:3]
# NOTE(review): these splits are not used in the visible part of this script.
X_train, X_test, y_train, y_test = train_test_split(X, y)


# Load the California housing data as train and test (X, y) pairs.
(train_X, train_y), (test_X, test_y) = dt.california_housing_df()
# NOTE(review): the result of this concat is discarded — presumably a leftover
# notebook preview of the first rows; verify whether it can be dropped.
pd.concat([train_X.head(), train_y.head()], axis=1)
# Presumably wraps the operators already imported into this module (PCA, Tree,
# RF, ...) as lale operators so they can be combined with >> below — confirm
# against the lale documentation.
lale.wrap_imported_operators()

# Pipeline 1: PCA then Tree, built from explicit (name, step) pairs.
pca_tree_planned = Pipeline(steps=[("tfm", PCA()), ("estim", Tree())])
# The result of fit() is discarded; predict() is called on the same object.
pca_tree_planned.fit(train_X, train_y)
predicted = pca_tree_planned.predict(test_X)
print(f'R2 score {sklearn.metrics.r2_score(test_y, predicted):.2f}')

# Pipeline 2: the same two steps composed with >>, then configured by
# auto_configure with Hyperopt (3-fold CV, 10 evaluations).
pca_tree_planned = PCA() >> Tree()
pca_tree_trained = pca_tree_planned.auto_configure(
    train_X, train_y, optimizer=Hyperopt, cv=3, max_evals=10, verbose=True)
predicted = pca_tree_trained.predict(test_X)
print(f'R2 score {sklearn.metrics.r2_score(test_y, predicted):.2f}')

# Iris data: PCA >> RF on the X, y derived from iris above.
# NOTE(review): the variable names still say "tree" although the estimator is RF.
pca_tree_planned = PCA >> RF
pca_tree_trained = pca_tree_planned.auto_configure(
    X, y, optimizer=Hyperopt, cv=3, max_evals=10, verbose=True)
Beispiel #24
0
 def test_duplicate_instances(self):
     """Passing the same operator instance twice to make_pipeline must raise ValueError."""
     shared_pca = PCA()
     classifier = LogisticRegression(
         LogisticRegression.solver.lbfgs,
         LogisticRegression.multi_class.auto,
     )
     with self.assertRaises(ValueError):
         lale.operators.make_pipeline(shared_pca, shared_pca, classifier)
Beispiel #25
0
        'post': []},
    'properties': {
        'hyperparams': _hyperparams_schema,
        'input_fit': _input_fit_schema,
        'input_predict': _input_predict_schema,
        'output_predict': _output_predict_schema}}

# Set docstrings on HyperoptImpl from the combined schemas (presumably generated
# from the schema descriptions — confirm in lale.docstrings).
lale.docstrings.set_docstrings(HyperoptImpl, _combined_schemas)

# Module-level operator built from HyperoptImpl and its combined schemas.
Hyperopt = lale.operators.make_operator(HyperoptImpl, _combined_schemas)

if __name__ == '__main__':
    # Demo: tune a (PCA & Nystroem) >> ConcatFeatures >> LogisticRegression
    # pipeline with Hyperopt on a shuffled copy of the iris dataset.
    import sklearn.datasets
    # Imported explicitly: shuffle() below lives in sklearn.utils, and
    # importing sklearn.datasets alone does not promise to expose it.
    import sklearn.utils
    # NOTE(review): cross_val_score is imported but not used in the visible
    # part of this script.
    from lale.helpers import cross_val_score
    from lale.lib.lale import ConcatFeatures
    from lale.lib.sklearn import Nystroem
    from lale.lib.sklearn import PCA

    pca = PCA(n_components=10)
    nys = Nystroem(n_components=10)
    concat = ConcatFeatures()
    lr = LogisticRegression(random_state=42, C=0.1)

    # Both transformers receive the same input; their outputs are
    # concatenated before the classifier.
    trainable = (pca & nys) >> concat >> lr

    # The dataset is iris; the variable was previously misnamed `digits`.
    iris = sklearn.datasets.load_iris()
    X, y = sklearn.utils.shuffle(iris.data, iris.target, random_state=42)

    hp_n = Hyperopt(estimator=trainable, max_evals=2)

    # Fit and predict on the same data; this is a smoke run, not an evaluation.
    hp_n_trained = hp_n.fit(X, y)
    predictions = hp_n_trained.predict(X)
Beispiel #26
0
    def test_import_from_sklearn_pipeline_no_wrapper(self):
        """Importing an unfitted sklearn pipeline ending in LocalOutlierFactor must not raise."""
        from sklearn.neighbors import LocalOutlierFactor
        from sklearn.pipeline import make_pipeline

        unfitted = make_pipeline(PCA(), LocalOutlierFactor())
        import_from_sklearn_pipeline(unfitted, fitted=False)