    def dont_test_car_hyperopt(self):

        from lale.datasets.auto_weka import fetch_car
        from sklearn.metrics import accuracy_score, make_scorer
        from sklearn.preprocessing import LabelEncoder
        import pandas as pd
        from lale.lib.weka import J48
        from lalegpl.lib.r import ArulesCBAClassifier 
        from lale.operators import make_pipeline
        from lale.lib.lale import HyperoptClassifier
        from lale.lib.sklearn import LogisticRegression, KNeighborsClassifier

        (X_train, y_train), (X_test, y_test) = fetch_car()
        y_name = y_train.name
        le = LabelEncoder()
        y_train = le.fit_transform(y_train)
        y_test = le.transform(y_test)

        y_train = pd.Series(y_train, name=y_name)
        y_test = pd.Series(y_test, name=y_name)

        planned_pipeline = make_pipeline(ArulesCBAClassifier() | LogisticRegression() | KNeighborsClassifier())

        clf = HyperoptClassifier(model=planned_pipeline, max_evals=1)
        best_pipeline = clf.fit(X_train, y_train)
        print(accuracy_score(y_test, best_pipeline.predict(X_test)))
Example #2
def run_hyperopt_on_planned_pipeline(planned_pipeline, max_iters=1):
    # data
    from sklearn.datasets import load_iris
    features, labels = load_iris(return_X_y=True)
    # set up optimizer
    from lale.lib.lale.hyperopt_classifier import HyperoptClassifier
    opt = HyperoptClassifier(estimator=planned_pipeline, max_evals=max_iters)
    # run optimizer
    res = opt.fit(features, labels)
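
A minimal usage sketch (not part of the original tests; the two operator names are only illustrative) of calling the helper above with a planned pipeline. The | combinator builds an operator choice, and Hyperopt picks both the operator and its hyperparameters within max_iters trials:

from lale.lib.sklearn import LogisticRegression, KNeighborsClassifier

# planned pipeline: a choice between two lale-wrapped classifiers
run_hyperopt_on_planned_pipeline(
    LogisticRegression | KNeighborsClassifier, max_iters=2)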
Example #3
    def test_lr_run(self):
        pgo = PGO.load_pgo_file(example_pgo_fp)

        from lale.lib.lale import HyperoptClassifier
        from sklearn.datasets import load_iris

        lr = LogisticRegression()
        clf = HyperoptClassifier(model=lr, max_evals=5, pgo=pgo)
        iris = load_iris()
        clf.fit(iris.data, iris.target)
Example #4
 def test_using_scoring(self):
     from sklearn.metrics import hinge_loss, make_scorer, f1_score, accuracy_score
     lr = LogisticRegression()
     clf = HyperoptClassifier(estimator=lr,
                              scoring='accuracy',
                              cv=5,
                              max_evals=2)
     trained = clf.fit(self.X_train, self.y_train)
     predictions = trained.predict(self.X_test)
     predictions_1 = clf.predict(self.X_test)
     assert np.array_equal(predictions_1, predictions)
Example #5
 def test_custom_scoring(self):
     from sklearn.metrics import f1_score, make_scorer
     lr = LogisticRegression()
     clf = HyperoptClassifier(estimator=lr,
                              scoring=make_scorer(f1_score,
                                                  average='macro'),
                              cv=5,
                              max_evals=2)
     trained = clf.fit(self.X_train, self.y_train)
     predictions = trained.predict(self.X_test)
     predictions_1 = clf.predict(self.X_test)
     assert np.array_equal(predictions_1, predictions)
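
For reference, a hedged sketch (not from the original tests) of wrapping a hand-written metric the same way; make_scorer accepts any callable with the (y_true, y_pred) signature, and the resulting scorer can be passed as scoring= just like the f1_score example above:

import numpy as np
from sklearn.metrics import make_scorer

def fraction_correct(y_true, y_pred):
    # plain accuracy computed by hand, only to show the required signature
    return float(np.mean(np.asarray(y_true) == np.asarray(y_pred)))

custom_scorer = make_scorer(fraction_correct)
# e.g. HyperoptClassifier(estimator=lr, scoring=custom_scorer, cv=5, max_evals=2)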
Example #6
    def test_runtime_limit_zero_time_hoc(self):
        planned_pipeline = (MinMaxScaler | Normalizer) >> (
            LogisticRegression | KNeighborsClassifier)
        from sklearn.datasets import load_iris
        X, y = load_iris(return_X_y=True)

        hoc = HyperoptClassifier(estimator=planned_pipeline,
                                 max_evals=100,
                                 cv=3,
                                 scoring='accuracy',
                                 max_opt_time=0.0)
        hoc_fitted = hoc.fit(X, y)
        from lale.helpers import best_estimator
        assert best_estimator(hoc_fitted) is None
Example #7
    def test_J48_for_car_dataset(self):
        from lalegpl.datasets.auto_weka import fetch_car
        (X_train, y_train), (X_test, y_test) = fetch_car()
        from sklearn.preprocessing import LabelEncoder
        le = LabelEncoder()
        y_train = le.fit_transform(y_train)
        y_test = le.transform(y_test)

        clf = J48()
        from sklearn.metrics import accuracy_score
        from lale.lib.lale import NoOp, HyperoptClassifier
        from lale.operators import make_pipeline
        clf = HyperoptClassifier(make_pipeline(J48()), max_evals=1)
        trained_clf = clf.fit(X_train, y_train)
        print(accuracy_score(y_test, trained_clf.predict(X_test)))
Example #8
    def test_feature_preprocessor(self):
        X_train, y_train = self.X_train, self.y_train
        X_test, y_test = self.X_test, self.y_test
        import importlib
        module_name = ".".join(fproc_name.split('.')[0:-1])
        class_name = fproc_name.split('.')[-1]
        module = importlib.import_module(module_name)

        class_ = getattr(module, class_name)
        fproc = class_()

        from lale.lib.sklearn.one_hot_encoder import OneHotEncoderImpl
        if isinstance(fproc._impl, OneHotEncoderImpl):
            #fproc = OneHotEncoder(handle_unknown = 'ignore')
            #remove the hack when this is fixed
            fproc = PCA()
        #test_schemas_are_schemas
        from lale.helpers import validate_is_schema
        validate_is_schema(fproc.input_schema_fit())
        validate_is_schema(fproc.input_schema_transform())
        validate_is_schema(fproc.output_schema())
        validate_is_schema(fproc.hyperparam_schema())

        #test_init_fit_transform
        trained = fproc.fit(self.X_train, self.y_train)
        predictions = trained.transform(self.X_test)

        #test_predict_on_trainable
        trained = fproc.fit(X_train, y_train)
        fproc.transform(X_train)

        #test_to_json
        fproc.to_json()

        #test_in_a_pipeline
        #This test assumes that the output of feature processing is compatible with LogisticRegression
        from lale.lib.sklearn import LogisticRegression
        pipeline = fproc >> LogisticRegression()
        trained = pipeline.fit(self.X_train, self.y_train)
        predictions = trained.predict(self.X_test)

        #Tune the pipeline with LR using HyperoptClassifier
        from lale.lib.lale import HyperoptClassifier
        hyperopt = HyperoptClassifier(model=pipeline, max_evals=1)
        trained = hyperopt.fit(self.X_train, self.y_train)
        predictions = trained.predict(self.X_test)
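
The importlib pattern at the top of this test (and again in the test_classifier example below) resolves an operator class from a dotted name such as fproc_name. A standalone sketch of that pattern; the operator name in the final comment is only an illustrative assumption:

import importlib

def resolve_operator(dotted_name):
    # split "pkg.module.ClassName" into module path and class name,
    # import the module, and return the class object
    module_name, class_name = dotted_name.rsplit('.', 1)
    module = importlib.import_module(module_name)
    return getattr(module, class_name)

# e.g. fproc = resolve_operator('lale.lib.sklearn.PCA')()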
Example #9
    def test_runtime_limit_hoc(self):
        import time
        planned_pipeline = (MinMaxScaler | Normalizer) >> (
            LogisticRegression | KNeighborsClassifier)
        from sklearn.datasets import load_iris
        X, y = load_iris(return_X_y=True)

        max_opt_time = 2.0
        hoc = HyperoptClassifier(estimator=planned_pipeline,
                                 max_evals=100,
                                 cv=3,
                                 scoring='accuracy',
                                 max_opt_time=max_opt_time)
        start = time.time()
        best_trained = hoc.fit(X, y)
        end = time.time()
        opt_time = end - start
        rel_diff = (opt_time - max_opt_time) / max_opt_time
        assert rel_diff < 0.2, (
            'Max time: {}, Actual time: {}, relative diff: {}'.format(
                max_opt_time, opt_time, rel_diff))
Example #10
    def test_classifier(self):
        X_train, y_train = self.X_train, self.y_train
        X_test, y_test = self.X_test, self.y_test
        import importlib
        module_name = ".".join(clf_name.split('.')[0:-1])
        class_name = clf_name.split('.')[-1]
        module = importlib.import_module(module_name)

        class_ = getattr(module, class_name)
        clf = class_()

        #test_schemas_are_schemas
        from lale.helpers import validate_is_schema
        validate_is_schema(clf.input_schema_fit())
        validate_is_schema(clf.input_schema_predict())
        validate_is_schema(clf.output_schema())
        validate_is_schema(clf.hyperparam_schema())

        #test_init_fit_predict
        trained = clf.fit(self.X_train, self.y_train)
        predictions = trained.predict(self.X_test)

        #test_with_hyperopt
        from lale.lib.lale import HyperoptClassifier
        hyperopt = HyperoptClassifier(model=clf, max_evals=1)
        trained = hyperopt.fit(self.X_train, self.y_train)
        predictions = trained.predict(self.X_test)

        #test_cross_validation
        from lale.helpers import cross_val_score
        cv_results = cross_val_score(clf, X_train, y_train, cv=2)
        self.assertEqual(len(cv_results), 2)

        #test_with_gridsearchcv_auto_wrapped
        from sklearn.metrics import accuracy_score, make_scorer
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            from lale.lib.sklearn.gradient_boosting_classifier import GradientBoostingClassifierImpl
            from lale.lib.sklearn.mlp_classifier import MLPClassifierImpl
            if isinstance(clf._impl, GradientBoostingClassifierImpl):
                #because exponential loss does not work with iris dataset as it is not binary classification
                import lale.schemas as schemas
                clf = clf.customize_schema(
                    loss=schemas.Enum(default='deviance', values=['deviance']))
            if not isinstance(clf._impl, MLPClassifierImpl):
                #mlp fails due to issue #164.
                grid_search = LaleGridSearchCV(
                    clf,
                    lale_num_samples=1,
                    lale_num_grids=1,
                    cv=2,
                    scoring=make_scorer(accuracy_score))
                grid_search.fit(X_train, y_train)

        #test_predict_on_trainable
        trained = clf.fit(X_train, y_train)
        clf.predict(X_train)

        #test_to_json
        clf.to_json()

        #test_in_a_pipeline
        pipeline = NoOp() >> clf
        trained = pipeline.fit(self.X_train, self.y_train)
        predictions = trained.predict(self.X_test)