Example no. 1
    def test_cv_folds(self):
        """Cross-validation with an explicit 2-fold KFold yields one score per fold."""
        from sklearn.model_selection import KFold

        from lale.helpers import cross_val_score

        classifier = LogisticRegression(n_jobs=1)
        dataset = sklearn.datasets.load_iris()
        fold_scores = cross_val_score(classifier, dataset.data, dataset.target, cv=KFold(2))
        self.assertEqual(len(fold_scores), 2)
Example no. 2
def f_min(op, X, y, num_folds=5):
    """Objective function for minimizers: 1 - mean cross-validation score of *op*.

    Parameters
    ----------
    op : operator/pipeline accepted by ``lale.helpers.cross_val_score``
    X, y : training features and labels
    num_folds : int, number of cross-validation folds (default 5)

    Returns
    -------
    float
        ``1 - mean(scores)``; lower is better, so minimizing this
        maximizes the mean cross-validation score.
    """
    import numpy as np

    from lale.helpers import cross_val_score

    scores = cross_val_score(op, X, y, cv=num_folds)
    return 1 - np.mean(scores)  # Minimize!
Example no. 3
def test_f_min(op, X, y, num_folds=5):
    """Objective function for minimizers: 1 - mean cross-validation score of *op*.

    Same contract as ``f_min``: returns ``1 - mean(scores)`` so that a
    minimizer maximizes the mean cross-validation score.
    """
    # Removed an unused `from sklearn import datasets` and a leftover
    # commented-out `try:` from an earlier revision.
    import numpy as np

    from lale.helpers import cross_val_score

    scores = cross_val_score(op, X, y, cv=num_folds)
    return 1 - np.mean(scores)  # Minimize!
Example no. 4
 def test_cv_scoring(self):
     """Cross-validation with a custom scoring callable runs the default 5 folds."""
     from sklearn.metrics import confusion_matrix

     from lale.helpers import cross_val_score

     estimator = LogisticRegression(n_jobs=1)
     iris_bunch = sklearn.datasets.load_iris()
     fold_results = cross_val_score(
         estimator, iris_bunch.data, iris_bunch.target, scoring=confusion_matrix
     )
     self.assertEqual(len(fold_results), 5)
Example no. 5
    def test_comparison_with_scikit(self):
        """A lale pipeline's CV scores match the equivalent pure-scikit pipeline's."""
        import warnings

        warnings.filterwarnings("ignore")
        import sklearn.datasets
        import sklearn.utils

        from lale.helpers import cross_val_score
        from lale.lib.sklearn import PCA

        digits = sklearn.datasets.load_digits()
        X, y = sklearn.utils.shuffle(digits.data, digits.target, random_state=42)

        # lale version: (PCA & Nystroem) >> ConcatFeatures >> LogisticRegression
        lale_pipeline = (
            (
                PCA(n_components=3, random_state=42, svd_solver="arpack")
                & Nystroem(n_components=10, random_state=42)
            )
            >> ConcatFeatures()
            >> LogisticRegression(random_state=42, C=0.1)
        )
        cv_results = cross_val_score(lale_pipeline, X, y)
        cv_results = ["{0:.1%}".format(score) for score in cv_results]

        # Equivalent pure scikit-learn pipeline; note this import of
        # cross_val_score intentionally shadows the lale one used above.
        from sklearn.decomposition import PCA as SklearnPCA
        from sklearn.kernel_approximation import Nystroem as SklearnNystroem
        from sklearn.linear_model import LogisticRegression as SklearnLR
        from sklearn.model_selection import cross_val_score
        from sklearn.pipeline import FeatureUnion, make_pipeline

        feature_union = FeatureUnion(
            [
                (
                    "pca",
                    SklearnPCA(n_components=3, random_state=42, svd_solver="arpack"),
                ),
                ("nys", SklearnNystroem(n_components=10, random_state=42)),
            ]
        )
        sk_pipeline = make_pipeline(feature_union, SklearnLR(random_state=42, C=0.1))

        scikit_cv_results = cross_val_score(sk_pipeline, X, y, cv=5)
        scikit_cv_results = ["{0:.1%}".format(score) for score in scikit_cv_results]
        self.assertEqual(cv_results, scikit_cv_results)
        warnings.resetwarnings()
Example no. 6
 def test_clone_with_scikit2(self):
     """sklearn.base.clone of a lale pipeline scores identically under CV.

     The fold-by-fold comparison was duplicated verbatim for the plain and
     the nested pipeline; it is factored into a local helper.
     """
     from sklearn.base import clone
     from sklearn.datasets import load_iris
     from sklearn.metrics import accuracy_score, make_scorer
     from sklearn.model_selection import cross_val_score

     iris = load_iris()
     X, y = iris.data, iris.target

     def assert_clone_scores_equal(trainable):
         # Each CV fold score of the original and its clone must match exactly.
         trainable2 = clone(trainable)
         with warnings.catch_warnings():
             warnings.simplefilter("ignore")
             result = cross_val_score(trainable, X, y,
                                      scoring=make_scorer(accuracy_score), cv=2)
             result2 = cross_val_score(trainable2, X, y,
                                       scoring=make_scorer(accuracy_score), cv=2)
         for i in range(len(result)):
             self.assertEqual(result[i], result2[i])

     lr = LogisticRegression()
     pca = PCA()
     trainable = pca >> lr
     assert_clone_scores_equal(trainable)
     # Testing clone with nested linear pipelines
     assert_clone_scores_equal(PCA() >> trainable)
Example no. 7
    def test_clone_operator_choice(self):
        """Cloning a scikit-compat-wrapped lale pipeline gives identical CV scores."""
        from sklearn.base import clone
        from sklearn.datasets import load_iris
        from sklearn.metrics import accuracy_score, make_scorer
        from sklearn.model_selection import cross_val_score

        iris = load_iris()
        X, y = iris.data, iris.target

        wrapped = make_sklearn_compat(PCA() >> LogisticRegression())
        cloned = clone(wrapped)
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            scores_original = cross_val_score(
                wrapped, X, y, scoring=make_scorer(accuracy_score), cv=2
            )
            scores_cloned = cross_val_score(
                cloned, X, y, scoring=make_scorer(accuracy_score), cv=2
            )
        for score_a, score_b in zip(scores_original, scores_cloned):
            self.assertEqual(score_a, score_b)
Example no. 8
 def test_cv_folds_scikit(self):
     """scikit-learn's cross_val_score accepts a lale trainable with KFold(2)."""
     from sklearn.metrics import accuracy_score, make_scorer
     from sklearn.model_selection import KFold, cross_val_score

     model = LogisticRegression(n_jobs=1)
     iris_bunch = sklearn.datasets.load_iris()
     with warnings.catch_warnings():
         warnings.simplefilter("ignore")
         fold_scores = cross_val_score(
             model,
             iris_bunch.data,
             iris_bunch.target,
             cv=KFold(2),
             scoring=make_scorer(accuracy_score),
         )
     self.assertEqual(len(fold_scores), 2)
Example no. 9
    def test_resampler(self):
        """Smoke-test the resampler class named by `res_name` from the enclosing scope.

        Covers: schema validity, fit/predict inside pipelines, Hyperopt search,
        cross-validation, and JSON serialization. Removed an unused
        `RandomForestClassifier` import and an unused `res` binding.
        """
        import importlib

        from lale.lib.lale import ConcatFeatures, NoOp
        from lale.lib.sklearn import PCA, LogisticRegression, Nystroem

        X_train, y_train = self.X_train, self.y_train
        X_test, y_test = self.X_test, self.y_test

        # `res_name` (a dotted path, e.g. "lale.lib.imblearn.SMOTE") is a free
        # variable supplied by the enclosing test-generation scope — TODO confirm.
        module_name = ".".join(res_name.split('.')[0:-1])
        class_name = res_name.split('.')[-1]
        module = importlib.import_module(module_name)
        class_ = getattr(module, class_name)

        # Constructing without the required `operator` argument must fail.
        with self.assertRaises(ValueError):
            class_()

        #test_schemas_are_schemas
        lale.type_checking.validate_is_schema(class_.input_schema_fit())
        lale.type_checking.validate_is_schema(class_.input_schema_predict())
        lale.type_checking.validate_is_schema(class_.output_schema_predict())
        lale.type_checking.validate_is_schema(class_.hyperparam_schema())

        #test_init_fit_predict
        from lale.operators import make_pipeline

        pipeline1 = PCA() >> class_(operator=make_pipeline(LogisticRegression()))
        trained = pipeline1.fit(X_train, y_train)
        predictions = trained.predict(X_test)

        pipeline2 = class_(operator=make_pipeline(PCA(), LogisticRegression()))
        trained = pipeline2.fit(X_train, y_train)
        predictions = trained.predict(X_test)

        #test_with_hyperopt
        from lale.lib.lale import Hyperopt

        optimizer = Hyperopt(
            estimator=PCA >> class_(operator=make_pipeline(LogisticRegression())),
            max_evals=1,
            show_progressbar=False,
        )
        trained_optimizer = optimizer.fit(X_train, y_train)
        predictions = trained_optimizer.predict(X_test)

        pipeline3 = class_(operator= PCA() >> (Nystroem & NoOp) >> ConcatFeatures >> LogisticRegression())
        optimizer = Hyperopt(estimator=pipeline3, max_evals=1, show_progressbar=False)
        trained_optimizer = optimizer.fit(X_train, y_train)
        predictions = trained_optimizer.predict(X_test)

        pipeline4 = (PCA >> class_(operator=make_pipeline(Nystroem())) & class_(operator=make_pipeline(Nystroem()))) >> ConcatFeatures >> LogisticRegression()
        optimizer = Hyperopt(estimator=pipeline4, max_evals=1, scoring='roc_auc', show_progressbar=False)
        trained_optimizer = optimizer.fit(X_train, y_train)
        predictions = trained_optimizer.predict(X_test)

        #test_cross_validation
        from lale.helpers import cross_val_score

        cv_results = cross_val_score(pipeline1, X_train, y_train, cv=2)
        self.assertEqual(len(cv_results), 2)

        #test_to_json
        pipeline1.to_json()
Example no. 10
    def test_classifier(self):
        """Smoke-test the classifier class named by `clf_name` from the enclosing scope.

        Covers: schema validity, fit/predict, Hyperopt, cross-validation,
        GridSearchCV, predict on a trainable, JSON serialization, and use in a
        pipeline. Removed an unused `MLPClassifierImpl` import.
        """
        import importlib

        X_train, y_train = self.X_train, self.y_train
        X_test, y_test = self.X_test, self.y_test

        # `clf_name` (a dotted path to the operator class) is a free variable
        # supplied by the enclosing test-generation scope — TODO confirm.
        module_name = ".".join(clf_name.split('.')[0:-1])
        class_name = clf_name.split('.')[-1]
        module = importlib.import_module(module_name)
        class_ = getattr(module, class_name)
        clf = class_()

        #test_schemas_are_schemas
        lale.type_checking.validate_is_schema(clf.input_schema_fit())
        lale.type_checking.validate_is_schema(clf.input_schema_predict())
        lale.type_checking.validate_is_schema(clf.output_schema_predict())
        lale.type_checking.validate_is_schema(clf.hyperparam_schema())

        #test_init_fit_predict
        trained = clf.fit(self.X_train, self.y_train)
        predictions = trained.predict(self.X_test)

        #test_with_hyperopt
        from lale.lib.lale import Hyperopt

        hyperopt = Hyperopt(estimator=clf, max_evals=1)
        trained = hyperopt.fit(self.X_train, self.y_train)
        predictions = trained.predict(self.X_test)

        #test_cross_validation
        from lale.helpers import cross_val_score

        cv_results = cross_val_score(clf, X_train, y_train, cv=2)
        self.assertEqual(len(cv_results), 2)

        #test_with_gridsearchcv_auto_wrapped
        from sklearn.metrics import accuracy_score, make_scorer

        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            from lale.lib.sklearn.gradient_boosting_classifier import GradientBoostingClassifierImpl

            if clf._impl_class() == GradientBoostingClassifierImpl:
                # because exponential loss does not work with iris dataset as it
                # is not binary classification
                import lale.schemas as schemas

                clf = clf.customize_schema(
                    loss=schemas.Enum(default='deviance', values=['deviance']))
            grid_search = lale.lib.lale.GridSearchCV(
                estimator=clf,
                lale_num_samples=1,
                lale_num_grids=1,
                cv=2,
                scoring=make_scorer(accuracy_score))
            grid_search.fit(X_train, y_train)

        #test_predict_on_trainable
        trained = clf.fit(X_train, y_train)
        clf.predict(X_train)

        #test_to_json
        clf.to_json()

        #test_in_a_pipeline
        pipeline = NoOp() >> clf
        trained = pipeline.fit(self.X_train, self.y_train)
        predictions = trained.predict(self.X_test)