Example #1
0
 def test_predict_proba(self):
     """Smoke test: predict_proba works on a KNeighborsClassifier operator
     fitted on the iris dataset."""
     trainable = KNeighborsClassifier()
     iris = sklearn.datasets.load_iris()
     trained = trainable.fit(iris.data, iris.target)
     # NOTE(review): the next line calls predict_proba on the *trainable*
     # (not the trained result); its value is immediately overwritten below.
     # The disabled assertWarns suggests it was meant to check a
     # DeprecationWarning — confirm whether this leftover is intentional.
     #with self.assertWarns(DeprecationWarning):
     predicted = trainable.predict_proba(iris.data)
     predicted = trained.predict_proba(iris.data)
Example #2
0
    def test_with_multioutput_targets(self):
        """Fit and predict with a three-column multi-output target matrix."""
        from sklearn.datasets import make_classification, load_iris
        import numpy as np
        from sklearn.utils import shuffle

        features, first = make_classification(n_samples=10, n_features=100, n_informative=30, n_classes=3, random_state=1)
        # Derive two extra label vectors by reshuffling the first one.
        second = shuffle(first, random_state=1)
        third = shuffle(first, random_state=2)
        # Stack the three label vectors as columns: shape (10, 3).
        targets = np.vstack((first, second, third)).T
        trainable = KNeighborsClassifier()
        trained = trainable.fit(features, targets)
        _ = trained.predict(features)
Example #3
0
 def test_trained_individual_op_freeze_trainable(self):
     """Freezing the hyperparameters of a trained op yields a
     TrainedIndividualOp with an empty hyperparameter search space."""
     from lale.lib.sklearn import KNeighborsClassifier
     from lale.operators import TrainedIndividualOp
     points = [[0.0], [1.0], [2.0]]
     labels = [0.0, 0.0, 1.0]
     trained = KNeighborsClassifier(n_neighbors=1).fit(points, labels)
     self.assertIsInstance(trained, TrainedIndividualOp)
     # Before freezing the op is still tunable, e.g. 'algorithm' is free.
     self.assertFalse(trained.is_frozen_trainable())
     self.assertIn('algorithm', trained.free_hyperparams())
     frozen = trained.freeze_trainable()
     # After freezing: trainable-frozen but not trained-frozen.
     self.assertIsInstance(frozen, TrainedIndividualOp)
     self.assertTrue(frozen.is_frozen_trainable())
     self.assertFalse(frozen.is_frozen_trained())
     self.assertEqual(len(frozen.free_hyperparams()), 0)
Example #4
0
 def test_import_from_sklearn_pipeline_feature_union(self):
     """Importing FeatureUnion >> KNN yields a lale DAG with three edges
     that concatenate PCA and Nystroem outputs before the classifier."""
     from sklearn.pipeline import FeatureUnion, make_pipeline
     from sklearn.decomposition import PCA
     from sklearn.kernel_approximation import Nystroem
     from sklearn.neighbors import KNeighborsClassifier
     union = FeatureUnion([("pca", PCA(n_components=1)),
                           ("nys", Nystroem(n_components=2,
                                            random_state=42))])
     sklearn_pipeline = make_pipeline(union, KNeighborsClassifier())
     lale_pipeline = import_from_sklearn_pipeline(sklearn_pipeline)
     self.assertEqual(len(lale_pipeline.edges()), 3)
     from lale.lib.sklearn.pca import PCAImpl
     from lale.lib.sklearn.nystroem import NystroemImpl
     from lale.lib.lale.concat_features import ConcatFeaturesImpl
     from lale.lib.sklearn.k_neighbors_classifier import KNeighborsClassifierImpl
     # Expected (source, target) impl classes, in topological edge order.
     expected_edges = [
         (PCAImpl, ConcatFeaturesImpl),
         (NystroemImpl, ConcatFeaturesImpl),
         (ConcatFeaturesImpl, KNeighborsClassifierImpl),
     ]
     for edge, (src_cls, dst_cls) in zip(lale_pipeline.edges(), expected_edges):
         self.assertEqual(edge[0]._impl_class(), src_cls)
         self.assertEqual(edge[1]._impl_class(), dst_cls)
     self.assert_equal_predictions(sklearn_pipeline, lale_pipeline)
Example #5
0
    def test_with_concat_features2(self):
        """Hyperopt over a choice between a concat-features pipeline and KNN."""
        import warnings

        warnings.filterwarnings("ignore")

        from sklearn.datasets import load_iris
        from sklearn.metrics import accuracy_score

        from lale.lib.lale import Hyperopt
        from lale.operators import make_pipeline

        iris = load_iris()
        features, labels = iris.data, iris.target
        # Branch A: (imputer-or-noop >> PCA) in parallel with Nystroem,
        # concatenated, then logistic regression.
        branch_a = (
            (((SimpleImputer() | NoOp()) >> PCA(n_components=3)) & Nystroem(n_components=10))
            >> ConcatFeatures()
            >> LogisticRegression(random_state=42, C=0.1)
        )
        pipeline = make_pipeline(branch_a | KNeighborsClassifier())
        optimizer = Hyperopt(estimator=pipeline, max_evals=1, handle_cv_failure=True)
        trained = optimizer.fit(features, labels)
        predictions = trained.predict(features)
        print(accuracy_score(labels, predictions))
        warnings.resetwarnings()
Example #6
0
    def test_invalid_args(self):
        """NSGA2 rejects a missing scorer list, a too-short scorer list, and
        a lale pipeline as estimator."""
        import jsonschema

        common_args = {
            'estimator': LGBMClassifier(),
            'cv': 3,
            'max_evals': 50,
            'population_size': 10
        }

        # Omitting the scoring argument violates the hyperparameter schema.
        with self.assertRaises(jsonschema.exceptions.ValidationError):
            _ = NSGA2(**common_args)

        # A single scorer is not enough for multi-objective optimization.
        with self.assertRaises(AssertionError):
            _ = NSGA2(scoring=['accuracy'], **common_args)

        # A lale pipeline as estimator must be rejected: multi-objective
        # optimization over pipelines is not supported.
        pipeline = MinMaxScaler() >> KNeighborsClassifier()
        fpr_scorer = make_scorer(compute_fpr, greater_is_better=False)
        with self.assertRaises(AssertionError):
            _ = NSGA2(estimator=pipeline, scoring=['accuracy', fpr_scorer])
 def test_with_lale_classifiers(self):
     """VotingClassifier over lale KNN and LogisticRegression fits and predicts."""
     voters = [("knn", KNeighborsClassifier()),
               ("lr", LogisticRegression())]
     trained = VotingClassifier(estimators=voters).fit(self.X_train, self.y_train)
     trained.predict(self.X_test)
Example #8
0
    def test_get_named_pipeline(self):
        """OptimizeLast + NSGA2 exposes Pareto pipelines by name ('p0', 'p1')."""
        pipeline = MinMaxScaler() >> KNeighborsClassifier()
        trained_pipeline = pipeline.fit(self.X_train, self.y_train)

        fpr_scorer = make_scorer(compute_fpr, greater_is_better=False)
        opt_last = OptimizeLast(
            estimator=trained_pipeline,
            last_optimizer=NSGA2,
            optimizer_args={
                'scoring': ['accuracy', fpr_scorer],
                'best_score': [1, 0],
                'cv': 3,
                'max_evals': 20,
                'population_size': 10
            },
        )

        res_last = opt_last.fit(self.X_train, self.y_train)

        # 'p0' always exists; check 'p1' only when the Pareto summary has
        # more than one row.
        df_summary = res_last.summary()
        names = ['p0', 'p1'] if df_summary.shape[0] > 1 else ['p0']
        for name in names:
            pareto_pipeline = res_last.get_pipeline(pipeline_name=name)
            self.assertEqual(type(trained_pipeline), type(pareto_pipeline))
    def test_export_to_sklearn_pipeline2(self):
        """Exporting a nested concat-features lale pipeline yields a sklearn
        Pipeline with a FeatureUnion step and a KNN final step."""
        from sklearn.feature_selection import SelectKBest
        from sklearn.pipeline import FeatureUnion

        # Inner branch: PCA and SelectKBest in parallel, concatenated.
        inner = (
            PCA(svd_solver="randomized", random_state=42) & SelectKBest(k=3)
        ) >> ConcatFeatures()
        lale_pipeline = (
            (inner & Nystroem(random_state=42))
            >> ConcatFeatures()
            >> KNeighborsClassifier()
        )
        trained_lale_pipeline = lale_pipeline.fit(self.X_train, self.y_train)
        sklearn_pipeline = trained_lale_pipeline.export_to_sklearn_pipeline()
        self.assertIsInstance(
            sklearn_pipeline.named_steps["featureunion"], FeatureUnion
        )
        from sklearn.neighbors import KNeighborsClassifier as SklearnKNN

        self.assertIsInstance(
            sklearn_pipeline.named_steps["kneighborsclassifier"], SklearnKNN
        )
        self.assert_equal_predictions(sklearn_pipeline, trained_lale_pipeline)
Example #10
0
    def dont_test_car_hyperopt(self):
        """Disabled test: Hyperopt over a three-way classifier choice on the
        car dataset (prefix 'dont_' keeps unittest from collecting it)."""
        from lale.datasets.auto_weka import fetch_car
        from sklearn.metrics import accuracy_score, make_scorer
        from sklearn.preprocessing import LabelEncoder
        import pandas as pd
        from lale.lib.weka import J48
        from lalegpl.lib.r import ArulesCBAClassifier
        from lale.operators import make_pipeline
        from lale.lib.lale import HyperoptClassifier
        from lale.lib.sklearn import LogisticRegression, KNeighborsClassifier

        (X_train, y_train), (X_test, y_test) = fetch_car()
        label_name = y_train.name

        # Encode string class labels as integers, then restore the series name.
        encoder = LabelEncoder()
        y_train = pd.Series(encoder.fit_transform(y_train), name=label_name)
        y_test = pd.Series(encoder.transform(y_test), name=label_name)

        classifier_choice = (ArulesCBAClassifier()
                             | LogisticRegression()
                             | KNeighborsClassifier())
        planned_pipeline = make_pipeline(classifier_choice)

        clf = HyperoptClassifier(model=planned_pipeline, max_evals=1)
        best_pipeline = clf.fit(X_train, y_train)
        print(accuracy_score(y_test, best_pipeline.predict(X_test)))
Example #11
0
 def test_import_from_sklearn_pipeline_nested_pipeline(self):
     """Importing a FeatureUnion whose first branch is itself a sklearn
     pipeline yields a four-edge lale DAG."""
     from sklearn.pipeline import FeatureUnion, make_pipeline
     from sklearn.decomposition import PCA
     from sklearn.kernel_approximation import Nystroem
     from sklearn.feature_selection import SelectKBest
     from sklearn.neighbors import KNeighborsClassifier
     union = FeatureUnion([("selectkbest_pca",
                            make_pipeline(SelectKBest(k=3),
                                          PCA(n_components=1))),
                           ("nys", Nystroem(n_components=2,
                                            random_state=42))])
     sklearn_pipeline = make_pipeline(union, KNeighborsClassifier())
     lale_pipeline = import_from_sklearn_pipeline(sklearn_pipeline)
     self.assertEqual(len(lale_pipeline.edges()), 4)
     from lale.lib.sklearn.pca import PCAImpl
     from lale.lib.sklearn.nystroem import NystroemImpl
     from lale.lib.lale.concat_features import ConcatFeaturesImpl
     from lale.lib.sklearn.k_neighbors_classifier import KNeighborsClassifierImpl
     # These assertions assume a topological sort of the edges.
     # NOTE: the SelectKBest node is matched against the raw sklearn class,
     # mirroring what the importer stores as that node's _impl.
     expected_edges = [
         (SelectKBest, PCAImpl),
         (PCAImpl, ConcatFeaturesImpl),
         (NystroemImpl, ConcatFeaturesImpl),
         (ConcatFeaturesImpl, KNeighborsClassifierImpl),
     ]
     for edge, (src_cls, dst_cls) in zip(lale_pipeline.edges(), expected_edges):
         self.assertIsInstance(edge[0]._impl, src_cls)
         self.assertIsInstance(edge[1]._impl, dst_cls)
     self.assert_equal_predictions(sklearn_pipeline, lale_pipeline)
Example #12
0
 def test_individual_op_freeze_trained(self):
     """A trained-frozen operator ignores refits; an unfrozen one retrains."""
     from lale.lib.sklearn import KNeighborsClassifier
     trainable = KNeighborsClassifier(n_neighbors=1)
     points = [[0.0], [1.0], [2.0]]
     labels_a = [0.0, 0.0, 1.0]
     labels_b = [1.0, 0.0, 0.0]
     # Unfrozen: refitting picks up the new labels.
     fitted_a = trainable.fit(points, labels_a)
     self.assertEqual(list(fitted_a.predict(points)), list(labels_a))
     fitted_b = fitted_a.fit(points, labels_b)
     self.assertEqual(list(fitted_b.predict(points)), list(labels_b))
     # Frozen: refitting leaves the learned model unchanged.
     frozen_a = trainable.fit(points, labels_a).freeze_trained()
     self.assertFalse(fitted_a.is_frozen_trained())
     self.assertTrue(frozen_a.is_frozen_trained())
     self.assertEqual(list(frozen_a.predict(points)), list(labels_a))
     frozen_b = frozen_a.fit(points, labels_b)
     self.assertEqual(list(frozen_b.predict(points)), list(labels_a))
Example #13
0
    def test_pipeline_choice_with_hyperopt(self):
        """auto_configure a BaggingClassifier whose base estimator is a
        pipeline containing an operator choice."""
        from lale.lib.lale import Hyperopt
        from lale.lib.sklearn import BaggingClassifier

        base = PCA() >> (LogisticRegression() | KNeighborsClassifier())
        clf = BaggingClassifier(base_estimator=base)
        _ = clf.auto_configure(self.X_train, self.y_train, Hyperopt, max_evals=1)
Example #14
0
    def test_with_hyperopt(self):
        """auto_configure a VotingClassifier with Hyperopt for one evaluation."""
        from lale.lib.lale import Hyperopt
        from lale.lib.sklearn import VotingClassifier

        voters = [("knn", KNeighborsClassifier()), ("lr", LogisticRegression())]
        clf = VotingClassifier(estimators=voters)
        _ = clf.auto_configure(self.X_train, self.y_train, Hyperopt, max_evals=1)
Example #15
0
 def test_with_lale_pipeline(self):
     """VotingClassifier mixing an individual op and a lale sub-pipeline."""
     from lale.lib.sklearn import VotingClassifier
     voters = [('knn', KNeighborsClassifier()),
               ('pca_lr', PCA() >> LogisticRegression())]
     trained = VotingClassifier(estimators=voters).fit(self.X_train, self.y_train)
     trained.predict(self.X_test)
Example #16
0
 def test_export_to_sklearn_pipeline_with_noop_4(self):
     """Export a pipeline with a leading NoOp and check that the exported
     sklearn pipeline predicts the same as the trained lale pipeline.

     Fix: removed the unused local import of sklearn's make_pipeline.
     """
     from lale.lib.sklearn import KNeighborsClassifier
     from lale.lib.lale import NoOp
     lale_pipeline = NoOp() >> KNeighborsClassifier()
     trained_lale_pipeline = lale_pipeline.fit(self.X_train, self.y_train)
     sklearn_pipeline = trained_lale_pipeline.export_to_sklearn_pipeline()
     self.assert_equal_predictions(sklearn_pipeline, trained_lale_pipeline)
Example #17
0
    def test_trained_individual_op_freeze_trainable(self):
        """With schema validation enabled, freezing a trained op's
        hyperparameters leaves no free hyperparameters."""
        from lale.lib.sklearn import KNeighborsClassifier
        from lale.operators import TrainedIndividualOp

        with EnableSchemaValidation():
            points = np.array([[0.0], [1.0], [2.0]])
            labels = np.array([0.0, 0.0, 1.0])
            trained = KNeighborsClassifier(n_neighbors=1).fit(points, labels)
            self.assertIsInstance(trained, TrainedIndividualOp)
            # Not yet frozen: the hyperparameter search space is non-empty.
            self.assertFalse(trained.is_frozen_trainable())
            self.assertIn("algorithm", trained.free_hyperparams())
            frozen = trained.freeze_trainable()
            # Frozen for tuning, but not frozen against refitting.
            self.assertIsInstance(frozen, TrainedIndividualOp)
            self.assertTrue(frozen.is_frozen_trainable())
            self.assertFalse(frozen.is_frozen_trained())
            self.assertEqual(len(frozen.free_hyperparams()), 0)
Example #18
0
 def test_multiple_estimators_predict_predict_proba(self):
     """A DAG with estimators in interior positions still supports both
     predict and predict_proba on the final classifier."""
     pipeline = (
         StandardScaler()
         >> (LogisticRegression() & PCA())
         >> ConcatFeatures()
         >> (NoOp() & LinearSVC())
         >> ConcatFeatures()
         >> KNeighborsClassifier()
     )
     pipeline.fit(self.X_train, self.y_train)
     _ = pipeline.predict_proba(self.X_test)
     _ = pipeline.predict(self.X_test)
Example #19
0
 def test_export_to_sklearn_pipeline_with_noop_3(self):
     """Smoke test: exporting a pipeline with a trailing NoOp must not raise.

     Fix: removed the unused local import of sklearn's make_pipeline and
     named the unused export result `_`.
     """
     from lale.lib.sklearn import PCA, KNeighborsClassifier
     from lale.lib.lale import NoOp
     # This test is probably unnecessary, but doesn't harm at this point
     lale_pipeline = PCA(n_components=3) >> KNeighborsClassifier() >> NoOp()
     trained_lale_pipeline = lale_pipeline.fit(self.X_train, self.y_train)
     _ = trained_lale_pipeline.export_to_sklearn_pipeline()
Example #20
0
    def test_individual_op_freeze_trained(self):
        """With schema validation enabled: a trained-frozen operator ignores
        refits, while an unfrozen one retrains on new labels."""
        from lale.lib.sklearn import KNeighborsClassifier

        with EnableSchemaValidation():
            trainable = KNeighborsClassifier(n_neighbors=1)
            points = np.array([[0.0], [1.0], [2.0]])
            labels_a = np.array([0.0, 0.0, 1.0])
            labels_b = np.array([1.0, 0.0, 0.0])
            # Unfrozen: refitting picks up the new labels.
            fitted_a = trainable.fit(points, labels_a)
            self.assertEqual(list(fitted_a.predict(points)), list(labels_a))
            fitted_b = fitted_a.fit(points, labels_b)
            self.assertEqual(list(fitted_b.predict(points)), list(labels_b))
            # Frozen: refitting leaves the learned model unchanged.
            frozen_a = trainable.fit(points, labels_a).freeze_trained()
            self.assertFalse(fitted_a.is_frozen_trained())
            self.assertTrue(frozen_a.is_frozen_trained())
            self.assertEqual(list(frozen_a.predict(points)), list(labels_a))
            frozen_b = frozen_a.fit(points, labels_b)
            self.assertEqual(list(frozen_b.predict(points)), list(labels_a))
Example #21
0
 def test_import_from_sklearn_pipeline_nested_pipeline1(self):
     """Importing a doubly-nested FeatureUnion/pipeline combination yields
     an eight-edge lale DAG."""
     from sklearn.pipeline import FeatureUnion, make_pipeline
     from sklearn.decomposition import PCA
     from sklearn.kernel_approximation import Nystroem
     from sklearn.feature_selection import SelectKBest
     from sklearn.neighbors import KNeighborsClassifier
     union = FeatureUnion([
         ("selectkbest_pca",
          make_pipeline(
              SelectKBest(k=3),
              FeatureUnion([('pca', PCA(n_components=1)),
                            ('nested_pipeline',
                             make_pipeline(SelectKBest(k=2),
                                           Nystroem()))]))),
         ("nys", Nystroem(n_components=2, random_state=42))
     ])
     sklearn_pipeline = make_pipeline(union, KNeighborsClassifier())
     lale_pipeline = import_from_sklearn_pipeline(sklearn_pipeline)
     self.assertEqual(len(lale_pipeline.edges()), 8)
     from lale.lib.sklearn.pca import PCAImpl
     from lale.lib.sklearn.nystroem import NystroemImpl
     from lale.lib.lale.concat_features import ConcatFeaturesImpl
     from lale.lib.sklearn.k_neighbors_classifier import KNeighborsClassifierImpl
     from lale.lib.sklearn.select_k_best import SelectKBestImpl
     # These assertions assume a topological sort of the edges, which may
     # not be unique — so they are brittle by construction.
     expected_edges = [
         (SelectKBestImpl, PCAImpl),
         (SelectKBestImpl, SelectKBestImpl),
         (SelectKBestImpl, NystroemImpl),
         (PCAImpl, ConcatFeaturesImpl),
         (NystroemImpl, ConcatFeaturesImpl),
         (ConcatFeaturesImpl, ConcatFeaturesImpl),
         (NystroemImpl, ConcatFeaturesImpl),
         (ConcatFeaturesImpl, KNeighborsClassifierImpl),
     ]
     for edge, (src_cls, dst_cls) in zip(lale_pipeline.edges(), expected_edges):
         self.assertEqual(edge[0]._impl_class(), src_cls)
         self.assertEqual(edge[1]._impl_class(), dst_cls)
     self.assert_equal_predictions(sklearn_pipeline, lale_pipeline)
Example #22
0
    def test_with_observed_gridsearch(self):
        """GridSearchCV over a hard-voting classifier with a logging observer."""
        from lale.lib.sklearn import VotingClassifier
        from lale.lib.lale import GridSearchCV
        from lale.lib.lale import Observing
        from lale.lib.lale.observing import LoggingObserver

        from sklearn.metrics import accuracy_score, make_scorer
        voters = [('knn', KNeighborsClassifier()), ('rc', RidgeClassifier())]
        clf = VotingClassifier(estimators=voters, voting='hard')
        trained = clf.auto_configure(
            self.X_train, self.y_train, GridSearchCV,
            lale_num_samples=1, lale_num_grids=1, cv=2,
            scoring=make_scorer(accuracy_score),
            observer=LoggingObserver,
        )
Example #23
0
    def test_with_voting_classifier1(self):
        """A plain sklearn VotingClassifier accepts lale operators as voters."""
        from sklearn.ensemble import VotingClassifier

        voters = [("lr", LogisticRegression()),
                  ("knn", KNeighborsClassifier())]
        vclf = VotingClassifier(estimators=voters)

        iris = load_iris()
        vclf.fit(iris.data, iris.target)
Example #24
0
 def test_smac2(self):
     """SMAC records its crash cost for an infeasible KNN configuration.

     KNeighborsClassifier(n_neighbors=10000) is expected to fail during
     evaluation, so at least one trial should carry SMAC's default
     cost_for_crash value (MAXINT, 2147483647.0).

     Fix: replaced the bare `assert` with self.assertIn — a bare assert is
     stripped under `python -O` and gives no diagnostic on failure.
     """
     from sklearn.metrics import accuracy_score
     from lale.lib.lale import SMAC
     planned_pipeline = (PCA | NoOp) >> KNeighborsClassifier(n_neighbors = 10000)
     opt = SMAC(estimator=planned_pipeline, max_evals=1)
     # run optimizer
     res = opt.fit(self.X_train, self.y_train)
     # Inspect the trials object: the crashed KNN trial should have been
     # assigned the default crash cost.
     trials = res._impl.get_trials()
     self.assertIn(2147483647.0, trials.cost_per_config.values())
Example #25
0
    def test_fit_args(self):
        """TopKVotingClassifier over a planned choice pipeline fits and predicts."""
        from lale.lib.lale import TopKVotingClassifier
        from lale.lib.sklearn import Nystroem

        planned = (PCA() | Nystroem()) >> (LogisticRegression() | KNeighborsClassifier())
        ensemble = TopKVotingClassifier(estimator=planned, k=2)
        trained = ensemble.fit(self.X_train, self.y_train)
        trained.predict(self.X_test)
Example #26
0
 def test_export_to_sklearn_pipeline(self):
     """Each exported sklearn step carries the same hyperparameters as the
     corresponding step of the trained lale pipeline."""
     lale_pipeline = PCA(n_components=3) >> KNeighborsClassifier()
     trained_lale_pipeline = lale_pipeline.fit(self.X_train, self.y_train)
     sklearn_pipeline = trained_lale_pipeline.export_to_sklearn_pipeline()
     lale_steps = trained_lale_pipeline.steps()
     # named_steps preserves pipeline order, so index i lines up with steps()[i].
     for index, step_name in enumerate(sklearn_pipeline.named_steps):
         exported_params = sklearn_pipeline.named_steps[step_name].get_params()
         expected_params = self.get_sklearn_params(lale_steps[index])
         self.assertEqual(exported_params, expected_params)
     self.assert_equal_predictions(sklearn_pipeline, trained_lale_pipeline)
Example #27
0
 def test_nested_pipeline1(self):
     """Hyperopt over a choice between KNN and an imputer >> LR pipeline."""
     from sklearn.datasets import load_iris
     from lale.lib.lale import Hyperopt
     from sklearn.metrics import accuracy_score
     iris = load_iris()
     features, labels = iris.data, iris.target
     pipeline = KNeighborsClassifier() | (SimpleImputer() >> LogisticRegression())
     optimizer = Hyperopt(estimator=pipeline, max_evals=1)
     trained = optimizer.fit(features, labels)
     predictions = trained.predict(features)
     print(accuracy_score(labels, predictions))
Example #28
0
    def test_fit_args(self):
        """TopKVotingClassifier (k=2) over a planned choice pipeline: fit on
        the training split, then predict as a smoke test.

        Fix: removed the unused local imports of load_iris and
        accuracy_score.
        """
        from lale.lib.lale import TopKVotingClassifier
        from lale.lib.sklearn import Nystroem

        ensemble = TopKVotingClassifier(
            estimator=(PCA() | Nystroem()) >>
            (LogisticRegression() | KNeighborsClassifier()),
            k=2)
        trained = ensemble.fit(self.X_train, self.y_train)
        trained.predict(self.X_test)
Example #29
0
 def test_schema_validation(self):
     """n_neighbors=16 fails fit-time schema validation while 15 passes
     (presumably bounded by the training-set size — see the fixture)."""
     too_many = KNeighborsClassifier(n_neighbors=16)
     with self.assertRaises(jsonschema.ValidationError):
         _ = too_many.fit(self.train_X, self.train_y)
     just_enough = KNeighborsClassifier(n_neighbors=15)
     trained = just_enough.fit(self.train_X, self.train_y)
     _ = trained.predict(self.test_X)
Example #30
0
    def test_fit_smaller_trials(self):
        """When max_evals (3) is smaller than k (20), the final ensemble has
        at most max_evals voters."""
        from lale.lib.lale import TopKVotingClassifier
        from lale.lib.sklearn import Nystroem

        planned = (PCA() | Nystroem()) >> (LogisticRegression() | KNeighborsClassifier())
        ensemble = TopKVotingClassifier(
            estimator=planned,
            args_to_optimizer={"max_evals": 3},
            k=20,
        )
        trained = ensemble.fit(self.X_train, self.y_train)
        best = trained._impl._best_estimator
        self.assertLessEqual(len(best._impl._wrapped_model.estimators), 3)