예제 #1
0
 def test_pipeline_freeze_trainable(self):
     """Check that freezing a PCA >> LogisticRegression pipeline fixes its grid.

     Before ``freeze_trainable()`` the pipeline must report itself as not
     frozen and yield more than one grid-search parameter grid; afterwards
     it must report itself as frozen and yield exactly one grid.
     """
     from lale.lib.sklearn import PCA, LogisticRegression

     liquid = PCA() >> LogisticRegression()
     self.assertFalse(liquid.is_frozen_trainable())
     liquid_grid = get_grid_search_parameter_grids(liquid)
     # assertGreater reports both operands on failure, which a bare
     # assertTrue(a > b) does not.
     self.assertGreater(len(liquid_grid), 1, f'grid size {len(liquid_grid)}')

     frozen = liquid.freeze_trainable()
     self.assertTrue(frozen.is_frozen_trainable())
     frozen_grid = get_grid_search_parameter_grids(frozen)
     self.assertEqual(len(frozen_grid), 1)
예제 #2
0
 def test_individual_op_freeze_trainable(self):
     """Check that freezing a single LogisticRegression fixes its hyperparameters.

     The operator is created with ``C`` and ``solver`` bound, so ``penalty``
     must still be listed as free and the grid search must yield more than
     one grid. After ``freeze_trainable()`` there must be no free
     hyperparameters left and exactly one grid.
     """
     from lale.lib.sklearn import LogisticRegression

     liquid = LogisticRegression(C=0.1, solver='liblinear')
     self.assertIn('penalty', liquid.free_hyperparams())
     self.assertFalse(liquid.is_frozen_trainable())
     liquid_grid = get_grid_search_parameter_grids(liquid)
     # assertGreater reports both operands on failure, which a bare
     # assertTrue(a > b) does not.
     self.assertGreater(len(liquid_grid), 1, f'grid size {len(liquid_grid)}')

     frozen = liquid.freeze_trainable()
     remaining = frozen.free_hyperparams()
     # Include the leftover names in the message so a failure is actionable.
     self.assertEqual(len(remaining), 0, f'still free: {remaining}')
     self.assertTrue(frozen.is_frozen_trainable())
     frozen_grid = get_grid_search_parameter_grids(frozen)
     self.assertEqual(len(frozen_grid), 1)
예제 #3
0
    def test_pipeline_parameters(self):
        """Smoke-test grid generation for PCA >> LogisticRegression with a PGO.

        The PGO object is loaded from ``example_pgo_fp`` and handed to
        ``get_grid_search_parameter_grids`` together with ``num_samples=2``.
        The resulting grids are not inspected; the test passes as long as
        nothing raises.
        """
        loaded_pgo = PGO.load_pgo_file(example_pgo_fp)
        pipeline = PCA() >> LogisticRegression()
        _ = get_grid_search_parameter_grids(
            pipeline, num_samples=2, pgo=loaded_pgo
        )
예제 #4
0
    def test_lr_parameters(self):
        """Smoke-test grid generation for a lone LogisticRegression with a PGO.

        The PGO object is loaded from ``example_pgo_fp`` and handed to
        ``get_grid_search_parameter_grids`` together with ``num_samples=2``.
        The resulting grids are not inspected; the test passes as long as
        nothing raises.
        """
        loaded_pgo = PGO.load_pgo_file(example_pgo_fp)
        classifier = LogisticRegression()
        _ = get_grid_search_parameter_grids(
            classifier, num_samples=2, pgo=loaded_pgo
        )
예제 #5
0
    def test_with_gridsearchcv3_auto(self):
        """Run sklearn's GridSearchCV with two auto-generated parameter grids.

        The grids are generated from the pipeline ``Nystroem() >> lr`` and
        applied to an sklearn ``Pipeline`` whose step names are taken from
        the operators' ``name()``. The test only checks that fitting and
        predicting on the iris data complete without raising.
        """
        from sklearn.datasets import load_iris
        from sklearn.metrics import accuracy_score, make_scorer
        from sklearn.model_selection import GridSearchCV
        from sklearn.pipeline import Pipeline

        lr = LogisticRegression()
        named_steps = [
            (Nystroem().name(), Nystroem()),
            (lr.name(), LogisticRegression()),
        ]
        sklearn_pipe = Pipeline(named_steps)
        candidate_grids = get_grid_search_parameter_grids(
            Nystroem() >> lr, num_samples=1
        )
        # Searching every grid takes too long, so only two are sampled.
        sampled_grids = random.sample(candidate_grids, 2)

        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            search = GridSearchCV(
                sklearn_pipe,
                sampled_grids,
                cv=2,
                scoring=make_scorer(accuracy_score),
            )
            dataset = load_iris()
            search.fit(dataset.data, dataset.target)
            _ = search.predict(dataset.data)
예제 #6
0
    def test_grid_search_on_trained_auto(self):
        """Construct a GridSearchCV around an already-fitted LogisticRegression.

        The parameter grids are generated from the unfitted operator with
        ``num_samples=2``. The search object is only constructed, never
        fitted, so the test passes as long as construction does not raise.
        """
        from sklearn.datasets import load_iris
        from sklearn.metrics import accuracy_score, make_scorer
        from sklearn.model_selection import GridSearchCV

        dataset = load_iris()
        features = dataset.data
        labels = dataset.target

        lr = LogisticRegression()
        fitted = lr.fit(features, labels)
        grids = get_grid_search_parameter_grids(lr, num_samples=2)

        scorer = make_scorer(accuracy_score)
        _ = GridSearchCV(fitted, grids, cv=5, scoring=scorer)
예제 #7
0
    def dont_test_with_gridsearchcv2_auto(self):
        """Compare GridSearchCV accuracy for two equivalent sklearn Pipelines.

        Two parameter grids are sampled from those generated for the
        ``pca >> lr`` pipeline. The same grids drive a GridSearchCV over
        (1) a Pipeline built from the ``PCA``/``LogisticRegression`` names in
        this module's scope and (2) a Pipeline built from sklearn's own
        classes. Both searches must reach the same training accuracy on iris.

        NOTE(review): the ``dont_`` prefix keeps default unittest discovery
        from collecting this method; presumably it is disabled on purpose.
        """
        from sklearn.datasets import load_iris
        from sklearn.decomposition import PCA as SklearnPCA
        from sklearn.linear_model import LogisticRegression as SklearnLR
        from sklearn.metrics import accuracy_score, make_scorer
        from sklearn.model_selection import GridSearchCV
        from sklearn.pipeline import Pipeline

        lr = LogisticRegression(random_state=42)
        pca = PCA(random_state=42, svd_solver="arpack")
        trainable = pca >> lr

        all_parameters = get_grid_search_parameter_grids(trainable,
                                                         num_samples=1)
        # otherwise the test takes too long
        parameters = random.sample(all_parameters, 2)

        def grid_search_accuracy(pca_step, lr_step):
            """Fit a GridSearchCV over the two steps; return accuracy on iris."""
            # Step names come from the operators so the sampled grids'
            # parameter keys line up with the Pipeline's steps.
            scikit_pipeline = Pipeline([
                (pca.name(), pca_step),
                (lr.name(), lr_step),
            ])
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                clf = GridSearchCV(scikit_pipeline,
                                   parameters,
                                   cv=2,
                                   scoring=make_scorer(accuracy_score))
                iris = load_iris()
                clf.fit(iris.data, iris.target)
                predicted = clf.predict(iris.data)
                return accuracy_score(iris.target, predicted)

        accuracy_with_lale_operators = grid_search_accuracy(
            PCA(random_state=42, svd_solver="arpack"),
            LogisticRegression(random_state=42),
        )
        accuracy_with_scikit_operators = grid_search_accuracy(
            SklearnPCA(random_state=42, svd_solver="arpack"),
            SklearnLR(random_state=42),
        )
        self.assertEqual(accuracy_with_lale_operators,
                         accuracy_with_scikit_operators)