예제 #1
0
    def test_import_from_sklearn_pipeline_nested_pipeline1(self):
        from sklearn.decomposition import PCA as SklearnPCA
        from sklearn.feature_selection import SelectKBest
        from sklearn.kernel_approximation import Nystroem as SklearnNystroem
        from sklearn.neighbors import KNeighborsClassifier as SklearnKNN
        from sklearn.pipeline import FeatureUnion

        union = FeatureUnion(
            [
                (
                    "selectkbest_pca",
                    sklearn.pipeline.make_pipeline(
                        SelectKBest(k=3),
                        FeatureUnion(
                            [
                                ("pca", SklearnPCA(n_components=1)),
                                (
                                    "nested_pipeline",
                                    sklearn.pipeline.make_pipeline(
                                        SelectKBest(k=2), SklearnNystroem()
                                    ),
                                ),
                            ]
                        ),
                    ),
                ),
                ("nys", SklearnNystroem(n_components=2, random_state=42)),
            ]
        )
        sklearn_pipeline = sklearn.pipeline.make_pipeline(union, SklearnKNN())
        lale_pipeline = typing.cast(
            lale.operators.TrainablePipeline,
            import_from_sklearn_pipeline(sklearn_pipeline),
        )
        self.assertEqual(len(lale_pipeline.edges()), 8)
        # These assertions assume topological sort, which may not be unique. So the assertions are brittle.
        from lale.lib.lale.concat_features import ConcatFeatures
        from lale.lib.sklearn.k_neighbors_classifier import KNeighborsClassifier
        from lale.lib.sklearn.nystroem import Nystroem
        from lale.lib.sklearn.pca import PCA
        from lale.lib.sklearn.select_k_best import SelectKBest

        self.assertIsInstance(lale_pipeline.edges()[0][0], SelectKBest)  # type: ignore
        self.assertIsInstance(lale_pipeline.edges()[0][1], PCA)  # type: ignore
        self.assertIsInstance(lale_pipeline.edges()[1][0], SelectKBest)  # type: ignore
        self.assertIsInstance(lale_pipeline.edges()[1][1], SelectKBest)  # type: ignore
        self.assertIsInstance(lale_pipeline.edges()[2][0], SelectKBest)  # type: ignore
        self.assertIsInstance(lale_pipeline.edges()[2][1], Nystroem)  # type: ignore
        self.assertIsInstance(lale_pipeline.edges()[3][0], PCA)  # type: ignore
        self.assertIsInstance(lale_pipeline.edges()[3][1], ConcatFeatures)  # type: ignore
        self.assertIsInstance(lale_pipeline.edges()[4][0], Nystroem)  # type: ignore
        self.assertIsInstance(lale_pipeline.edges()[4][1], ConcatFeatures)  # type: ignore
        self.assertIsInstance(lale_pipeline.edges()[5][0], ConcatFeatures)  # type: ignore
        self.assertIsInstance(lale_pipeline.edges()[5][1], ConcatFeatures)  # type: ignore
        self.assertIsInstance(lale_pipeline.edges()[6][0], Nystroem)  # type: ignore
        self.assertIsInstance(lale_pipeline.edges()[6][1], ConcatFeatures)  # type: ignore
        self.assertIsInstance(lale_pipeline.edges()[7][0], ConcatFeatures)  # type: ignore
        self.assertIsInstance(lale_pipeline.edges()[7][1], KNeighborsClassifier)  # type: ignore
        self.assert_equal_predictions(sklearn_pipeline, lale_pipeline)
예제 #2
0
 def test_import_from_sklearn_pipeline_nested_pipeline1(self):
     from sklearn.pipeline import FeatureUnion
     from sklearn.decomposition import PCA as SklearnPCA
     from sklearn.kernel_approximation import Nystroem as SklearnNystroem
     from sklearn.feature_selection import SelectKBest
     from sklearn.neighbors import KNeighborsClassifier as SklearnKNN
     union = FeatureUnion([
         ("selectkbest_pca",
          sklearn.pipeline.make_pipeline(
              SelectKBest(k=3),
              FeatureUnion([
                  ('pca', SklearnPCA(n_components=1)),
                  ('nested_pipeline',
                   sklearn.pipeline.make_pipeline(SelectKBest(k=2),
                                                  SklearnNystroem()))
              ]))), ("nys", SklearnNystroem(n_components=2,
                                            random_state=42))
     ])
     sklearn_pipeline = sklearn.pipeline.make_pipeline(union, SklearnKNN())
     lale_pipeline = import_from_sklearn_pipeline(sklearn_pipeline)
     self.assertEqual(len(lale_pipeline.edges()), 8)
     #These assertions assume topological sort, which may not be unique. So the assertions are brittle.
     from lale.lib.sklearn.pca import PCAImpl
     from lale.lib.sklearn.nystroem import NystroemImpl
     from lale.lib.lale.concat_features import ConcatFeaturesImpl
     from lale.lib.sklearn.k_neighbors_classifier import KNeighborsClassifierImpl
     from lale.lib.sklearn.select_k_best import SelectKBestImpl
     self.assertEqual(lale_pipeline.edges()[0][0]._impl_class(),
                      SelectKBestImpl)
     self.assertEqual(lale_pipeline.edges()[0][1]._impl_class(), PCAImpl)
     self.assertEqual(lale_pipeline.edges()[1][0]._impl_class(),
                      SelectKBestImpl)
     self.assertEqual(lale_pipeline.edges()[1][1]._impl_class(),
                      SelectKBestImpl)
     self.assertEqual(lale_pipeline.edges()[2][0]._impl_class(),
                      SelectKBestImpl)
     self.assertEqual(lale_pipeline.edges()[2][1]._impl_class(),
                      NystroemImpl)
     self.assertEqual(lale_pipeline.edges()[3][0]._impl_class(), PCAImpl)
     self.assertEqual(lale_pipeline.edges()[3][1]._impl_class(),
                      ConcatFeaturesImpl)
     self.assertEqual(lale_pipeline.edges()[4][0]._impl_class(),
                      NystroemImpl)
     self.assertEqual(lale_pipeline.edges()[4][1]._impl_class(),
                      ConcatFeaturesImpl)
     self.assertEqual(lale_pipeline.edges()[5][0]._impl_class(),
                      ConcatFeaturesImpl)
     self.assertEqual(lale_pipeline.edges()[5][1]._impl_class(),
                      ConcatFeaturesImpl)
     self.assertEqual(lale_pipeline.edges()[6][0]._impl_class(),
                      NystroemImpl)
     self.assertEqual(lale_pipeline.edges()[6][1]._impl_class(),
                      ConcatFeaturesImpl)
     self.assertEqual(lale_pipeline.edges()[7][0]._impl_class(),
                      ConcatFeaturesImpl)
     self.assertEqual(lale_pipeline.edges()[7][1]._impl_class(),
                      KNeighborsClassifierImpl)
     self.assert_equal_predictions(sklearn_pipeline, lale_pipeline)
예제 #3
0
    def test_import_from_sklearn_pipeline_feature_union(self):
        from sklearn.decomposition import PCA as SklearnPCA
        from sklearn.kernel_approximation import Nystroem as SklearnNystroem
        from sklearn.neighbors import KNeighborsClassifier as SklearnKNN
        from sklearn.pipeline import FeatureUnion

        union = FeatureUnion([
            ("pca", SklearnPCA(n_components=1)),
            ("nys", SklearnNystroem(n_components=2, random_state=42)),
        ])
        sklearn_pipeline = sklearn.pipeline.make_pipeline(union, SklearnKNN())
        lale_pipeline = typing.cast(
            lale.operators.TrainablePipeline,
            import_from_sklearn_pipeline(sklearn_pipeline),
        )
        self.assertEqual(len(lale_pipeline.edges()), 3)
        from lale.lib.lale.concat_features import ConcatFeatures
        from lale.lib.sklearn.k_neighbors_classifier import KNeighborsClassifier
        from lale.lib.sklearn.nystroem import Nystroem
        from lale.lib.sklearn.pca import PCA

        self.assertIsInstance(lale_pipeline.edges()[0][0], PCA)  # type: ignore
        self.assertIsInstance(lale_pipeline.edges()[0][1],
                              ConcatFeatures)  # type: ignore
        self.assertIsInstance(lale_pipeline.edges()[1][0],
                              Nystroem)  # type: ignore
        self.assertIsInstance(lale_pipeline.edges()[1][1],
                              ConcatFeatures)  # type: ignore
        self.assertIsInstance(lale_pipeline.edges()[2][0],
                              ConcatFeatures)  # type: ignore
        self.assertIsInstance(lale_pipeline.edges()[2][1],
                              KNeighborsClassifier)  # type: ignore
        self.assert_equal_predictions(sklearn_pipeline, lale_pipeline)
예제 #4
0
    def test_import_from_sklearn_pipeline_feature_union(self):
        from sklearn.decomposition import PCA as SklearnPCA
        from sklearn.kernel_approximation import Nystroem as SklearnNystroem
        from sklearn.neighbors import KNeighborsClassifier as SklearnKNN
        from sklearn.pipeline import FeatureUnion

        union = FeatureUnion([
            ("pca", SklearnPCA(n_components=1)),
            ("nys", SklearnNystroem(n_components=2, random_state=42)),
        ])
        sklearn_pipeline = sklearn.pipeline.make_pipeline(union, SklearnKNN())
        lale_pipeline = import_from_sklearn_pipeline(sklearn_pipeline)
        self.assertEqual(len(lale_pipeline.edges()), 3)
        from lale.lib.lale.concat_features import ConcatFeaturesImpl
        from lale.lib.sklearn.k_neighbors_classifier import KNeighborsClassifierImpl
        from lale.lib.sklearn.nystroem import NystroemImpl
        from lale.lib.sklearn.pca import PCAImpl

        self.assertEqual(lale_pipeline.edges()[0][0]._impl_class(), PCAImpl)
        self.assertEqual(lale_pipeline.edges()[0][1]._impl_class(),
                         ConcatFeaturesImpl)
        self.assertEqual(lale_pipeline.edges()[1][0]._impl_class(),
                         NystroemImpl)
        self.assertEqual(lale_pipeline.edges()[1][1]._impl_class(),
                         ConcatFeaturesImpl)
        self.assertEqual(lale_pipeline.edges()[2][0]._impl_class(),
                         ConcatFeaturesImpl)
        self.assertEqual(lale_pipeline.edges()[2][1]._impl_class(),
                         KNeighborsClassifierImpl)
        self.assert_equal_predictions(sklearn_pipeline, lale_pipeline)
예제 #5
0
    def test_comparison_with_scikit(self):
        import warnings
        warnings.filterwarnings("ignore")
        from lale.lib.sklearn import PCA
        import sklearn.datasets
        from lale.helpers import cross_val_score
        pca = PCA(n_components=3, random_state=42, svd_solver='arpack')
        nys = Nystroem(n_components=10, random_state=42)
        concat = ConcatFeatures()
        lr = LogisticRegression(random_state=42, C=0.1)
        trainable = (pca & nys) >> concat >> lr
        digits = sklearn.datasets.load_digits()
        X, y = sklearn.utils.shuffle(digits.data, digits.target, random_state=42)

        cv_results = cross_val_score(trainable, X, y)
        cv_results = ['{0:.1%}'.format(score) for score in cv_results]

        from sklearn.pipeline import make_pipeline, FeatureUnion
        from sklearn.decomposition import PCA as SklearnPCA
        from sklearn.kernel_approximation import Nystroem as SklearnNystroem
        from sklearn.linear_model import LogisticRegression as SklearnLR
        from sklearn.model_selection import cross_val_score
        union = FeatureUnion([("pca", SklearnPCA(n_components=3, random_state=42, svd_solver='arpack')),
                            ("nys", SklearnNystroem(n_components=10, random_state=42))])
        lr = SklearnLR(random_state=42, C=0.1)
        pipeline = make_pipeline(union, lr)

        scikit_cv_results = cross_val_score(pipeline, X, y, cv = 5)
        scikit_cv_results = ['{0:.1%}'.format(score) for score in scikit_cv_results]
        self.assertEqual(cv_results, scikit_cv_results)
        warnings.resetwarnings()
예제 #6
0
    def test_import_from_sklearn_pipeline_nested_pipeline2(self):
        from sklearn.decomposition import PCA as SklearnPCA
        from sklearn.feature_selection import SelectKBest
        from sklearn.kernel_approximation import Nystroem as SklearnNystroem
        from sklearn.neighbors import KNeighborsClassifier as SklearnKNN
        from sklearn.pipeline import FeatureUnion

        union = FeatureUnion([
            (
                "selectkbest_pca",
                sklearn.pipeline.make_pipeline(
                    SelectKBest(k=3),
                    sklearn.pipeline.make_pipeline(SelectKBest(k=2),
                                                   SklearnPCA()),
                ),
            ),
            ("nys", SklearnNystroem(n_components=2, random_state=42)),
        ])
        sklearn_pipeline = sklearn.pipeline.make_pipeline(union, SklearnKNN())
        lale_pipeline = typing.cast(
            lale.operators.TrainablePipeline,
            import_from_sklearn_pipeline(sklearn_pipeline),
        )
        self.assertEqual(len(lale_pipeline.edges()), 5)
        from lale.lib.lale.concat_features import ConcatFeaturesImpl
        from lale.lib.sklearn.k_neighbors_classifier import KNeighborsClassifierImpl
        from lale.lib.sklearn.nystroem import NystroemImpl
        from lale.lib.sklearn.pca import PCAImpl
        from lale.lib.sklearn.select_k_best import SelectKBestImpl

        self.assertEqual(lale_pipeline.edges()[0][0]._impl_class(),
                         SelectKBestImpl)
        self.assertEqual(lale_pipeline.edges()[0][1]._impl_class(),
                         SelectKBestImpl)
        self.assertEqual(lale_pipeline.edges()[1][0]._impl_class(),
                         SelectKBestImpl)
        self.assertEqual(lale_pipeline.edges()[1][1]._impl_class(), PCAImpl)
        self.assertEqual(lale_pipeline.edges()[2][0]._impl_class(), PCAImpl)
        self.assertEqual(lale_pipeline.edges()[2][1]._impl_class(),
                         ConcatFeaturesImpl)
        self.assertEqual(lale_pipeline.edges()[3][0]._impl_class(),
                         NystroemImpl)
        self.assertEqual(lale_pipeline.edges()[3][1]._impl_class(),
                         ConcatFeaturesImpl)
        self.assertEqual(lale_pipeline.edges()[4][0]._impl_class(),
                         ConcatFeaturesImpl)
        self.assertEqual(lale_pipeline.edges()[4][1]._impl_class(),
                         KNeighborsClassifierImpl)

        self.assert_equal_predictions(sklearn_pipeline, lale_pipeline)