Esempio n. 1
0
    def test_import_from_sklearn_pipeline_feature_union(self):
        """Import a FeatureUnion-based sklearn pipeline and inspect its edges.

        The sklearn pipeline feeds a PCA/Nystroem union into a k-nearest-
        neighbors classifier. The imported pipeline must expose three edges
        whose endpoints report the expected implementation classes, and must
        pass ``assert_equal_predictions`` against the sklearn original.
        """
        from sklearn.decomposition import PCA as SklearnPCA
        from sklearn.kernel_approximation import Nystroem as SklearnNystroem
        from sklearn.neighbors import KNeighborsClassifier as SklearnKNN
        from sklearn.pipeline import FeatureUnion

        transformers = [
            ("pca", SklearnPCA(n_components=1)),
            ("nys", SklearnNystroem(n_components=2, random_state=42)),
        ]
        sklearn_pipeline = sklearn.pipeline.make_pipeline(
            FeatureUnion(transformers), SklearnKNN()
        )
        lale_pipeline = import_from_sklearn_pipeline(sklearn_pipeline)
        self.assertEqual(len(lale_pipeline.edges()), 3)
        from lale.lib.lale.concat_features import ConcatFeaturesImpl
        from lale.lib.sklearn.k_neighbors_classifier import KNeighborsClassifierImpl
        from lale.lib.sklearn.nystroem import NystroemImpl
        from lale.lib.sklearn.pca import PCAImpl

        # (source implementation, target implementation) for each edge, in the
        # order the imported pipeline is expected to list them.
        expected_edges = [
            (PCAImpl, ConcatFeaturesImpl),
            (NystroemImpl, ConcatFeaturesImpl),
            (ConcatFeaturesImpl, KNeighborsClassifierImpl),
        ]
        actual_edges = lale_pipeline.edges()
        for position, (source_impl, target_impl) in enumerate(expected_edges):
            edge = actual_edges[position]
            self.assertEqual(edge[0]._impl_class(), source_impl)
            self.assertEqual(edge[1]._impl_class(), target_impl)
        self.assert_equal_predictions(sklearn_pipeline, lale_pipeline)
Esempio n. 2
0
    def test_import_from_sklearn_pipeline_feature_union(self):
        """Import a FeatureUnion-based sklearn pipeline and inspect its edges.

        The sklearn pipeline feeds a PCA/Nystroem union into a k-nearest-
        neighbors classifier. The imported pipeline must expose three edges
        whose endpoints are instances of the expected lale operators, and must
        pass ``assert_equal_predictions`` against the sklearn original.
        """
        from sklearn.decomposition import PCA as SklearnPCA
        from sklearn.kernel_approximation import Nystroem as SklearnNystroem
        from sklearn.neighbors import KNeighborsClassifier as SklearnKNN
        from sklearn.pipeline import FeatureUnion

        transformers = [
            ("pca", SklearnPCA(n_components=1)),
            ("nys", SklearnNystroem(n_components=2, random_state=42)),
        ]
        sklearn_pipeline = sklearn.pipeline.make_pipeline(
            FeatureUnion(transformers), SklearnKNN()
        )
        imported = import_from_sklearn_pipeline(sklearn_pipeline)
        lale_pipeline = typing.cast(lale.operators.TrainablePipeline, imported)
        self.assertEqual(len(lale_pipeline.edges()), 3)
        from lale.lib.lale.concat_features import ConcatFeatures
        from lale.lib.sklearn.k_neighbors_classifier import KNeighborsClassifier
        from lale.lib.sklearn.nystroem import Nystroem
        from lale.lib.sklearn.pca import PCA

        # (source operator, target operator) for each edge, in the order the
        # imported pipeline is expected to list them.
        expected_edges = [
            (PCA, ConcatFeatures),
            (Nystroem, ConcatFeatures),
            (ConcatFeatures, KNeighborsClassifier),
        ]
        actual_edges = lale_pipeline.edges()
        for position, (source_op, target_op) in enumerate(expected_edges):
            edge = actual_edges[position]
            self.assertIsInstance(edge[0], source_op)  # type: ignore
            self.assertIsInstance(edge[1], target_op)  # type: ignore
        self.assert_equal_predictions(sklearn_pipeline, lale_pipeline)
Esempio n. 3
0
    def test_import_from_sklearn_pipeline_nested_pipeline1(self):
        """Import a pipeline with a union nested inside a union branch.

        One branch of the outer FeatureUnion is itself a pipeline that ends in
        another FeatureUnion, which in turn contains a nested pipeline. The
        imported pipeline must expose eight edges in the expected order and
        pass ``assert_equal_predictions`` against the sklearn original.
        """
        from sklearn.decomposition import PCA as SklearnPCA
        from sklearn.feature_selection import SelectKBest as SklearnSelectKBest
        from sklearn.kernel_approximation import Nystroem as SklearnNystroem
        from sklearn.neighbors import KNeighborsClassifier as SklearnKNN
        from sklearn.pipeline import FeatureUnion

        # Build the structure from the innermost piece outwards.
        innermost_pipeline = sklearn.pipeline.make_pipeline(
            SklearnSelectKBest(k=2), SklearnNystroem()
        )
        inner_union = FeatureUnion(
            [
                ("pca", SklearnPCA(n_components=1)),
                ("nested_pipeline", innermost_pipeline),
            ]
        )
        select_then_union = sklearn.pipeline.make_pipeline(
            SklearnSelectKBest(k=3), inner_union
        )
        union = FeatureUnion(
            [
                ("selectkbest_pca", select_then_union),
                ("nys", SklearnNystroem(n_components=2, random_state=42)),
            ]
        )
        sklearn_pipeline = sklearn.pipeline.make_pipeline(union, SklearnKNN())
        imported = import_from_sklearn_pipeline(sklearn_pipeline)
        lale_pipeline = typing.cast(lale.operators.TrainablePipeline, imported)
        self.assertEqual(len(lale_pipeline.edges()), 8)
        # The expected order below assumes one particular topological sort,
        # which may not be unique, so these assertions are brittle.
        from lale.lib.lale.concat_features import ConcatFeatures
        from lale.lib.sklearn.k_neighbors_classifier import KNeighborsClassifier
        from lale.lib.sklearn.nystroem import Nystroem
        from lale.lib.sklearn.pca import PCA
        from lale.lib.sklearn.select_k_best import SelectKBest

        expected_edges = [
            (SelectKBest, PCA),
            (SelectKBest, SelectKBest),
            (SelectKBest, Nystroem),
            (PCA, ConcatFeatures),
            (Nystroem, ConcatFeatures),
            (ConcatFeatures, ConcatFeatures),
            (Nystroem, ConcatFeatures),
            (ConcatFeatures, KNeighborsClassifier),
        ]
        actual_edges = lale_pipeline.edges()
        for position, (source_op, target_op) in enumerate(expected_edges):
            edge = actual_edges[position]
            self.assertIsInstance(edge[0], source_op)  # type: ignore
            self.assertIsInstance(edge[1], target_op)  # type: ignore
        self.assert_equal_predictions(sklearn_pipeline, lale_pipeline)
Esempio n. 4
0
 def test_import_from_sklearn_pipeline_nested_pipeline1(self):
     """Import a pipeline with a union nested inside a union branch.

     One branch of the outer FeatureUnion is itself a pipeline that ends in
     another FeatureUnion, which in turn contains a nested pipeline. The
     imported pipeline must expose eight edges whose endpoints report the
     expected implementation classes, and must pass
     ``assert_equal_predictions`` against the sklearn original.
     """
     from sklearn.pipeline import FeatureUnion
     from sklearn.decomposition import PCA as SklearnPCA
     from sklearn.kernel_approximation import Nystroem as SklearnNystroem
     from sklearn.feature_selection import SelectKBest
     from sklearn.neighbors import KNeighborsClassifier as SklearnKNN

     # Build the structure from the innermost piece outwards.
     innermost_pipeline = sklearn.pipeline.make_pipeline(
         SelectKBest(k=2), SklearnNystroem())
     inner_union = FeatureUnion([
         ('pca', SklearnPCA(n_components=1)),
         ('nested_pipeline', innermost_pipeline),
     ])
     select_then_union = sklearn.pipeline.make_pipeline(
         SelectKBest(k=3), inner_union)
     union = FeatureUnion([
         ("selectkbest_pca", select_then_union),
         ("nys", SklearnNystroem(n_components=2, random_state=42)),
     ])
     sklearn_pipeline = sklearn.pipeline.make_pipeline(union, SklearnKNN())
     lale_pipeline = import_from_sklearn_pipeline(sklearn_pipeline)
     self.assertEqual(len(lale_pipeline.edges()), 8)
     # The expected order below assumes one particular topological sort,
     # which may not be unique, so these assertions are brittle.
     from lale.lib.sklearn.pca import PCAImpl
     from lale.lib.sklearn.nystroem import NystroemImpl
     from lale.lib.lale.concat_features import ConcatFeaturesImpl
     from lale.lib.sklearn.k_neighbors_classifier import KNeighborsClassifierImpl
     from lale.lib.sklearn.select_k_best import SelectKBestImpl

     expected_edges = [
         (SelectKBestImpl, PCAImpl),
         (SelectKBestImpl, SelectKBestImpl),
         (SelectKBestImpl, NystroemImpl),
         (PCAImpl, ConcatFeaturesImpl),
         (NystroemImpl, ConcatFeaturesImpl),
         (ConcatFeaturesImpl, ConcatFeaturesImpl),
         (NystroemImpl, ConcatFeaturesImpl),
         (ConcatFeaturesImpl, KNeighborsClassifierImpl),
     ]
     actual_edges = lale_pipeline.edges()
     for position, (source_impl, target_impl) in enumerate(expected_edges):
         edge = actual_edges[position]
         self.assertEqual(edge[0]._impl_class(), source_impl)
         self.assertEqual(edge[1]._impl_class(), target_impl)
     self.assert_equal_predictions(sklearn_pipeline, lale_pipeline)
Esempio n. 5
0
 def test_import_from_sklearn_pipeline1(self):
     """Import a linear PCA -> KNN pipeline and compare step parameters.

     For every named step of the sklearn pipeline, the parameters of the
     model wrapped by the step at the same position in the imported pipeline
     must be equal. The two pipelines must also pass
     ``assert_equal_predictions``.
     """
     from sklearn.decomposition import PCA as SklearnPCA
     from sklearn.neighbors import KNeighborsClassifier as SklearnKNN
     sklearn_pipeline = sklearn.pipeline.make_pipeline(
         SklearnPCA(n_components=3), SklearnKNN())
     lale_pipeline = import_from_sklearn_pipeline(sklearn_pipeline)
     for position, step_name in enumerate(sklearn_pipeline.named_steps):
         original_step = sklearn_pipeline.named_steps[step_name]
         expected_params = original_step.get_params()
         # Reach through the lale wrapper to the underlying wrapped model.
         imported_step = lale_pipeline.steps()[position]
         wrapped_model = imported_step._impl._wrapped_model
         self.assertEqual(expected_params, wrapped_model.get_params())
     self.assert_equal_predictions(sklearn_pipeline, lale_pipeline)
Esempio n. 6
0
    def test_import_from_sklearn_pipeline_nested_pipeline2(self):
        """Import a pipeline whose union branch is a pipeline within a pipeline.

        One FeatureUnion branch chains SelectKBest into a nested
        SelectKBest -> PCA pipeline; the other branch is a Nystroem. The
        imported pipeline must expose five edges whose endpoints report the
        expected implementation classes, and must pass
        ``assert_equal_predictions`` against the sklearn original.
        """
        from sklearn.decomposition import PCA as SklearnPCA
        from sklearn.feature_selection import SelectKBest
        from sklearn.kernel_approximation import Nystroem as SklearnNystroem
        from sklearn.neighbors import KNeighborsClassifier as SklearnKNN
        from sklearn.pipeline import FeatureUnion

        # Build the structure from the innermost piece outwards.
        inner_pipeline = sklearn.pipeline.make_pipeline(
            SelectKBest(k=2), SklearnPCA()
        )
        outer_branch = sklearn.pipeline.make_pipeline(
            SelectKBest(k=3), inner_pipeline
        )
        union = FeatureUnion([
            ("selectkbest_pca", outer_branch),
            ("nys", SklearnNystroem(n_components=2, random_state=42)),
        ])
        sklearn_pipeline = sklearn.pipeline.make_pipeline(union, SklearnKNN())
        imported = import_from_sklearn_pipeline(sklearn_pipeline)
        lale_pipeline = typing.cast(lale.operators.TrainablePipeline, imported)
        self.assertEqual(len(lale_pipeline.edges()), 5)
        from lale.lib.lale.concat_features import ConcatFeaturesImpl
        from lale.lib.sklearn.k_neighbors_classifier import KNeighborsClassifierImpl
        from lale.lib.sklearn.nystroem import NystroemImpl
        from lale.lib.sklearn.pca import PCAImpl
        from lale.lib.sklearn.select_k_best import SelectKBestImpl

        # (source implementation, target implementation) for each edge, in the
        # order the imported pipeline is expected to list them.
        expected_edges = [
            (SelectKBestImpl, SelectKBestImpl),
            (SelectKBestImpl, PCAImpl),
            (PCAImpl, ConcatFeaturesImpl),
            (NystroemImpl, ConcatFeaturesImpl),
            (ConcatFeaturesImpl, KNeighborsClassifierImpl),
        ]
        actual_edges = lale_pipeline.edges()
        for position, (source_impl, target_impl) in enumerate(expected_edges):
            edge = actual_edges[position]
            self.assertEqual(edge[0]._impl_class(), source_impl)
            self.assertEqual(edge[1]._impl_class(), target_impl)

        self.assert_equal_predictions(sklearn_pipeline, lale_pipeline)
Esempio n. 7
0
    def test_import_from_sklearn_pipeline1(self):
        """Import a linear PCA -> KNN pipeline and compare step parameters.

        For every named step of the sklearn pipeline, the parameters returned
        by ``get_sklearn_params`` for the step at the same position in the
        imported pipeline must be equal. The two pipelines must also pass
        ``assert_equal_predictions``.
        """
        from sklearn.decomposition import PCA as SklearnPCA
        from sklearn.neighbors import KNeighborsClassifier as SklearnKNN

        sklearn_pipeline = sklearn.pipeline.make_pipeline(
            SklearnPCA(n_components=3), SklearnKNN()
        )
        imported = import_from_sklearn_pipeline(sklearn_pipeline)
        lale_pipeline = typing.cast(lale.operators.TrainablePipeline, imported)
        for position, step_name in enumerate(sklearn_pipeline.named_steps):
            original_step = sklearn_pipeline.named_steps[step_name]
            expected_params = original_step.get_params()
            imported_step = lale_pipeline.steps()[position]
            actual_params = self.get_sklearn_params(imported_step)
            self.assertEqual(expected_params, actual_params)
        self.assert_equal_predictions(sklearn_pipeline, lale_pipeline)
Esempio n. 8
0
    def __init__(self):
        """Declare table input / vector output and create the KNN model.

        The parent initializer receives the input and output data types; the
        model is a ``SklearnKNN`` configured with ``n_neighbors=15``.
        """
        super().__init__(
            input_type=NumericalDataTypesEnum.table,
            output_type=NumericalDataTypesEnum.vector,
        )
        # Double-underscore attribute: name-mangled with the enclosing class.
        self.__model = SklearnKNN(n_neighbors=15)
Esempio n. 9
0
    def fit(self, X, y):
        """Fit the model according to the given training data.

        Parameters
        ----------
        X : array-like, shape = (n_samples, n_timestamps)
            Training vector.

        y : array-like, shape = (n_samples,)
            Class labels for each data sample.

        Returns
        -------
        self : object

        Notes
        -----
        When ``metric`` is one of the time-series metric names handled below
        (``'dtw'``, ``'dtw_classic'``, ``'dtw_sakoechiba'``,
        ``'dtw_itakura'``, ``'dtw_multiscale'``, ``'dtw_fast'``, ``'boss'``),
        the underlying estimator is built with ``algorithm='brute'`` and the
        matching distance callable. Any other value is forwarded unchanged
        together with ``algorithm``, ``leaf_size`` and ``p``. The fitted
        estimator is stored in ``self._clf``.

        """
        X, y = check_X_y(X, y)
        self._le = LabelEncoder().fit(y)
        self.classes_ = self._le.classes_

        def brute_force_knn(distance, distance_params):
            # All custom time-series distances share this configuration;
            # only the distance callable and its parameters differ.
            return SklearnKNN(
                n_neighbors=self.n_neighbors, weights=self.weights,
                algorithm='brute', metric=distance,
                metric_params=distance_params,
                n_jobs=self.n_jobs, **self.kwargs
            )

        if self.metric == 'dtw':
            self._clf = brute_force_knn(dtw, self.metric_params)

        elif self.metric == 'dtw_classic':
            self._clf = brute_force_knn(dtw_classic, self.metric_params)

        elif self.metric == 'dtw_sakoechiba':
            n_timestamps = X.shape[1]
            if self.metric_params is None:
                # No parameters given: rely on the band function's default.
                region = sakoe_chiba_band(n_timestamps)
            else:
                window_size = self.metric_params.get('window_size', 0.1)
                region = sakoe_chiba_band(n_timestamps, window_size)
            # The constraint region is precomputed once and handed to the
            # region-constrained distance as its only parameter.
            self._clf = brute_force_knn(dtw_region, {'region': region})

        elif self.metric == 'dtw_itakura':
            n_timestamps = X.shape[1]
            if self.metric_params is None:
                # No parameters given: rely on the function's default slope.
                region = itakura_parallelogram(n_timestamps)
            else:
                max_slope = self.metric_params.get('max_slope', 2.)
                region = itakura_parallelogram(n_timestamps, max_slope)
            self._clf = brute_force_knn(dtw_region, {'region': region})

        elif self.metric == 'dtw_multiscale':
            self._clf = brute_force_knn(dtw_multiscale, self.metric_params)

        elif self.metric == 'dtw_fast':
            self._clf = brute_force_knn(dtw_fast, self.metric_params)

        elif self.metric == 'boss':
            self._clf = brute_force_knn(boss, self.metric_params)

        else:
            # Any other metric is passed through with the user's settings.
            self._clf = SklearnKNN(
                n_neighbors=self.n_neighbors, weights=self.weights,
                algorithm=self.algorithm, leaf_size=self.leaf_size,
                p=self.p, metric=self.metric, metric_params=self.metric_params,
                n_jobs=self.n_jobs, **self.kwargs
            )

        self._clf.fit(X, y)
        return self