def test_import_from_sklearn_pipeline_feature_union(self):
    """Import ``FeatureUnion(PCA, Nystroem) -> KNN`` and verify its edges.

    The imported pipeline must expose exactly three edges.  For each edge,
    the value returned by ``_impl_class()`` on both endpoints is compared
    with the expected ``*Impl`` class, in the order the edges are listed.
    Finally the sklearn and imported pipelines are handed to
    ``assert_equal_predictions``.
    """
    from sklearn.decomposition import PCA as SklearnPCA
    from sklearn.kernel_approximation import Nystroem as SklearnNystroem
    from sklearn.neighbors import KNeighborsClassifier as SklearnKNN
    from sklearn.pipeline import FeatureUnion

    transformers = [
        ("pca", SklearnPCA(n_components=1)),
        ("nys", SklearnNystroem(n_components=2, random_state=42)),
    ]
    sklearn_pipeline = sklearn.pipeline.make_pipeline(
        FeatureUnion(transformers), SklearnKNN()
    )
    lale_pipeline = import_from_sklearn_pipeline(sklearn_pipeline)

    edges = lale_pipeline.edges()
    self.assertEqual(len(edges), 3)

    from lale.lib.lale.concat_features import ConcatFeaturesImpl
    from lale.lib.sklearn.k_neighbors_classifier import KNeighborsClassifierImpl
    from lale.lib.sklearn.nystroem import NystroemImpl
    from lale.lib.sklearn.pca import PCAImpl

    # (source, destination) implementation class expected at each edge index.
    expected_edges = [
        (PCAImpl, ConcatFeaturesImpl),
        (NystroemImpl, ConcatFeaturesImpl),
        (ConcatFeaturesImpl, KNeighborsClassifierImpl),
    ]
    for position, (source_impl, target_impl) in enumerate(expected_edges):
        edge = edges[position]
        self.assertEqual(edge[0]._impl_class(), source_impl)
        self.assertEqual(edge[1]._impl_class(), target_impl)

    self.assert_equal_predictions(sklearn_pipeline, lale_pipeline)
def test_import_from_sklearn_pipeline_feature_union(self):
    """Import ``FeatureUnion(PCA, Nystroem) -> KNN`` and verify its edges.

    The imported pipeline (cast to ``lale.operators.TrainablePipeline`` for
    the type checker) must expose exactly three edges.  Both endpoints of
    every edge are checked with ``assertIsInstance`` against the matching
    name imported from ``lale.lib``, in the order the edges are listed.
    Finally the sklearn and imported pipelines are handed to
    ``assert_equal_predictions``.
    """
    from sklearn.decomposition import PCA as SklearnPCA
    from sklearn.kernel_approximation import Nystroem as SklearnNystroem
    from sklearn.neighbors import KNeighborsClassifier as SklearnKNN
    from sklearn.pipeline import FeatureUnion

    transformers = [
        ("pca", SklearnPCA(n_components=1)),
        ("nys", SklearnNystroem(n_components=2, random_state=42)),
    ]
    sklearn_pipeline = sklearn.pipeline.make_pipeline(
        FeatureUnion(transformers), SklearnKNN()
    )
    lale_pipeline = typing.cast(
        lale.operators.TrainablePipeline,
        import_from_sklearn_pipeline(sklearn_pipeline),
    )

    edges = lale_pipeline.edges()
    self.assertEqual(len(edges), 3)

    from lale.lib.lale.concat_features import ConcatFeatures
    from lale.lib.sklearn.k_neighbors_classifier import KNeighborsClassifier
    from lale.lib.sklearn.nystroem import Nystroem
    from lale.lib.sklearn.pca import PCA

    # (source, destination) operator expected at each edge index.
    expected_edges = [
        (PCA, ConcatFeatures),
        (Nystroem, ConcatFeatures),
        (ConcatFeatures, KNeighborsClassifier),
    ]
    for position, (source_op, target_op) in enumerate(expected_edges):
        edge = edges[position]
        self.assertIsInstance(edge[0], source_op)  # type: ignore
        self.assertIsInstance(edge[1], target_op)  # type: ignore

    self.assert_equal_predictions(sklearn_pipeline, lale_pipeline)
def test_import_from_sklearn_pipeline_nested_pipeline1(self):
    """Import a pipeline with nested FeatureUnions/pipelines and verify its edges.

    The sklearn structure is an outer ``FeatureUnion`` whose first branch is
    ``SelectKBest(k=3) -> FeatureUnion(PCA, SelectKBest(k=2) -> Nystroem)``
    and whose second branch is a ``Nystroem``; the union feeds a KNN
    classifier.  The imported pipeline must expose eight edges, each of
    which is checked with ``assertIsInstance`` on both endpoints, and the
    predictions of both pipelines are compared.
    """
    from sklearn.decomposition import PCA as SklearnPCA
    # Aliased locally so the lale ``SelectKBest`` imported further down does
    # not rebind the same name.
    from sklearn.feature_selection import SelectKBest as SklearnSelectKBest
    from sklearn.kernel_approximation import Nystroem as SklearnNystroem
    from sklearn.neighbors import KNeighborsClassifier as SklearnKNN
    from sklearn.pipeline import FeatureUnion

    innermost_pipeline = sklearn.pipeline.make_pipeline(
        SklearnSelectKBest(k=2), SklearnNystroem()
    )
    inner_union = FeatureUnion(
        [
            ("pca", SklearnPCA(n_components=1)),
            ("nested_pipeline", innermost_pipeline),
        ]
    )
    first_branch = sklearn.pipeline.make_pipeline(
        SklearnSelectKBest(k=3), inner_union
    )
    union = FeatureUnion(
        [
            ("selectkbest_pca", first_branch),
            ("nys", SklearnNystroem(n_components=2, random_state=42)),
        ]
    )
    sklearn_pipeline = sklearn.pipeline.make_pipeline(union, SklearnKNN())
    lale_pipeline = typing.cast(
        lale.operators.TrainablePipeline,
        import_from_sklearn_pipeline(sklearn_pipeline),
    )

    edges = lale_pipeline.edges()
    self.assertEqual(len(edges), 8)

    # These assertions assume topological sort, which may not be unique.
    # So the assertions are brittle.
    from lale.lib.lale.concat_features import ConcatFeatures
    from lale.lib.sklearn.k_neighbors_classifier import KNeighborsClassifier
    from lale.lib.sklearn.nystroem import Nystroem
    from lale.lib.sklearn.pca import PCA
    from lale.lib.sklearn.select_k_best import SelectKBest

    # (source, destination) operator expected at each edge index.
    expected_edges = [
        (SelectKBest, PCA),
        (SelectKBest, SelectKBest),
        (SelectKBest, Nystroem),
        (PCA, ConcatFeatures),
        (Nystroem, ConcatFeatures),
        (ConcatFeatures, ConcatFeatures),
        (Nystroem, ConcatFeatures),
        (ConcatFeatures, KNeighborsClassifier),
    ]
    for position, (source_op, target_op) in enumerate(expected_edges):
        edge = edges[position]
        self.assertIsInstance(edge[0], source_op)  # type: ignore
        self.assertIsInstance(edge[1], target_op)  # type: ignore

    self.assert_equal_predictions(sklearn_pipeline, lale_pipeline)
def test_import_from_sklearn_pipeline_nested_pipeline1(self):
    """Import a pipeline with nested FeatureUnions/pipelines and verify its edges.

    The sklearn structure is an outer ``FeatureUnion`` whose first branch is
    ``SelectKBest(k=3) -> FeatureUnion(PCA, SelectKBest(k=2) -> Nystroem)``
    and whose second branch is a ``Nystroem``; the union feeds a KNN
    classifier.  The imported pipeline must expose eight edges; for each
    one, ``_impl_class()`` of both endpoints is compared with the expected
    ``*Impl`` class.  Predictions of both pipelines are compared at the end.
    """
    from sklearn.pipeline import FeatureUnion
    from sklearn.decomposition import PCA as SklearnPCA
    from sklearn.kernel_approximation import Nystroem as SklearnNystroem
    from sklearn.feature_selection import SelectKBest
    from sklearn.neighbors import KNeighborsClassifier as SklearnKNN

    innermost_pipeline = sklearn.pipeline.make_pipeline(
        SelectKBest(k=2), SklearnNystroem()
    )
    inner_union = FeatureUnion(
        [
            ("pca", SklearnPCA(n_components=1)),
            ("nested_pipeline", innermost_pipeline),
        ]
    )
    first_branch = sklearn.pipeline.make_pipeline(SelectKBest(k=3), inner_union)
    union = FeatureUnion(
        [
            ("selectkbest_pca", first_branch),
            ("nys", SklearnNystroem(n_components=2, random_state=42)),
        ]
    )
    sklearn_pipeline = sklearn.pipeline.make_pipeline(union, SklearnKNN())
    lale_pipeline = import_from_sklearn_pipeline(sklearn_pipeline)

    edges = lale_pipeline.edges()
    self.assertEqual(len(edges), 8)

    # These assertions assume topological sort, which may not be unique.
    # So the assertions are brittle.
    from lale.lib.sklearn.pca import PCAImpl
    from lale.lib.sklearn.nystroem import NystroemImpl
    from lale.lib.lale.concat_features import ConcatFeaturesImpl
    from lale.lib.sklearn.k_neighbors_classifier import KNeighborsClassifierImpl
    from lale.lib.sklearn.select_k_best import SelectKBestImpl

    # (source, destination) implementation class expected at each edge index.
    expected_edges = [
        (SelectKBestImpl, PCAImpl),
        (SelectKBestImpl, SelectKBestImpl),
        (SelectKBestImpl, NystroemImpl),
        (PCAImpl, ConcatFeaturesImpl),
        (NystroemImpl, ConcatFeaturesImpl),
        (ConcatFeaturesImpl, ConcatFeaturesImpl),
        (NystroemImpl, ConcatFeaturesImpl),
        (ConcatFeaturesImpl, KNeighborsClassifierImpl),
    ]
    for position, (source_impl, target_impl) in enumerate(expected_edges):
        edge = edges[position]
        self.assertEqual(edge[0]._impl_class(), source_impl)
        self.assertEqual(edge[1]._impl_class(), target_impl)

    self.assert_equal_predictions(sklearn_pipeline, lale_pipeline)
def test_import_from_sklearn_pipeline1(self):
    """Import a linear ``PCA -> KNN`` pipeline and compare step hyperparameters.

    For every step of the sklearn pipeline, its ``get_params()`` result must
    equal ``get_params()`` of the ``_impl._wrapped_model`` found at the same
    position in the imported pipeline's ``steps()``.  Predictions of both
    pipelines are compared afterwards.
    """
    from sklearn.decomposition import PCA as SklearnPCA
    from sklearn.neighbors import KNeighborsClassifier as SklearnKNN

    sklearn_pipeline = sklearn.pipeline.make_pipeline(
        SklearnPCA(n_components=3), SklearnKNN()
    )
    lale_pipeline = import_from_sklearn_pipeline(sklearn_pipeline)

    lale_steps = lale_pipeline.steps()
    for position, sklearn_step in enumerate(sklearn_pipeline.named_steps.values()):
        expected_params = sklearn_step.get_params()
        wrapped_model = lale_steps[position]._impl._wrapped_model
        self.assertEqual(expected_params, wrapped_model.get_params())

    self.assert_equal_predictions(sklearn_pipeline, lale_pipeline)
def test_import_from_sklearn_pipeline_nested_pipeline2(self):
    """Import a FeatureUnion containing a pipeline-in-pipeline and verify its edges.

    The first union branch is ``SelectKBest(k=3) -> (SelectKBest(k=2) -> PCA)``
    and the second is a ``Nystroem``; the union feeds a KNN classifier.  The
    imported pipeline must expose five edges; for each one,
    ``_impl_class()`` of both endpoints is compared with the expected
    ``*Impl`` class.  Predictions of both pipelines are compared at the end.
    """
    from sklearn.decomposition import PCA as SklearnPCA
    from sklearn.feature_selection import SelectKBest
    from sklearn.kernel_approximation import Nystroem as SklearnNystroem
    from sklearn.neighbors import KNeighborsClassifier as SklearnKNN
    from sklearn.pipeline import FeatureUnion

    inner_pipeline = sklearn.pipeline.make_pipeline(SelectKBest(k=2), SklearnPCA())
    first_branch = sklearn.pipeline.make_pipeline(SelectKBest(k=3), inner_pipeline)
    union = FeatureUnion(
        [
            ("selectkbest_pca", first_branch),
            ("nys", SklearnNystroem(n_components=2, random_state=42)),
        ]
    )
    sklearn_pipeline = sklearn.pipeline.make_pipeline(union, SklearnKNN())
    lale_pipeline = typing.cast(
        lale.operators.TrainablePipeline,
        import_from_sklearn_pipeline(sklearn_pipeline),
    )

    edges = lale_pipeline.edges()
    self.assertEqual(len(edges), 5)

    from lale.lib.lale.concat_features import ConcatFeaturesImpl
    from lale.lib.sklearn.k_neighbors_classifier import KNeighborsClassifierImpl
    from lale.lib.sklearn.nystroem import NystroemImpl
    from lale.lib.sklearn.pca import PCAImpl
    from lale.lib.sklearn.select_k_best import SelectKBestImpl

    # (source, destination) implementation class expected at each edge index.
    expected_edges = [
        (SelectKBestImpl, SelectKBestImpl),
        (SelectKBestImpl, PCAImpl),
        (PCAImpl, ConcatFeaturesImpl),
        (NystroemImpl, ConcatFeaturesImpl),
        (ConcatFeaturesImpl, KNeighborsClassifierImpl),
    ]
    for position, (source_impl, target_impl) in enumerate(expected_edges):
        edge = edges[position]
        self.assertEqual(edge[0]._impl_class(), source_impl)
        self.assertEqual(edge[1]._impl_class(), target_impl)

    self.assert_equal_predictions(sklearn_pipeline, lale_pipeline)
def test_import_from_sklearn_pipeline1(self):
    """Import a linear ``PCA -> KNN`` pipeline and compare step hyperparameters.

    For every step of the sklearn pipeline, its ``get_params()`` result must
    equal what ``self.get_sklearn_params`` returns for the step at the same
    position in the imported pipeline's ``steps()``.  Predictions of both
    pipelines are compared afterwards.
    """
    from sklearn.decomposition import PCA as SklearnPCA
    from sklearn.neighbors import KNeighborsClassifier as SklearnKNN

    sklearn_pipeline = sklearn.pipeline.make_pipeline(
        SklearnPCA(n_components=3), SklearnKNN()
    )
    lale_pipeline = typing.cast(
        lale.operators.TrainablePipeline,
        import_from_sklearn_pipeline(sklearn_pipeline),
    )

    lale_steps = lale_pipeline.steps()
    for position, sklearn_step in enumerate(sklearn_pipeline.named_steps.values()):
        expected_params = sklearn_step.get_params()
        imported_params = self.get_sklearn_params(lale_steps[position])
        self.assertEqual(expected_params, imported_params)

    self.assert_equal_predictions(sklearn_pipeline, lale_pipeline)
def __init__(self):
    """Initialise the wrapper around a 15-neighbour ``SklearnKNN``.

    The parent initialiser is called with ``NumericalDataTypesEnum.table``
    as the input type and ``NumericalDataTypesEnum.vector`` as the output
    type; the classifier instance is then stored on the private
    ``__model`` attribute.
    """
    super().__init__(
        input_type=NumericalDataTypesEnum.table,
        output_type=NumericalDataTypesEnum.vector,
    )
    self.__model = SklearnKNN(n_neighbors=15)
def fit(self, X, y):
    """Fit the model according to the given training data.

    The underlying ``SklearnKNN`` is configured from ``self.metric``:

    - ``'dtw'``, ``'dtw_classic'``, ``'dtw_multiscale'``, ``'dtw_fast'`` and
      ``'boss'`` use the function of the same name as metric, with
      ``self.metric_params`` passed through unchanged.
    - ``'dtw_sakoechiba'`` and ``'dtw_itakura'`` use ``dtw_region`` with a
      region computed from the number of timestamps (``X.shape[1]``) and,
      when ``self.metric_params`` is given, its ``'window_size'``
      (default 0.1) or ``'max_slope'`` (default 2.) entry respectively.
    - any other value is forwarded to ``SklearnKNN`` as is, together with
      ``self.algorithm``, ``self.leaf_size`` and ``self.p``.

    All the named metrics above are run with ``algorithm='brute'``.

    Parameters
    ----------
    X : array-like, shape = (n_samples, n_timestamps)
        Training vector.

    y : array-like, shape = (n_samples,)
        Class labels for each data sample.

    Returns
    -------
    self : object

    """
    X, y = check_X_y(X, y)
    self._le = LabelEncoder().fit(y)
    self.classes_ = self._le.classes_

    def brute_force_knn(metric, metric_params):
        # Shared constructor call for every metric that is paired with
        # the brute-force neighbour search.
        return SklearnKNN(
            n_neighbors=self.n_neighbors,
            weights=self.weights,
            algorithm='brute',
            metric=metric,
            metric_params=metric_params,
            n_jobs=self.n_jobs,
            **self.kwargs
        )

    metric_name = self.metric
    user_params = self.metric_params

    if metric_name == 'dtw':
        self._clf = brute_force_knn(dtw, user_params)
    elif metric_name == 'dtw_classic':
        self._clf = brute_force_knn(dtw_classic, user_params)
    elif metric_name == 'dtw_sakoechiba':
        n_timestamps = X.shape[1]
        if user_params is None:
            region = sakoe_chiba_band(n_timestamps)
        else:
            region = sakoe_chiba_band(
                n_timestamps, user_params.get('window_size', 0.1)
            )
        self._clf = brute_force_knn(dtw_region, {'region': region})
    elif metric_name == 'dtw_itakura':
        n_timestamps = X.shape[1]
        if user_params is None:
            region = itakura_parallelogram(n_timestamps)
        else:
            region = itakura_parallelogram(
                n_timestamps, user_params.get('max_slope', 2.)
            )
        self._clf = brute_force_knn(dtw_region, {'region': region})
    elif metric_name == 'dtw_multiscale':
        self._clf = brute_force_knn(dtw_multiscale, user_params)
    elif metric_name == 'dtw_fast':
        self._clf = brute_force_knn(dtw_fast, user_params)
    elif metric_name == 'boss':
        self._clf = brute_force_knn(boss, user_params)
    else:
        # Not one of the named metrics: hand everything to scikit-learn
        # unchanged, including the user's algorithm/leaf_size/p settings.
        self._clf = SklearnKNN(
            n_neighbors=self.n_neighbors,
            weights=self.weights,
            algorithm=self.algorithm,
            leaf_size=self.leaf_size,
            p=self.p,
            metric=metric_name,
            metric_params=user_params,
            n_jobs=self.n_jobs,
            **self.kwargs
        )

    self._clf.fit(X, y)
    return self