def test_import_from_sklearn_pipeline_nested_pipeline1(self):
    """Import a sklearn pipeline with a FeatureUnion nested inside a pipeline.

    The sklearn pipeline is ``FeatureUnion -> KNN`` where one branch of the
    union is itself a pipeline ending in another FeatureUnion. After the
    import, the test checks the number of edges (8), the operator found at
    both ends of every edge, and finally delegates to
    ``self.assert_equal_predictions`` with both pipelines.
    """
    from sklearn.decomposition import PCA as SklearnPCA
    from sklearn.feature_selection import SelectKBest as SklearnSelectKBest
    from sklearn.kernel_approximation import Nystroem as SklearnNystroem
    from sklearn.neighbors import KNeighborsClassifier as SklearnKNN
    from sklearn.pipeline import FeatureUnion

    # Build the sklearn structure from the outside in; argument evaluation
    # order matches the fully nested original expression.
    outer_select = SklearnSelectKBest(k=3)
    pca_step = ("pca", SklearnPCA(n_components=1))
    nested_step = (
        "nested_pipeline",
        sklearn.pipeline.make_pipeline(SklearnSelectKBest(k=2), SklearnNystroem()),
    )
    inner_union = FeatureUnion([pca_step, nested_step])
    select_branch = sklearn.pipeline.make_pipeline(outer_select, inner_union)
    nys_branch = SklearnNystroem(n_components=2, random_state=42)
    union = FeatureUnion([("selectkbest_pca", select_branch), ("nys", nys_branch)])

    sklearn_pipeline = sklearn.pipeline.make_pipeline(union, SklearnKNN())
    lale_pipeline = typing.cast(
        lale.operators.TrainablePipeline,
        import_from_sklearn_pipeline(sklearn_pipeline),
    )
    self.assertEqual(len(lale_pipeline.edges()), 8)

    from lale.lib.lale.concat_features import ConcatFeatures
    from lale.lib.sklearn.k_neighbors_classifier import KNeighborsClassifier
    from lale.lib.sklearn.nystroem import Nystroem
    from lale.lib.sklearn.pca import PCA
    from lale.lib.sklearn.select_k_best import SelectKBest

    # These expectations assume a particular topological sort of the edges,
    # which may not be unique, so the checks below are brittle.
    expected_edges = [
        (SelectKBest, PCA),
        (SelectKBest, SelectKBest),
        (SelectKBest, Nystroem),
        (PCA, ConcatFeatures),
        (Nystroem, ConcatFeatures),
        (ConcatFeatures, ConcatFeatures),
        (Nystroem, ConcatFeatures),
        (ConcatFeatures, KNeighborsClassifier),
    ]
    for position, (source_op, target_op) in enumerate(expected_edges):
        self.assertIsInstance(
            lale_pipeline.edges()[position][0], source_op  # type: ignore
        )
        self.assertIsInstance(
            lale_pipeline.edges()[position][1], target_op  # type: ignore
        )

    self.assert_equal_predictions(sklearn_pipeline, lale_pipeline)
def test_import_from_sklearn_pipeline_nested_pipeline1(self):
    """Import a sklearn pipeline with a FeatureUnion nested inside a pipeline.

    Builds ``FeatureUnion -> KNN`` in sklearn, where one union branch is a
    pipeline that ends in a second FeatureUnion, imports it, and then checks
    the edge count (8) and the ``_impl_class()`` of the operator at each end
    of every edge. Finishes by calling ``self.assert_equal_predictions``.
    """
    from sklearn.decomposition import PCA as SklearnPCA
    from sklearn.feature_selection import SelectKBest as SklearnSelectKBest
    from sklearn.kernel_approximation import Nystroem as SklearnNystroem
    from sklearn.neighbors import KNeighborsClassifier as SklearnKNN
    from sklearn.pipeline import FeatureUnion

    # Assemble the nested structure step by step; evaluation order matches
    # the single nested expression it replaces.
    outer_select = SklearnSelectKBest(k=3)
    pca_step = ("pca", SklearnPCA(n_components=1))
    nested_step = (
        "nested_pipeline",
        sklearn.pipeline.make_pipeline(SklearnSelectKBest(k=2), SklearnNystroem()),
    )
    inner_union = FeatureUnion([pca_step, nested_step])
    select_branch = sklearn.pipeline.make_pipeline(outer_select, inner_union)
    nys_branch = SklearnNystroem(n_components=2, random_state=42)
    union = FeatureUnion([("selectkbest_pca", select_branch), ("nys", nys_branch)])

    sklearn_pipeline = sklearn.pipeline.make_pipeline(union, SklearnKNN())
    lale_pipeline = import_from_sklearn_pipeline(sklearn_pipeline)
    self.assertEqual(len(lale_pipeline.edges()), 8)

    from lale.lib.lale.concat_features import ConcatFeaturesImpl
    from lale.lib.sklearn.k_neighbors_classifier import KNeighborsClassifierImpl
    from lale.lib.sklearn.nystroem import NystroemImpl
    from lale.lib.sklearn.pca import PCAImpl
    from lale.lib.sklearn.select_k_best import SelectKBestImpl

    # These expectations assume a particular topological sort of the edges,
    # which may not be unique, so the checks below are brittle.
    expected_impls = [
        (SelectKBestImpl, PCAImpl),
        (SelectKBestImpl, SelectKBestImpl),
        (SelectKBestImpl, NystroemImpl),
        (PCAImpl, ConcatFeaturesImpl),
        (NystroemImpl, ConcatFeaturesImpl),
        (ConcatFeaturesImpl, ConcatFeaturesImpl),
        (NystroemImpl, ConcatFeaturesImpl),
        (ConcatFeaturesImpl, KNeighborsClassifierImpl),
    ]
    for position, (source_impl, target_impl) in enumerate(expected_impls):
        self.assertEqual(
            lale_pipeline.edges()[position][0]._impl_class(), source_impl
        )
        self.assertEqual(
            lale_pipeline.edges()[position][1]._impl_class(), target_impl
        )

    self.assert_equal_predictions(sklearn_pipeline, lale_pipeline)
def test_import_from_sklearn_pipeline_feature_union(self):
    """Import a ``FeatureUnion(PCA, Nystroem) -> KNN`` sklearn pipeline.

    Checks that the imported pipeline has 3 edges, that each edge connects
    the expected operators, and then hands both pipelines to
    ``self.assert_equal_predictions``.
    """
    from sklearn.decomposition import PCA as SklearnPCA
    from sklearn.kernel_approximation import Nystroem as SklearnNystroem
    from sklearn.neighbors import KNeighborsClassifier as SklearnKNN
    from sklearn.pipeline import FeatureUnion

    pca_step = ("pca", SklearnPCA(n_components=1))
    nys_step = ("nys", SklearnNystroem(n_components=2, random_state=42))
    union = FeatureUnion([pca_step, nys_step])

    sklearn_pipeline = sklearn.pipeline.make_pipeline(union, SklearnKNN())
    lale_pipeline = typing.cast(
        lale.operators.TrainablePipeline,
        import_from_sklearn_pipeline(sklearn_pipeline),
    )
    self.assertEqual(len(lale_pipeline.edges()), 3)

    from lale.lib.lale.concat_features import ConcatFeatures
    from lale.lib.sklearn.k_neighbors_classifier import KNeighborsClassifier
    from lale.lib.sklearn.nystroem import Nystroem
    from lale.lib.sklearn.pca import PCA

    # (source operator, target operator) expected at each edge index.
    expected_edges = [
        (PCA, ConcatFeatures),
        (Nystroem, ConcatFeatures),
        (ConcatFeatures, KNeighborsClassifier),
    ]
    for position, (source_op, target_op) in enumerate(expected_edges):
        self.assertIsInstance(
            lale_pipeline.edges()[position][0], source_op  # type: ignore
        )
        self.assertIsInstance(
            lale_pipeline.edges()[position][1], target_op  # type: ignore
        )

    self.assert_equal_predictions(sklearn_pipeline, lale_pipeline)
def test_import_from_sklearn_pipeline_feature_union(self):
    """Import a ``FeatureUnion(PCA, Nystroem) -> KNN`` sklearn pipeline.

    Checks that the imported pipeline has 3 edges and that the
    ``_impl_class()`` at both ends of each edge is the expected one, then
    hands both pipelines to ``self.assert_equal_predictions``.
    """
    from sklearn.decomposition import PCA as SklearnPCA
    from sklearn.kernel_approximation import Nystroem as SklearnNystroem
    from sklearn.neighbors import KNeighborsClassifier as SklearnKNN
    from sklearn.pipeline import FeatureUnion

    pca_step = ("pca", SklearnPCA(n_components=1))
    nys_step = ("nys", SklearnNystroem(n_components=2, random_state=42))
    union = FeatureUnion([pca_step, nys_step])

    sklearn_pipeline = sklearn.pipeline.make_pipeline(union, SklearnKNN())
    lale_pipeline = import_from_sklearn_pipeline(sklearn_pipeline)
    self.assertEqual(len(lale_pipeline.edges()), 3)

    from lale.lib.lale.concat_features import ConcatFeaturesImpl
    from lale.lib.sklearn.k_neighbors_classifier import KNeighborsClassifierImpl
    from lale.lib.sklearn.nystroem import NystroemImpl
    from lale.lib.sklearn.pca import PCAImpl

    # (source impl class, target impl class) expected at each edge index.
    expected_impls = [
        (PCAImpl, ConcatFeaturesImpl),
        (NystroemImpl, ConcatFeaturesImpl),
        (ConcatFeaturesImpl, KNeighborsClassifierImpl),
    ]
    for position, (source_impl, target_impl) in enumerate(expected_impls):
        self.assertEqual(
            lale_pipeline.edges()[position][0]._impl_class(), source_impl
        )
        self.assertEqual(
            lale_pipeline.edges()[position][1]._impl_class(), target_impl
        )

    self.assert_equal_predictions(sklearn_pipeline, lale_pipeline)
def test_comparison_with_scikit(self):
    """Compare cross-validation scores of a lale pipeline and a sklearn one.

    Both pipelines are ``(PCA & Nystroem) -> concat -> LogisticRegression``
    with identical hyperparameters, evaluated on the shuffled digits
    dataset. Scores are formatted to one decimal place of a percentage
    before being compared, so only differences visible at that precision
    fail the test.

    All warnings are silenced for the duration of the test. The filter is
    scoped with ``warnings.catch_warnings()`` so that the previous filter
    state is restored on exit, including when an assertion fails or an
    exception is raised part-way through.
    """
    import warnings

    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")

        # Imports stay inside the suppressed region, as in the original
        # ordering where the filter was installed before them.
        from lale.lib.sklearn import PCA
        import sklearn.datasets
        from lale.helpers import cross_val_score as lale_cross_val_score

        # Nystroem, ConcatFeatures and LogisticRegression are resolved from
        # the enclosing module scope (presumably the lale operators).
        pca = PCA(n_components=3, random_state=42, svd_solver="arpack")
        nys = Nystroem(n_components=10, random_state=42)
        concat = ConcatFeatures()
        lale_lr = LogisticRegression(random_state=42, C=0.1)
        trainable = (pca & nys) >> concat >> lale_lr

        digits = sklearn.datasets.load_digits()
        X, y = sklearn.utils.shuffle(digits.data, digits.target, random_state=42)

        # No cv argument is passed here, while the sklearn call below uses
        # cv=5; NOTE(review): presumably the lale default matches -- confirm.
        cv_results = lale_cross_val_score(trainable, X, y)
        cv_results = ["{0:.1%}".format(score) for score in cv_results]

        from sklearn.pipeline import make_pipeline, FeatureUnion
        from sklearn.decomposition import PCA as SklearnPCA
        from sklearn.kernel_approximation import Nystroem as SklearnNystroem
        from sklearn.linear_model import LogisticRegression as SklearnLR
        from sklearn.model_selection import (
            cross_val_score as sklearn_cross_val_score,
        )

        union = FeatureUnion(
            [
                (
                    "pca",
                    SklearnPCA(n_components=3, random_state=42, svd_solver="arpack"),
                ),
                ("nys", SklearnNystroem(n_components=10, random_state=42)),
            ]
        )
        sklearn_lr = SklearnLR(random_state=42, C=0.1)
        pipeline = make_pipeline(union, sklearn_lr)
        scikit_cv_results = sklearn_cross_val_score(pipeline, X, y, cv=5)
        scikit_cv_results = [
            "{0:.1%}".format(score) for score in scikit_cv_results
        ]

        self.assertEqual(cv_results, scikit_cv_results)
def test_import_from_sklearn_pipeline_nested_pipeline2(self):
    """Import a sklearn pipeline whose union branch is a pipeline of pipelines.

    One FeatureUnion branch is ``SelectKBest(k=3) -> (SelectKBest(k=2) ->
    PCA)`` and the other is a Nystroem; the union feeds a KNN. After the
    import, the test checks the edge count (5), the ``_impl_class()`` at
    both ends of every edge, and calls ``self.assert_equal_predictions``.
    """
    from sklearn.decomposition import PCA as SklearnPCA
    from sklearn.feature_selection import SelectKBest as SklearnSelectKBest
    from sklearn.kernel_approximation import Nystroem as SklearnNystroem
    from sklearn.neighbors import KNeighborsClassifier as SklearnKNN
    from sklearn.pipeline import FeatureUnion

    # Assemble the nested structure step by step; evaluation order matches
    # the single nested expression it replaces.
    outer_select = SklearnSelectKBest(k=3)
    inner_pipeline = sklearn.pipeline.make_pipeline(
        SklearnSelectKBest(k=2), SklearnPCA()
    )
    select_branch = sklearn.pipeline.make_pipeline(outer_select, inner_pipeline)
    nys_branch = SklearnNystroem(n_components=2, random_state=42)
    union = FeatureUnion([("selectkbest_pca", select_branch), ("nys", nys_branch)])

    sklearn_pipeline = sklearn.pipeline.make_pipeline(union, SklearnKNN())
    lale_pipeline = typing.cast(
        lale.operators.TrainablePipeline,
        import_from_sklearn_pipeline(sklearn_pipeline),
    )
    self.assertEqual(len(lale_pipeline.edges()), 5)

    from lale.lib.lale.concat_features import ConcatFeaturesImpl
    from lale.lib.sklearn.k_neighbors_classifier import KNeighborsClassifierImpl
    from lale.lib.sklearn.nystroem import NystroemImpl
    from lale.lib.sklearn.pca import PCAImpl
    from lale.lib.sklearn.select_k_best import SelectKBestImpl

    # (source impl class, target impl class) expected at each edge index.
    expected_impls = [
        (SelectKBestImpl, SelectKBestImpl),
        (SelectKBestImpl, PCAImpl),
        (PCAImpl, ConcatFeaturesImpl),
        (NystroemImpl, ConcatFeaturesImpl),
        (ConcatFeaturesImpl, KNeighborsClassifierImpl),
    ]
    for position, (source_impl, target_impl) in enumerate(expected_impls):
        self.assertEqual(
            lale_pipeline.edges()[position][0]._impl_class(), source_impl
        )
        self.assertEqual(
            lale_pipeline.edges()[position][1]._impl_class(), target_impl
        )

    self.assert_equal_predictions(sklearn_pipeline, lale_pipeline)