def test_import_from_sklearn_pipeline_feature_union(self):
    """Import a FeatureUnion-based sklearn pipeline and check its edges.

    The sklearn pipeline is a union of PCA and Nystroem followed by a
    k-nearest-neighbours classifier. The imported pipeline is expected to
    have three edges, and to predict the same as the sklearn original.
    """
    from sklearn.decomposition import PCA
    from sklearn.kernel_approximation import Nystroem
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.pipeline import FeatureUnion, make_pipeline

    branches = [
        ("pca", PCA(n_components=1)),
        ("nys", Nystroem(n_components=2, random_state=42)),
    ]
    sklearn_pipeline = make_pipeline(FeatureUnion(branches), KNeighborsClassifier())
    lale_pipeline = import_from_sklearn_pipeline(sklearn_pipeline)
    self.assertEqual(len(lale_pipeline.edges()), 3)

    from lale.lib.sklearn.pca import PCAImpl
    from lale.lib.sklearn.nystroem import NystroemImpl
    from lale.lib.lale.concat_features import ConcatFeaturesImpl
    from lale.lib.sklearn.k_neighbors_classifier import KNeighborsClassifierImpl

    # (source impl class, target impl class) for each edge, in edge order.
    expected_edges = [
        (PCAImpl, ConcatFeaturesImpl),
        (NystroemImpl, ConcatFeaturesImpl),
        (ConcatFeaturesImpl, KNeighborsClassifierImpl),
    ]
    for index, (source_impl, target_impl) in enumerate(expected_edges):
        self.assertEqual(lale_pipeline.edges()[index][0]._impl_class(), source_impl)
        self.assertEqual(lale_pipeline.edges()[index][1]._impl_class(), target_impl)
    self.assert_equal_predictions(sklearn_pipeline, lale_pipeline)
def test_import_from_sklearn_pipeline_nested_pipeline(self):
    """Import a pipeline whose FeatureUnion contains a nested pipeline.

    One union branch is itself a pipeline (SelectKBest followed by PCA); the
    other is a Nystroem transformer. The imported pipeline is expected to
    have four edges and to predict the same as the sklearn original.
    """
    from sklearn.decomposition import PCA
    from sklearn.feature_selection import SelectKBest
    from sklearn.kernel_approximation import Nystroem
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.pipeline import FeatureUnion, make_pipeline

    union = FeatureUnion(
        [
            ("selectkbest_pca", make_pipeline(SelectKBest(k=3), PCA(n_components=1))),
            ("nys", Nystroem(n_components=2, random_state=42)),
        ]
    )
    sklearn_pipeline = make_pipeline(union, KNeighborsClassifier())
    lale_pipeline = import_from_sklearn_pipeline(sklearn_pipeline)
    self.assertEqual(len(lale_pipeline.edges()), 4)

    from lale.lib.sklearn.pca import PCAImpl
    from lale.lib.sklearn.nystroem import NystroemImpl
    from lale.lib.lale.concat_features import ConcatFeaturesImpl
    from lale.lib.sklearn.k_neighbors_classifier import KNeighborsClassifierImpl

    # These assertions assume topological sort.
    expected_edges = [
        (SelectKBest, PCAImpl),
        (PCAImpl, ConcatFeaturesImpl),
        (NystroemImpl, ConcatFeaturesImpl),
        (ConcatFeaturesImpl, KNeighborsClassifierImpl),
    ]
    for index, (source_type, target_type) in enumerate(expected_edges):
        self.assertIsInstance(lale_pipeline.edges()[index][0]._impl, source_type)
        self.assertIsInstance(lale_pipeline.edges()[index][1]._impl, target_type)
    self.assert_equal_predictions(sklearn_pipeline, lale_pipeline)
def test_import_from_sklearn_pipeline_feature_union(self):
    """Import a FeatureUnion-based sklearn pipeline and check its edges.

    The union combines PCA and Nystroem and feeds a k-nearest-neighbours
    classifier. The imported pipeline is expected to have three edges whose
    endpoints are instances of the matching lale operators, and to predict
    the same as the sklearn original.
    """
    from sklearn.decomposition import PCA as SklearnPCA
    from sklearn.kernel_approximation import Nystroem as SklearnNystroem
    from sklearn.neighbors import KNeighborsClassifier as SklearnKNN
    from sklearn.pipeline import FeatureUnion

    branches = [
        ("pca", SklearnPCA(n_components=1)),
        ("nys", SklearnNystroem(n_components=2, random_state=42)),
    ]
    sklearn_pipeline = sklearn.pipeline.make_pipeline(
        FeatureUnion(branches), SklearnKNN()
    )
    imported = import_from_sklearn_pipeline(sklearn_pipeline)
    lale_pipeline = typing.cast(lale.operators.TrainablePipeline, imported)
    self.assertEqual(len(lale_pipeline.edges()), 3)

    from lale.lib.lale.concat_features import ConcatFeatures
    from lale.lib.sklearn.k_neighbors_classifier import KNeighborsClassifier
    from lale.lib.sklearn.nystroem import Nystroem
    from lale.lib.sklearn.pca import PCA

    # (source operator, target operator) for each edge, in edge order.
    expected_edges = [
        (PCA, ConcatFeatures),
        (Nystroem, ConcatFeatures),
        (ConcatFeatures, KNeighborsClassifier),
    ]
    for index, (source_op, target_op) in enumerate(expected_edges):
        self.assertIsInstance(lale_pipeline.edges()[index][0], source_op)  # type: ignore
        self.assertIsInstance(lale_pipeline.edges()[index][1], target_op)  # type: ignore
    self.assert_equal_predictions(sklearn_pipeline, lale_pipeline)
def test_import_from_sklearn_pipeline_noop1(self):
    """Import an sklearn pipeline whose first step is a NoOp operator.

    There are no assertions: the test passes if the import does not raise.
    """
    from sklearn.ensemble import GradientBoostingClassifier
    from sklearn.pipeline import Pipeline

    from lale.helpers import import_from_sklearn_pipeline

    steps = [('noop', NoOp()), ('gbc', GradientBoostingClassifier())]
    pipe = Pipeline(steps)
    # The result is intentionally discarded; only successful import matters.
    _ = import_from_sklearn_pipeline(pipe)
def test_import_from_sklearn_pipeline_nested_pipeline1(self):
    """Import a pipeline with a FeatureUnion nested inside a FeatureUnion.

    The outer union has a pipeline branch (SelectKBest followed by an inner
    union of PCA and a SelectKBest->Nystroem pipeline) and a Nystroem branch.
    The imported pipeline is expected to have eight edges and to predict the
    same as the sklearn original.
    """
    from sklearn.decomposition import PCA as SklearnPCA
    from sklearn.feature_selection import SelectKBest as SklearnSelectKBest
    from sklearn.kernel_approximation import Nystroem as SklearnNystroem
    from sklearn.neighbors import KNeighborsClassifier as SklearnKNN
    from sklearn.pipeline import FeatureUnion

    union = FeatureUnion(
        [
            (
                "selectkbest_pca",
                sklearn.pipeline.make_pipeline(
                    SklearnSelectKBest(k=3),
                    FeatureUnion(
                        [
                            ("pca", SklearnPCA(n_components=1)),
                            (
                                "nested_pipeline",
                                sklearn.pipeline.make_pipeline(
                                    SklearnSelectKBest(k=2), SklearnNystroem()
                                ),
                            ),
                        ]
                    ),
                ),
            ),
            ("nys", SklearnNystroem(n_components=2, random_state=42)),
        ]
    )
    sklearn_pipeline = sklearn.pipeline.make_pipeline(union, SklearnKNN())
    imported = import_from_sklearn_pipeline(sklearn_pipeline)
    lale_pipeline = typing.cast(lale.operators.TrainablePipeline, imported)
    self.assertEqual(len(lale_pipeline.edges()), 8)

    # These assertions assume topological sort, which may not be unique.
    # So the assertions are brittle.
    from lale.lib.lale.concat_features import ConcatFeatures
    from lale.lib.sklearn.k_neighbors_classifier import KNeighborsClassifier
    from lale.lib.sklearn.nystroem import Nystroem
    from lale.lib.sklearn.pca import PCA
    from lale.lib.sklearn.select_k_best import SelectKBest

    expected_edges = [
        (SelectKBest, PCA),
        (SelectKBest, SelectKBest),
        (SelectKBest, Nystroem),
        (PCA, ConcatFeatures),
        (Nystroem, ConcatFeatures),
        (ConcatFeatures, ConcatFeatures),
        (Nystroem, ConcatFeatures),
        (ConcatFeatures, KNeighborsClassifier),
    ]
    for index, (source_op, target_op) in enumerate(expected_edges):
        self.assertIsInstance(lale_pipeline.edges()[index][0], source_op)  # type: ignore
        self.assertIsInstance(lale_pipeline.edges()[index][1], target_op)  # type: ignore
    self.assert_equal_predictions(sklearn_pipeline, lale_pipeline)
def test_import_from_sklearn_pipeline_noop(self):
    """Importing a pipeline with a ``None`` step must raise ValueError."""
    from sklearn.ensemble import GradientBoostingClassifier
    from sklearn.pipeline import Pipeline

    from lale.helpers import import_from_sklearn_pipeline

    steps = [('noop', None), ('gbc', GradientBoostingClassifier())]
    pipe = Pipeline(steps)
    with self.assertRaises(ValueError):
        # The return value is irrelevant; the call itself is expected to fail.
        _ = import_from_sklearn_pipeline(pipe)
def test_import_from_sklearn_pipeline_nested_pipeline1(self):
    """Import a pipeline with a FeatureUnion nested inside a FeatureUnion.

    The imported pipeline is expected to have eight edges whose endpoints
    report the matching lale implementation classes, and to predict the
    same as the sklearn original.
    """
    from sklearn.decomposition import PCA as SklearnPCA
    from sklearn.feature_selection import SelectKBest
    from sklearn.kernel_approximation import Nystroem as SklearnNystroem
    from sklearn.neighbors import KNeighborsClassifier as SklearnKNN
    from sklearn.pipeline import FeatureUnion

    union = FeatureUnion(
        [
            (
                "selectkbest_pca",
                sklearn.pipeline.make_pipeline(
                    SelectKBest(k=3),
                    FeatureUnion(
                        [
                            ('pca', SklearnPCA(n_components=1)),
                            (
                                'nested_pipeline',
                                sklearn.pipeline.make_pipeline(
                                    SelectKBest(k=2), SklearnNystroem()
                                ),
                            ),
                        ]
                    ),
                ),
            ),
            ("nys", SklearnNystroem(n_components=2, random_state=42)),
        ]
    )
    sklearn_pipeline = sklearn.pipeline.make_pipeline(union, SklearnKNN())
    lale_pipeline = import_from_sklearn_pipeline(sklearn_pipeline)
    self.assertEqual(len(lale_pipeline.edges()), 8)

    # These assertions assume topological sort, which may not be unique.
    # So the assertions are brittle.
    from lale.lib.sklearn.pca import PCAImpl
    from lale.lib.sklearn.nystroem import NystroemImpl
    from lale.lib.lale.concat_features import ConcatFeaturesImpl
    from lale.lib.sklearn.k_neighbors_classifier import KNeighborsClassifierImpl
    from lale.lib.sklearn.select_k_best import SelectKBestImpl

    expected_edges = [
        (SelectKBestImpl, PCAImpl),
        (SelectKBestImpl, SelectKBestImpl),
        (SelectKBestImpl, NystroemImpl),
        (PCAImpl, ConcatFeaturesImpl),
        (NystroemImpl, ConcatFeaturesImpl),
        (ConcatFeaturesImpl, ConcatFeaturesImpl),
        (NystroemImpl, ConcatFeaturesImpl),
        (ConcatFeaturesImpl, KNeighborsClassifierImpl),
    ]
    for index, (source_impl, target_impl) in enumerate(expected_edges):
        self.assertEqual(lale_pipeline.edges()[index][0]._impl_class(), source_impl)
        self.assertEqual(lale_pipeline.edges()[index][1]._impl_class(), target_impl)
    self.assert_equal_predictions(sklearn_pipeline, lale_pipeline)
def test_import_from_sklearn_pipeline2(self):
    """Import an already-fitted sklearn pipeline and call predict on it.

    There are no assertions: the test passes if predict does not raise.
    """
    from sklearn import svm
    from sklearn.feature_selection import SelectKBest, f_regression
    from sklearn.pipeline import Pipeline

    steps = [
        ('anova', SelectKBest(f_regression, k=3)),
        ('svc', svm.SVC(kernel='linear')),
    ]
    sklearn_pipeline = Pipeline(steps)
    # Fit before importing so the imported pipeline can predict directly.
    sklearn_pipeline.fit(self.X_train, self.y_train)
    lale_pipeline = import_from_sklearn_pipeline(sklearn_pipeline)
    lale_pipeline.predict(self.X_test)
def test_import_from_sklearn_pipeline2(self):
    """Import an already-fitted sklearn pipeline and call predict on it.

    There are no assertions: the test passes if predict does not raise.
    """
    from sklearn.feature_selection import SelectKBest, f_regression
    from sklearn.pipeline import Pipeline
    from sklearn.svm import SVC as SklearnSVC

    sklearn_pipeline = Pipeline(
        [
            ("anova", SelectKBest(f_regression, k=3)),
            ("svc", SklearnSVC(kernel="linear")),
        ]
    )
    # Fit before importing so the imported pipeline can predict directly.
    sklearn_pipeline.fit(self.X_train, self.y_train)
    imported = import_from_sklearn_pipeline(sklearn_pipeline)
    imported.predict(self.X_test)
# Round-trip check: exec() the generated-code string below to build a lale
# pipeline bound to the name `pipeline`, export it with
# export_to_sklearn_pipeline(), and feed the result back through
# helpers.import_from_sklearn_pipeline().
# There are no assertions; the test passes as long as nothing raises.
# NOTE(review): the exec'd source imports autoai_libs and xgboost, so it
# presumably needs those packages installed -- confirm in the test environment.
# NOTE(review): `new_pipeline` is assigned but never used.
def test_import_from_sklearn(self): pipeline_str = """from lale.lib.autoai_libs import NumpyColumnSelector from lale.lib.autoai_libs import CompressStrings from lale.lib.autoai_libs import NumpyReplaceMissingValues from lale.lib.autoai_libs import NumpyReplaceUnknownValues from lale.lib.autoai_libs import boolean2float from lale.lib.autoai_libs import CatImputer from lale.lib.autoai_libs import CatEncoder import numpy as np from lale.lib.autoai_libs import float32_transform from lale.operators import make_pipeline from lale.lib.autoai_libs import FloatStr2Float from lale.lib.autoai_libs import NumImputer from lale.lib.autoai_libs import OptStandardScaler from lale.operators import make_union from lale.lib.autoai_libs import NumpyPermuteArray from lale.lib.autoai_libs import TA1 import autoai_libs.utils.fc_methods from lale.lib.autoai_libs import FS1 from xgboost import XGBRegressor numpy_column_selector_0 = NumpyColumnSelector(columns=[1]) compress_strings = CompressStrings(compress_type='hash', dtypes_list=['int_num'], missing_values_reference_list=['', '-', '?', float('nan')], misslist_list=[[]]) numpy_replace_missing_values_0 = NumpyReplaceMissingValues(filling_values=float('nan'), missing_values=[]) numpy_replace_unknown_values = NumpyReplaceUnknownValues(filling_values=float('nan'), filling_values_list=[float('nan')], known_values_list=[[36, 45, 56, 67, 68, 75, 78, 89]], missing_values_reference_list=['', '-', '?', float('nan')]) cat_imputer = CatImputer(missing_values=float('nan'), sklearn_version_family='20', strategy='most_frequent') cat_encoder = CatEncoder(dtype=np.float64, handle_unknown='error', sklearn_version_family='20') pipeline_0 = make_pipeline(numpy_column_selector_0, compress_strings, numpy_replace_missing_values_0, numpy_replace_unknown_values, boolean2float(), cat_imputer, cat_encoder, float32_transform()) numpy_column_selector_1 = NumpyColumnSelector(columns=[0]) float_str2_float = FloatStr2Float(dtypes_list=['int_num'], 
missing_values_reference_list=[]) numpy_replace_missing_values_1 = NumpyReplaceMissingValues(filling_values=float('nan'), missing_values=[]) num_imputer = NumImputer(missing_values=float('nan'), strategy='median') opt_standard_scaler = OptStandardScaler(num_scaler_copy=None, num_scaler_with_mean=None, num_scaler_with_std=None, use_scaler_flag=False) pipeline_1 = make_pipeline(numpy_column_selector_1, float_str2_float, numpy_replace_missing_values_1, num_imputer, opt_standard_scaler, float32_transform()) union = make_union(pipeline_0, pipeline_1) numpy_permute_array = NumpyPermuteArray(axis=0, permutation_indices=[1, 0]) ta1_0 = TA1(fun=np.tan, name='tan', datatypes=['float'], feat_constraints=[autoai_libs.utils.fc_methods.is_not_categorical], col_names=['age', 'weight'], col_dtypes=[np.dtype('float32'), np.dtype('float32')]) fs1_0 = FS1(cols_ids_must_keep=range(0, 2), additional_col_count_to_keep=4, ptype='regression') ta1_1 = TA1(fun=np.square, name='square', datatypes=['numeric'], feat_constraints=[autoai_libs.utils.fc_methods.is_not_categorical], col_names=['age', 'weight', 'tan(age)'], col_dtypes=[np.dtype('float32'), np.dtype('float32'), np.dtype('float32')]) fs1_1 = FS1(cols_ids_must_keep=range(0, 2), additional_col_count_to_keep=4, ptype='regression') ta1_2 = TA1(fun=np.sin, name='sin', datatypes=['float'], feat_constraints=[autoai_libs.utils.fc_methods.is_not_categorical], col_names=['age', 'weight', 'tan(age)', 'square(age)', 'square(tan(age))'], col_dtypes=[np.dtype('float32'), np.dtype('float32'), np.dtype('float32'), np.dtype('float32'), np.dtype('float32')]) fs1_2 = FS1(cols_ids_must_keep=range(0, 2), additional_col_count_to_keep=4, ptype='regression') xgb_regressor = XGBRegressor(missing=float('nan'), n_jobs=4, random_state=33, silent=True, verbosity=0) pipeline = make_pipeline(union, numpy_permute_array, ta1_0, fs1_0, ta1_1, fs1_1, ta1_2, fs1_2, xgb_regressor) """ globals2 = {} exec(pipeline_str, globals2) pipeline2 = globals2["pipeline"] 
sklearn_pipeline = pipeline2.export_to_sklearn_pipeline() from lale import helpers new_pipeline = helpers.import_from_sklearn_pipeline(sklearn_pipeline)
def test_import_from_sklearn_pipeline1(self):
    """Import a PCA + KNN pipeline and compare per-step hyperparameters.

    For every step, the parameters of the sklearn estimator must equal the
    parameters of the model wrapped by the corresponding lale step. The
    predictions of both pipelines must agree as well.
    """
    from sklearn.decomposition import PCA as SklearnPCA
    from sklearn.neighbors import KNeighborsClassifier as SklearnKNN

    reducer = SklearnPCA(n_components=3)
    classifier = SklearnKNN()
    sklearn_pipeline = sklearn.pipeline.make_pipeline(reducer, classifier)
    lale_pipeline = import_from_sklearn_pipeline(sklearn_pipeline)

    for position, step_name in enumerate(sklearn_pipeline.named_steps):
        sklearn_step = sklearn_pipeline.named_steps[step_name]
        expected_params = sklearn_step.get_params()
        lale_step = lale_pipeline.steps()[position]
        imported_params = lale_step._impl._wrapped_model.get_params()
        self.assertEqual(expected_params, imported_params)
    self.assert_equal_predictions(sklearn_pipeline, lale_pipeline)
def test_import_from_sklearn_pipeline3(self):
    """Importing with ``fitted=False`` must yield a pipeline that cannot predict."""
    from sklearn.feature_selection import SelectKBest, f_regression
    from sklearn.pipeline import Pipeline
    from sklearn.svm import SVC as SklearnSVC

    sklearn_pipeline = Pipeline(
        [
            ("anova", SelectKBest(f_regression, k=3)),
            ("svc", SklearnSVC(kernel="linear")),
        ]
    )
    lale_pipeline = import_from_sklearn_pipeline(sklearn_pipeline, fitted=False)
    # fitted=False returns a Trainable, so calling predict is invalid.
    with self.assertRaises(ValueError):
        lale_pipeline.predict(self.X_test)
def test_import_from_sklearn_pipeline_nested_pipeline2(self):
    """Import a pipeline whose union branch is a pipeline within a pipeline.

    The first union branch is SelectKBest followed by a nested
    SelectKBest->PCA pipeline; the second is a Nystroem transformer. The
    imported pipeline is expected to have five edges and to predict the same
    as the sklearn original.
    """
    from sklearn.decomposition import PCA as SklearnPCA
    from sklearn.feature_selection import SelectKBest
    from sklearn.kernel_approximation import Nystroem as SklearnNystroem
    from sklearn.neighbors import KNeighborsClassifier as SklearnKNN
    from sklearn.pipeline import FeatureUnion

    union = FeatureUnion(
        [
            (
                "selectkbest_pca",
                sklearn.pipeline.make_pipeline(
                    SelectKBest(k=3),
                    sklearn.pipeline.make_pipeline(SelectKBest(k=2), SklearnPCA()),
                ),
            ),
            ("nys", SklearnNystroem(n_components=2, random_state=42)),
        ]
    )
    sklearn_pipeline = sklearn.pipeline.make_pipeline(union, SklearnKNN())
    imported = import_from_sklearn_pipeline(sklearn_pipeline)
    lale_pipeline = typing.cast(lale.operators.TrainablePipeline, imported)
    self.assertEqual(len(lale_pipeline.edges()), 5)

    from lale.lib.lale.concat_features import ConcatFeaturesImpl
    from lale.lib.sklearn.k_neighbors_classifier import KNeighborsClassifierImpl
    from lale.lib.sklearn.nystroem import NystroemImpl
    from lale.lib.sklearn.pca import PCAImpl
    from lale.lib.sklearn.select_k_best import SelectKBestImpl

    # (source impl class, target impl class) for each edge, in edge order.
    expected_edges = [
        (SelectKBestImpl, SelectKBestImpl),
        (SelectKBestImpl, PCAImpl),
        (PCAImpl, ConcatFeaturesImpl),
        (NystroemImpl, ConcatFeaturesImpl),
        (ConcatFeaturesImpl, KNeighborsClassifierImpl),
    ]
    for index, (source_impl, target_impl) in enumerate(expected_edges):
        self.assertEqual(lale_pipeline.edges()[index][0]._impl_class(), source_impl)
        self.assertEqual(lale_pipeline.edges()[index][1]._impl_class(), target_impl)
    self.assert_equal_predictions(sklearn_pipeline, lale_pipeline)
def test_import_from_sklearn_pipeline3(self):
    """Importing with ``fitted=False`` must yield a pipeline that cannot predict."""
    from sklearn import svm
    from sklearn.feature_selection import SelectKBest, f_regression
    from sklearn.pipeline import Pipeline

    steps = [
        ('anova', SelectKBest(f_regression, k=3)),
        ('svc', svm.SVC(kernel='linear')),
    ]
    sklearn_pipeline = Pipeline(steps)
    imported = import_from_sklearn_pipeline(sklearn_pipeline, fitted=False)
    # fitted=False returns a Trainable, so calling predict is invalid.
    with self.assertRaises(ValueError):
        imported.predict(self.X_test)
def test_import_export_trainable(self):
    """Import a fitted sklearn pipeline as trainable and export it again.

    Even though the sklearn pipeline is fitted first, importing it with
    ``fitted=False`` must give a lale pipeline whose ``predict`` raises
    ValueError, and exporting that pipeline back to sklearn must give a
    pipeline whose ``predict`` raises NotFittedError.
    """
    from sklearn.exceptions import NotFittedError
    from sklearn.pipeline import Pipeline

    from lale.helpers import import_from_sklearn_pipeline

    pipeline = self.create_pipeline()
    # assertIsInstance replaces the deprecated assertEquals alias (removed in
    # Python 3.12) and reports the offending type on failure.
    self.assertIsInstance(pipeline, Pipeline)
    pipeline.fit(self.X_train, self.y_train)
    lale_pipeline = import_from_sklearn_pipeline(pipeline, fitted=False)
    with self.assertRaises(ValueError):
        lale_pipeline.predict(self.X_test)
    sklearn_pipeline = lale_pipeline.export_to_sklearn_pipeline()
    with self.assertRaises(NotFittedError):
        sklearn_pipeline.predict(self.X_test)
def test_import_from_sklearn_pipeline(self):
    """Import a SelectKBest + SVC pipeline and compare per-step hyperparameters.

    For every step, the parameters of the sklearn estimator must equal the
    parameters of the model wrapped by the corresponding lale step. The
    predictions of both pipelines must agree as well.
    """
    from sklearn import svm
    from sklearn.feature_selection import SelectKBest, f_regression
    from sklearn.pipeline import Pipeline

    steps = [
        ('anova', SelectKBest(f_regression, k=3)),
        ('svc', svm.SVC(kernel='linear')),
    ]
    sklearn_pipeline = Pipeline(steps)
    lale_pipeline = import_from_sklearn_pipeline(sklearn_pipeline)

    for position, step_name in enumerate(sklearn_pipeline.named_steps):
        sklearn_step = sklearn_pipeline.named_steps[step_name]
        expected_params = sklearn_step.get_params()
        lale_step = lale_pipeline.steps()[position]
        imported_params = lale_step._impl._wrapped_model.get_params()
        self.assertEqual(expected_params, imported_params)
    self.assert_equal_predictions(sklearn_pipeline, lale_pipeline)
def test_import_from_sklearn_pipeline(self):
    """Import a SelectKBest + SVC pipeline and compare per-step hyperparameters.

    For every step, the parameters of the sklearn estimator must equal the
    parameters of the model wrapped by the corresponding lale step. The
    predictions of both pipelines must agree as well.
    """
    from sklearn.feature_selection import SelectKBest, f_regression
    from sklearn.pipeline import Pipeline
    from sklearn.svm import SVC as SklearnSVC

    sklearn_pipeline = Pipeline(
        [
            ("anova", SelectKBest(f_regression, k=3)),
            ("svc", SklearnSVC(kernel="linear")),
        ]
    )
    lale_pipeline = import_from_sklearn_pipeline(sklearn_pipeline)

    for position, step_name in enumerate(sklearn_pipeline.named_steps):
        expected_params = sklearn_pipeline.named_steps[step_name].get_params()
        wrapped_model = lale_pipeline.steps()[position]._impl._wrapped_model
        imported_params = wrapped_model.get_params()
        self.assertEqual(expected_params, imported_params)
    self.assert_equal_predictions(sklearn_pipeline, lale_pipeline)
def test_import_from_sklearn_pipeline1(self):
    """Import a PCA + KNN pipeline and compare per-step hyperparameters.

    For every step, the sklearn estimator's parameters must equal what
    ``self.get_sklearn_params`` returns for the corresponding lale step. The
    predictions of both pipelines must agree as well.
    """
    from sklearn.decomposition import PCA as SklearnPCA
    from sklearn.neighbors import KNeighborsClassifier as SklearnKNN

    reducer = SklearnPCA(n_components=3)
    classifier = SklearnKNN()
    sklearn_pipeline = sklearn.pipeline.make_pipeline(reducer, classifier)
    imported = import_from_sklearn_pipeline(sklearn_pipeline)
    lale_pipeline = typing.cast(lale.operators.TrainablePipeline, imported)

    for position, step_name in enumerate(sklearn_pipeline.named_steps):
        expected_params = sklearn_pipeline.named_steps[step_name].get_params()
        lale_step = lale_pipeline.steps()[position]
        imported_params = self.get_sklearn_params(lale_step)
        self.assertEqual(expected_params, imported_params)
    self.assert_equal_predictions(sklearn_pipeline, lale_pipeline)
def test_import_export_trained(self):
    """Round-trip a fitted sklearn pipeline through lale and back.

    Predictions of the original fitted pipeline must match both the
    predictions of the imported lale pipeline and those of the sklearn
    pipeline obtained by exporting the lale pipeline again.
    """
    import numpy as np
    from sklearn.pipeline import Pipeline

    from lale.helpers import import_from_sklearn_pipeline

    pipeline = self.create_pipeline()
    # assertIsInstance / assertTrue replace the deprecated assertEquals alias
    # (removed in Python 3.12).
    self.assertIsInstance(pipeline, Pipeline)
    pipeline.fit(self.X_train, self.y_train)
    predictions_before = pipeline.predict(self.X_test)
    lale_pipeline = import_from_sklearn_pipeline(pipeline)
    predictions_after = lale_pipeline.predict(self.X_test)
    sklearn_pipeline = lale_pipeline.export_to_sklearn_pipeline()
    predictions_after_1 = sklearn_pipeline.predict(self.X_test)
    # array_equal also returns False on a shape mismatch instead of relying
    # on elementwise broadcasting of `==`.
    self.assertTrue(np.array_equal(predictions_before, predictions_after))
    self.assertTrue(np.array_equal(predictions_before, predictions_after_1))
def test_import_from_sklearn_pipeline(self):
    """Import a SelectKBest + SVC pipeline and compare per-step hyperparameters.

    For every step, the sklearn estimator's parameters must equal what
    ``self.get_sklearn_params`` returns for the corresponding entry of the
    lale pipeline's ``steps_list()``. The predictions of both pipelines must
    agree as well.
    """
    from sklearn.feature_selection import SelectKBest, f_regression
    from sklearn.pipeline import Pipeline
    from sklearn.svm import SVC as SklearnSVC

    sklearn_pipeline = Pipeline(
        [
            ("anova", SelectKBest(f_regression, k=3)),
            ("svc", SklearnSVC(kernel="linear")),
        ]
    )
    imported = import_from_sklearn_pipeline(sklearn_pipeline)
    lale_pipeline = typing.cast(lale.operators.TrainablePipeline, imported)

    for position, step_name in enumerate(sklearn_pipeline.named_steps):
        expected_params = sklearn_pipeline.named_steps[step_name].get_params()
        lale_step = lale_pipeline.steps_list()[position]
        imported_params = self.get_sklearn_params(lale_step)
        self.assertEqual(expected_params, imported_params)
    self.assert_equal_predictions(sklearn_pipeline, lale_pipeline)
def test_import_from_sklearn_pipeline_no_wrapper(self):
    """Import an unfitted pipeline that ends in a LocalOutlierFactor step.

    There are no assertions: the test passes if the import does not raise.
    """
    from sklearn.neighbors import LocalOutlierFactor
    from sklearn.pipeline import make_pipeline

    reducer = PCA()
    outlier_detector = LocalOutlierFactor()
    sklearn_pipeline = make_pipeline(reducer, outlier_detector)
    # The result is intentionally discarded; only successful import matters.
    _ = import_from_sklearn_pipeline(sklearn_pipeline, fitted=False)