Ejemplo n.º 1
0
 def test_trainable_pipe_left(self):
     """Fit a pipeline whose left operand is a plain scikit-learn estimator.

     ``PCA`` is imported from ``sklearn.decomposition`` rather than from
     ``lale.lib.sklearn``, so ``>>`` is applied with a scikit-learn object on
     the left and a lale operator on the right. The test only checks that
     composing and fitting the pipeline on the iris data does not raise.
     """
     from lale.lib.sklearn import LogisticRegression
     from sklearn.decomposition import PCA
     iris = sklearn.datasets.load_iris()
     pipeline = PCA() >> LogisticRegression(random_state=42)
     pipeline.fit(iris.data, iris.target)
Ejemplo n.º 2
0
    def test_feature_preprocessor(self):
        """Smoke-test the feature preprocessor named by ``fproc_name``.

        ``fproc_name`` and ``PCA`` are not defined in this block; they come
        from the enclosing scope. The operator is looked up dynamically from
        its dotted name, then its schemas, fit/transform, JSON export, use in
        a pipeline, and tuning with Hyperopt are exercised in that order.
        """
        train_X, train_y = self.X_train, self.y_train
        import importlib

        # "pkg.mod.Class" -> module path "pkg.mod" and class name "Class".
        module_path, _, operator_name = fproc_name.rpartition(".")
        operator_module = importlib.import_module(module_path)

        operator_cls = getattr(operator_module, operator_name)
        fproc = operator_cls()

        from lale.lib.sklearn.one_hot_encoder import OneHotEncoder

        if isinstance(fproc, OneHotEncoder):  # type: ignore
            # HACK: OneHotEncoder is swapped for PCA here; the intended
            # replacement is OneHotEncoder(handle_unknown='ignore').
            # Remove the hack when this is fixed.
            fproc = PCA()

        # test_schemas_are_schemas
        for schema_getter in (
            "input_schema_fit",
            "input_schema_transform",
            "output_schema_transform",
            "hyperparam_schema",
        ):
            lale.type_checking.validate_is_schema(getattr(fproc, schema_getter)())

        # test_init_fit_transform
        fitted = fproc.fit(self.X_train, self.y_train)
        _ = fitted.transform(self.X_test)

        # test_predict_on_trainable: transform is invoked on the operator
        # that fit was called on, not on the object returned by fit.
        fitted = fproc.fit(train_X, train_y)
        fproc.transform(train_X)

        # test_to_json
        fproc.to_json()

        # test_in_a_pipeline
        # This test assumes that the output of feature processing is
        # compatible with LogisticRegression.
        from lale.lib.sklearn import LogisticRegression

        pipeline = fproc >> LogisticRegression()
        fitted = pipeline.fit(self.X_train, self.y_train)
        _ = fitted.predict(self.X_test)

        # Tune the pipeline with LR using Hyperopt
        from lale.lib.lale import Hyperopt

        tuner = Hyperopt(estimator=pipeline, max_evals=1, verbose=True, cv=3)
        fitted = tuner.fit(self.X_train, self.y_train)
        _ = fitted.predict(self.X_test)
Ejemplo n.º 3
0
    def test_planned_pipeline_with_choice(self):
        planned = PCA() >> (LogisticRegression() | KNeighborsClassifier())
        try:
            planned.fit(self.X, self.y)
        except AttributeError as e:
            self.assertEqual(
                e.__str__(),
                """The pipeline is not trainable, which means you can not call fit on it.

Suggested fixes:
Fix [A]: You can make the following changes in the pipeline in order to make it trainable:
[A.1] Please remove the operator choice `|` from `LogisticRegression | KNeighborsClassifier` and keep only one of those operators.

Fix [B]: Alternatively, you could use `auto_configure(X, y, Hyperopt, max_evals=5)` on the pipeline
to use Hyperopt for `max_evals` iterations for hyperparameter tuning. `Hyperopt` can be imported as `from lale.lib.lale import Hyperopt`.""",
            )
Ejemplo n.º 4
0
 def test_fit3(self):
     """Fit and predict with ``PCA`` feeding a ``Batching`` wrapper.

     The wrapped operator is ``MinMaxScaler >> MLPClassifier`` with a batch
     size of 10. Only successful completion is checked; the predictions are
     not inspected.
     """
     from lale.lib.sklearn import MinMaxScaler, MLPClassifier, PCA
     reducer = PCA()
     batched_tail = Batching(
         operator=MinMaxScaler() >> MLPClassifier(random_state=42),
         batch_size=10,
     )
     fitted = (reducer >> batched_tail).fit(self.X_train, self.y_train)
     _ = fitted.predict(self.X_test)
Ejemplo n.º 5
0
 def test_export_to_sklearn_pipeline_with_noop_3(self):
     """Export a trained pipeline that ends in ``NoOp`` to scikit-learn.

     Only checks that ``export_to_sklearn_pipeline`` completes; the exported
     pipeline is not inspected.
     """
     from lale.lib.sklearn import PCA, KNeighborsClassifier
     from lale.lib.lale import NoOp
     # This test is probably unnecessary, but doesn't harm at this point
     lale_pipeline = PCA(n_components=3) >> KNeighborsClassifier() >> NoOp()
     trained_lale_pipeline = lale_pipeline.fit(self.X_train, self.y_train)
     _ = trained_lale_pipeline.export_to_sklearn_pipeline()
Ejemplo n.º 6
0
 def test_export_to_sklearn_pipeline_with_noop_2(self):
     """Export a trained pipeline with ``NoOp`` in the middle to scikit-learn.

     The exported scikit-learn pipeline is compared with the trained lale
     pipeline through ``self.assert_equal_predictions``.
     """
     from lale.lib.sklearn import PCA, KNeighborsClassifier
     from lale.lib.lale import NoOp
     lale_pipeline = PCA(n_components=3) >> NoOp() >> KNeighborsClassifier()
     trained_lale_pipeline = lale_pipeline.fit(self.X_train, self.y_train)
     sklearn_pipeline = trained_lale_pipeline.export_to_sklearn_pipeline()
     self.assert_equal_predictions(sklearn_pipeline, trained_lale_pipeline)
Ejemplo n.º 7
0
    def test_feature_preprocessor(self):
        """Smoke-test the feature preprocessor named by ``fproc_name``.

        ``fproc_name`` and ``PCA`` are not defined in this block; they come
        from the enclosing scope. The operator is looked up dynamically from
        its dotted name, then its schemas, fit/transform, JSON export, use in
        a pipeline, and tuning with HyperoptClassifier are exercised.
        """
        X_train, y_train = self.X_train, self.y_train
        X_test = self.X_test
        import importlib
        # "pkg.mod.Class" -> module path "pkg.mod" and class name "Class".
        module_name, _, class_name = fproc_name.rpartition('.')
        module = importlib.import_module(module_name)

        class_ = getattr(module, class_name)
        fproc = class_()

        from lale.lib.sklearn.one_hot_encoder import OneHotEncoderImpl
        if isinstance(fproc._impl, OneHotEncoderImpl):
            # HACK: OneHotEncoder is swapped for PCA here; the intended
            # replacement is OneHotEncoder(handle_unknown='ignore').
            # Remove the hack when this is fixed.
            fproc = PCA()
        #test_schemas_are_schemas
        from lale.helpers import validate_is_schema
        validate_is_schema(fproc.input_schema_fit())
        validate_is_schema(fproc.input_schema_transform())
        validate_is_schema(fproc.output_schema())
        validate_is_schema(fproc.hyperparam_schema())

        #test_init_fit_transform
        trained = fproc.fit(X_train, y_train)
        trained.transform(X_test)

        #test_predict_on_trainable: transform is invoked on the operator that
        #fit was called on, not on the object returned by fit.
        fproc.fit(X_train, y_train)
        fproc.transform(X_train)

        #test_to_json
        fproc.to_json()

        #test_in_a_pipeline
        #This test assumes that the output of feature processing is compatible with LogisticRegression
        from lale.lib.sklearn import LogisticRegression
        pipeline = fproc >> LogisticRegression()
        trained = pipeline.fit(X_train, y_train)
        trained.predict(X_test)

        #Tune the pipeline with LR using HyperoptClassifier
        from lale.lib.lale import HyperoptClassifier
        hyperopt = HyperoptClassifier(model=pipeline, max_evals=1)
        trained = hyperopt.fit(X_train, y_train)
        trained.predict(X_test)
Ejemplo n.º 8
0
 def test_pipeline_create_trained(self):
     """Build a ``lale.lib.sklearn.Pipeline`` from already-trained steps.

     A ``PCA >> LogisticRegression`` pipeline is trained, its two trained
     steps are wrapped in a ``Pipeline`` operator, and that operator is
     expected to be a ``TrainedIndividualOp`` that can predict directly.
     """
     trainable = PCA() >> LogisticRegression()
     fitted = trainable.fit(self.X_train, self.y_train)
     self.assertIsInstance(fitted, lale.operators.TrainedPipeline)
     fitted_pca, fitted_lr = fitted.steps()
     rebuilt = lale.lib.sklearn.Pipeline(
         steps=[
             ("pca1", fitted_pca),
             ("lr1", fitted_lr),
         ]
     )
     self.assertIsInstance(rebuilt, lale.operators.TrainedIndividualOp)
     y_pred = rebuilt.predict(self.X_test)
     # The score is computed but not asserted on.
     accuracy_score(self.y_test, y_pred)
Ejemplo n.º 9
0
 def test_export_to_sklearn_pipeline(self):
     """Compare an exported scikit-learn pipeline with its lale source.

     Each exported step's ``get_params()`` must equal what
     ``self.get_sklearn_params`` returns for the lale step at the same
     position, and both pipelines are then compared through
     ``self.assert_equal_predictions``.
     """
     trainable = PCA(n_components=3) >> KNeighborsClassifier()
     trained = trainable.fit(self.X_train, self.y_train)
     exported = trained.export_to_sklearn_pipeline()
     for position, (_, exported_step) in enumerate(exported.named_steps.items()):
         exported_params = exported_step.get_params()
         lale_step = trained.steps()[position]
         lale_params = self.get_sklearn_params(lale_step)
         self.assertEqual(exported_params, lale_params)
     self.assert_equal_predictions(exported, trained)
Ejemplo n.º 10
0
 def test_make_choice_with_instance(self):
     """Combine operator instances and classes with ``|`` and ``make_choice``.

     Calling ``fit`` directly on a choice is expected to raise
     ``AttributeError``. The three planned pipelines are only constructed;
     building them without an error is the check.
     """
     from lale.operators import make_choice
     from sklearn.datasets import load_iris
     iris = load_iris()
     X, y = iris.data, iris.target
     tfm = PCA() | Nystroem() | NoOp()
     with self.assertRaises(AttributeError):
         tfm.fit(X, y)
     _ = (OneHotEncoder | NoOp) >> tfm >> (LogisticRegression | KNeighborsClassifier)
     _ = (OneHotEncoder | NoOp) >> (PCA | Nystroem) >> (LogisticRegression | KNeighborsClassifier)
     _ = make_choice(OneHotEncoder, NoOp) >> make_choice(PCA, Nystroem) >> make_choice(LogisticRegression, KNeighborsClassifier)
Ejemplo n.º 11
0
    def test_resampler(self):
        """Smoke-test the resampler operator named by ``res_name``.

        ``res_name`` is not defined in this block; it comes from the
        enclosing scope. The operator class is looked up dynamically from its
        dotted name, then its schemas, fit/predict inside pipelines, tuning
        with Hyperopt, cross-validation, and JSON export are exercised.
        """
        from lale.lib.sklearn import PCA, Nystroem, LogisticRegression
        from lale.lib.lale import NoOp, ConcatFeatures
        X_train, y_train = self.X_train, self.y_train
        X_test = self.X_test
        import importlib
        # "pkg.mod.Class" -> module path "pkg.mod" and class name "Class".
        module_name, _, class_name = res_name.rpartition('.')
        module = importlib.import_module(module_name)

        class_ = getattr(module, class_name)
        # Constructing the operator without arguments is expected to be
        # rejected; every later use passes `operator=`.
        with self.assertRaises(ValueError):
            class_()

        #test_schemas_are_schemas
        lale.type_checking.validate_is_schema(class_.input_schema_fit())
        lale.type_checking.validate_is_schema(class_.input_schema_predict())
        lale.type_checking.validate_is_schema(class_.output_schema_predict())
        lale.type_checking.validate_is_schema(class_.hyperparam_schema())

        #test_init_fit_predict
        from lale.operators import make_pipeline
        pipeline1 = PCA() >> class_(operator=make_pipeline(LogisticRegression()))
        trained = pipeline1.fit(X_train, y_train)
        trained.predict(X_test)

        pipeline2 = class_(operator=make_pipeline(PCA(), LogisticRegression()))
        trained = pipeline2.fit(X_train, y_train)
        trained.predict(X_test)

        #test_with_hyperopt
        from lale.lib.lale import Hyperopt
        optimizer = Hyperopt(
            estimator=PCA >> class_(operator=make_pipeline(LogisticRegression())),
            max_evals=1,
            show_progressbar=False,
        )
        trained_optimizer = optimizer.fit(X_train, y_train)
        trained_optimizer.predict(X_test)

        pipeline3 = class_(operator=PCA() >> (Nystroem & NoOp) >> ConcatFeatures >> LogisticRegression())
        optimizer = Hyperopt(estimator=pipeline3, max_evals=1, show_progressbar=False)
        trained_optimizer = optimizer.fit(X_train, y_train)
        trained_optimizer.predict(X_test)

        # `>>` binds tighter than `&`, so the first branch of the `&` is
        # `PCA >> class_(...)` and the second is the bare `class_(...)`.
        pipeline4 = (
            (PCA >> class_(operator=make_pipeline(Nystroem()))
             & class_(operator=make_pipeline(Nystroem())))
            >> ConcatFeatures
            >> LogisticRegression()
        )
        optimizer = Hyperopt(estimator=pipeline4, max_evals=1, scoring='roc_auc', show_progressbar=False)
        trained_optimizer = optimizer.fit(X_train, y_train)
        trained_optimizer.predict(X_test)

        #test_cross_validation
        from lale.helpers import cross_val_score
        cv_results = cross_val_score(pipeline1, X_train, y_train, cv=2)
        self.assertEqual(len(cv_results), 2)

        #test_to_json
        pipeline1.to_json()
0
 def test_export_to_sklearn_pipeline(self):
     """Compare an exported scikit-learn pipeline with its lale source.

     Each exported step's ``get_params()`` must equal the ``get_params()``
     of the model wrapped by the lale step at the same position, and both
     pipelines are then compared through ``self.assert_equal_predictions``.
     """
     from lale.lib.sklearn import PCA
     from lale.lib.sklearn import KNeighborsClassifier
     lale_pipeline = PCA(n_components=3) >> KNeighborsClassifier()
     trained_lale_pipeline = lale_pipeline.fit(self.X_train, self.y_train)
     sklearn_pipeline = trained_lale_pipeline.export_to_sklearn_pipeline()
     for i, pipeline_step in enumerate(sklearn_pipeline.named_steps):
         sklearn_step_params = sklearn_pipeline.named_steps[
             pipeline_step].get_params()
         lale_sklearn_params = trained_lale_pipeline.steps(
         )[i]._impl._wrapped_model.get_params()
         self.assertEqual(sklearn_step_params, lale_sklearn_params)
     self.assert_equal_predictions(sklearn_pipeline, trained_lale_pipeline)
Ejemplo n.º 13
0
    def test_make_choice_with_instance(self):
        """Combine operator instances and classes with ``|`` and ``make_choice``.

        Calling ``fit`` directly on a choice is expected to raise
        ``AttributeError``. The three planned pipelines are only constructed;
        building them without an error is the check.
        """
        from sklearn.datasets import load_iris

        from lale.operators import make_choice

        dataset = load_iris()
        features, labels = dataset.data, dataset.target
        transformer_choice = PCA() | Nystroem() | NoOp()
        with self.assertRaises(AttributeError):
            # A runtime error is being provoked on purpose, so the static
            # warning is silenced.
            _ = transformer_choice.fit(features, labels)  # type: ignore

        # Choice built with `|`, reusing the instance-based choice above.
        _ = (
            (OneHotEncoder | NoOp)
            >> transformer_choice
            >> (LogisticRegression | KNeighborsClassifier)
        )
        # Choice built with `|` from operator classes only.
        _ = (
            (OneHotEncoder | NoOp)
            >> (PCA | Nystroem)
            >> (LogisticRegression | KNeighborsClassifier)
        )
        # Same shape built with make_choice.
        _ = (
            make_choice(OneHotEncoder, NoOp)
            >> make_choice(PCA, Nystroem)
            >> make_choice(LogisticRegression, KNeighborsClassifier)
        )
Ejemplo n.º 14
0
    def test_disable_schema_validation_pipeline(self):
        """Fit and predict with a mismatching schema while validation is off.

        ``LogisticRegression`` is customized with an ``input_fit`` schema
        that only accepts strings. With ``LALE_DISABLE_SCHEMA_VALIDATION``
        set to ``'True'``, fitting and predicting on ``self.X_train`` /
        ``self.X_test`` is expected to complete without raising.
        """
        env_key = "LALE_DISABLE_SCHEMA_VALIDATION"
        previous_value = os.environ.get(env_key)
        os.environ[env_key]='True'
        try:
            from lale.lib.sklearn import PCA, LogisticRegression
            import lale.schemas as schemas

            lr_input = schemas.Object(required=['X', 'y'], X=schemas.AnyOf([
                schemas.Array(
                    schemas.Array(
                        schemas.String())),
                schemas.Array(
                    schemas.String())]),
                y=schemas.Array(schemas.String()))

            foo = LogisticRegression.customize_schema(input_fit=lr_input)
            abc = foo()
            pipeline = PCA() >> abc
            trained_pipeline = pipeline.fit(self.X_train, self.y_train)
            trained_pipeline.predict(self.X_test)
        finally:
            # Restore the prior state even when the test body fails, so the
            # override never leaks into other tests.
            if previous_value is None:
                os.environ.pop(env_key, None)
            else:
                os.environ[env_key] = previous_value
Ejemplo n.º 15
0
    def test_enable_schema_validation_pipeline(self):
        """Expect ``ValueError`` for a mismatching schema with validation on.

        ``LogisticRegression`` is customized with an ``input_fit`` schema
        that only accepts strings, and the pipeline is run inside
        ``EnableSchemaValidation``.
        """
        with EnableSchemaValidation():
            import lale.schemas as schemas
            from lale.lib.sklearn import PCA, LogisticRegression

            string_rows = schemas.Array(schemas.Array(schemas.String()))
            string_column = schemas.Array(schemas.String())
            strings_only_input = schemas.Object(
                required=["X", "y"],
                X=schemas.AnyOf([string_rows, string_column]),
                y=schemas.Array(schemas.String()),
            )

            custom_lr_cls = LogisticRegression.customize_schema(
                input_fit=strings_only_input
            )
            custom_lr = custom_lr_cls()
            pipeline = PCA() >> custom_lr
            with self.assertRaises(ValueError):
                # predict is only reached if fit does not raise.
                fitted = pipeline.fit(self.X_train, self.y_train)
                fitted.predict(self.X_test)
Ejemplo n.º 16
0
    def test_disable_schema_validation_pipeline(self):
        """Fit and predict with a mismatching schema while validation is off.

        ``LogisticRegression`` is customized with an ``input_fit`` schema
        that only accepts strings. With data schema validation disabled,
        fitting and predicting on ``self.X_train`` / ``self.X_test`` is
        expected to complete without raising.
        """
        existing_flag = disable_data_schema_validation
        set_disable_data_schema_validation(True)
        try:
            import lale.schemas as schemas
            from lale.lib.sklearn import PCA, LogisticRegression

            lr_input = schemas.Object(
                required=["X", "y"],
                X=schemas.AnyOf([
                    schemas.Array(schemas.Array(schemas.String())),
                    schemas.Array(schemas.String()),
                ]),
                y=schemas.Array(schemas.String()),
            )

            foo = LogisticRegression.customize_schema(input_fit=lr_input)
            abc = foo()
            pipeline = PCA() >> abc
            trained_pipeline = pipeline.fit(self.X_train, self.y_train)
            trained_pipeline.predict(self.X_test)
        finally:
            # Restore the saved flag even when the test body fails, so the
            # override never leaks into other tests.
            set_disable_data_schema_validation(existing_flag)
Ejemplo n.º 17
0
 def test_export_to_sklearn_pipeline_with_noop_3(self):
     """Export a trained pipeline that ends in ``NoOp`` to scikit-learn.

     Only checks that ``export_to_sklearn_pipeline`` completes; the exported
     pipeline is not inspected.
     """
     # This test is probably unnecessary, but doesn't harm at this point
     trainable = PCA(n_components=3) >> KNeighborsClassifier() >> NoOp()
     fitted = trainable.fit(self.X_train, self.y_train)
     _ = fitted.export_to_sklearn_pipeline()
Ejemplo n.º 18
0
 def test_export_to_sklearn_pipeline_with_noop_2(self):
     """Export a trained pipeline with ``NoOp`` in the middle to scikit-learn.

     The exported scikit-learn pipeline is compared with the trained lale
     pipeline through ``self.assert_equal_predictions``.
     """
     trainable = PCA(n_components=3) >> NoOp() >> KNeighborsClassifier()
     fitted = trainable.fit(self.X_train, self.y_train)
     exported = fitted.export_to_sklearn_pipeline()
     self.assert_equal_predictions(exported, fitted)
Ejemplo n.º 19
0
 def test_PCA(self):
     """Fit a default ``PCA`` on ``self.X``, passing an empty list as ``y``."""
     PCA().fit(self.X, [])