Example #1
    def test_empty_schema(self):
        pca = PCA().customize_schema(whiten=schemas.Schema())
        plan = ((pca & (MinMaxScaler | Normalizer)) >> ConcatFeatures() >>
                (MinMaxScaler | Normalizer) >>
                (LogisticRegression | KNeighborsClassifier))
        from lale.search.schema2search_space import OperatorSchemaError

        with self.assertRaises(OperatorSchemaError):
            run_hyperopt_on_planned_pipeline(plan)
Example #2
    def test_remove_last5(self):
        pipeline = (
            StandardScaler()
            >> (PCA() & Nystroem() & PassiveAggressiveClassifier())
            >> ConcatFeatures()
            >> NoOp()
            >> PassiveAggressiveClassifier()
        )
        pipeline.remove_last(inplace=True).freeze_trainable()
Example #3
    def test_no_max_schema(self):
        pca = PCA().customize_schema(n_components=schemas.Float(min=0.0))
        plan = ((pca & (MinMaxScaler | Normalizer)) >> ConcatFeatures() >>
                (MinMaxScaler | Normalizer) >>
                (LogisticRegression | KNeighborsClassifier))
        from lale.search.search_space import SearchSpaceError

        with self.assertRaises(SearchSpaceError):
            run_hyperopt_on_planned_pipeline(plan)
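Note: Hyperopt can only sample from bounded ranges, so a continuous hyperparameter declared with a minimum but no maximum cannot be compiled into a search space; that is what the SearchSpaceError above signals.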
Example #4
    def test_two_estimators_predict_proba(self):
        pipeline = (
            StandardScaler()
            >> (PCA() & Nystroem() & LogisticRegression())
            >> ConcatFeatures()
            >> NoOp()
            >> LogisticRegression()
        )
        trained = pipeline.fit(self.X_train, self.y_train)
        trained.predict_proba(self.X_test)
Example #5
    def test_remove_last2(self):
        pipeline = (
            StandardScaler()
            >> (PCA() & Nystroem() & PassiveAggressiveClassifier())
            >> ConcatFeatures()
            >> NoOp()
            >> (PassiveAggressiveClassifier() & LogisticRegression())
        )
        with self.assertRaises(ValueError):
            pipeline.remove_last()
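Note: remove_last is only well defined when the pipeline ends in a single sink step; here the final layer is a union of two estimators, so the call raises ValueError rather than guessing which step to drop.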
Example #6
    def test_two_estimators_predict_proba1(self):
        pipeline = (
            StandardScaler()
            >> (PCA() & Nystroem() & GaussianNB())
            >> ConcatFeatures()
            >> NoOp()
            >> GaussianNB()
        )
        pipeline.fit(self.X_train, self.y_train)
        pipeline.predict_proba(self.X_test)
Example #7
    def do1DTest(self, trainable, train_X, train_y, test_X, test_y):
        # Test 1-D arrays as input to the transformers.
        train_X = train_X[:, 0]
        test_X = test_X[:, 0]
        trainable_pipeline = (
            (trainable & NoOp()) >> ConcatFeatures() >> float32_transform() >> LR()
        )
        trained_pipeline = trainable_pipeline.fit(train_X, train_y)
        trained_pipeline.predict(test_X)
        hyperopt = Hyperopt(estimator=trainable_pipeline, max_evals=1)
        trained_hyperopt = hyperopt.fit(train_X, train_y)
        trained_hyperopt.predict(test_X)
Example #8
    def test_two_estimators_predict1(self):
        pipeline = (
            StandardScaler()
            >> (PCA() & Nystroem() & PassiveAggressiveClassifier())
            >> ConcatFeatures()
            >> NoOp()
            >> PassiveAggressiveClassifier()
        )
        trained = pipeline.fit(self.X_train, self.y_train)
        trained.predict(self.X_test)
Example #9
    def test_export_to_sklearn_pipeline2(self):
        from lale.lib.lale import ConcatFeatures
        from lale.lib.sklearn import KNeighborsClassifier, Nystroem, PCA
        from sklearn.feature_selection import SelectKBest
        from sklearn.pipeline import FeatureUnion

        lale_pipeline = (
            ((PCA() & SelectKBest(k=3)) >> ConcatFeatures())
            & Nystroem()) >> ConcatFeatures() >> KNeighborsClassifier()
        trained_lale_pipeline = lale_pipeline.fit(self.X_train, self.y_train)
        sklearn_pipeline = trained_lale_pipeline.export_to_sklearn_pipeline()
        self.assertIsInstance(sklearn_pipeline.named_steps['featureunion'],
                              FeatureUnion)
        from sklearn.neighbors import KNeighborsClassifier as SklearnKNN

        self.assertIsInstance(
            sklearn_pipeline.named_steps['kneighborsclassifier'], SklearnKNN)
        self.assert_equal_predictions(sklearn_pipeline, trained_lale_pipeline)
Example #10
    def test_remove_last4(self):
        pipeline = (
            StandardScaler()
            >> (PCA() & Nystroem() & PassiveAggressiveClassifier())
            >> ConcatFeatures()
            >> NoOp()
            >> PassiveAggressiveClassifier()
        )
        new_pipeline = pipeline.remove_last(inplace=True)
        self.assertEqual(len(new_pipeline._steps), 6)
        self.assertEqual(len(pipeline._steps), 6)
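Note: with inplace=True, remove_last mutates the pipeline and returns that same object, which is why both new_pipeline and pipeline report the six remaining steps of the original seven.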
Example #11
    def test_concat_with_hyperopt(self):
        from lale.lib.lale import Hyperopt
        pca = PCA(n_components=3)
        nys = Nystroem(n_components=10)
        concat = ConcatFeatures()
        lr = LogisticRegression(random_state=42, C=0.1)

        trainable = (pca & nys) >> concat >> lr
        clf = Hyperopt(estimator=trainable, max_evals=2)
        from sklearn.datasets import load_iris
        iris_data = load_iris()
        clf.fit(iris_data.data, iris_data.target)
        clf.predict(iris_data.data)
Example #12
    def test_init_fit_predict(self):
        trainable_cf = ConcatFeatures()
        A = [[11, 12, 13], [21, 22, 23], [31, 32, 33]]
        B = [[14, 15], [24, 25], [34, 35]]

        trained_cf = trainable_cf.fit(X=[A, B])
        transformed = trained_cf.transform([A, B])
        expected = [[11, 12, 13, 14, 15], [21, 22, 23, 24, 25],
                    [31, 32, 33, 34, 35]]
        for i_sample in range(len(transformed)):
            for i_feature in range(len(transformed[i_sample])):
                self.assertEqual(transformed[i_sample][i_feature],
                                 expected[i_sample][i_feature])
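For intuition, the expected output above is just a column-wise concatenation; on plain nested lists, ConcatFeatures matches numpy.hstack (an illustrative sketch, not part of the test suite):

import numpy as np

A = [[11, 12, 13], [21, 22, 23], [31, 32, 33]]
B = [[14, 15], [24, 25], [34, 35]]
print(np.hstack([A, B]))  # three rows of length 5, same values as `expected`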
Example #13
    def test_with_pandas(self):
        from lale.datasets import load_iris_df
        import warnings
        warnings.filterwarnings("ignore")
        pca = PCA(n_components=3)
        nys = Nystroem(n_components=10)
        concat = ConcatFeatures()
        lr = LogisticRegression(random_state=42, C=0.1)
        trainable = (pca & nys) >> concat >> lr

        (X_train, y_train), (X_test, y_test) = load_iris_df()
        trained = trainable.fit(X_train, y_train)
        predicted = trained.predict(X_test)
Example #14
    def test_comparison_with_scikit(self):
        import warnings

        warnings.filterwarnings("ignore")
        import sklearn.datasets
        import sklearn.utils

        from lale.helpers import cross_val_score
        from lale.lib.sklearn import PCA

        pca = PCA(n_components=3, random_state=42, svd_solver="arpack")
        nys = Nystroem(n_components=10, random_state=42)
        concat = ConcatFeatures()
        lr = LogisticRegression(random_state=42, C=0.1)
        trainable = (pca & nys) >> concat >> lr
        digits = sklearn.datasets.load_digits()
        X, y = sklearn.utils.shuffle(digits.data,
                                     digits.target,
                                     random_state=42)

        cv_results = cross_val_score(trainable, X, y)
        cv_results = ["{0:.1%}".format(score) for score in cv_results]

        from sklearn.decomposition import PCA as SklearnPCA
        from sklearn.kernel_approximation import Nystroem as SklearnNystroem
        from sklearn.linear_model import LogisticRegression as SklearnLR
        from sklearn.model_selection import cross_val_score
        from sklearn.pipeline import FeatureUnion, make_pipeline

        union = FeatureUnion([
            (
                "pca",
                SklearnPCA(n_components=3,
                           random_state=42,
                           svd_solver="arpack"),
            ),
            ("nys", SklearnNystroem(n_components=10, random_state=42)),
        ])
        lr = SklearnLR(random_state=42, C=0.1)
        pipeline = make_pipeline(union, lr)

        scikit_cv_results = cross_val_score(pipeline, X, y, cv=5)
        scikit_cv_results = [
            "{0:.1%}".format(score) for score in scikit_cv_results
        ]
        self.assertEqual(cv_results, scikit_cv_results)
        warnings.resetwarnings()
Example #15
    def test_init_fit_predict_pandas_series(self):
        trainable_cf = ConcatFeatures()
        A = [[11, 12, 13], [21, 22, 23], [31, 32, 33]]
        B = [14, 24, 34]
        A = pd.DataFrame(A, columns=["a", "b", "c"])
        B = pd.Series(B, name="d")
        A = add_table_name(A, "A")
        B = add_table_name(B, "B")
        trained_cf = trainable_cf.fit(X=[A, B])
        transformed = trained_cf.transform([A, B])
        expected = [
            [11, 12, 13, 14],
            [21, 22, 23, 24],
            [31, 32, 33, 34],
        ]
        expected = pd.DataFrame(expected, columns=["a", "b", "c", "d"])
        for c in expected.columns:
            self.assertEqual(list(transformed[c]), list(expected[c]))
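For comparison, the expected frame above is what a plain pandas column concatenation produces (a sketch for illustration only; the add_table_name calls are not needed for it):

import pandas as pd

A = pd.DataFrame([[11, 12, 13], [21, 22, 23], [31, 32, 33]], columns=["a", "b", "c"])
B = pd.Series([14, 24, 34], name="d")
print(pd.concat([A, B], axis=1))  # columns a, b, c, d -- matches `expected`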
Example #16
    def test_export_to_sklearn_pipeline3(self):
        from sklearn.feature_selection import SelectKBest
        from sklearn.pipeline import FeatureUnion

        lale_pipeline = (((PCA() >> SelectKBest(k=2))
                          & (Nystroem(random_state=42) >> SelectKBest(k=3))
                          & (SelectKBest(k=3))) >> ConcatFeatures() >>
                         SelectKBest(k=2) >> LogisticRegression())
        trained_lale_pipeline = lale_pipeline.fit(self.X_train, self.y_train)
        sklearn_pipeline = trained_lale_pipeline.export_to_sklearn_pipeline()
        self.assertIsInstance(sklearn_pipeline.named_steps["featureunion"],
                              FeatureUnion)
        self.assertIsInstance(sklearn_pipeline.named_steps["selectkbest"],
                              SelectKBest)
        from sklearn.linear_model import LogisticRegression as SklearnLR

        self.assertIsInstance(
            sklearn_pipeline.named_steps["logisticregression"], SklearnLR)
        self.assert_equal_predictions(sklearn_pipeline, trained_lale_pipeline)
Example #17
    def test_with_concat_features1(self):
        import warnings
        warnings.filterwarnings("ignore")

        from sklearn.datasets import load_iris
        from lale.lib.lale import Hyperopt
        from sklearn.metrics import accuracy_score
        data = load_iris()
        X, y = data.data, data.target
        pca = PCA(n_components=3)
        nys = Nystroem(n_components=10)
        concat = ConcatFeatures()
        lr = LogisticRegression(random_state=42, C=0.1)
        pipeline = ((pca & nys) >> concat >> lr) | KNeighborsClassifier()
        clf = Hyperopt(estimator=pipeline, max_evals=1)
        trained = clf.fit(X, y)
        predictions = trained.predict(X)
        print(accuracy_score(y, predictions))
        warnings.resetwarnings()
Example #18
    def test_export_to_sklearn_pipeline3(self):
        from lale.lib.lale import ConcatFeatures
        from lale.lib.sklearn import LogisticRegression, Nystroem, PCA
        from sklearn.feature_selection import SelectKBest
        from sklearn.pipeline import FeatureUnion

        lale_pipeline = (
            ((PCA() >> SelectKBest(k=2))
             & (Nystroem(random_state=42) >> SelectKBest(k=3))
             & SelectKBest(k=3))
            >> ConcatFeatures()
            >> SelectKBest(k=2)
            >> LogisticRegression()
        )
        trained_lale_pipeline = lale_pipeline.fit(self.X_train, self.y_train)
        sklearn_pipeline = trained_lale_pipeline.export_to_sklearn_pipeline()
        self.assertIsInstance(sklearn_pipeline.named_steps['featureunion'],
                              FeatureUnion)
        self.assertIsInstance(sklearn_pipeline.named_steps['selectkbest'],
                              SelectKBest)
        from sklearn.linear_model import LogisticRegression as SklearnLR

        self.assertIsInstance(
            sklearn_pipeline.named_steps['logisticregression'], SklearnLR)
        self.assert_equal_predictions(sklearn_pipeline, trained_lale_pipeline)
Example #19
    def test_hyperparam_defaults(self):
        cf = ConcatFeatures()
Example #20
        'input_fit': _input_fit_schema,
        'input_predict': _input_predict_schema,
        'output': _output_predict_schema
    }
}

HyperoptRegressor = lale.operators.make_operator(HyperoptRegressorImpl,
                                                 _combined_schemas)

if __name__ == '__main__':
    from lale.lib.lale import ConcatFeatures
    from lale.lib.sklearn import Nystroem, PCA, RandomForestRegressor
    from sklearn.metrics import r2_score
    pca = PCA(n_components=3)
    nys = Nystroem(n_components=3)
    concat = ConcatFeatures()
    rf = RandomForestRegressor()

    trainable = (pca & nys) >> concat >> rf
    # trainable = nys >> rf
    import sklearn.datasets
    import sklearn.utils
    from lale.helpers import cross_val_score
    diabetes = sklearn.datasets.load_diabetes()
    X, y = sklearn.utils.shuffle(diabetes.data,
                                 diabetes.target,
                                 random_state=42)

    hp_n = HyperoptRegressor(estimator=trainable, max_evals=20)

    hp_n_trained = hp_n.fit(X, y)
    predictions = hp_n_trained.predict(X)
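The snippet imports r2_score but never calls it; a natural final line (an assumption, not in the original source) would report the in-sample fit:

    print(r2_score(y, predictions))  # in-sample R^2 of the tuned pipeline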
Example #21
    def test_planned_pipeline_3(self):
        plan = ((MinMaxScaler() & NoOp()) >> ConcatFeatures() >>
                (StandardScaler &
                 (NoOp() | MinMaxScaler())) >> ConcatFeatures() >>
                (LogisticRegression | KNeighborsClassifier))
        run_hyperopt_on_planned_pipeline(plan)
Example #22
    def test_planned_pipeline_1(self):
        plan = ((PCA & (MinMaxScaler | Normalizer)) >> ConcatFeatures() >>
                (MinMaxScaler | Normalizer) >>
                (LogisticRegression | KNeighborsClassifier))
        run_hyperopt_on_planned_pipeline(plan)
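The same plan can also be written with lale's named combinator helpers instead of the overloaded operators; a minimal sketch, assuming lale.operators exports make_pipeline, make_choice, and make_union (where make_union appends the ConcatFeatures step):

from lale.operators import make_choice, make_pipeline, make_union

plan = make_pipeline(
    make_union(PCA, make_choice(MinMaxScaler, Normalizer)),  # like (PCA & ...) >> ConcatFeatures()
    make_choice(MinMaxScaler, Normalizer),                   # like MinMaxScaler | Normalizer
    make_choice(LogisticRegression, KNeighborsClassifier),
)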
Example #23
    def test_with_hyperopt2(self):
        from lale.expressions import (
            count,
            it,
            max,
            mean,
            min,
            string_indexer,
            sum,
            variance,
        )

        wrap_imported_operators()
        scan = Scan(table=it["main"])
        scan_0 = Scan(table=it["customers"])
        join = Join(pred=[(it["main"]["group_customer_id"] == it["customers"]
                           ["group_customer_id"])])
        map = Map(
            columns={
                "[main](group_customer_id)[customers]|number_children|identity":
                it["number_children"],
                "[main](group_customer_id)[customers]|name|identity":
                it["name"],
                "[main](group_customer_id)[customers]|income|identity":
                it["income"],
                "[main](group_customer_id)[customers]|address|identity":
                it["address"],
                "[main](group_customer_id)[customers]|age|identity":
                it["age"],
            },
            remainder="drop",
        )
        pipeline_4 = join >> map
        scan_1 = Scan(table=it["purchase"])
        join_0 = Join(
            pred=[(it["main"]["group_id"] == it["purchase"]["group_id"])],
            join_limit=50.0,
        )
        aggregate = Aggregate(
            columns={
                "[main](group_id)[purchase]|price|variance":
                variance(it["price"]),
                "[main](group_id)[purchase]|time|sum": sum(it["time"]),
                "[main](group_id)[purchase]|time|mean": mean(it["time"]),
                "[main](group_id)[purchase]|time|min": min(it["time"]),
                "[main](group_id)[purchase]|price|sum": sum(it["price"]),
                "[main](group_id)[purchase]|price|count": count(it["price"]),
                "[main](group_id)[purchase]|price|mean": mean(it["price"]),
                "[main](group_id)[purchase]|price|min": min(it["price"]),
                "[main](group_id)[purchase]|price|max": max(it["price"]),
                "[main](group_id)[purchase]|time|max": max(it["time"]),
                "[main](group_id)[purchase]|time|variance":
                variance(it["time"]),
            },
            group_by=it["row_id"],
        )
        pipeline_5 = join_0 >> aggregate
        map_0 = Map(
            columns={
                "[main]|group_customer_id|identity": it["group_customer_id"],
                "[main]|transaction_id|identity": it["transaction_id"],
                "[main]|group_id|identity": it["group_id"],
                "[main]|comments|identity": it["comments"],
                "[main]|id|identity": it["id"],
                "prefix_0_id": it["prefix_0_id"],
                "next_purchase": it["next_purchase"],
                "[main]|time|identity": it["time"],
            },
            remainder="drop",
        )
        scan_2 = Scan(table=it["transactions"])
        scan_3 = Scan(table=it["products"])
        join_1 = Join(pred=[
            (it["main"]["transaction_id"] == it["transactions"]
             ["transaction_id"]),
            (it["transactions"]["product_id"] == it["products"]["product_id"]),
        ])
        map_1 = Map(
            columns={
                "[main](transaction_id)[transactions](product_id)[products]|price|identity":
                it["price"],
                "[main](transaction_id)[transactions](product_id)[products]|type|identity":
                it["type"],
            },
            remainder="drop",
        )
        pipeline_6 = join_1 >> map_1
        join_2 = Join(pred=[(it["main"]["transaction_id"] == it["transactions"]
                             ["transaction_id"])])
        map_2 = Map(
            columns={
                "[main](transaction_id)[transactions]|description|identity":
                it["description"],
                "[main](transaction_id)[transactions]|product_id|identity":
                it["product_id"],
            },
            remainder="drop",
        )
        pipeline_7 = join_2 >> map_2
        map_3 = Map(columns=[
            string_indexer(it["[main]|comments|identity"]),
            string_indexer(
                it["[main](transaction_id)[transactions]|description|identity"]
            ),
            string_indexer(it[
                "[main](transaction_id)[transactions](product_id)[products]|type|identity"]
                           ),
            string_indexer(
                it["[main](group_customer_id)[customers]|name|identity"]),
            string_indexer(
                it["[main](group_customer_id)[customers]|address|identity"]),
        ])
        pipeline_8 = ConcatFeatures() >> map_3
        relational = Relational(operator=make_pipeline_graph(
            steps=[
                scan,
                scan_0,
                pipeline_4,
                scan_1,
                pipeline_5,
                map_0,
                scan_2,
                scan_3,
                pipeline_6,
                pipeline_7,
                pipeline_8,
            ],
            edges=[
                (scan, pipeline_4),
                (scan, pipeline_5),
                (scan, map_0),
                (scan, pipeline_6),
                (scan, pipeline_7),
                (scan_0, pipeline_4),
                (pipeline_4, pipeline_8),
                (scan_1, pipeline_5),
                (pipeline_5, pipeline_8),
                (map_0, pipeline_8),
                (scan_2, pipeline_6),
                (scan_2, pipeline_7),
                (scan_3, pipeline_6),
                (pipeline_6, pipeline_8),
                (pipeline_7, pipeline_8),
            ],
        ))
        pipeline = relational >> (KNeighborsClassifier | LogisticRegression)
        from sklearn.datasets import load_iris

        X, y = load_iris(return_X_y=True)
        from lale.lib.lale import Hyperopt

        opt = Hyperopt(estimator=pipeline, max_evals=2)
        opt.fit(X, y)