Beispiel #1
0
    def test_choice(self):
        planned = LogisticRegression() | KNeighborsClassifier()
        try:
            planned.fit(self.X, self.y)
        except AttributeError as e:
            self.assertEqual(
                e.__str__(),
                """The pipeline is not trainable, which means you can not call fit on it.

Suggested fixes:
Fix [A]: You can make the following changes in the pipeline in order to make it trainable:
[A.1] Please remove the operator choice `|` from `LogisticRegression | KNeighborsClassifier` and keep only one of those operators.

Fix [B]: Alternatively, you could use `auto_configure(X, y, Hyperopt, max_evals=5)` on the pipeline
to use Hyperopt for `max_evals` iterations for hyperparameter tuning. `Hyperopt` can be imported as `from lale.lib.lale import Hyperopt`.""",
            )
Beispiel #2
0
 def test_fit_clones_impl(self):
     """fit() must not mutate the trainable: the trained result wraps a fresh impl."""
     from sklearn.datasets import load_iris

     trainable = LogisticRegression()
     bunch = load_iris()
     features, labels = bunch.data, bunch.target
     trained = trainable.fit(features, labels)
     # the trained operator's underlying impl must be a different object
     self.assertIsNot(trainable._impl, trained._impl)
Beispiel #3
0
 def test_predict_proba(self):
     """predict_proba works on both the trainable and the trained operator."""
     import numpy as np

     trainable = LogisticRegression(n_jobs=1)
     iris = sklearn.datasets.load_iris()
     weights = np.arange(len(iris.target))
     trained = trainable.fit(iris.data, iris.target, sample_weight=weights)
     # calling predict_proba on the trainable (not the trained result) is
     # exercised on purpose; a commented-out check in the original suggests
     # this used to emit a DeprecationWarning — TODO confirm
     _ = trainable.predict_proba(iris.data)
     _ = trained.predict_proba(iris.data)
Beispiel #4
0
 def test_missing_iris(self):
     """Classification data (numbers only) with synthetically injected NaNs."""
     all_X, all_y = sklearn.datasets.load_iris(return_X_y=True)
     X_missing = lale.helpers.add_missing_values(all_X)
     # plain LogisticRegression cannot cope with NaNs ...
     with self.assertRaisesRegex(ValueError, "Input contains NaN"):
         _ = LogisticRegression().fit(X_missing, all_y)
     # ... but the pipeline exercised by the shared helper is expected to
     self._fit_predict("classification", X_missing, all_y)
Beispiel #5
0
 def test_clone_of_trained(self):
     """sklearn.base.clone must accept a trained lale operator."""
     from sklearn.base import clone
     from sklearn.datasets import load_iris

     bunch = load_iris()
     trained = LogisticRegression().fit(bunch.data, bunch.target)
     _ = clone(trained)
Beispiel #6
0
    def test_sample_weight(self):
        """fit() accepts sklearn's optional sample_weight argument."""
        import numpy as np

        trainable = LogisticRegression(n_jobs=1)
        iris = load_iris()
        weights = np.arange(len(iris.target))
        trained = trainable.fit(iris.data, iris.target, sample_weight=weights)
        _ = trained.predict(iris.data)
Beispiel #7
0
 def test_compose5(self):
     """An individual operator and a >> pipeline both train and predict."""
     encoder = OneHotEncoder(handle_unknown=OneHotEncoder.handle_unknown.ignore)
     digits = sklearn.datasets.load_digits()
     classifier = LogisticRegression()
     # the bare classifier works on its own
     classifier.fit(digits.data, digits.target).predict(digits.data)
     # and the same (still trainable) classifier composes into a pipeline
     pipeline = encoder >> classifier
     pipeline.fit(digits.data, digits.target).predict(digits.data)
Beispiel #8
0
 def test_decision_function(self):
     """A trained classifier exposes decision_function."""
     import numpy as np

     trainable = LogisticRegression(n_jobs=1)
     iris = sklearn.datasets.load_iris()
     weights = np.arange(len(iris.target))
     trained = trainable.fit(iris.data, iris.target, sample_weight=weights)
     _ = trained.decision_function(iris.data)
Beispiel #9
0
 def test_scorers_np_num(self):
     """Fairness scorers run on the numeric numpy credit-g dataset."""
     data = self.creditg_np_num
     fairness_info = data["fairness_info"]
     trained = LogisticRegression(max_iter=1000).fit(data["train_X"], data["train_y"])
     self._attempt_scorers(fairness_info, trained, data["test_X"], data["test_y"])
Beispiel #10
0
    def test_score_trained_op(self):
        """score() on a trained operator matches sklearn's accuracy_score."""
        from sklearn.metrics import accuracy_score

        from lale.lib.sklearn import LogisticRegression

        trained = LogisticRegression().fit(self.X_train, self.y_train)
        via_score = trained.score(self.X_test, self.y_test)
        via_metric = accuracy_score(self.y_test, trained.predict(self.X_test))
        self.assertEqual(via_score, via_metric)
Beispiel #11
0
    def test_grid_search_on_trained(self):
        """Constructing GridSearchCV around a trained lale operator must not fail."""
        from sklearn.metrics import accuracy_score, make_scorer
        from sklearn.model_selection import GridSearchCV

        iris = load_iris()
        trained = LogisticRegression().fit(iris.data, iris.target)
        param_grid = {"solver": ("liblinear", "lbfgs"), "penalty": ["l2"]}

        _ = GridSearchCV(trained, param_grid, cv=5, scoring=make_scorer(accuracy_score))
Beispiel #12
0
    def test_grid_search_on_trained(self):
        """GridSearchCV accepts a trained operator as its estimator."""
        from sklearn.model_selection import GridSearchCV
        from sklearn.datasets import load_iris
        from sklearn.metrics import accuracy_score, make_scorer

        iris = load_iris()
        trained = LogisticRegression().fit(iris.data, iris.target)
        grid = {'solver': ('liblinear', 'lbfgs'), 'penalty': ['l2']}

        # only construction is exercised here; the search is never fitted
        _ = GridSearchCV(trained, grid, cv=5, scoring=make_scorer(accuracy_score))
Beispiel #13
0
    def test_grid_search_on_trained_auto(self):
        """GridSearchCV with auto-generated parameter grids on a trained op."""
        from sklearn.model_selection import GridSearchCV
        from sklearn.datasets import load_iris
        from sklearn.metrics import accuracy_score, make_scorer

        iris = load_iris()
        trainable = LogisticRegression()
        trained = trainable.fit(iris.data, iris.target)
        # grids are derived from the trainable, not the trained result
        grids = get_grid_search_parameter_grids(trainable, num_samples=2)

        _ = GridSearchCV(trained, grids, cv=5, scoring=make_scorer(accuracy_score))
Beispiel #14
0
 def test_log_fit_predict(self):
     """fit/predict emit matching enter/exit log records, in order."""
     import lale.datasets

     (X_train, y_train), (X_test, y_test) = lale.datasets.load_iris_df()
     trained = LogisticRegression().fit(X_train, y_train)
     _ = trained.predict(X_test)
     self.handler.flush()
     # exactly four records are expected; unpacking enforces the count
     s1, s2, s3, s4 = self.stream.getvalue().strip().split('\n')
     for record, suffix in zip(
         (s1, s2, s3, s4),
         ('enter fit LogisticRegression',
          'exit  fit LogisticRegression',
          'enter predict LogisticRegression',
          'exit  predict LogisticRegression'),
     ):
         self.assertTrue(record.endswith(suffix))
Beispiel #15
0
    def test_clone_with_scikit1(self):
        """clone() works for trainable and trained operators alike."""
        from sklearn.base import clone

        trainable = LogisticRegression()
        trainable.get_params()
        trainable_clone = clone(trainable)
        # the clone is a distinct operator with a distinct impl
        self.assertNotEqual(trainable, trainable_clone)
        self.assertNotEqual(trainable._impl, trainable_clone._impl)
        iris = load_iris()
        trained = trainable.fit(iris.data, iris.target)
        _ = trained.predict(iris.data)
        trained_clone = clone(trained)
        self.assertNotEqual(trained._impl, trained_clone._impl)
Beispiel #16
0
    def test_score_trained_op_sample_wt(self):
        """score() with sample_weight matches weighted accuracy_score."""
        import numpy as np
        from sklearn.metrics import accuracy_score

        from lale.lib.sklearn import LogisticRegression

        trained = LogisticRegression().fit(self.X_train, self.y_train)
        rng = np.random.RandomState(0)  # fixed seed keeps weights reproducible
        weights = rng.randint(10, size=self.y_test.shape)
        via_score = trained.score(self.X_test, self.y_test, sample_weight=weights)
        predictions = trained.predict(self.X_test)
        via_metric = accuracy_score(self.y_test, predictions, sample_weight=weights)
        self.assertEqual(via_score, via_metric)
Beispiel #17
0
 def test_bare_array(self):
     """Scoring plain ndarrays yields a plain float, not a schema-annotated array."""
     import sklearn.metrics
     from numpy import ndarray

     from lale.datasets.data_schemas import NDArrayWithSchema

     X, y = sklearn.datasets.load_iris(return_X_y=True)
     # inputs are bare numpy arrays without lale schema annotations
     for arr in (X, y):
         self.assertIsInstance(arr, ndarray)
     for arr in (X, y):
         self.assertNotIsInstance(arr, NDArrayWithSchema)
     trained = LogisticRegression().fit(X, y)
     scorer = sklearn.metrics.make_scorer(sklearn.metrics.accuracy_score)
     result = scorer(trained, X, y)
     self.assertIsInstance(result, float)
     self.assertNotIsInstance(result, NDArrayWithSchema)
Beispiel #18
0
 def test_clone_with_scikit1(self):
     """A cloned trained operator predicts exactly like the original."""
     from sklearn.base import clone

     trainable = LogisticRegression()
     trainable.get_params()
     trainable_clone = clone(trainable)
     self.assertNotEqual(trainable, trainable_clone)
     self.assertNotEqual(trainable._impl, trainable_clone._impl)
     iris = sklearn.datasets.load_iris()
     trained = trainable.fit(iris.data, iris.target)
     predictions = trained.predict(iris.data)
     trained_clone = clone(trained)
     self.assertNotEqual(trained._impl, trained_clone._impl)
     clone_predictions = trained_clone.predict(iris.data)
     # predictions must agree element-wise across original and clone
     for expected, actual in zip(predictions, clone_predictions):
         self.assertEqual(expected, actual)
Beispiel #19
0
 def test_disparate_impact_remover_np_num(self):
     """DisparateImpactRemover should push disparate impact closer to 1.0."""
     data = self.creditg_np_num
     fairness_info = data["fairness_info"]
     trainable_orig = LogisticRegression(max_iter=1000)
     trainable_remi = DisparateImpactRemover(**fairness_info) >> trainable_orig
     trained_orig = trainable_orig.fit(data["train_X"], data["train_y"])
     trained_remi = trainable_remi.fit(data["train_X"], data["train_y"])
     test_X, test_y = data["test_X"], data["test_y"]
     disparate_impact_scorer = lale.lib.aif360.disparate_impact(**fairness_info)
     # baseline model: noticeably unfair but not degenerate
     impact_orig = disparate_impact_scorer(trained_orig, test_X, test_y)
     self.assertTrue(0.6 < impact_orig < 1.0, f"impact_orig {impact_orig}")
     # remediated pipeline: strictly tighter lower bound
     impact_remi = disparate_impact_scorer(trained_remi, test_X, test_y)
     self.assertTrue(0.8 < impact_remi < 1.0, f"impact_remi {impact_remi}")
Beispiel #20
0
    def test_using_individual_operator(self):
        """OptimizeLast wraps a trained individual operator and tunes it with Hyperopt.

        Checks that (a) the optimized pipeline has the same type as the original
        trained operator and (b) predicting through the fitted OptimizeLast
        object matches predicting through its fit result.
        """
        from lale.lib.lale import Hyperopt, OptimizeLast

        lr = LogisticRegression()  # Individual Operator
        trained_operator = lr.fit(self.X_train, self.y_train)

        # Now let's use Hyperopt to optimize the classifier
        hyperopt_args = {"scoring": "accuracy", "cv": 3, "max_evals": 2}
        opt_last = OptimizeLast(
            estimator=trained_operator,
            last_optimizer=Hyperopt,
            optimizer_args=hyperopt_args,
        )

        res_last = opt_last.fit(self.X_train, self.y_train)
        predictions = res_last.predict(self.X_test)
        predictions_1 = opt_last.predict(self.X_test)
        best_pipeline = res_last.get_pipeline()

        self.assertEqual(type(trained_operator), type(best_pipeline))
        # bug fix: a bare `assert` is silently stripped under `python -O`,
        # so use a unittest assertion instead
        self.assertTrue(np.array_equal(predictions_1, predictions))
Beispiel #21
0
 def test_with_defaults(self):
     """LogisticRegression with default hyperparameters trains and predicts."""
     model = LogisticRegression().fit(self.train_X, self.train_y)
     _ = model.predict(self.test_X)
Beispiel #22
0
    def test_score_trainable_op(self):
        """Calling score() on the trainable (not its trained result) still works."""
        from lale.lib.sklearn import LogisticRegression

        trainable = LogisticRegression()
        # the trained result is deliberately discarded; score() is invoked
        # on the trainable operator itself after fitting
        _ = trainable.fit(self.X_train, self.y_train)
        trainable.score(self.X_test, self.y_test)