Example #1
 def test_duplicate_instances(self):
     tfm = PCA()
     clf = LogisticRegression(LogisticRegression.solver.lbfgs,
                              LogisticRegression.multi_class.auto)
     with self.assertRaises(ValueError):
         _ = lale.operators.make_pipeline(tfm, tfm, clf)
Example #2
 def test_redacting_pd_cat(self):
     fairness_info = self.creditg_pd_cat["fairness_info"]
     estim = self.prep_pd_cat >> LogisticRegression(max_iter=1000)
     trainable_remi = Redacting(**fairness_info) >> estim
     self._attempt_remi_creditg_pd_cat(fairness_info, trainable_remi, 0.81,
                                       0.91)
Example #3
 def test_reweighing_pd_cat(self):
     fairness_info = self.creditg_pd_cat["fairness_info"]
     estim = self.prep_pd_cat >> LogisticRegression(max_iter=1000)
     trainable_remi = Reweighing(estimator=estim, **fairness_info)
     self._attempt_remi_creditg_pd_cat(fairness_info, trainable_remi, 0.85,
                                       1.00)
Example #4
 def test_reweighing_pd_num(self):
     fairness_info = self.creditg_pd_num["fairness_info"]
     estim = LogisticRegression(max_iter=1000)
     trainable_remi = Reweighing(estimator=estim, **fairness_info)
     self._attempt_remi_creditg_pd_num(fairness_info, trainable_remi, 0.82,
                                       0.92)
Example #5
 def test_eq_odds_postprocessing_pd_cat(self):
     fairness_info = self.creditg_pd_cat["fairness_info"]
     estim = self.prep_pd_cat >> LogisticRegression(max_iter=1000)
     trainable_remi = EqOddsPostprocessing(**fairness_info, estimator=estim)
     self._attempt_remi_creditg_pd_cat(fairness_info, trainable_remi, 0.88,
                                       0.98)
Example #6
    def dont_test_smac_choice(self):

        import numpy as np
        from sklearn import svm, datasets
        from sklearn.model_selection import cross_val_score

        # Import ConfigSpace and different types of parameters
        from smac.configspace import ConfigurationSpace

        # Import SMAC-utilities
        from smac.tae.execute_func import ExecuteTAFuncDict
        from smac.scenario.scenario import Scenario
        from smac.facade.smac_facade import SMAC as orig_SMAC


        tfm = PCA() | Nystroem() | NoOp()
        planned_pipeline1 = (
            (OneHotEncoder(handle_unknown='ignore', sparse=False) | NoOp())
            >> tfm
            >> (LogisticRegression() | KNeighborsClassifier())
        )

        cs: ConfigurationSpace = get_smac_space(planned_pipeline1, lale_num_grids=1)

        # Scenario object
        scenario = Scenario({
            "run_obj": "quality",       # we optimize quality (alternatively runtime)
            "runcount-limit": 1,        # maximum function evaluations
            "cs": cs,                   # configuration space
            "deterministic": "true",
        })

        # Optimize, using a SMAC-object
        tae = iris_fmin_tae(planned_pipeline1, num_folds=2)
        print("Optimizing! Depending on your machine, this might take a few minutes.")
        smac = orig_SMAC(scenario=scenario, rng=np.random.RandomState(42),
                         tae_runner=tae)

        incumbent = smac.optimize()

        inc_value = tae(incumbent)

        print("Optimized Value: %.2f" % (inc_value))
Example #7
 def test_disparate_impact_remover_pd_num(self):
     fairness_info = self.creditg_pd_num["fairness_info"]
     trainable_remi = DisparateImpactRemover(
         **fairness_info) >> LogisticRegression(max_iter=1000)
     self._attempt_remi_creditg_pd_num(fairness_info, trainable_remi, 0.78,
                                       0.88)
Example #8
 def test_unknown_arg(self):
     with self.assertRaises(jsonschema.ValidationError) as cm:
         LogisticRegression(activation='relu')
     summary = cm.exception.message.split('\n')[0]
     self.assertEqual(summary, "Invalid configuration for LogisticRegression(activation='relu') due to argument 'activation' was unexpected.")
Example #9
 def test_constraint(self):
     with self.assertRaises(jsonschema.ValidationError) as cm:
         LogisticRegression(solver='sag', penalty='l1')
     summary = cm.exception.message.split('\n')[0]
     self.assertEqual(summary, "Invalid configuration for LogisticRegression(solver='sag', penalty='l1') due to constraint the newton-cg, sag, and lbfgs solvers support only l2 penalties.")
Example #10
 def test_decision_function_binary(self):
     from lale.lib.lale import Project
     train_X, train_y = self._creditG['X'], self._creditG['y']
     trainable = Project(columns={'type': 'number'}) >> LogisticRegression()
     trained = trainable.fit(train_X, train_y)
     decisions = trained.decision_function(train_X)
Example #11
 def test_wrong_cat(self):
     with self.assertRaises(jsonschema.ValidationError) as cm:
         LogisticRegression(solver='adam')
     summary = cm.exception.message.split('\n')[0]
     self.assertEqual(summary, "Invalid configuration for LogisticRegression(solver='adam') due to invalid value solver=adam.")
Example #12
class _HyperoptImpl:
    def __init__(
        self,
        estimator=None,
        max_evals=50,
        frac_evals_with_defaults=0,
        algo="tpe",
        cv=5,
        handle_cv_failure=False,
        scoring=None,
        best_score=0.0,
        max_opt_time=None,
        max_eval_time=None,
        pgo: Optional[PGO] = None,
        show_progressbar=True,
        args_to_scorer=None,
        verbose=False,
    ):
        self.max_evals = max_evals
        if estimator is None:
            self.estimator = LogisticRegression()
        else:
            self.estimator = estimator
        if frac_evals_with_defaults > 0:
            self.evals_with_defaults = int(frac_evals_with_defaults * max_evals)
        else:
            self.evals_with_defaults = 0
        self.algo = algo
        self.scoring = scoring
        if self.scoring is None:
            is_clf = self.estimator.is_classifier()
            if is_clf:
                self.scoring = "accuracy"
            else:
                self.scoring = "r2"
        self.best_score = best_score
        self.handle_cv_failure = handle_cv_failure
        self.cv = cv
        self._trials = hyperopt.Trials()
        self._default_trials = hyperopt.Trials()
        self.max_opt_time = max_opt_time
        self.max_eval_time = max_eval_time
        self.pgo = pgo
        self.show_progressbar = show_progressbar
        if args_to_scorer is not None:
            self.args_to_scorer = args_to_scorer
        else:
            self.args_to_scorer = {}
        self.verbose = verbose

    def _summarize_statuses(self):
        status_list = self._trials.statuses()
        status_hist = {}
        for status in status_list:
            status_hist[status] = 1 + status_hist.get(status, 0)
        if hyperopt.STATUS_FAIL in status_hist:
            print(
                f"{status_hist[hyperopt.STATUS_FAIL]} out of {len(status_list)} trials failed, call summary() for details."
            )
            if not self.verbose:
                print("Run with verbose=True to see per-trial exceptions.")

    def fit(self, X_train, y_train):
        opt_start_time = time.time()
        is_clf = self.estimator.is_classifier()
        self.cv = check_cv(self.cv, y=y_train, classifier=is_clf)
        data_schema = lale.helpers.fold_schema(X_train, y_train, self.cv, is_clf)
        self.search_space = hyperopt.hp.choice(
            "meta_model",
            [
                hyperopt_search_space(
                    self.estimator, pgo=self.pgo, data_schema=data_schema
                )
            ],
        )
        # Create a search space with default hyperparameters for all trainable parts of the pipeline.
        # This search space is used for `frac_evals_with_defaults` fraction of the total trials.
        try:
            self.search_space_with_defaults = hyperopt.hp.choice(
                "meta_model",
                [
                    hyperopt_search_space(
                        self.estimator.freeze_trainable(),
                        pgo=self.pgo,
                        data_schema=data_schema,
                    )
                ],
            )
        except Exception:
            logger.warning(
                "Exception caught during generation of default search space, setting frac_evals_with_defaults to zero."
            )
            self.evals_with_defaults = 0

        def hyperopt_train_test(params, X_train, y_train):
            warnings.filterwarnings("ignore")

            trainable = create_instance_from_hyperopt_search_space(
                self.estimator, params
            )
            try:
                cv_score, logloss, execution_time = cross_val_score_track_trials(
                    trainable,
                    X_train,
                    y_train,
                    cv=self.cv,
                    scoring=self.scoring,
                    args_to_scorer=self.args_to_scorer,
                )
                logger.debug(
                    "Successful trial of hyperopt with hyperparameters:{}".format(
                        params
                    )
                )
            except BaseException as e:
                # If there is any error in cross validation, use the score based on a random train-test split as the evaluation criterion
                if self.handle_cv_failure:
                    (
                        X_train_part,
                        X_validation,
                        y_train_part,
                        y_validation,
                    ) = train_test_split(X_train, y_train, test_size=0.20)
                    start = time.time()
                    trained = trainable.fit(X_train_part, y_train_part)
                    scorer = check_scoring(trainable, scoring=self.scoring)
                    cv_score = scorer(
                        trained, X_validation, y_validation, **self.args_to_scorer
                    )
                    execution_time = time.time() - start
                    y_pred_proba = trained.predict_proba(X_validation)
                    try:
                        logloss = log_loss(y_true=y_validation, y_pred=y_pred_proba)
                    except BaseException:
                        logloss = 0
                        logger.debug("Warning, log loss cannot be computed")
                else:
                    logger.debug(e)
                    logger.debug(
                        "Error {} with pipeline:{}".format(e, trainable.to_json())
                    )
                    raise e
            return cv_score, logloss, execution_time

        def merge_trials(trials1, trials2):
            max_tid = max([trial["tid"] for trial in trials1.trials])

            for trial in trials2:
                tid = trial["tid"] + max_tid + 1
                hyperopt_trial = hyperopt.Trials().new_trial_docs(
                    tids=[None], specs=[None], results=[None], miscs=[None]
                )
                hyperopt_trial[0] = trial
                hyperopt_trial[0]["tid"] = tid
                hyperopt_trial[0]["misc"]["tid"] = tid
                for key in hyperopt_trial[0]["misc"]["idxs"].keys():
                    hyperopt_trial[0]["misc"]["idxs"][key] = [tid]
                trials1.insert_trial_docs(hyperopt_trial)
                trials1.refresh()
            return trials1

        def proc_train_test(params, X_train, y_train, return_dict):
            return_dict["params"] = copy.deepcopy(params)
            try:
                score, logloss, execution_time = hyperopt_train_test(
                    params, X_train=X_train, y_train=y_train
                )
                return_dict["loss"] = self.best_score - score
                return_dict["time"] = execution_time
                return_dict["log_loss"] = logloss
                return_dict["status"] = hyperopt.STATUS_OK
            except BaseException as e:
                exception_type = f"{type(e).__module__}.{type(e).__name__}"
                try:
                    trainable = create_instance_from_hyperopt_search_space(
                        self.estimator, params
                    )
                    trial_info = (
                        f'pipeline: """{trainable.pretty_print(show_imports=False)}"""'
                    )
                except BaseException:
                    trial_info = f"hyperparams: {params}"
                error_msg = f"Exception caught in Hyperopt: {exception_type}, {traceback.format_exc()}with {trial_info}"
                logger.warning(error_msg + ", setting status to FAIL")
                return_dict["status"] = hyperopt.STATUS_FAIL
                return_dict["error_msg"] = error_msg
                if self.verbose:
                    print(return_dict["error_msg"])

        def get_final_trained_estimator(params, X_train, y_train):
            warnings.filterwarnings("ignore")
            trainable = create_instance_from_hyperopt_search_space(
                self.estimator, params
            )
            trained = trainable.fit(X_train, y_train)
            return trained

        def f(params):
            current_time = time.time()
            if (self.max_opt_time is not None) and (
                (current_time - opt_start_time) > self.max_opt_time
            ):
                # if max optimization time set, and we have crossed it, exit optimization completely
                sys.exit(0)
            if self.max_eval_time:
                # Run hyperopt in a subprocess that can be interrupted
                manager = multiprocessing.Manager()
                proc_dict = manager.dict()
                p = multiprocessing.Process(
                    target=proc_train_test, args=(params, X_train, y_train, proc_dict)
                )
                p.start()
                p.join(self.max_eval_time)
                if p.is_alive():
                    p.terminate()
                    p.join()
                    logger.warning(
                        f"Maximum alloted evaluation time exceeded. with hyperparams: {params}, setting status to FAIL"
                    )
                    proc_dict["status"] = hyperopt.STATUS_FAIL
                if "status" not in proc_dict:
                    logger.warning("Corrupted results, setting status to FAIL")
                    proc_dict["status"] = hyperopt.STATUS_FAIL
            else:
                proc_dict = {}
                proc_train_test(params, X_train, y_train, proc_dict)
            return proc_dict

        algo = getattr(hyperopt, self.algo)
        # Search in the search space with defaults
        if self.evals_with_defaults > 0:
            try:
                hyperopt.fmin(
                    f,
                    self.search_space_with_defaults,
                    algo=algo.suggest,
                    max_evals=self.evals_with_defaults,
                    trials=self._default_trials,
                    rstate=np.random.RandomState(SEED),
                    show_progressbar=self.show_progressbar,
                )
            except SystemExit:
                logger.warning(
                    "Maximum alloted optimization time exceeded. Optimization exited prematurely"
                )
            except AllTrialsFailed:
                self._best_estimator = None
                if hyperopt.STATUS_OK not in self._trials.statuses():
                    raise ValueError(
                        "Error from hyperopt, none of the trials succeeded."
                    )

        try:
            hyperopt.fmin(
                f,
                self.search_space,
                algo=algo.suggest,
                max_evals=self.max_evals - self.evals_with_defaults,
                trials=self._trials,
                rstate=np.random.RandomState(SEED),
                show_progressbar=self.show_progressbar,
            )
        except SystemExit:
            logger.warning(
                "Maximum alloted optimization time exceeded. Optimization exited prematurely"
            )
        except AllTrialsFailed:
            self._best_estimator = None
            if hyperopt.STATUS_OK not in self._trials.statuses():
                self._summarize_statuses()
                raise ValueError("Error from hyperopt, none of the trials succeeded.")
        self._trials = merge_trials(self._trials, self._default_trials)
        if self.show_progressbar:
            self._summarize_statuses()
        try:
            best_trial = self._trials.best_trial
            val_loss = self._trials.best_trial["result"]["loss"]
            if len(self._default_trials) > 0:
                default_val_loss = self._default_trials.best_trial["result"]["loss"]
                if default_val_loss < val_loss:
                    best_trial = self._default_trials.best_trial
            best_params = best_trial["result"]["params"]
            logger.info(
                "best score: {:.1%}\nbest hyperparams found using {} hyperopt trials: {}".format(
                    self.best_score - self._trials.average_best_error(),
                    self.max_evals,
                    best_params,
                )
            )
            trained = get_final_trained_estimator(best_params, X_train, y_train)
            self._best_estimator = trained
        except BaseException as e:
            logger.warning(
                "Unable to extract the best parameters from optimization, the error: {}".format(
                    e
                )
            )
            self._best_estimator = None

        return self

    def predict(self, X_eval):
        import warnings

        warnings.filterwarnings("ignore")
        if self._best_estimator is None:
            raise ValueError(
                "Can not predict as the best estimator is None. Either an attempt to call `predict` "
                "before calling `fit` or all the trials during `fit` failed."
            )
        trained = self._best_estimator
        try:
            predictions = trained.predict(X_eval)
        except ValueError as e:
            logger.warning(
                "ValueError in predicting using Hyperopt:{}, the error is:{}".format(
                    trained, e
                )
            )
            predictions = None

        return predictions

    def summary(self):
        """Table summarizing the trial results (ID, loss, time, log_loss, status).

Returns
-------
result : DataFrame"""

        def make_record(trial_dict):
            return {
                "name": f'p{trial_dict["tid"]}',
                "tid": trial_dict["tid"],
                "loss": trial_dict["result"].get("loss", float("nan")),
                "time": trial_dict["result"].get("time", float("nan")),
                "log_loss": trial_dict["result"].get("log_loss", float("nan")),
                "status": trial_dict["result"]["status"],
            }

        records = [make_record(td) for td in self._trials.trials]
        result = pd.DataFrame.from_records(records, index="name")
        return result

    def get_pipeline(self, pipeline_name=None, astype="lale"):
        """Retrieve one of the trials.

Parameters
----------
pipeline_name : union type, default None

    - string
        Key for table returned by summary(), return a trainable pipeline.

    - None
        When not specified, return the best trained pipeline found.

astype : 'lale' or 'sklearn', default 'lale'
    Type of resulting pipeline.

Returns
-------
result : Trained operator if best, trainable operator otherwise.
"""
        best_name = None
        if self._best_estimator is not None:
            best_name = f'p{self._trials.best_trial["tid"]}'
        if pipeline_name is None:
            pipeline_name = best_name
        if pipeline_name == best_name:
            result = getattr(self, "_best_estimator", None)
        else:
            tid = int(pipeline_name[1:])
            params = self._trials.trials[tid]["result"]["params"]
            result = create_instance_from_hyperopt_search_space(self.estimator, params)
        if result is None or astype == "lale":
            return result
        assert astype == "sklearn", astype
        return result.export_to_sklearn_pipeline()
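
The summary() and get_pipeline() docstrings above describe how trial results can be inspected once optimization has finished. The following is a minimal, hypothetical usage sketch of that flow; it assumes training arrays X and y are already loaded and that the Hyperopt operator from lale.lib.lale (used in Examples #13 and #20) forwards summary() and get_pipeline() from this implementation class.

# Hypothetical sketch: X and y are assumed, pre-loaded training data.
from lale.lib.lale import Hyperopt
from lale.lib.sklearn import PCA, LogisticRegression

opt = Hyperopt(estimator=PCA >> LogisticRegression, max_evals=5, show_progressbar=False)
trained = opt.fit(X, y)
print(trained.summary())                    # DataFrame indexed by p<tid>: loss, time, log_loss, status
best_lale = trained.get_pipeline()          # best trained pipeline as a lale operator
best_sk = trained.get_pipeline(astype="sklearn")  # same pipeline exported to sklearn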
Example #13
        if i > max_evals:
            assert False
        try:
            X, y = data_loader()
            clf = Hyperopt(estimator=pipeline, max_evals=i, scoring=scoring)
            trained_pipeline = clf.fit(X, y)
            trained_pipeline.predict(X)
            return True
        except Exception:
            test(3 * i)

    test(1)


kls = inspect.getmembers(autogen, lambda m: isinstance(m, Operator))
LR = LogisticRegression.customize_schema(relevantToOptimizer=[])

classifiers = [
    "AdaBoostClassifier",
    "BernoulliNB",
    "CalibratedClassifierCV",
    "ComplementNB",
    "DecisionTreeClassifier",
    "ExtraTreesClassifier",
    "GaussianNB",
    "GaussianProcessClassifier",
    "GradientBoostingClassifier",
    "KNeighborsClassifier",
    "LGBMClassifier",
    "LabelPropagation",
    "LabelSpreading",
Example #14
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=0)
    if verbose:
        print(f'training set shapes: X {X_train.shape}, y {y_train.shape}')
        print(f'test set shapes:     X {X_test.shape}, y {y_test.shape}')
    if preprocess:
        from lale.datasets.data_schemas import add_schema
        X_train = add_schema(X_train.astype(np.number), recalc=True)
        y_train = add_schema(y_train.astype(int), recalc=True)
        X_test = add_schema(X_test.astype(np.number), recalc=True)
        y_test = add_schema(y_test.astype(int), recalc=True)
    else:
        X_train, X_test, y_train, y_test = add_schemas(
            schema_orig, target_col, X_train, X_test, y_train, y_test)
    return (X_train, y_train), (X_test, y_test)


if __name__ == "__main__":
    datasets = ['spectf', 'diabetes', 'breast-cancer', 'hill-valley']
    from lale.lib.sklearn import LogisticRegression
    for dataset_name in datasets:
        try:
            (X_train, y_train), (X_test, y_test) = fetch(
                dataset_name, 'classification')
            trained = LogisticRegression().fit(X_train, y_train)
            trained.predict(X_test)
        except BaseException as e:
            import traceback
            traceback.print_exc()
Example #15
 def test_hyperparam_exclusive_min(self):
     with EnableSchemaValidation():
         with self.assertRaises(jsonschema.ValidationError):
             _ = LogisticRegression(LogisticRegression.penalty.l1, C=0.0)
Example #16
 def test_no_partial_fit(self):
     pipeline = Batching(operator=NoOp() >> LogisticRegression())
     with self.assertRaises(AttributeError):
         _ = pipeline.fit(self.X_train, self.y_train)
Example #17
 def test_hyperparam_penalty_solver_dependence(self):
     with EnableSchemaValidation():
         with self.assertRaises(jsonschema.ValidationError):
             _ = LogisticRegression(LogisticRegression.penalty.l1,
                                    LogisticRegression.solver.newton_cg)
Example #18
    def create_pipeline(self):
        from sklearn.decomposition import PCA
        from sklearn.pipeline import make_pipeline

        pipeline = make_pipeline(PCA(), LogisticRegression())
        return pipeline
Example #19
 def test_input_schema_fit(self):
     self.maxDiff = None
     self.assertEqual(
         LogisticRegression.input_schema_fit(),
         LogisticRegression.get_schema("input_fit"),
     )
     self.assertEqual((NMF >> LogisticRegression).input_schema_fit(),
                      NMF.get_schema("input_fit"))
     self.assertEqual(
         IdentityWrapper(op=LogisticRegression).input_schema_fit(),
         LogisticRegression.get_schema("input_fit"),
     )
     actual = (TfidfVectorizer | NMF).input_schema_fit()
     expected = {
         "anyOf": [
             {
                 "type": "object",
                 "required": ["X"],
                 "additionalProperties": False,
                 "properties": {
                     "X": {
                         "anyOf": [
                             {
                                 "type": "array",
                                 "items": {
                                     "type": "string"
                                 }
                             },
                             {
                                 "type": "array",
                                 "items": {
                                     "type": "array",
                                     "minItems": 1,
                                     "maxItems": 1,
                                     "items": {
                                         "type": "string"
                                     },
                                 },
                             },
                         ]
                     },
                     "y": {},
                 },
             },
             {
                 "type": "object",
                 "required": ["X"],
                 "additionalProperties": False,
                 "properties": {
                     "X": {
                         "type": "array",
                         "items": {
                             "type": "array",
                             "items": {
                                 "type": "number",
                                 "minimum": 0.0
                             },
                         },
                     },
                     "y": {},
                 },
             },
         ]
     }
     self.assertEqual(actual, expected)
Example #20
    def test_resampler(self):
        from lale.lib.lale import ConcatFeatures, NoOp
        from lale.lib.sklearn import (
            PCA,
            LogisticRegression,
            Nystroem,
            RandomForestClassifier,
        )

        X_train, y_train = self.X_train, self.y_train
        X_test, y_test = self.X_test, self.y_test
        import importlib

        module_name = ".".join(res_name.split(".")[0:-1])
        class_name = res_name.split(".")[-1]
        module = importlib.import_module(module_name)

        class_ = getattr(module, class_name)
        with self.assertRaises(ValidationError):
            res = class_()

        # test_schemas_are_schemas
        lale.type_checking.validate_is_schema(class_.input_schema_fit())
        lale.type_checking.validate_is_schema(class_.input_schema_predict())
        lale.type_checking.validate_is_schema(class_.output_schema_predict())
        lale.type_checking.validate_is_schema(class_.hyperparam_schema())

        # test_init_fit_predict
        from lale.operators import make_pipeline

        pipeline1 = PCA() >> class_(operator=make_pipeline(LogisticRegression()))
        trained = pipeline1.fit(X_train, y_train)
        predictions = trained.predict(X_test)

        pipeline2 = class_(operator=make_pipeline(PCA(), LogisticRegression()))
        trained = pipeline2.fit(X_train, y_train)
        predictions = trained.predict(X_test)

        # test_with_hyperopt
        from lale.lib.lale import Hyperopt

        optimizer = Hyperopt(
            estimator=PCA >> class_(operator=make_pipeline(LogisticRegression())),
            max_evals=1,
            show_progressbar=False,
        )
        trained_optimizer = optimizer.fit(X_train, y_train)
        predictions = trained_optimizer.predict(X_test)

        pipeline3 = class_(
            operator=PCA()
            >> (Nystroem & NoOp)
            >> ConcatFeatures
            >> LogisticRegression()
        )
        optimizer = Hyperopt(estimator=pipeline3, max_evals=1, show_progressbar=False)
        trained_optimizer = optimizer.fit(X_train, y_train)
        predictions = trained_optimizer.predict(X_test)

        pipeline4 = (
            (
                PCA >> class_(operator=make_pipeline(Nystroem()))
                & class_(operator=make_pipeline(Nystroem()))
            )
            >> ConcatFeatures
            >> LogisticRegression()
        )
        optimizer = Hyperopt(
            estimator=pipeline4, max_evals=1, scoring="roc_auc", show_progressbar=False
        )
        trained_optimizer = optimizer.fit(X_train, y_train)
        predictions = trained_optimizer.predict(X_test)

        # test_cross_validation
        from lale.helpers import cross_val_score

        cv_results = cross_val_score(pipeline1, X_train, y_train, cv=2)
        self.assertEqual(len(cv_results), 2)

        # test_to_json
        pipeline1.to_json()
Example #21
 def test_lfr_pd_num(self):
     fairness_info = self.creditg_pd_num["fairness_info"]
     trainable_remi = LFR(**fairness_info) >> LogisticRegression(
         max_iter=1000)
     self._attempt_remi_creditg_pd_num(fairness_info, trainable_remi, 0.95,
                                       1.05)
Example #22
 def test_hyperparam_dual_penalty_solver_dependence(self):
     with self.assertRaises(jsonschema.ValidationError):
         lr = LogisticRegression(LogisticRegression.penalty.l2,
                                 LogisticRegression.solver.sag,
                                 dual=True)
Example #23
 def test_sans_mitigation_pd_num(self):
     fairness_info = self.creditg_pd_num["fairness_info"]
     trainable_remi = LogisticRegression(max_iter=1000)
     self._attempt_remi_creditg_pd_num(fairness_info, trainable_remi, 0.5,
                                       1.0)
Example #24
 def test_with_lale_classifiers(self):
     from lale.lib.sklearn import BaggingClassifier
     from lale.sklearn_compat import make_sklearn_compat
     clf = BaggingClassifier(base_estimator=LogisticRegression())
     trained = clf.fit(self.X_train, self.y_train)
     trained.predict(self.X_test)
Example #25
 def test_optim_preproc_pd_cat(self):
     # TODO: set the optimizer options as shown in the example https://github.com/Trusted-AI/AIF360/blob/master/examples/demo_optim_data_preproc.ipynb
     fairness_info = self.creditg_pd_cat["fairness_info"]
     _ = OptimPreproc(**fairness_info,
                      optim_options={}) >> LogisticRegression(max_iter=1000)
Example #26
    def test_with_lale_pipeline(self):
        from lale.lib.sklearn import BaggingClassifier

        clf = BaggingClassifier(base_estimator=PCA() >> LogisticRegression())
        trained = clf.fit(self.X_train, self.y_train)
        trained.predict(self.X_test)
Example #27
 def test_sans_mitigation_pd_cat(self):
     fairness_info = self.creditg_pd_cat["fairness_info"]
     trainable_remi = self.prep_pd_cat >> LogisticRegression(max_iter=1000)
     self._attempt_remi_creditg_pd_cat(fairness_info, trainable_remi, 0.66,
                                       0.76)
Example #28
 def test_hyperparam_keyword_enum(self):
     _ = LogisticRegression(LogisticRegression.penalty.l1,
                            C=0.1,
                            solver=LogisticRegression.solver.saga)
Example #29
    def test_lr_parameters(self):
        pgo = PGO.load_pgo_file(example_pgo_fp)

        lr = LogisticRegression()
        parameters: SearchSpace = hyperopt_search_space(lr, pgo=pgo)
Example #30
 def test_two_estimators_predict_proba(self):
     pipeline = (StandardScaler() >>
                 (PCA() & Nystroem() & LogisticRegression()) >>
                 ConcatFeatures() >> NoOp() >> LogisticRegression())
     trained = pipeline.fit(self.X_train, self.y_train)
     trained.predict_proba(self.X_test)