コード例 #1
0
    def fit(self, X_train, y_train):
        """Run the SMAC search and refit the best configuration on all the data.

        Every candidate proposed by SMAC is scored with cross validation
        (``self.cv`` / ``self.scoring``); the cost reported to SMAC is
        ``self.best_score - score``. After the search, the incumbent
        configuration is converted to a trainable operator, fitted on the
        complete ``X_train``/``y_train`` and stored in ``self._best_estimator``.

        Args:
            X_train: Training features, forwarded to cross validation and to
                the ``fit`` of each candidate.
            y_train: Training targets.

        Returns:
            ``self``. Errors raised during the optimization are logged rather
            than propagated; in the generic error case
            ``self._best_estimator`` is set to ``None``.
        """
        self.cv = check_cv(
            self.cv, y=y_train, classifier=True
        )  # TODO: Replace the classifier flag value by using tags?

        def smac_train_test(trainable, X_train, y_train):
            """Return ``(score, log_loss, execution_time)`` for one candidate."""
            try:
                cv_score, logloss, execution_time = cross_val_score_track_trials(
                    trainable,
                    X_train,
                    y_train,
                    cv=self.cv,
                    scoring=self.scoring)
                logger.debug("Successful trial of SMAC")
            except BaseException as e:
                if not self.handle_cv_failure:
                    logger.debug("Error {} with pipeline:{}".format(
                        e, trainable.to_json()))
                    raise
                # Cross validation failed: fall back to the score obtained on
                # a single random 80/20 train/validation split.
                X_train_part, X_validation, y_train_part, y_validation = train_test_split(
                    X_train, y_train, test_size=0.20)
                start = time.time()
                trained = trainable.fit(X_train_part, y_train_part)
                scorer = check_scoring(trainable, scoring=self.scoring)
                cv_score = scorer(trained, X_validation, y_validation)
                execution_time = time.time() - start
                try:
                    # Not every estimator implements predict_proba, so the
                    # probability call has to live inside the guarded region;
                    # otherwise a missing method would fail the whole trial
                    # even though a valid score was already computed.
                    y_pred_proba = trained.predict_proba(X_validation)
                    logloss = log_loss(y_true=y_validation,
                                       y_pred=y_pred_proba)
                except BaseException:
                    logloss = 0
                    logger.debug("Warning, log loss cannot be computed")
            return cv_score, logloss, execution_time

        def f(trainable):
            """Objective handed to SMAC: ``self.best_score - score``."""
            try:
                score, _logloss, _execution_time = smac_train_test(
                    trainable, X_train=X_train, y_train=y_train)
                loss = self.best_score - score
            except BaseException as e:
                logger.warning(
                    f"Exception caught in SMACCV:{type(e)}, {traceback.format_exc()}, SMAC will set a cost_for_crash to MAXINT."
                )
                raise
            return loss

        try:
            smac = orig_SMAC(scenario=self.scenario,
                             rng=np.random.RandomState(42),
                             tae_runner=lale_op_smac_tae(self.estimator, f))
            incumbent = smac.optimize()
            self.trials = smac.get_runhistory()
            trainable = lale_trainable_op_from_config(self.estimator,
                                                      incumbent)
            # Get the trainable corresponding to the best params and train it
            # on the entire training dataset.
            trained = trainable.fit(X_train, y_train)
            self._best_estimator = trained
        except BudgetExhaustedException:
            # NOTE: self._best_estimator is left untouched in this branch.
            logger.warning(
                'Maximum alloted optimization time exceeded. Optimization exited prematurely'
            )
        except BaseException as e:
            logger.warning('Error during optimization: {}'.format(e))
            self._best_estimator = None

        return self
コード例 #2
0
ファイル: test_optimizers.py プロジェクト: sks95/lale
def iris_fmin_tae(op, num_folds=5):
    """Wrap ``op`` into a SMAC target-algorithm evaluator.

    The objective is whatever ``iris_f_min_for_folds`` returns for
    ``num_folds``; it is combined with ``op`` through ``lale_op_smac_tae``.
    """
    objective = iris_f_min_for_folds(num_folds=num_folds)
    return lale_op_smac_tae(op, objective)
コード例 #3
0
ファイル: smac.py プロジェクト: shinnar/lale
    def fit(self, X_train, y_train):
        """Build the SMAC scenario, run the search and refit the best candidate.

        The search space is derived from ``self.estimator`` (using a data
        schema computed from the training data), a SMAC ``Scenario`` is
        created from ``self.max_evals`` / ``self.max_opt_time``, and every
        candidate is scored with cross validation. The cost reported to SMAC
        is ``self.best_score - score``. After the search, the incumbent
        configuration is converted to a trainable operator, fitted on the
        complete ``X_train``/``y_train`` and stored in ``self._best_estimator``.

        Args:
            X_train: Training features, forwarded to cross validation and to
                the ``fit`` of each candidate.
            y_train: Training targets.

        Returns:
            ``self``. Errors raised during the optimization are logged rather
            than propagated; in the generic error case
            ``self._best_estimator`` is set to ``None``.
        """
        data_schema = lale.helpers.fold_schema(X_train, y_train, self.cv,
                                               self.estimator.is_classifier())
        self.search_space: ConfigurationSpace = get_smac_space(
            self.estimator,
            lale_num_grids=self.lale_num_grids,
            data_schema=data_schema)
        # Scenario object
        scenario_options = {
            "run_obj": "quality",  # optimize quality (alternatively runtime)
            "runcount-limit": self.max_evals,  # maximum function evaluations
            "cs": self.search_space,  # configuration space
            "deterministic": "true",
            "abort_on_first_run_crash": False,
        }
        # Only bound the wall-clock time when a limit was configured.
        if self.max_opt_time is not None:
            scenario_options["wallclock_limit"] = self.max_opt_time
        self.scenario = Scenario(scenario_options)

        self.cv = check_cv(self.cv,
                           y=y_train,
                           classifier=self.estimator.is_classifier())

        def smac_train_test(trainable, X_train, y_train):
            """Return ``(score, log_loss, execution_time)`` for one candidate."""
            try:
                cv_score, logloss, execution_time = cross_val_score_track_trials(
                    trainable,
                    X_train,
                    y_train,
                    cv=self.cv,
                    scoring=self.scoring)
                logger.debug("Successful trial of SMAC")
            except BaseException as e:
                if not self.handle_cv_failure:
                    logger.debug("Error {} with pipeline:{}".format(
                        e, trainable.to_json()))
                    raise
                # Cross validation failed: fall back to the score obtained on
                # a single random 80/20 train/validation split.
                (
                    X_train_part,
                    X_validation,
                    y_train_part,
                    y_validation,
                ) = train_test_split(X_train, y_train, test_size=0.20)
                start = time.time()
                trained = trainable.fit(X_train_part, y_train_part)
                scorer = check_scoring(trainable, scoring=self.scoring)
                cv_score = scorer(trained, X_validation, y_validation)
                execution_time = time.time() - start
                try:
                    # Estimators without predict_proba (e.g. regressors, which
                    # this method accepts via is_classifier()) must not fail
                    # the trial after a valid score was computed, so the
                    # probability call lives inside the guarded region.
                    y_pred_proba = trained.predict_proba(X_validation)
                    logloss = log_loss(y_true=y_validation,
                                       y_pred=y_pred_proba)
                except BaseException:
                    logloss = 0
                    logger.debug("Warning, log loss cannot be computed")
            return cv_score, logloss, execution_time

        def f(trainable):
            """Objective handed to SMAC: ``self.best_score - score``."""
            try:
                score, _logloss, _execution_time = smac_train_test(
                    trainable, X_train=X_train, y_train=y_train)
                loss = self.best_score - score
            except BaseException as e:
                logger.warning(
                    f"Exception caught in SMACCV:{type(e)}, {traceback.format_exc()}, SMAC will set a cost_for_crash to MAXINT."
                )
                raise
            return loss

        try:
            smac = orig_SMAC(
                scenario=self.scenario,
                rng=np.random.RandomState(42),
                tae_runner=lale_op_smac_tae(self.estimator, f),
            )
            incumbent = smac.optimize()
            self.trials = smac.get_runhistory()
            trainable = lale_trainable_op_from_config(self.estimator,
                                                      incumbent)
            # Get the trainable corresponding to the best params and train it
            # on the entire training dataset.
            trained = trainable.fit(X_train, y_train)
            self._best_estimator = trained
        except BudgetExhaustedException:
            # NOTE: self._best_estimator is left untouched in this branch.
            logger.warning(
                "Maximum alloted optimization time exceeded. Optimization exited prematurely"
            )
        except BaseException as e:
            logger.warning("Error during optimization: {}".format(e))
            self._best_estimator = None

        return self
コード例 #4
0
    def dont_test_car_smac(self):
        """Tune a choice of three classifiers on the car dataset with SMAC.

        Loads the data with ``fetch_car``, label-encodes the targets, runs a
        SMAC search limited to ``run_count_limit`` evaluations over the
        planned pipeline, refits the incumbent on the training data and
        prints the test accuracy together with the incumbent's objective
        value.

        NOTE(review): presumably named ``dont_test_*`` so that test runners
        do not collect it -- confirm before renaming.
        """
        # Local imports are kept in their original order.
        import numpy as np

        from lale.datasets.auto_weka import fetch_car
        from sklearn.metrics import accuracy_score, make_scorer
        from sklearn.preprocessing import LabelEncoder
        import pandas as pd
        from lale.lib.weka import J48
        from lalegpl.lib.r import ArulesCBAClassifier
        from lale.operators import make_pipeline
        from lale.lib.lale import HyperoptClassifier
        from lale.lib.sklearn import LogisticRegression, KNeighborsClassifier
        from smac.scenario.scenario import Scenario
        from smac.facade.smac_facade import SMAC
        from smac.configspace import ConfigurationSpace

        (X_train, raw_y_train), (X_test, raw_y_test) = fetch_car()

        # Encode the labels as integers while keeping the target's name.
        target_name = raw_y_train.name
        encoder = LabelEncoder()
        y_train = pd.Series(encoder.fit_transform(raw_y_train), name=target_name)
        y_test = pd.Series(encoder.transform(raw_y_test), name=target_name)

        # Earlier variant that also included J48:
        # planned_pipeline = make_pipeline(J48() | ArulesCBAClassifier() | LogisticRegression() | KNeighborsClassifier())
        classifier_choice = (
            ArulesCBAClassifier() | KNeighborsClassifier() | LogisticRegression()
        )
        planned_pipeline = make_pipeline(classifier_choice)

        config_space: ConfigurationSpace = get_smac_space(planned_pipeline)
        print(config_space)
        # For a quick debugging run, the data can be cut down here:
        # X_train = X_train[0:20]
        # y_train = y_train[0:20]

        # Scenario object
        run_count_limit = 1
        scenario_options = {
            "run_obj": "quality",  # we optimize quality (alternatively runtime)
            "runcount-limit": run_count_limit,  # maximum function evaluations
            "cs": config_space,  # configuration space
            "deterministic": "true",
            "abort_on_first_run_crash": False,
        }
        scenario = Scenario(scenario_options)

        # Optimize, using a SMAC-object
        def f_min(op):
            return test_f_min(op, X_train, y_train, num_folds=2)

        tae = lale_op_smac_tae(planned_pipeline, f_min)

        print("Optimizing! Depending on your machine, this might take a few minutes.")
        smac = SMAC(
            scenario=scenario,
            rng=np.random.RandomState(42),
            tae_runner=tae,
        )
        incumbent = smac.optimize()

        # Refit the best configuration on the full training data and score it.
        best_trainable = lale_trainable_op_from_config(planned_pipeline, incumbent)
        best_trained = best_trainable.fit(X_train, y_train)
        accuracy = accuracy_score(y_test, best_trained.predict(X_test))
        print("Accuracy: %.2f" % (accuracy))

        inc_value = tae(incumbent)
        print("Optimized Value: %.2f" % (inc_value))
        print(f"Run count limit: {run_count_limit}")