Example #1
 def test_basic(self):
     bandit = self._bandit_cls()
     algo = Random(bandit)
     trials = Trials()
     # Note: 'async' became a reserved word in Python 3.7; this snippet targets the
     # older hyperopt API, where Experiment still accepted it as a keyword argument.
     experiment = Experiment(trials, algo, async=False)
     # Allow up to 50 suggested trials to sit in the queue before evaluation.
     experiment.max_queue_len = 50
     experiment.run(self._n_steps)
     print()
     print(self._bandit_cls)
     print(bandit.loss_target)
     print(trials.average_best_error(bandit))
     assert trials.average_best_error(bandit) - bandit.loss_target < .2
     print()
Example #2
 def test_basic(self):
     bandit = self._bandit_cls()
     print('bandit params', bandit.params)
     algo = Random(bandit)
     print('algo params', algo.vh.params)
     trials = Trials()
     experiment = Experiment(trials, algo, async=False)
     experiment.catch_bandit_exceptions = False
     experiment.max_queue_len = 50
     experiment.run(self._n_steps)
     print()
     print(self._bandit_cls)
     print(bandit.loss_target)
     print(trials.average_best_error(bandit))
     assert trials.average_best_error(bandit) - bandit.loss_target < .2
     print()
def opt_method(hsidata, initializers, resdir, max_evals):
    dataset_name = hsidata.dataset_name

    __location__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))

    configpath = os.path.join(__location__, 'datasets.cfg')
    parser = ConfigParser()
    parser.read(configpath)
    max_iter = parser.getint(dataset_name, 'max_iter')

    def objective_func(hsidata, hyperpars):

        Y = hsidata.data
        ref_endmembers = hsidata.ref_endmembers
        initializer = hyperpars.pop('initializer')
        init_endmembers = initials[initializer][0]
        init_abundances = initials[initializer][1]

        A, S, J, SAD = lhalf(ref_endmembers, init_endmembers,
                             init_abundances, Y, **hyperpars, verbose=True)

        MSE = mse(Y, A, np.transpose(S))
        S = S.reshape(hsidata.n_rows, hsidata.n_cols, hsidata.n_endmembers).transpose((1, 0, 2))
        results = {'endmembers': A, 'abundances': S, 'loss': J, 'SAD': SAD, 'MSE': MSE}
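        # Composite loss: the final spectral angle distance, inflated by two penalties on the
        # per-pixel abundance sums (their spread and their deviation from 1), so solutions
        # that respect the abundance sum-to-one constraint are preferred.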
        loss = SAD[-1] * (1 + np.std(np.sum(S, -1).flatten())) * (1 + np.abs(1 - np.mean(np.sum(S, -1).flatten())))
        return {'loss': loss, 'status': STATUS_OK, 'attachments': results}

    initials = {}
    initial_keys = []
    for key, value in initializers.items():
        initial_keys.append(key)
        initials[key] = (hsidata.initialize(value))

    space = {
        'max_iter': max_iter,
        'q': hp.uniform('lhalf_' + dataset_name + '_q', 0, 1),
        'delta': hp.lognormal('lhalf_' + dataset_name + '_delta', 0, 2),
        'initializer': hp.choice('lhalf_' + dataset_name + '_initializer', initial_keys)  # sample a key; objective_func indexes initials[initializer]
    }


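    # One log-normally distributed hyperparameter h per endmember (length matches hsidata.n_endmembers).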
    h = [hp.lognormal('lhalf_' + dataset_name + '_h' + str(i), 0, 1) for i in range(hsidata.n_endmembers)]

    space['h'] = h

    trials = Trials()

    pars = fmin(lambda x: objective_func(hsidata, x),
                space=space,
                algo=tpe.suggest,
                max_evals=max_evals,
                trials=trials,
                rstate=np.random.RandomState(random_seed))

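    # improvement_only (defined elsewhere in this module) is presumably a reducer that keeps
    # only losses improving on the running best, giving a monotone trace of the search.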
    improvements = reduce(improvement_only, trials.losses(), [])

    save_config(resdir, dataset_name, pars, trials.average_best_error())
    print(list(enumerate(initial_keys)))  # show which choice index maps to which initializer
    return improvements, pars, trials
Example #5
 def test_basic(self):
     bandit = self._bandit_cls()
     #print 'bandit params', bandit.params, bandit
     #print 'algo params', algo.vh.params
     trials = Trials()
     fmin(lambda x: x, bandit.expr,
          trials=trials,
          algo=suggest,
          max_evals=self._n_steps)
     assert trials.average_best_error(bandit) - bandit.loss_target < .2
Example #6
 def test_basic(self):
     domain = self._domain_cls()
     # print 'domain params', domain.params, domain
     # print 'algo params', algo.vh.params
     trials = Trials()
     fmin(lambda x: x, domain.expr,
          trials=trials,
          algo=suggest,
          max_evals=self._n_steps)
     assert trials.average_best_error(domain) - domain.loss_target < .2
Example #7
class HyperoptRegressorImpl:
    def __init__(self,
                 estimator=None,
                 max_evals=50,
                 cv=5,
                 handle_cv_failure=False,
                 scoring='r2',
                 best_score=1.0,
                 max_opt_time=None,
                 pgo: Optional[PGO] = None):
        self.max_evals = max_evals
        if estimator is None:
            self.estimator = RandomForestRegressor
        else:
            self.estimator = estimator
        self.search_space = hp.choice(
            'meta_model', [hyperopt_search_space(self.estimator, pgo=pgo)])
        self.scoring = scoring
        self.best_score = best_score
        self.handle_cv_failure = handle_cv_failure
        self.cv = cv
        self.trials = Trials()
        self.max_opt_time = max_opt_time

    def fit(self, X_train, y_train):
        opt_start_time = time.time()

        def hyperopt_train_test(params, X_train, y_train):
            warnings.filterwarnings("ignore")

            reg = create_instance_from_hyperopt_search_space(
                self.estimator, params)
            try:
                cv_score, _, execution_time = cross_val_score_track_trials(
                    reg,
                    X_train,
                    y_train,
                    cv=KFold(self.cv),
                    scoring=self.scoring)
                logger.debug("Successful trial of hyperopt")
            except BaseException as e:
                #If there is any error in cross validation, use the accuracy based on a random train-test split as the evaluation criterion
                if self.handle_cv_failure:
                    X_train_part, X_validation, y_train_part, y_validation = train_test_split(
                        X_train, y_train, test_size=0.20)
                    start = time.time()
                    reg_trained = reg.fit(X_train_part, y_train_part)
                    scorer = check_scoring(reg, scoring=self.scoring)
                    cv_score = scorer(reg_trained, X_validation, y_validation)
                    execution_time = time.time() - start
                else:
                    logger.debug(e)
                    logger.debug("Error {} with pipeline:{}".format(
                        e, reg.to_json()))
                    raise e

            return cv_score, execution_time

        def get_final_trained_reg(params, X_train, y_train):
            warnings.filterwarnings("ignore")
            reg = create_instance_from_hyperopt_search_space(
                self.estimator, params)
            reg = reg.fit(X_train, y_train)
            return reg

        def f(params):
            current_time = time.time()
            if (self.max_opt_time is not None) and (
                (current_time - opt_start_time) > self.max_opt_time):
                # if max optimization time set, and we have crossed it, exit optimization completely
                sys.exit(0)

            return_dict = {}
            try:
                score, execution_time = hyperopt_train_test(params,
                                                            X_train=X_train,
                                                            y_train=y_train)
                return_dict = {
                    'loss': self.best_score - score,
                    'time': execution_time,
                    'status': STATUS_OK
                }
            except BaseException as e:

                logger.warning(
                    f'Exception caught in HyperoptRegressor: {type(e)}, {traceback.format_exc()} with hyperparams: {params}, setting status to FAIL'
                )
                return_dict = {'status': STATUS_FAIL}
            return return_dict

        try:
            fmin(f,
                 self.search_space,
                 algo=tpe.suggest,
                 max_evals=self.max_evals,
                 trials=self.trials,
                 rstate=np.random.RandomState(SEED))
        except SystemExit:
            logger.warning(
                'Maximum allotted optimization time exceeded. Optimization exited prematurely'
            )

        try:
            best_params = space_eval(self.search_space, self.trials.argmin)
            logger.info(
                'best score: {:.1%}\nbest hyperparams found using {} hyperopt trials: {}'
                .format(self.best_score - self.trials.average_best_error(),
                        self.max_evals, best_params))
            trained_reg = get_final_trained_reg(best_params, X_train, y_train)
            self.best_estimator = trained_reg
        except BaseException as e:
            logger.warning(
                'Unable to extract the best parameters from optimization, the error: {}'
                .format(e))
            trained_reg = None

        return self

    def predict(self, X_eval):
        import warnings
        warnings.filterwarnings("ignore")
        reg = self.best_estimator
        try:
            predictions = reg.predict(X_eval)
        except ValueError as e:
            logger.warning(
                "ValueError in predicting using regressor:{}, the error is:{}".
                format(reg, e))
            predictions = None

        return predictions

    def get_trials(self):
        return self.trials
Example #8
class HyperoptClassifierImpl:
    def __init__(self,
                 estimator=None,
                 max_evals=50,
                 cv=5,
                 handle_cv_failure=False,
                 scoring='accuracy',
                 best_score=0.0,
                 max_opt_time=None,
                 pgo: Optional[PGO] = None):
        """ Instantiate the HyperoptClassifier that will use the given estimator and other parameters to select the 
        best performing trainable instantiation of the estimator. This optimizer uses negation of accuracy_score 
        as the performance metric to be minimized by Hyperopt.

        Parameters
        ----------
        estimator : lale.operators.IndividualOp or lale.operators.Pipeline, optional
            A valid Lale individual operator or pipeline, by default None
        max_evals : int, optional
            Number of trials of Hyperopt search, by default 50
        cv : an integer or an object that has a split function as a generator yielding (train, test) splits as arrays of indices.
            Integer value is used as number of folds in sklearn.model_selection.StratifiedKFold, default is 5.
            Note that any of the iterators from https://scikit-learn.org/stable/modules/cross_validation.html#cross-validation-iterators can be used here.
            The fit method performs cross validation on the input dataset per trial,
            and uses the mean cross validation performance for optimization. This behavior is also impacted by handle_cv_failure flag, 
            by default 5
        handle_cv_failure : bool, optional
            A boolean flag indicating how to deal with cross validation failure for a trial.
            If True, the trial is continued by doing an 80-20 percent train-validation split of the dataset input to fit
            and reporting the score on the validation part.
            If False, the trial is terminated by assigning accuracy to zero.
            By default False.
        scoring: string or a scorer object created using 
            https://scikit-learn.org/stable/modules/generated/sklearn.metrics.make_scorer.html#sklearn.metrics.make_scorer.
            A string from sklearn.metrics.SCORERS.keys() can be used or a scorer created from one of 
            sklearn.metrics (https://scikit-learn.org/stable/modules/classes.html#module-sklearn.metrics).
            A completely custom scorer object can be created from a python function following the example at 
            https://scikit-learn.org/stable/modules/model_evaluation.html
            The metric has to return a scalar value, and note that scikit-learn's scorer object always returns values such that
            higher score is better. Since Hyperopt solves a minimization problem, we negate the score value to pass to Hyperopt.
            by default 'accuracy'.
        best_score : float, optional
            The best score for the specified scorer. This allows us to return a loss to hyperopt that is
            greater than or equal to zero, where zero is the best loss. By default, this is set to zero to
            follow current behavior.
        max_opt_time : float, optional
            Maximum amount of time in seconds for the optimization. By default, None, implying no runtime
            bound.
        pgo : Optional[PGO], optional
            [description], by default None
        
        Raises
        ------
        e
            [description]

        Examples
        --------
        >>> from sklearn.metrics import make_scorer, f1_score, accuracy_score
        >>> lr = LogisticRegression()
        >>> clf = HyperoptClassifier(estimator=lr, scoring='accuracy', cv=5, max_evals=2)
        >>> from sklearn import datasets
        >>> diabetes = datasets.load_diabetes()
        >>> X = diabetes.data[:150]
        >>> y = diabetes.target[:150]
        >>> trained = clf.fit(X, y)
        >>> predictions = trained.predict(X)

        Other scoring metrics:

        >>> clf = HyperoptClassifier(estimator=lr, scoring=make_scorer(f1_score, average='macro'), cv=3, max_evals=2)

        """
        self.max_evals = max_evals
        if estimator is None:
            self.estimator = LogisticRegression
        else:
            self.estimator = estimator
        self.search_space = hp.choice(
            'meta_model', [hyperopt_search_space(self.estimator, pgo=pgo)])
        self.scoring = scoring
        self.best_score = best_score
        self.handle_cv_failure = handle_cv_failure
        self.cv = cv
        self.trials = Trials()
        self.max_opt_time = max_opt_time

    def fit(self, X_train, y_train):
        opt_start_time = time.time()

        def hyperopt_train_test(params, X_train, y_train):
            warnings.filterwarnings("ignore")

            clf = create_instance_from_hyperopt_search_space(
                self.estimator, params)
            try:
                cv_score, logloss, execution_time = cross_val_score_track_trials(
                    clf, X_train, y_train, cv=self.cv, scoring=self.scoring)
                logger.debug("Successful trial of hyperopt")
            except BaseException as e:
                #If there is any error in cross validation, use the score based on a random train-test split as the evaluation criterion
                if self.handle_cv_failure:
                    X_train_part, X_validation, y_train_part, y_validation = train_test_split(
                        X_train, y_train, test_size=0.20)
                    start = time.time()
                    clf_trained = clf.fit(X_train_part, y_train_part)
                    #predictions = clf_trained.predict(X_validation)
                    scorer = check_scoring(clf, scoring=self.scoring)
                    cv_score = scorer(clf_trained, X_validation, y_validation)
                    execution_time = time.time() - start
                    y_pred_proba = clf_trained.predict_proba(X_validation)
                    try:
                        logloss = log_loss(y_true=y_validation,
                                           y_pred=y_pred_proba)
                    except BaseException:
                        logloss = 0
                        logger.debug("Warning, log loss cannot be computed")
                else:
                    logger.debug(e)
                    logger.debug("Error {} with pipeline:{}".format(
                        e, clf.to_json()))
                    raise e
            return cv_score, logloss, execution_time

        def get_final_trained_clf(params, X_train, y_train):
            warnings.filterwarnings("ignore")
            clf = create_instance_from_hyperopt_search_space(
                self.estimator, params)
            clf = clf.fit(X_train, y_train)
            return clf

        def f(params):
            current_time = time.time()
            if (self.max_opt_time is not None) and (
                (current_time - opt_start_time) > self.max_opt_time):
                # if max optimization time set, and we have crossed it, exit optimization completely
                sys.exit(0)

            params_to_save = copy.deepcopy(params)
            return_dict = {}
            try:
                score, logloss, execution_time = hyperopt_train_test(
                    params, X_train=X_train, y_train=y_train)
                return_dict = {
                    'loss': self.best_score - score,
                    'time': execution_time,
                    'log_loss': logloss,
                    'status': STATUS_OK,
                    'params': params_to_save
                }
            except BaseException as e:
                logger.warning(
                    "Exception caught in HyperoptClassifier:{}, setting status to FAIL"
                    .format(e))
                return_dict = {'status': STATUS_FAIL}
            return return_dict

        try:
            fmin(f,
                 self.search_space,
                 algo=tpe.suggest,
                 max_evals=self.max_evals,
                 trials=self.trials,
                 rstate=np.random.RandomState(SEED))
        except SystemExit:
            logger.warning(
                'Maximum allotted optimization time exceeded. Optimization exited prematurely'
            )

        try:
            best_params = space_eval(self.search_space, self.trials.argmin)
            logger.info(
                'best score: {:.1%}\nbest hyperparams found using {} hyperopt trials: {}'
                .format(self.best_score - self.trials.average_best_error(),
                        self.max_evals, best_params))
            trained_clf = get_final_trained_clf(best_params, X_train, y_train)
            self.best_estimator = trained_clf
        except BaseException as e:
            logger.warning(
                'Unable to extract the best parameters from optimization, the error: {}'
                .format(e))
            trained_clf = None

        return self

    def predict(self, X_eval):
        import warnings
        warnings.filterwarnings("ignore")
        clf = self.best_estimator
        try:
            predictions = clf.predict(X_eval)
        except ValueError as e:
            logger.warning(
                "ValueError in predicting using classifier:{}, the error is:{}"
                .format(clf, e))
            predictions = None

        return predictions

    def get_trials(self):
        return self.trials
class HyperoptRegressor():
    def __init__(self,
                 model=None,
                 max_evals=50,
                 cv=5,
                 handle_cv_failure=False,
                 pgo: Optional[PGO] = None):
        self.max_evals = max_evals
        if model is None:
            self.model = RandomForestRegressor
        else:
            self.model = model
        self.search_space = hp.choice(
            'meta_model', [hyperopt_search_space(self.model, pgo=pgo)])
        self.handle_cv_failure = handle_cv_failure
        self.cv = cv
        self.trials = Trials()

    def fit(self, X_train, y_train):
        def hyperopt_train_test(params, X_train, y_train):
            warnings.filterwarnings("ignore")

            reg = create_instance_from_hyperopt_search_space(
                self.model, params)
            try:
                cv_score, logloss, execution_time = cross_val_score_track_trials(
                    reg, X_train, y_train, cv=KFold(self.cv), scoring=r2_score)
                logger.debug("Successful trial of hyperopt")
            except BaseException as e:
                #If there is any error in cross validation, use the accuracy based on a random train-test split as the evaluation criterion
                if self.handle_cv_failure:
                    X_train_part, X_validation, y_train_part, y_validation = train_test_split(
                        X_train, y_train, test_size=0.20)
                    start = time.time()
                    reg_trained = reg.fit(X_train_part, y_train_part)
                    predictions = reg_trained.predict(X_validation)
                    execution_time = time.time() - start
                    cv_score = r2_score(y_validation, predictions)
                    # log loss is not meaningful for regression; set it so the return below does not raise NameError
                    logloss = 0
                else:
                    logger.debug(e)
                    logger.debug("Error {} with pipeline:{}".format(
                        e, reg.to_json()))
                    raise e

            return cv_score, logloss, execution_time

        def get_final_trained_reg(params, X_train, y_train):
            warnings.filterwarnings("ignore")
            reg = create_instance_from_hyperopt_search_space(
                self.model, params)
            reg = reg.fit(X_train, y_train)
            return reg

        def f(params):
            try:
                r_squared, logloss, execution_time = hyperopt_train_test(
                    params, X_train=X_train, y_train=y_train)
            except BaseException as e:
                logger.warning(
                    "Exception caught in HyperoptRegressor:{} with hyperparams:{}, setting R2 to zero"
                    .format(e, params))
                r_squared = 0
                execution_time = 0
                logloss = 0
            return {
                'loss': -r_squared,
                'time': execution_time,
                'log_loss': logloss,
                'status': STATUS_OK
            }

        fmin(f,
             self.search_space,
             algo=tpe.suggest,
             max_evals=self.max_evals,
             trials=self.trials,
             rstate=np.random.RandomState(SEED))
        best_params = space_eval(self.search_space, self.trials.argmin)
        logger.info(
            'best R2 score: {:.1%}\nbest hyperparams found using {} hyperopt trials: {}'
            .format(-1 * self.trials.average_best_error(), self.max_evals,
                    best_params))
        trained_reg = get_final_trained_reg(best_params, X_train, y_train)

        return trained_reg

    def predict(self, X_eval):
        import warnings
        warnings.filterwarnings("ignore")
        reg = self.model
        try:
            predictions = reg.predict(X_eval)
        except ValueError as e:
            logger.warning(
                "ValueError in predicting using classifier:{}, the error is:{}"
                .format(reg, e))
            predictions = None

        return predictions

    def get_trials(self):
        return self.trials
Example #10
class HyperoptImpl:

    def __init__(self, estimator=None, max_evals=50, cv=5, handle_cv_failure=False, 
                scoring='accuracy', best_score=0.0, max_opt_time=None, max_eval_time=None, 
                pgo:Optional[PGO]=None, show_progressbar=True, args_to_scorer=None,
                verbose=False):
        self.max_evals = max_evals
        if estimator is None:
            self.estimator = LogisticRegression()
        else:
            self.estimator = estimator
        self.search_space = hp.choice('meta_model', [hyperopt_search_space(self.estimator, pgo=pgo)])
        self.scoring = scoring
        self.best_score = best_score
        self.handle_cv_failure = handle_cv_failure
        self.cv = cv
        self._trials = Trials()
        self.max_opt_time = max_opt_time
        self.max_eval_time = max_eval_time
        self.show_progressbar = show_progressbar
        if args_to_scorer is not None:
            self.args_to_scorer = args_to_scorer
        else:
            self.args_to_scorer = {}
        self.verbose = verbose


    def fit(self, X_train, y_train):
        opt_start_time = time.time()
        self.cv = check_cv(self.cv, y = y_train, classifier=True) #TODO: Replace the classifier flag value by using tags?
        def hyperopt_train_test(params, X_train, y_train):
            warnings.filterwarnings("ignore")

            trainable = create_instance_from_hyperopt_search_space(self.estimator, params)
            try:
                cv_score, logloss, execution_time = cross_val_score_track_trials(trainable, X_train, y_train, cv=self.cv, scoring=self.scoring, args_to_scorer=self.args_to_scorer)
                logger.debug("Successful trial of hyperopt with hyperparameters:{}".format(params))
            except BaseException as e:
                #If there is any error in cross validation, use the score based on a random train-test split as the evaluation criterion
                if self.handle_cv_failure:
                    X_train_part, X_validation, y_train_part, y_validation = train_test_split(X_train, y_train, test_size=0.20)
                    start = time.time()
                    trained = trainable.fit(X_train_part, y_train_part)
                    scorer = check_scoring(trainable, scoring=self.scoring)
                    cv_score  = scorer(trained, X_validation, y_validation, **self.args_to_scorer)
                    execution_time = time.time() - start
                    y_pred_proba = trained.predict_proba(X_validation)
                    try:
                        logloss = log_loss(y_true=y_validation, y_pred=y_pred_proba)
                    except BaseException:
                        logloss = 0
                        logger.debug("Warning, log loss cannot be computed")
                else:
                    logger.debug(e)
                    logger.debug("Error {} with pipeline:{}".format(e, trainable.to_json()))
                    raise e
            return cv_score, logloss, execution_time
            
            
        def proc_train_test(params, X_train, y_train, return_dict):
            return_dict['params'] = copy.deepcopy(params)
            try:
                score, logloss, execution_time = hyperopt_train_test(params, X_train=X_train, y_train=y_train)
                return_dict['loss'] = self.best_score - score
                return_dict['time'] = execution_time
                return_dict['log_loss'] = logloss
                return_dict['status'] = STATUS_OK
            except BaseException as e:
                logger.warning(f"Exception caught in Hyperopt:{type(e)}, {traceback.format_exc()} with hyperparams: {params}, setting status to FAIL")
                return_dict['status'] = STATUS_FAIL
                return_dict['error_msg'] = f"Exception caught in Hyperopt:{type(e)}, {traceback.format_exc()} with hyperparams: {params}"
                if self.verbose:
                    print(return_dict['error_msg'])

        def get_final_trained_estimator(params, X_train, y_train):
            warnings.filterwarnings("ignore")
            trainable = create_instance_from_hyperopt_search_space(self.estimator, params)
            trained = trainable.fit(X_train, y_train)
            return trained

        def f(params):
            current_time = time.time()
            if (self.max_opt_time is not None) and ((current_time - opt_start_time) > self.max_opt_time) :
                # if max optimization time set, and we have crossed it, exit optimization completely
                sys.exit(0)
            if self.max_eval_time:
                # Run the trial evaluation in a subprocess that can be interrupted
                manager = multiprocessing.Manager()
                proc_dict = manager.dict()
                p = multiprocessing.Process(
                    target=proc_train_test,
                    args=(params, X_train, y_train, proc_dict))
                p.start()
                p.join(self.max_eval_time)
                if p.is_alive():
                    p.terminate()
                    p.join()
                    logger.warning(f"Maximum alloted evaluation time exceeded. with hyperparams: {params}, setting status to FAIL")
                    proc_dict['status'] = STATUS_FAIL
                if 'status' not in proc_dict:
                    logger.warning(f"Corrupted results, setting status to FAIL")
                    proc_dict['status'] = STATUS_FAIL
            else:
                proc_dict = {}
                proc_train_test(params, X_train, y_train, proc_dict)
            return proc_dict

        try :
            fmin(f, self.search_space, algo=tpe.suggest, max_evals=self.max_evals, trials=self._trials, rstate=np.random.RandomState(SEED),
            show_progressbar=self.show_progressbar)
        except SystemExit :
            logger.warning('Maximum allotted optimization time exceeded. Optimization exited prematurely')
        except AllTrialsFailed:
            self._best_estimator = None
            if STATUS_OK not in self._trials.statuses():
                raise ValueError('Error from hyperopt, none of the trials succeeded.')

        try :
            best_params = space_eval(self.search_space, self._trials.argmin)
            logger.info(
                'best score: {:.1%}\nbest hyperparams found using {} hyperopt trials: {}'.format(
                    self.best_score - self._trials.average_best_error(), self.max_evals, best_params
                )
            )
            trained = get_final_trained_estimator(best_params, X_train, y_train)
            self._best_estimator = trained
        except BaseException as e :
            logger.warning('Unable to extract the best parameters from optimization, the error: {}'.format(e))
            self._best_estimator = None

        return self

    def predict(self, X_eval):
        import warnings
        warnings.filterwarnings("ignore")
        if self._best_estimator is None:
            raise ValueError("Can not predict as the best estimator is None. Either an attempt to call `predict` "
        "before calling `fit` or all the trials during `fit` failed.")
        trained = self._best_estimator
        try:
            predictions = trained.predict(X_eval)
        except ValueError as e:
            logger.warning("ValueError in predicting using Hyperopt:{}, the error is:{}".format(trained, e))
            predictions = None

        return predictions

    def summary(self):
        """Table summarizing the trial results (ID, loss, time, log_loss, status).

Returns
-------
result : DataFrame"""
        def make_record(trial_dict):
            # Fields missing from failed trials default to NaN via .get().
            return {
                'name': f'p{trial_dict["tid"]}',
                'tid': trial_dict['tid'],
                'loss': trial_dict['result'].get('loss', float('nan')),
                'time': trial_dict['result'].get('time', float('nan')),
                'log_loss': trial_dict['result'].get('log_loss', float('nan')),
                'status': trial_dict['result']['status']}
        records = [make_record(td) for td in self._trials.trials]
        result = pd.DataFrame.from_records(records, index='name')
        return result

    def get_pipeline(self, pipeline_name=None, astype='lale'):
        """Retrieve one of the trials.

Parameters
----------
pipeline_name : union type, default None

    - string
        Key for table returned by summary(), return a trainable pipeline.

    - None
        When not specified, return the best trained pipeline found.

astype : 'lale' or 'sklearn', default 'lale'
    Type of resulting pipeline.

Returns
-------
result : Trained operator if best, trainable operator otherwise.
"""
        if pipeline_name is None:
            result = getattr(self, '_best_estimator', None)
        else:
            tid = int(pipeline_name[1:])
            params = self._trials.trials[tid]['result']['params']
            result = create_instance_from_hyperopt_search_space(
                self.estimator, params)
        if result is None or astype == 'lale':
            return result
        assert astype == 'sklearn', astype
        return result.export_to_sklearn_pipeline()
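A minimal usage sketch for the summary() and get_pipeline() methods of the class above. The dataset and hyperparameter values are illustrative assumptions, not part of the original listing; the default estimator (a Lale LogisticRegression) is used.

from sklearn.datasets import load_iris

X, y = load_iris(return_X_y=True)
opt = HyperoptImpl(max_evals=10, cv=3, scoring='accuracy').fit(X, y)
print(opt.summary())                           # one row per trial: loss, time, log_loss, status
best = opt.get_pipeline()                      # the best trained pipeline found during fit
p3 = opt.get_pipeline('p3', astype='sklearn')  # trial 3 rebuilt as an sklearn pipeline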
Example #11
class TPEOptimization(BaseOptimization):
    def __init__(self, sorter, recording, gt_sorting, params_to_opt,
                 space=None, run_schedule=[100], metric='accuracy',
                 recdir=None, outfile=None, x0=None, y0=None):

        BaseOptimization.__init__(self, sorter=sorter, recording=recording,
                                  gt_sorting=gt_sorting,
                                  params_to_opt=params_to_opt,
                                  space=space, run_schedule=run_schedule,
                                  metric=metric, recdir=recdir, outfile=outfile,
                                  x0=x0, y0=y0)
        self.trials = Trials()
        self.space = self.define_space(space)

    def run(self):
        results = self.optimise(
                self.params_to_opt, self.function_wrapper, self.run_schedule)
        self.results_obj = results
        if self.outfile is not None:
            self.save_results(self.outfile)

    def optimise(self, parameter_definitions, function, run_schedule):
        start_time = time.time()
        best = hyperopt.fmin(function,
                             self.space,
                             algo=tpe.suggest,
                             max_evals=run_schedule[0],
                             trials=self.trials,
                             show_progressbar=False)

        results_obj = self.get_optimization_details()
        results_obj['time_taken'] = time.time() - start_time
        print("--- %s seconds ---" % (time.time() - start_time))
        return results_obj

    def define_space(self, space):
        if space is not None:
            return space
        space = {}
        for key, value in self.params_to_opt.items():
            if type(value) is list:
                space[key] = hp.choice(key, value)
            if type(value[0]) is int:
                space[key] = hp.quniform(key, value[0], value[1], 1)
            if type(value[0]) is float:
                space[key] = hp.uniform(key, value[0], value[1])
        return space

    def get_best_params(self):
        best_params = {}
        for key, value in self.params_to_opt.items():
            if type(value[0]) is int:
                best_params[key] = int(self.trials.best_trial['misc']['vals'][key][0])
            else:
                best_params[key] = self.trials.best_trial['misc']['vals'][key][0]
        return best_params

    def get_trials(self):
        return self.trials
    
    def get_optimization_details(self):
        results_obj = {}
        results_obj['optimal_params'] = self.get_best_params() #self.trials.best_trial['misc']['vals']
        results_obj['best_score'] = -self.trials.best_trial['result']['loss']
        results_obj['params_evaluated'] = self.trials.vals
        results_obj['scores'] = [t['result']['loss'] for t in self.trials.trials] #self.results_obj#[]
        results_obj['iter_min_found'] = self.trials.best_trial['tid']
        results_obj['trials'] = self.trials
        results_obj['avg_best_score'] = self.trials.average_best_error()
        results_obj['total_iter'] = self.iteration
        return results_obj

    def plot_convergence(self):
        ys = [t['result']['loss'] for t in self.trials.trials]

        plt.figure(figsize=(15, 3.5))
        ax = plt.gca()
        ax.grid()
        n_calls = len(ys)
        mins = [np.min(ys[:i]) for i in range(1, n_calls + 1)]
        ax.plot(range(1, n_calls + 1), mins, c='b', marker=".", markersize=12, lw=2)
        plt.xlabel('n_calls')
        plt.ylabel('min(-Accuracy)')
        plt.title('Convergence of TPE in sorting optimisation')

    def plot_histograms(self):

        parameters = list(self.trials.trials[0]['misc']['vals'].keys())
        n = len(parameters)
        cmap = plt.cm.jet
        for i, val in enumerate(parameters):
            xs = np.array([t['misc']['vals'][val] for t in self.trials.trials]).ravel()
            ys = [-t['result']['loss'] for t in self.trials.trials]

            ys = np.array(ys)
            plt.figure(figsize=(3, 3))
            plt.hist(xs)
            plt.title(val)
Example #12
class HyperoptClassifier():
    def __init__(self,
                 model=None,
                 max_evals=50,
                 cv=5,
                 handle_cv_failure=False,
                 pgo: Optional[PGO] = None):
        """ Instantiate the HyperoptClassifier that will use the given model and other parameters to select the 
        best performing trainable instantiation of the model. This optimizer uses negation of accuracy_score 
        as the performance metric to be minimized by Hyperopt.

        Parameters
        ----------
        model : lale.operators.IndividualOp or lale.operators.Pipeline, optional
            A valid Lale individual operator or pipeline, by default None
        max_evals : int, optional
            Number of trials of Hyperopt search, by default 50
        cv : an integer or an object that has a split function as a generator yielding (train, test) splits as arrays of indices.
            Integer value is used as number of folds in sklearn.model_selection.StratifiedKFold, default is 5.
            Note that any of the iterators from https://scikit-learn.org/stable/modules/cross_validation.html#cross-validation-iterators can be used here.
            The fit method performs cross validation on the input dataset per trial,
            and uses the mean cross validation performance for optimization. This behavior is also impacted by handle_cv_failure flag, 
            by default 5
        handle_cv_failure : bool, optional
            A boolean flag indicating how to deal with cross validation failure for a trial.
            If True, the trial is continued by doing an 80-20 percent train-validation split of the dataset input to fit
            and reporting the accuracy on the validation part.
            If False, the trial is terminated by assigning accuracy to zero.
            By default False.
        pgo : Optional[PGO], optional
            [description], by default None
        
        Raises
        ------
        e
            [description]
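
        Examples
        --------
        A minimal usage sketch (assuming LogisticRegression here is the Lale operator imported elsewhere in this module):

        >>> lr = LogisticRegression()
        >>> clf = HyperoptClassifier(model=lr, cv=3, max_evals=2)
        >>> from sklearn import datasets
        >>> iris = datasets.load_iris()
        >>> trained = clf.fit(iris.data, iris.target)
        >>> predictions = trained.predict(iris.data)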
        """
        self.max_evals = max_evals
        if model is None:
            self.model = LogisticRegression
        else:
            self.model = model
        self.search_space = hp.choice(
            'meta_model', [hyperopt_search_space(self.model, pgo=pgo)])
        self.handle_cv_failure = handle_cv_failure
        self.cv = cv
        self.trials = Trials()

    def fit(self, X_train, y_train):
        def hyperopt_train_test(params, X_train, y_train):
            warnings.filterwarnings("ignore")

            clf = create_instance_from_hyperopt_search_space(
                self.model, params)
            try:
                cv_score, logloss, execution_time = cross_val_score_track_trials(
                    clf, X_train, y_train, cv=self.cv)
                logger.debug("Successful trial of hyperopt")
            except BaseException as e:
                #If there is any error in cross validation, use the accuracy based on a random train-test split as the evaluation criterion
                if self.handle_cv_failure:
                    X_train_part, X_validation, y_train_part, y_validation = train_test_split(
                        X_train, y_train, test_size=0.20)
                    start = time.time()
                    clf_trained = clf.fit(X_train_part, y_train_part)
                    predictions = clf_trained.predict(X_validation)
                    execution_time = time.time() - start
                    y_pred_proba = clf_trained.predict_proba(X_validation)
                    try:
                        logloss = log_loss(y_true=y_validation,
                                           y_pred=y_pred_proba)
                    except BaseException:
                        logloss = 0
                        logger.debug("Warning, log loss cannot be computed")
                    cv_score = accuracy_score(
                        y_validation, [round(pred) for pred in predictions])
                else:
                    logger.debug(e)
                    logger.debug("Error {} with pipeline:{}".format(
                        e, clf.to_json()))
                    raise e
            #print("TRIALS")
            #print(json.dumps(self.get_trials().trials, default = myconverter, indent=4))
            return cv_score, logloss, execution_time

        def get_final_trained_clf(params, X_train, y_train):
            warnings.filterwarnings("ignore")
            clf = create_instance_from_hyperopt_search_space(
                self.model, params)
            clf = clf.fit(X_train, y_train)
            return clf

        def f(params):
            params_to_save = copy.deepcopy(params)
            try:
                acc, logloss, execution_time = hyperopt_train_test(
                    params, X_train=X_train, y_train=y_train)
            except BaseException as e:
                logger.warning(
                    "Exception caught in HyperoptClassifer:{}, setting accuracy to zero"
                    .format(e))
                acc = 0
                execution_time = 0
                logloss = 0
            return {
                'loss': -acc,
                'time': execution_time,
                'log_loss': logloss,
                'status': STATUS_OK,
                'params': params_to_save
            }

        fmin(f,
             self.search_space,
             algo=tpe.suggest,
             max_evals=self.max_evals,
             trials=self.trials,
             rstate=np.random.RandomState(SEED))
        best_params = space_eval(self.search_space, self.trials.argmin)
        logger.info(
            'best accuracy: {:.1%}\nbest hyperparams found using {} hyperopt trials: {}'
            .format(-1 * self.trials.average_best_error(), self.max_evals,
                    best_params))
        trained_clf = get_final_trained_clf(best_params, X_train, y_train)

        return trained_clf

    def predict(self, X_eval):
        import warnings
        warnings.filterwarnings("ignore")
        clf = self.model
        try:
            predictions = clf.predict(X_eval)
        except ValueError as e:
            logger.warning(
                "ValueError in predicting using classifier:{}, the error is:{}"
                .format(clf, e))
            predictions = None

        return predictions

    def get_trials(self):
        return self.trials
def opt_method(hsidata, initializers, resdir, max_evals):
    dataset_name = hsidata.dataset_name

    __location__ = os.path.realpath(
        os.path.join(os.getcwd(), os.path.dirname(__file__)))

    mleng = matlab.engine.start_matlab()
    mleng.addpath(__location__)

    configpath = os.path.join(__location__, 'datasets.cfg')
    parser = ConfigParser()
    parser.read(configpath)
    max_iter = parser.getint(dataset_name, 'max_iter')

    y = hsidata.data
    Y = matlab.double(y.tolist())
    ref_endmembers = matlab.double(hsidata.ref_endmembers.tolist())
    init_endmembers = matlab.double(hsidata.init_endmembers.tolist())
    init_abundances = matlab.double(hsidata.init_abundances.tolist())
    verbose = True

    def objective_func(hyperpars):

        output = mleng.lhalf(ref_endmembers,
                             init_endmembers,
                             init_abundances,
                             Y,
                             hyperpars['q'],
                             hyperpars['delta'],
                             hyperpars['h'],
                             hyperpars['max_iter'],
                             verbose,
                             nargout=5)
        A = np.array(output[0])
        S = np.array(output[1])
        try:
            J = np.array(output[2]).tolist()[0]
        except TypeError:
            J = [output[2]]
        SAD = np.array(output[3]).tolist()[0]

        MSE = mse(y, A, np.transpose(S))

        S = S.reshape(hsidata.n_rows, hsidata.n_cols,
                      hsidata.n_endmembers).transpose((1, 0, 2))

        results = {
            'endmembers': A,
            'abundances': S,
            'loss': J,
            'SAD': SAD,
            'MSE': MSE
        }

        return {'loss': SAD[-1], 'status': STATUS_OK, 'attachments': results}

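    # scope.matlab_double is presumably a custom pyll expression (registered elsewhere via
    # hyperopt.pyll.scope.define) that converts sampled values to matlab.double at evaluation
    # time, since the MATLAB lhalf routine expects MATLAB arrays.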
    space = {
        'max_iter':
        max_iter,
        'q':
        scope.matlab_double(hp.uniform('lhalf_' + dataset_name + '_q', 0, 1)),
        'delta':
        scope.matlab_double(
            hp.uniform('lhalf_' + dataset_name + '_delta', 0, 1000))
    }

    h = scope.matlab_double([
        hp.uniform('lhalf_' + dataset_name + '_h' + str(i), 0, 1000)
        for i in range(hsidata.n_endmembers)
    ])

    space['h'] = h

    trials = Trials()

    pars = fmin(lambda x: objective_func(x),
                space=space,
                algo=tpe.suggest,
                max_evals=max_evals,
                trials=trials,
                rstate=np.random.RandomState(random_seed))

    mleng.quit()

    improvements = reduce(improvement_only, trials.losses(), [])

    save_config(resdir, dataset_name, pars, trials.average_best_error())

    return improvements, pars, trials