コード例 #1
0
 def __init__(self,
              estimator=None,
              max_evals=50,
              algo='tpe',
              cv=5,
              handle_cv_failure=False,
              scoring='accuracy',
              best_score=0.0,
              max_opt_time=None,
              max_eval_time=None,
              pgo: Optional[PGO] = None,
              show_progressbar=True,
              args_to_scorer=None,
              verbose=False):
     """Record the optimizer settings and build the Hyperopt search space.

     Parameters
     ----------
     estimator : optional
         Operator or pipeline to tune. When None, a new
         ``LogisticRegression()`` instance is used.
     max_evals, algo, cv, handle_cv_failure, scoring, best_score,
     max_opt_time, max_eval_time, show_progressbar, verbose
         Stored unchanged on the instance under the same names.
     pgo : Optional[PGO], optional
         Forwarded to ``hyperopt_search_space``; not stored on the instance
         by this constructor.
     args_to_scorer : dict, optional
         Stored as ``self.args_to_scorer``; None is replaced by a new empty
         dict.
     """
     self.max_evals = max_evals
     # Default to a fresh LogisticRegression() when the caller gives nothing.
     self.estimator = LogisticRegression() if estimator is None else estimator
     # The search space is a one-option choice wrapping the estimator's space.
     estimator_space = hyperopt_search_space(self.estimator, pgo=pgo)
     self.search_space = hyperopt.hp.choice('meta_model', [estimator_space])
     self.algo = algo
     self.scoring = scoring
     self.best_score = best_score
     self.handle_cv_failure = handle_cv_failure
     self.cv = cv
     # Private container that collects the trials of the search.
     self._trials = hyperopt.Trials()
     self.max_opt_time = max_opt_time
     self.max_eval_time = max_eval_time
     self.show_progressbar = show_progressbar
     # A new dict per instance avoids sharing a mutable default between objects.
     self.args_to_scorer = {} if args_to_scorer is None else args_to_scorer
     self.verbose = verbose
コード例 #2
0
ファイル: hyperopt_regressor.py プロジェクト: kant/lale
 def __init__(self,
              model=None,
              max_evals=50,
              handle_cv_failure=False,
              pgo: Optional[PGO] = None):
     """Store the regressor-search settings and build the Hyperopt search space.

     Parameters
     ----------
     model : optional
         Operator or pipeline to tune. When None, ``RandomForestRegressor``
         is used (the name itself is stored; it is not called here).
     max_evals : int, optional
         Stored as ``self.max_evals``, by default 50.
     handle_cv_failure : bool, optional
         Stored as ``self.handle_cv_failure``, by default False.
     pgo : Optional[PGO], optional
         Forwarded to ``hyperopt_search_space``, by default None.
     """
     self.max_evals = max_evals
     self.model = RandomForestRegressor if model is None else model
     # One-option choice wrapping the search space derived from the model.
     model_space = hyperopt_search_space(self.model, pgo=pgo)
     self.search_space = hp.choice('meta_model', [model_space])
     self.handle_cv_failure = handle_cv_failure
     self.trials = Trials()
コード例 #3
0
ファイル: hyperopt_classifier.py プロジェクト: kant/lale
    def __init__(self,
                 model=None,
                 max_evals=50,
                 cv=5,
                 handle_cv_failure=False,
                 pgo: Optional[PGO] = None):
        """Configure a HyperoptClassifier around the given model.

        The optimizer searches for the best performing trainable instantiation
        of ``model``. The performance metric minimized by Hyperopt is the
        negation of accuracy_score.

        Parameters
        ----------
        model : lale.operators.IndividualOp or lale.operators.Pipeline, optional
            A valid Lale individual operator or pipeline. When None,
            ``LogisticRegression`` is used.
        max_evals : int, optional
            Number of trials of the Hyperopt search, by default 50.
        cv : int or cross-validation splitter, optional
            Either an integer, used as the number of folds of
            sklearn.model_selection.StratifiedKFold, or an object whose
            ``split`` method is a generator yielding (train, test) splits as
            arrays of indices. Any of the iterators from
            https://scikit-learn.org/stable/modules/cross_validation.html#cross-validation-iterators
            can be used. The fit method cross-validates the input dataset for
            every trial and optimizes the mean cross-validation performance;
            this is also affected by ``handle_cv_failure``. By default 5.
        handle_cv_failure : bool, optional
            How to deal with a cross-validation failure in a trial.
            If True, the trial continues with an 80-20 percent
            train-validation split of the dataset passed to fit and reports
            the accuracy on the validation part.
            If False, the trial is terminated by assigning accuracy to zero.
            By default False.
        pgo : Optional[PGO], optional
            Forwarded to ``hyperopt_search_space`` when the search space is
            built, by default None.
        """
        self.max_evals = max_evals
        self.model = LogisticRegression if model is None else model
        # One-option choice wrapping the search space derived from the model.
        model_space = hyperopt_search_space(self.model, pgo=pgo)
        self.search_space = hp.choice('meta_model', [model_space])
        self.handle_cv_failure = handle_cv_failure
        self.cv = cv
        self.trials = Trials()
コード例 #4
0
ファイル: hyperopt_regressor.py プロジェクト: yutarochan/lale
 def __init__(self,
              estimator=None,
              max_evals=50,
              cv=5,
              handle_cv_failure=False,
              scoring='r2',
              best_score=1.0,
              max_opt_time=None,
              pgo: Optional[PGO] = None):
     """Store the regressor-search settings and build the Hyperopt search space.

     Parameters
     ----------
     estimator : optional
         Operator or pipeline to tune. When None, ``RandomForestRegressor``
         is used (the name itself is stored; it is not called here).
     max_evals, cv, handle_cv_failure, scoring, best_score, max_opt_time
         Stored unchanged on the instance under the same names. The defaults
         for ``scoring`` and ``best_score`` are 'r2' and 1.0.
     pgo : Optional[PGO], optional
         Forwarded to ``hyperopt_search_space``, by default None.
     """
     self.max_evals = max_evals
     self.estimator = RandomForestRegressor if estimator is None else estimator
     # One-option choice wrapping the search space derived from the estimator.
     estimator_space = hyperopt_search_space(self.estimator, pgo=pgo)
     self.search_space = hp.choice('meta_model', [estimator_space])
     self.scoring = scoring
     self.best_score = best_score
     self.handle_cv_failure = handle_cv_failure
     self.cv = cv
     self.trials = Trials()
     self.max_opt_time = max_opt_time
コード例 #5
0
    def __init__(self,
                 estimator=None,
                 max_evals=50,
                 cv=5,
                 handle_cv_failure=False,
                 scoring='accuracy',
                 best_score=0.0,
                 max_opt_time=None,
                 pgo: Optional[PGO] = None):
        """Configure a HyperoptClassifier around the given estimator.

        The optimizer searches for the best performing trainable instantiation
        of ``estimator``. Hyperopt minimizes, so the score produced by
        ``scoring`` is negated before it is handed to Hyperopt.

        Parameters
        ----------
        estimator : lale.operators.IndividualOp or lale.operators.Pipeline, optional
            A valid Lale individual operator or pipeline. When None,
            ``LogisticRegression`` is used.
        max_evals : int, optional
            Number of trials of the Hyperopt search, by default 50.
        cv : int or cross-validation splitter, optional
            Either an integer, used as the number of folds of
            sklearn.model_selection.StratifiedKFold, or an object whose
            ``split`` method is a generator yielding (train, test) splits as
            arrays of indices. Any of the iterators from
            https://scikit-learn.org/stable/modules/cross_validation.html#cross-validation-iterators
            can be used. The fit method cross-validates the input dataset for
            every trial and optimizes the mean cross-validation performance;
            this is also affected by ``handle_cv_failure``. By default 5.
        handle_cv_failure : bool, optional
            How to deal with a cross-validation failure in a trial.
            If True, the trial continues with an 80-20 percent
            train-validation split of the dataset passed to fit and reports
            the score on the validation part.
            If False, the trial is terminated by assigning accuracy to zero.
            By default False.
        scoring : str or scorer object, optional
            Either a string from sklearn.metrics.SCORERS.keys() or a scorer
            created with
            https://scikit-learn.org/stable/modules/generated/sklearn.metrics.make_scorer.html#sklearn.metrics.make_scorer
            from one of the metrics in sklearn.metrics
            (https://scikit-learn.org/stable/modules/classes.html#module-sklearn.metrics).
            A fully custom scorer can be built from a Python function as shown
            at https://scikit-learn.org/stable/modules/model_evaluation.html
            The metric must return a scalar. scikit-learn scorer objects always
            return values for which higher is better. By default 'accuracy'.
        best_score : float, optional
            The best score attainable with the chosen scorer. It lets the
            optimizer return a loss to Hyperopt that is greater than or equal
            to zero, with zero being the best loss. By default 0.0, which
            keeps the existing behavior.
        max_opt_time : float, optional
            Maximum amount of time in seconds for the optimization. By
            default None, meaning no runtime bound.
        pgo : Optional[PGO], optional
            Forwarded to ``hyperopt_search_space`` when the search space is
            built, by default None.

        Examples
        --------
        >>> from sklearn.metrics import make_scorer, f1_score, accuracy_score
        >>> lr = LogisticRegression()
        >>> clf = HyperoptClassifier(estimator=lr, scoring='accuracy', cv=5, max_evals=2)
        >>> from sklearn import datasets
        >>> diabetes = datasets.load_diabetes()
        >>> X = diabetes.data[:150]
        >>> y = diabetes.target[:150]
        >>> trained = clf.fit(X, y)
        >>> predictions = trained.predict(X)

        Other scoring metrics:

        >>> clf = HyperoptClassifier(estimator=lr, scoring=make_scorer(f1_score, average='macro'), cv=3, max_evals=2)

        """
        self.max_evals = max_evals
        self.estimator = LogisticRegression if estimator is None else estimator
        # One-option choice wrapping the search space derived from the estimator.
        estimator_space = hyperopt_search_space(self.estimator, pgo=pgo)
        self.search_space = hp.choice('meta_model', [estimator_space])
        self.scoring = scoring
        self.best_score = best_score
        self.handle_cv_failure = handle_cv_failure
        self.cv = cv
        self.trials = Trials()
        self.max_opt_time = max_opt_time
コード例 #6
0
ファイル: test_optimizers.py プロジェクト: sks95/lale
 def test_hyperparam_overriding_with_hyperopt(self):
     """Search spaces of PCA(n_components=3) and PCA() must compare unequal."""
     pinned_pca = PCA(n_components=3)
     default_pca = PCA()
     pinned_space = hyperopt_search_space(pinned_pca)
     default_space = hyperopt_search_space(default_pca)
     self.assertNotEqual(pinned_space, default_space)
コード例 #7
0
    def test_lr_parameters(self):
        """Build a search space for LogisticRegression using a PGO loaded from file."""
        loaded_pgo = PGO.load_pgo_file(example_pgo_fp)
        classifier = LogisticRegression()
        lr_space: SearchSpace = hyperopt_search_space(classifier, pgo=loaded_pgo)
コード例 #8
0
    def fit(self, X_train, y_train):
        """Run the Hyperopt search and refit the best configuration on all data.

        The search runs in up to two phases: first ``self.evals_with_defaults``
        trials over a search space built from ``self.estimator.freeze_trainable()``,
        then the remaining ``self.max_evals - self.evals_with_defaults`` trials
        over the full search space. The trials of both phases are merged into
        ``self._trials``; the best hyperparameters are then used to fit a final
        estimator on ``X_train, y_train``, stored in ``self._best_estimator``
        (None when no best configuration could be extracted).

        Side effects visible in this method: ``self.cv`` is replaced by the
        result of ``check_cv``; ``self.search_space`` is rebuilt with the data
        schema; ``self.evals_with_defaults`` is set to 0 when the default
        search space cannot be built.

        Parameters
        ----------
        X_train
            Training features, passed through to cross validation and to the
            final fit.
        y_train
            Training labels/targets.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            When Hyperopt reports that all trials failed and ``self._trials``
            contains no successful trial.
        """
        opt_start_time = time.time()
        is_clf = self.estimator.is_classifier()
        self.cv = check_cv(self.cv, y=y_train, classifier=is_clf)
        data_schema = lale.helpers.fold_schema(
            X_train, y_train, self.cv, is_clf)
        self.search_space = hyperopt.hp.choice(
            'meta_model', [hyperopt_search_space(self.estimator, pgo=self.pgo,
                                                 data_schema=data_schema)])
        # Create a search space with default hyperparameters for all trainable parts of the pipeline.
        # This search space is used for `frac_evals_with_defaults` fraction of the total trials.
        try:
            self.search_space_with_defaults = hyperopt.hp.choice(
                'meta_model',
                [hyperopt_search_space(self.estimator.freeze_trainable(),
                                       pgo=self.pgo, data_schema=data_schema)])
        except Exception:
            # Only ordinary errors disable the defaults phase; KeyboardInterrupt
            # and SystemExit are no longer swallowed here.
            logger.warning("Exception caught during generation of default search space, setting frac_evals_with_defaults to zero.")
            self.evals_with_defaults = 0

        def hyperopt_train_test(params, X_train, y_train):
            """Evaluate one hyperparameter setting; return (score, logloss, seconds)."""
            warnings.filterwarnings("ignore")

            trainable = create_instance_from_hyperopt_search_space(self.estimator, params)
            try:
                cv_score, logloss, execution_time = cross_val_score_track_trials(trainable, X_train, y_train, cv=self.cv, scoring=self.scoring, args_to_scorer=self.args_to_scorer)
                logger.debug("Successful trial of hyperopt with hyperparameters:{}".format(params))
            except BaseException as e:
                # If there is any error in cross validation, use the score based on a random train-test split as the evaluation criterion
                if self.handle_cv_failure:
                    X_train_part, X_validation, y_train_part, y_validation = train_test_split(X_train, y_train, test_size=0.20)
                    start = time.time()
                    trained = trainable.fit(X_train_part, y_train_part)
                    scorer = check_scoring(trainable, scoring=self.scoring)
                    cv_score = scorer(trained, X_validation, y_validation, **self.args_to_scorer)
                    execution_time = time.time() - start
                    # predict_proba is part of the best-effort log-loss computation:
                    # an estimator without it must not fail a trial whose score
                    # has already been computed.
                    try:
                        y_pred_proba = trained.predict_proba(X_validation)
                        logloss = log_loss(y_true=y_validation, y_pred=y_pred_proba)
                    except Exception:
                        logloss = 0
                        logger.debug("Warning, log loss cannot be computed")
                else:
                    logger.debug(e)
                    logger.debug("Error {} with pipeline:{}".format(e, trainable.to_json()))
                    raise
            return cv_score, logloss, execution_time

        def merge_trials(trials1, trials2):
            """Append the trials of ``trials2`` to ``trials1`` with shifted tids."""
            # default=-1 keeps the original tids when trials1 is empty (e.g. the
            # main search recorded no trial); a plain max() would raise ValueError.
            max_tid = max((trial['tid'] for trial in trials1.trials), default=-1)

            for trial in trials2:
                tid = trial['tid'] + max_tid + 1
                # Create a one-element trial-doc list, then overwrite its only
                # entry with the existing trial re-labelled with the new tid.
                hyperopt_trial = hyperopt.Trials().new_trial_docs(
                        tids=[None],
                        specs=[None],
                        results=[None],
                        miscs=[None])
                hyperopt_trial[0] = trial
                hyperopt_trial[0]['tid'] = tid
                hyperopt_trial[0]['misc']['tid'] = tid
                for key in hyperopt_trial[0]['misc']['idxs'].keys():
                    hyperopt_trial[0]['misc']['idxs'][key] = [tid]
                trials1.insert_trial_docs(hyperopt_trial)
                trials1.refresh()
            return trials1

        def proc_train_test(params, X_train, y_train, return_dict):
            """Run one trial and write its outcome into ``return_dict``."""
            return_dict['params'] = copy.deepcopy(params)
            try:
                score, logloss, execution_time = hyperopt_train_test(params, X_train=X_train, y_train=y_train)
                # Hyperopt minimizes, so the loss is the distance from the best score.
                return_dict['loss'] = self.best_score - score
                return_dict['time'] = execution_time
                return_dict['log_loss'] = logloss
                return_dict['status'] = hyperopt.STATUS_OK
            except BaseException as e:
                logger.warning(f"Exception caught in Hyperopt:{type(e)}, {traceback.format_exc()} with hyperparams: {params}, setting status to FAIL")
                return_dict['status'] = hyperopt.STATUS_FAIL
                return_dict['error_msg'] = f"Exception caught in Hyperopt:{type(e)}, {traceback.format_exc()} with hyperparams: {params}"
                if self.verbose:
                    print(return_dict['error_msg'])

        def get_final_trained_estimator(params, X_train, y_train):
            """Instantiate the estimator with ``params`` and fit it on the given data."""
            warnings.filterwarnings("ignore")
            trainable = create_instance_from_hyperopt_search_space(self.estimator, params)
            trained = trainable.fit(X_train, y_train)
            return trained

        def f(params):
            """Objective handed to hyperopt.fmin; returns the result dict of one trial."""
            current_time = time.time()
            if (self.max_opt_time is not None) and ((current_time - opt_start_time) > self.max_opt_time):
                # if max optimization time set, and we have crossed it, exit optimization completely
                # (the SystemExit raised here is caught around the fmin calls below)
                sys.exit(0)
            if self.max_eval_time:
                # Run hyperopt in a subprocess that can be interrupted
                manager = multiprocessing.Manager()
                proc_dict = manager.dict()
                p = multiprocessing.Process(
                    target=proc_train_test,
                    args=(params, X_train, y_train, proc_dict))
                p.start()
                p.join(self.max_eval_time)
                if p.is_alive():
                    p.terminate()
                    p.join()
                    logger.warning(f"Maximum alloted evaluation time exceeded. with hyperparams: {params}, setting status to FAIL")
                    proc_dict['status'] = hyperopt.STATUS_FAIL
                if 'status' not in proc_dict:
                    logger.warning("Corrupted results, setting status to FAIL")
                    proc_dict['status'] = hyperopt.STATUS_FAIL
            else:
                proc_dict = {}
                proc_train_test(params, X_train, y_train, proc_dict)
            return proc_dict

        algo = getattr(hyperopt, self.algo)
        # Search in the search space with defaults
        if self.evals_with_defaults > 0:
            try:
                hyperopt.fmin(f, self.search_space_with_defaults, algo=algo.suggest, max_evals=self.evals_with_defaults, trials=self._default_trials, rstate=np.random.RandomState(SEED),
                show_progressbar=self.show_progressbar)
            except SystemExit:
                logger.warning('Maximum alloted optimization time exceeded. Optimization exited prematurely')
            except AllTrialsFailed:
                self._best_estimator = None
                # NOTE(review): this checks self._trials, not self._default_trials,
                # although only the defaults phase has run at this point - confirm
                # this is intended.
                if hyperopt.STATUS_OK not in self._trials.statuses():
                    raise ValueError('Error from hyperopt, none of the trials succeeded.')

        try:
            hyperopt.fmin(f, self.search_space, algo=algo.suggest, max_evals=self.max_evals-self.evals_with_defaults, trials=self._trials, rstate=np.random.RandomState(SEED),
            show_progressbar=self.show_progressbar)
        except SystemExit:
            logger.warning('Maximum alloted optimization time exceeded. Optimization exited prematurely')
        except AllTrialsFailed:
            self._best_estimator = None
            if hyperopt.STATUS_OK not in self._trials.statuses():
                raise ValueError('Error from hyperopt, none of the trials succeeded.')

        self._trials = merge_trials(self._trials, self._default_trials)
        try:
            best_trial = self._trials.best_trial
            val_loss = self._trials.best_trial['result']['loss']
            if len(self._default_trials) > 0:
                default_val_loss = self._default_trials.best_trial['result']['loss']
                if default_val_loss < val_loss:
                    best_trial = self._default_trials.best_trial
            best_params = best_trial['result']['params']
            logger.info(
                'best score: {:.1%}\nbest hyperparams found using {} hyperopt trials: {}'.format(
                    self.best_score - self._trials.average_best_error(), self.max_evals, best_params
                )
            )
            trained = get_final_trained_estimator(best_params, X_train, y_train)
            self._best_estimator = trained
        except BaseException as e:
            logger.warning('Unable to extract the best parameters from optimization, the error: {}'.format(e))
            self._best_estimator = None

        return self
コード例 #9
0
 def test_hyperparam_defaults(self):
     """Smoke test: building a search space for a default J48() must not raise."""
     hyperopt_search_space(J48())
コード例 #10
0
    del params['name']

    clf = get_classifier(t, params)
    clf_trained = clf.fit(X_train, y_train)
    predictions = clf_trained.predict(X_test)
    accuracy = accuracy_score(y_test, [round(pred) for pred in predictions])
    return accuracy

def get_classifier(t, param_dict):
    """Build a LogisticRegression from ``param_dict`` when ``t`` names it.

    Returns 0 when 'LogisticRegression' is not contained in ``t``; otherwise
    returns ``LogisticRegression(**param_dict)``.
    """
    # Guard clause: anything other than LogisticRegression is unsupported.
    if 'LogisticRegression' not in t:
        return 0
    return LogisticRegression(**param_dict)

# One-option choice wrapping the search space derived from LogisticRegression.
search_space = hp.choice(
    'classifier',
    [hyperopt_search_space(LogisticRegression)],
)

# Number of evaluations run so far and the best accuracy seen; both are
# updated by the objective function f().
count = best = 0
def f(params):
    """Objective function: evaluate ``params`` and report the negated accuracy.

    Increments the module-level ``count``, raises the module-level ``best``
    when a higher accuracy is seen, prints progress, and returns a dict with
    ``'loss'`` (the negated accuracy) and ``'status'``.
    """
    global best, count
    count = count + 1
    # A shallow copy is passed so that key deletions made by the callee do not
    # affect `params`, which is still read below.
    accuracy = hyperopt_train_test(params.copy())
    if best < accuracy:
        print('new best:', accuracy, 'using', params['name'])
        best = accuracy
    # Reporting interval of 1: the remainder is always zero, so every call prints.
    if not count % 1:
        print('iters:', count, ', acc:', accuracy, 'using', params)
    outcome = {'loss': -accuracy, 'status': STATUS_OK}
    return outcome

# Fresh Trials container; presumably handed to the search that follows (not visible here).
trials = Trials()