def __init__(self, estimator=None, max_evals=50, frac_evals_with_defaults=0, algo='tpe',
             cv=5, handle_cv_failure=False, scoring='accuracy', best_score=0.0,
             max_opt_time=None, max_eval_time=None, pgo: Optional[PGO] = None,
             show_progressbar=True, args_to_scorer=None, verbose=False):
    self.max_evals = max_evals
    if estimator is None:
        self.estimator = LogisticRegression()
    else:
        self.estimator = estimator
    if frac_evals_with_defaults > 0:
        self.evals_with_defaults = int(frac_evals_with_defaults * max_evals)
    else:
        self.evals_with_defaults = 0
    self.algo = algo
    self.scoring = scoring
    self.best_score = best_score
    self.handle_cv_failure = handle_cv_failure
    self.cv = cv
    self._trials = hyperopt.Trials()
    self._default_trials = hyperopt.Trials()
    self.max_opt_time = max_opt_time
    self.max_eval_time = max_eval_time
    self.pgo = pgo
    self.show_progressbar = show_progressbar
    if args_to_scorer is not None:
        self.args_to_scorer = args_to_scorer
    else:
        self.args_to_scorer = {}
    self.verbose = verbose

def slim(source: hyperopt.Trials) -> hyperopt.Trials:
    """Strip trials down to the basic values so that they can be pickled."""
    _trials = hyperopt.Trials()
    for trial in source.trials:
        docs = hyperopt.Trials().new_trial_docs(tids=[trial['tid']],
                                                specs=[trial['spec']],
                                                results=[trial['result']],
                                                miscs=[trial['misc']])
        _trials.insert_trial_docs(docs)
        _trials.refresh()
    return _trials

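# Usage sketch (assumed helper, not part of the snippet above): `slim` keeps only the
# tid/spec/result/misc fields, which is typically enough to pickle a Trials object and
# reload it later, e.g. for warm-starting a new fmin run.
import pickle

import hyperopt


def save_trials(trials: hyperopt.Trials, path: str = "trials.pkl") -> None:
    # Persist only the slimmed-down trial documents.
    with open(path, "wb") as f:
        pickle.dump(slim(trials), f)
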
def test_har6(suggest=hp_gpsmbo.hpsuggest.suggest, seed=1, iters=10):
    # -- see shovel/hps.py for this test with debugging scaffolding
    #    run it by typing e.g.
    #
    #    shovel hps.run_har6 --seed=9
    #
    #    That should do a run that fails by only getting to -3.2
    mins = []
    for ii in range(int(seed), int(seed) + int(iters)):
        print('SEED', ii)
        space = {
            'a': hp.uniform('a', 0, 1),
            'b': hp.uniform('b', 0, 1),
            'c': hp.uniform('c', 0, 1),
            'x': hp.uniform('x', 0, 1),
            'y': hp.uniform('y', 0, 1),
            'z': hp.uniform('z', 0, 1),
        }
        trials = hyperopt.Trials()
        hyperopt.fmin(fn=har6.har6,
                      space=space,
                      trials=trials,
                      algo=partial(suggest, stop_at=-3.32),
                      rstate=np.random.RandomState(ii),
                      max_evals=100)
        mins.append(min(trials.losses()))
    # At most 2 of the runs may fail to reach the -3.32 target.
    assert np.sum(np.array(mins) > -3.32) < 3

def optimize():
    save_trial = 1
    max_trials = 1
    space = {
        'l1_reg': hyperopt.hp.choice('l1_reg', [0.001, 0.001, 0.001]),
        'l2_reg': hyperopt.hp.choice('l2_reg', [0.001, 0.002]),
        'learning_rate': hyperopt.hp.uniform('learning_rate', 0.0002, 0.001),
        'num_layers': hyperopt.hp.choice('num_layers', [3, 4, 5]),
        'layer_size': hyperopt.hp.choice('layer_size', [50, 60, 100]),
        'batch_size': hyperopt.hp.choice('batch_size', [64]),
        'dropout_keep_probability': hyperopt.hp.choice('dropout_keep_probability', [1]),
        'validation_window': hyperopt.hp.choice('validation_window', [10]),
    }
    try:
        with open("trial_obj.pkl", "rb") as f:
            trials = pickle.load(f)
        print("________Loading saved trials object__________")
        max_trials = len(trials.trials) + save_trial
        print("Rerunning from {} trials to {} (+{}) trials".format(
            len(trials.trials), max_trials, save_trial))
    except (FileNotFoundError, EOFError):
        # No usable saved trials object: start from scratch.
        trials = hyperopt.Trials()

    best_model = hyperopt.fmin(objective,
                               space,
                               algo=hyperopt.tpe.suggest,
                               trials=trials,
                               max_evals=max_trials)

    with open("trial_obj.pkl", "wb") as f:
        pickle.dump(trials, f)

    print(best_model)
    print("*" * 150)
    print(hyperopt.space_eval(space, best_model))
    print("*" * 150)

    with open("trials.log", "w") as f:
        for i, tr in enumerate(trials.trials):
            trial_vals = tr['misc']['vals']
            for key in trial_vals.keys():
                trial_vals[key] = trial_vals[key][0]
            f.write("Trial no. : %i\n" % i)
            f.write(str(hyperopt.space_eval(space, trial_vals)) + "\n")
            f.write("Loss : " + str(1 - tr['result']['loss']) + ", ")
            f.write("Valid auc : " + str(1 - tr['result']['valid_auc']) + ", ")
            f.write("Train streaming accuracy : " + str(tr['result']['train_accu_str']) + ", ")
            f.write("Validation loss : " + str(tr['result']['valid_loss']) + ", ")
            f.write("Train auc : " + str(tr['result']['train_auc']) + "\n")
            f.write("*" * 100 + "\n")

def tune(param_space: Dict[str, Any],
         objective_fn: Callable[[Dict[str, Any]], Dict[str, Any]],
         max_evaluations: int,
         spark_host: str) -> Tuple[Dict[str, Any], hyperopt.Trials]:
    start = time.time()
    if spark_host:
        import pyspark
        spark_session = pyspark.sql.SparkSession(
            pyspark.SparkContext(master=spark_host, appName=APP_NAME))
        trials = hyperopt.SparkTrials(spark_session=spark_session)
    else:
        trials = hyperopt.Trials()

    best_params = hyperopt.fmin(objective_fn,
                                param_space,
                                algo=tpe.suggest,
                                max_evals=max_evaluations,
                                trials=trials,
                                rstate=np.random.RandomState(1777))
    evaluated_best_params = hyperopt.space_eval(param_space, best_params)
    losses = [x['result']['loss'] for x in trials.trials]

    logger.info('Score best parameters: %f', min(losses) * -1)
    logger.info('Best parameters: %s', evaluated_best_params)
    logger.info('Time elapsed: %s',
                time.strftime("%H:%M:%S", time.gmtime(time.time() - start)))
    logger.info('Parameter combinations evaluated: %d', max_evaluations)

    return evaluated_best_params, trials

def find_best_hyper_params(dataset, const_params, parameter_space, fold_count,
                           eval_metric, max_evals=100):
    # we are going to optimize these three parameters, though there are a lot more of them (see CatBoost docs)
    # parameter_space = {
    #     'learning_rate': hyperopt.hp.uniform('learning_rate', 0.1, 1.0),
    #     'depth': hyperopt.hp.randint('depth', 7),
    #     'l2_leaf_reg': hyperopt.hp.uniform('l2_leaf_reg', 1, 10)
    # }
    objective = MyObjective(dataset=dataset,
                            const_params=const_params,
                            fold_count=fold_count,
                            eval_metric=eval_metric)
    trials = hyperopt.Trials()
    # Pass the Trials object so fmin records every evaluation.
    best = hyperopt.fmin(fn=objective,
                         space=parameter_space,
                         algo=hyperopt.rand.suggest,
                         max_evals=max_evals,
                         trials=trials,
                         rstate=np.random.RandomState(seed=42))
    return best

def run(self, hyperparams: Dict):
    r"""Run the TPE algorithm with hyperparameters :attr:`hyperparams`

    Args:
        hyperparams: Dict
            The `(key, value)` pairs of hyperparameters along with their
            range of values.
    """
    space = {}
    for k, v in hyperparams.items():
        if isinstance(v, dict):
            if v["dtype"] == int:
                space[k] = hpo.hp.choice(k, range(v["start"], v["end"]))
            else:
                space[k] = hpo.hp.uniform(k, v["start"], v["end"])
    trials = hpo.Trials()
    hpo.fmin(fn=self.objective_func,
             space=space,
             algo=hpo.tpe.suggest,
             max_evals=3,
             trials=trials)
    # Pick the trial with the lowest loss (using `key=` avoids comparing trial
    # dicts when two losses are equal).
    best_trial = min(trials.trials, key=lambda trial: trial["result"]["loss"])
    # delete all the other models
    for trial in trials.trials:
        if trial is not best_trial:
            shutil.rmtree(trial["result"]["model"])

def convertResultsToTrials(self, results):
    trials = hyperopt.Trials()
    for resultIndex, result in enumerate(results):
        data = {
            'book_time': datetime.datetime.now(),
            'exp_key': None,
            'misc': {
                'cmd': ('domain_attachment', 'FMinIter_Domain'),
                'idxs': {},
                'tid': resultIndex,
                'vals': {},
                'workdir': None
            },
            'owner': None,
            'refresh_time': datetime.datetime.now(),
            'result': {'loss': result['loss'], 'status': result['status']},
            'spec': None,
            'state': 2,
            'tid': resultIndex,
            'version': 0
        }
        for key in result.keys():
            if key not in self.resultInformationKeys:
                value = result[key]
                # Use equality, not identity (`is not ""` only works by
                # accident of string interning).
                if value != "":
                    data['misc']['idxs']['root.' + key] = [resultIndex]
                    data['misc']['vals']['root.' + key] = [value]
                else:
                    data['misc']['idxs']['root.' + key] = []
                    data['misc']['vals']['root.' + key] = []
        trials.insert_trial_doc(data)
    return trials

def hyper_parameter_search(self):
    """ Perform hyper-parameter search """
    self._debug(f"Start")
    self.trials = hyperopt.Trials()
    self.objective = self.create_objective_function()
    # Create a search space
    self.search_space = self.create_search_space()
    # Get max_evals from YAML file parameters
    self.max_evals = self.parameters['max_evals']
    # Select algorithm
    self.algorithm = self.get_algorithm()
    # Should we create a new dataset on each iteration? True if we have any
    # hyper-parameter from HYPER_PARAM_CREATE_DATASET
    self.is_create_dataset = any([
        self.space[param_type]
        for param_type in HYPER_PARAM_CREATE_DATASET
        if param_type in self.space
    ])
    # Start search
    self._info(f"Search: Create dataset={self.is_create_dataset}")
    rand_state = None
    if self.random_seed is not None:
        self._debug(f"Using random seed {self.random_seed}")
        rand_state = np.random.RandomState(seed=self.random_seed)
    self.best = hyperopt.fmin(fn=self.objective,
                              space=self.search_space,
                              algo=self.algorithm,
                              max_evals=self.max_evals,
                              trials=self.trials,
                              rstate=rand_state,
                              show_progressbar=self.show_progressbar)
    self._info(f"Hyper parameter search best fit:{self.best}, best parameters: {self.best_params}")
    self.save_results()
    self._debug(f"End")
    return True

def __init__(self, estimator=None, max_evals=50, algo='tpe', cv=5,
             handle_cv_failure=False, scoring='accuracy', best_score=0.0,
             max_opt_time=None, max_eval_time=None, pgo: Optional[PGO] = None,
             show_progressbar=True, args_to_scorer=None, verbose=False):
    self.max_evals = max_evals
    if estimator is None:
        self.estimator = LogisticRegression()
    else:
        self.estimator = estimator
    self.search_space = hyperopt.hp.choice(
        'meta_model', [hyperopt_search_space(self.estimator, pgo=pgo)])
    self.algo = algo
    self.scoring = scoring
    self.best_score = best_score
    self.handle_cv_failure = handle_cv_failure
    self.cv = cv
    self._trials = hyperopt.Trials()
    self.max_opt_time = max_opt_time
    self.max_eval_time = max_eval_time
    self.show_progressbar = show_progressbar
    if args_to_scorer is not None:
        self.args_to_scorer = args_to_scorer
    else:
        self.args_to_scorer = {}
    self.verbose = verbose

def __init__(self,
             space,
             max_concurrent=10,
             reward_attr="episode_reward_mean",
             points_to_evaluate=None,
             **kwargs):
    assert hpo is not None, "HyperOpt must be installed!"
    from hyperopt.fmin import generate_trials_to_calculate
    assert type(max_concurrent) is int and max_concurrent > 0
    self._max_concurrent = max_concurrent
    self._reward_attr = reward_attr
    self.algo = hpo.tpe.suggest
    self.domain = hpo.Domain(lambda spc: spc, space)
    if points_to_evaluate is None:
        self._hpopt_trials = hpo.Trials()
        self._points_to_evaluate = 0
    else:
        assert type(points_to_evaluate) == list
        self._hpopt_trials = generate_trials_to_calculate(points_to_evaluate)
        self._hpopt_trials.refresh()
        self._points_to_evaluate = len(points_to_evaluate)
    self._live_trial_mapping = {}
    self.rstate = np.random.RandomState()
    super(HyperOptSearch, self).__init__(**kwargs)

def update_search_space(self, search_space):
    """
    Update the search space definition in the tuner from `search_space`.

    Called when the experiment is first set up, or when the search space is
    updated in the WebUI.

    Parameters
    ----------
    search_space : dict
    """
    self.json = search_space
    search_space_instance = json2space(self.json)
    rstate = np.random.RandomState()
    trials = hp.Trials()
    domain = hp.Domain(None, search_space_instance, pass_expr_memo_ctrl=None)
    algorithm = self._choose_tuner(self.algorithm_name)
    self.rval = hp.FMinIter(algorithm,
                            domain,
                            trials,
                            max_evals=-1,
                            rstate=rstate,
                            verbose=0)
    self.rval.catch_eval_exceptions = False

def main(bee_length, autoopt, **kwargs):
    if not autoopt:
        run_wdd(bee_length=bee_length, **kwargs)
        print("\nStopping.")
    else:
        print("Optimizing hyperparameters..")
        import hyperopt
        from hyperopt import hp
        from wdd.evaluation import load_ground_truth, calculate_scores, WaggleMetadataSaver

        ground_truth = load_ground_truth(autoopt)

        search_space = dict(
            bee_length=hp.quniform("bee_length", bee_length * 0.8, bee_length * 1.2, q=1),
            subsample=hp.choice(
                "subsample",
                list(np.arange(
                    max(int(math.log(bee_length, 2) / math.log(5, 2)), 1),
                    int(math.log(bee_length, 2))))),
            binarization_threshold=hp.uniform("binarization_threshold", 2, 10),
            max_frame_distance=hp.uniform("max_frame_distance", 0.25, 0.6),
            min_num_detections=hp.uniform("min_num_detections", 0.05, 0.3),
        )

        def objective(fun_kwargs):
            fun_kwargs = {**kwargs, **fun_kwargs}
            fun_kwargs["no_warmup"] = True
            fun_kwargs["verbose"] = False
            fun_kwargs["bee_length"] = int(fun_kwargs["bee_length"])
            fun_kwargs["subsample"] = int(fun_kwargs["subsample"])

            saver = WaggleMetadataSaver()
            fun_kwargs["export_steps"] = [saver]

            fps = run_wdd(**fun_kwargs)

            results = calculate_scores(saver.all_waggles, ground_truth,
                                       bee_length=bee_length, verbose=False)
            results["fps"] = fps
            results["loss"] = 1.0 - results["f_0.5"]
            results["status"] = hyperopt.STATUS_OK
            return results

        trials = hyperopt.Trials()
        best = hyperopt.fmin(objective,
                             search_space,
                             algo=hyperopt.tpe.suggest,
                             max_evals=20,
                             show_progressbar=True,
                             trials=trials)

        print("Optimization finished!")
        print("Best parameters:")
        print(hyperopt.space_eval(search_space, best))
        print(trials.best_trial["result"])

def tune(self):
    """
    Start tuning.

    Notice that `tune` does not affect the tuner's inner state, so each new
    call to `tune` starts fresh. In other words, hyperspaces are suggestive
    only within the same `tune` call.
    """
    if self.__curr_run_num != 0:
        print(
            "WARNING: `tune` does not affect the tuner's inner state, so "
            "each new call to `tune` starts fresh. In other words, "
            "hyperspaces are suggestive only within the same `tune` call.")
    self.__curr_run_num = 0
    logging.getLogger('hyperopt').setLevel(logging.CRITICAL)
    trials = hyperopt.Trials()
    self._fmin(trials)
    return {
        'best': trials.best_trial['result']['mz_result'],
        'trials': [trial['result']['mz_result'] for trial in trials.trials]
    }

def FineTune_hyperopt(self, X, y, mute=False):
    self.dataset(X, y)
    params_space = {
        'l2_leaf_reg': hyperopt.hp.qloguniform('l2_leaf_reg', 0, 2, 1),
        'learning_rate': hyperopt.hp.uniform('learning_rate', 1e-3, 5e-1),
        'bagging_temperature': hyperopt.hp.uniform("bagging_temperature", 0, 0.3)
    }
    trials = hyperopt.Trials()
    best = hyperopt.fmin(self.hyperopt_objective,
                         space=params_space,
                         algo=hyperopt.tpe.suggest,
                         max_evals=2,
                         trials=trials,
                         rstate=RandomState(self.random_state))
    if not mute:
        print("\nBest parameters:")
        print(best)
        print("\n")
    _parameters = self.params
    _parameters.update(best)
    _model = CatBoostClassifier(**_parameters)
    _cv_data = catboost.cv(self.all_train_data, _model.get_params())
    if not mute:
        print('\nPrecise validation accuracy score: {}'.format(
            np.max(_cv_data['test-Accuracy-mean'])))
    return best

def hyperopt_make_trials(values, losses, parameter_names=None):
    """
    Parameters
    ----------
    values : list of lists or 2D np.ndarray (n_trials, n_params)
        each element (or row) corresponds to a set of parameters previously tested
    losses : list of floats (n_trials,)
        losses for the previous trials
    parameter_names : list of str or None
        associated parameter names (must correspond to `spaces` passed to
        hyperopt). If None, defaults to ['X0', 'X1', ..., 'X`n_params`']

    Returns
    -------
    trials : hyperopt.Trials
        hyperopt Trials object containing the reconstructed trials
    """
    import hyperopt as hpo

    # normalize the inputs
    nparams = len(values[0])
    if parameter_names is None:
        parameter_names = ['X{}'.format(i) for i in range(nparams)]
    vals = [{pn: [v] for pn, v in zip(parameter_names, val)} for val in values]

    trials = []
    for i, (v, l) in enumerate(zip(vals, losses)):
        trials.append(hyperopt_make_trial_data(i, v, l))

    hpo_trials = hpo.Trials()
    hpo_trials.insert_trial_docs(trials)
    hpo_trials.refresh()
    return hpo_trials

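# Usage sketch (illustrative values, losses, and search space only; whether fmin accepts
# the reconstructed trials depends on the trial documents built by the helper
# `hyperopt_make_trial_data` above): rebuild a Trials object from previously evaluated
# points and pass it to fmin so the search warm-starts from them.
import hyperopt

previous_values = [[0.1, 3.0], [0.5, 7.0]]
previous_losses = [0.42, 0.37]

space = {
    'X0': hyperopt.hp.uniform('X0', 0, 1),
    'X1': hyperopt.hp.uniform('X1', 1, 10),
}

warm_trials = hyperopt_make_trials(previous_values, previous_losses,
                                   parameter_names=['X0', 'X1'])
# max_evals counts the already-inserted trials, so add the number of new evaluations.
best = hyperopt.fmin(fn=lambda p: (p['X0'] - 0.3) ** 2 + (p['X1'] - 5) ** 2,
                     space=space,
                     algo=hyperopt.tpe.suggest,
                     max_evals=len(warm_trials.trials) + 10,
                     trials=warm_trials)
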
def hyper_opt_fmin(space, fun, additional_evals, verbose=0, trials_path='../trials.p', **kwargs):
    # This is a wrapper around the training process that enables warm starts from file.
    objective = partial(fun, **kwargs)

    # Try to recover the trials object, else create a new one!
    try:
        with open(trials_path, "rb") as f:
            trials = pickle.load(f)
        if verbose > 0:
            print(f"Loaded trials from {trials_path}")
    except FileNotFoundError:
        trials = hp.Trials()

    # Compute the effective number of new trials that have to be run.
    past_evals = len(trials.losses())
    new_evals = past_evals + additional_evals

    best = hp.fmin(fn=objective,
                   space=space,
                   algo=hp.tpe.suggest,
                   max_evals=new_evals,
                   trials=trials)

    if verbose > 0:
        print(f"HyperOpt got best loss {trials.best_trial['result']['loss']} "
              f"with the following hyper parameters: \n{trials.best_trial['misc']['vals']}")

    # Store the trials object
    with open(trials_path, "wb") as f:
        pickle.dump(trials, f)

    return best, trials

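# Usage sketch (illustrative objective, space, and path; assumes the same aliases as the
# wrapper above, i.e. `import hyperopt as hp` and `from functools import partial`): each
# call adds `additional_evals` evaluations on top of whatever is already stored on disk.
def toy_objective(params, offset=0.0):
    # Any extra keyword arguments passed to hyper_opt_fmin are forwarded here.
    return (params['x'] - offset) ** 2

toy_space = {'x': hp.hp.uniform('x', -5, 5)}

best, trials = hyper_opt_fmin(toy_space, toy_objective, additional_evals=10,
                              verbose=1, trials_path='toy_trials.p', offset=1.5)
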
def __init__(
        self,
        space: Optional[Dict] = None,
        metric: Optional[str] = None,
        mode: Optional[str] = None,
        points_to_evaluate: Optional[List[Dict]] = None,
        n_initial_points: int = 20,
        random_state_seed: Optional[int] = None,
        gamma: float = 0.25,
        max_concurrent: Optional[int] = None,
        use_early_stopped_trials: Optional[bool] = None,
):
    assert hpo is not None, (
        "HyperOpt must be installed! Run `pip install hyperopt`.")
    if mode:
        assert mode in ["min", "max"], "`mode` must be 'min' or 'max'."
    from hyperopt.fmin import generate_trials_to_calculate

    super(HyperOptSearch, self).__init__(
        metric=metric,
        mode=mode,
        max_concurrent=max_concurrent,
        use_early_stopped_trials=use_early_stopped_trials)
    self.max_concurrent = max_concurrent
    # hyperopt internally minimizes, so "max" => -1
    if mode == "max":
        self.metric_op = -1.
    elif mode == "min":
        self.metric_op = 1.

    if n_initial_points is None:
        self.algo = hpo.tpe.suggest
    else:
        self.algo = partial(hpo.tpe.suggest, n_startup_jobs=n_initial_points)
    if gamma is not None:
        self.algo = partial(self.algo, gamma=gamma)

    if points_to_evaluate is None:
        self._hpopt_trials = hpo.Trials()
        self._points_to_evaluate = 0
    else:
        assert isinstance(points_to_evaluate, (list, tuple))
        self._hpopt_trials = generate_trials_to_calculate(points_to_evaluate)
        self._hpopt_trials.refresh()
        self._points_to_evaluate = len(points_to_evaluate)

    self._live_trial_mapping = {}
    if random_state_seed is None:
        self.rstate = np.random.RandomState()
    else:
        self.rstate = np.random.RandomState(random_state_seed)

    self.domain = None
    if isinstance(space, dict) and space:
        resolved_vars, domain_vars, grid_vars = parse_spec_vars(space)
        if domain_vars or grid_vars:
            logger.warning(
                UNRESOLVED_SEARCH_SPACE.format(par="space", cls=type(self)))
            space = self.convert_search_space(space)
        self.domain = hpo.Domain(lambda spc: spc, space)

def test_branin(suggest=hp_gpsmbo.hpsuggest.suggest, seed=1, iters=10):
    import matplotlib.pyplot as plt
    plt.ion()
    mins = []
    all_ys = []
    for ii in range(int(seed), int(seed) + int(iters)):
        print('SEED', ii)
        space = branin()
        trials = hyperopt.Trials()
        hyperopt.fmin(fn=lambda x: x,
                      space=space.expr,
                      trials=trials,
                      algo=partial(suggest, stop_at=0.398),
                      rstate=np.random.RandomState(ii),
                      max_evals=50)
        plt.subplot(2, 1, 1)
        plt.cla()
        ys = trials.losses()
        all_ys.append(ys)
        for ys_jj in all_ys:
            plt.plot(ys_jj)
        plt.plot(trials.losses())
        plt.subplot(2, 1, 2)
        plt.cla()
        for ys_jj in all_ys:
            plt.plot(ys_jj)
        plt.ylim(0, 1)
        plt.axhline(np.min(ys))
        plt.annotate('min=%f' % np.min(ys), xy=(1, np.min(ys)))
        plt.draw()
        mins.append(min(ys))
        print('MINS', mins)
    assert np.max(mins) < 0.398

def find_best_params(train_pool, val_pool, model, const_params, parameter_space,
                     fit_params=None, max_evals=25, cv_splitter=None,
                     cv_scoring=None, cat_features=None):
    objective = HyperoptObjective(train_pool, val_pool, model, const_params,
                                  fit_params, cv_splitter, cv_scoring, cat_features)
    # The HyperOpt Trials object stores details of every iteration.
    trials = hyperopt.Trials()
    best_params = hyperopt.fmin(fn=objective,
                                space=parameter_space,
                                algo=hyperopt.tpe.suggest,
                                rstate=np.random.RandomState(seed=42),
                                max_evals=max_evals,
                                trials=trials)
    best_params.update(const_params)
    return best_params, trials

def find_best_params(X_train, y_train, X_test, y_test, model, const_params,
                     parameter_space, max_evals=25):
    objective = HyperoptObjective(X_train, y_train, X_test, y_test, model,
                                  const_params, is_multiclass)
    # The HyperOpt Trials object stores details of every iteration.
    # https://github.com/hyperopt/hyperopt/wiki/FMin#12-attaching-extra-information-via-the-trials-object
    trials = hyperopt.Trials()
    # Hyperopt's fmin returns only parameters from the search space. Therefore,
    # before returning best_params we merge them with the const params, so all
    # parameters needed to train the best model are in one place.
    best_params = hyperopt.fmin(fn=objective,
                                space=parameter_space,
                                algo=hyperopt.tpe.suggest,
                                max_evals=max_evals,
                                trials=trials)
    best_params = space_eval(parameter_space, best_params)
    best_params.update(const_params)
    return best_params, trials

def __init__(self,
             space,
             max_concurrent=10,
             reward_attr=None,
             metric="episode_reward_mean",
             mode="max",
             points_to_evaluate=None,
             n_initial_points=20,
             random_state_seed=None,
             gamma=0.25,
             **kwargs):
    assert hpo is not None, "HyperOpt must be installed!"
    from hyperopt.fmin import generate_trials_to_calculate
    assert type(max_concurrent) is int and max_concurrent > 0
    assert mode in ["min", "max"], "`mode` must be 'min' or 'max'!"

    if reward_attr is not None:
        mode = "max"
        metric = reward_attr
        logger.warning(
            "`reward_attr` is deprecated and will be removed in a future "
            "version of Tune. "
            "Setting `metric={}` and `mode=max`.".format(reward_attr))

    self._max_concurrent = max_concurrent
    self._metric = metric
    # hyperopt internally minimizes, so "max" => -1
    if mode == "max":
        self._metric_op = -1.
    elif mode == "min":
        self._metric_op = 1.

    if n_initial_points is None:
        self.algo = hpo.tpe.suggest
    else:
        self.algo = partial(hpo.tpe.suggest, n_startup_jobs=n_initial_points)
    if gamma is not None:
        self.algo = partial(self.algo, gamma=gamma)

    self.domain = hpo.Domain(lambda spc: spc, space)
    if points_to_evaluate is None:
        self._hpopt_trials = hpo.Trials()
        self._points_to_evaluate = 0
    else:
        assert type(points_to_evaluate) == list
        self._hpopt_trials = generate_trials_to_calculate(points_to_evaluate)
        self._hpopt_trials.refresh()
        self._points_to_evaluate = len(points_to_evaluate)

    self._live_trial_mapping = {}
    if random_state_seed is None:
        self.rstate = np.random.RandomState()
    else:
        self.rstate = np.random.RandomState(random_state_seed)

    super(HyperOptSearch, self).__init__(metric=self._metric, mode=mode, **kwargs)

def find_best_hyper_params(self):
    trials = hyperopt.Trials()
    # Pass the Trials object so fmin records every evaluation.
    best = hyperopt.fmin(
        fn=self.objective,
        space=self.space,
        algo=hyperopt.rand.suggest,
        max_evals=self.max_evals,
        trials=trials,
        rstate=np.random.RandomState(seed=self.random_state))
    return best

def convertResultsToTrials(self, hyperparameterSpace, results):
    trials = hyperopt.Trials()
    parameters = Hyperparameter(hyperparameterSpace).getFlatParameters()
    for resultIndex, result in enumerate(results):
        data = {
            'book_time': datetime.datetime.now(),
            'exp_key': None,
            'misc': {
                'cmd': ('domain_attachment', 'FMinIter_Domain'),
                'idxs': {},
                'tid': resultIndex,
                'vals': {},
                'workdir': None
            },
            'owner': None,
            'refresh_time': datetime.datetime.now(),
            'result': {
                'loss': result['loss'],
                'status': result['status']
            },
            'spec': None,
            'state': 2,
            'tid': resultIndex,
            'version': 0
        }
        for key in result.keys():
            if key not in self.resultInformationKeys:
                matchingParameters = [
                    parameter for parameter in parameters
                    if parameter.name == key
                ]
                if len(matchingParameters) == 0:
                    raise ValueError("Our hyperparameter search space did not contain a "
                                     + key + " parameter.")
                parameter = matchingParameters[0]
                value = result[key]
                # Use equality, not identity, when comparing against the empty string.
                if value != "":
                    if 'enum' in parameter.config:
                        data['misc']['idxs']['root.' + key] = [resultIndex]
                        data['misc']['vals']['root.' + key] = [
                            parameter.config['enum'].index(value)
                        ]
                    elif parameter.config['type'] == 'number':
                        data['misc']['idxs']['root.' + key] = [resultIndex]
                        data['misc']['vals']['root.' + key] = [value]
                else:
                    data['misc']['idxs']['root.' + key] = []
                    data['misc']['vals']['root.' + key] = []
        trials.insert_trial_doc(data)
    return trials

def create_fmin(self):
    self.fmin = hyperopt.FMinIter(self.hyperopt_algorithm,
                                  self.hyperopt_domain,
                                  trials=hyperopt.Trials(),
                                  max_evals=-1,
                                  rstate=self.hyperopt_rstate,
                                  verbose=False)
    self.fmin.catch_eval_exceptions = False

def run_experiment(opt_space, evaluator: EvaluationFunction, n_searches: int,
                   n_trials: int, n_final_trials: int, extra: ExtraArgs):
    def opt_wrapper(hh):
        perf, measurements = execute_trials(hh, evaluator, n_trials, extra=extra)
        return {'loss': perf, 'status': hyperopt.STATUS_OK,
                'user_data': measurements, 'hp': hh}

    print("Hyperparameters grid:")
    print(opt_space)
    print("")

    extra.is_final_trials = False
    if n_searches > 0:
        trials = hyperopt.Trials()
        # `functools.partial` (not `hyperopt.partial`, which does not exist) is
        # used to fix the suggestion-mix probabilities.
        best = hyperopt.fmin(
            opt_wrapper,
            space=opt_space,
            algo=functools.partial(hyperopt.mix.suggest,
                                   p_suggest=[
                                       (.5, hyperopt.rand.suggest),
                                       (.4, hyperopt.tpe.suggest),
                                       (.1, hyperopt.anneal.suggest)]),
            max_evals=n_searches,
            trials=trials
        )
        print(trials.best_trial)
        print(best)
        best_hyperparams = trials.best_trial['result']['hp']
        msr = ModelSelectionResult(opt_space, best_hyperparams, n_searches, n_trials,
                                   trials.best_trial['result']['user_data'])
        # If using early stopping, average the number of epochs
        if 'epochs' in best_hyperparams:
            try:
                best_hyperparams['epochs'] = round(statistics.mean(
                    [v.actual_epochs
                     for v in trials.best_trial['result']['user_data']
                     if v.actual_epochs is not None]))
            except statistics.StatisticsError:
                pass  # No actual_epochs != None
    else:
        best_hyperparams = opt_space
        msr = ModelSelectionResult(opt_space, best_hyperparams, n_searches, n_trials, [])

    print(f"\n\nNow running {n_final_trials} final trials...")
    # Enable the computation of the scores on the test set
    extra.is_final_trials = True
    # Compute more accurate values for the scores associated to the best hyperparametrization
    _, final_measures = execute_trials(best_hyperparams, evaluator, n_final_trials, extra=extra)
    print_final_trials_result(opt_space, msr, final_measures)

def tuning(cls, data, labels, space, max_evals, tune_algo='tpe', cv=10):
    tune = cls._get_tune(data, labels, cv=cv)
    trials = hyperopt.Trials()
    if tune_algo == 'tpe':
        algo = hyperopt.tpe.suggest
    elif tune_algo == 'random':
        algo = hyperopt.rand.suggest
    else:
        raise ValueError("tune_algo must be 'tpe' or 'random'")
    best = hyperopt.fmin(tune,
                         space,
                         algo=algo,
                         max_evals=max_evals,
                         trials=trials)
    return best, trials

def _load_trials(self) -> hyperopt.Trials:
    """
    At the moment this discards old trials if they contain any unused params.
    Maybe we can go a more thrifty way.
    """
    if os.path.isfile(self.trials_path):
        with open(self.trials_path, "rb") as f:
            trials = pickle.load(f)
    else:
        trials = hyperopt.Trials()
    return trials

def hyperopt_searchcv(model, train_data, features):
    num_round = model_cv(model, train_data[features], train_data['y'])

    def XGB_CV(params):
        # x_train, x_predict, y_train, y_predict
        ratio = np.sum(train_data.y == 0) / float(np.sum(train_data.y == 1))
        ss_fold = ShuffleSplit(n_splits=5, random_state=11, test_size=0.25)
        _model = xgb.XGBClassifier(
            max_depth=int(params['max_depth']),
            objective='binary:logistic',
            booster='gbtree',
            n_estimators=num_round,
            learning_rate=model.get_params()['learning_rate'],
            colsample_bytree=params['colsample_bytree'],
            colsample_bylevel=1,
            subsample=params['subsample'],
            gamma=params['gamma'],
            min_child_weight=params['min_child_weight'],
            scale_pos_weight=float(ratio),
            reg_alpha=params['reg_alpha'],
            reg_lambda=params['reg_lambda'],
            seed=11)
        metric = cross_val_score(_model,
                                 train_data[features],
                                 train_data.y,
                                 cv=ss_fold,
                                 scoring="neg_mean_squared_error")
        return min(-metric)

    from numpy.random import RandomState
    import hyperopt

    trials_2 = hyperopt.Trials()
    params_space = {
        "max_depth": hp.quniform("max_depth", 3, 6, 1),
        # "n_estimators": hp.randint("n_estimators", 300),
        # 'learning_rate': hp.uniform('learning_rate', 0.01, 0.3),
        'gamma': hp.randint('gamma', 30),
        "subsample": hp.uniform("subsample", 0.4, 1),
        "min_child_weight": hp.randint("min_child_weight", 300),
        'colsample_bytree': hp.uniform('colsample_bytree', 0.6, 1),
        'reg_lambda': hp.randint('reg_lambda', 30),
        'reg_alpha': hp.randint('reg_alpha', 30)
    }
    best = fmin(fn=XGB_CV,
                space=params_space,
                algo=tpe.suggest,
                max_evals=20,
                trials=trials_2,
                rstate=RandomState(123))
    best_params = space_eval(params_space, best)
    best_params['n_estimators'] = num_round
    best_params['max_depth'] = int(best_params['max_depth'])
    return best_params

def __init__(
        self,
        space=None,
        metric=None,
        mode=None,
        points_to_evaluate=None,
        n_initial_points=20,
        random_state_seed=None,
        gamma=0.25,
        max_concurrent=None,
        use_early_stopped_trials=None,
):
    assert hpo is not None, (
        "HyperOpt must be installed! Run `pip install hyperopt`.")
    if mode:
        assert mode in ["min", "max"], "`mode` must be 'min' or 'max'."
    from hyperopt.fmin import generate_trials_to_calculate

    super(HyperOptSearch, self).__init__(
        metric=metric,
        mode=mode,
        max_concurrent=max_concurrent,
        use_early_stopped_trials=use_early_stopped_trials)
    self.max_concurrent = max_concurrent
    # hyperopt internally minimizes, so "max" => -1
    if mode == "max":
        self.metric_op = -1.
    elif mode == "min":
        self.metric_op = 1.

    if n_initial_points is None:
        self.algo = hpo.tpe.suggest
    else:
        self.algo = partial(hpo.tpe.suggest, n_startup_jobs=n_initial_points)
    if gamma is not None:
        self.algo = partial(self.algo, gamma=gamma)

    if points_to_evaluate is None:
        self._hpopt_trials = hpo.Trials()
        self._points_to_evaluate = 0
    else:
        assert isinstance(points_to_evaluate, (list, tuple))
        self._hpopt_trials = generate_trials_to_calculate(points_to_evaluate)
        self._hpopt_trials.refresh()
        self._points_to_evaluate = len(points_to_evaluate)

    self._live_trial_mapping = {}
    if random_state_seed is None:
        self.rstate = np.random.RandomState()
    else:
        self.rstate = np.random.RandomState(random_state_seed)

    self.domain = None
    if space:
        self.domain = hpo.Domain(lambda spc: spc, space)
