def get_iter(
    fn,
    space,
    algo,
    max_evals,
    trials=None,
    rstate=None,
    pass_expr_memo_ctrl=None,
    catch_eval_exceptions=False,
    verbose=0,
    points_to_evaluate=None,
    max_queue_len=1,
    show_progressbar=False,
):
    """Build an ``FMinIter`` for ``fn`` over ``space`` without running it.

    Parameters
    ----------
    fn, space, pass_expr_memo_ctrl
        Forwarded to ``base.Domain``.
    algo, max_evals, verbose, max_queue_len, show_progressbar
        Forwarded to ``FMinIter``.
    trials : object or None
        Trials storage handed to ``FMinIter``. When None, a new one is
        created: ``base.Trials()`` if ``points_to_evaluate`` is None,
        otherwise ``generate_trials_to_calculate(points_to_evaluate)``.
    rstate : numpy.random.RandomState or None
        When None, a ``RandomState`` is created; it is seeded from the
        ``HYPEROPT_FMIN_SEED`` environment variable if that is set to a
        non-empty string.
    catch_eval_exceptions : bool
        Stored on the returned iterator as ``catch_eval_exceptions``.
    points_to_evaluate : list or None
        Only consulted when ``trials`` is None. Must be a list.

    Returns
    -------
    The configured ``FMinIter`` instance.
    """
    if rstate is None:
        env_rseed = os.environ.get('HYPEROPT_FMIN_SEED', '')
        if env_rseed:
            rstate = np.random.RandomState(int(env_rseed))
        else:
            rstate = np.random.RandomState()

    if trials is None:
        if points_to_evaluate is None:
            trials = base.Trials()
        else:
            # isinstance (rather than an exact type comparison) also accepts
            # list subclasses; AssertionError is kept for compatibility.
            assert isinstance(points_to_evaluate, list), \
                "points_to_evaluate must be a list"
            trials = generate_trials_to_calculate(points_to_evaluate)

    domain = base.Domain(fn, space, pass_expr_memo_ctrl=pass_expr_memo_ctrl)

    rval = FMinIter(
        algo,
        domain,
        trials,
        max_evals=max_evals,
        rstate=rstate,
        verbose=verbose,
        max_queue_len=max_queue_len,
        show_progressbar=show_progressbar,
    )
    rval.catch_eval_exceptions = catch_eval_exceptions
    return rval
def run_task_on_executor(_):
    """Evaluate the captured ``params`` once and yield the outcome.

    The single positional argument is ignored. ``local_eval_function``,
    ``local_space`` and ``params`` are taken from the enclosing scope.
    Exceptions raised by the evaluation propagate to the consumer.
    """
    evaluator = base.Domain(
        local_eval_function, local_space, pass_expr_memo_ctrl=None
    )
    yield evaluator.evaluate(params, ctrl=None, attach_attachments=False)
def fmin(fn, space, algo, max_evals, param_batch_size, trials, rstate=None):
    """Explore ``fn`` over ``space`` by driving a ``Runner`` to completion.

    Partially copied over from hyperopt. The function wraps ``fn`` and
    ``space`` in a ``base.Domain`` and hands everything to ``Runner``; it
    returns nothing, so results are only observable through ``trials``.

    Parameters
    ----------
    fn : callable
        Objective. Per the original documentation it takes a list of
        parameter dicts (e.g. ``[{x: 10, y: 20}, {x: 5, y: -1.5}]``) and
        returns results of the form
        ``{'loss': float(rval), 'status': STATUS_OK}``.
    space : hyperopt.pyll.Apply node
        Search space built from ``hp_<xxx>`` nodes (see ``hyperopt.hp``).
    algo : search algorithm
        For example ``hyperopt.rand.suggest`` or ``hyperopt.tpe.suggest``.
    max_evals : int
        Upper bound on the number of function evaluations.
    param_batch_size : int
        Maximum number of new parameter sets requested from ``algo`` at a
        time; the actual number depends on the search algorithm.
    trials : base.Trials (or subclass)
        Storage for evaluation points, passed to ``Runner`` unchanged.
        NOTE(review): whether ``None`` is accepted depends on ``Runner``,
        which is not visible here.
    rstate : numpy.random.RandomState, optional
        A fresh ``RandomState`` is created when omitted.
    """
    random_state = np.random.RandomState() if rstate is None else rstate

    # The algorithm needs a domain object to learn about the space.
    search_domain = base.Domain(fn, space, pass_expr_memo_ctrl=None)

    Runner(
        algo, search_domain, max_evals, param_batch_size, trials, random_state
    ).run()
def run_task_on_executor(_):
    """Evaluate the captured ``params`` once and yield the result or the error.

    The single positional argument is ignored. ``local_eval_function``,
    ``local_space`` and ``params`` are taken from the enclosing scope.

    Yields
    ------
    Either the value returned by ``domain.evaluate`` or, if the evaluation
    raised, the exception object itself with the formatted traceback
    attached as ``_tb_str``.
    """
    domain = base.Domain(
        local_eval_function, local_space, pass_expr_memo_ctrl=None
    )

    try:
        outcome = domain.evaluate(params, ctrl=None, attach_attachments=False)
    except BaseException as e:
        # Because the traceback is not picklable, we need to format it and
        # pass it back to the driver as a string.
        _traceback_string = traceback.format_exc()
        logger.error(_traceback_string)
        e._tb_str = _traceback_string
        outcome = e

    # Yield outside the try block: if the consumer closes the generator,
    # GeneratorExit is raised at the yield and must not be caught, logged and
    # answered with a second yield (that would raise RuntimeError).
    yield outcome
def get_new_params(experiment: Experiment, rstate, algo=tpe.suggest, n_points=1):
    """Suggest ``n_points`` new parameter sets from an experiment's history.

    The recorded results are replayed as trials: each recorded parameter
    set becomes a trial whose objective value is looked up from
    ``experiment.results``. ``algo`` is then asked for new points.

    Side effect: when the experiment has results, ``experiment.best_params``
    is overwritten with the parameters of ``trials.best_trial``.

    Returns a list with one list of ``Parameter`` objects per suggestion.
    """

    def as_parameters(vals):
        # Each entry of a 'vals' mapping is a sequence; its first item is used.
        return [Parameter(name=key, value=drawn[0]) for key, drawn in vals.items()]

    def recorded_value(args):
        # Objective stand-in: return the value already stored for ``args``.
        return experiment.results[params.index(args)].value

    params = []
    for result in experiment.results:
        params.append({p.name: p.value for p in result.params})

    trials = generate_trials_to_calculate(params)
    trials.refresh()

    space = convert_parameter_space(experiment.parameter_spaces)
    domain = base.Domain(recorded_value, space)
    FMinIter(algo, domain, trials, rstate=rstate).serial_evaluate()

    new_ids = trials.new_trial_ids(n_points)
    seed = rstate.randint(2**31 - 1)
    new_points = algo(new_ids, domain, trials, seed)

    new_params = [as_parameters(point['misc']['vals']) for point in new_points]

    if experiment.results:
        experiment.best_params = as_parameters(
            trials.best_trial['misc']['vals'])

    return new_params
def fmin_persist(fn, space, algo, max_evals, rstate=None,
                 allow_trials_fmin=True, pass_expr_memo_ctrl=None,
                 catch_eval_exceptions=False, verbose=0, trials_pickle=None):
    """Minimize a function over a hyperparameter space, persisting progress.

    More realistically: *explore* a function over a hyperparameter space
    according to a given algorithm, allowing up to a certain number of
    function evaluations. The trials are loaded from ``trials_pickle`` if it
    can be read, and written back to that file after every iteration, so an
    interrupted search can be resumed by calling this function again.

    Parameters
    ----------

    fn : callable (trial point -> loss)
        This function will be called with a value generated from `space`
        as the first and possibly only argument. It can return either
        a scalar-valued loss, or a dictionary. A returned dictionary must
        contain a 'status' key with a value from `STATUS_STRINGS`, and must
        contain a 'loss' key if the status is `STATUS_OK`.

    space : hyperopt.pyll.Apply node
        The set of possible arguments to `fn` is the set of objects
        that could be created with non-zero probability by drawing randomly
        from this stochastic program involving hp_<xxx> nodes
        (see `hyperopt.hp` and `hyperopt.pyll_utils`).

    algo : search algorithm
        This object, such as `hyperopt.rand.suggest` and
        `hyperopt.tpe.suggest` provides logic for sequential search of the
        hyperparameter space.

    max_evals : int
        Allow up to this many function evaluations before returning. If the
        loaded trials already contain at least this many entries, no search
        is run.

    rstate : numpy.RandomState, default from `$HYPEROPT_FMIN_SEED`
        Passed to `FMinIter`. When None, a new
        `numpy.random.RandomState` is created, seeded with
        `int(env['HYPEROPT_FMIN_SEED'])` if that environment variable is set
        to a non-empty string and unseeded otherwise.

    allow_trials_fmin : bool, default True
        Accepted for signature compatibility; not used by this function.

    pass_expr_memo_ctrl : bool, default None
        Forwarded to `base.Domain`.

    catch_eval_exceptions : bool, default False
        Stored on the `FMinIter` instance as `catch_eval_exceptions`.

    verbose : int
        Forwarded to `FMinIter`.

    trials_pickle : str or path-like
        File used to load and store the pickled trials. Required.

    Returns
    -------

    trials : the trials object (loaded from `trials_pickle` or newly
        created) after the search.

    Raises
    ------

    AttributeError
        If `trials_pickle` is None.
    """
    if trials_pickle is None:
        raise AttributeError(
            "trials_pickle filename is required to use this function")

    if rstate is None:
        env_rseed = os.environ.get('HYPEROPT_FMIN_SEED', '')
        if env_rseed:
            rstate = np.random.RandomState(int(env_rseed))
        else:
            rstate = np.random.RandomState()

    try:
        with open(trials_pickle, 'rb') as trialf:
            # NOTE(security): unpickling executes arbitrary code; only point
            # trials_pickle at files written by a trusted source.
            trials = pickle.load(trialf)
    except Exception:
        # Best effort: a missing or unreadable pickle means "start fresh".
        # Exception (not a bare except) so that KeyboardInterrupt and
        # SystemExit are no longer swallowed here.
        trials = base.Trials()

    domain = base.Domain(fn, space, pass_expr_memo_ctrl=pass_expr_memo_ctrl)

    rval = FMinIter(algo, domain, trials, max_evals=max_evals,
                    rstate=rstate,
                    verbose=verbose)
    rval.catch_eval_exceptions = catch_eval_exceptions

    if len(trials) >= max_evals:
        return trials

    for _ in rval:
        print("Trial {} done!.. pickling...".format(len(trials)))
        with open(trials_pickle, 'wb') as trialf:
            pickle.dump(trials, trialf)

    # Dump once more after the loop: because of the way the iterator
    # advances, the last state may not have been written inside the loop.
    with open(trials_pickle, 'wb') as trialf:
        pickle.dump(trials, trialf)
    return trials
# Notebook-style cell: submit an fmin search through `pool`, wait for it to
# finish, then plot the optimisation history and inspect the trials object.
# `pool`, `fmin`, `lgb_run`, `space_lgb`, `trials_lgb`, `suggest` and `base`
# are defined outside this snippet.
res = pool.apply_async(fmin,
                       args=(lgb_run, space_lgb),
                       kwds={
                           "trials": trials_lgb,
                           "algo": suggest,
                           "max_evals": 20
                       })
res.get()  # wait for the submitted call to finish
# %%
from hyperopt.plotting import main_plot_vars, main_plot_history, main_plot_histogram
import matplotlib.pylab as plt
for sp, trls in zip([space_lgb], [trials_lgb]):
    domain = base.Domain(lgb_run, sp)
    # plt.figure(figsize=(20, 40))
    # main_plot_vars(trls, bandit=domain, colorize_best=30, columns=1)
    plt.figure(figsize=(20, 5))
    # plt.ylim((-0.003, 0.003))
    main_plot_history(trls, bandit=domain)
# NOTE: the score used to judge trials_lgb should be the negative of the first
# metric on VA (presumably the validation set), at least in this example.

trials_lgb.trials  # a list of dicts holding every model's parameters and results
trials_lgb.results  # returns the results of all experiments
trials_lgb.miscs  # returns the parameters of all experiments
trials_lgb.vals  # returns, for all experiments, the parameters relevant to updating the space
trials_lgb.trials[0]['misc']['vals']
tmp1 = space_lgb['lgb_param']['bagging_fraction']
def fmin(
    fn,
    space,
    algo,
    max_evals,
    early_stop_round_mode_fun=None,
    early_stop_round=None,
    trials=None,
    rstate=None,
    allow_trials_fmin=False,
    pass_expr_memo_ctrl=None,
    catch_eval_exceptions=False,
    verbose=0,
    return_argmin=True,
    points_to_evaluate=None,
    max_queue_len=1,
    show_progressbar=True,
):
    """Minimize a function over a hyperparameter space.

    More realistically: *explore* a function over a hyperparameter space
    according to a given algorithm, allowing up to a certain number of
    function evaluations.  As points are explored, they are accumulated in
    `trials`

    Parameters
    ----------

    fn : callable (trial point -> loss)
        This function will be called with a value generated from `space`
        as the first and possibly only argument.  It can return either
        a scalar-valued loss, or a dictionary.  A returned dictionary must
        contain a 'status' key with a value from `STATUS_STRINGS`, must
        contain a 'loss' key if the status is `STATUS_OK`. Particular
        optimization algorithms may look for other keys as well.  An
        optional sub-dictionary associated with an 'attachments' key will
        be removed by fmin; its contents will be available via
        `trials.trial_attachments`. The rest (usually all) of the returned
        dictionary will be stored and available later as some 'result'
        sub-dictionary within `trials.trials`.

    space : hyperopt.pyll.Apply node
        The set of possible arguments to `fn` is the set of objects
        that could be created with non-zero probability by drawing randomly
        from this stochastic program involving hp_<xxx> nodes
        (see `hyperopt.hp` and `hyperopt.pyll_utils`).

    algo : search algorithm
        This object, such as `hyperopt.rand.suggest` and
        `hyperopt.tpe.suggest` provides logic for sequential search of the
        hyperparameter space.

    max_evals : int
        Allow up to this many function evaluations before returning.

    early_stop_round_mode_fun : callable, default None
        Forwarded to `FMinIter`. When `early_stop_round` is given and this
        is None, it defaults to a function that returns 1 for any input.

    early_stop_round : default None
        Forwarded to `FMinIter`; its semantics are defined there.

    trials : None or base.Trials (or subclass)
        Storage for completed, ongoing, and scheduled evaluation points.  If
        None, then a temporary `base.Trials` instance will be created.  If
        a trials object, then that trials object will be affected by
        side-effect of this call.

    rstate : numpy.RandomState, default from `$HYPEROPT_FMIN_SEED`
        Each call to `algo` requires a seed value, which should be different
        on each call. This object is used to draw these seeds via `randint`.
        When None, a new `numpy.random.RandomState` is created, seeded with
        `int(env['HYPEROPT_FMIN_SEED'])` if that environment variable is set
        to a non-empty string and unseeded otherwise.

    allow_trials_fmin : bool, default False
        If True and the `trials` argument has an `fmin` attribute, the
        whole call is delegated to `trials.fmin`. Note that the early-stop
        arguments and `points_to_evaluate` are not forwarded in that case.

    pass_expr_memo_ctrl : bool, default None
        If set to True, `fn` will be called in a different more low-level
        way: it will receive raw hyperparameters, a partially-populated
        `memo`, and a Ctrl object for communication with this Trials
        object.

    catch_eval_exceptions : bool, default False
        Stored on the `FMinIter` instance as `catch_eval_exceptions`.

    verbose : int
        Print out some information to stdout during search.

    return_argmin : bool, default True
        Selects the form of the return value; see Returns.

    points_to_evaluate : list, default None
        Only used if trials=None. If a list of dicts is passed, the trials
        object is created from it with `generate_trials_to_calculate`.
        Elements of this list must be in the form of a dictionary with
        variable names as keys and variable values as dict values. Example
        points_to_evaluate value is
        [{'x': 0.0, 'y': 0.0}, {'x': 1.0, 'y': 2.0}]

    max_queue_len : integer, default 1
        Sets the queue length generated in the dictionary or trials.
        Increasing this value helps to slightly speed up parallel
        simulations which sometimes lag on suggesting a new trial.

    show_progressbar : bool, default True
        Show a progressbar.

    Returns
    -------

    If `return_argmin` is True, returns `trials.argmin`. Otherwise returns
    `hyperopt.space_eval(space, trials.argmin)` if `trials` is non-empty;
    that object shares the structure of `space`. If `trials` is empty,
    returns None.

    Raises
    ------

    Exception
        If `return_argmin` is True and there are no trials.
    AssertionError
        If `trials` is None and `points_to_evaluate` is neither None nor a
        list.
    """
    if rstate is None:
        env_rseed = os.environ.get('HYPEROPT_FMIN_SEED', '')
        if env_rseed:
            rstate = np.random.RandomState(int(env_rseed))
        else:
            rstate = np.random.RandomState()

    if allow_trials_fmin and hasattr(trials, 'fmin'):
        return trials.fmin(
            fn,
            space,
            algo=algo,
            max_evals=max_evals,
            max_queue_len=max_queue_len,
            rstate=rstate,
            pass_expr_memo_ctrl=pass_expr_memo_ctrl,
            verbose=verbose,
            catch_eval_exceptions=catch_eval_exceptions,
            return_argmin=return_argmin,
            show_progressbar=show_progressbar,
        )

    if trials is None:
        if points_to_evaluate is None:
            trials = base.Trials()
        else:
            # isinstance (rather than an exact type comparison) also accepts
            # list subclasses; AssertionError is kept for compatibility.
            assert isinstance(points_to_evaluate, list), \
                "points_to_evaluate must be a list"
            trials = generate_trials_to_calculate(points_to_evaluate)

    domain = base.Domain(fn, space, pass_expr_memo_ctrl=pass_expr_memo_ctrl)

    if early_stop_round is not None:
        # max_evals = 1000
        if early_stop_round_mode_fun is None:
            early_stop_round_mode_fun = lambda x: 1

    rval = FMinIter(
        algo,
        domain,
        trials,
        max_evals=max_evals,
        early_stop_round_mode_fun=early_stop_round_mode_fun,
        early_stop_round=early_stop_round,
        rstate=rstate,
        verbose=verbose,
        max_queue_len=max_queue_len,
        show_progressbar=show_progressbar,
    )
    rval.catch_eval_exceptions = catch_eval_exceptions
    rval.exhaust()

    if return_argmin:
        if len(trials.trials) == 0:
            raise Exception(
                "There are no evaluation tasks, cannot return argmin of task losses."
            )
        return trials.argmin

    if len(trials) > 0:
        # Only if there are some trial runs, return the best point in the
        # evaluation space.
        return space_eval(space, trials.argmin)
    return None
def search(self, run_name, store, context, hp_records, runs):
    """Suggest hyperparameter values for ``run_name`` from past runs.

    Past runs are converted into completed hyperopt trials. While fewer
    than three usable runs exist the suggestion comes from ``rand.suggest``;
    after that ``tpe.suggest`` is used.

    Parameters
    ----------
    run_name
        Name of the run to suggest values for; its integer id (from
        ``run_helper.get_int_from_run_name``) is used as the new trial id.
    store
        Not used by this method.
    context
        Provides ``primary_metric`` (key of the metric inside each run) and
        ``maximize_metric`` (when truthy the metric is negated to form a
        loss).
    hp_records
        Iterable of dicts with ``"name"`` and ``"space_func"`` entries; they
        define the search space.
    runs
        Iterable of dicts describing past runs. Runs missing ``"run_name"``,
        ``"hparams"`` or the primary metric are skipped.

    Returns
    -------
    The suggested values after ``self.fixup_hyperopt_hparams``.
    """

    def make_trial(tid, arg_dict, loss_value):
        # Build a trial document already marked as done (state 2).
        return {
            "book_time": None,
            "exp_key": None,
            "owner": None,
            "refresh_time": None,
            "spec": None,
            "state": 2,
            "tid": tid,
            "version": 0,
            "misc": {
                "cmd": ("domain_attachment", "FMinIter_Domain"),
                "idxs": {key: [tid] for key in arg_dict.keys()},
                "tid": tid,
                "vals": arg_dict,
            },
            "result": {"loss": loss_value, "status": "ok"},
        }

    dummy_loss = lambda x: None
    param_space = {r["name"]: r["space_func"] for r in hp_records}
    domain = base.Domain(dummy_loss, param_space)

    rstate = np.random.RandomState()

    # convert runs to Trials
    trial_list = []
    for run in runs:
        # don't trip over inappropriate runs
        if ("run_name" not in run or "hparams" not in run
                or context.primary_metric not in run):
            continue

        loss_value = run[context.primary_metric]
        if context.maximize_metric:
            loss_value = -loss_value

        # Extract a unique int from the past run's name (parent.childnum).
        # A separate local name is used so that the `run_name` parameter is
        # not overwritten by the history being replayed.
        past_tid = run_helper.get_int_from_run_name(run["run_name"])
        trial_list.append(make_trial(past_tid, run["hparams"], loss_value))

    # finally, add our trial_list to trials
    trials = Trials()
    trials.insert_trial_docs(trial_list)
    trials.refresh()

    # get next suggested hyperparameter values from TPE algorithm
    tid = run_helper.get_int_from_run_name(run_name)

    min_trials = 3      # before this, just do rand sampling
    seed = rstate.randint(2 ** 31 - 1)

    if len(trials) < min_trials:
        new_trials = rand.suggest([tid], domain, trials, seed)
    else:
        new_trials = tpe.suggest([tid], domain, trials, seed)

    # apply the suggested hparam values
    arg_dict = new_trials[0]["misc"]["vals"]
    return self.fixup_hyperopt_hparams(param_space, arg_dict)