import hyperopt
from hyperopt import Trials
from hyperopt.pyll.stochastic import sample


def add_trials(points):
    """Build a Trials object from a list of finished points.

    Assumes a module-level hyperopt search space ``space`` and that each
    point is a result dict (with 'loss' and 'status') plus a 'params' dict
    of raw hyperparameter values.
    """
    test_trials = Trials()
    for tid, row in enumerate(points):
        vals = {key: [row['params'][key]] for key in sample(space).keys()}
        hyperopt_trial = Trials().new_trial_docs(
            tids=[tid],
            specs=[None],
            results=[row],
            miscs=[{
                'tid': tid,
                'cmd': ('domain_attachment', 'FMinIter_Domain'),
                'workdir': None,
                'idxs': {key: [tid] for key in sample(space).keys()},
                'vals': vals,
            }])
        # Mark the injected document as finished so TPE treats it as history.
        hyperopt_trial[0]['state'] = hyperopt.JOB_STATE_DONE
        test_trials.insert_trial_docs(hyperopt_trial)
    test_trials.refresh()
    return test_trials
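# A possible way to use add_trials (illustrative sketch, not from the original
# source): `space`, the shape of `points`, and the stand-in objective below are
# assumptions. Note that fmin counts the injected docs toward max_evals, so the
# budget must exceed len(points) for new evaluations to happen.
from hyperopt import fmin, hp, tpe, STATUS_OK

space = {'lr': hp.loguniform('lr', -5, 0)}
points = [
    {'loss': 0.42, 'status': STATUS_OK, 'params': {'lr': 0.010}},
    {'loss': 0.37, 'status': STATUS_OK, 'params': {'lr': 0.031}},
]
warm_trials = add_trials(points)
best = fmin(lambda p: 0.0,  # stand-in objective for the sketch
            space, algo=tpe.suggest, trials=warm_trials,
            max_evals=len(points) + 5)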
def create_trials(losses, statuses, vals, scope_keys):
    trials = Trials()
    tids = trials.new_trial_ids(len(losses))
    specs = [None for _ in range(len(tids))]
    results = []
    miscs = []
    for i in range(len(tids)):
        # On a fresh Trials object new_trial_ids() starts at 0, so the loop
        # index doubles as the trial id recorded in 'idxs'.
        idxs_content = [[i] for _ in scope_keys]
        idxs_vals_content = [[vals[key][i]] for key in scope_keys]
        results.append(dict(loss=losses[i], status=statuses[i]))
        miscs.append(
            dict(tid=tids[i],
                 cmd=None,
                 idxs=dict(zip(scope_keys, idxs_content)),
                 vals=dict(zip(scope_keys, idxs_vals_content))))
    trials.insert_trial_docs(trials.new_trial_docs(tids, specs, results, miscs))
    trials.refresh()
    return trials
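# Hypothetical call to create_trials; the column values are made up. Note that
# the docs are left in hyperopt's default JOB_STATE_NEW here, so depending on
# the hyperopt version you may need to mark them done (as the other snippets
# in this collection do) before TPE treats them as history.
losses = [0.9, 0.7]
statuses = ['ok', 'ok']
vals = {'lr': [0.01, 0.05], 'batch_size': [32, 64]}
trials = create_trials(losses, statuses, vals, scope_keys=['lr', 'batch_size'])
print(trials.losses())  # [0.9, 0.7]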
from typing import Dict, List


def merge_trials(trials: Trials, new_trials_data: List[Dict]) -> Trials:
    """
    Merge a hyperopt Trials object with the contents of another hyperopt
    Trials object.

    :param trials: A hyperopt Trials object containing trials data, organized
        into hierarchical dictionaries.
    :param new_trials_data: The contents of a hyperopt Trials object,
        ``Trials.trials``.
    :return: A hyperopt Trials object, merged from the two inputs.
    """
    max_tid = 0
    if len(trials.trials) > 0:
        max_tid = max(trial['tid'] for trial in trials.trials)

    for trial in new_trials_data:
        # trial ids need to be unique within the merged list of trials
        tid = trial['tid'] + max_tid + 1
        hyperopt_trial = Trials().new_trial_docs(
            tids=[None], specs=[None], results=[None], miscs=[None])
        hyperopt_trial[0] = trial
        hyperopt_trial[0]['tid'] = tid
        hyperopt_trial[0]['misc']['tid'] = tid
        for key in hyperopt_trial[0]['misc']['idxs'].keys():
            hyperopt_trial[0]['misc']['idxs'][key] = [tid]
        trials.insert_trial_docs(hyperopt_trial)
        trials.refresh()
    return trials
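# One assumed scenario for merge_trials: combining histories produced by two
# parallel workers before resuming a single search. The pickle file names are
# hypothetical.
import pickle

with open('worker_0_trials.p', 'rb') as f:
    trials_a = pickle.load(f)
with open('worker_1_trials.p', 'rb') as f:
    trials_b = pickle.load(f)

merged = merge_trials(trials_a, trials_b.trials)
print(len(merged.trials))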
def _get_trials(self, problem: HyperparameterOptimisationProblem, n_resources: int) -> Trials:
    """Based on the method found on GitHub issues to inject trials into Hyperopt."""
    trials = Trials()
    if not self.eval_history:
        return trials

    # One stochastic sample is drawn only to enumerate the hyperparameter names.
    hyperopt_selection = hyperopt.pyll.stochastic.sample(
        problem.get_hyperopt_space_from_hyperparams_to_opt())
    print(hyperopt_selection)

    df_dict = {
        'loss': [],
        **{hp_name: [] for hp_name in hyperopt_selection.keys()},
        'evaluator': [],
        'optimisation_goals': [],
        'eval_time': []
    }
    for evaluator, (r, optimisation_goals, _) in self.evaluations_by_resources.items():
        if self._is_transferable(r, n_resources):
            # Hyperopt minimizes, so flip the sign when the goal is maximisation.
            sign = -1 if self.min_or_max == max else 1
            df_dict['loss'].append(sign * self.optimisation_func(optimisation_goals))
            for hp_name in hyperopt_selection.keys():
                df_dict[hp_name].append(getattr(evaluator.arm, hp_name))
            df_dict['evaluator'].append(evaluator)
            df_dict['optimisation_goals'].append(optimisation_goals)
            df_dict['eval_time'].append(time.time())
    df = pd.DataFrame(df_dict)

    test_trials = Trials()
    for tid, (index, row) in enumerate(df.iterrows()):
        hyperopt_trial = hyperopt.Trials().new_trial_docs(
            tids=[tid],
            specs=[None],
            results=[{'loss': row['loss'], 'status': hyperopt.STATUS_OK}],
            miscs=[{
                'tid': tid,
                'cmd': ('domain_attachment', 'FMinIter_Domain'),
                'idxs': {key: [tid] for key in hyperopt_selection.keys()},
                'vals': {key: [row[key]] for key in hyperopt_selection.keys()},
                'workdir': None
            }])
        hyperopt_trial[0]['state'] = hyperopt.JOB_STATE_DONE
        test_trials.insert_trial_docs(hyperopt_trial)
    test_trials.refresh()
    return test_trials
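# A caveat worth flagging for the snippet above (and the similar ones here):
# writing raw values into misc['vals'] is only safe for numeric distributions
# such as hp.uniform. For hp.choice parameters, hyperopt stores the *index* of
# the chosen option in 'vals', not the option itself. A minimal illustration
# with an assumed space:
from hyperopt import fmin, hp, tpe, Trials

space = {'act': hp.choice('act', ['relu', 'tanh'])}
t = Trials()
fmin(lambda p: 0.0, space, algo=tpe.suggest, trials=t,
     max_evals=1, show_progressbar=False)
print(t.trials[-1]['misc']['vals'])  # e.g. {'act': [1]} -- an index, not 'tanh'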
def get_next_params(current_trials):
    trial_parameters = {}

    def extract_params(params):
        nonlocal trial_parameters
        trial_parameters = params
        return {'loss': 0.5, 'status': STATUS_OK}

    # Run fmin on a throwaway copy so current_trials is left untouched.
    cache_trials = Trials()
    cache_trials.insert_trial_docs(current_trials.trials)
    # cache_trials.refresh()
    # NOTE: the refresh apparently stays commented out on purpose: with the
    # docs inserted but not yet refreshed, fmin's max_evals=1 budget still
    # looks unspent, yet the injected history is picked up when fmin refreshes
    # internally, so exactly one new suggestion is generated.
    fmin(extract_params,
         space,
         algo=tpe.suggest,
         trials=cache_trials,
         max_evals=1,
         return_argmin=False,
         show_progressbar=False)
    trial = cache_trials.trials[-1]
    return trial, trial_parameters
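# Sketch of an ask/tell loop built on get_next_params; evaluate() and the
# overall flow are assumptions for illustration. The placeholder loss recorded
# by extract_params is overwritten with the real one, and the trial id is
# reassigned before re-insertion so ids stay unique (mirroring merge_trials
# above).
trials = Trials()
for _ in range(10):
    trial, params = get_next_params(trials)
    tid = max((t['tid'] for t in trials.trials), default=-1) + 1
    trial['tid'] = tid
    trial['misc']['tid'] = tid
    trial['misc']['idxs'] = {k: [tid] for k in trial['misc']['idxs']}
    trial['result'] = {'loss': evaluate(params),  # hypothetical user function
                       'status': STATUS_OK}
    trials.insert_trial_docs([trial])
    trials.refresh()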
            'split_size': [0]
        },
        'vals': {
            'executor_memory': [7],
            'offheap_size': [1],
            'shuffle_partitions': [16],
            'split_size': [15]
        }
    },
    'exp_key': None,
    'owner': None,
    'version': 0,
    'book_time': None,
    'refresh_time': None
}]

trials.insert_trial_docs(new_trials)
trials.refresh()

tpe_suggest = partial(tpe.suggest, n_startup_jobs=num_random_startup_jobs)
best = fmin(objective_function,
            space=space,
            algo=tpe_suggest,
            max_evals=num_evals,
            trials=trials)
trials.best_trial['result']['loss']

# define an objective function
def objective(args):
    case, val = args
    if case == 'case 1':
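# The partial(tpe.suggest, ...) wrapper above matters when injecting history:
# tpe.suggest falls back to random sampling for its first n_startup_jobs
# trials (20 by default), so with enough pre-loaded trials it can make sense
# to shrink or zero that warm-up (the value below is illustrative):
from functools import partial
from hyperopt import tpe

tpe_suggest = partial(tpe.suggest, n_startup_jobs=0)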
def _fit_on_prepared_data(self, metadata):
    trials = Trials()
    domain = Domain(None, self.hyperopt_search_space)
    rand = np.random.RandomState(constants.RANDOM_SEED)

    all_estimators = []
    all_estimator_results = {}
    for i in range(0, self.num_params, self.parallelism):
        n = min(self.num_params - i, self.parallelism)

        # Using HyperOpt TPE to generate parameters
        hyperopt_params = []
        for j in range(i, i + n):
            new_param = tpe.suggest([j], domain, trials, rand.randint(0, 2 ** 31 - 1))
            new_param[0]['status'] = STATUS_RUNNING
            trials.insert_trial_docs(new_param)
            trials.refresh()
            hyperopt_params.append(new_param[0])

        # Generating Cerebro params from HyperOpt params
        estimator_param_maps = []
        for hyperopt_param in hyperopt_params:
            param = {}
            for k in hyperopt_param['misc']['vals']:
                val = hyperopt_param['misc']['vals'][k][0].item()
                if isinstance(self.search_space[k], _HPChoice):
                    # if the hyperparameter is a choice, the index is returned
                    val = self.search_space[k].options[val]
                param[k] = val
            estimator_param_maps.append(param)

        # Generating Cerebro estimators
        estimators = [self._estimator_gen_fn_wrapper(param) for param in estimator_param_maps]
        estimator_results = {model.getRunId(): {} for model in estimators}
        # log hyperparameters to TensorBoard
        self._log_hp_to_tensorboard(estimators, estimator_param_maps)

        # Trains the models up to the number of epochs specified. Each
        # iteration also performs a validation pass (is_train=False).
        for epoch in range(self.num_epochs):
            epoch_results = self.backend.train_for_one_epoch(
                estimators, self.store, self.feature_cols, self.label_cols)
            update_model_results(estimator_results, epoch_results)

            epoch_results = self.backend.train_for_one_epoch(
                estimators, self.store, self.feature_cols, self.label_cols,
                is_train=False)
            update_model_results(estimator_results, epoch_results)

            self._log_epoch_metrics_to_tensorboard(estimators, estimator_results)

        all_estimators.extend(estimators)
        all_estimator_results.update(estimator_results)

        # HyperOpt TPE update
        for idx, hyperopt_param in enumerate(hyperopt_params):
            hyperopt_param['status'] = STATUS_OK
            hyperopt_param['result'] = {
                'loss': estimator_results[estimators[idx].getRunId()][
                    'val_' + self.evaluation_metric][-1],
                'status': STATUS_OK
            }
        trials.refresh()

    # find the best model and create the ModelSelectionResult
    models = [est.create_model(all_estimator_results[est.getRunId()], est.getRunId(), metadata)
              for est in all_estimators]
    val_metrics = [all_estimator_results[est.getRunId()]['val_' + self.evaluation_metric][-1]
                   for est in all_estimators]
    best_model = models[np.argmin(val_metrics)]

    return ModelSelectionResult(best_model, estimator_results, models,
                                [x + "__output" for x in self.label_cols])
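# The Domain/tpe.suggest pattern above can be reduced to a stand-alone sketch
# (all names below are illustrative): a dummy objective is enough, because
# only the search-space expression is consulted when generating suggestions.
import numpy as np
import hyperopt
from hyperopt import Trials, hp, tpe
from hyperopt.base import Domain

space = {'x': hp.uniform('x', -1, 1)}
domain = Domain(lambda args: 0.0, space)  # objective never evaluated here
trials = Trials()
rng = np.random.RandomState(42)

docs = tpe.suggest(trials.new_trial_ids(1), domain, trials, rng.randint(2 ** 31 - 1))
trials.insert_trial_docs(docs)
trials.refresh()
print(trials.trials[-1]['misc']['vals'])  # e.g. {'x': [0.3...]}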
class Hyperopt(AbstractPlanner):

    def __init__(self, goal='minimize', show_progressbar=False):
        """
        Tree-structured Parzen Estimators (TPE) as implemented in HyperOpt.

        Args:
            goal (str): The optimization goal, either 'minimize' or 'maximize'.
                Default is 'minimize'.
            show_progressbar (bool): If True, show a progressbar.
        """
        AbstractPlanner.__init__(**locals())
        # this is the Hyperopt object that stores the search history
        self._trials = Trials()
        # these are the params in the Hyperopt format
        self._hp_space = None

    def _set_param_space(self, param_space):
        self._param_space = []
        for param in param_space:
            if param.type == 'continuous':
                param_dict = {
                    'name': param.name,
                    'type': param.type,
                    'domain': (param.low, param.high)
                }
                self._param_space.append(param_dict)
        # update hyperopt space accordingly
        self._set_hp_space()

    def _tell(self, observations):
        self._params = observations.get_params(as_array=False)
        self._values = observations.get_values(as_array=True,
                                               opposite=self.flip_measurements)
        # update hyperopt Trials accordingly
        self._set_hp_trials()

    def _set_hp_space(self):
        space = []
        # go through all parameters we have defined and convert them to Hyperopt format
        for param in self._param_space:
            if param['type'] == 'continuous':
                space.append((param['name'],
                              hp.uniform(param['name'],
                                         param['domain'][0],
                                         param['domain'][1])))
        # update instance attribute that is the space input for Hyperopt fmin
        self._hp_space = OrderedDict(space)

    def _set_hp_trials(self):
        self._trials = Trials()
        if self._params is not None and len(self._params) > 0:
            for tid, (param, loss) in enumerate(zip(self._params, self._values)):
                idxs = {k: [tid] for k in param.keys()}
                vals = {k: [v] for k, v in param.items()}
                hyperopt_trial = Trials().new_trial_docs(
                    tids=[tid],
                    specs=[None],
                    results=[{'loss': loss, 'status': STATUS_OK}],
                    miscs=[{
                        'tid': tid,
                        'cmd': ('domain_attachment', 'FMinIter_Domain'),
                        'idxs': idxs,
                        'vals': vals,
                        'workdir': None
                    }])
                hyperopt_trial[0]['state'] = JOB_STATE_DONE
                self._trials.insert_trial_docs(hyperopt_trial)
                self._trials.refresh()

    def _ask(self):
        # NOTE: we pass a dummy function as we just ask for the new (+1) set of parameters
        _ = fmin(fn=lambda x: 0,
                 space=self._hp_space,
                 algo=tpe.suggest,
                 max_evals=self.num_generated,
                 trials=self._trials,
                 show_progressbar=self.show_progressbar)
        # make sure the number of parameters asked matches the number of
        # Hyperopt iterations/trials
        assert len(self._trials.trials) == self.num_generated
        # get params from the last dict in trials.trials; each value is a
        # one-element list, so unwrap it
        proposed_params = self._trials.trials[-1]['misc']['vals']
        for key, value in proposed_params.items():
            proposed_params[key] = value[0]
        return ParameterVector(dict=proposed_params, param_space=self.param_space)
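# Shape of the misc entries that _set_hp_trials builds, shown for one
# observation with parameters {'x0': 0.2, 'x1': 1.7} at tid 0 (values made up):
# 'idxs' records which trial ids each parameter appears in, and 'vals' holds
# the per-trial values, keyed and ordered in parallel.
idxs = {'x0': [0], 'x1': [0]}
vals = {'x0': [0.2], 'x1': [1.7]}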
def search(self, run_name, store, context, hp_records, runs):

    def make_trial(tid, arg_dict, loss_value):
        trial = {"book_time": None, "exp_key": None, "owner": None,
                 "refresh_time": None, "spec": None, "state": 0,
                 "tid": tid, "version": 0}
        #trial["result"] = {"status": "New"}

        misc = {}
        trial["misc"] = misc
        misc["cmd"] = ("domain_attachment", "FMinIter_Domain")
        misc["idxs"] = {key: [tid] for key in arg_dict.keys()}
        misc["tid"] = tid
        misc["vals"] = arg_dict

        trial["state"] = 2  # done
        trial["result"] = {"loss": loss_value, "status": "ok"}
        #trial["refresh_time"] = coarse_utcnow()
        return trial

    dummy_loss = lambda x: None
    param_space = {r["name"]: r["space_func"] for r in hp_records}
    domain = base.Domain(dummy_loss, param_space)
    rstate = np.random.RandomState()

    # convert runs to Trials
    trial_list = []
    for run in runs:
        # don't trip over inappropriate runs
        if "run_name" not in run or "hparams" not in run or context.primary_metric not in run:
            continue

        run_name = run["run_name"]
        arg_dict = run["hparams"]
        loss_value = run[context.primary_metric]
        if context.maximize_metric:
            loss_value = -loss_value

        # extract a unique int from run_name (parent.childnum)
        tid = run_helper.get_int_from_run_name(run_name)

        trial = make_trial(tid, arg_dict, loss_value)
        trial_list.append(trial)

    # finally, add our trial_list to trials
    trials = Trials()
    trials.insert_trial_docs(trial_list)
    trials.refresh()

    # get next suggested hyperparameter values from the TPE algorithm
    tid = run_helper.get_int_from_run_name(run_name)
    min_trials = 3  # before this, just do random sampling
    seed = rstate.randint(2 ** 31 - 1)

    if len(trials) < min_trials:
        new_trials = rand.suggest([tid], domain, trials, seed)
    else:
        new_trials = tpe.suggest([tid], domain, trials, seed)

    # apply the suggested hparam values
    trial = new_trials[0]
    arg_dict = trial["misc"]["vals"]
    arg_dict = self.fixup_hyperopt_hparams(param_space, arg_dict)
    return arg_dict
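# The magic numbers in make_trial map onto hyperopt's job-state constants, so
# the literals could equivalently be written with the named values:
import hyperopt

assert hyperopt.JOB_STATE_NEW == 0   # "state": 0 at creation
assert hyperopt.JOB_STATE_DONE == 2  # trial["state"] = 2 once a loss is attached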
class _HyperOpt(base.Optimizer):
    # pylint: disable=too-many-instance-attributes

    def __init__(
        self,
        parametrization: IntOrParameter,
        budget: tp.Optional[int] = None,
        num_workers: int = 1,
        *,
        prior_weight: float = 1.0,
        n_startup_jobs: int = 20,
        n_EI_candidates: int = 24,
        gamma: float = 0.25,
        verbose: bool = False,
    ) -> None:
        super().__init__(parametrization, budget=budget, num_workers=num_workers)
        try:
            # try to convert parametrization to a hyperopt search space
            if not isinstance(self.parametrization, p.Instrumentation):
                raise NotImplementedError
            self.space = _get_search_space(self.parametrization.name, self.parametrization)
            self._transform = None
        except NotImplementedError:
            self._transform = transforms.ArctanBound(0, 1)
            self.space = {f"x_{i}": hp.uniform(f"x_{i}", 0, 1) for i in range(self.dimension)}

        self.trials = Trials()
        self.domain = Domain(fn=None, expr=self.space, pass_expr_memo_ctrl=False)
        self.tpe_args = {
            "prior_weight": prior_weight,
            "n_startup_jobs": n_startup_jobs,
            "n_EI_candidates": n_EI_candidates,
            "gamma": gamma,
            "verbose": verbose,
        }

    def _internal_ask_candidate(self) -> p.Parameter:
        # Inspired by the FMinIter class (hyperopt)
        next_id = self.trials.new_trial_ids(1)
        new_trial = tpe.suggest(next_id, self.domain, self.trials,
                                self._rng.randint(2 ** 31 - 1), **self.tpe_args)[0]
        self.trials.insert_trial_doc(new_trial)
        self.trials.refresh()
        candidate = self.parametrization.spawn_child()
        if self._transform:
            data = np.array([new_trial["misc"]["vals"][f"x_{i}"][0]
                             for i in range(self.dimension)])
            candidate = candidate.set_standardized_data(self._transform.backward(data))
            # For consistency, we need to update the hyperopt history
            # when the standardized data is changed
            if any(data != self._transform.forward(
                    candidate.get_standardized_data(reference=self.parametrization))):
                for it, val in enumerate(self._transform.forward(
                        candidate.get_standardized_data(reference=self.parametrization))):
                    self.trials._dynamic_trials[next_id[0]]["misc"]["vals"][f"x_{it}"][0] = val
        else:
            spec = hyperopt.base.spec_from_misc(new_trial["misc"])
            config = hyperopt.space_eval(self.space, spec)
            candidate.value = _hp_dict_to_parametrization(config)
        candidate._meta["trial_id"] = new_trial["tid"]
        return candidate

    def _internal_tell_candidate(self, candidate: p.Parameter, loss: float) -> None:
        result = {"loss": loss, "status": "ok"}
        assert "trial_id" in candidate._meta
        tid = candidate._meta["trial_id"]
        assert self.trials._dynamic_trials[tid]["state"] == hyperopt.JOB_STATE_NEW
        now = hyperopt.utils.coarse_utcnow()
        self.trials._dynamic_trials[tid]["book_time"] = now
        self.trials._dynamic_trials[tid]["refresh_time"] = now
        self.trials._dynamic_trials[tid]["state"] = hyperopt.JOB_STATE_DONE
        self.trials._dynamic_trials[tid]["result"] = result
        self.trials._dynamic_trials[tid]["refresh_time"] = hyperopt.utils.coarse_utcnow()
        self.trials.refresh()

    def _internal_tell_not_asked(self, candidate: p.Parameter, loss: float) -> None:
        next_id = self.trials.new_trial_ids(1)
        new_trial = hyperopt.rand.suggest(next_id, self.domain, self.trials,
                                          self._rng.randint(2 ** 31 - 1))
        self.trials.insert_trial_docs(new_trial)
        self.trials.refresh()
        tid = next_id[0]
        if self._transform:
            data = candidate.get_standardized_data(reference=self.parametrization)
            data = self._transform.forward(data)
            self.trials._dynamic_trials[tid]["misc"]["vals"] = {
                f"x_{i}": [data[i]] for i in range(len(data))
            }
        else:
            null_config: dict = {
                k: [] for k in self.trials._dynamic_trials[tid]["misc"]["vals"].keys()
            }
            new_vals: dict = _hp_parametrization_to_dict(candidate, default=null_config)
            self.trials._dynamic_trials[tid]["misc"]["vals"] = new_vals
        self.trials.refresh()
        candidate._meta["trial_id"] = tid
        self._internal_tell_candidate(candidate, loss)
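# Possible ask/tell usage of the wrapper above, assuming it can be
# instantiated directly like other nevergrad optimizers (an integer
# parametrization becomes an array of that dimension):
opt = _HyperOpt(parametrization=2, budget=20)
for _ in range(20):
    cand = opt.ask()
    loss = float(sum(x ** 2 for x in cand.value))  # toy objective
    opt.tell(cand, loss)
print(opt.provide_recommendation().value)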