def merge_trials(trials: Trials, new_trials_data: List[Dict]) -> Trials:
    """
    Merge a hyperopt trials object with the contents of another hyperopt trials object.

    Every incoming trial document is re-numbered so that its ``tid`` cannot
    collide with a ``tid`` already stored in ``trials``.

    NOTE: the documents in ``new_trials_data`` are modified in place (their
    ``tid``, ``misc['tid']`` and ``misc['idxs']`` are rewritten) before being
    inserted, and ``trials`` itself is updated in place and returned.

    :param trials: A hyperopt trials object containing trials data, organized
        into hierarchical dictionaries.
    :param new_trials_data: The contents of a hyperopt trials object,
        `Trials.trials`.
    :return: A hyperopt trials object, merged from the two inputs.
    """
    max_tid = 0
    if len(trials.trials) > 0:
        max_tid = max(trial['tid'] for trial in trials.trials)

    for trial in new_trials_data:
        # Trial id needs to be unique among this list of ids.
        tid = trial['tid'] + max_tid + 1
        trial['tid'] = tid
        trial['misc']['tid'] = tid

        idxs = trial['misc']['idxs']
        for key in idxs:
            # An empty index list marks a parameter that was not active for
            # this trial (conditional search spaces); its `vals` entry is
            # empty too, so it must stay empty to keep idxs/vals aligned.
            if idxs[key]:
                idxs[key] = [tid]

        trials.insert_trial_docs([trial])
        trials.refresh()
    return trials
def add_trials(points):
    """
    Build a hyperopt ``Trials`` object from already-evaluated points.

    :param points: Iterable of result dictionaries. Each one is stored as the
        trial's ``result`` and must contain a ``'params'`` mapping holding a
        value for every key of the module-level ``space``.
        NOTE(review): presumably each dictionary also carries ``loss`` and
        ``status`` for hyperopt to use it - confirm against the caller.
    :return: A ``Trials`` object with one finished trial per point.
    """
    # Draw the parameter names once. Sampling the space separately for
    # `idxs` and `vals` (as was done before) could give the two mappings
    # different keys whenever the space has conditional branches.
    param_names = list(sample(space).keys())

    test_trials = Trials()
    for tid, row in enumerate(points):
        vals = {key: [row['params'][key]] for key in param_names}
        idxs = {key: [tid] for key in param_names}
        hyperopt_trial = Trials().new_trial_docs(
            tids=[tid],
            specs=[None],
            results=[row],
            miscs=[{
                'tid': tid,
                'cmd': ('domain_attachment', 'FMinIter_Domain'),
                'workdir': None,
                'idxs': idxs,
                'vals': vals,
            }])
        hyperopt_trial[0]['state'] = hyperopt.JOB_STATE_DONE
        test_trials.insert_trial_docs(hyperopt_trial)
    # One refresh after all inserts yields the same final state as
    # refreshing after every insert.
    test_trials.refresh()
    return test_trials
def create_trials(losses, statuses, vals, scope_keys):
    """
    Assemble a hyperopt ``Trials`` object from parallel result sequences.

    :param losses: Sequence with one loss per trial.
    :param statuses: Sequence with one status per trial, aligned with ``losses``.
    :param vals: Mapping from parameter name to the sequence of values that
        parameter took, indexed by trial position.
    :param scope_keys: Parameter names to record for every trial.
    :return: A refreshed ``Trials`` object holding one document per loss.
    """
    trials = Trials()
    tids = trials.new_trial_ids(len(losses))

    results = []
    miscs = []
    for pos, tid in enumerate(tids):
        # `idxs` is keyed by the trial's position in the inputs, as in the
        # original implementation.
        position_idxs = {key: [pos] for key in scope_keys}
        position_vals = {key: [vals[key][pos]] for key in scope_keys}
        results.append({'loss': losses[pos], 'status': statuses[pos]})
        miscs.append({
            'tid': tid,
            'cmd': None,
            'idxs': position_idxs,
            'vals': position_vals,
        })

    specs = [None] * len(tids)
    docs = trials.new_trial_docs(tids, specs, results, miscs)
    trials.insert_trial_docs(docs)
    trials.refresh()
    return trials
def _get_trials(self, problem: HyperparameterOptimisationProblem, n_resources: int) -> Trials:
    """Based on the method found on Github issues to inject trials into Hyperopt.

    Collects every past evaluation that ``self._is_transferable`` accepts for
    ``n_resources`` and converts it into a finished hyperopt trial.

    :param problem: Provides the hyperopt search space; one sample of that
        space is drawn only to learn the hyperparameter names.
    :param n_resources: Resource level passed to ``self._is_transferable``.
    :return: An empty ``Trials`` when ``self.eval_history`` is empty, otherwise
        a ``Trials`` holding one done trial per transferable evaluation.
    """
    trials = Trials()
    if not self.eval_history:
        return trials

    hyperopt_selection = hyperopt.pyll.stochastic.sample(problem.get_hyperopt_space_from_hyperparams_to_opt())
    print(hyperopt_selection)
    hp_names = list(hyperopt_selection.keys())

    df_dict = {
        'loss': [],
        **{hp_name: [] for hp_name in hp_names},
        'evaluator': [],
        'optimisation_goals': [],
        'eval_time': []
    }
    for evaluator, (r, optimisation_goals, _) in self.evaluations_by_resources.items():
        if self._is_transferable(r, n_resources):
            # hyperopt minimises, so negate the objective when maximising.
            sign = -1 if self.min_or_max == max else 1
            df_dict['loss'].append(sign * self.optimisation_func(optimisation_goals))
            for hp_name in hp_names:
                df_dict[hp_name].append(getattr(evaluator.arm, hp_name))
            df_dict['evaluator'].append(evaluator)
            df_dict['optimisation_goals'].append(optimisation_goals)
            # Append (previously this assignment replaced the list with a
            # scalar) so every column keeps one entry per row.
            df_dict['eval_time'].append(time.time())

    df = pd.DataFrame(df_dict)

    test_trials = Trials()
    for tid, (_, row) in enumerate(df.iterrows()):
        hyperopt_trial = hyperopt.Trials().new_trial_docs(
            tids=[tid],
            specs=[None],
            results=[{'loss': row['loss'], 'status': hyperopt.STATUS_OK}],
            miscs=[{
                'tid': tid,
                'cmd': ('domain_attachment', 'FMinIter_Domain'),
                'idxs': {key: [tid] for key in hp_names},
                'vals': {key: [row[key]] for key in hp_names},
                'workdir': None
            }]
        )
        hyperopt_trial[0]['state'] = hyperopt.JOB_STATE_DONE
        test_trials.insert_trial_docs(hyperopt_trial)
    test_trials.refresh()
    return test_trials
def create_trials(self, complete, losses):
    """
    Convert completed evaluations into a hyperopt ``Trials`` history.

    :param complete: Indices (into ``losses``) of the completed evaluations.
    :param losses: Loss values addressed by the entries of ``complete``.
    :return: A ``Trials`` with one done trial per completed index; an empty
        ``Trials`` when ``complete`` is empty.
    """
    if len(complete) == 0:
        return Trials()

    trials = Trials()
    hist = self.create_history(complete)

    for position, c in enumerate(complete):
        if c >= len(losses):
            error("Index {} is larger than the size of losses {}".format(
                c, len(losses)))
        loss = losses[c]

        ok_results = [create_ok_result(loss, c)]
        miscs = [self.create_misc(position, hist)]
        doc = trials.new_trial_docs([position], [None], ok_results, miscs)[0]

        if self.response_shaping is True:
            # transform log applied loss for enhancing optimization performance
            shaping = self.shaping_func
            if shaping == "log_err":
                loss = apply_log_err(loss)
            elif shaping == "hybrid_log":
                loss = apply_hybrid_log(loss)
            else:
                debug("Invalid shaping function: {}".format(
                    self.shaping_func))

        # The (possibly shaped) loss replaces the result built above.
        doc['result'] = {'loss': float(loss), 'status': STATUS_OK}
        doc['state'] = JOB_STATE_DONE
        trials.insert_trial_doc(doc)

    trials.refresh()
    return trials
def optimize(experiment, search_space, report, max_evals=100):
    """
    Run a TPE search, persisting every result so the search can be resumed.

    :param experiment: Callable taking the sampled parameters and returning a
        JSON-serialisable result that hyperopt accepts as an objective value.
    :param search_space: hyperopt search space passed to ``fmin``.
    :param report: Path of a JSON-lines file. Existing lines are replayed into
        the trials history; new results are appended, one per line.
    :param max_evals: Total evaluation budget given to ``fmin`` (default 100,
        the previously hard-coded value).
    :return: Whatever ``fmin`` returns.
    """
    trials = Trials()
    if os.path.exists(report):
        print("Restoring trials from {}".format(report))
        # Context manager so the read handle is closed before the same file
        # is reopened for appending below.
        with open(report) as previous_results:
            for line in previous_results:
                add_trial_from_json(trials, json.loads(line))
        trials.refresh()

    with open(report, 'a') as report_file:
        def run_experiment(params):
            res = experiment(params)
            report_file.write(json.dumps(res) + '\n')
            # Flush per result so an interrupted search loses nothing.
            report_file.flush()
            return res

        return fmin(fn=run_experiment,
                    space=search_space,
                    algo=tpe.suggest,
                    trials=trials,
                    max_evals=max_evals)
def create_trials(self, completed, losses):
    """
    Convert completed evaluations into a hyperopt ``Trials`` history.

    :param completed: Identifiers of the completed evaluations.
    :param losses: Loss values aligned position-by-position with ``completed``.
    :return: A ``Trials`` holding one done trial per entry whose loss is not
        ``None``; an empty ``Trials`` when ``completed`` is empty.
    """
    if len(completed) > 0:
        trials = Trials()
        hist = self.create_history(completed)

        for index, c in enumerate(completed):
            loss = losses[index]
            rval_specs = [None]
            new_id = index
            rval_results = [create_ok_result(loss, c)]
            rval_miscs = [self.create_misc(index, hist)]

            hopt_trial = trials.new_trial_docs([new_id], rval_specs,
                                               rval_results, rval_miscs)[0]

            if self.response_shaping is True:
                # transform log applied loss for enhancing optimization performance
                if self.shaping_func == "log_err":
                    loss = apply_log_err(loss)
                elif self.shaping_func == "hybrid_log":
                    loss = apply_hybrid_log(loss)
                else:
                    debug("Invalid shaping function: {}".format(self.shaping_func))

            # NOTE(review): the source's indentation was lost; it is assumed
            # here that a trial without a loss is skipped entirely - confirm.
            if loss is not None:
                hopt_trial['result'] = {'loss': float(loss), 'status': STATUS_OK}
                hopt_trial['state'] = JOB_STATE_DONE
                trials.insert_trial_doc(hopt_trial)

        trials.refresh()
        return trials
    else:
        return Trials()
def read_or_recreate_trials(
    hyperparameter_search_dir,
    tuning_dataset=None,
    test_dataset=None,
    tqdm=None,
    overwrite=False,
    do_print=True,
    trials_out_dict=None,
    results_out_dict=None,
    args_out_dict=None,
    params_out_dict=None,
):
    """
    Read the results of a hyperparameter search and load or rebuild its
    hyperopt ``Trials``, one per rotation.

    Rotations are the entries of ``hyperparameter_search_dir`` named '0'..'9'.
    Inside each rotation every sub-directory is a run. A run is used when it
    has no ``error.pkl``, has an args file, and has the checkpoint for its
    final epoch.

    NOTE: results and trials are read with ``pickle.load``; only point this at
    directories whose contents you trust.

    :param hyperparameter_search_dir: Root directory of the search.
    :param tuning_dataset: If given, used to evaluate runs that have no stored
        tuning results (the result is then written to ``tuning_perf.pkl``).
        Runs with no stored tuning results are skipped when this is ``None``.
    :param test_dataset: Same as ``tuning_dataset`` for test results; when it
        is ``None`` a missing test result is recorded as ``None``.
    :param tqdm: Optional progress-bar callable, used as ``tqdm(iterable, desc=...)``.
    :param overwrite: When true, rebuild ``Trials`` even if ``trials.pkl`` exists.
    :param do_print: When true, print the rotations that were found.
    :param trials_out_dict: Optional ``{rotation: Trials}`` cache, updated in place.
    :param results_out_dict: Optional ``{rotation: {run: (tuning, test)}}``
        cache, updated in place; runs already present are not re-read.
    :param args_out_dict: Optional ``{rotation: {run: args}}`` cache, updated in place.
    :param params_out_dict: Optional ``{rotation: {run: params}}`` cache, updated in place.
    :return: ``(config, results_out_dict, args_out_dict, params_out_dict, trials_out_dict)``.
    """
    config = read_config(hyperparameter_search_dir)[0]
    hyperparameter_search_args = HyperparameterSearchArgs.from_json_file(
        os.path.join(hyperparameter_search_dir, HYPERPARAMETER_SEARCH_ARGS_FILENAME))

    filepath = os.path.join(hyperparameter_search_dir, HYP_CONFIG_FILENAME)
    with open(filepath, mode='r') as f:
        raw_config = json.loads(f.read())

    rotations = set(
        x for x in os.listdir(hyperparameter_search_dir)).intersection(
            set(str(i) for i in range(10)))
    if do_print:
        print("Observe runs for rotations: %s" % ', '.join(rotations))

    if trials_out_dict is None:
        trials_out_dict = {}
    if results_out_dict is None:
        results_out_dict = {}
    if args_out_dict is None:
        args_out_dict = {}
    if params_out_dict is None:
        params_out_dict = {}

    if len(rotations) < 4 or tqdm is None:
        rotations_rng = rotations
    else:
        rotations_rng = tqdm(rotations, desc="Reading Rotations")

    for rotation in rotations_rng:
        for d in (results_out_dict, args_out_dict, params_out_dict):
            if rotation not in d:
                d[rotation] = {}

        rotation_results = results_out_dict[rotation]
        rotation_args = args_out_dict[rotation]
        rotation_params = params_out_dict[rotation]

        rotation_dir = os.path.join(hyperparameter_search_dir, rotation)
        run_names = [r for r in os.listdir(rotation_dir) if r != 'trials.pkl']
        run_names_rng = run_names if tqdm is None else tqdm(
            run_names, desc="Reading Runs")

        for run_name in run_names_rng:
            run_dir = os.path.join(rotation_dir, run_name)
            if not os.path.isdir(run_dir):
                print(f"Found file; expecting directory! ({run_dir})")
                continue
            elif os.path.isfile(os.path.join(run_dir, 'error.pkl')):
                continue

            # Lastly, if we don't need results, continue
            if run_name in rotation_results and rotation_results[
                    run_name] is not None:
                continue

            args_filepath = os.path.join(run_dir, ARGS_FILENAME)
            if not os.path.isfile(args_filepath):
                continue
            args = Args.from_json_file(args_filepath)

            num_epochs = args.epochs
            completed_training = os.path.isfile(
                os.path.join(run_dir, 'model.epoch-%d' % (num_epochs - 1)))
            if not completed_training:
                print(
                    f"Run {run_dir} still training (or errored and didn't report)"
                )
                continue

            tuning_result_filepath = os.path.join(run_dir, 'tuning_perf.pkl')
            if os.path.isfile(tuning_result_filepath):
                with open(tuning_result_filepath, mode='rb') as f:
                    tuning = pickle.load(f)
            elif os.path.isfile(
                    os.path.join(run_dir, 'tuning_perf_metrics.pkl')):
                with open(os.path.join(run_dir, 'tuning_perf_metrics.pkl'),
                          mode='rb') as f:
                    tuning = pickle.load(f)
            else:
                print(f'Missing tuning for {run_dir}')
                if tuning_dataset is not None:
                    # set the necessary features in tuning dataset
                    if args.do_masked_imputation:
                        tuning_dataset.imputation_mask_rate = args.imputation_mask_rate
                    _, _, tuning = evaluator.evaluate_multi(
                        tuning_dataset,
                        model_rundir=run_dir,
                        num_random_endpoints=10,
                        batch_size=1024,
                        num_workers=27,
                        evaluate_on_25=True,
                        get_all_reprs=False,
                        tqdm=tqdm)
                    with open(tuning_result_filepath, mode='wb') as f:
                        pickle.dump(tuning, f)
                else:
                    print("Wasn't given a tuning dataset!")
                    continue

            test_result_filepath = os.path.join(run_dir, 'test_perf.pkl')
            if os.path.isfile(test_result_filepath):
                with open(test_result_filepath, mode='rb') as f:
                    test = pickle.load(f)
            elif os.path.isfile(os.path.join(run_dir, 'test_perf_metrics.pkl')):
                with open(os.path.join(run_dir, 'test_perf_metrics.pkl'),
                          mode='rb') as f:
                    test = pickle.load(f)
            else:
                print(f"Have tuning but missing test for {run_dir}/{run_name}")
                test = None
                if test_dataset is not None:
                    if args.do_masked_imputation:
                        test_dataset.imputation_mask_rate = args.imputation_mask_rate
                    _, _, test = evaluator.evaluate_multi(
                        test_dataset,
                        model_rundir=run_dir,
                        num_random_endpoints=10,
                        batch_size=1024,
                        num_workers=27,
                        evaluate_on_25=True,
                        get_all_reprs=False,
                        tqdm=tqdm)
                    with open(test_result_filepath, mode='wb') as f:
                        pickle.dump(test, f)
                else:
                    # This message used to be a bare string expression and
                    # was never shown. The run is still kept, with test=None.
                    print("Wasn't given a test dataset!")

            rotation_results[run_name] = (tuning, test)

            if run_name not in rotation_args or rotation_args[run_name] is None:
                rotation_args[run_name] = args

            if run_name not in rotation_params or rotation_params[
                    run_name] is None:
                params_filepath = os.path.join(run_dir, PARAMS_FILENAME)
                if os.path.isfile(params_filepath):
                    with open(params_filepath, mode='rb') as f:
                        rotation_params[run_name] = pickle.load(f)
                else:
                    rotation_params[run_name] = args_to_params(
                        rotation_args[run_name], raw_config)

        if rotation in trials_out_dict and trials_out_dict[
                rotation] is not None:
            continue

        trials_filepath = os.path.join(rotation_dir, 'trials.pkl')
        if os.path.exists(trials_filepath) and not overwrite:
            with open(trials_filepath, mode='rb') as f:
                trials_out_dict[rotation] = pickle.load(f)
            continue

        # Rebuild Trials
        # TODO(mmd): Something wrong in misc.idxs...
        trials = Trials(exp_key='exp')  # hyperparameter_search_dir
        for run_name in rotation_results:
            args = rotation_args[run_name]
            params = rotation_params[run_name]
            perf_metrics, test_perf_metrics = rotation_results[run_name]

            try:
                loss = ObjectiveFntr.perf_metrics_to_trial_result(
                    perf_metrics,
                    args,
                    single_task=hyperparameter_search_args.single_task_search)
            except Exception as e:
                print(
                    f"Errored computing tuning results for {hyperparameter_search_dir} on rotation "
                    f"{rotation}, {run_name}: {e}")
                traceback.print_exc()
                continue

            # A missing or malformed test result is recorded as NaN rather
            # than dropping the trial.
            try:
                if test_perf_metrics is not None:
                    test_loss = ObjectiveFntr.perf_metrics_to_trial_result(
                        test_perf_metrics,
                        args,
                        single_task=hyperparameter_search_args.
                        single_task_search)
                else:
                    test_loss = np.nan
            except TypeError as e:
                test_loss = np.nan
            except Exception as e:
                if "'NoneType' object is not subscriptable" in str(e):
                    test_loss = np.nan
                else:
                    print(
                        f"Errored computing test results for {hyperparameter_search_dir} on rotation {rotation}, "
                        f"{run_name}: {e}")
                    traceback.print_exc()
                    continue

            # np.nan instead of the alias np.NaN, which NumPy 2.0 removed.
            loss_variance, test_loss_variance = np.nan, np.nan
            result = {
                'status': STATUS_OK,
                'loss': loss,
                'loss_variance': loss_variance,
                'test_loss': test_loss,
                'test_loss_variance': test_loss_variance,
            }
            spec = params

            trials.insert_trial_doc({
                'tid': run_name,
                'spec': spec,
                'result': result,
                'misc': {
                    'tid': run_name,
                    'cmd': '',
                    'idxs': [],
                    'vals': {k: [v] for k, v in spec.items()},
                },
                'state': '',
                'owner': '',
                'book_time': 0,
                'refresh_time': 0,
                'exp_key': 'exp',  # hyperparameter_search_dir,
            })
        trials.refresh()
        trials_out_dict[rotation] = trials

    return config, results_out_dict, args_out_dict, params_out_dict, trials_out_dict
}, 'vals': { 'executor_memory': [7], 'offheap_size': [1], 'shuffle_partitions': [16], 'split_size': [15] } }, 'exp_key': None, 'owner': None, 'version': 0, 'book_time': None, 'refresh_time': None }] trials.insert_trial_docs(new_trials) trials.refresh() tpe_suggest = partial(tpe.suggest, n_startup_jobs=num_random_startup_jobs) best = fmin(objective_function, space=space, algo=tpe_suggest, max_evals=num_evals, trials=trials) trials.best_trial['result']['loss'] # define an objective function def objective(args): case, val = args if case == 'case 1': return val
max_evals=1, return_argmin=False, show_progressbar=False) trial = cache_trials.trials[-1] return trial, trial_parameters for i in range(5): t, tp = get_next_params(global_trials) mse = run_model(tp) t['result']['loss'] = mse t['refresh_time'] = datetime.datetime.now() global_trials.insert_trial_doc(t) global_trials.refresh() print(global_trials.best_trial) """ trials = Trials() print("First recommendation") fmin(param_extractor, space, algo=tpe.suggest, trials=trials, max_evals=1, return_argmin=False) new_trial = trials.trials[-1] new_trial['result']['loss'] = 0.4 print(new_trial) global_trials.insert_trial_doc(new_trial) global_trials.refresh()
def _fit_on_prepared_data(self, metadata):
    """
    Run the hyperopt-TPE driven model search in batches and return the result.

    Hyperparameters are suggested in batches of at most ``self.parallelism``
    until ``self.num_params`` configurations have been generated. Each batch
    is trained for ``self.num_epochs`` epochs, and the last recorded
    ``'val_' + self.evaluation_metric`` value of every model is reported back
    to hyperopt as its loss before the next batch is suggested.

    :param metadata: Passed through unchanged to ``est.create_model``.
    :return: A ``ModelSelectionResult`` whose best model is the one with the
        smallest final validation metric.
    """
    trials = Trials()
    domain = Domain(None, self.hyperopt_search_space)
    rand = np.random.RandomState(constants.RANDOM_SEED)

    all_estimators = []
    all_estimator_results = {}
    for i in range(0, self.num_params, self.parallelism):
        # The last batch may be smaller than `self.parallelism`.
        n = min(self.num_params - i, self.parallelism)

        # Using HyperOpt TPE to generate parameters
        hyperopt_params = []
        for j in range(i, i + n):
            new_param = tpe.suggest([j], domain, trials, rand.randint(0, 2 ** 31 - 1))
            new_param[0]['status'] = STATUS_RUNNING

            trials.insert_trial_docs(new_param)
            trials.refresh()
            # Kept so the doc can be updated with its result after training.
            hyperopt_params.append(new_param[0])

        # Generating Cerebro params from HyperOpt params
        estimator_param_maps = []
        for hyperopt_param in hyperopt_params:
            param = {}
            for k in hyperopt_param['misc']['vals']:
                val = hyperopt_param['misc']['vals'][k][0].item()
                if isinstance(self.search_space[k], _HPChoice):
                    # if the hyperparameter is a choice the index is returned
                    val = self.search_space[k].options[val]
                param[k] = val
            estimator_param_maps.append(param)

        # Generating Cerebro estimators
        estimators = [self._estimator_gen_fn_wrapper(param) for param in estimator_param_maps]
        estimator_results = {model.getRunId(): {} for model in estimators}

        # log hyperparameters to TensorBoard
        self._log_hp_to_tensorboard(estimators, estimator_param_maps)

        # Trains the models up to the number of epochs specified. For each iteration also performs validation
        for epoch in range(self.num_epochs):
            epoch_results = self.backend.train_for_one_epoch(estimators, self.store, self.feature_cols,
                                                             self.label_cols)
            update_model_results(estimator_results, epoch_results)

            epoch_results = self.backend.train_for_one_epoch(estimators, self.store, self.feature_cols,
                                                             self.label_cols, is_train=False)
            update_model_results(estimator_results, epoch_results)

            self._log_epoch_metrics_to_tensorboard(estimators, estimator_results)

        all_estimators.extend(estimators)
        all_estimator_results.update(estimator_results)

        # HyperOpt TPE update
        # NOTE: this loop reuses the name `i` of the enclosing batch loop.
        for i, hyperopt_param in enumerate(hyperopt_params):
            hyperopt_param['status'] = STATUS_OK
            hyperopt_param['result'] = {'loss': estimator_results[estimators[i].getRunId()][
                'val_' + self.evaluation_metric][-1], 'status': STATUS_OK}
        trials.refresh()

    # find the best model and create ModelSearchModel
    models = [est.create_model(all_estimator_results[est.getRunId()], est.getRunId(), metadata) for est in
              all_estimators]
    val_metrics = [all_estimator_results[est.getRunId()]['val_' + self.evaluation_metric][-1] for est in
                   all_estimators]
    best_model = models[np.argmin(val_metrics)]
    # NOTE(review): `estimator_results` holds only the last batch, whereas
    # `models` covers every batch - confirm `all_estimator_results` was not intended.
    return ModelSelectionResult(best_model, estimator_results, models, [x+"__output" for x in self.label_cols])
class Hyperopt(AbstractPlanner):
    """Planner that proposes parameters with hyperopt's TPE algorithm."""

    def __init__(self, goal='minimize', show_progressbar=False):
        """
        Tree of Parzen Estimators (TPE) as implemented in HyperOpt.

        Args:
            goal (str): The optimization goal, either 'minimize' or 'maximize'. Default is 'minimize'.
            show_progressbar (bool): If True, show a progressbar.
        """
        # NOTE: `locals()` must contain exactly self, goal and show_progressbar
        # here, so no local variable may be introduced above this call.
        AbstractPlanner.__init__(**locals())
        self._trials = Trials()  # this is a Hyperopt object that stores the search history
        self._hp_space = None  # these are the params in the Hyperopt format

    def _set_param_space(self, param_space):
        """Store the continuous parameters of `param_space` and rebuild the hyperopt space."""
        self._param_space = []
        for param in param_space:
            # Only continuous parameters are handled; other types are ignored.
            if param.type == 'continuous':
                param_dict = {
                    'name': param.name,
                    'type': param.type,
                    'domain': (param.low, param.high)
                }
                self._param_space.append(param_dict)
        # update hyperopt space accordingly
        self._set_hp_space()

    def _tell(self, observations):
        """Record the observed parameters and values, then rebuild the hyperopt history."""
        self._params = observations.get_params(as_array=False)
        self._values = observations.get_values(as_array=True, opposite=self.flip_measurements)
        # update hyperopt Trials accordingly
        self._set_hp_trials()

    def _set_hp_space(self):
        """Translate `self._param_space` into an ordered mapping of `hp.uniform` nodes."""
        space = []
        # go through all parameters we have defined and convert them to Hyperopt format
        for param in self._param_space:
            if param['type'] == 'continuous':
                low, high = param['domain'][0], param['domain'][1]
                space.append((param['name'], hp.uniform(param['name'], low, high)))
        # update instance attribute that is the space input for Hyperopt fmin
        self._hp_space = OrderedDict(space)

    def _set_hp_trials(self):
        """Rebuild `self._trials` from scratch out of the observations stored by `_tell`."""
        self._trials = Trials()
        if self._params is not None and len(self._params) > 0:
            for tid, (param, loss) in enumerate(zip(self._params, self._values)):
                idxs = {k: [tid] for k, _ in param.items()}
                vals = {k: [v] for k, v in param.items()}
                hyperopt_trial = Trials().new_trial_docs(
                    tids=[tid],
                    specs=[None],
                    results=[{
                        'loss': loss,
                        'status': STATUS_OK
                    }],
                    miscs=[{
                        'tid': tid,
                        'cmd': ('domain_attachment', 'FMinIter_Domain'),
                        'idxs': idxs,
                        'vals': vals,
                        'workdir': None
                    }])
                hyperopt_trial[0]['state'] = JOB_STATE_DONE
                self._trials.insert_trial_docs(hyperopt_trial)
            self._trials.refresh()

    def _ask(self):
        """Ask hyperopt for the next set of parameters and return it as a `ParameterVector`."""
        # NOTE: we pass a dummy function as we just ask for the new (+1) set of parameters
        _ = fmin(fn=lambda x: 0,
                 space=self._hp_space,
                 algo=tpe.suggest,
                 max_evals=self.num_generated,
                 trials=self._trials,
                 show_progressbar=self.show_progressbar)

        # make sure the number of parameters asked matches the number of Hyperopt iterations/trials
        assert len(self._trials.trials) == self.num_generated

        # get params from last dict in trials.trials
        stored_vals = self._trials.trials[-1]['misc']['vals']
        # Unwrap each one-element list into a NEW dict. Unwrapping in place
        # (as before) replaced the lists hyperopt keeps in its own trial
        # document with scalars.
        proposed_params = {key: value[0] for key, value in stored_vals.items()}

        return ParameterVector(dict=proposed_params, param_space=self.param_space)
def search(self, run_name, store, context, hp_records, runs):
    """
    Suggest hyperparameter values for `run_name` from the history in `runs`.

    Past runs are converted into finished hyperopt trials. With fewer than
    three usable trials the suggestion comes from random sampling, otherwise
    from TPE.

    :param run_name: Name of the run that needs hyperparameters; its integer
        form is used as the new trial id.
    :param store: Unused here.
    :param context: Provides `primary_metric` and `maximize_metric`.
    :param hp_records: Iterable of records with `"name"` and `"space_func"`
        entries, defining the search space.
    :param runs: Iterable of past-run dictionaries. Entries lacking
        `"run_name"`, `"hparams"` or the primary metric are skipped.
    :return: Suggested values after `self.fixup_hyperopt_hparams`.
    """

    def make_trial(tid, arg_dict, loss_value):
        # NOTE(review): `vals` is stored exactly as given; hyperopt normally
        # expects each value wrapped in a one-element list - confirm the
        # shape of run["hparams"].
        misc = {
            "cmd": ("domain_attachment", "FMinIter_Domain"),
            "idxs": {key: [tid] for key in arg_dict.keys()},
            "tid": tid,
            "vals": arg_dict,
        }
        return {
            "book_time": None,
            "exp_key": None,
            "owner": None,
            "refresh_time": None,
            "spec": None,
            "state": 2,  # done
            "tid": tid,
            "version": 0,
            "misc": misc,
            "result": {"loss": loss_value, "status": "ok"},
        }

    dummy_loss = lambda x: None
    param_space = {r["name"]: r["space_func"] for r in hp_records}
    domain = base.Domain(dummy_loss, param_space)
    rstate = np.random.RandomState()

    # convert runs to Trials
    trial_list = []
    for run in runs:
        # don't trip over inappropriate runs
        if ("run_name" not in run) or ("hparams" not in run) or (context.primary_metric not in run):
            continue

        # A separate name is used so the `run_name` argument is not
        # overwritten by the last historical run.
        past_run_name = run["run_name"]
        arg_dict = run["hparams"]
        loss_value = run[context.primary_metric]
        if context.maximize_metric:
            # hyperopt minimises, so negate metrics that should be maximised
            loss_value = -loss_value

        # extract a unique int from run_name (parent.childnum)
        past_tid = run_helper.get_int_from_run_name(past_run_name)
        trial_list.append(make_trial(past_tid, arg_dict, loss_value))

    # finally, add our trial_list to trials
    trials = Trials()
    trials.insert_trial_docs(trial_list)
    trials.refresh()

    # get next suggested hyperparameter values from TPE algorithm
    tid = run_helper.get_int_from_run_name(run_name)
    min_trials = 3  # before this, just do rand sampling
    seed = rstate.randint(2 ** 31 - 1)
    if len(trials) < min_trials:
        new_trials = rand.suggest([tid], domain, trials, seed)
    else:
        new_trials = tpe.suggest([tid], domain, trials, seed)

    # apply the suggested hparam values
    trial = new_trials[0]
    arg_dict = trial["misc"]["vals"]
    arg_dict = self.fixup_hyperopt_hparams(param_space, arg_dict)
    return arg_dict
class _HyperOpt(base.Optimizer):
    # pylint: disable=too-many-instance-attributes
    """Optimizer that delegates candidate suggestion to hyperopt's TPE.

    The parametrization is converted to a hyperopt search space when it is an
    ``Instrumentation`` that ``_get_search_space`` supports. Otherwise every
    dimension is modelled as ``hp.uniform(0, 1)`` and mapped through an
    ``ArctanBound(0, 1)`` transform.
    """

    def __init__(
        self,
        parametrization: IntOrParameter,
        budget: tp.Optional[int] = None,
        num_workers: int = 1,
        *,
        prior_weight: float = 1.0,
        n_startup_jobs: int = 20,
        n_EI_candidates: int = 24,
        gamma: float = 0.25,
        verbose: bool = False,
    ) -> None:
        """Set up the search space, the trials store and the TPE arguments.

        ``prior_weight``, ``n_startup_jobs``, ``n_EI_candidates``, ``gamma``
        and ``verbose`` are stored and forwarded unchanged to ``tpe.suggest``.
        """
        super().__init__(parametrization, budget=budget, num_workers=num_workers)
        try:
            # try to convert parametrization to hyperopt search space
            if not isinstance(self.parametrization, p.Instrumentation):
                raise NotImplementedError
            self.space = _get_search_space(self.parametrization.name, self.parametrization)
            # No transform: values are exchanged in the parametrization's own space.
            self._transform = None
        except NotImplementedError:
            # Fallback: one uniform variable in [0, 1] per dimension.
            self._transform = transforms.ArctanBound(0, 1)
            self.space = {
                f"x_{i}": hp.uniform(f"x_{i}", 0, 1) for i in range(self.dimension)
            }

        self.trials = Trials()
        self.domain = Domain(fn=None, expr=self.space, pass_expr_memo_ctrl=False)
        self.tpe_args = {
            "prior_weight": prior_weight,
            "n_startup_jobs": n_startup_jobs,
            "n_EI_candidates": n_EI_candidates,
            "gamma": gamma,
            "verbose": verbose,
        }

    def _internal_ask_candidate(self) -> p.Parameter:
        """Ask TPE for one new trial, record it, and return it as a candidate.

        The hyperopt trial id is stored in ``candidate._meta["trial_id"]`` so
        the later tell can find the matching trial document.
        """
        # Inspired from FMinIter class (hyperopt)
        next_id = self.trials.new_trial_ids(1)
        new_trial = tpe.suggest(next_id, self.domain, self.trials,
                                self._rng.randint(2**31 - 1), **self.tpe_args)[0]
        self.trials.insert_trial_doc(new_trial)
        self.trials.refresh()

        candidate = self.parametrization.spawn_child()

        if self._transform:
            data = np.array([
                new_trial["misc"]["vals"][f"x_{i}"][0] for i in range(self.dimension)
            ])
            candidate = candidate.set_standardized_data(
                self._transform.backward(data))

            # For consistency, we need to update hyperopt history
            # when standardized data is changed
            if any(data != self._transform.forward(
                    candidate.get_standardized_data(
                        reference=self.parametrization))):
                for it, val in enumerate(
                        self._transform.forward(
                            candidate.get_standardized_data(
                                reference=self.parametrization))):
                    self.trials._dynamic_trials[
                        next_id[0]]["misc"]["vals"][f"x_{it}"][0] = val
        else:
            # Evaluate the hyperopt space at the suggested point and convert
            # the resulting configuration back to a parametrization value.
            spec = hyperopt.base.spec_from_misc(new_trial["misc"])
            config = hyperopt.space_eval(self.space, spec)
            candidate.value = _hp_dict_to_parametrization(config)

        candidate._meta["trial_id"] = new_trial["tid"]
        return candidate

    def _internal_tell_candidate(self, candidate: p.Parameter, loss: float) -> None:
        """Mark the candidate's hyperopt trial as done with the given loss.

        Requires ``candidate._meta["trial_id"]`` and a trial that is still in
        ``JOB_STATE_NEW``; both are checked with ``assert``.
        """
        result = {"loss": loss, "status": "ok"}
        assert "trial_id" in candidate._meta
        tid = candidate._meta["trial_id"]
        # NOTE: `_dynamic_trials` is indexed by `tid`, i.e. trial ids are
        # used as list positions.
        assert self.trials._dynamic_trials[tid][
            "state"] == hyperopt.JOB_STATE_NEW

        now = hyperopt.utils.coarse_utcnow()
        self.trials._dynamic_trials[tid]["book_time"] = now
        self.trials._dynamic_trials[tid]["refresh_time"] = now
        self.trials._dynamic_trials[tid]["state"] = hyperopt.JOB_STATE_DONE
        self.trials._dynamic_trials[tid]["result"] = result
        # `refresh_time` is written a second time with a fresh timestamp.
        self.trials._dynamic_trials[tid][
            "refresh_time"] = hyperopt.utils.coarse_utcnow()
        self.trials.refresh()

    def _internal_tell_not_asked(self, candidate: p.Parameter, loss: float) -> None:
        """Register a candidate that hyperopt did not suggest itself.

        A placeholder trial is created with ``rand.suggest``, its ``vals``
        are replaced by the candidate's values, and the loss is then reported
        through ``_internal_tell_candidate``.
        """
        next_id = self.trials.new_trial_ids(1)
        new_trial = hyperopt.rand.suggest(next_id, self.domain, self.trials,
                                          self._rng.randint(2**31 - 1))
        self.trials.insert_trial_docs(new_trial)
        self.trials.refresh()
        tid = next_id[0]

        if self._transform:
            data = candidate.get_standardized_data(
                reference=self.parametrization)
            data = self._transform.forward(data)
            self.trials._dynamic_trials[tid]["misc"]["vals"] = {
                f"x_{i}": [data[i]] for i in range(len(data))
            }
        else:
            # Start from an empty list for every known key; this is passed
            # as `default` to the conversion helper.
            null_config: dict = {
                k: [] for k in self.trials._dynamic_trials[tid]["misc"]
                ["vals"].keys()
            }
            new_vals: dict = _hp_parametrization_to_dict(candidate,
                                                         default=null_config)
            self.trials._dynamic_trials[tid]["misc"]["vals"] = new_vals

        self.trials.refresh()
        candidate._meta["trial_id"] = tid
        self._internal_tell_candidate(candidate, loss)
def read_or_recreate_trials(hyperparameter_search_dir, tqdm=None, overwrite=False):
    """
    Read the results of a hyperparameter search and load or rebuild its
    hyperopt ``Trials``.

    Every sub-directory of ``hyperparameter_search_dir`` is a run. A run is
    used when it has no ``error.pkl``, has a config file, and
    ``trained_until`` reports that it completed its configured epochs.

    NOTE: params and trials are read with ``pickle.load``; only point this at
    directories whose contents you trust.

    :param hyperparameter_search_dir: Root directory of the search.
    :param tqdm: Optional progress-bar callable, used as ``tqdm(iterable)``.
    :param overwrite: When true, rebuild ``Trials`` even if ``trials.pkl`` exists.
    :return: ``(config, all_results, all_configs, all_params, trials)`` where
        ``config`` is the search-level configuration and the three dicts are
        keyed by run name.
    :raises NotImplementedError: If a run has a config file but no params file.
    :raises AssertionError: If a completed run has no tuning-results file.
    """
    config = read_config(hyperparameter_search_dir)[0]

    filepath = os.path.join(hyperparameter_search_dir, HYP_CONFIG_FILENAME)
    with open(filepath, mode='r') as f:
        # Not used below; the read is kept so a missing or invalid file
        # still fails here, as before.
        raw_config = json.load(f)

    all_params, all_results, all_configs = {}, {}, {}

    run_names = [r for r in os.listdir(hyperparameter_search_dir) if r != 'trials.pkl']
    run_names_rng = run_names if tqdm is None else tqdm(run_names)

    # Iterate the (possibly tqdm-wrapped) sequence; it was built before but
    # never used, so the progress bar never appeared.
    for run_name in run_names_rng:
        run_dir = os.path.join(hyperparameter_search_dir, run_name)
        if not os.path.isdir(run_dir):
            print(run_dir)
            continue
        if os.path.isfile(os.path.join(run_dir, 'error.pkl')):
            continue

        config_filepath = os.path.join(run_dir, CONFIG_FILENAME)
        if not os.path.isfile(config_filepath):
            continue
        # A separate name keeps the search-level `config` (returned below)
        # from being overwritten by whichever run is listed last.
        with open(config_filepath, mode='r') as f:
            run_config = json.load(f)
        all_configs[run_name] = run_config

        params_filepath = os.path.join(run_dir, PARAMS_FILENAME)
        if os.path.isfile(params_filepath):
            with open(params_filepath, mode='rb') as f:
                constant, variable = pickle.load(f)
            all_params[run_name] = constant
            all_params[run_name].update(variable)
        else:
            raise NotImplementedError

        num_epochs = run_config['trainer']['epochs']
        completed_training = trained_until(run_dir, run_name, num_epochs)
        if not completed_training:
            print("Run %s still training (or errored and didn't report)" % run_name)
            print(run_name, num_epochs)
            print(os.listdir(run_dir))
            continue

        tuning_results_filename = os.path.join(run_dir, 'ir_metrics_%d.json' % num_epochs)
        assert os.path.isfile(tuning_results_filename), "Missing tuning results for %s" % run_dir
        with open(tuning_results_filename, mode='r') as f:
            all_results[run_name] = json.load(f)

    trials_filepath = os.path.join(hyperparameter_search_dir, 'trials.pkl')
    if os.path.exists(trials_filepath) and not overwrite:
        print("Reloading trials!")
        with open(trials_filepath, mode='rb') as f:
            trials = pickle.load(f)
        return config, all_results, all_configs, all_params, trials

    # Rebuild Trials
    # TODO(mmd): Something wrong in misc.idxs...
    trials = Trials(exp_key='exp')  # hyperparameter_search_dir
    for run_name in all_results:
        params = all_params[run_name]

        loss = all_results[run_name]['Val (Pert):']['median_rank']
        # np.nan instead of the alias np.NaN, which NumPy 2.0 removed.
        loss_variance, test_loss, test_loss_variance = np.nan, np.nan, np.nan
        result = {
            'status': STATUS_OK,
            'loss': loss,
            'loss_variance': loss_variance,
            'test_loss': test_loss,
            'test_loss_variance': test_loss_variance,
        }
        spec = params

        trials.insert_trial_doc({
            'tid': run_name,
            'spec': spec,
            'result': result,
            'misc': {
                'tid': run_name,
                'cmd': '',
                'idxs': [],
                'vals': {k: [v] for k, v in spec.items()},
            },
            'state': JOB_STATE_DONE,
            'owner': '',
            'book_time': 0,
            'refresh_time': 0,
            'exp_key': 'exp',  # hyperparameter_search_dir,
        })
    trials.refresh()

    return config, all_results, all_configs, all_params, trials
def suggest(self, history, searchspace):
    """
    Suggest params to maximize an objective function based on the
    function evaluation history using a tree of Parzen estimators (TPE),
    as implemented in the hyperopt package.

    Use of this function requires that hyperopt be installed.

    `history` is an iterable of `(params, scores, status)` tuples where
    `status` is one of 'SUCCEEDED', 'PENDING' or 'FAILED'; any other status
    raises `RuntimeError`. `self.gamma` and `self.seeds` are forwarded to
    `tpe.suggest` as `gamma` and `n_startup_jobs`.
    """
    # This function is very odd, because as far as I can tell there's
    # no real documented API for any of the internals of hyperopt. Its
    # execution model is that hyperopt calls your objective function
    # (instead of merely providing you with suggested points, and then
    # you calling the function yourself), and it's very tricky (for me)
    # to use the internal hyperopt data structures to get these predictions
    # out directly.

    # so the path we take in this function is to construct a synthetic
    # hyperopt.Trials database from the `history`, and then call
    # hyperopt.fmin with a dummy objective function that logs the value
    # used, and then return that value to our client.

    # The form of the hyperopt.Trials database isn't really documented in
    # the code -- most of this comes from reverse engineering it, by
    # running fmin() on a simple function and then inspecting the form of
    # the resulting trials object.
    if 'hyperopt' not in sys.modules:
        raise ImportError('No module named hyperopt')

    random = check_random_state(self.seed)
    hp_searchspace = searchspace.to_hyperopt()

    trials = Trials()
    for i, (params, scores, status) in enumerate(history):
        if status == 'SUCCEEDED':
            # we're doing maximization, hyperopt.fmin() does minimization,
            # so we need to swap the sign
            result = {'loss': -np.mean(scores), 'status': STATUS_OK}
        elif status == 'PENDING':
            result = {'status': STATUS_RUNNING}
        elif status == 'FAILED':
            result = {'status': STATUS_FAIL}
        else:
            raise RuntimeError('unrecognized status: %s' % status)

        # the vals key in the trials dict is basically just the params
        # dict, but enum variables (hyperopt hp.choice() nodes) are
        # different, because the index of the parameter is specified
        # in vals, not the parameter itself.
        vals = {}
        for var in searchspace:
            if isinstance(var, EnumVariable):
                # get the index in the choices of the parameter, and use
                # that. (`pos` rather than `i`, so the history index used
                # for `tid` below is never shadowed.)
                matches = [
                    pos for pos, c in enumerate(var.choices)
                    if c == params[var.name]
                ]
                assert len(matches) == 1
                vals[var.name] = matches
            else:
                # the other big difference is that all of the param values
                # are wrapped in length-1 lists.
                vals[var.name] = [params[var.name]]

        trials.insert_trial_doc({
            'misc': {
                'cmd': ('domain_attachment', 'FMinIter_Domain'),
                'idxs': dict((k, [i]) for k in hp_searchspace.keys()),
                'tid': i,
                'vals': vals,
                'workdir': None
            },
            'result': result,
            'tid': i,
            # bunch of fixed fields that hyperopt seems to require
            'owner': None,
            'spec': None,
            'state': 2,
            'book_time': None,
            'exp_key': None,
            'refresh_time': None,
            'version': 0
        })

    trials.refresh()
    chosen_params_container = []

    def configured_tpe_suggest(*args, **kwargs):
        # TPE with this strategy's `gamma` and number of random start-up jobs.
        return tpe.suggest(*args,
                           **kwargs,
                           gamma=self.gamma,
                           n_startup_jobs=self.seeds)

    def mock_fn(x):
        # http://stackoverflow.com/a/3190783/1079728
        # to get around no nonlocal keyword in python2
        chosen_params_container.append(x)
        return 0

    # Pass the configured wrapper. Previously plain `tpe.suggest` was passed,
    # so the wrapper was dead code and `gamma` / `seeds` had no effect.
    fmin(fn=mock_fn,
         algo=configured_tpe_suggest,
         space=hp_searchspace,
         trials=trials,
         max_evals=len(trials.trials) + 1,
         **self._hyperopt_fmin_random_kwarg(random))
    chosen_params = chosen_params_container[0]

    return chosen_params
def suggest(self, history, searchspace):
    """Suggest params to maximize an objective function using TPE.

    The suggestion is based on the function evaluation history and uses a
    tree of Parzen estimators (TPE), as implemented in the hyperopt
    package. Use of this function requires that hyperopt be installed.

    Parameters
    ----------
    history : iterable of (params, scores, status)
        Previous evaluations. ``params`` is indexed by variable name,
        ``scores`` is reduced with ``np.mean`` and ``status`` must be one
        of ``'SUCCEEDED'``, ``'PENDING'`` or ``'FAILED'``.
    searchspace : search space object
        Must be iterable over variables that expose ``name`` (and
        ``choices`` for ``EnumVariable``) and provide ``to_hyperopt()``.

    Returns
    -------
    chosen_params
        The first point hyperopt handed to the dummy objective function.

    Raises
    ------
    ImportError
        If hyperopt has not been imported.
    RuntimeError
        If a history entry carries an unrecognized status.
    ValueError
        If the value recorded for an enum variable does not match exactly
        one of its choices.
    """
    # This function is very odd, because as far as I can tell there's
    # no real documented API for any of the internals of hyperopt. Its
    # execution model is that hyperopt calls your objective function
    # (instead of merely providing you with suggested points, and then
    # you calling the function yourself), and it's very tricky (for me)
    # to use the internal hyperopt data structures to get these predictions
    # out directly.

    # So the path we take in this function is to construct a synthetic
    # hyperopt.Trials database from the `history`, and then call
    # hyperopt.fmin with a dummy objective function that logs the value
    # used, and then return that value to our client.

    # The form of the hyperopt.Trials database isn't really documented in
    # the code -- most of this comes from reverse engineering it, by
    # running fmin() on a simple function and then inspecting the form of
    # the resulting trials object.
    if 'hyperopt' not in sys.modules:
        raise ImportError('No module named hyperopt')

    rng = check_random_state(self.seed)
    hp_searchspace = searchspace.to_hyperopt()
    # The key set does not change between trials, so compute it once.
    space_keys = list(hp_searchspace.keys())

    trials = Trials()
    for tid, (params, scores, status) in enumerate(history):
        if status == 'SUCCEEDED':
            # we're doing maximization, hyperopt.fmin() does minimization,
            # so we need to swap the sign
            result = {'loss': -np.mean(scores), 'status': STATUS_OK}
        elif status == 'PENDING':
            result = {'status': STATUS_RUNNING}
        elif status == 'FAILED':
            result = {'status': STATUS_FAIL}
        else:
            raise RuntimeError('unrecognized status: %s' % status)

        # the vals key in the trials dict is basically just the params
        # dict, but enum variables (hyperopt hp.choice() nodes) are
        # different, because the index of the parameter is specified
        # in vals, not the parameter itself.
        vals = {}
        for var in searchspace:
            value = params[var.name]
            if isinstance(var, EnumVariable):
                # get the index in the choices of the parameter, and use
                # that. The comprehension variables deliberately do not
                # reuse the trial-id name: on Python 2 comprehension
                # variables leak into the enclosing scope and would
                # silently overwrite the trial id used below.
                matches = [
                    idx for idx, choice in enumerate(var.choices)
                    if choice == value
                ]
                if len(matches) != 1:
                    raise ValueError(
                        'expected exactly one choice of %r to match %r, '
                        'found %d' % (var.name, value, len(matches)))
                vals[var.name] = matches
            else:
                # the other big difference is that all of the param values
                # are wrapped in length-1 lists.
                vals[var.name] = [value]

        trials.insert_trial_doc({
            'misc': {
                'cmd': ('domain_attachment', 'FMinIter_Domain'),
                'idxs': {key: [tid] for key in space_keys},
                'tid': tid,
                'vals': vals,
                'workdir': None,
            },
            'result': result,
            'tid': tid,
            # bunch of fixed fields that hyperopt seems to require
            'owner': None,
            'spec': None,
            # NOTE(review): 2 appears to be hyperopt.JOB_STATE_DONE, and is
            # used for every entry regardless of status -- confirm.
            'state': 2,
            'book_time': None,
            'exp_key': None,
            'refresh_time': None,
            'version': 0,
        })

    trials.refresh()
    chosen_params_container = []

    def mock_fn(x):
        # http://stackoverflow.com/a/3190783/1079728
        # to get around no nonlocal keyword in python2
        chosen_params_container.append(x)
        return 0

    # Allow exactly one evaluation beyond the synthetic history so that
    # hyperopt proposes a single new point and hands it to mock_fn.
    fmin(fn=mock_fn,
         algo=tpe.suggest,
         space=hp_searchspace,
         trials=trials,
         max_evals=len(trials.trials) + 1,
         **self._hyperopt_fmin_random_kwarg(rng))

    return chosen_params_container[0]
def _load_or_evaluate_metrics(run_dir, filename, dataset, tqdm, missing_msg,
                              no_dataset_msg):
    """Load cached performance metrics for a run, evaluating if needed.

    :param run_dir: Directory of the run; also passed to `evaluate_multi`
        as `model_rundir`.
    :param filename: Name of the pickle file inside `run_dir` holding the
        cached metrics.
    :param dataset: Dataset to evaluate on when no cache exists, or None.
    :param tqdm: Progress-bar callable forwarded to `evaluate_multi`.
    :param missing_msg: Message printed when the cache file is absent.
    :param no_dataset_msg: Message printed when the cache file is absent
        and `dataset` is None.
    :return: `(available, metrics)`; `available` is False only when the
        metrics are neither cached nor computable.
    """
    metrics_filepath = os.path.join(run_dir, filename)
    if os.path.isfile(metrics_filepath):
        # NOTE(review): pickle.load can execute arbitrary code; only point
        # this at search directories you trust.
        with open(metrics_filepath, mode='rb') as f:
            return True, pickle.load(f)

    print(missing_msg)
    if dataset is None:
        print(no_dataset_msg)
        return False, None

    _, _, metrics = evaluate_multi(dataset,
                                   model_rundir=run_dir,
                                   num_random_endpoints=10,
                                   batch_size=1024,
                                   num_workers=27,
                                   evaluate_on_25=True,
                                   get_all_reprs=False,
                                   tqdm=tqdm)
    # Cache the freshly computed metrics so later calls can reuse them.
    with open(metrics_filepath, mode='wb') as f:
        pickle.dump(metrics, f)
    return True, metrics


def _rebuild_rotation_trials(rotation_results, rotation_params):
    """Build a hyperopt Trials object from one rotation's finished runs.

    :param rotation_results: Mapping of run name to
        `(tuning_metrics, test_metrics)`.
    :param rotation_params: Mapping of run name to that run's params.
    :return: A refreshed `Trials` holding one trial document per run.
    """
    # TODO(mmd): Something wrong in misc.idxs...
    trials = Trials(exp_key='exp')  # hyperparameter_search_dir
    for run_name, (perf_metrics, test_perf_metrics) in rotation_results.items():
        spec = rotation_params[run_name]

        # Scores are negated to obtain losses.
        tuning_scores = -pd.Series(
            ObjectiveFntr.perf_metrics_to_trial_result(perf_metrics))
        test_scores = -pd.Series(
            ObjectiveFntr.perf_metrics_to_trial_result(test_perf_metrics))

        result = {
            'status': STATUS_OK,
            'loss': tuning_scores.mean(),
            'loss_variance': tuning_scores.std()**2,
            'test_loss': test_scores.mean(),
            'test_loss_variance': test_scores.std()**2,
        }

        trials.insert_trial_doc({
            'tid': run_name,
            'spec': spec,
            'result': result,
            'misc': {
                'tid': run_name,
                'cmd': '',
                # NOTE(review): other trial builders in this file use a
                # dict of key -> [tid] here; the empty list is probably
                # what the TODO above refers to -- confirm before changing.
                'idxs': [],
                'vals': {k: [v] for k, v in spec.items()},
            },
            # NOTE(review): an empty-string state is not one of hyperopt's
            # job-state constants as far as I can tell -- confirm.
            'state': '',
            'owner': '',
            'book_time': 0,
            'refresh_time': 0,
            'exp_key': 'exp',  # hyperparameter_search_dir,
        })
    trials.refresh()
    return trials


def read_or_recreate_trials(hyperparameter_search_dir,
                            tuning_dataset=None,
                            test_dataset=None,
                            tqdm=None,
                            overwrite=False):
    """Collect per-rotation run results and load or rebuild their Trials.

    Rotations are the sub-directories of `hyperparameter_search_dir` named
    '0' through '9'. For every run directory inside a rotation the args,
    params, and tuning/test performance metrics are gathered; runs that
    errored, lack an args file, have not finished training, or whose
    metrics can be neither loaded nor computed are skipped (args and params
    may still be recorded for runs skipped after those were read).

    :param hyperparameter_search_dir: Root directory of the search.
    :param tuning_dataset: Dataset used to compute missing tuning metrics;
        when None, runs without cached tuning metrics are skipped.
    :param test_dataset: Dataset used to compute missing test metrics;
        when None, runs without cached test metrics are skipped.
    :param tqdm: Optional progress-bar callable applied to iterables and
        forwarded to `evaluate_multi`.
    :param overwrite: When True, rebuild the Trials even if a rotation
        already has a `trials.pkl`.
    :return: `(config, all_results, all_args, all_params, all_trials)`,
        where the last four are dicts keyed by rotation name.
    """
    config = read_config(hyperparameter_search_dir)[0]

    filepath = os.path.join(hyperparameter_search_dir, HYP_CONFIG_FILENAME)
    with open(filepath, mode='r') as f:
        raw_config = json.load(f)

    # Sorted so that output and result ordering are deterministic.
    rotations = sorted(
        set(os.listdir(hyperparameter_search_dir)) &
        {str(i) for i in range(10)})
    print("Observe runs for rotations: %s" % ', '.join(rotations))

    all_trials = {}
    all_results = {}
    all_args = {}
    all_params = {}

    # Only show a rotation-level progress bar when there is enough work.
    if len(rotations) < 4 or tqdm is None:
        rotations_rng = rotations
    else:
        rotations_rng = tqdm(rotations)

    for rotation in rotations_rng:
        rotation_results = {}
        rotation_args = {}
        rotation_params = {}

        rotation_dir = os.path.join(hyperparameter_search_dir, rotation)
        run_names = [r for r in os.listdir(rotation_dir) if r != 'trials.pkl']
        run_names_rng = run_names if tqdm is None else tqdm(run_names)

        # Iterate the (optionally) progress-wrapped sequence; the wrapper
        # was previously built but never used.
        for run_name in run_names_rng:
            run_dir = os.path.join(rotation_dir, run_name)
            if not os.path.isdir(run_dir):
                print(run_dir)
                continue
            if os.path.isfile(os.path.join(run_dir, 'error.pkl')):
                continue

            args_filepath = os.path.join(run_dir, ARGS_FILENAME)
            if not os.path.isfile(args_filepath):
                continue
            args = Args.from_json_file(args_filepath)
            rotation_args[run_name] = args

            params_filepath = os.path.join(run_dir, PARAMS_FILENAME)
            if os.path.isfile(params_filepath):
                # NOTE(review): pickle.load can execute arbitrary code;
                # only use on trusted search directories.
                with open(params_filepath, mode='rb') as f:
                    rotation_params[run_name] = pickle.load(f)
            else:
                rotation_params[run_name] = args_to_params(args, raw_config)

            # Training is considered complete once the checkpoint of the
            # final epoch exists.
            final_checkpoint = os.path.join(
                run_dir, 'model.epoch-%d' % (args.epochs - 1))
            if not os.path.isfile(final_checkpoint):
                print("Run %s Still training (or errored and didn't report)" %
                      run_name)
                continue

            have_tuning, tuning = _load_or_evaluate_metrics(
                run_dir, 'tuning_perf_metrics.pkl', tuning_dataset, tqdm,
                'Missing tuning for %s' % run_name,
                "Wasn't given a tuning dataset!")
            if not have_tuning:
                continue

            have_test, test = _load_or_evaluate_metrics(
                run_dir, 'test_perf_metrics.pkl', test_dataset, tqdm,
                'Have tuning but missing test for %s' % run_name,
                "Wasn't given a test dataset!")
            if not have_test:
                # Without test metrics there is nothing valid to record;
                # skip rather than reuse a previous run's value.
                continue

            rotation_results[run_name] = (tuning, test)

        all_results[rotation] = rotation_results
        all_args[rotation] = rotation_args
        all_params[rotation] = rotation_params

        trials_filepath = os.path.join(rotation_dir, 'trials.pkl')
        if os.path.exists(trials_filepath) and not overwrite:
            with open(trials_filepath, mode='rb') as f:
                all_trials[rotation] = pickle.load(f)
            continue

        # Rebuild Trials
        all_trials[rotation] = _rebuild_rotation_trials(
            rotation_results, rotation_params)

    return config, all_results, all_args, all_params, all_trials