Ejemplo n.º 1
0
def add_trials(points):
    """Build a hyperopt ``Trials`` object pre-populated with finished trials.

    Args:
        points: Iterable of result dicts. Each item is stored as the trial's
            ``result`` and must carry a ``'params'`` mapping that holds a
            value for every hyperparameter name of the module-level ``space``.

    Returns:
        A refreshed ``Trials`` instance holding one ``JOB_STATE_DONE`` trial
        per entry of ``points``; trial ids follow the enumeration order.
    """
    # Draw the hyperparameter names a single time. ``sample`` is presumably
    # hyperopt's stochastic sampler (verify against the file's imports), so
    # reusing one draw guarantees that ``idxs`` and ``vals`` share exactly the
    # same keys and avoids re-sampling the space twice per row.
    param_names = list(sample(space).keys())

    test_trials = Trials()
    for tid, row in enumerate(points):
        params = row['params']
        hyperopt_trial = test_trials.new_trial_docs(
            tids=[tid],
            specs=[None],
            results=[row],
            miscs=[{
                'tid': tid,
                'cmd': ('domain_attachment', 'FMinIter_Domain'),
                'workdir': None,
                'idxs': {key: [tid] for key in param_names},
                'vals': {key: [params[key]] for key in param_names},
            }])

        # new_trial_docs() creates the document in its initial state; these
        # points were already evaluated, so flag them as done.
        hyperopt_trial[0]['state'] = hyperopt.JOB_STATE_DONE
        test_trials.insert_trial_docs(hyperopt_trial)

    # One refresh after all insertions instead of one per row.
    test_trials.refresh()
    return test_trials
Ejemplo n.º 2
0
    def create_trials(losses, statuses, vals, scope_keys):
        """Assemble a hyperopt ``Trials`` store from parallel result sequences.

        Args:
            losses: Loss value per trial; its length sets the number of trials.
            statuses: Status value per trial, index-aligned with ``losses``.
            vals: Mapping from hyperparameter key to a sequence of values,
                index-aligned with ``losses``.
            scope_keys: Hyperparameter keys recorded for every trial.

        Returns:
            A refreshed ``Trials`` holding one trial document per loss.
        """
        store = Trials()
        trial_ids = store.new_trial_ids(len(losses))

        outcome_docs = []
        misc_docs = []
        for position, trial_id in enumerate(trial_ids):
            # Per-key bookkeeping: the position of the trial and its value.
            key_idxs = {key: [position] for key in scope_keys}
            key_vals = {key: [vals[key][position]] for key in scope_keys}

            outcome_docs.append({
                'loss': losses[position],
                'status': statuses[position],
            })
            misc_docs.append({
                'tid': trial_id,
                'cmd': None,
                'idxs': key_idxs,
                'vals': key_vals,
            })

        empty_specs = [None] * len(trial_ids)
        docs = store.new_trial_docs(trial_ids, empty_specs, outcome_docs,
                                    misc_docs)
        store.insert_trial_docs(docs)
        store.refresh()
        return store
Ejemplo n.º 3
0
def merge_trials(trials: Trials, new_trials_data: List[Dict]) -> Trials:
    """
    Merge the contents of another hyperopt trials object into ``trials``.

    Every incoming trial gets a new id (its own id shifted past the largest id
    already present in ``trials``) so that ids stay unique after the merge.

    :param trials: A hyperopt trials object containing trials data, organized into hierarchical dictionaries.
        It is extended in place.
    :param new_trials_data: The contents of a hyperopt trials object, `Trials.trials`.
        The documents are read but not modified.
    :return: The same ``trials`` object, now also holding the re-numbered new trials.
    """
    max_tid = 0
    if len(trials.trials) > 0:
        max_tid = max(trial['tid'] for trial in trials.trials)

    merged_docs = []
    for trial in new_trials_data:
        # Trial id needs to be unique among the ids already in ``trials``.
        tid = trial['tid'] + max_tid + 1

        # Work on shallow copies so the caller's documents keep their
        # original ids instead of being rewritten as a side effect.
        misc = dict(trial['misc'])
        misc['tid'] = tid
        # Re-point active parameters to the new id. Parameters with an empty
        # index list (inactive branches of a conditional space) must stay
        # empty so that ``idxs`` and ``vals`` remain the same length.
        misc['idxs'] = {
            key: [tid] if idxs else []
            for key, idxs in trial['misc']['idxs'].items()
        }

        doc = dict(trial)
        doc['tid'] = tid
        doc['misc'] = misc
        merged_docs.append(doc)

    if merged_docs:
        trials.insert_trial_docs(merged_docs)
        # A single refresh after all insertions is enough.
        trials.refresh()
    return trials
Ejemplo n.º 4
0
    def _get_trials(self, problem: HyperparameterOptimisationProblem, n_resources: int) -> Trials:
        """Build a hyperopt ``Trials`` history from previously recorded evaluations.

        Based on the method found on Github issues to inject trials into Hyperopt.

        :param problem: Provides the hyperopt search space through
            ``get_hyperopt_space_from_hyperparams_to_opt()``.
        :param n_resources: Resource level forwarded to ``self._is_transferable`` to
            decide which recorded evaluations are included.
        :return: A ``Trials`` object with one finished trial per transferable
            evaluation; empty when ``self.eval_history`` is empty.
        """
        trials = Trials()
        if not self.eval_history:
            return trials

        # One sample of the space is only used to learn the hyperparameter names.
        hyperopt_selection = hyperopt.pyll.stochastic.sample(problem.get_hyperopt_space_from_hyperparams_to_opt())
        print(hyperopt_selection)
        hp_names = list(hyperopt_selection.keys())

        df_dict = {
            'loss': [], **{hp_name: [] for hp_name in hp_names}, 'evaluator': [],
            'optimisation_goals': [], 'eval_time': []
        }
        # Negate the objective when maximising (the loss stored in the trials is
        # sign * objective); the sign does not change between evaluations.
        sign = -1 if self.min_or_max == max else 1
        for evaluator, (r, optimisation_goals, _) in self.evaluations_by_resources.items():
            if not self._is_transferable(r, n_resources):
                continue
            df_dict['loss'].append(sign * self.optimisation_func(optimisation_goals))
            for hp_name in hp_names:
                df_dict[hp_name].append(getattr(evaluator.arm, hp_name))
            df_dict['evaluator'].append(evaluator)
            df_dict['optimisation_goals'].append(optimisation_goals)
            # Append one timestamp per row; assigning the scalar here would
            # replace the column list altogether.
            df_dict['eval_time'].append(time.time())
        df = pd.DataFrame(df_dict)

        for tid, (_, row) in enumerate(df.iterrows()):
            hyperopt_trial = trials.new_trial_docs(
                tids=[tid],
                specs=[None],
                results=[{'loss': row['loss'], 'status': hyperopt.STATUS_OK}],
                miscs=[{'tid': tid,
                        'cmd': ('domain_attachment', 'FMinIter_Domain'),
                        'idxs': {key: [tid] for key in hp_names},
                        'vals': {key: [row[key]] for key in hp_names},
                        'workdir': None
                        }]
            )
            # The evaluation already happened, so the trial is stored as done.
            hyperopt_trial[0]['state'] = hyperopt.JOB_STATE_DONE
            trials.insert_trial_docs(hyperopt_trial)

        # A single refresh after all insertions is sufficient.
        trials.refresh()
        return trials
Ejemplo n.º 5
0
def get_next_params(current_trials):
    """Ask TPE for one additional trial on top of ``current_trials``.

    The trial documents of ``current_trials`` are copied into a scratch
    ``Trials`` object, ``fmin`` is run on it with ``max_evals=1`` and a stub
    objective that records the parameters it receives and reports a fixed
    loss of 0.5.

    Args:
        current_trials: Object exposing the existing trial documents through
            its ``trials`` attribute.

    Returns:
        A ``(trial, params)`` tuple: the last trial document of the scratch
        ``Trials`` and the parameters handed to the stub objective (an empty
        dict if the objective was never called).
    """
    captured = {'params': {}}

    def record_params(params):
        # Remember what the optimiser proposed; the loss itself is a dummy.
        captured['params'] = params
        return {'loss': 0.5, 'status': STATUS_OK}

    cache_trials = Trials()
    cache_trials.insert_trial_docs(current_trials.trials)
    # NOTE(review): the scratch object is intentionally not refreshed before
    # calling fmin (the call was disabled in the original); presumably this
    # is what lets ``max_evals=1`` still schedule a new evaluation - confirm
    # before adding a refresh() here.

    fmin(
        record_params,
        space,
        algo=tpe.suggest,
        trials=cache_trials,
        max_evals=1,
        return_argmin=False,
        show_progressbar=False,
    )

    newest_trial = cache_trials.trials[-1]
    return newest_trial, captured['params']
Ejemplo n.º 6
0
            'split_size': [0]
        },
        'vals': {
            'executor_memory': [7],
            'offheap_size': [1],
            'shuffle_partitions': [16],
            'split_size': [15]
        }
    },
    'exp_key': None,
    'owner': None,
    'version': 0,
    'book_time': None,
    'refresh_time': None
}]
# Seed the search history with the hand-built trial documents defined above,
# then refresh the Trials object.
trials.insert_trial_docs(new_trials)
trials.refresh()

# TPE suggestion function with ``n_startup_jobs`` pre-bound to
# ``num_random_startup_jobs``.
tpe_suggest = partial(tpe.suggest, n_startup_jobs=num_random_startup_jobs)
# Run the optimisation on top of the seeded history; ``max_evals`` is set to
# ``num_evals``.
best = fmin(objective_function,
            space=space,
            algo=tpe_suggest,
            max_evals=num_evals,
            trials=trials)
# Bare expression: evaluates to the loss of the best trial. The value is not
# stored or printed, so it is only visible in an interactive session.
trials.best_trial['result']['loss']


# define an objective function
def objective(args):
    case, val = args
    if case == 'case 1':
Ejemplo n.º 7
0
    def _fit_on_prepared_data(self, metadata):
        """Run TPE-driven model selection and return the best model found.

        ``self.num_params`` configurations are proposed by HyperOpt's TPE in
        batches of at most ``self.parallelism``. Each batch is trained and
        validated for ``self.num_epochs`` epochs through ``self.backend``, and
        the last validation value of ``self.evaluation_metric`` is used as the
        loss of each configuration.

        :param metadata: Passed through to ``create_model`` of every estimator.
        :return: A ``ModelSelectionResult`` built from the model with the lowest
            final validation metric, the metrics of all trained estimators, all
            models, and the output column names (``<label>__output``).
        """
        trials = Trials()
        domain = Domain(None, self.hyperopt_search_space)
        rand = np.random.RandomState(constants.RANDOM_SEED)

        all_estimators = []
        all_estimator_results = {}
        for i in range(0, self.num_params, self.parallelism):
            # Size of this batch; the last batch may be smaller.
            n = min(self.num_params - i, self.parallelism)

            # Using HyperOpt TPE to generate parameters
            hyperopt_params = []
            for j in range(i, i + n):
                new_param = tpe.suggest([j], domain, trials, rand.randint(0, 2 ** 31 - 1))
                # NOTE(review): hyperopt trial documents carry their progress
                # under the 'state' key; confirm that this top-level 'status'
                # key is actually consumed somewhere.
                new_param[0]['status'] = STATUS_RUNNING

                trials.insert_trial_docs(new_param)
                trials.refresh()
                hyperopt_params.append(new_param[0])

            # Generating Cerebro params from HyperOpt params
            estimator_param_maps = []
            for hyperopt_param in hyperopt_params:
                param = {}
                for k in hyperopt_param['misc']['vals']:
                    val = hyperopt_param['misc']['vals'][k][0].item()
                    if isinstance(self.search_space[k], _HPChoice):
                        # if the hyperparameter is a choice the index is returned
                        val = self.search_space[k].options[val]
                    param[k] = val
                estimator_param_maps.append(param)

            # Generating Cerebro estimators
            estimators = [self._estimator_gen_fn_wrapper(param) for param in estimator_param_maps]
            estimator_results = {model.getRunId(): {} for model in estimators}
            # log hyperparameters to TensorBoard
            self._log_hp_to_tensorboard(estimators, estimator_param_maps)

            # Trains the models up to the number of epochs specified. For each iteration also performs validation
            for epoch in range(self.num_epochs):
                epoch_results = self.backend.train_for_one_epoch(estimators, self.store, self.feature_cols,
                                                                 self.label_cols)
                update_model_results(estimator_results, epoch_results)

                epoch_results = self.backend.train_for_one_epoch(estimators, self.store, self.feature_cols,
                                                                 self.label_cols, is_train=False)
                update_model_results(estimator_results, epoch_results)

                self._log_epoch_metrics_to_tensorboard(estimators, estimator_results)

            all_estimators.extend(estimators)
            all_estimator_results.update(estimator_results)

            # HyperOpt TPE update. The estimators were created one per proposed
            # configuration, in the same order, so the two lists are paired up
            # directly (this also avoids re-using the outer loop variable ``i``).
            # NOTE(review): these edits are made on the documents returned by
            # tpe.suggest; confirm that ``trials`` holds the same objects and
            # not copies, otherwise TPE never sees the losses.
            for estimator, hyperopt_param in zip(estimators, hyperopt_params):
                hyperopt_param['status'] = STATUS_OK
                hyperopt_param['result'] = {'loss': estimator_results[estimator.getRunId()][
                    'val_' + self.evaluation_metric][-1], 'status': STATUS_OK}
            trials.refresh()

        # find the best model and create ModelSearchModel
        models = [est.create_model(all_estimator_results[est.getRunId()], est.getRunId(), metadata) for est in
                  all_estimators]
        val_metrics = [all_estimator_results[est.getRunId()]['val_' + self.evaluation_metric][-1] for est in
                       all_estimators]
        best_model = models[np.argmin(val_metrics)]

        # Report the metrics of every batch, matching ``models``; the per-batch
        # ``estimator_results`` only covers the final batch.
        return ModelSelectionResult(best_model, all_estimator_results, models,
                                    [x+"__output" for x in self.label_cols])
Ejemplo n.º 8
0
class Hyperopt(AbstractPlanner):
    """Planner that proposes new parameters with HyperOpt's TPE algorithm."""

    def __init__(self, goal='minimize', show_progressbar=False):
        """
        Tree of Parzen Estimators (TPE) as implemented in HyperOpt.

        Args:
            goal (str): The optimization goal, either 'minimize' or 'maximize'. Default is 'minimize'.
            show_progressbar (bool): If True, show a progressbar.
        """
        # locals() is forwarded as-is, so no other local variable may be
        # created before this call.
        AbstractPlanner.__init__(**locals())
        self._trials = Trials()  # Hyperopt object that stores the search history
        self._hp_space = None  # the parameters in the Hyperopt format

    def _set_param_space(self, param_space):
        """Store the continuous parameters of ``param_space`` and rebuild the Hyperopt space.

        Args:
            param_space: Iterable of parameter objects exposing ``type``,
                ``name``, ``low`` and ``high``.
        """
        self._param_space = []
        for param in param_space:
            # Only continuous parameters are handled (``_set_hp_space`` ignores
            # every other type as well). Skipping the rest avoids appending a
            # stale dict left over from a previous iteration, or failing on an
            # unbound name when the first parameter is not continuous.
            if param.type != 'continuous':
                continue
            self._param_space.append({
                'name': param.name,
                'type': param.type,
                'domain': (param.low, param.high),
            })
        # update hyperopt space accordingly
        self._set_hp_space()

    def _tell(self, observations):
        """Record the observed parameters/values and rebuild the Hyperopt trials.

        Args:
            observations: Object providing ``get_params`` and ``get_values``.
        """
        self._params = observations.get_params(as_array=False)
        self._values = observations.get_values(as_array=True,
                                               opposite=self.flip_measurements)
        # update hyperopt Trials accordingly
        self._set_hp_trials()

    def _set_hp_space(self):
        """Translate ``self._param_space`` into the Hyperopt space used by ``fmin``."""
        # go through all parameters we have defined and convert them to Hyperopt format
        space = [
            (param['name'],
             hp.uniform(param['name'], param['domain'][0], param['domain'][1]))
            for param in self._param_space
            if param['type'] == 'continuous'
        ]
        # update instance attribute that is the space input for Hyperopt fmin
        self._hp_space = OrderedDict(space)

    def _set_hp_trials(self):
        """Rebuild ``self._trials`` from scratch out of ``self._params`` / ``self._values``."""
        self._trials = Trials()
        if self._params is not None and len(self._params) > 0:
            for tid, (param, loss) in enumerate(zip(self._params,
                                                    self._values)):
                hyperopt_trial = self._trials.new_trial_docs(
                    tids=[tid],
                    specs=[None],
                    results=[{
                        'loss': loss,
                        'status': STATUS_OK
                    }],
                    miscs=[{
                        'tid': tid,
                        'cmd': ('domain_attachment', 'FMinIter_Domain'),
                        'idxs': {k: [tid] for k in param},
                        'vals': {k: [v] for k, v in param.items()},
                        'workdir': None
                    }])
                # The observation already exists, so the trial is stored as done.
                hyperopt_trial[0]['state'] = JOB_STATE_DONE
                self._trials.insert_trial_docs(hyperopt_trial)
            # One refresh after all insertions instead of one per trial.
            self._trials.refresh()

    def _ask(self):
        """Run one more Hyperopt iteration and return the parameters it proposed.

        Returns:
            ParameterVector: Built from the values of the last trial in
            ``self._trials``.
        """
        # NOTE: we pass a dummy function as we just ask for the new (+1) set of parameters
        _ = fmin(fn=lambda x: 0,
                 space=self._hp_space,
                 algo=tpe.suggest,
                 max_evals=self.num_generated,
                 trials=self._trials,
                 show_progressbar=self.show_progressbar)

        # make sure the number of parameters asked matches the number of Hyperopt iterations/trials
        assert len(self._trials.trials) == self.num_generated
        # get params from last dict in trials.trials; each value is stored as a
        # one-element list, so unwrap it into a new dict and leave the stored
        # trial document untouched.
        last_vals = self._trials.trials[-1]['misc']['vals']
        proposed_params = {key: value[0] for key, value in last_vals.items()}

        return ParameterVector(dict=proposed_params,
                               param_space=self.param_space)
    def search(self, run_name, store, context, hp_records, runs):
        """Suggest hyperparameter values for ``run_name`` based on prior runs.

        Prior runs are converted into finished hyperopt trials. With fewer
        than three such trials the next point is drawn by random search,
        otherwise by TPE.

        Args:
            run_name: Name of the run to generate hyperparameters for; the
                integer extracted from it is used as the new trial id.
            store: Not used by this method.
            context: Object providing ``primary_metric`` and
                ``maximize_metric``.
            hp_records: Iterable of dicts with ``"name"`` and ``"space_func"``
                entries describing the search space.
            runs: Iterable of dicts describing prior runs. Entries lacking
                ``"run_name"``, ``"hparams"`` or the primary metric are
                ignored.

        Returns:
            The result of ``self.fixup_hyperopt_hparams`` applied to the
            values of the suggested trial.
        """

        def make_trial(tid, arg_dict, loss_value):
            """Build a finished hyperopt trial document for one prior run."""
            # NOTE(review): ``arg_dict`` is stored under "vals" exactly as
            # given; confirm that run["hparams"] already has the shape
            # hyperopt expects there.
            return {
                "book_time": None,
                "exp_key": None,
                "owner": None,
                "refresh_time": None,
                "spec": None,
                "state": 2,   # done
                "tid": tid,
                "version": 0,
                "misc": {
                    "cmd": ("domain_attachment", "FMinIter_Domain"),
                    "idxs": {key: [tid] for key in arg_dict},
                    "tid": tid,
                    "vals": arg_dict,
                },
                "result": {"loss": loss_value, "status": "ok"},
            }

        dummy_loss = lambda x: None
        param_space = {r["name"]: r["space_func"] for r in hp_records}
        domain = base.Domain(dummy_loss, param_space)

        rstate = np.random.RandomState()

        # convert runs to Trials
        required_keys = ("run_name", "hparams", context.primary_metric)
        trial_list = []

        for run in runs:
            # don't trip over inappropriate runs
            if any(key not in run for key in required_keys):
                continue

            loss_value = run[context.primary_metric]
            if context.maximize_metric:
                # hyperopt minimises, so a metric to maximise is negated
                loss_value = -loss_value

            # extract a unique int from the prior run's name (parent.childnum).
            # A separate local is used so the ``run_name`` argument is not
            # overwritten by the names of prior runs.
            prior_tid = run_helper.get_int_from_run_name(run["run_name"])

            trial_list.append(make_trial(prior_tid, run["hparams"], loss_value))

        # finally, add our trial_list to trials
        trials = Trials()
        trials.insert_trial_docs(trial_list)
        trials.refresh()

        # get next suggested hyperparameter values from TPE algorithm, using
        # the id of the run we were asked about
        tid = run_helper.get_int_from_run_name(run_name)

        min_trials = 3      # before this, just do rand sampling
        seed = rstate.randint(2 ** 31 - 1)

        if len(trials) < min_trials:
            new_trials = rand.suggest([tid], domain, trials, seed)
        else:
            new_trials = tpe.suggest([tid], domain, trials, seed)

        # apply the suggested hparam values
        trial = new_trials[0]
        arg_dict = trial["misc"]["vals"]
        arg_dict = self.fixup_hyperopt_hparams(param_space, arg_dict)

        return arg_dict
Ejemplo n.º 10
0
class _HyperOpt(base.Optimizer):
    """Optimizer that delegates candidate proposal to HyperOpt's TPE.

    The search history is kept in a hyperopt ``Trials`` object; candidates
    remember the id of their trial in ``candidate._meta["trial_id"]``.
    """

    # pylint: disable=too-many-instance-attributes
    def __init__(
        self,
        parametrization: IntOrParameter,
        budget: tp.Optional[int] = None,
        num_workers: int = 1,
        *,
        prior_weight: float = 1.0,
        n_startup_jobs: int = 20,
        n_EI_candidates: int = 24,
        gamma: float = 0.25,
        verbose: bool = False,
    ) -> None:
        """Set up the hyperopt search space, trial history and TPE arguments.

        ``parametrization``, ``budget`` and ``num_workers`` are forwarded to
        the base class. The keyword-only arguments are stored in
        ``self.tpe_args`` and passed to ``tpe.suggest`` on every ask.
        """
        super().__init__(parametrization,
                         budget=budget,
                         num_workers=num_workers)
        try:
            # try to convert parametrization to hyperopt search space
            if not isinstance(self.parametrization, p.Instrumentation):
                raise NotImplementedError
            self.space = _get_search_space(self.parametrization.name,
                                           self.parametrization)
            self._transform = None
        except NotImplementedError:
            # Fallback: one uniform variable in [0, 1] per dimension, named
            # "x_<i>", together with a transform bounded to (0, 1) that is
            # applied to the standardized data.
            self._transform = transforms.ArctanBound(0, 1)
            self.space = {
                f"x_{i}": hp.uniform(f"x_{i}", 0, 1)
                for i in range(self.dimension)
            }

        self.trials = Trials()
        self.domain = Domain(fn=None,
                             expr=self.space,
                             pass_expr_memo_ctrl=False)
        self.tpe_args = {
            "prior_weight": prior_weight,
            "n_startup_jobs": n_startup_jobs,
            "n_EI_candidates": n_EI_candidates,
            "gamma": gamma,
            "verbose": verbose,
        }

    def _internal_ask_candidate(self) -> p.Parameter:
        """Ask TPE for a new trial, record it and convert it to a candidate."""
        # Inspired from FMinIter class (hyperopt)
        next_id = self.trials.new_trial_ids(1)
        new_trial = tpe.suggest(next_id, self.domain, self.trials,
                                self._rng.randint(2**31 - 1),
                                **self.tpe_args)[0]
        self.trials.insert_trial_doc(new_trial)
        self.trials.refresh()

        candidate = self.parametrization.spawn_child()

        if self._transform:
            # Fallback space: collect the "x_<i>" values in dimension order and
            # map them back to standardized data.
            data = np.array([
                new_trial["misc"]["vals"][f"x_{i}"][0]
                for i in range(self.dimension)
            ])
            candidate = candidate.set_standardized_data(
                self._transform.backward(data))

            # For consistency, we need to update hyperopt history
            # when standardized data is changed
            if any(data != self._transform.forward(
                    candidate.get_standardized_data(
                        reference=self.parametrization))):
                for it, val in enumerate(
                        self._transform.forward(
                            candidate.get_standardized_data(
                                reference=self.parametrization))):
                    # The stored trial is addressed by using its id as the
                    # position in the private ``_dynamic_trials`` list.
                    self.trials._dynamic_trials[
                        next_id[0]]["misc"]["vals"][f"x_{it}"][0] = val
        else:
            # Converted search space: evaluate the space at the suggested point
            # and turn the resulting config into the candidate's value.
            spec = hyperopt.base.spec_from_misc(new_trial["misc"])
            config = hyperopt.space_eval(self.space, spec)
            candidate.value = _hp_dict_to_parametrization(config)

        # Remember which trial this candidate belongs to for the later tell.
        candidate._meta["trial_id"] = new_trial["tid"]
        return candidate

    def _internal_tell_candidate(self, candidate: p.Parameter,
                                 loss: float) -> None:
        """Store ``loss`` for the trial of ``candidate`` and mark it as done.

        The candidate must carry a ``"trial_id"`` in ``_meta`` and the
        corresponding trial must still be in the ``JOB_STATE_NEW`` state.
        """
        result = {"loss": loss, "status": "ok"}
        assert "trial_id" in candidate._meta
        tid = candidate._meta["trial_id"]
        assert self.trials._dynamic_trials[tid][
            "state"] == hyperopt.JOB_STATE_NEW

        now = hyperopt.utils.coarse_utcnow()
        self.trials._dynamic_trials[tid]["book_time"] = now
        self.trials._dynamic_trials[tid]["refresh_time"] = now
        self.trials._dynamic_trials[tid]["state"] = hyperopt.JOB_STATE_DONE
        self.trials._dynamic_trials[tid]["result"] = result
        # "refresh_time" is assigned a second time here, overwriting the value
        # set from ``now`` above.
        self.trials._dynamic_trials[tid][
            "refresh_time"] = hyperopt.utils.coarse_utcnow()
        self.trials.refresh()

    def _internal_tell_not_asked(self, candidate: p.Parameter,
                                 loss: float) -> None:
        """Register a candidate that was not produced by this optimizer.

        A placeholder trial is created with random search, its values are
        replaced by those derived from ``candidate``, and the loss is then
        recorded through ``_internal_tell_candidate``.
        """
        next_id = self.trials.new_trial_ids(1)
        new_trial = hyperopt.rand.suggest(next_id, self.domain, self.trials,
                                          self._rng.randint(2**31 - 1))
        self.trials.insert_trial_docs(new_trial)
        self.trials.refresh()
        tid = next_id[0]

        if self._transform:
            # Fallback space: store the transformed standardized data under
            # the "x_<i>" keys.
            data = candidate.get_standardized_data(
                reference=self.parametrization)
            data = self._transform.forward(data)
            self.trials._dynamic_trials[tid]["misc"]["vals"] = {
                f"x_{i}": [data[i]]
                for i in range(len(data))
            }
        else:
            # Start from an empty list for every key of the placeholder trial
            # and pass it as the default for the conversion helper.
            null_config: dict = {
                k: []
                for k in self.trials._dynamic_trials[tid]["misc"]
                ["vals"].keys()
            }
            new_vals: dict = _hp_parametrization_to_dict(candidate,
                                                         default=null_config)
            self.trials._dynamic_trials[tid]["misc"]["vals"] = new_vals

        self.trials.refresh()
        candidate._meta["trial_id"] = tid
        self._internal_tell_candidate(candidate, loss)