Beispiel #1
0
def merge_trials(trials: Trials, new_trials_data: List[Dict]) -> Trials:
    """
    Merge a hyperopt trials object with the contents of another hyperopt trials object.

    Every incoming trial is re-numbered so that its id cannot collide with an id that
    is already present in ``trials``: the new id is the trial's own ``tid`` plus the
    largest existing ``tid`` plus one.  The trial's ``tid``, its ``misc['tid']`` and
    every entry of ``misc['idxs']`` are rewritten to that new id.

    :param trials: A hyperopt trials object containing trials data, organized into hierarchical dictionaries.
        It is updated in place.
    :param new_trials_data: The contents of a hyperopt trials object, `Trials.trials`.
        The dictionaries are copied before being re-numbered, so the caller's data is left untouched.
    :return: A hyperopt trials object, merged from the two inputs (the ``trials`` argument itself).
    """
    import copy  # function-scope import keeps this block self-contained

    # Largest id already in use; 0 when ``trials`` is still empty.
    max_tid = max((trial['tid'] for trial in trials.trials), default=0)

    merged_docs = []
    for trial in new_trials_data:
        # Work on a copy: re-numbering must not leak back into the caller's list.
        doc = copy.deepcopy(trial)
        tid = doc['tid'] + max_tid + 1  # trial id needs to be unique among this list of ids.
        doc['tid'] = tid
        doc['misc']['tid'] = tid
        for key in doc['misc']['idxs']:
            doc['misc']['idxs'][key] = [tid]
        merged_docs.append(doc)

    if merged_docs:
        # A single insert + refresh replaces the per-trial refresh of the whole history.
        trials.insert_trial_docs(merged_docs)
        trials.refresh()
    return trials
Beispiel #2
0
def add_trials(points):
    """Build a hyperopt ``Trials`` object pre-populated with already evaluated points.

    :param points: iterable of result records.  Each record is stored unchanged as the
        trial's ``result`` and must provide a ``'params'`` mapping with a value for
        every hyperparameter name of the module-level ``space``.
    :return: a ``Trials`` object holding one trial per record, numbered from 0 in
        iteration order and marked ``hyperopt.JOB_STATE_DONE``.
    """
    test_trials = Trials()

    # One sample is drawn only to learn the hyperparameter names.  Drawing it once
    # (instead of twice per row) guarantees that 'idxs' and 'vals' describe the same
    # set of names even when the space is conditional.
    param_names = list(sample(space).keys())

    for tid, row in enumerate(points):
        hyperopt_trial = test_trials.new_trial_docs(
            tids=[tid],
            specs=[None],
            results=[row],
            miscs=[{
                'tid': tid,
                'cmd': ('domain_attachment', 'FMinIter_Domain'),
                'workdir': None,
                'idxs': {key: [tid] for key in param_names},
                'vals': {key: [row['params'][key]] for key in param_names},
            }])

        hyperopt_trial[0]['state'] = hyperopt.JOB_STATE_DONE
        test_trials.insert_trial_docs(hyperopt_trial)

    # Refreshing once after all inserts yields the same final state as refreshing per row.
    test_trials.refresh()
    return test_trials
Beispiel #3
0
    def create_trials(losses, statuses, vals, scope_keys):
        """Assemble a ``Trials`` object from parallel sequences of losses and statuses.

        One trial is created per entry of ``losses``.  For trial number ``pos`` the
        result is ``{'loss': losses[pos], 'status': statuses[pos]}`` and, for every
        key in ``scope_keys``, ``misc['idxs'][key]`` is ``[pos]`` and
        ``misc['vals'][key]`` is ``[vals[key][pos]]``.
        """
        trials = Trials()
        tids = trials.new_trial_ids(len(losses))

        results = []
        miscs = []
        for pos, tid in enumerate(tids):
            per_key_idxs = {key: [pos] for key in scope_keys}
            per_key_vals = {key: [vals[key][pos]] for key in scope_keys}
            results.append({'loss': losses[pos], 'status': statuses[pos]})
            miscs.append({
                'tid': tid,
                'cmd': None,
                'idxs': per_key_idxs,
                'vals': per_key_vals,
            })

        # No spec is attached to any of the trials.
        docs = trials.new_trial_docs(tids, [None] * len(tids), results, miscs)
        trials.insert_trial_docs(docs)
        trials.refresh()
        return trials
Beispiel #4
0
    def _get_trials(self, problem: HyperparameterOptimisationProblem, n_resources: int) -> Trials:
        """Based on the method found on Github issues to inject trials into Hyperopt.

        Converts the evaluations recorded in ``self.evaluations_by_resources`` that
        ``self._is_transferable`` accepts for ``n_resources`` into finished hyperopt
        trials.

        :param problem: supplies the hyperopt search space; one sample of it is drawn
            to learn the hyperparameter names.
        :param n_resources: resource level passed to ``self._is_transferable``.
        :return: an empty ``Trials`` when ``self.eval_history`` is empty, otherwise a
            ``Trials`` holding one ``JOB_STATE_DONE`` trial per transferable evaluation.
        """
        if not self.eval_history:
            return Trials()

        hyperopt_selection = hyperopt.pyll.stochastic.sample(problem.get_hyperopt_space_from_hyperparams_to_opt())
        print(hyperopt_selection)
        hp_names = list(hyperopt_selection.keys())

        df_dict = {
            'loss': [], **{hp_name: [] for hp_name in hp_names}, 'evaluator': [],
            'optimisation_goals': [], 'eval_time': []
        }
        # Hyperopt minimises, so the objective is negated when the goal is to maximise.
        sign = -1 if self.min_or_max == max else 1
        for evaluator, (r, optimisation_goals, _) in self.evaluations_by_resources.items():
            if self._is_transferable(r, n_resources):
                df_dict['loss'].append(sign * self.optimisation_func(optimisation_goals))
                for hp_name in hp_names:
                    df_dict[hp_name].append(getattr(evaluator.arm, hp_name))
                df_dict['evaluator'].append(evaluator)
                df_dict['optimisation_goals'].append(optimisation_goals)
                # Append (not assign): every column must stay a list of equal length.
                df_dict['eval_time'].append(time.time())
        df = pd.DataFrame(df_dict)

        test_trials = Trials()
        for tid, (index, row) in enumerate(df.iterrows()):
            hyperopt_trial = test_trials.new_trial_docs(
                tids=[tid],
                specs=[None],
                results=[{'loss': row['loss'], 'status': hyperopt.STATUS_OK}],
                miscs=[{'tid': tid,
                        'cmd': ('domain_attachment', 'FMinIter_Domain'),
                        'idxs': {key: [tid] for key in hp_names},
                        'vals': {key: [row[key]] for key in hp_names},
                        'workdir': None
                        }]
            )
            hyperopt_trial[0]['state'] = hyperopt.JOB_STATE_DONE
            test_trials.insert_trial_docs(hyperopt_trial)

        # One refresh after all inserts gives the same final state as one per row.
        test_trials.refresh()
        return test_trials
    def create_trials(self, complete, losses):
        """Turn the completed evaluations into a hyperopt ``Trials`` object.

        ``complete`` holds indices into ``losses``.  The n-th entry of ``complete``
        becomes the trial with id ``n``; its loss is ``losses[c]``, optionally passed
        through the configured response-shaping function.  Returns an empty
        ``Trials`` when ``complete`` is empty.
        """
        if len(complete) == 0:
            return Trials()

        trials = Trials()
        hist = self.create_history(complete)
        for position, c in enumerate(complete):
            if c >= len(losses):
                message = "Index {} is larger than the size of losses {}".format(
                    c, len(losses))
                error(message)
            loss = losses[c]

            docs = trials.new_trial_docs(
                [position],
                [None],
                [create_ok_result(loss, c)],
                [self.create_misc(position, hist)],
            )
            hyperopt_trial = docs[0]

            if self.response_shaping is True:
                # the loss is transformed to enhance optimization performance
                if self.shaping_func == "log_err":
                    loss = apply_log_err(loss)
                elif self.shaping_func == "hybrid_log":
                    loss = apply_hybrid_log(loss)
                else:
                    debug("Invalid shaping function: {}".format(
                        self.shaping_func))

            # The (possibly shaped) loss replaces the result built above.
            hyperopt_trial['result'] = {'loss': float(loss), 'status': STATUS_OK}
            hyperopt_trial['state'] = JOB_STATE_DONE
            trials.insert_trial_doc(hyperopt_trial)

        trials.refresh()
        return trials
def optimize(experiment, search_space, report, max_evals=100):
    """Run a TPE search with ``fmin``, resuming from and logging to a JSON-lines report.

    :param experiment: callable taking the sampled parameters and returning the
        result handed back to ``fmin``; the result must be JSON-serialisable.
    :param search_space: hyperopt search space passed to ``fmin``.
    :param report: path of the report file.  When it exists, every non-blank line is
        parsed as JSON and restored into the trials through ``add_trial_from_json``;
        each new result is appended to it as one JSON line.
    :param max_evals: ``max_evals`` argument forwarded to ``fmin`` (default 100).
    :return: whatever ``fmin`` returns.
    """
    trials = Trials()

    if os.path.exists(report):
        print("Restoring trials from {}".format(report))
        # The context manager closes the handle before the file is reopened for appending.
        with open(report) as previous_results:
            for line in previous_results:
                if not line.strip():
                    continue  # tolerate blank lines instead of failing in json.loads
                add_trial_from_json(trials, json.loads(line))
        trials.refresh()

    with open(report, 'a') as report_file:

        def run_experiment(params):
            res = experiment(params)
            # Flush after every result so an interrupted search can be resumed.
            report_file.write(json.dumps(res) + '\n')
            report_file.flush()
            return res

        return fmin(fn=run_experiment,
                    space=search_space,
                    algo=tpe.suggest,
                    trials=trials,
                    max_evals=max_evals)
Beispiel #7
0
    def create_trials(self, completed, losses):
        """Turn the completed evaluations into a hyperopt ``Trials`` object.

        ``completed`` and ``losses`` are read in lockstep: the n-th entry of each
        describes the trial with id ``n``.  The loss is optionally passed through the
        configured response-shaping function; a trial whose loss is ``None`` after
        that step is left out.  Returns an empty ``Trials`` when ``completed`` is empty.
        """
        if len(completed) == 0:
            return Trials()

        trials = Trials()
        hist = self.create_history(completed)
        for index, c in enumerate(completed):
            loss = losses[index]
            rval_results = [create_ok_result(loss, c)]
            rval_miscs = [self.create_misc(index, hist)]

            hopt_trial = trials.new_trial_docs([index], [None], rval_results, rval_miscs)[0]

            if self.response_shaping is True:
                # transform log applied loss for enhancing optimization performance
                if self.shaping_func == "log_err":
                    loss = apply_log_err(loss)
                elif self.shaping_func == "hybrid_log":
                    loss = apply_hybrid_log(loss)
                else:
                    debug("Invalid shaping function: {}".format(self.shaping_func))

            # Identity test: a loss of 0 (or any falsy number) is still a valid loss.
            if loss is not None:
                hopt_trial['result'] = {'loss': float(loss), 'status': STATUS_OK}
                hopt_trial['state'] = JOB_STATE_DONE
                trials.insert_trial_doc(hopt_trial)

        trials.refresh()
        return trials
Beispiel #8
0
def read_or_recreate_trials(
    hyperparameter_search_dir,
    tuning_dataset=None,
    test_dataset=None,
    tqdm=None,
    overwrite=False,
    do_print=True,
    trials_out_dict=None,
    results_out_dict=None,
    args_out_dict=None,
    params_out_dict=None,
):
    """Gather the results of a hyperparameter search and load or rebuild its hyperopt trials.

    The search directory is scanned for rotation sub-directories named with a single
    digit (``'0'`` .. ``'9'``); each of them holds one directory per run.  For every
    run whose final model checkpoint exists, the tuning and test performance are read
    from their pickle files or, when missing and a dataset was supplied, computed with
    ``evaluator.evaluate_multi`` and written back into the run directory.  For every
    rotation a ``Trials`` object is then either unpickled from ``trials.pkl`` or
    rebuilt from the collected results.

    The ``*_out_dict`` arguments act as caches: entries that are already present (and
    not ``None``) are kept instead of being recomputed, and the dictionaries are
    updated in place.

    :param hyperparameter_search_dir: root directory of the hyperparameter search.
    :param tuning_dataset: dataset used to compute missing tuning results; a run with
        no stored tuning result is skipped when this is ``None``.
    :param test_dataset: dataset used to compute missing test results; the test result
        is ``None`` when it is neither stored nor computable.
    :param tqdm: optional progress-bar factory wrapped around the rotation / run loops
        and forwarded to the evaluator.
    :param overwrite: when true, an existing ``trials.pkl`` is ignored and the trials
        are rebuilt from the run results (the file itself is not rewritten here).
    :param do_print: whether to print the rotations that were found.
    :param trials_out_dict: rotation -> ``Trials`` cache.
    :param results_out_dict: rotation -> run name -> ``(tuning, test)`` cache.
    :param args_out_dict: rotation -> run name -> ``Args`` cache.
    :param params_out_dict: rotation -> run name -> hyperparameters cache.
    :return: ``(config, results_out_dict, args_out_dict, params_out_dict, trials_out_dict)``.
    """
    # NOTE: this function unpickles files found below ``hyperparameter_search_dir``.
    # pickle can execute arbitrary code, so only use it on trusted directories.
    config = read_config(hyperparameter_search_dir)[0]
    hyperparameter_search_args = HyperparameterSearchArgs.from_json_file(
        os.path.join(hyperparameter_search_dir,
                     HYPERPARAMETER_SEARCH_ARGS_FILENAME))

    filepath = os.path.join(hyperparameter_search_dir, HYP_CONFIG_FILENAME)
    with open(filepath, mode='r') as f:
        raw_config = json.loads(f.read())

    # Rotation directories are the entries named '0' .. '9'; sorted for a stable order.
    rotations = sorted(
        set(os.listdir(hyperparameter_search_dir)) &
        {str(i) for i in range(10)})
    if do_print: print("Observe runs for rotations: %s" % ', '.join(rotations))

    if trials_out_dict is None: trials_out_dict = {}
    if results_out_dict is None: results_out_dict = {}
    if args_out_dict is None: args_out_dict = {}
    if params_out_dict is None: params_out_dict = {}

    if len(rotations) < 4 or tqdm is None: rotations_rng = rotations
    else: rotations_rng = tqdm(rotations, desc="Reading Rotations")
    for rotation in rotations_rng:
        for d in (results_out_dict, args_out_dict, params_out_dict):
            if rotation not in d: d[rotation] = {}

        rotation_results = results_out_dict[rotation]
        rotation_args = args_out_dict[rotation]
        rotation_params = params_out_dict[rotation]

        rotation_dir = os.path.join(hyperparameter_search_dir, rotation)

        run_names = [r for r in os.listdir(rotation_dir) if r != 'trials.pkl']
        run_names_rng = run_names if tqdm is None else tqdm(
            run_names, desc="Reading Runs")

        for run_name in run_names_rng:
            run_dir = os.path.join(rotation_dir, run_name)
            if not os.path.isdir(run_dir):
                print(f"Found file; expecting directory! {run_dir}")
                continue
            elif os.path.isfile(os.path.join(run_dir, 'error.pkl')):
                # Runs that recorded an error are ignored.
                continue

            # Lastly, if we don't need results, continue
            if run_name in rotation_results and rotation_results[
                    run_name] is not None:
                continue

            args_filepath = os.path.join(run_dir, ARGS_FILENAME)
            if not os.path.isfile(args_filepath): continue
            args = Args.from_json_file(args_filepath)

            # A run counts as finished once the checkpoint of its last epoch exists.
            num_epochs = args.epochs
            completed_training = os.path.isfile(
                os.path.join(run_dir, 'model.epoch-%d' % (num_epochs - 1)))
            if not completed_training:
                print(
                    f"Run {run_dir} still training (or errored and didn't report)"
                )
                continue

            # Tuning performance: stored result, alternative file name, or recompute.
            tuning_result_filepath = os.path.join(run_dir, 'tuning_perf.pkl')
            if os.path.isfile(tuning_result_filepath):
                with open(tuning_result_filepath, mode='rb') as f:
                    tuning = pickle.load(f)
            elif os.path.isfile(
                    os.path.join(run_dir, 'tuning_perf_metrics.pkl')):
                with open(os.path.join(run_dir, 'tuning_perf_metrics.pkl'),
                          mode='rb') as f:
                    tuning = pickle.load(f)
            else:
                print(f'Missing tuning for {run_dir}')
                if tuning_dataset is not None:
                    # set the necessary features in tuning dataset
                    if args.do_masked_imputation:
                        tuning_dataset.imputation_mask_rate = args.imputation_mask_rate
                    _, _, tuning = evaluator.evaluate_multi(
                        tuning_dataset,
                        model_rundir=run_dir,
                        num_random_endpoints=10,
                        batch_size=1024,
                        num_workers=27,
                        evaluate_on_25=True,
                        get_all_reprs=False,
                        tqdm=tqdm)
                    with open(tuning_result_filepath, mode='wb') as f:
                        pickle.dump(tuning, f)
                else:
                    print("Wasn't given a tuning dataset!")
                    continue

            # Test performance: same lookup, but a missing result is tolerated (None).
            test_result_filepath = os.path.join(run_dir, 'test_perf.pkl')
            if os.path.isfile(test_result_filepath):
                with open(test_result_filepath, mode='rb') as f:
                    test = pickle.load(f)
            elif os.path.isfile(os.path.join(run_dir,
                                             'test_perf_metrics.pkl')):
                with open(os.path.join(run_dir, 'test_perf_metrics.pkl'),
                          mode='rb') as f:
                    test = pickle.load(f)
            else:
                print(f"Have tuning but missing test for {run_dir}")
                test = None
                if test_dataset is not None:
                    if args.do_masked_imputation:
                        test_dataset.imputation_mask_rate = args.imputation_mask_rate
                    _, _, test = evaluator.evaluate_multi(
                        test_dataset,
                        model_rundir=run_dir,
                        num_random_endpoints=10,
                        batch_size=1024,
                        num_workers=27,
                        evaluate_on_25=True,
                        get_all_reprs=False,
                        tqdm=tqdm)
                    with open(test_result_filepath, mode='wb') as f:
                        pickle.dump(test, f)
                else:
                    print("Wasn't given a test dataset!")

            rotation_results[run_name] = (tuning, test)
            if run_name not in rotation_args or rotation_args[run_name] is None:
                rotation_args[run_name] = args

            if run_name not in rotation_params or rotation_params[
                    run_name] is None:
                params_filepath = os.path.join(run_dir, PARAMS_FILENAME)
                if os.path.isfile(params_filepath):
                    with open(params_filepath, mode='rb') as f:
                        rotation_params[run_name] = pickle.load(f)
                else:
                    # No stored hyperparameters: derive them from the run's args.
                    rotation_params[run_name] = args_to_params(
                        rotation_args[run_name], raw_config)

        if rotation in trials_out_dict and trials_out_dict[
                rotation] is not None:
            continue

        trials_filepath = os.path.join(rotation_dir, 'trials.pkl')
        if os.path.exists(trials_filepath) and not overwrite:
            with open(trials_filepath, mode='rb') as f:
                trials_out_dict[rotation] = pickle.load(f)
            continue

        # Rebuild Trials
        # TODO(mmd): Something wrong in misc.idxs...
        trials = Trials(exp_key='exp')  #hyperparameter_search_dir
        for run_name in rotation_results:
            args = rotation_args[run_name]
            params = rotation_params[run_name]
            perf_metrics, test_perf_metrics = rotation_results[run_name]
            try:
                loss = ObjectiveFntr.perf_metrics_to_trial_result(
                    perf_metrics,
                    args,
                    single_task=hyperparameter_search_args.single_task_search)
            except Exception as e:
                print(
                    f"Errored computing tuning results for {hyperparameter_search_dir} on rotation "
                    f"{rotation}, {run_name}: {e}")
                traceback.print_exc()
                continue

            # A missing or unusable test result becomes NaN rather than dropping the run.
            try:
                if test_perf_metrics is not None:
                    test_loss = ObjectiveFntr.perf_metrics_to_trial_result(
                        test_perf_metrics,
                        args,
                        single_task=hyperparameter_search_args.
                        single_task_search)
                else:
                    test_loss = np.nan
            except TypeError:
                test_loss = np.nan
            except Exception as e:
                if "'NoneType' object is not subscriptable" in str(e):
                    test_loss = np.nan
                else:
                    print(
                        f"Errored computing test results for {hyperparameter_search_dir} on rotation {rotation}, "
                        f"{run_name}: {e}")
                    traceback.print_exc()
                    continue

            # np.nan instead of np.NaN: the capitalised alias was removed in NumPy 2.0.
            loss_variance, test_loss_variance = np.nan, np.nan
            result = {
                'status': STATUS_OK,
                'loss': loss,
                'loss_variance': loss_variance,
                'test_loss': test_loss,
                'test_loss_variance': test_loss_variance,
            }
            spec = params

            trials.insert_trial_doc({
                'tid': run_name,
                'spec': spec,
                'result': result,
                'misc': {
                    'tid': run_name,
                    'cmd': '',
                    'idxs': [],
                    'vals': {k: [v]
                             for k, v in spec.items()},
                },
                'state': '',
                'owner': '',
                'book_time': 0,
                'refresh_time': 0,
                'exp_key': 'exp',  # hyperparameter_search_dir,
            })
        trials.refresh()
        trials_out_dict[rotation] = trials

    return config, results_out_dict, args_out_dict, params_out_dict, trials_out_dict
Beispiel #9
0
        },
        'vals': {
            'executor_memory': [7],
            'offheap_size': [1],
            'shuffle_partitions': [16],
            'split_size': [15]
        }
    },
    'exp_key': None,
    'owner': None,
    'version': 0,
    'book_time': None,
    'refresh_time': None
}]
# Seed the trials object with the hand-written trial documents, then refresh it.
trials.insert_trial_docs(new_trials)
trials.refresh()

# TPE with an explicit number of start-up jobs, bound with functools.partial.
tpe_suggest = partial(tpe.suggest, n_startup_jobs=num_random_startup_jobs)
# The search runs on the pre-seeded trials object.
best = fmin(objective_function,
            space=space,
            algo=tpe_suggest,
            max_evals=num_evals,
            trials=trials)
# Bare expression: the value is only displayed in an interactive session.
trials.best_trial['result']['loss']


# define an objective function
def objective(args):
    """Return the value of a ``(case, val)`` pair when the case is ``'case 1'``.

    For any other case the function returns ``None``.
    """
    label, value = args
    return value if label == 'case 1' else None
Beispiel #10
0
         max_evals=1,
         return_argmin=False,
         show_progressbar=False)

    trial = cache_trials.trials[-1]

    return trial, trial_parameters


for i in range(5):
    t, tp = get_next_params(global_trials)
    mse = run_model(tp)
    t['result']['loss'] = mse
    t['refresh_time'] = datetime.datetime.now()
    global_trials.insert_trial_doc(t)
    global_trials.refresh()

print(global_trials.best_trial)
"""
# Ask TPE for a single recommendation using a fresh trials object.
trials = Trials()
print("First recommendation")
fmin(param_extractor, space, algo=tpe.suggest, trials=trials, max_evals=1, return_argmin=False)

# The trial just produced is the last one stored.
new_trial = trials.trials[-1]

# Overwrite its loss with a hard-coded value.
new_trial['result']['loss'] = 0.4

print(new_trial)

# Copy the trial into the shared history and refresh it.
global_trials.insert_trial_doc(new_trial)
global_trials.refresh()
Beispiel #11
0
    def _fit_on_prepared_data(self, metadata):
        """Run the TPE-driven model search and return the best model.

        ``self.num_params`` configurations are explored in batches of at most
        ``self.parallelism``.  For each batch the parameters are suggested by
        HyperOpt's TPE, the corresponding estimators are trained and validated for
        ``self.num_epochs`` epochs, and the last validation value of
        ``self.evaluation_metric`` is recorded as the trial loss.

        :param metadata: forwarded to ``create_model`` of every estimator.
        :return: a ``ModelSelectionResult`` built from the model with the lowest final
            validation metric, the results of all trained estimators, all models and
            the output column names.
        """
        trials = Trials()
        domain = Domain(None, self.hyperopt_search_space)
        rand = np.random.RandomState(constants.RANDOM_SEED)

        all_estimators = []
        all_estimator_results = {}
        for i in range(0, self.num_params, self.parallelism):
            n = min(self.num_params - i, self.parallelism)

            # Using HyperOpt TPE to generate parameters
            hyperopt_params = []
            for j in range(i, i + n):
                new_param = tpe.suggest([j], domain, trials, rand.randint(0, 2 ** 31 - 1))
                new_param[0]['status'] = STATUS_RUNNING

                trials.insert_trial_docs(new_param)
                trials.refresh()
                hyperopt_params.append(new_param[0])

            # Generating Cerebro params from HyperOpt params
            estimator_param_maps = []
            for hyperopt_param in hyperopt_params:
                param = {}
                for k in hyperopt_param['misc']['vals']:
                    val = hyperopt_param['misc']['vals'][k][0].item()
                    if isinstance(self.search_space[k], _HPChoice):
                        # if the hyperparameter is a choice the index is returned
                        val = self.search_space[k].options[val]
                    param[k] = val
                estimator_param_maps.append(param)

            # Generating Cerebro estimators
            estimators = [self._estimator_gen_fn_wrapper(param) for param in estimator_param_maps]
            estimator_results = {model.getRunId(): {} for model in estimators}
            # log hyperparameters to TensorBoard
            self._log_hp_to_tensorboard(estimators, estimator_param_maps)

            # Trains the models up to the number of epochs specified. For each iteration also performs validation
            for epoch in range(self.num_epochs):
                epoch_results = self.backend.train_for_one_epoch(estimators, self.store, self.feature_cols,
                                                                 self.label_cols)
                update_model_results(estimator_results, epoch_results)

                epoch_results = self.backend.train_for_one_epoch(estimators, self.store, self.feature_cols,
                                                                 self.label_cols, is_train=False)
                update_model_results(estimator_results, epoch_results)

                self._log_epoch_metrics_to_tensorboard(estimators, estimator_results)

            all_estimators.extend(estimators)
            all_estimator_results.update(estimator_results)

            # HyperOpt TPE update
            # NOTE(review): this writes to the documents returned by tpe.suggest; confirm
            # that ``trials`` holds these very objects (not copies made on insert),
            # otherwise the losses never reach the next TPE suggestion.
            for hyperopt_param, estimator in zip(hyperopt_params, estimators):
                hyperopt_param['status'] = STATUS_OK
                hyperopt_param['result'] = {'loss': estimator_results[estimator.getRunId()][
                    'val_' + self.evaluation_metric][-1], 'status': STATUS_OK}
            trials.refresh()

        # find the best model and create ModelSearchModel
        models = [est.create_model(all_estimator_results[est.getRunId()], est.getRunId(), metadata) for est in
                  all_estimators]
        val_metrics = [all_estimator_results[est.getRunId()]['val_' + self.evaluation_metric][-1] for est in
                       all_estimators]
        best_model = models[np.argmin(val_metrics)]

        # Hand back the results of every batch; ``estimator_results`` only covers the last one.
        return ModelSelectionResult(best_model, all_estimator_results, models, [x+"__output" for x in self.label_cols])
class Hyperopt(AbstractPlanner):
    """Planner that proposes parameters with HyperOpt's Tree of Parzen Estimators."""

    def __init__(self, goal='minimize', show_progressbar=False):
        """
        Tree of Parzen Estimators (TPE) as implemented in HyperOpt.

        Args:
            goal (str): The optimization goal, either 'minimize' or 'maximize'. Default is 'minimize'.
            show_progressbar (bool): If True, show a progressbar.
        """
        # Must stay the first statement: locals() has to contain only the arguments.
        AbstractPlanner.__init__(**locals())
        self._trials = Trials()  # Hyperopt object that stores the search history
        self._hp_space = None  # the parameters in the Hyperopt format

    def _set_param_space(self, param_space):
        """Store the continuous parameters of ``param_space`` and rebuild the Hyperopt space.

        Parameters of any other type are left out, as in ``_set_hp_space``.
        """
        self._param_space = []
        for param in param_space:
            if param.type == 'continuous':
                # Appending inside the branch avoids re-adding the previous
                # parameter (or hitting an unbound name) for unsupported types.
                self._param_space.append({
                    'name': param.name,
                    'type': param.type,
                    'domain': (param.low, param.high)
                })
        # update hyperopt space accordingly
        self._set_hp_space()

    def _tell(self, observations):
        """Record the observed parameters and values and rebuild the Hyperopt trials."""
        self._params = observations.get_params(as_array=False)
        self._values = observations.get_values(as_array=True,
                                               opposite=self.flip_measurements)
        # update hyperopt Trials accordingly
        self._set_hp_trials()

    def _set_hp_space(self):
        """Convert ``self._param_space`` into an ordered Hyperopt search space."""
        # Each continuous parameter becomes a uniform distribution over its domain.
        space = [
            (param['name'],
             hp.uniform(param['name'], param['domain'][0], param['domain'][1]))
            for param in self._param_space
            if param['type'] == 'continuous'
        ]
        # update instance attribute that is the space input for Hyperopt fmin
        self._hp_space = OrderedDict(space)

    def _set_hp_trials(self):
        """Rebuild ``self._trials`` from the stored parameters and values.

        Every (parameters, value) pair becomes one finished trial, numbered from 0.
        """
        self._trials = Trials()
        if self._params is not None and len(self._params) > 0:
            for tid, (param, loss) in enumerate(zip(self._params,
                                                    self._values)):
                hyperopt_trial = self._trials.new_trial_docs(
                    tids=[tid],
                    specs=[None],
                    results=[{
                        'loss': loss,
                        'status': STATUS_OK
                    }],
                    miscs=[{
                        'tid': tid,
                        'cmd': ('domain_attachment', 'FMinIter_Domain'),
                        'idxs': {k: [tid] for k in param},
                        'vals': {k: [v] for k, v in param.items()},
                        'workdir': None
                    }])
                hyperopt_trial[0]['state'] = JOB_STATE_DONE
                self._trials.insert_trial_docs(hyperopt_trial)
            # One refresh after all inserts gives the same final state as one per trial.
            self._trials.refresh()

    def _ask(self):
        """Run one more Hyperopt iteration and return the parameters it proposes."""
        # NOTE: we pass a dummy function as we just ask for the new (+1) set of parameters
        _ = fmin(fn=lambda x: 0,
                 space=self._hp_space,
                 algo=tpe.suggest,
                 max_evals=self.num_generated,
                 trials=self._trials,
                 show_progressbar=self.show_progressbar)

        # make sure the number of parameters asked matches the number of Hyperopt iterations/trials
        assert len(self._trials.trials) == self.num_generated
        # get params from last dict in trials.trials; a new dict is built so the
        # one-element lists stored in the trial document are not overwritten
        last_vals = self._trials.trials[-1]['misc']['vals']
        proposed_params = {key: value[0] for key, value in last_vals.items()}

        return ParameterVector(dict=proposed_params,
                               param_space=self.param_space)
    def search(self, run_name, store, context, hp_records, runs):
        """Suggest the next hyperparameter values for ``run_name`` from past runs.

        Every usable past run is converted into a finished hyperopt trial.  With fewer
        than three such trials the values are sampled randomly, otherwise they are
        suggested by TPE.

        :param run_name: name of the run the suggestion is made for; its trial id is
            obtained with ``run_helper.get_int_from_run_name``.
        :param store: not used by this method.
        :param context: provides ``primary_metric`` (the key of the metric in each run)
            and ``maximize_metric`` (the metric is negated when true).
        :param hp_records: records whose ``"name"`` and ``"space_func"`` entries define
            the search space.
        :param runs: past run records; those lacking ``"run_name"``, ``"hparams"`` or
            the primary metric are ignored.
        :return: the suggested values after ``self.fixup_hyperopt_hparams``.
        """

        def make_trial(tid, arg_dict, loss_value):
            # NOTE(review): arg_dict is stored as misc["vals"] unchanged; confirm the
            # hparams already have the shape hyperopt expects there.
            return {
                "book_time": None,
                "exp_key": None,
                "owner": None,
                "refresh_time": None,
                "spec": None,
                "state": 2,   # done
                "tid": tid,
                "version": 0,
                "misc": {
                    "cmd": ("domain_attachment", "FMinIter_Domain"),
                    "idxs": {key: [tid] for key in arg_dict.keys()},
                    "tid": tid,
                    "vals": arg_dict,
                },
                "result": {"loss": loss_value, "status": "ok"},
            }

        dummy_loss = lambda x: None
        param_space = {r["name"]: r["space_func"] for r in hp_records}
        domain = base.Domain(dummy_loss, param_space)

        rstate = np.random.RandomState()

        # convert runs to Trials
        trial_list = []

        for run in runs:
            # don't trip over inappropriate runs
            if ("run_name" not in run) or ("hparams" not in run) or (context.primary_metric not in run):
                continue

            # A separate name keeps the ``run_name`` argument intact for the
            # suggestion below.
            past_run_name = run["run_name"]
            arg_dict = run["hparams"]

            loss_value = run[context.primary_metric]
            if context.maximize_metric:
                loss_value = -loss_value

            # extract a unique int from run_name   (parent.childnum)
            past_tid = run_helper.get_int_from_run_name(past_run_name)

            trial_list.append(make_trial(past_tid, arg_dict, loss_value))

        # finally, add our trial_list to trials
        trials = Trials()
        trials.insert_trial_docs(trial_list)
        trials.refresh()

        # get next suggested hyperparameter values from TPE algorithm
        tid = run_helper.get_int_from_run_name(run_name)

        min_trials = 3      # before this, just do rand sampling
        seed = rstate.randint(2 ** 31 - 1)

        if len(trials) < min_trials:
            new_trials = rand.suggest([tid], domain, trials, seed)
        else:
            new_trials = tpe.suggest([tid], domain, trials, seed)

        # apply the suggested hparam values
        trial = new_trials[0]
        arg_dict = trial["misc"]["vals"]
        arg_dict = self.fixup_hyperopt_hparams(param_space, arg_dict)

        return arg_dict
Beispiel #14
0
class _HyperOpt(base.Optimizer):
    # pylint: disable=too-many-instance-attributes
    """Optimizer that proposes candidates with hyperopt's TPE algorithm.

    Candidates are generated by ``tpe.suggest`` and every ask/tell is
    mirrored in a hyperopt ``Trials`` object (``self.trials``), which is
    the history TPE reads on the next ask.
    """

    def __init__(
        self,
        parametrization: IntOrParameter,
        budget: tp.Optional[int] = None,
        num_workers: int = 1,
        *,
        prior_weight: float = 1.0,
        n_startup_jobs: int = 20,
        n_EI_candidates: int = 24,
        gamma: float = 0.25,
        verbose: bool = False,
    ) -> None:
        """Build the hyperopt search space and an empty trial history.

        :param parametrization: forwarded to the base optimizer.
        :param budget: forwarded to the base optimizer.
        :param num_workers: forwarded to the base optimizer.
        :param prior_weight: forwarded unchanged to ``tpe.suggest``.
        :param n_startup_jobs: forwarded unchanged to ``tpe.suggest``.
        :param n_EI_candidates: forwarded unchanged to ``tpe.suggest``.
        :param gamma: forwarded unchanged to ``tpe.suggest``.
        :param verbose: forwarded unchanged to ``tpe.suggest``.
        """
        super().__init__(parametrization,
                         budget=budget,
                         num_workers=num_workers)
        try:
            # try to convert parametrization to hyperopt search space
            if not isinstance(self.parametrization, p.Instrumentation):
                raise NotImplementedError
            self.space = _get_search_space(self.parametrization.name,
                                           self.parametrization)
            self._transform = None
        except NotImplementedError:
            # Fallback (also taken when _get_search_space itself raises
            # NotImplementedError): search over `dimension` uniform [0, 1]
            # variables and map them to/from standardized data with the
            # ArctanBound transform.
            self._transform = transforms.ArctanBound(0, 1)
            self.space = {
                f"x_{i}": hp.uniform(f"x_{i}", 0, 1)
                for i in range(self.dimension)
            }

        self.trials = Trials()
        # fn=None: the domain is only used for sampling, the objective is
        # never evaluated through hyperopt in this class.
        self.domain = Domain(fn=None,
                             expr=self.space,
                             pass_expr_memo_ctrl=False)
        self.tpe_args = {
            "prior_weight": prior_weight,
            "n_startup_jobs": n_startup_jobs,
            "n_EI_candidates": n_EI_candidates,
            "gamma": gamma,
            "verbose": verbose,
        }

    def _internal_ask_candidate(self) -> p.Parameter:
        """Ask TPE for one new trial and convert it to a candidate.

        The new trial document is inserted in ``self.trials`` and its id is
        stored in ``candidate._meta["trial_id"]`` so that the matching tell
        can find it again.
        """
        # Inspired from FMinIter class (hyperopt)
        next_id = self.trials.new_trial_ids(1)
        new_trial = tpe.suggest(next_id, self.domain, self.trials,
                                self._rng.randint(2**31 - 1),
                                **self.tpe_args)[0]
        self.trials.insert_trial_doc(new_trial)
        self.trials.refresh()

        candidate = self.parametrization.spawn_child()

        if self._transform:
            data = np.array([
                new_trial["misc"]["vals"][f"x_{i}"][0]
                for i in range(self.dimension)
            ])
            candidate = candidate.set_standardized_data(
                self._transform.backward(data))

            # For consistency, we need to update hyperopt history
            # when standardized data is changed.
            # The round trip is computed once and reused for both the
            # comparison and the write-back.
            round_trip = self._transform.forward(
                candidate.get_standardized_data(
                    reference=self.parametrization))
            if any(data != round_trip):
                stored_vals = self.trials._dynamic_trials[
                    next_id[0]]["misc"]["vals"]
                for it, val in enumerate(round_trip):
                    stored_vals[f"x_{it}"][0] = val
        else:
            spec = hyperopt.base.spec_from_misc(new_trial["misc"])
            config = hyperopt.space_eval(self.space, spec)
            candidate.value = _hp_dict_to_parametrization(config)

        candidate._meta["trial_id"] = new_trial["tid"]
        return candidate

    def _internal_tell_candidate(self, candidate: p.Parameter,
                                 loss: float) -> None:
        """Record ``loss`` for a previously registered trial and mark it done.

        The candidate must carry a ``trial_id`` in its ``_meta`` and the
        matching trial must still be in ``JOB_STATE_NEW`` (both asserted).
        """
        result = {"loss": loss, "status": "ok"}
        assert "trial_id" in candidate._meta
        tid = candidate._meta["trial_id"]
        # NOTE: the trial id is used as a list index into _dynamic_trials.
        trial = self.trials._dynamic_trials[tid]
        assert trial["state"] == hyperopt.JOB_STATE_NEW

        # A single timestamp serves as both book and refresh time (the
        # original wrote refresh_time twice in a row).
        now = hyperopt.utils.coarse_utcnow()
        trial["book_time"] = now
        trial["refresh_time"] = now
        trial["state"] = hyperopt.JOB_STATE_DONE
        trial["result"] = result
        self.trials.refresh()

    def _internal_tell_not_asked(self, candidate: p.Parameter,
                                 loss: float) -> None:
        """Register a candidate that was not produced by an ask, then tell it.

        A trial document is first generated with ``hyperopt.rand.suggest``
        and inserted in the history; its ``misc.vals`` are then replaced by
        the values of ``candidate`` before delegating to
        ``_internal_tell_candidate``.
        """
        next_id = self.trials.new_trial_ids(1)
        new_trial = hyperopt.rand.suggest(next_id, self.domain, self.trials,
                                          self._rng.randint(2**31 - 1))
        self.trials.insert_trial_docs(new_trial)
        self.trials.refresh()
        tid = next_id[0]

        if self._transform:
            data = candidate.get_standardized_data(
                reference=self.parametrization)
            data = self._transform.forward(data)
            self.trials._dynamic_trials[tid]["misc"]["vals"] = {
                f"x_{i}": [value]
                for i, value in enumerate(data)
            }
        else:
            # Every known hyperopt label starts out empty; the converter
            # receives this as its default.
            null_config: dict = {
                k: []
                for k in self.trials._dynamic_trials[tid]["misc"]
                ["vals"].keys()
            }
            new_vals: dict = _hp_parametrization_to_dict(candidate,
                                                         default=null_config)
            self.trials._dynamic_trials[tid]["misc"]["vals"] = new_vals

        self.trials.refresh()
        candidate._meta["trial_id"] = tid
        self._internal_tell_candidate(candidate, loss)
Beispiel #15
0
def read_or_recreate_trials(hyperparameter_search_dir, tqdm=None, overwrite=False):
    """Collect the artifacts of every finished run and load or rebuild the search's Trials.

    Each entry of ``hyperparameter_search_dir`` (except ``trials.pkl``) is treated as a run
    directory. A run is kept only if it has no ``error.pkl``, has a config file, has a params
    pickle, has finished training according to ``trained_until`` and has an
    ``ir_metrics_<epochs>.json`` file.

    :param hyperparameter_search_dir: directory holding the search config and one
        sub-directory per run.
    :param tqdm: optional progress-bar factory wrapped around the run names.
    :param overwrite: when False and ``trials.pkl`` exists, that pickle is returned instead
        of rebuilding the Trials object from the run directories.
    :return: ``(config, all_results, all_configs, all_params, trials)`` where ``config`` is
        the search-level config from ``read_config`` and the three dicts are keyed by run name.
    :raises NotImplementedError: if a run has a config file but no params pickle.
    :raises AssertionError: if a fully trained run has no metrics file.
    """
    config = read_config(hyperparameter_search_dir)[0]

    filepath = os.path.join(hyperparameter_search_dir, HYP_CONFIG_FILENAME)
    # NOTE(review): raw_config is not used below; the read is kept so that a missing or
    # malformed file still fails here exactly as before.
    with open(filepath, mode='r') as f:
        raw_config = json.load(f)

    all_params, all_results, all_configs = {}, {}, {}

    run_names = [r for r in os.listdir(hyperparameter_search_dir) if r != 'trials.pkl']
    run_names_rng = run_names if tqdm is None else tqdm(run_names)

    # Iterate the (optionally tqdm-wrapped) names; the original looped over the bare list,
    # so the progress bar was created but never advanced.
    for run_name in run_names_rng:
        run_dir = os.path.join(hyperparameter_search_dir, run_name)
        if not os.path.isdir(run_dir):
            print(run_dir)
            continue

        if os.path.isfile(os.path.join(run_dir, 'error.pkl')):
            continue

        config_filepath = os.path.join(run_dir, CONFIG_FILENAME)
        if not os.path.isfile(config_filepath):
            continue
        # Use a dedicated name: re-binding `config` here made the function return the last
        # run's config instead of the search-level one.
        with open(config_filepath, mode='r') as f:
            run_config = json.load(f)
        all_configs[run_name] = run_config

        params_filepath = os.path.join(run_dir, PARAMS_FILENAME)
        if not os.path.isfile(params_filepath):
            raise NotImplementedError
        # NOTE: pickle.load can execute arbitrary code; only point this at trusted directories.
        with open(params_filepath, mode='rb') as f:
            constant, variable = pickle.load(f)
        all_params[run_name] = constant
        all_params[run_name].update(variable)

        num_epochs = run_config['trainer']['epochs']
        completed_training = trained_until(run_dir, run_name, num_epochs)
        if not completed_training:
            print("Run %s still training (or errored and didn't report)" % run_name)
            print(run_name, num_epochs)
            print(os.listdir(run_dir))
            continue

        tuning_results_filename = os.path.join(run_dir, 'ir_metrics_%d.json' % num_epochs)
        assert os.path.isfile(tuning_results_filename), "Missing tuning results for %s" % run_dir

        with open(tuning_results_filename, mode='r') as f:
            all_results[run_name] = json.load(f)

    trials_filepath = os.path.join(hyperparameter_search_dir, 'trials.pkl')
    if os.path.exists(trials_filepath) and not overwrite:
        print("Reloading trials!")
        with open(trials_filepath, mode='rb') as f:
            trials = pickle.load(f)
        return config, all_results, all_configs, all_params, trials

    # Rebuild Trials
    # TODO(mmd): Something wrong in misc.idxs...
    trials = Trials(exp_key = 'exp') #hyperparameter_search_dir
    for run_name in all_results:
        params = all_params[run_name]

        loss = all_results[run_name]['Val   (Pert):']['median_rank']
        # np.nan instead of np.NaN: the capitalised alias was removed in NumPy 2.0.
        loss_variance, test_loss, test_loss_variance = np.nan, np.nan, np.nan

        result = {
            'status': STATUS_OK,
            'loss': loss,
            'loss_variance': loss_variance,
            'test_loss': test_loss,
            'test_loss_variance': test_loss_variance,
        }
        spec = params

        trials.insert_trial_doc({
            'tid': run_name,
            'spec': spec,
            'result': result,
            'misc': {
                'tid': run_name,
                'cmd': '',
                'idxs': [],
                'vals': {k: [v] for k, v in spec.items()},
            },
            'state': JOB_STATE_DONE,
            'owner': '',
            'book_time': 0,
            'refresh_time': 0,
            'exp_key': 'exp',# hyperparameter_search_dir,
        })

    trials.refresh()

    return config, all_results, all_configs, all_params, trials
Beispiel #16
0
    def suggest(self, history, searchspace):
        """
        Suggest params to maximize an objective function based on the
        function evaluation history using a tree of Parzen estimators (TPE),
        as implemented in the hyperopt package.

        Use of this function requires that hyperopt be installed.

        :param history: iterable of ``(params, scores, status)`` tuples, where
            ``status`` is one of 'SUCCEEDED', 'PENDING' or 'FAILED'.
        :param searchspace: iterable of variables that also provides
            ``to_hyperopt()``.
        :return: the parameter point hyperopt proposed for the next evaluation.
        :raises ImportError: if hyperopt has not been imported.
        :raises RuntimeError: if a history entry has an unknown status.
        """
        # This function is very odd, because as far as I can tell there's
        # no real documented API for any of the internals of hyperopt. Its
        # execution model is that hyperopt calls your objective function
        # (instead of merely providing you with suggested points, and then
        # you calling the function yourself), and it's very tricky (for me)
        # to use the internal hyperopt data structures to get these predictions
        # out directly.

        # so the path we take in this function is to construct a synthetic
        # hyperopt.Trials database from the `history`, and then call
        # hyperopt.fmin with a dummy objective function that logs the value
        # used, and then return that value to our client.

        # The form of the hyperopt.Trials database isn't really documented in
        # the code -- most of this comes from reverse engineering it, by
        # running fmin() on a simple function and then inspecting the form of
        # the resulting trials object.
        if 'hyperopt' not in sys.modules:
            raise ImportError('No module named hyperopt')

        random = check_random_state(self.seed)
        hp_searchspace = searchspace.to_hyperopt()

        trials = Trials()
        for i, (params, scores, status) in enumerate(history):
            if status == 'SUCCEEDED':
                # we're doing maximization, hyperopt.fmin() does minimization,
                # so we need to swap the sign
                result = {'loss': -np.mean(scores), 'status': STATUS_OK}
            elif status == 'PENDING':
                result = {'status': STATUS_RUNNING}
            elif status == 'FAILED':
                result = {'status': STATUS_FAIL}
            else:
                raise RuntimeError('unrecognized status: %s' % status)

            # the vals key in the trials dict is basically just the params
            # dict, but enum variables (hyperopt hp.choice() nodes) are
            # different, because the index of the parameter is specified
            # in vals, not the parameter itself.

            vals = {}
            for var in searchspace:
                if isinstance(var, EnumVariable):
                    # get the index in the choices of the parameter, and use
                    # that. (`idx` rather than `i`, so the trial index of the
                    # enclosing loop is not visually shadowed.)
                    matches = [
                        idx for idx, c in enumerate(var.choices)
                        if c == params[var.name]
                    ]
                    assert len(matches) == 1
                    vals[var.name] = matches
                else:
                    # the other big difference is that all of the param values
                    # are wrapped in length-1 lists.
                    vals[var.name] = [params[var.name]]

            trials.insert_trial_doc({
                'misc': {
                    'cmd': ('domain_attachment', 'FMinIter_Domain'),
                    'idxs': dict((k, [i]) for k in hp_searchspace.keys()),
                    'tid': i,
                    'vals': vals,
                    'workdir': None
                },
                'result': result,
                'tid': i,
                # bunch of fixed fields that hyperopt seems to require
                'owner': None,
                'spec': None,
                'state': 2,
                'book_time': None,
                'exp_key': None,
                'refresh_time': None,
                'version': 0
            })

        trials.refresh()
        chosen_params_container = []

        def tpe_suggest(*args, **kwargs):
            # TPE with this strategy's settings applied
            return tpe.suggest(*args,
                               gamma=self.gamma,
                               n_startup_jobs=self.seeds,
                               **kwargs)

        def mock_fn(x):
            # http://stackoverflow.com/a/3190783/1079728
            # to get around no nonlocal keyword in python2
            chosen_params_container.append(x)
            return 0

        # Pass the configured wrapper: the original handed plain tpe.suggest
        # to fmin, so self.gamma and self.seeds were silently ignored.
        fmin(fn=mock_fn,
             algo=tpe_suggest,
             space=hp_searchspace,
             trials=trials,
             max_evals=len(trials.trials) + 1,
             **self._hyperopt_fmin_random_kwarg(random))
        chosen_params = chosen_params_container[0]

        return chosen_params
Beispiel #17
0
    def suggest(self, history, searchspace):
        """
        Propose the next parameter point with hyperopt's tree of Parzen
        estimators (TPE), given the evaluation history so far. The objective
        is treated as something to maximize.

        hyperopt must already be imported for this to work.
        """
        # hyperopt wants to drive the objective function itself and exposes
        # no documented way to just ask for a suggestion. The workaround
        # used here: replay `history` into a hand-built hyperopt.Trials
        # database, then run fmin() for exactly one more evaluation with a
        # stand-in objective that records the point it is given.
        #
        # The layout of the trial documents below was reverse engineered
        # from the Trials object fmin() produces on a simple function.
        if 'hyperopt' not in sys.modules:
            raise ImportError('No module named hyperopt')

        random = check_random_state(self.seed)
        hp_searchspace = searchspace.to_hyperopt()

        trials = Trials()
        for tid, (params, scores, status) in enumerate(history):
            if status == 'SUCCEEDED':
                # fmin() minimizes while we maximize, hence the sign flip
                result = {'loss': -np.mean(scores), 'status': STATUS_OK}
            elif status == 'PENDING':
                result = {'status': STATUS_RUNNING}
            elif status == 'FAILED':
                result = {'status': STATUS_FAIL}
            else:
                raise RuntimeError('unrecognized status: %s' % status)

            # `vals` mirrors `params`, with every value wrapped in a
            # one-element list. Enum variables (hp.choice() nodes) store the
            # index of the chosen option rather than the option itself.
            vals = {}
            for var in searchspace:
                if not isinstance(var, EnumVariable):
                    vals[var.name] = [params[var.name]]
                    continue

                positions = [pos for pos, choice in enumerate(var.choices)
                             if choice == params[var.name]]
                assert len(positions) == 1
                vals[var.name] = positions

            misc = {
                'cmd': ('domain_attachment', 'FMinIter_Domain'),
                'idxs': {label: [tid] for label in hp_searchspace.keys()},
                'tid': tid,
                'vals': vals,
                'workdir': None,
            }
            trials.insert_trial_doc({
                'misc': misc,
                'result': result,
                'tid': tid,
                # fixed fields that hyperopt seems to require
                'owner': None,
                'spec': None,
                'state': 2,
                'book_time': None,
                'exp_key': None,
                'refresh_time': None,
                'version': 0,
            })

        trials.refresh()

        captured = []

        def record_point(x):
            # http://stackoverflow.com/a/3190783/1079728
            # a list stands in for `nonlocal`, which python2 lacks
            captured.append(x)
            return 0

        # one evaluation beyond what is already recorded
        budget = len(trials.trials) + 1
        fmin(fn=record_point,
             algo=tpe.suggest,
             space=hp_searchspace,
             trials=trials,
             max_evals=budget,
             **self._hyperopt_fmin_random_kwarg(random))

        return captured[0]
def read_or_recreate_trials(hyperparameter_search_dir,
                            tuning_dataset=None,
                            test_dataset=None,
                            tqdm=None,
                            overwrite=False):
    """Collect per-rotation run results and load or rebuild a Trials object per rotation.

    Sub-directories of ``hyperparameter_search_dir`` named ``"0"`` .. ``"9"`` are treated as
    rotations; every other entry of a rotation directory (except ``trials.pkl``) is treated
    as a run. A run is kept only if it has no ``error.pkl``, has an args file, has its final
    ``model.epoch-<epochs-1>`` checkpoint, and has (or can be given) both tuning and test
    performance metrics.

    :param hyperparameter_search_dir: root directory of the search.
    :param tuning_dataset: if given, used to compute and cache missing tuning metrics;
        otherwise runs without cached tuning metrics are skipped.
    :param test_dataset: if given, used to compute and cache missing test metrics;
        otherwise runs without cached test metrics are skipped.
    :param tqdm: optional progress-bar factory; wrapped around the rotations (only when
        there are at least four) and around each rotation's runs, and passed on to
        ``evaluate_multi``.
    :param overwrite: when False and a rotation has a ``trials.pkl``, that pickle is used
        instead of rebuilding. A rebuilt Trials object is not written back by this function.
    :return: ``(config, all_results, all_args, all_params, all_trials)``; the four dicts are
        keyed by rotation, and the first three of them map run name to
        ``(tuning, test)`` metrics, args and params respectively.
    """
    config = read_config(hyperparameter_search_dir)[0]

    filepath = os.path.join(hyperparameter_search_dir, HYP_CONFIG_FILENAME)
    with open(filepath, mode='r') as f:
        raw_config = json.load(f)

    # Sorted so that the printed list and the processing order are deterministic.
    rotations = sorted(
        set(os.listdir(hyperparameter_search_dir)) & {str(i) for i in range(10)})
    print("Observe runs for rotations: %s" % ', '.join(rotations))

    all_trials = {}
    all_results = {}
    all_args = {}
    all_params = {}

    rotations_rng = rotations if len(rotations) < 4 or tqdm is None else tqdm(
        rotations)
    for rotation in rotations_rng:
        rotation_results = {}
        rotation_args = {}
        rotation_params = {}

        rotation_dir = os.path.join(hyperparameter_search_dir, rotation)

        run_names = [r for r in os.listdir(rotation_dir) if r != 'trials.pkl']
        run_names_rng = run_names if tqdm is None else tqdm(run_names)

        # Iterate the (optionally tqdm-wrapped) names; the original looped over the bare
        # list, so the progress bar was created but never advanced.
        for run_name in run_names_rng:
            run_dir = os.path.join(rotation_dir, run_name)
            if not os.path.isdir(run_dir):
                print(run_dir)
                continue

            if os.path.isfile(os.path.join(run_dir, 'error.pkl')):
                continue

            args_filepath = os.path.join(run_dir, ARGS_FILENAME)
            if not os.path.isfile(args_filepath):
                continue
            args = Args.from_json_file(args_filepath)
            rotation_args[run_name] = args

            # NOTE: pickle.load can execute arbitrary code; only point this function at
            # trusted search directories.
            params_filepath = os.path.join(run_dir, PARAMS_FILENAME)
            if os.path.isfile(params_filepath):
                with open(params_filepath, mode='rb') as f:
                    rotation_params[run_name] = pickle.load(f)
            else:
                rotation_params[run_name] = args_to_params(
                    rotation_args[run_name], raw_config)

            num_epochs = args.epochs
            completed_training = os.path.isfile(
                os.path.join(run_dir, 'model.epoch-%d' % (num_epochs - 1)))
            if not completed_training:
                print("Run %s Still training (or errored and didn't report)" %
                      run_name)
                continue

            tuning_result_filepath = os.path.join(run_dir,
                                                  'tuning_perf_metrics.pkl')
            if os.path.isfile(tuning_result_filepath):
                with open(tuning_result_filepath, mode='rb') as f:
                    tuning = pickle.load(f)
            else:
                print('Missing tuning for %s' % run_name)
                if tuning_dataset is None:
                    print("Wasn't given a tuning dataset!")
                    continue
                _, _, tuning = evaluate_multi(tuning_dataset,
                                              model_rundir=run_dir,
                                              num_random_endpoints=10,
                                              batch_size=1024,
                                              num_workers=27,
                                              evaluate_on_25=True,
                                              get_all_reprs=False,
                                              tqdm=tqdm)
                with open(tuning_result_filepath, mode='wb') as f:
                    pickle.dump(tuning, f)

            test_result_filepath = os.path.join(run_dir,
                                                'test_perf_metrics.pkl')
            if os.path.isfile(test_result_filepath):
                with open(test_result_filepath, mode='rb') as f:
                    test = pickle.load(f)
            else:
                print('Have tuning but missing test for %s' % run_name)
                if test_dataset is None:
                    # The original had a bare string here (no print) and fell through
                    # with `test` undefined or left over from the previous run. Skip the
                    # run instead, mirroring the tuning branch above.
                    print("Wasn't given a test dataset!")
                    continue
                _, _, test = evaluate_multi(test_dataset,
                                            model_rundir=run_dir,
                                            num_random_endpoints=10,
                                            batch_size=1024,
                                            num_workers=27,
                                            evaluate_on_25=True,
                                            get_all_reprs=False,
                                            tqdm=tqdm)
                with open(test_result_filepath, mode='wb') as f:
                    pickle.dump(test, f)

            rotation_results[run_name] = (tuning, test)

        all_results[rotation] = rotation_results
        all_args[rotation] = rotation_args
        all_params[rotation] = rotation_params

        trials_filepath = os.path.join(rotation_dir, 'trials.pkl')
        if os.path.exists(trials_filepath) and not overwrite:
            with open(trials_filepath, mode='rb') as f:
                all_trials[rotation] = pickle.load(f)
            continue

        # Rebuild Trials
        # TODO(mmd): Something wrong in misc.idxs...
        trials = Trials(exp_key='exp')  #hyperparameter_search_dir
        for run_name in rotation_results:
            params = rotation_params[run_name]
            perf_metrics, test_perf_metrics = rotation_results[run_name]
            # Scores are negated to obtain losses.
            tuning_scores = -pd.Series(
                ObjectiveFntr.perf_metrics_to_trial_result(perf_metrics))
            test_scores = -pd.Series(
                ObjectiveFntr.perf_metrics_to_trial_result(test_perf_metrics))

            loss = tuning_scores.mean()
            loss_variance = tuning_scores.std()**2
            test_loss = test_scores.mean()
            test_loss_variance = test_scores.std()**2

            result = {
                'status': STATUS_OK,
                'loss': loss,
                'loss_variance': loss_variance,
                'test_loss': test_loss,
                'test_loss_variance': test_loss_variance,
            }
            spec = params

            trials.insert_trial_doc({
                'tid': run_name,
                'spec': spec,
                'result': result,
                'misc': {
                    'tid': run_name,
                    'cmd': '',
                    'idxs': [],
                    'vals': {k: [v]
                             for k, v in spec.items()},
                },
                # NOTE(review): '' is not one of hyperopt's integer job states; depending
                # on the hyperopt version, refresh() may drop such trials. Presumably this
                # should be the "done" state -- TODO confirm.
                'state': '',
                'owner': '',
                'book_time': 0,
                'refresh_time': 0,
                'exp_key': 'exp',  # hyperparameter_search_dir,
            })
        trials.refresh()
        all_trials[rotation] = trials

    return config, all_results, all_args, all_params, all_trials