Example #1
    def create_trials(self, complete, losses):
        if len(complete) > 0:
            trials = Trials()
            hist = self.create_history(complete)
            index = 0
            for c in complete:
                if c >= len(losses):
                    error(
                        "Index {} is larger than the size of losses {}".format(
                            c, len(losses)))
                    continue  # skip indices outside the losses list
                loss = losses[c]
                rval_specs = [None]
                new_id = index
                rval_results = []
                rval_results.append(create_ok_result(loss, c))
                rval_miscs = []
                rval_miscs.append(self.create_misc(index, hist))

                hyperopt_trial = trials.new_trial_docs([new_id], rval_specs,
                                                       rval_results,
                                                       rval_miscs)[0]
                index += 1
                if self.response_shaping is True:
                    # apply a log transform to the loss to improve optimization performance
                    #debug("before scaling: {}".format(loss))
                    if self.shaping_func == "log_err":
                        loss = apply_log_err(loss)
                    elif self.shaping_func == "hybrid_log":
                        loss = apply_hybrid_log(loss)
                    else:
                        debug("Invalid shaping function: {}".format(
                            self.shaping_func))
                hyperopt_trial['result'] = {
                    'loss': float(loss),
                    'status': STATUS_OK
                }
                hyperopt_trial['state'] = JOB_STATE_DONE
                #debug("History appended: {}-{}".format(c, loss))
                trials.insert_trial_doc(hyperopt_trial)
            trials.refresh()
            return trials
        else:
            return Trials()
Example #2
    def create_trials(self, completed, losses):
        if len(completed) > 0:
            trials = Trials()
            hist = self.create_history(completed)
            #index = 0
            #for c in completed:
            for index in range(len(completed)):
                c = completed[index]
                loss = losses[index]
                rval_specs = [None]
                new_id = index
                rval_results = []
                rval_results.append(create_ok_result(loss, c))
                rval_miscs = []
                rval_miscs.append(self.create_misc(index, hist))

                hopt_trial = trials.new_trial_docs([new_id], rval_specs, rval_results, rval_miscs)[0]

                if self.response_shaping is True:
                    # apply a log transform to the loss to improve optimization performance
                    #debug("before scaling: {}".format(loss))
                    if self.shaping_func == "log_err":
                        loss = apply_log_err(loss)
                    elif self.shaping_func == "hybrid_log":
                        loss = apply_hybrid_log(loss)
                    else:
                        debug("Invalid shaping function: {}".format(self.shaping_func))
                if loss is not None:
                    hopt_trial['result'] = {'loss': float(loss), 'status': STATUS_OK}
                    hopt_trial['state'] = JOB_STATE_DONE
                    #debug("History appended: {}-{}".format(c, loss))
                    trials.insert_trial_doc(hopt_trial)
            trials.refresh()
            return trials
        else:        
            return Trials()
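
A Trials object rebuilt this way is usually handed straight to hyperopt.fmin so that TPE conditions its next suggestion on the already-completed evaluations. A minimal warm-start sketch (the space and objective below are placeholders, not taken from the examples above):

from hyperopt import fmin, tpe, hp, Trials

space = {'lr': hp.loguniform('lr', -10, 0)}    # hypothetical search space

def objective(params):                         # hypothetical objective (loss to minimize)
    return (params['lr'] - 0.01) ** 2

trials = Trials()                              # or the Trials built by create_trials()
best = fmin(fn=objective, space=space, algo=tpe.suggest,
            trials=trials, max_evals=len(trials.trials) + 10)
print(best)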
Example #3
    def suggest(self, history, searchspace):
        """
        Suggest params to maximize an objective function based on the
        function evaluation history using a tree of Parzen estimators (TPE),
        as implemented in the hyperopt package.

        Use of this function requires that hyperopt be installed.
        """
        # This function is very odd, because as far as I can tell there's
        # no real documented API for any of the internals of hyperopt. Its
        # execution model is that hyperopt calls your objective function
        # (instead of merely providing you with suggested points, and then
        # you calling the function yourself), and it's very tricky (for me)
        # to use the internal hyperopt data structures to get these predictions
        # out directly.

        # so the path we take in this function is to construct a synthetic
        # hyperopt.Trials database from the `history`, and then call
        # hyperopt.fmin with a dummy objective function that logs the value
        # used, and then return that value to our client.

        # The form of the hyperopt.Trials database isn't really documented in
        # the code -- most of this comes from reverse engineering it, by
        # running fmin() on a simple function and then inspecting the form of
        # the resulting trials object.
        if 'hyperopt' not in sys.modules:
            raise ImportError('No module named hyperopt')

        random = check_random_state(self.seed)
        hp_searchspace = searchspace.to_hyperopt()

        trials = Trials()
        for i, (params, scores, status) in enumerate(history):
            if status == 'SUCCEEDED':
                # we're doing maximization, hyperopt.fmin() does minimization,
                # so we need to swap the sign
                result = {'loss': -np.mean(scores), 'status': STATUS_OK}
            elif status == 'PENDING':
                result = {'status': STATUS_RUNNING}
            elif status == 'FAILED':
                result = {'status': STATUS_FAIL}
            else:
                raise RuntimeError('unrecognized status: %s' % status)

            # the vals key in the trials dict is basically just the params
            # dict, but enum variables (hyperopt hp.choice() nodes) are
            # different, because the index of the parameter is specified
            # in vals, not the parameter itself.

            vals = {}
            for var in searchspace:
                if isinstance(var, EnumVariable):
                    # get the index in the choices of the parameter, and use
                    # that.
                    matches = [j for j, c in enumerate(var.choices)
                               if c == params[var.name]]
                    assert len(matches) == 1
                    vals[var.name] = matches
                else:
                    # the other big difference is that all of the param values
                    # are wrapped in length-1 lists.
                    vals[var.name] = [params[var.name]]

            trials.insert_trial_doc({
                'misc': {
                    'cmd': ('domain_attachment', 'FMinIter_Domain'),
                    'idxs': dict((k, [i]) for k in hp_searchspace.keys()),
                    'tid': i,
                    'vals': vals,
                    'workdir': None},
                'result': result,
                'tid': i,
                # bunch of fixed fields that hyperopt seems to require
                'owner': None, 'spec': None, 'state': 2, 'book_time': None,
                'exp_key': None, 'refresh_time': None, 'version': 0
                })

        trials.refresh()
        chosen_params_container = []

        def mock_fn(x):
            # http://stackoverflow.com/a/3190783/1079728
            # to get around no nonlocal keyword in python2
            chosen_params_container.append(x)
            return 0

        fmin(fn=mock_fn, algo=tpe.suggest, space=hp_searchspace, trials=trials,
             max_evals=len(trials.trials)+1,
             **self._hyperopt_fmin_random_kwarg(random))
        chosen_params = chosen_params_container[0]

        return chosen_params
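
The 'vals' convention described in the comments above (hp.choice parameters stored as the index of the selected option, and every value wrapped in a length-1 list) can be checked with hyperopt's space_eval helper. A small sketch over a made-up space:

from hyperopt import hp, space_eval

space = {'optimizer': hp.choice('optimizer', ['sgd', 'adam', 'rmsprop']),
         'lr': hp.loguniform('lr', -10, 0)}

# In a trial document, 'vals' holds the choice index and the raw float,
# each wrapped in a length-1 list:
vals = {'optimizer': [1], 'lr': [0.001]}

# space_eval maps the index-valued assignment back to concrete parameters.
point = {k: v[0] for k, v in vals.items()}
print(space_eval(space, point))    # e.g. {'optimizer': 'adam', 'lr': 0.001}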
Example #4
    def suggest(self, history, searchspace):
        """
        Suggest params to maximize an objective function based on the
        function evaluation history using a tree of Parzen estimators (TPE),
        as implemented in the hyperopt package.

        Use of this function requires that hyperopt be installed.
        """
        # This function is very odd, because as far as I can tell there's
        # no real documented API for any of the internals of hyperopt. Its
        # execution model is that hyperopt calls your objective function
        # (instead of merely providing you with suggested points, and then
        # you calling the function yourself), and it's very tricky (for me)
        # to use the internal hyperopt data structures to get these predictions
        # out directly.

        # so the path we take in this function is to construct a synthetic
        # hyperopt.Trials database from the `history`, and then call
        # hyperopt.fmin with a dummy objective function that logs the value
        # used, and then return that value to our client.

        # The form of the hyperopt.Trials database isn't really documented in
        # the code -- most of this comes from reverse engineering it, by
        # running fmin() on a simple function and then inspecting the form of
        # the resulting trials object.
        if 'hyperopt' not in sys.modules:
            raise ImportError('No module named hyperopt')

        random = check_random_state(self.seed)
        hp_searchspace = searchspace.to_hyperopt()

        trials = Trials()
        for i, (params, scores, status) in enumerate(history):
            if status == 'SUCCEEDED':
                # we're doing maximization, hyperopt.fmin() does minimization,
                # so we need to swap the sign
                result = {'loss': -np.mean(scores), 'status': STATUS_OK}
            elif status == 'PENDING':
                result = {'status': STATUS_RUNNING}
            elif status == 'FAILED':
                result = {'status': STATUS_FAIL}
            else:
                raise RuntimeError('unrecognized status: %s' % status)

            # the vals key in the trials dict is basically just the params
            # dict, but enum variables (hyperopt hp.choice() nodes) are
            # different, because the index of the parameter is specified
            # in vals, not the parameter itself.

            vals = {}
            for var in searchspace:
                if isinstance(var, EnumVariable):
                    # get the index in the choices of the parameter, and use
                    # that.
                    matches = [
                        j for j, c in enumerate(var.choices)
                        if c == params[var.name]
                    ]
                    assert len(matches) == 1
                    vals[var.name] = matches
                else:
                    # the other big difference is that all of the param values
                    # are wrapped in length-1 lists.
                    vals[var.name] = [params[var.name]]

            trials.insert_trial_doc({
                'misc': {
                    'cmd': ('domain_attachment', 'FMinIter_Domain'),
                    'idxs': dict((k, [i]) for k in hp_searchspace.keys()),
                    'tid': i,
                    'vals': vals,
                    'workdir': None
                },
                'result': result,
                'tid': i,
                # bunch of fixed fields that hyperopt seems to require
                'owner': None,
                'spec': None,
                'state': 2,
                'book_time': None,
                'exp_key': None,
                'refresh_time': None,
                'version': 0
            })

        trials.refresh()
        chosen_params_container = []

        def suggest(*args, **kwargs):
            return tpe.suggest(*args,
                               **kwargs,
                               gamma=self.gamma,
                               n_startup_jobs=self.seeds)

        def mock_fn(x):
            # http://stackoverflow.com/a/3190783/1079728
            # to get around no nonlocal keyword in python2
            chosen_params_container.append(x)
            return 0

        fmin(fn=mock_fn,
             algo=suggest,  # use the wrapper above so gamma/n_startup_jobs take effect
             space=hp_searchspace,
             trials=trials,
             max_evals=len(trials.trials) + 1,
             **self._hyperopt_fmin_random_kwarg(random))
        chosen_params = chosen_params_container[0]

        return chosen_params
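
The inner suggest() wrapper in this variant only pins TPE's gamma and n_startup_jobs; the same effect is commonly obtained with functools.partial. A sketch (the parameter values are illustrative):

from functools import partial
from hyperopt import fmin, tpe

algo = partial(tpe.suggest, gamma=0.25, n_startup_jobs=20)
# then: fmin(fn=mock_fn, space=hp_searchspace, algo=algo,
#            trials=trials, max_evals=len(trials.trials) + 1, ...)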
Example #5
def read_or_recreate_trials(
    hyperparameter_search_dir,
    tuning_dataset=None,
    test_dataset=None,
    tqdm=None,
    overwrite=False,
    do_print=True,
    trials_out_dict=None,
    results_out_dict=None,
    args_out_dict=None,
    params_out_dict=None,
):
    config = read_config(hyperparameter_search_dir)[0]
    hyperparameter_search_args = HyperparameterSearchArgs.from_json_file(
        os.path.join(hyperparameter_search_dir,
                     HYPERPARAMETER_SEARCH_ARGS_FILENAME))

    filepath = os.path.join(hyperparameter_search_dir, HYP_CONFIG_FILENAME)
    with open(filepath, mode='r') as f:
        raw_config = json.loads(f.read())

    rotations = set(
        x for x in os.listdir(hyperparameter_search_dir)).intersection(
            set(str(i) for i in range(10)))
    if do_print: print("Observe runs for rotations: %s" % ', '.join(rotations))

    if trials_out_dict is None: trials_out_dict = {}
    if results_out_dict is None: results_out_dict = {}
    if args_out_dict is None: args_out_dict = {}
    if params_out_dict is None: params_out_dict = {}

    if len(rotations) < 4 or tqdm is None: rotations_rng = rotations
    else: rotations_rng = tqdm(rotations, desc="Reading Rotations")
    for rotation in rotations_rng:
        for d in (results_out_dict, args_out_dict, params_out_dict):
            if rotation not in d: d[rotation] = {}

        rotation_results = results_out_dict[rotation]
        rotation_args = args_out_dict[rotation]
        rotation_params = params_out_dict[rotation]

        rotation_dir = os.path.join(hyperparameter_search_dir, rotation)

        run_names = [r for r in os.listdir(rotation_dir) if r != 'trials.pkl']
        run_names_rng = run_names if tqdm is None else tqdm(
            run_names, desc="Reading Runs")

        for run_name in run_names_rng:
            run_dir = os.path.join(rotation_dir, run_name)
            if not os.path.isdir(run_dir):
                print(f"Found file; expecting directory! {run_dir})")
                continue
            elif os.path.isfile(os.path.join(run_dir, 'error.pkl')):
                continue

            # If results for this run already exist, skip recomputing them
            if run_name in rotation_results and rotation_results[
                    run_name] is not None:
                continue

            args_filepath = os.path.join(run_dir, ARGS_FILENAME)
            if not os.path.isfile(args_filepath): continue
            args = Args.from_json_file(args_filepath)

            num_epochs = args.epochs
            completed_training = os.path.isfile(
                os.path.join(run_dir, 'model.epoch-%d' % (num_epochs - 1)))
            if not completed_training:
                print(
                    f"Run {run_dir} still training (or errored and didn't report)"
                )
                continue

            tuning_result_filepath = os.path.join(run_dir, 'tuning_perf.pkl')
            if os.path.isfile(tuning_result_filepath):
                with open(tuning_result_filepath, mode='rb') as f:
                    tuning = pickle.load(f)
            elif os.path.isfile(
                    os.path.join(run_dir, 'tuning_perf_metrics.pkl')):
                with open(os.path.join(run_dir, 'tuning_perf_metrics.pkl'),
                          mode='rb') as f:
                    tuning = pickle.load(f)
            else:
                print(f'Missing tuning for {run_dir}')
                if tuning_dataset is not None:
                    # set the necessary features in tuning dataset
                    if args.do_masked_imputation:
                        tuning_dataset.imputation_mask_rate = args.imputation_mask_rate
                    _, _, tuning = evaluator.evaluate_multi(
                        tuning_dataset,
                        model_rundir=run_dir,
                        num_random_endpoints=10,
                        batch_size=1024,
                        num_workers=27,
                        evaluate_on_25=True,
                        get_all_reprs=False,
                        tqdm=tqdm)
                    with open(tuning_result_filepath, mode='wb') as f:
                        pickle.dump(tuning, f)
                else:
                    print("Wasn't given a tuning dataset!")
                    continue

            test_result_filepath = os.path.join(run_dir, 'test_perf.pkl')
            if os.path.isfile(test_result_filepath):
                with open(test_result_filepath, mode='rb') as f:
                    test = pickle.load(f)
            elif os.path.isfile(os.path.join(run_dir,
                                             'test_perf_metrics.pkl')):
                with open(os.path.join(run_dir, 'test_perf_metrics.pkl'),
                          mode='rb') as f:
                    test = pickle.load(f)
            else:
                print(f"Have tuning but missing test for {run_dir}/{run_name}")
                test = None
                if test_dataset is not None:
                    if args.do_masked_imputation:
                        test_dataset.imputation_mask_rate = args.imputation_mask_rate
                    _, _, test = evaluator.evaluate_multi(
                        test_dataset,
                        model_rundir=run_dir,
                        num_random_endpoints=10,
                        batch_size=1024,
                        num_workers=27,
                        evaluate_on_25=True,
                        get_all_reprs=False,
                        tqdm=tqdm)
                    with open(test_result_filepath, mode='wb') as f:
                        pickle.dump(test, f)
                else:
                    "Wasn't given a test dataset!"

            rotation_results[run_name] = (tuning, test)
            if run_name not in rotation_args or rotation_args[run_name] is None:
                rotation_args[run_name] = args

            if run_name not in rotation_params or rotation_params[
                    run_name] is None:
                params_filepath = os.path.join(run_dir, PARAMS_FILENAME)
                if os.path.isfile(params_filepath):
                    with open(params_filepath, mode='rb') as f:
                        rotation_params[run_name] = pickle.load(f)
                else:
                    rotation_params[run_name] = args_to_params(
                        rotation_args[run_name], raw_config)

        if rotation in trials_out_dict and trials_out_dict[
                rotation] is not None:
            continue

        trials_filepath = os.path.join(rotation_dir, 'trials.pkl')
        if os.path.exists(trials_filepath) and not overwrite:
            with open(trials_filepath, mode='rb') as f:
                trials_out_dict[rotation] = pickle.load(f)
            continue

        # Rebuild Trials
        # TODO(mmd): Something wrong in misc.idxs...
        trials = Trials(exp_key='exp')  #hyperparameter_search_dir
        for run_name in rotation_results:
            args = rotation_args[run_name]
            params = rotation_params[run_name]
            perf_metrics, test_perf_metrics = rotation_results[run_name]
            try:
                loss = ObjectiveFntr.perf_metrics_to_trial_result(
                    perf_metrics,
                    args,
                    single_task=hyperparameter_search_args.single_task_search)
            except Exception as e:
                print(
                    f"Errored computing tuning results for {hyperparameter_search_dir} on rotation "
                    f"{rotation}, {run_name}: {e}")
                traceback.print_exc()
                continue

            try:
                if test_perf_metrics is not None:
                    test_loss = ObjectiveFntr.perf_metrics_to_trial_result(
                        test_perf_metrics,
                        args,
                        single_task=hyperparameter_search_args.
                        single_task_search)
                else:
                    test_loss = np.NaN
            except TypeError as e:
                test_loss = np.NaN
            except Exception as e:
                if "'NoneType' object is not subscriptable" in str(e):
                    test_loss = np.NaN
                else:
                    print(
                        f"Errored computing test results for {hyperparameter_search_dir} on rotation {rotation}, "
                        f"{run_name}: {e}")
                    traceback.print_exc()
                    continue

            loss_variance, test_loss_variance = np.NaN, np.NaN
            result = {
                'status': STATUS_OK,
                'loss': loss,
                'loss_variance': loss_variance,
                'test_loss': test_loss,
                'test_loss_variance': test_loss_variance,
            }
            spec = params

            trials.insert_trial_doc({
                'tid': run_name,
                'spec': spec,
                'result': result,
                'misc': {
                    'tid': run_name,
                    'cmd': '',
                    'idxs': [],
                    'vals': {k: [v]
                             for k, v in spec.items()},
                },
                'state': '',
                'owner': '',
                'book_time': 0,
                'refresh_time': 0,
                'exp_key': 'exp',  # hyperparameter_search_dir,
            })
        trials.refresh()
        trials_out_dict[rotation] = trials

    return config, results_out_dict, args_out_dict, params_out_dict, trials_out_dict
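
Once the rotations have been processed, each rebuilt Trials object can be inspected directly. A short sketch, assuming the return values of the function above and a placeholder search_dir path:

config, results, args_d, params_d, trials_d = read_or_recreate_trials(search_dir)

for rotation, trials in trials_d.items():
    if len(trials.trials) == 0:
        continue
    best = trials.best_trial            # lowest-loss trial with STATUS_OK
    print(rotation, best['result']['loss'], best['misc']['vals'])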
Example #6
         trials=cache_trials,
         max_evals=1,
         return_argmin=False,
         show_progressbar=False)

    trial = cache_trials.trials[-1]

    return trial, trial_parameters


for i in range(5):
    t, tp = get_next_params(global_trials)
    mse = run_model(tp)
    t['result']['loss'] = mse
    t['refresh_time'] = datetime.datetime.now()
    global_trials.insert_trial_doc(t)
    global_trials.refresh()

print(global_trials.best_trial)
"""
trials = Trials()
print("First recommendation")
fmin(param_extractor, space, algo=tpe.suggest, trials=trials, max_evals=1, return_argmin=False)

new_trial = trials.trials[-1]

new_trial['result']['loss'] = 0.4

print(new_trial)

global_trials.insert_trial_doc(new_trial)
Example #7
def read_or_recreate_trials(hyperparameter_search_dir,
                            tuning_dataset=None,
                            test_dataset=None,
                            tqdm=None,
                            overwrite=False):
    config = read_config(hyperparameter_search_dir)[0]

    filepath = os.path.join(hyperparameter_search_dir, HYP_CONFIG_FILENAME)
    with open(filepath, mode='r') as f:
        raw_config = json.loads(f.read())

    rotations = set(
        x for x in os.listdir(hyperparameter_search_dir)).intersection(
            set(str(i) for i in range(10)))
    print("Observe runs for rotations: %s" % ', '.join(rotations))

    all_trials = {}
    all_results = {}
    all_args = {}
    all_params = {}

    rotations_rng = rotations if len(rotations) < 4 or tqdm is None else tqdm(
        rotations)
    for rotation in rotations_rng:
        rotation_results = {}
        rotation_args = {}
        rotation_params = {}

        rotation_dir = os.path.join(hyperparameter_search_dir, rotation)

        run_names = [r for r in os.listdir(rotation_dir) if r != 'trials.pkl']
        run_names_rng = run_names if tqdm is None else tqdm(run_names)

        for run_name in run_names_rng:
            run_dir = os.path.join(rotation_dir, run_name)
            if not os.path.isdir(run_dir):
                print(run_dir)
                continue

            if os.path.isfile(os.path.join(run_dir, 'error.pkl')): continue

            args_filepath = os.path.join(run_dir, ARGS_FILENAME)
            if not os.path.isfile(args_filepath): continue
            args = Args.from_json_file(args_filepath)
            rotation_args[run_name] = args

            params_filepath = os.path.join(run_dir, PARAMS_FILENAME)
            if os.path.isfile(params_filepath):
                with open(params_filepath, mode='rb') as f:
                    rotation_params[run_name] = pickle.load(f)
            else:
                rotation_params[run_name] = args_to_params(
                    rotation_args[run_name], raw_config)

            num_epochs = args.epochs
            completed_training = os.path.isfile(
                os.path.join(run_dir, 'model.epoch-%d' % (num_epochs - 1)))
            if not completed_training:
                print("Run %s Still training (or errored and didn't report)" %
                      run_name)
                continue

            tuning_result_filepath = os.path.join(run_dir,
                                                  'tuning_perf_metrics.pkl')
            if os.path.isfile(tuning_result_filepath):
                with open(tuning_result_filepath, mode='rb') as f:
                    tuning = pickle.load(f)
            else:
                print('Missing tuning for %s' % run_name)
                if tuning_dataset is not None:
                    _, _, tuning = evaluate_multi(tuning_dataset,
                                                  model_rundir=run_dir,
                                                  num_random_endpoints=10,
                                                  batch_size=1024,
                                                  num_workers=27,
                                                  evaluate_on_25=True,
                                                  get_all_reprs=False,
                                                  tqdm=tqdm)
                    with open(tuning_result_filepath, mode='wb') as f:
                        pickle.dump(tuning, f)
                else:
                    print("Wasn't given a tuning dataset!")
                    continue

            test_result_filepath = os.path.join(run_dir,
                                                'test_perf_metrics.pkl')
            if os.path.isfile(test_result_filepath):
                with open(test_result_filepath, mode='rb') as f:
                    test = pickle.load(f)
            else:
                print('Have tuning but missing test for %s' % run_name)
                if test_dataset is not None:
                    _, _, test = evaluate_multi(test_dataset,
                                                model_rundir=run_dir,
                                                num_random_endpoints=10,
                                                batch_size=1024,
                                                num_workers=27,
                                                evaluate_on_25=True,
                                                get_all_reprs=False,
                                                tqdm=tqdm)
                    with open(test_result_filepath, mode='wb') as f:
                        pickle.dump(test, f)
                else:
                    "Wasn't given a test dataset!"

            rotation_results[run_name] = (tuning, test)

        all_results[rotation] = rotation_results
        all_args[rotation] = rotation_args
        all_params[rotation] = rotation_params

        trials_filepath = os.path.join(rotation_dir, 'trials.pkl')
        if os.path.exists(trials_filepath) and not overwrite:
            with open(trials_filepath, mode='rb') as f:
                all_trials[rotation] = pickle.load(f)
            continue

        # Rebuild Trials
        # TODO(mmd): Something wrong in misc.idxs...
        trials = Trials(exp_key='exp')  #hyperparameter_search_dir
        for run_name in rotation_results:
            args = rotation_args[run_name]
            params = rotation_params[run_name]
            perf_metrics, test_perf_metrics = rotation_results[run_name]
            tuning_scores = -pd.Series(
                ObjectiveFntr.perf_metrics_to_trial_result(perf_metrics))
            test_scores = -pd.Series(
                ObjectiveFntr.perf_metrics_to_trial_result(test_perf_metrics))

            loss = tuning_scores.mean()
            loss_variance = tuning_scores.std()**2
            test_loss = test_scores.mean()
            test_loss_variance = test_scores.std()**2

            result = {
                'status': STATUS_OK,
                'loss': loss,
                'loss_variance': loss_variance,
                'test_loss': test_loss,
                'test_loss_variance': test_loss_variance,
            }
            spec = params

            trials.insert_trial_doc({
                'tid': run_name,
                'spec': spec,
                'result': result,
                'misc': {
                    'tid': run_name,
                    'cmd': '',
                    'idxs': [],
                    'vals': {k: [v]
                             for k, v in spec.items()},
                },
                'state': '',
                'owner': '',
                'book_time': 0,
                'refresh_time': 0,
                'exp_key': 'exp',  # hyperparameter_search_dir,
            })
        trials.refresh()
        all_trials[rotation] = trials

    return config, all_results, all_args, all_params, all_trials
Example #8
class _HyperOpt(base.Optimizer):
    # pylint: disable=too-many-instance-attributes
    def __init__(
        self,
        parametrization: IntOrParameter,
        budget: tp.Optional[int] = None,
        num_workers: int = 1,
        *,
        prior_weight: float = 1.0,
        n_startup_jobs: int = 20,
        n_EI_candidates: int = 24,
        gamma: float = 0.25,
        verbose: bool = False,
    ) -> None:
        super().__init__(parametrization,
                         budget=budget,
                         num_workers=num_workers)
        try:
            # try to convert parametrization to hyperopt search space
            if not isinstance(self.parametrization, p.Instrumentation):
                raise NotImplementedError
            self.space = _get_search_space(self.parametrization.name,
                                           self.parametrization)
            self._transform = None
        except NotImplementedError:
            self._transform = transforms.ArctanBound(0, 1)
            self.space = {
                f"x_{i}": hp.uniform(f"x_{i}", 0, 1)
                for i in range(self.dimension)
            }

        self.trials = Trials()
        self.domain = Domain(fn=None,
                             expr=self.space,
                             pass_expr_memo_ctrl=False)
        self.tpe_args = {
            "prior_weight": prior_weight,
            "n_startup_jobs": n_startup_jobs,
            "n_EI_candidates": n_EI_candidates,
            "gamma": gamma,
            "verbose": verbose,
        }

    def _internal_ask_candidate(self) -> p.Parameter:
        # Inspired from FMinIter class (hyperopt)
        next_id = self.trials.new_trial_ids(1)
        new_trial = tpe.suggest(next_id, self.domain, self.trials,
                                self._rng.randint(2**31 - 1),
                                **self.tpe_args)[0]
        self.trials.insert_trial_doc(new_trial)
        self.trials.refresh()

        candidate = self.parametrization.spawn_child()

        if self._transform:
            data = np.array([
                new_trial["misc"]["vals"][f"x_{i}"][0]
                for i in range(self.dimension)
            ])
            candidate = candidate.set_standardized_data(
                self._transform.backward(data))

            # For consistency, we need to update hyperopt history
            # when standardized data is changed
            if any(data != self._transform.forward(
                    candidate.get_standardized_data(
                        reference=self.parametrization))):
                for it, val in enumerate(
                        self._transform.forward(
                            candidate.get_standardized_data(
                                reference=self.parametrization))):
                    self.trials._dynamic_trials[
                        next_id[0]]["misc"]["vals"][f"x_{it}"][0] = val
        else:
            spec = hyperopt.base.spec_from_misc(new_trial["misc"])
            config = hyperopt.space_eval(self.space, spec)
            candidate.value = _hp_dict_to_parametrization(config)

        candidate._meta["trial_id"] = new_trial["tid"]
        return candidate

    def _internal_tell_candidate(self, candidate: p.Parameter,
                                 loss: float) -> None:
        result = {"loss": loss, "status": "ok"}
        assert "trial_id" in candidate._meta
        tid = candidate._meta["trial_id"]
        assert self.trials._dynamic_trials[tid][
            "state"] == hyperopt.JOB_STATE_NEW

        now = hyperopt.utils.coarse_utcnow()
        self.trials._dynamic_trials[tid]["book_time"] = now
        self.trials._dynamic_trials[tid]["refresh_time"] = now
        self.trials._dynamic_trials[tid]["state"] = hyperopt.JOB_STATE_DONE
        self.trials._dynamic_trials[tid]["result"] = result
        self.trials._dynamic_trials[tid][
            "refresh_time"] = hyperopt.utils.coarse_utcnow()
        self.trials.refresh()

    def _internal_tell_not_asked(self, candidate: p.Parameter,
                                 loss: float) -> None:
        next_id = self.trials.new_trial_ids(1)
        new_trial = hyperopt.rand.suggest(next_id, self.domain, self.trials,
                                          self._rng.randint(2**31 - 1))
        self.trials.insert_trial_docs(new_trial)
        self.trials.refresh()
        tid = next_id[0]

        if self._transform:
            data = candidate.get_standardized_data(
                reference=self.parametrization)
            data = self._transform.forward(data)
            self.trials._dynamic_trials[tid]["misc"]["vals"] = {
                f"x_{i}": [data[i]]
                for i in range(len(data))
            }
        else:
            null_config: dict = {
                k: []
                for k in self.trials._dynamic_trials[tid]["misc"]
                ["vals"].keys()
            }
            new_vals: dict = _hp_parametrization_to_dict(candidate,
                                                         default=null_config)
            self.trials._dynamic_trials[tid]["misc"]["vals"] = new_vals

        self.trials.refresh()
        candidate._meta["trial_id"] = tid
        self._internal_tell_candidate(candidate, loss)
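
The class above subclasses nevergrad's base.Optimizer, so the public ask() and tell() methods it inherits end up calling the _internal_ask_candidate and _internal_tell_candidate hooks shown here. A usage sketch (the quadratic loss is a placeholder):

import numpy as np

opt = _HyperOpt(parametrization=2, budget=20)
for _ in range(opt.budget):
    candidate = opt.ask()                                   # TPE proposes a point
    loss = float(np.sum(np.asarray(candidate.value) ** 2))  # toy objective
    opt.tell(candidate, loss)                               # recorded in opt.trials

print(opt.provide_recommendation().value)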
Example #9
def read_or_recreate_trials(hyperparameter_search_dir, tqdm=None, overwrite=False):
    config = read_config(hyperparameter_search_dir)[0]

    filepath = os.path.join(hyperparameter_search_dir, HYP_CONFIG_FILENAME)
    with open(filepath, mode='r') as f: raw_config = json.load(f)

    all_params, all_results, all_configs = {}, {}, {}

    run_names = [r for r in os.listdir(hyperparameter_search_dir) if r != 'trials.pkl']
    run_names_rng = run_names if tqdm is None else tqdm(run_names)

    for run_name in run_names_rng:
        run_dir = os.path.join(hyperparameter_search_dir, run_name)
        if not os.path.isdir(run_dir):
            print(run_dir)
            continue

        if os.path.isfile(os.path.join(run_dir, 'error.pkl')): continue

        config_filepath = os.path.join(run_dir, CONFIG_FILENAME)
        if not os.path.isfile(config_filepath): continue
        with open(config_filepath, mode='r') as f: config = json.load(f)
        all_configs[run_name] = config

        params_filepath = os.path.join(run_dir, PARAMS_FILENAME)
        if os.path.isfile(params_filepath):
            with open(params_filepath, mode='rb') as f: constant, variable = pickle.load(f)
            all_params[run_name] = constant
            all_params[run_name].update(variable)
        else:
            raise NotImplementedError

        num_epochs = config['trainer']['epochs']
        completed_training = trained_until(run_dir, run_name, num_epochs)
        if not completed_training:
            print("Run %s still training (or errored and didn't report)" % run_name)
            print(run_name, num_epochs)
            print(os.listdir(run_dir))
            continue

        tuning_results_filename = os.path.join(run_dir, 'ir_metrics_%d.json' % num_epochs)
        assert os.path.isfile(tuning_results_filename), "Missing tuning results for %s" % run_dir

        with open(tuning_results_filename, mode='r') as f: all_results[run_name] = json.load(f)

    trials_filepath = os.path.join(hyperparameter_search_dir, 'trials.pkl')
    if os.path.exists(trials_filepath) and not overwrite:
        print("Reloading trials!")
        with open(trials_filepath, mode='rb') as f: trials = pickle.load(f)
        return config, all_results, all_configs, all_params, trials

    # Rebuild Trials
    # TODO(mmd): Something wrong in misc.idxs...
    trials = Trials(exp_key = 'exp') #hyperparameter_search_dir
    for run_name in all_results:
        configs = all_configs[run_name]
        params = all_params[run_name]

        loss = all_results[run_name]['Val   (Pert):']['median_rank']
        loss_variance, test_loss, test_loss_variance = np.NaN, np.NaN, np.NaN

        result = {
            'status': STATUS_OK,
            'loss': loss,
            'loss_variance': loss_variance,
            'test_loss': test_loss,
            'test_loss_variance': test_loss_variance,
        }
        spec = params

        trials.insert_trial_doc({
            'tid': run_name,
            'spec': spec,
            'result': result,
            'misc': {
                'tid': run_name,
                'cmd': '',
                'idxs': [],
                'vals': {k: [v] for k, v in spec.items()},
            },
            'state': JOB_STATE_DONE,
            'owner': '',
            'book_time': 0,
            'refresh_time': 0,
            'exp_key': 'exp',# hyperparameter_search_dir,
        })

    trials.refresh()

    return config, all_results, all_configs, all_params, trials
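
Because the function short-circuits when a cached trials.pkl already exists, the rebuilt Trials object can be pickled back into the search directory so that later calls reload it instead of recreating it. A sketch (search_dir is a placeholder path):

import os
import pickle

config, all_results, all_configs, all_params, trials = read_or_recreate_trials(
    search_dir, overwrite=True)

with open(os.path.join(search_dir, 'trials.pkl'), mode='wb') as f:
    pickle.dump(trials, f)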