Example #1
    def __init__(
        self,
        space: Optional[Dict] = None,
        metric: Optional[str] = None,
        mode: Optional[str] = None,
        points_to_evaluate: Optional[List[Dict]] = None,
        n_initial_points: int = 20,
        random_state_seed: Optional[int] = None,
        gamma: float = 0.25,
        max_concurrent: Optional[int] = None,
        use_early_stopped_trials: Optional[bool] = None,
    ):
        assert hpo is not None, (
            "HyperOpt must be installed! Run `pip install hyperopt`.")
        if mode:
            assert mode in ["min", "max"], "`mode` must be 'min' or 'max'."
        from hyperopt.fmin import generate_trials_to_calculate
        super(HyperOptSearch,
              self).__init__(metric=metric,
                             mode=mode,
                             max_concurrent=max_concurrent,
                             use_early_stopped_trials=use_early_stopped_trials)
        self.max_concurrent = max_concurrent
        # hyperopt internally minimizes, so "max" => -1
        if mode == "max":
            self.metric_op = -1.
        elif mode == "min":
            self.metric_op = 1.

        if n_initial_points is None:
            self.algo = hpo.tpe.suggest
        else:
            self.algo = partial(hpo.tpe.suggest,
                                n_startup_jobs=n_initial_points)
        if gamma is not None:
            self.algo = partial(self.algo, gamma=gamma)
        if points_to_evaluate is None:
            self._hpopt_trials = hpo.Trials()
            self._points_to_evaluate = 0
        else:
            assert isinstance(points_to_evaluate, (list, tuple))
            self._hpopt_trials = generate_trials_to_calculate(
                points_to_evaluate)
            self._hpopt_trials.refresh()
            self._points_to_evaluate = len(points_to_evaluate)
        self._live_trial_mapping = {}
        if random_state_seed is None:
            self.rstate = np.random.RandomState()
        else:
            self.rstate = np.random.RandomState(random_state_seed)

        self.domain = None
        if isinstance(space, dict) and space:
            resolved_vars, domain_vars, grid_vars = parse_spec_vars(space)
            if domain_vars or grid_vars:
                logger.warning(
                    UNRESOLVED_SEARCH_SPACE.format(par="space",
                                                   cls=type(self)))
                space = self.convert_search_space(space)
            self.domain = hpo.Domain(lambda spc: spc, space)
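
For reference, a minimal standalone sketch of the same TPE wiring this constructor performs, outside of Ray (assumes only hyperopt and numpy; the space, budget, and seed values are illustrative, and older hyperopt versions accept a RandomState for rstate):

from functools import partial

import hyperopt as hpo
import numpy as np

space = {"lr": hpo.hp.loguniform("lr", -10, 0)}
# Mirror the constructor above: n_startup_jobs <- n_initial_points, gamma <- gamma
algo = partial(hpo.tpe.suggest, n_startup_jobs=20, gamma=0.25)
best = hpo.fmin(
    fn=lambda config: config["lr"],  # toy objective: just minimize lr itself
    space=space,
    algo=algo,
    max_evals=5,
    trials=hpo.Trials(),
    rstate=np.random.RandomState(42),
)
print(best)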
Example #2
File: hyperopt.py Project: wanghuimu/ray
    def __init__(self,
                 space,
                 max_concurrent=10,
                 reward_attr="episode_reward_mean",
                 points_to_evaluate=None,
                 **kwargs):
        assert hpo is not None, "HyperOpt must be installed!"
        from hyperopt.fmin import generate_trials_to_calculate
        assert type(max_concurrent) is int and max_concurrent > 0
        self._max_concurrent = max_concurrent
        self._reward_attr = reward_attr
        self.algo = hpo.tpe.suggest
        self.domain = hpo.Domain(lambda spc: spc, space)
        if points_to_evaluate is None:
            self._hpopt_trials = hpo.Trials()
            self._points_to_evaluate = 0
        else:
            assert type(points_to_evaluate) == list
            self._hpopt_trials = generate_trials_to_calculate(
                points_to_evaluate)
            self._hpopt_trials.refresh()
            self._points_to_evaluate = len(points_to_evaluate)
        self._live_trial_mapping = {}
        self.rstate = np.random.RandomState()

        super(HyperOptSearch, self).__init__(**kwargs)
Example #3
    def update_search_space(self, search_space):
        """
        Update search space definition in tuner by search_space in parameters.

        Called when the experiment is first set up or when the search space is updated in the WebUI.

        Parameters
        ----------
        search_space : dict
        """
        self.json = search_space

        search_space_instance = json2space(self.json)
        rstate = np.random.RandomState()
        trials = hp.Trials()
        domain = hp.Domain(None,
                           search_space_instance,
                           pass_expr_memo_ctrl=None)
        algorithm = self._choose_tuner(self.algorithm_name)
        self.rval = hp.FMinIter(algorithm,
                                domain,
                                trials,
                                max_evals=-1,
                                rstate=rstate,
                                verbose=0)
        self.rval.catch_eval_exceptions = False
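
The max_evals=-1 FMinIter here is never run to completion; it only serves as a handle for drawing suggestions. A hedged sketch of how such a handle can be asked for one new configuration (the same pattern appears in Examples #16 and #20 below; the toy space is illustrative):

import hyperopt as hp
import numpy as np

space = {"x": hp.hp.uniform("x", -10, 10)}
trials = hp.Trials()
domain = hp.Domain(None, space, pass_expr_memo_ctrl=None)
rval = hp.FMinIter(hp.tpe.suggest, domain, trials,
                   max_evals=-1, rstate=np.random.RandomState(), verbose=0)
rval.catch_eval_exceptions = False

new_ids = rval.trials.new_trial_ids(1)  # reserve one trial id
rval.trials.refresh()
seed = rval.rstate.randint(2**31 - 1)
new_trials = hp.tpe.suggest(new_ids, rval.domain, trials, seed)
print(new_trials[0]["misc"]["vals"])  # e.g. {'x': [3.7...]}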
Example #4
def _search_condor_parallel(path, space, trials_per_point, setting,
                            objective, max_evals,
                            algo=hyperopt.tpe.suggest,
                            max_queue_len=10, poll_interval_secs=30):
    """
    block_until_done means that the process blocks until NO job in
    trials is in the running or new state.

    suggest() can return an instance of StopExperiment to break out of
    the enqueuing loop.
    """

    trials = CondorTrials(path=path, ids=range(1, trials_per_point + 1),
                          setting=setting, objective=objective)
    domain = hyperopt.Domain(dummy_f, space, rseed=123)
    trial_path = os.path.join(path, "trials.pck")
    if os.path.exists(trial_path):
        with open(trial_path, "rb") as f:  # pickle files must be read in binary mode
            old_trials = pickle.load(f)
        print("Loaded existing trials")
        if old_trials.setting == trials.setting and trials.ids == old_trials.ids:
            trials = old_trials
    n_queued = trials.count_by_state_unsynced(hyperopt.JOB_STATES)

    def get_queue_len():
        trials.count_by_state_unsynced(hyperopt.base.JOB_STATE_NEW)
        return trials.update_trials(trials._trials)
    stopped = False
    while n_queued < max_evals:
        qlen = get_queue_len()
        while qlen < max_queue_len and n_queued < max_evals:
            n_to_enqueue = 1  # min(self.max_queue_len - qlen, N - n_queued)
            new_ids = trials.new_trial_ids(n_to_enqueue)
            trials.refresh()
            new_trials = algo(new_ids, domain, trials)
            if new_trials is hyperopt.base.StopExperiment:
                stopped = True
                break
            else:
                assert len(new_ids) >= len(new_trials)
                if len(new_trials):
                    trials.insert_trial_docs(new_trials)
                    trials.refresh()
                    n_queued += len(new_trials)
                    qlen = get_queue_len()
                else:
                    break

        with open(trial_path, 'wb') as f:  # pickle needs a binary file
            pickle.dump(trials, f)
        # -- wait for workers to fill in the trials
        time.sleep(poll_interval_secs)
        if stopped:
            break
    while trials.count_by_state_unsynced(hyperopt.base.JOB_STATE_NEW) > 0:
        time.sleep(poll_interval_secs)
    trials.refresh()
    return trials
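
As the docstring notes, a suggest function can return StopExperiment to break out of the enqueuing loop. A minimal hedged sketch of such a wrapper, matching the three-argument algo(new_ids, domain, trials) call convention used above (newer hyperopt versions also take a seed; the budget and helper name are illustrative):

import hyperopt

def budgeted_suggest(new_ids, domain, trials, budget=50):
    # Stop enqueuing once the budget is exhausted; otherwise defer to TPE.
    if len(trials.trials) >= budget:
        return hyperopt.base.StopExperiment
    return hyperopt.tpe.suggest(new_ids, domain, trials)

Such a wrapper could then be passed as the algo argument of _search_condor_parallel.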
Example #5
File: hyperopt.py Project: yuishihara/ray
    def __init__(self,
                 space,
                 max_concurrent=10,
                 reward_attr=None,
                 metric="episode_reward_mean",
                 mode="max",
                 points_to_evaluate=None,
                 n_initial_points=20,
                 random_state_seed=None,
                 gamma=0.25,
                 **kwargs):
        assert hpo is not None, "HyperOpt must be installed!"
        from hyperopt.fmin import generate_trials_to_calculate
        assert type(max_concurrent) is int and max_concurrent > 0
        assert mode in ["min", "max"], "`mode` must be 'min' or 'max'!"

        if reward_attr is not None:
            mode = "max"
            metric = reward_attr
            logger.warning(
                "`reward_attr` is deprecated and will be removed in a future "
                "version of Tune. "
                "Setting `metric={}` and `mode=max`.".format(reward_attr))

        self._max_concurrent = max_concurrent
        self._metric = metric
        # hyperopt internally minimizes, so "max" => -1
        if mode == "max":
            self._metric_op = -1.
        elif mode == "min":
            self._metric_op = 1.
        if n_initial_points is None:
            self.algo = hpo.tpe.suggest
        else:
            self.algo = partial(hpo.tpe.suggest,
                                n_startup_jobs=n_initial_points)
        if gamma is not None:
            self.algo = partial(self.algo, gamma=gamma)
        self.domain = hpo.Domain(lambda spc: spc, space)
        if points_to_evaluate is None:
            self._hpopt_trials = hpo.Trials()
            self._points_to_evaluate = 0
        else:
            assert type(points_to_evaluate) == list
            self._hpopt_trials = generate_trials_to_calculate(
                points_to_evaluate)
            self._hpopt_trials.refresh()
            self._points_to_evaluate = len(points_to_evaluate)
        self._live_trial_mapping = {}
        if random_state_seed is None:
            self.rstate = np.random.RandomState()
        else:
            self.rstate = np.random.RandomState(random_state_seed)

        super(HyperOptSearch, self).__init__(metric=self._metric,
                                             mode=mode,
                                             **kwargs)
Example #6
File: hyperopt.py Project: wangziyuruc/ray
    def __init__(
            self,
            space=None,
            metric=None,
            mode=None,
            points_to_evaluate=None,
            n_initial_points=20,
            random_state_seed=None,
            gamma=0.25,
            max_concurrent=None,
            use_early_stopped_trials=None,
    ):
        assert hpo is not None, (
            "HyperOpt must be installed! Run `pip install hyperopt`.")
        if mode:
            assert mode in ["min", "max"], "`mode` must be 'min' or 'max'."
        from hyperopt.fmin import generate_trials_to_calculate
        super(HyperOptSearch, self).__init__(
            metric=metric,
            mode=mode,
            max_concurrent=max_concurrent,
            use_early_stopped_trials=use_early_stopped_trials)
        self.max_concurrent = max_concurrent
        # hyperopt internally minimizes, so "max" => -1
        if mode == "max":
            self.metric_op = -1.
        elif mode == "min":
            self.metric_op = 1.

        if n_initial_points is None:
            self.algo = hpo.tpe.suggest
        else:
            self.algo = partial(
                hpo.tpe.suggest, n_startup_jobs=n_initial_points)
        if gamma is not None:
            self.algo = partial(self.algo, gamma=gamma)
        if points_to_evaluate is None:
            self._hpopt_trials = hpo.Trials()
            self._points_to_evaluate = 0
        else:
            assert isinstance(points_to_evaluate, (list, tuple))
            self._hpopt_trials = generate_trials_to_calculate(
                points_to_evaluate)
            self._hpopt_trials.refresh()
            self._points_to_evaluate = len(points_to_evaluate)
        self._live_trial_mapping = {}
        if random_state_seed is None:
            self.rstate = np.random.RandomState()
        else:
            self.rstate = np.random.RandomState(random_state_seed)

        self.domain = None
        if space:
            self.domain = hpo.Domain(lambda spc: spc, space)
Example #7
    def __init__(self,
                 space,
                 max_concurrent=10,
                 reward_attr="episode_reward_mean",
                 **kwargs):
        assert hpo is not None, "HyperOpt must be installed!"
        assert type(max_concurrent) is int and max_concurrent > 0
        self._max_concurrent = max_concurrent
        self._reward_attr = reward_attr
        self.algo = hpo.tpe.suggest
        self.domain = hpo.Domain(lambda spc: spc, space)
        self._hpopt_trials = hpo.Trials()
        self._live_trial_mapping = {}
        self.rstate = np.random.RandomState()

        super(HyperOptSearch, self).__init__(**kwargs)
Example #8
    def create_hyperopt_domain(self):
        # Construct search space, example: {"x": hyperopt.hp.uniform('x', -10, 10), "x2": hyperopt.hp.uniform('x2', -10, 10)}
        hyperopt_search_space = {}
        for param in self.search_space.params:
            if param.type == INTEGER:
                hyperopt_search_space[param.name] = hyperopt.hp.quniform(
                    param.name, float(param.min), float(param.max), 1)
            elif param.type == DOUBLE:
                hyperopt_search_space[param.name] = hyperopt.hp.uniform(
                    param.name, float(param.min), float(param.max))
            elif param.type == CATEGORICAL or param.type == DISCRETE:
                hyperopt_search_space[param.name] = hyperopt.hp.choice(
                    param.name, param.list)

        self.hyperopt_domain = hyperopt.Domain(None,
                                               hyperopt_search_space,
                                               pass_expr_memo_ctrl=None)
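
To sanity-check a space built this way, a single configuration can be drawn from it with hyperopt's stochastic sampler (a minimal sketch; parameter names and ranges are illustrative, and note that quniform yields float values even for integer-like parameters):

import hyperopt
import hyperopt.pyll.stochastic

space = {
    "units": hyperopt.hp.quniform("units", 16.0, 256.0, 1),        # INTEGER
    "lr": hyperopt.hp.uniform("lr", 0.0001, 0.1),                  # DOUBLE
    "optimizer": hyperopt.hp.choice("optimizer", ["sgd", "adam"]), # CATEGORICAL/DISCRETE
}
print(hyperopt.pyll.stochastic.sample(space))
# e.g. {'lr': 0.03, 'optimizer': 'adam', 'units': 112.0}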
Example #9
    def get_suggestions(self,
                        configs: List[Dict] = None,
                        metrics: List[float] = None) -> List[Dict]:
        if not self.config.num_runs:
            raise ValueError("This search strategy requires `num_runs`.")
        suggestions = []
        rand_generator = get_random_generator(seed=self.config.seed)
        hyperopt_domain = hyperopt.Domain(None,
                                          self._search_space,
                                          pass_expr_memo_ctrl=None)

        hyperopt_trials = self._get_previous_observations(
            hyperopt_domain=hyperopt_domain, configs=configs, metrics=metrics)
        is_first = not all([configs, metrics])

        minimize = hyperopt.FMinIter(
            self.config.algorithm,
            hyperopt_domain,
            hyperopt_trials,
            max_evals=-1,
            rstate=rand_generator,
            verbose=0,
        )

        minimize.catch_eval_exceptions = False
        new_ids = minimize.trials.new_trial_ids(self.config.num_runs)
        minimize.trials.refresh()
        random_state = minimize.rstate.randint(2**31 - 1)
        new_trials = self.run_algorithm(is_first, new_ids, minimize.domain,
                                        hyperopt_trials, random_state)
        minimize.trials.refresh()

        for tid in range(self.config.num_runs):
            vals = new_trials[tid]["misc"]["vals"]
            suggestion = {}
            for param in vals:
                observation_value = vals[param][0]
                if param in self._param_to_value:
                    value = self._param_to_value[param][observation_value]
                    suggestion[param] = value
                else:
                    suggestion[param] = observation_value

            suggestions.append(suggestion)

        return suggestions
Example #10
File: hyperopt.py Project: zachkeer/ray
    def set_search_properties(self, metric, mode, config):
        if self.domain:
            return False
        space = self.convert_search_space(config)
        self.domain = hpo.Domain(lambda spc: spc, space)

        if metric:
            self._metric = metric
        if mode:
            self._mode = mode

        if self._mode == "max":
            self.metric_op = -1.
        elif self._mode == "min":
            self.metric_op = 1.

        return True
Example #11
    def update_search_space(self, search_space):
        validate_search_space(search_space)
        self.json = search_space

        search_space_instance = json2space(self.json)
        rstate = np.random.RandomState()
        trials = hp.Trials()
        domain = hp.Domain(None,
                           search_space_instance,
                           pass_expr_memo_ctrl=None)
        algorithm = self._choose_tuner(self.algorithm_name)
        self.rval = hp.FMinIter(algorithm,
                                domain,
                                trials,
                                max_evals=-1,
                                rstate=rstate,
                                verbose=0)
        self.rval.catch_eval_exceptions = False
Example #12
    def __init__(self,
                 space,
                 seed=123,
                 method="tpe",
                 errors_ok=False,
                 context={}):
        super().__init__(context=context)

        self.og_space = deepcopy(space)

        self.space = deepcopy(space)
        # Parse space to contain hyperopt functions
        find_pattern_apply_f(self.space, is_hyperopt, to_hyperopt)
        self._needs_postprocessing = len(find_pattern(self.space, is_exp2)) > 0

        self.seed = seed

        self.method = method
        if method == "tpe":
            self.algo = hpo.tpe
        elif method == "rand":
            self.algo = hpo.rand
        else:
            raise ValueError(f"Do not recognise suggestion method {method}.")

        # determines whether errors are treated as errors, or as trials with infinite loss
        self.errors_ok = errors_ok

        # this is to avoid getting spammed with useless TPE diagnostic info
        algo_logger = logging.getLogger("hpo")
        algo_logger.setLevel(logging.WARNING)
        self.algo.logger = algo_logger

        # this is hyperopt internals
        self.domain = hpo.Domain(lambda spc: spc, self.space)
        self._hpopt_trials = hpo.Trials()

        # this maps internal hyperopt ids to trial ids
        self._live_trial_mapping = {}

        self.rstate = np.random.RandomState(self.seed)

        self.counter = count()  # used to generate unique, but deterministic trial ids
Example #13
File: hpo_scheduler.py Project: zhiyun/ray
    def add_experiment(self, experiment, trial_runner):
        """Tracks one experiment.

        Will error if one tries to track multiple experiments.
        """
        assert self._experiment is None, "HyperOpt only tracks one experiment!"
        self._experiment = experiment

        self._output_path = experiment.name
        spec = copy.deepcopy(experiment.spec)

        # Set Scheduler field, as Tune Parser will default to FIFO
        assert spec.get("scheduler") in [None, "HyperOpt"], "Incorrectly " \
            "specified scheduler!"
        spec["scheduler"] = "HyperOpt"

        if "env" in spec:
            spec["config"] = spec.get("config", {})
            spec["config"]["env"] = spec["env"]
            del spec["env"]

        space = spec["config"]["space"]
        del spec["config"]["space"]

        self.parser = make_parser()
        self.args = self.parser.parse_args(to_argv(spec))
        self.args.scheduler = "HyperOpt"
        self.default_config = copy.deepcopy(spec["config"])

        self.algo = hpo.tpe.suggest
        self.domain = hpo.Domain(lambda spc: spc, space)
        self._hpopt_trials = hpo.Trials()
        self._tune_to_hp = {}
        self._num_trials_left = self.args.repeat

        if type(self._max_concurrent) is int:
            self._max_concurrent = min(self._max_concurrent, self.args.repeat)

        self.rstate = np.random.RandomState()
        self.trial_generator = self._trial_generator()
        self._add_new_trials_if_needed(trial_runner)
Example #14
    def _setup_hyperopt(self) -> None:
        from hyperopt.fmin import generate_trials_to_calculate

        if self._metric is None and self._mode:
            # If only a mode was passed, use anonymous metric
            self._metric = DEFAULT_METRIC

        if self._points_to_evaluate is None:
            self._hpopt_trials = hpo.Trials()
            self._points_to_evaluate = 0
        else:
            assert isinstance(self._points_to_evaluate, (list, tuple))

            for i in range(len(self._points_to_evaluate)):
                config = self._points_to_evaluate[i]
                self._convert_categories_to_indices(config)
            # HyperOpt treats initial points as LIFO, reverse to get FIFO
            self._points_to_evaluate = list(reversed(self._points_to_evaluate))
            self._hpopt_trials = generate_trials_to_calculate(self._points_to_evaluate)
            self._hpopt_trials.refresh()
            self._points_to_evaluate = len(self._points_to_evaluate)

        self.domain = hpo.Domain(lambda spc: spc, self._space)
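
generate_trials_to_calculate pre-populates a Trials object with fixed configurations. A hedged standalone sketch with a toy space and points (with plain fmin the seeded points run first, in list order; the reversal above compensates for this searcher's one-suggestion-at-a-time flow):

from hyperopt import fmin, hp, tpe
from hyperopt.fmin import generate_trials_to_calculate

points_to_evaluate = [{"x": 0.0}, {"x": 1.0}]  # evaluated before any TPE suggestion
trials = generate_trials_to_calculate(points_to_evaluate)
best = fmin(fn=lambda config: (config["x"] - 0.5) ** 2,
            space={"x": hp.uniform("x", -10, 10)},
            algo=tpe.suggest, max_evals=5, trials=trials)
print(best)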
Example #15
    def _setup_hyperopt(self):
        if self._metric is None and self._mode:
            # If only a mode was passed, use anonymous metric
            self._metric = DEFAULT_METRIC

        self.domain = hpo.Domain(lambda spc: spc, self._space)
Example #16
File: tpe.py Project: 2793145003/advisor
    def get_new_suggestions(self, study_id, trials=[], number=1):
        """
    Get the new suggested trials with random search.
    """

        search_space = hyperopt.hp.uniform('x', -10, 10)

        search_space_instance = search_space
        rstate = np.random.RandomState()
        trials = hyperopt.Trials()
        domain = hyperopt.Domain(None,
                                 search_space_instance,
                                 pass_expr_memo_ctrl=None)
        algorithm = hyperopt.tpe.suggest
        rval = hyperopt.FMinIter(algorithm,
                                 domain,
                                 trials,
                                 max_evals=-1,
                                 rstate=rstate,
                                 verbose=0)
        rval.catch_eval_exceptions = False

        algorithm = rval.algo
        new_ids = rval.trials.new_trial_ids(1)
        rval.trials.refresh()
        random_state = rval.rstate.randint(2**31 - 1)
        new_trials = algorithm(new_ids, rval.domain, trials, random_state)
        rval.trials.refresh()

        # Example: {'x': [8.721658602103911]}
        vals = new_trials[0]['misc']['vals']

        """
    parameter = dict()
    for key in vals:
      try:
        parameter[key] = vals[key][0].item()
      except Exception:
        parameter[key] = None
    """
        """
    trials =rval.trials

    trial = trials.new_trial_docs([new_id], rval_specs, rval_results, rval_miscs)[0]
    trial['result'] = {'loss': reward, 'status': 'ok'}
    trial['state'] = hp.JOB_STATE_DONE
    trials.insert_trial_docs([trial])
    trials.refresh()
    """
        """
    def _choose_tuner(self, algorithm_name):
      if algorithm_name == 'tpe':
        return hp.tpe.suggest
      if algorithm_name == 'random_search':
        return hp.rand.suggest
      if algorithm_name == 'anneal':
        return hp.anneal.suggest
      raise RuntimeError('Not support tuner algorithm in hyperopt.')
    """

        return_trial_list = []

        study = Study.objects.get(id=study_id)
        study_configuration_json = json.loads(study.study_configuration)
        params = study_configuration_json["params"]

        for i in range(number):
            trial = Trial.create(study.id, "TpeTrial")
            parameter_values_json = {}

            for param in params:

                if param["type"] in ("INTEGER", "DISCRETE", "CATEGORICAL"):
                    # TODO: These types are not supported here yet; skip them
                    # so suggest_value is never used before assignment.
                    continue

                elif param["type"] == "DOUBLE":
                    # TODO: Get the specified value from hyperopt
                    suggest_value = vals["x"][0]
                    parameter_values_json[
                        param["parameterName"]] = suggest_value

            trial.parameter_values = json.dumps(parameter_values_json)
            trial.save()
            return_trial_list.append(trial)

        return return_trial_list
Example #17
File: lrcall.py Project: yuyuz/FLASH
def main():
    parser = ArgumentParser()

    parser.add_argument('-p',
                        '--space',
                        dest='spaceFile',
                        help='Where is the space.py located?')
    parser.add_argument(
        '--use_optimal_design',
        dest='use_optimal_design',
        help='Use optimal design or pure random initialization?')
    parser.add_argument('--init_budget',
                        dest='init_budget',
                        help='How many evaluations for random burning period?')
    parser.add_argument(
        '--ei_budget',
        dest='ei_budget',
        help='How many evaluations for EI controlled online period?')
    parser.add_argument(
        '--bopt_budget',
        dest='bopt_budget',
        help=
        'How many evaluations for Bayesian optimization after get subspace?')
    parser.add_argument(
        '--ei_xi',
        dest='ei_xi',
        help='What is the exploration parameter for computing EI?')
    parser.add_argument(
        '--top_k_pipelines',
        dest='top_k_pipelines',
        help='How many top (LR predicted) pipelines to cover in subspace?')
    parser.add_argument('-s',
                        '--seed',
                        default='1',
                        dest='seed',
                        type=int,
                        help='Seed for the algorithm')

    parser.add_argument(
        '-a',
        '--algo',
        default='SMAC',
        dest='algo',
        type=str,
        help='Specify the algorithm after LR, can be SMAC or TPE')

    parser.add_argument(
        '-r',
        '--restore',
        action='store_true',
        dest='restore',
        help='When this flag is set state.pkl is restored in ' +
        'the current working directory')
    parser.add_argument('--random',
                        default=False,
                        action='store_true',
                        dest='random',
                        help='Use a random search')
    parser.add_argument('--cwd',
                        help='Change the working directory before '
                        'optimizing.')

    args, unknown = parser.parse_known_args()

    if args.cwd:
        os.chdir(args.cwd)

    if not os.path.exists(args.spaceFile):
        logger.critical('Search space not found: %s' % args.spaceFile)
        sys.exit(1)

    # First remove '.py'
    space, ext = os.path.splitext(os.path.basename(args.spaceFile))

    # Then load dict searchSpace and out function cv.py
    sys.path.append('./')
    sys.path.append('')

    module = import_module(space)
    search_space = module.space
    ni = [len(d)
          for d in module.layer_dict_list]  # number of units in each layer
    cum_ni = np.cumsum(ni)

    log_filename = 'lr.pkl'

    # Random burning period as initialization
    init_budget = int(args.init_budget)
    if args.use_optimal_design == '1':
        picks = get_random_picks_by_optimal_design(ni, init_budget)
    else:
        picks = get_pure_random_picks(ni, init_budget)
    for i in range(init_budget):
        times = get_num_of_trials(log_filename, filter_valid=False)
        valid_times = get_num_of_trials(log_filename, filter_valid=True)
        logger.info('IMPORTANT! YOU ARE RUNNING FLASH WITH: %s' % args.algo)
        logger.info('Total evaluation times: %d, valid times: %d' %
                    (times, valid_times))
        logger.info('Random burning period times: %d, valid times: %d' %
                    (times, valid_times))
        subspace = construct_subspace(module, picks[i])
        params = sample(subspace)
        cv.main(params)
    valid_times_in_random_period = get_num_of_trials(log_filename,
                                                     filter_valid=True)

    # Train the first LR model before entering into EI controlled period
    fh = open(log_filename, 'rb')  # pickle log must be read in binary mode
    log = cPickle.load(fh)
    trials = log['trials']
    fh.close()
    X = []
    y = []
    y_time = []
    for trial in trials:
        result = trial['result']
        time = trial['duration']
        # make sure the logged result is a number (evaluations that return exactly 100.0 are still accepted)
        if result <= 100:
            params = trial['params']
            rescaling = params['-rescaling']
            balancing = params['-balancing']
            feat_pre = params['-feat_pre']
            clf = params['-classifier']
            x = [[0] * n for n in ni]
            x[0][module.d_rescaling[rescaling]] = 1
            x[1][module.d_balancing[balancing]] = 1
            x[2][module.d_feat_pre[feat_pre]] = 1
            x[3][module.d_clf[clf]] = 1
            x_flat = np.array(x[0] + x[1] + x[2] + x[3])
            X.append(x_flat)
            y.append(result)
            y_time.append(np.log(time))
    X = np.array(X)
    alpha = 1.0
    lr = linear_model.Ridge(alpha=alpha)
    lr.fit(X, y)
    lr_time = linear_model.Ridge(alpha=alpha)
    lr_time.fit(X, y_time)

    # Online period controlled by EI
    ei_budget = int(args.ei_budget)
    for i in range(ei_budget):
        times = get_num_of_trials(log_filename, filter_valid=False)
        valid_times = get_num_of_trials(log_filename, filter_valid=True)
        logger.info('Total evaluation times: %d, valid times: %d' %
                    (times, valid_times))
        logger.info(
            'EI controlled period times: %d, valid times: %d' %
            (times - init_budget, valid_times - valid_times_in_random_period))
        ebeta = lr.coef_[:cum_ni[0]], \
                lr.coef_[cum_ni[0]:cum_ni[1]], \
                lr.coef_[cum_ni[1]:cum_ni[2]], \
                lr.coef_[cum_ni[2]:]
        logger.info('LR model estimated unit ranking: %s %s %s %s' %
                    (str(ebeta[0].argsort()), str(ebeta[1].argsort()),
                     str(ebeta[2].argsort()), str(ebeta[3].argsort())))
        ebeta_time = lr_time.coef_[:cum_ni[0]], \
                     lr_time.coef_[cum_ni[0]:cum_ni[1]], \
                     lr_time.coef_[cum_ni[1]:cum_ni[2]], \
                     lr_time.coef_[cum_ni[2]:]
        logger.info(
            'LR Time model estimated unit ranking: %s %s %s %s' %
            (str(ebeta_time[0].argsort()), str(ebeta_time[1].argsort()),
             str(ebeta_time[2].argsort()), str(ebeta_time[3].argsort())))
        # pick the best pipeline by EI
        x_next = get_next_by_EI(ni, alpha, lr, lr_time, X, y,
                                float(args.ei_xi))
        pick = [[np.argmax(x_next_i)] for x_next_i in x_next]
        subspace = construct_subspace(module, pick)
        params = sample(subspace)
        cv.main(params)

        result, time = get_last_run(log_filename)
        if result <= 100:
            x_next_flat = np.array(x_next[0] + x_next[1] + x_next[2] +
                                   x_next[3])
            X = np.vstack([X, x_next_flat])
            y.append(result)
            y_time.append(np.log(time))
            lr = linear_model.Ridge(alpha=alpha)
            lr.fit(X, y)
            lr_time = linear_model.Ridge(alpha=alpha)
            lr_time.fit(X, y_time)
    valid_times_in_ei_period = get_num_of_trials(
        log_filename, filter_valid=True) - valid_times_in_random_period

    # Construct subspace based on LR prediction
    final_ebeta = lr.coef_[:cum_ni[0]], \
                  lr.coef_[cum_ni[0]:cum_ni[1]], \
                  lr.coef_[cum_ni[1]:cum_ni[2]], \
                  lr.coef_[cum_ni[2]:]
    final_ebeta_time = lr_time.coef_[:cum_ni[0]], \
                       lr_time.coef_[cum_ni[0]:cum_ni[1]], \
                       lr_time.coef_[cum_ni[1]:cum_ni[2]], \
                       lr_time.coef_[cum_ni[2]:]
    final_pick = get_covered_units_by_ei(ni, alpha, lr, lr_time, X, y, 0,
                                         int(args.top_k_pipelines))
    final_subspace = construct_subspace(module, final_pick)

    logger.info('LR model estimated unit ranking: %s %s %s %s' %
                (str(final_ebeta[0].argsort()), str(final_ebeta[1].argsort()),
                 str(final_ebeta[2].argsort()), str(final_ebeta[3].argsort())))
    logger.info(
        'LR Time model estimated unit ranking: %s %s %s %s' %
        (str(final_ebeta_time[0].argsort()), str(
            final_ebeta_time[1].argsort()), str(final_ebeta_time[2].argsort()),
         str(final_ebeta_time[3].argsort())))
    logger.info('Selected pipelines: %s %s %s %s' %
                (final_pick[0], final_pick[1], final_pick[2], final_pick[3]))

    # Phase 3 with SMAC
    if args.algo == 'SMAC':
        fh = open('pickup.txt', 'w')  # file() was removed in Python 3
        for layer_pick in final_pick:
            for i in layer_pick:
                fh.write('%d ' % i)
            fh.write('\n')
        fh.close()
        subspace = construct_subspace(module, final_pick)
        new_space = convert_tpe_to_smac_from_object(subspace)
        fh = open('params.pcs', 'w')
        fh.write(new_space)
        fh.close()

    # Phase 3 with TPE
    elif args.algo == 'TPE':
        fn = cv.main
        domain = hyperopt.Domain(fn, final_subspace, rseed=int(args.seed))
        trials = hyperopt.Trials()
        bopt_budget = int(args.bopt_budget)
        for i in range(bopt_budget):
            times = get_num_of_trials(log_filename, filter_valid=False)
            valid_times = get_num_of_trials(log_filename, filter_valid=True)
            logger.info('Total evaluation times: %d, valid times: %d' %
                        (times, valid_times))
            logger.info(
                'TPE period times: %d, valid times: %d' %
                (times - init_budget - ei_budget, valid_times -
                 valid_times_in_random_period - valid_times_in_ei_period))
            logger.info(
                'LR model estimated unit ranking: %s %s %s %s' %
                (str(final_ebeta[0].argsort()), str(final_ebeta[1].argsort()),
                 str(final_ebeta[2].argsort()), str(final_ebeta[3].argsort())))
            logger.info('LR Time model estimated unit ranking: %s %s %s %s' %
                        (str(final_ebeta_time[0].argsort()),
                         str(final_ebeta_time[1].argsort()),
                         str(final_ebeta_time[2].argsort()),
                         str(final_ebeta_time[3].argsort())))
            logger.info(
                'Selected pipelines: %s %s %s %s' %
                (final_pick[0], final_pick[1], final_pick[2], final_pick[3]))
            # in exhaust, the number of evaluations is max_evals - num_done
            tpe_with_seed = partial(hyperopt.tpe.suggest, seed=int(args.seed))
            rval = hyperopt.FMinIter(tpe_with_seed,
                                     domain,
                                     trials,
                                     max_evals=i)
            rval.exhaust()
Example #18
def main():
    prog = "python statistics.py WhatIsThis <manyPickles> WhatIsThis <manyPickles> [WhatIsThis <manyPickles>]"
    description = "Return some statistical information"

    parser = ArgumentParser(description=description, prog=prog)

    parser.add_argument("-p",
                        "--space",
                        dest="spaceFile",
                        help="Where is the space.py located?")
    parser.add_argument("-m",
                        "--maxEvals",
                        dest="maxEvals",
                        help="How many evaluations?")
    parser.add_argument("-s",
                        "--seed",
                        default="1",
                        dest="seed",
                        type=int,
                        help="Seed for the TPE algorithm")
    parser.add_argument(
        "-r",
        "--restore",
        action="store_true",
        dest="restore",
        help="When this flag is set state.pkl is restored in " +
        "the current working directory")
    parser.add_argument("--random",
                        default=False,
                        action="store_true",
                        dest="random",
                        help="Use a random search")
    parser.add_argument("--cwd",
                        help="Change the working directory before "
                        "optimizing.")

    args, unknown = parser.parse_known_args()

    if args.cwd:
        os.chdir(args.cwd)

    cfg = load_experiment_config_file()
    log_level = cfg.getint("HPOLIB", "HPOlib_loglevel")
    logging.basicConfig(format='[%(levelname)s] [%(asctime)s:%(name)s] %('
                        'message)s',
                        datefmt='%H:%M:%S')
    logger.setLevel(log_level)

    if not os.path.exists(args.spaceFile):
        logger.critical("Search space not found: %s" % args.spaceFile)
        sys.exit(1)

    # First remove ".py"
    space, ext = os.path.splitext(os.path.basename(args.spaceFile))

    # Then load dict searchSpace and out function cv.py
    sys.path.append("./")
    sys.path.append("")

    module = import_module(space)
    search_space = module.space

    cli_target = "HPOlib.optimization_interceptor"
    fn = partial(command_line_function, cli_target=cli_target)

    if args.random:
        # We use a random search
        tpe_with_seed = partial(hyperopt.tpe.rand.suggest, seed=int(args.seed))
        logger.info("Using Random Search")
    else:
        tpe_with_seed = partial(hyperopt.tpe.suggest, seed=int(args.seed))

    # Now run TPE, emulate fmin.fmin()
    state_filename = "state.pkl"
    if args.restore:
        # We do not need to care about the state of the trials object since it
        # is only serialized in a synchronized state; there will never be a save
        # with a running experiment
        fh = open(state_filename, "rb")  # pickle state must be read in binary mode
        tmp_dict = cPickle.load(fh)
        domain = tmp_dict['domain']
        trials = tmp_dict['trials']
        print(trials.__dict__)
    else:
        domain = hyperopt.Domain(fn, search_space, rseed=int(args.seed))
        trials = hyperopt.Trials()
        fh = open(state_filename, "wb")
        # By this we probably lose the seed; not too critical for a restart
        cPickle.dump({"trials": trials, "domain": domain}, fh)
        fh.close()

    for i in range(int(args.maxEvals) + 1):
        # in exhaust, the number of evaluations is max_evals - num_done
        rval = hyperopt.FMinIter(tpe_with_seed, domain, trials, max_evals=i)
        rval.exhaust()
        fh = open(state_filename, "wb")
        cPickle.dump({"trials": trials, "domain": domain}, fh)
        fh.close()

    best = trials.argmin
    print "Best Value found for params:", best
Example #19
        best_trial = trials.best_trial
    except Exception as e:
        raise ValueError('problem retrieving best trial: %s' % (e))

    dataset_info = {'data_obj': data_obj,
                    'fn_imgs': 'protocol_imgs',
                    'fn_eval': 'protocol_eval'}

    search_space = build_search_space(dataset_info,
                                      learning_algo,
                                      hp_space=hp_space,
                                      n_ok_trials=1000000,
                                      batched_lmap_speed_thresh=speed_thresh)

    ctrl = hyperopt.Ctrl(trials=trials, current_trial=best_trial)
    domain = hyperopt.Domain(objective, search_space)

    best_hps = hyperopt.base.spec_from_misc(best_trial['misc'])

    r_dict = domain.evaluate(best_hps, ctrl, attach_attachments=True)

    if r_dict['status'] == 'ok':
        print('\nperformance according to dataset protocol:\n')
        for key in r_dict:
            if key == 'int_samples':
                if int_samples:
                    interesting_samples(r_dict['int_samples'])
            else:
                print(key, pprint.pformat(r_dict[key]))
    else:
        print('\n', r_dict['failure']['tb'])
Example #20
  def get_new_suggestions(self, study_name, input_trials=[], number=1):
    """
    Get the new suggested trials with TPE algorithm.
    """

    # Construct search space, example: {"x": hyperopt.hp.uniform('x', -10, 10), "x2": hyperopt.hp.uniform('x2', -10, 10)}
    hyperopt_search_space = {}

    study = Study.objects.get(name=study_name)
    study_configuration_json = json.loads(study.study_configuration)
    params = study_configuration_json["params"]

    for param in params:
      param_name = param["parameterName"]

      if param["type"] == "INTEGER":
        # TODO: Support int type of search space
        pass

      elif param["type"] == "DOUBLE":
        hyperopt_search_space[param_name] = hyperopt.hp.uniform(
            param_name, param["minValue"], param["maxValue"])

      elif param["type"] == "DISCRETE" or param["type"] == "CATEGORICAL":
        feasible_point_list = [
            value.strip() for value in param["feasiblePoints"].split(",")
        ]
        hyperopt_search_space[param_name] = hyperopt.hp.choice(
            param_name, feasible_point_list)

    # New hyperopt variables
    hyperopt_rstate = np.random.RandomState()
    hyperopt_domain = hyperopt.Domain(
        None, hyperopt_search_space, pass_expr_memo_ctrl=None)

    hyperopt_trial_specs = []
    hyperopt_trial_results = []
    # Example: [{'tid': 0, 'idxs': {'l1_normalization': [0], 'learning_rate': [0], 'hidden2': [0], 'optimizer': [0]}, 'cmd': ('domain_attachment', 'FMinIter_Domain'), 'vals': {'l1_normalization': [0.1], 'learning_rate': [0.1], 'hidden2': [1], 'optimizer': [1]}, 'workdir': None}]
    hyperopt_trial_miscs = []
    hyperopt_trial_new_ids = []

    # Update hyperopt for trained trials with completed advisor trials
    completed_hyperopt_trials = hyperopt.Trials()

    completed_advisor_trials = Trial.objects.filter(
        study_name=study_name, status="Completed")

    for index, advisor_trial in enumerate(completed_advisor_trials):
      # Example: {"learning_rate": 0.01, "optimizer": "ftrl"}
      parameter_values_json = json.loads(advisor_trial.parameter_values)

      # Example: {'l1_normalization': [0], 'learning_rate': [0], 'hidden2': [0], 'optimizer': [0]}
      hyperopt_trial_miscs_idxs = {}
      # Example: {'l1_normalization': [0.1], 'learning_rate': [0.1], 'hidden2': [1], 'optimizer': [1]}
      hyperopt_trial_miscs_vals = {}
      new_id = index
      hyperopt_trial_new_ids.append(new_id)
      hyperopt_trial_misc = dict(
          tid=new_id, cmd=hyperopt_domain.cmd, workdir=hyperopt_domain.workdir)

      for param in params:

        if param["type"] == "INTEGER":
          pass

        elif param["type"] == "DOUBLE":
          parameter_value = parameter_values_json[param["parameterName"]]
          hyperopt_trial_miscs_idxs[param["parameterName"]] = [index]
          hyperopt_trial_miscs_vals[param["parameterName"]] = [parameter_value]

        elif param["type"] == "DISCRETE":
          feasible_points_string = param["feasiblePoints"]
          feasible_points = [
              float(value.strip())
              for value in feasible_points_string.split(",")
          ]
          parameter_value = parameter_values_json[param["parameterName"]]
          index_of_value_in_list = feasible_points.index(parameter_value)
          hyperopt_trial_miscs_idxs[param["parameterName"]] = [index]
          hyperopt_trial_miscs_vals[param["parameterName"]] = [
              index_of_value_in_list
          ]

        elif param["type"] == "CATEGORICAL":
          feasible_points_string = param["feasiblePoints"]
          feasible_points = [
              value.strip() for value in feasible_points_string.split(",")
          ]
          # Example: "ftrl"
          parameter_value = parameter_values_json[param["parameterName"]]
          index_of_value_in_list = feasible_points.index(parameter_value)
          hyperopt_trial_miscs_idxs[param["parameterName"]] = [index]
          hyperopt_trial_miscs_vals[param["parameterName"]] = [
              index_of_value_in_list
          ]

      hyperopt_trial_specs.append(None)

      hyperopt_trial_misc["idxs"] = hyperopt_trial_miscs_idxs
      hyperopt_trial_misc["vals"] = hyperopt_trial_miscs_vals
      hyperopt_trial_miscs.append(hyperopt_trial_misc)

      # TODO: Use negative objective value for loss or not

      loss_for_hyperopt = advisor_trial.objective_value
      if study_configuration_json["goal"] == "MAXIMIZE":
        # Now hyperopt only supports fmin and we need to reverse objective value for maximization
        loss_for_hyperopt = -1 * advisor_trial.objective_value

      hyperopt_trial_result = {
          "loss": loss_for_hyperopt,
          "status": hyperopt.STATUS_OK
      }
      hyperopt_trial_results.append(hyperopt_trial_result)

    if len(completed_advisor_trials) > 0:
      # Example: {'refresh_time': datetime.datetime(2018, 9, 18, 12, 6, 41, 922000), 'book_time': datetime.datetime(2018, 9, 18, 12, 6, 41, 922000), 'misc': {'tid': 0, 'idxs': {'x2': [0], 'x': [0]}, 'cmd': ('domain_attachment', 'FMinIter_Domain'), 'vals': {'x2': [-8.137088361136204], 'x': [-4.849028446711832]}, 'workdir': None}, 'state': 2, 'tid': 0, 'exp_key': None, 'version': 0, 'result': {'status': 'ok', 'loss': 14.849028446711833}, 'owner': None, 'spec': None}
      hyperopt_trials = completed_hyperopt_trials.new_trial_docs(
          hyperopt_trial_new_ids, hyperopt_trial_specs, hyperopt_trial_results,
          hyperopt_trial_miscs)
      for current_hyperopt_trials in hyperopt_trials:
        current_hyperopt_trials["state"] = hyperopt.JOB_STATE_DONE

      completed_hyperopt_trials.insert_trial_docs(hyperopt_trials)
      completed_hyperopt_trials.refresh()

    rval = hyperopt.FMinIter(
        self.hyperopt_algorithm,
        hyperopt_domain,
        completed_hyperopt_trials,
        max_evals=-1,
        rstate=hyperopt_rstate,
        verbose=0)
    rval.catch_eval_exceptions = False

    new_ids = rval.trials.new_trial_ids(number)

    rval.trials.refresh()

    random_state = rval.rstate.randint(2**31 - 1)
    new_trials = self.hyperopt_algorithm(
        new_ids, rval.domain, completed_hyperopt_trials, random_state)
    rval.trials.refresh()

    # Construct return advisor trials from new hyperopt trials
    return_trial_list = []

    for i in range(number):

      # Example: {u'hidden2': [2], u'learning_rate': [0.04633366105812467], u'l1_normalization': [0.16858448611765364], u'optimizer': [3]}
      vals = new_trials[i]['misc']['vals']  # index by i, not 0, so each trial gets its own suggestion

      new_advisor_trial = Trial.create(study.name, "TpeTrial")
      parameter_values_json = {}

      for param in params:

        if param["type"] == "INTEGER":
          # TODO: Support int type of search space; skip so suggest_value
          # is never used before assignment
          continue

        elif param["type"] == "DOUBLE":
          suggest_value = vals[param["parameterName"]][0]

        elif param["type"] == "DISCRETE":
          feasible_point_list = [
              float(value.strip())
              for value in param["feasiblePoints"].split(",")
          ]
          suggest_index = vals[param["parameterName"]][0]
          suggest_value = feasible_point_list[suggest_index]

        elif param["type"] == "CATEGORICAL":
          feasible_point_list = [
              value.strip() for value in param["feasiblePoints"].split(",")
          ]
          suggest_index = vals[param["parameterName"]][0]
          suggest_value = feasible_point_list[suggest_index]

        parameter_values_json[param["parameterName"]] = suggest_value

      new_advisor_trial.parameter_values = json.dumps(parameter_values_json)
      return_trial_list.append(new_advisor_trial)

    return return_trial_list
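
Since hyperopt only minimizes, the example above reports loss = -objective for MAXIMIZE studies. A tiny hedged sketch of that convention in isolation (the helper name and values are illustrative):

import hyperopt

def to_hyperopt_result(objective_value, goal):
    loss = objective_value
    if goal == "MAXIMIZE":
        loss = -1 * objective_value  # flip sign so minimizing the loss maximizes the metric
    return {"loss": loss, "status": hyperopt.STATUS_OK}

print(to_hyperopt_result(0.93, "MAXIMIZE"))  # {'loss': -0.93, 'status': 'ok'}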
Example #21
def find_hyperparameters(
        setting, path, space=None, max_evals=100, trials_per_point=30,
        parallelization="sequential",
        objective="max_reward", max_concurrent_jobs=100):
    """
    This function does hyperparameter optimization for RLPy experiments with the
    hyperopt library.
    At the end an instance of the optimization trials is stored in "path"/trials.pck

    :param setting: file specifying the experimental setup.
        It contains a make_experiment function and a dictionary
        named param_space if the argument space is not used.
        For each key of param_space there needs to be an optional
        argument in make_experiment
    :param path: directory used to store all intermediate results.
    :param space: (optional) an alternative specification of the hyperparameter
        space
    :param max_evals: maximum number of evaluations of a single hyperparameter
        setting
    :param trials_per_point: specifies the number of independent runs (with
        different seeds) of the experiment for evaluating a single hyperparameter
        setting.
    :param parallelization: one of **sequential**, **joblib**, **condor**,
        **condor_all** or **condor_full**.
        The condor options can be used on a computing cluster running HTCondor.
        The joblib option parallelizes runs on one machine, and sequential
        runs every experiment in sequence.
    :param objective: (optional) string specifying the objective to optimize,
        possible values are *max_reward*, *min_steps*, *max_steps*
    :param max_concurrent_jobs: only relevant for condor_full parallelization.
        specifies the maximum number of jobs that should run at the same time.
    :return: a tuple containing the best hyperparameter settings and the hyperopt
        trials instance of the optimization procedure
    """
    if space is None:
        space = import_param_space(setting)

    def f(hyperparam):
        """function to optimize by hyperopt"""

        # "temporary" directory to use
        full_path = os.path.join(
            path,
            "-".join([str(v) for v in hyperparam.values()]))

        # execute experiment
        rt.run(setting, location=full_path, ids=range(1, trials_per_point + 1),
               parallelization=parallelization, force_rerun=False, block=True, **hyperparam)

        # all jobs should be done
        res = tres.load_results(full_path)

        if objective == "max_steps":
            m, s, n = tres.avg_quantity(res, "steps")
            val = -m
            std = s[-1]
        elif objective == "min_steps":
            m, s, n = tres.avg_quantity(res, "steps")
            val = m
            std = s[-1]
        elif objective == "max_reward":
            m, s, n = tres.avg_quantity(res, "return")
            val = -m
            std = s[-1]
        else:
            raise ValueError("unknown objective: %s" % objective)
        weights = (np.arange(len(val)) + 1) ** 2
        loss = (val * weights).sum() / weights.sum()
        print(time.ctime())
        print("Parameters", hyperparam)
        print("Loss", loss)
        # use #steps/eps at the moment
        return {"loss": loss,
                "num_trials": n[-1],
                "status": hyperopt.STATUS_OK,
                "std_last_mean": std}

    if parallelization == "condor_all":
        trials = CondorTrials(path=path, ids=range(1, trials_per_point + 1),
                              setting=setting, objective=objective)
        domain = hyperopt.Domain(dummy_f, space, rseed=123)
        rval = hyperopt.FMinIter(hyperopt.rand.suggest, domain, trials,
                                 max_evals=30,
                                 max_queue_len=30)
        rval.exhaust()
        rval = hyperopt.FMinIter(hyperopt.tpe.suggest, domain, trials,
                                 max_evals=max_evals,
                                 max_queue_len=1)
        rval.exhaust()
        best = trials.argmin
    elif parallelization == "condor_full":
        trials = _search_condor_parallel(path=path, setting=setting,
                                         objective=objective,
                                         space=space, max_evals=max_evals,
                                         trials_per_point=trials_per_point)
        best = trials.argmin
    else:
        trials = hyperopt.Trials()
        best = hyperopt.fmin(f, space=space, algo=hyperopt.tpe.suggest,
                             max_evals=max_evals, trials=trials)

    with open(os.path.join(path, 'trials.pck'), 'wb') as f:  # pickle needs a binary file
        pickle.dump(trials, f)

    return best, trials
Example #22
File: tpecall.py Project: bjkomer/HPOlib
def main():
    # Parse options and arguments
    parser = OptionParser()
    parser.add_option("-p",
                      "--space",
                      dest="spaceFile",
                      help="Where is the space.py located?")
    parser.add_option("-a",
                      "--algoExec",
                      dest="algoExec",
                      help="Which function to load located?")
    parser.add_option("-m",
                      "--maxEvals",
                      dest="maxEvals",
                      help="How many evaluations?")
    parser.add_option("-s",
                      "--seed",
                      dest="seed",
                      default="123",
                      type=int,
                      help="Seed for the TPE algorithm")
    parser.add_option("-r",
                      "--restore",
                      dest="restore",
                      action="store_true",
                      help="When this flag is set state.pkl is restored in " +
                      "the current working directory")
    parser.add_option("--random",
                      default=False,
                      dest="random",
                      action="store_true",
                      help="Use a random search")
    (options, args) = parser.parse_args()

    # First remove ".py"
    algo, ext = os.path.splitext(os.path.basename(options.algoExec))
    space, ext = os.path.splitext(os.path.basename(options.spaceFile))

    # Then load dict searchSpace and out function cv.py
    import sys
    sys.path.append("./")
    sys.path.append("")
    print(os.getcwd())
    module = import_module(space)
    search_space = module.space
    fn = import_module(algo)
    fn = fn.doForTPE

    if options.random:
        # We use a random search
        suggest = hyperopt.tpe.rand.suggest
    else:
        suggest = hyperopt.tpe.suggest

    rstate = np.random.RandomState(options.seed)

    # Now run TPE, emulate fmin.fmin()
    state_filename = "state.pkl"
    if options.restore:
        # We do not need to care about the state of the trials object since it
        # is only serialized in a synchronized state; there will never be a save
        # with a running experiment
        fh = open(state_filename, "rb")  # pickle state must be read in binary mode
        tmp_dict = cPickle.load(fh)
        domain = tmp_dict['domain']
        trials = tmp_dict['trials']
        rstate = tmp_dict['rstate']
        print(trials.__dict__)
    else:
        domain = hyperopt.Domain(fn, search_space)
        trials = hyperopt.Trials()
        fh = open(state_filename, "wb")
        # By this we probably lose the seed; not too critical for a restart
        cPickle.dump({
            "trials": trials,
            "domain": domain,
            "rstate": rstate
        }, fh)
        fh.close()

    for i in range(int(options.maxEvals) + 1):
        # in exhaust, the number of evaluations is max_evals - num_done
        rval = hyperopt.FMinIter(suggest,
                                 domain,
                                 trials,
                                 max_evals=i,
                                 rstate=rstate)
        rval.exhaust()
        fh = open(state_filename, "wb")
        cPickle.dump({
            "trials": trials,
            "domain": domain,
            "rstate": rstate
        }, fh)
        fh.close()

    best = trials.argmin
    print "Best Value found for params:", best
Example #23
    def getSuggestions(self, search_space, trials, request_number):
        """
        Get the new suggested trials with the given algorithm.
        """
        # Construct search space, example: {"x": hyperopt.hp.uniform('x', -10, 10), "x2": hyperopt.hp.uniform('x2', -10, 10)}
        hyperopt_search_space = {}
        for param in search_space.params:
            if param.type == INTEGER:
                hyperopt_search_space[param.name] = hyperopt.hp.quniform(
                    param.name, float(param.min), float(param.max), 1)
            elif param.type == DOUBLE:
                hyperopt_search_space[param.name] = hyperopt.hp.uniform(
                    param.name, float(param.min), float(param.max))
            elif param.type == CATEGORICAL \
                    or param.type == DISCRETE:
                hyperopt_search_space[param.name] = hyperopt.hp.choice(
                    param.name, param.list)
        # New hyperopt variables
        hyperopt_rstate = np.random.RandomState()
        hyperopt_domain = hyperopt.Domain(None,
                                          hyperopt_search_space,
                                          pass_expr_memo_ctrl=None)

        hyperopt_trial_specs = []
        hyperopt_trial_results = []
        # Example: [{'tid': 0, 'idxs': {'l1_normalization': [0], 'learning_rate': [0], 'hidden2': [0], 'optimizer': [0]}, 'cmd': ('domain_attachment', 'FMinIter_Domain'), 'vals': {'l1_normalization': [0.1], 'learning_rate': [0.1], 'hidden2': [1], 'optimizer': [1]}, 'workdir': None}]
        hyperopt_trial_miscs = []
        hyperopt_trial_new_ids = []

        # Update hyperopt for trained trials with completed advisor trials
        completed_hyperopt_trials = hyperopt.Trials()
        for trial in trials:
            # Example: {'l1_normalization': [0], 'learning_rate': [0], 'hidden2': [0], 'optimizer': [0]}
            hyperopt_trial_miscs_idxs = {}
            # Example: {'l1_normalization': [0.1], 'learning_rate': [0.1], 'hidden2': [1], 'optimizer': [1]}
            hyperopt_trial_miscs_vals = {}
            new_id = trial.name
            hyperopt_trial_new_ids.append(new_id)
            hyperopt_trial_misc = dict(tid=new_id,
                                       cmd=hyperopt_domain.cmd,
                                       workdir=hyperopt_domain.workdir)
            for param in search_space.params:
                parameter_value = None
                for assignment in trial.assignments:
                    if assignment.name == param.name:
                        parameter_value = assignment.value
                        break
                if param.type == INTEGER:
                    hyperopt_trial_miscs_idxs[param.name] = [new_id]
                    hyperopt_trial_miscs_vals[param.name] = [parameter_value]
                elif param.type == DOUBLE:
                    hyperopt_trial_miscs_idxs[param.name] = [new_id]
                    hyperopt_trial_miscs_vals[param.name] = [parameter_value]
                elif param.type == DISCRETE or param.type == CATEGORICAL:
                    index_of_value_in_list = param.list.index(parameter_value)
                    hyperopt_trial_miscs_idxs[param.name] = [new_id]
                    hyperopt_trial_miscs_vals[param.name] = [
                        index_of_value_in_list
                    ]

            hyperopt_trial_specs.append(None)

            hyperopt_trial_misc["idxs"] = hyperopt_trial_miscs_idxs
            hyperopt_trial_misc["vals"] = hyperopt_trial_miscs_vals
            hyperopt_trial_miscs.append(hyperopt_trial_misc)

            # TODO: Use negative objective value for loss or not
            objective_for_hyperopt = float(trial.target_metric.value)
            if search_space.goal == MAX_GOAL:
                # hyperopt only minimizes, so negate the objective for maximization
                objective_for_hyperopt = -1 * objective_for_hyperopt
            hyperopt_trial_result = {
                "loss": objective_for_hyperopt,
                "status": hyperopt.STATUS_OK
            }
            hyperopt_trial_results.append(hyperopt_trial_result)
        if len(trials) > 0:
            # Example: {'refresh_time': datetime.datetime(2018, 9, 18, 12, 6, 41, 922000), 'book_time': datetime.datetime(2018, 9, 18, 12, 6, 41, 922000), 'misc': {'tid': 0, 'idxs': {'x2': [0], 'x': [0]}, 'cmd': ('domain_attachment', 'FMinIter_Domain'), 'vals': {'x2': [-8.137088361136204], 'x': [-4.849028446711832]}, 'workdir': None}, 'state': 2, 'tid': 0, 'exp_key': None, 'version': 0, 'result': {'status': 'ok', 'loss': 14.849028446711833}, 'owner': None, 'spec': None}
            hyperopt_trials = completed_hyperopt_trials.new_trial_docs(
                hyperopt_trial_new_ids, hyperopt_trial_specs,
                hyperopt_trial_results, hyperopt_trial_miscs)
            for current_hyperopt_trials in hyperopt_trials:
                current_hyperopt_trials["state"] = hyperopt.JOB_STATE_DONE

            completed_hyperopt_trials.insert_trial_docs(hyperopt_trials)
            completed_hyperopt_trials.refresh()

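        # FMinIter is built only for its bookkeeping (trials, domain, rstate);
        # max_evals=-1 ensures it never runs any evaluation itself.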
        rval = hyperopt.FMinIter(self.hyperopt_algorithm,
                                 hyperopt_domain,
                                 completed_hyperopt_trials,
                                 max_evals=-1,
                                 rstate=hyperopt_rstate,
                                 verbose=0)
        rval.catch_eval_exceptions = False

        new_ids = rval.trials.new_trial_ids(request_number)

        rval.trials.refresh()

        random_state = rval.rstate.randint(2**31 - 1)
        new_trials = self.hyperopt_algorithm(new_ids, rval.domain,
                                             completed_hyperopt_trials,
                                             random_state)
        rval.trials.refresh()

        # Construct return advisor trials from new hyperopt trials
        list_of_assignments = []
        for i in range(request_number):
            vals = new_trials[i]['misc']['vals']
            list_of_assignments.append(
                BaseHyperoptService.convert(search_space, vals))
        return list_of_assignments
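
Stripped of the service plumbing, the core technique in this example is seeding a fresh Trials object with already-finished results and then asking the suggest algorithm for new points. A minimal sketch (Python 3; the single parameter x and its loss value are made up):

import hyperopt
import numpy as np

space = {"x": hyperopt.hp.uniform("x", -10, 10)}
domain = hyperopt.Domain(None, space, pass_expr_memo_ctrl=None)
trials = hyperopt.Trials()

# Pretend one trial with x=2.0 finished with loss 4.0.
misc = dict(tid=0, cmd=domain.cmd, workdir=domain.workdir,
            idxs={"x": [0]}, vals={"x": [2.0]})
docs = trials.new_trial_docs(
    [0], [None], [{"loss": 4.0, "status": hyperopt.STATUS_OK}], [misc])
for doc in docs:
    doc["state"] = hyperopt.JOB_STATE_DONE
trials.insert_trial_docs(docs)
trials.refresh()

# Ask TPE for one new point, conditioned on the completed trial.
rng = np.random.RandomState(0)
new_ids = trials.new_trial_ids(1)
new_docs = hyperopt.tpe.suggest(new_ids, domain, trials,
                                rng.randint(2 ** 31 - 1))
print(new_docs[0]["misc"]["vals"])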
Example #24
0
def view2(
    host,
    port,
    exp_key,
    bagging_fraction,
    max_n_per_class=None,
    maybe_test_view2=True,
    assume_promising=True,
    tid=None,
    fake=False,
):
    fake = int(fake)
    real_trials = make_trials(host, port, exp_key)
    print 'n. real trials', len(real_trials)
    if tid is None:
        best_trial = real_trials.best_trial
    else:
        try:
            best_trial = [
                t for t in real_trials.trials if t['tid'] == int(tid)
            ][0]
        except IndexError:
            print 'tid %s not found; available tids:' % tid
            print [t['tid'] for t in real_trials.trials]
            return
    print 'Best trial'
    print ' ["tid"]', best_trial['tid']
    best_result = best_trial['result']
    print 'Best trial had loss', best_result['loss']
    best_trace = best_result.get('trace')
    if 0:
        print ' ["Result trace"]', best_trace
    fake_trials = hyperopt.Trials()
    fn = slm_visitor_lfw_partial(
        max_n_per_class,
        maybe_test_view2=int(maybe_test_view2),
        assume_promising=int(assume_promising),
        foobar_trace_target=list(best_trace),
    )
    #space = search_space
    space = hpconvnet.lfw.build_search_space(
        max_n_features=16000,
        trn='DevTrain',  # -- split used for unsupervised images
        n_unsup=300,  # -- number of images from which to draw patches
        batched_lmap_speed_thresh={
            'seconds': 60,
            'elements': 1
        },
        bagging_fraction=float(bagging_fraction),
    )
    domain = hyperopt.Domain(fn, space, rseed=123)
    domain.rng = None  # -- this rng is never to be used
    if fake:
        ctrl = hyperopt.Ctrl(trials=fake_trials, current_trial=None)
        print 'WARNING: running on fake ctrl object'
    else:
        ctrl = MongoCtrl(trials=real_trials,
                         current_trial=best_trial,
                         read_only=False)

    #real_trials.handle.update(best_result, msg)
    #ctrl.checkpoint(dict(best_trial['result'], foodebug='yes'))
    config = hyperopt.base.spec_from_misc(best_trial['misc'])
    #print 'Config', config
    r_dct = domain.evaluate(config, ctrl, attach_attachments=(not fake))
    print 'r_dct'
    print r_dct
    if fake:
        print 'WARNING: running on fake ctrl object, not saving result'
        attachments = r_dct.pop('attachments', {})
        print 'Attachments:', attachments.keys()
        print ' ["Best Result trace"]'

        def print_trace(r):
            trace = r['trace']
            for t in trace:
                print '  ', t

        print_trace(best_result)
        print ' ["result trace"]'
        print_trace(r_dct)
    else:
        # -- the loss should have been re-computed identically
        r_dct['view2_recalculated_loss'] = r_dct['loss']
        r_dct['loss'] = best_result['loss']
        if r_dct['view2_recalculated_loss'] > best_result['loss']:
            print 'WARNING: recalculated loss was worse than loss during search'
            print ' -> original loss', best_result['loss']
            print ' -> recalculated loss', r_dct['view2_recalculated_loss']

        print 'Checkpointing back to db'
        # -- N.B. attachments should have been saved by Domain.evaluate,
        #    since we called it with attach_attachments=True. So they should
        #    not be here anymore.
        assert 'attachments' not in r_dct

        ctrl.checkpoint(r_dct)
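
The interesting move in this example is replaying a stored trial: spec_from_misc rebuilds the parameter point from the trial's misc document, and Domain.evaluate reruns the objective on it. A minimal self-contained sketch of that replay (Python 3; the quadratic objective is illustrative):

import hyperopt
from hyperopt import hp

def objective(params):
    return {"loss": params["x"] ** 2, "status": hyperopt.STATUS_OK}

space = {"x": hp.uniform("x", -10, 10)}
trials = hyperopt.Trials()
hyperopt.fmin(objective, space, algo=hyperopt.tpe.suggest,
              max_evals=5, trials=trials, verbose=False)

best_trial = trials.best_trial
# Rebuild the parameter point from the stored misc document ...
config = hyperopt.base.spec_from_misc(best_trial['misc'])
# ... and re-run the objective on it, as view2 does for the best trial.
ctrl = hyperopt.Ctrl(trials=trials, current_trial=best_trial)
domain = hyperopt.Domain(objective, space)
r_dct = domain.evaluate(config, ctrl, attach_attachments=False)
# For a deterministic objective the recomputed loss matches the stored one.
assert abs(r_dct['loss'] - best_trial['result']['loss']) < 1e-12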