コード例 #1
0
    def update_search_space(self, search_space):
        """
        Rebuild the tuner's hyperopt state from a new search space definition.

        Called when the experiment is first set up or when the search space is
        updated in the WebUI. The raw definition is kept in ``self.json`` and a
        fresh ``hp.FMinIter`` (empty trials, unseeded random state) is stored
        in ``self.rval``.

        Parameters
        ----------
        search_space : dict
        """
        self.json = search_space

        # Translate the JSON description into a hyperopt space and wrap it in
        # a Domain that has no objective function attached.
        space = json2space(self.json)
        domain = hp.Domain(None, space, pass_expr_memo_ctrl=None)
        suggest = self._choose_tuner(self.algorithm_name)

        fmin_iter = hp.FMinIter(
            suggest,
            domain,
            hp.Trials(),
            max_evals=-1,
            rstate=np.random.RandomState(),
            verbose=0,
        )
        fmin_iter.catch_eval_exceptions = False
        self.rval = fmin_iter
コード例 #2
0
ファイル: base_service.py プロジェクト: zuston/katib
    def create_fmin(self):
        """Create a new ``hyperopt.FMinIter`` and store it in ``self.fmin``.

        The iterator is built from the instance's algorithm, domain and random
        state, starts from an empty ``hyperopt.Trials`` and has
        ``catch_eval_exceptions`` switched off.
        """
        fmin_iter = hyperopt.FMinIter(
            self.hyperopt_algorithm,
            self.hyperopt_domain,
            trials=hyperopt.Trials(),
            max_evals=-1,
            rstate=self.hyperopt_rstate,
            verbose=False,
        )
        fmin_iter.catch_eval_exceptions = False
        self.fmin = fmin_iter
コード例 #3
0
    def get_suggestions(self,
                        configs: List[Dict] = None,
                        metrics: List[float] = None) -> List[Dict]:
        """Ask the configured hyperopt algorithm for ``num_runs`` suggestions.

        Args:
            configs: Previously evaluated configurations, if any.
            metrics: Metrics observed for ``configs``, if any.

        Returns:
            One dict per requested run, mapping parameter names to suggested
            values. Parameters listed in ``self._param_to_value`` are mapped
            back from the sampled index to the stored value.

        Raises:
            ValueError: If ``self.config.num_runs`` is not set.
        """
        num_runs = self.config.num_runs
        if not num_runs:
            raise ValueError("This search strategy requires `num_runs`.")

        seeded_rng = get_random_generator(seed=self.config.seed)
        domain = hyperopt.Domain(None,
                                 self._search_space,
                                 pass_expr_memo_ctrl=None)
        observed = self._get_previous_observations(hyperopt_domain=domain,
                                                   configs=configs,
                                                   metrics=metrics)
        # Without both configs and metrics there is no history to learn from.
        is_first = not (configs and metrics)

        fmin_iter = hyperopt.FMinIter(
            self.config.algorithm,
            domain,
            observed,
            max_evals=-1,
            rstate=seeded_rng,
            verbose=0,
        )
        fmin_iter.catch_eval_exceptions = False

        trial_ids = fmin_iter.trials.new_trial_ids(num_runs)
        fmin_iter.trials.refresh()
        seed = fmin_iter.rstate.randint(2**31 - 1)
        proposed = self.run_algorithm(is_first, trial_ids, fmin_iter.domain,
                                      observed, seed)
        fmin_iter.trials.refresh()

        # Each sampled parameter is stored as a one-element list; unwrap it
        # and translate indices back to values where a mapping is known.
        return [
            {
                name: (self._param_to_value[name][samples[0]]
                       if name in self._param_to_value else samples[0])
                for name, samples in proposed[idx]["misc"]["vals"].items()
            }
            for idx in range(num_runs)
        ]
コード例 #4
0
    def update_search_space(self, search_space):
        """
        Validate a new search space and rebuild the tuner's hyperopt state.

        After ``validate_search_space`` accepts the definition it is kept in
        ``self.json`` and a fresh ``hp.FMinIter`` (empty trials, unseeded
        random state) is stored in ``self.rval``.

        Parameters
        ----------
        search_space : dict
        """
        validate_search_space(search_space)
        self.json = search_space

        # Translate the JSON description into a hyperopt space and wrap it in
        # a Domain that has no objective function attached.
        space = json2space(self.json)
        domain = hp.Domain(None, space, pass_expr_memo_ctrl=None)
        suggest = self._choose_tuner(self.algorithm_name)

        fmin_iter = hp.FMinIter(
            suggest,
            domain,
            hp.Trials(),
            max_evals=-1,
            rstate=np.random.RandomState(),
            verbose=0,
        )
        fmin_iter.catch_eval_exceptions = False
        self.rval = fmin_iter
コード例 #5
0
  def get_new_suggestions(self, study_name, input_trials=None, number=1):
    """
    Get the new suggested trials with TPE algorithm.

    The study's completed trials are replayed into a hyperopt ``Trials``
    object, ``self.hyperopt_algorithm`` is then asked for ``number`` new
    points, and each point is stored on a newly created advisor ``Trial``.

    Args:
      study_name: Name of the ``Study`` whose configuration and completed
        trials are used.
      input_trials: Not used by this implementation.
      number: How many new trials to suggest.

    Returns:
      A list with one advisor trial per suggestion; ``parameter_values`` of
      each trial holds the JSON-encoded suggested parameters.

    Note:
      INTEGER parameters are not supported yet. They are left out of the
      search space, of the replayed history and of the returned values.
    """

    def parse_feasible_points(param, cast=None):
      # "a, b, c" -> ["a", "b", "c"], optionally converting every item.
      points = [value.strip() for value in param["feasiblePoints"].split(",")]
      return points if cast is None else [cast(point) for point in points]

    study = Study.objects.get(name=study_name)
    study_configuration_json = json.loads(study.study_configuration)
    params = study_configuration_json["params"]

    # Construct search space, example: {"x": hyperopt.hp.uniform('x', -10, 10), "x2": hyperopt.hp.uniform('x2', -10, 10)}
    hyperopt_search_space = {}
    for param in params:
      param_name = param["parameterName"]
      param_type = param["type"]

      if param_type == "INTEGER":
        # TODO: Support int type of search space
        continue

      if param_type == "DOUBLE":
        hyperopt_search_space[param_name] = hyperopt.hp.uniform(
            param_name, param["minValue"], param["maxValue"])
      elif param_type == "DISCRETE" or param_type == "CATEGORICAL":
        hyperopt_search_space[param_name] = hyperopt.hp.choice(
            param_name, parse_feasible_points(param))

    # New hyperopt variables
    hyperopt_rstate = np.random.RandomState()
    hyperopt_domain = hyperopt.Domain(
        None, hyperopt_search_space, pass_expr_memo_ctrl=None)

    hyperopt_trial_specs = []
    hyperopt_trial_results = []
    # Example: [{'tid': 0, 'idxs': {'l1_normalization': [0], 'learning_rate': [0], 'hidden2': [0], 'optimizer': [0]}, 'cmd': ('domain_attachment', 'FMinIter_Domain'), 'vals': {'l1_normalization': [0.1], 'learning_rate': [0.1], 'hidden2': [1], 'optimizer': [1]}, 'workdir': None}]
    hyperopt_trial_miscs = []
    hyperopt_trial_new_ids = []

    # Update hyperopt for trained trials with completed advisor trials
    completed_hyperopt_trials = hyperopt.Trials()

    completed_advisor_trials = Trial.objects.filter(
        study_name=study_name, status="Completed")

    for index, advisor_trial in enumerate(completed_advisor_trials):
      # Example: {"learning_rate": 0.01, "optimizer": "ftrl"}
      parameter_values_json = json.loads(advisor_trial.parameter_values)

      # Example: {'l1_normalization': [0], 'learning_rate': [0], 'hidden2': [0], 'optimizer': [0]}
      hyperopt_trial_miscs_idxs = {}
      # Example: {'l1_normalization': [0.1], 'learning_rate': [0.1], 'hidden2': [1], 'optimizer': [1]}
      hyperopt_trial_miscs_vals = {}
      hyperopt_trial_new_ids.append(index)
      hyperopt_trial_misc = dict(
          tid=index, cmd=hyperopt_domain.cmd, workdir=hyperopt_domain.workdir)

      for param in params:
        param_name = param["parameterName"]
        param_type = param["type"]

        if param_type == "INTEGER":
          continue

        if param_type == "DOUBLE":
          hyperopt_trial_miscs_idxs[param_name] = [index]
          hyperopt_trial_miscs_vals[param_name] = [
              parameter_values_json[param_name]
          ]
        elif param_type == "DISCRETE" or param_type == "CATEGORICAL":
          # hp.choice records the position of the chosen item, not the item
          # itself. DISCRETE values are compared as floats, CATEGORICAL
          # values (example: "ftrl") as strings.
          cast = float if param_type == "DISCRETE" else None
          feasible_points = parse_feasible_points(param, cast)
          index_of_value_in_list = feasible_points.index(
              parameter_values_json[param_name])
          hyperopt_trial_miscs_idxs[param_name] = [index]
          hyperopt_trial_miscs_vals[param_name] = [index_of_value_in_list]

      hyperopt_trial_specs.append(None)

      hyperopt_trial_misc["idxs"] = hyperopt_trial_miscs_idxs
      hyperopt_trial_misc["vals"] = hyperopt_trial_miscs_vals
      hyperopt_trial_miscs.append(hyperopt_trial_misc)

      # TODO: Use negative objective value for loss or not

      loss_for_hyperopt = advisor_trial.objective_value
      if study_configuration_json["goal"] == "MAXIMIZE":
        # Now hyperopt only supports fmin and we need to reverse objective value for maximization
        loss_for_hyperopt = -1 * advisor_trial.objective_value

      hyperopt_trial_results.append({
          "loss": loss_for_hyperopt,
          "status": hyperopt.STATUS_OK
      })

    if len(completed_advisor_trials) > 0:
      # Example: {'refresh_time': datetime.datetime(2018, 9, 18, 12, 6, 41, 922000), 'book_time': datetime.datetime(2018, 9, 18, 12, 6, 41, 922000), 'misc': {'tid': 0, 'idxs': {'x2': [0], 'x': [0]}, 'cmd': ('domain_attachment', 'FMinIter_Domain'), 'vals': {'x2': [-8.137088361136204], 'x': [-4.849028446711832]}, 'workdir': None}, 'state': 2, 'tid': 0, 'exp_key': None, 'version': 0, 'result': {'status': 'ok', 'loss': 14.849028446711833}, 'owner': None, 'spec': None}
      hyperopt_trials = completed_hyperopt_trials.new_trial_docs(
          hyperopt_trial_new_ids, hyperopt_trial_specs, hyperopt_trial_results,
          hyperopt_trial_miscs)
      for current_hyperopt_trials in hyperopt_trials:
        current_hyperopt_trials["state"] = hyperopt.JOB_STATE_DONE

      completed_hyperopt_trials.insert_trial_docs(hyperopt_trials)
      completed_hyperopt_trials.refresh()

    rval = hyperopt.FMinIter(
        self.hyperopt_algorithm,
        hyperopt_domain,
        completed_hyperopt_trials,
        max_evals=-1,
        rstate=hyperopt_rstate,
        verbose=0)
    rval.catch_eval_exceptions = False

    new_ids = rval.trials.new_trial_ids(number)

    rval.trials.refresh()

    # Construct return advisor trials from new hyperopt trials
    return_trial_list = []

    # Ask for one suggestion per new id, each with its own seed. Previously
    # the algorithm was called once and its first result was copied into
    # every returned trial, so number > 1 produced identical suggestions.
    # NOTE(review): hyperopt's TPE suggest appears to produce a trial only
    # for the first id it is given - confirm against the hyperopt version in
    # use; calling it per id works for batch-capable algorithms as well.
    for new_id in new_ids:
      random_state = rval.rstate.randint(2**31 - 1)
      new_trials = self.hyperopt_algorithm(
          [new_id], rval.domain, completed_hyperopt_trials, random_state)
      rval.trials.refresh()

      # Example: {u'hidden2': [2], u'learning_rate': [0.04633366105812467], u'l1_normalization': [0.16858448611765364], u'optimizer': [3]}
      vals = new_trials[0]['misc']['vals']

      new_advisor_trial = Trial.create(study.name, "TpeTrial")
      parameter_values_json = {}

      for param in params:
        param_name = param["parameterName"]
        param_type = param["type"]

        if param_type == "DOUBLE":
          suggest_value = vals[param_name][0]
        elif param_type == "DISCRETE":
          suggest_value = parse_feasible_points(param,
                                                float)[vals[param_name][0]]
        elif param_type == "CATEGORICAL":
          suggest_value = parse_feasible_points(param)[vals[param_name][0]]
        else:
          # INTEGER (unsupported) and unknown types are not part of the
          # search space, so there is nothing to report for them. Falling
          # through here used to reuse the previous parameter's value or
          # raise NameError.
          continue

        parameter_values_json[param_name] = suggest_value

      new_advisor_trial.parameter_values = json.dumps(parameter_values_json)
      return_trial_list.append(new_advisor_trial)

    return return_trial_list
コード例 #6
0
def main():
    prog = "python statistics.py WhatIsThis <manyPickles> WhatIsThis <manyPickles> [WhatIsThis <manyPickles>]"
    description = "Return some statistical information"

    parser = ArgumentParser(description=description, prog=prog)

    parser.add_argument("-p",
                        "--space",
                        dest="spaceFile",
                        help="Where is the space.py located?")
    parser.add_argument("-m",
                        "--maxEvals",
                        dest="maxEvals",
                        help="How many evaluations?")
    parser.add_argument("-s",
                        "--seed",
                        default="1",
                        dest="seed",
                        type=int,
                        help="Seed for the TPE algorithm")
    parser.add_argument(
        "-r",
        "--restore",
        action="store_true",
        dest="restore",
        help="When this flag is set state.pkl is restored in " +
        "the current working directory")
    parser.add_argument("--random",
                        default=False,
                        action="store_true",
                        dest="random",
                        help="Use a random search")
    parser.add_argument("--cwd",
                        help="Change the working directory before "
                        "optimizing.")

    args, unknown = parser.parse_known_args()

    if args.cwd:
        os.chdir(args.cwd)

    cfg = load_experiment_config_file()
    log_level = cfg.getint("HPOLIB", "HPOlib_loglevel")
    logging.basicConfig(format='[%(levelname)s] [%(asctime)s:%(name)s] %('
                        'message)s',
                        datefmt='%H:%M:%S')
    logger.setLevel(log_level)

    if not os.path.exists(args.spaceFile):
        logger.critical("Search space not found: %s" % args.spaceFile)
        sys.exit(1)

    # First remove ".py"
    space, ext = os.path.splitext(os.path.basename(args.spaceFile))

    # Then load dict searchSpace and out function cv.py
    sys.path.append("./")
    sys.path.append("")

    module = import_module(space)
    search_space = module.space

    cli_target = "HPOlib.optimization_interceptor"
    fn = partial(command_line_function, cli_target=cli_target)

    if args.random:
        # We use a random search
        tpe_with_seed = partial(hyperopt.tpe.rand.suggest, seed=int(args.seed))
        logger.info("Using Random Search")
    else:
        tpe_with_seed = partial(hyperopt.tpe.suggest, seed=int(args.seed))

    # Now run TPE, emulate fmin.fmin()
    state_filename = "state.pkl"
    if args.restore:
        # We do not need to care about the state of the trials object since it
        # is only serialized in a synchronized state, there will never be a save
        # with a running experiment
        fh = open(state_filename)
        tmp_dict = cPickle.load(fh)
        domain = tmp_dict['domain']
        trials = tmp_dict['trials']
        print trials.__dict__
    else:
        domain = hyperopt.Domain(fn, search_space, rseed=int(args.seed))
        trials = hyperopt.Trials()
        fh = open(state_filename, "w")
        # By this we probably loose the seed; not too critical for a restart
        cPickle.dump({"trials": trials, "domain": domain}, fh)
        fh.close()

    for i in range(int(args.maxEvals) + 1):
        # in exhaust, the number of evaluations is max_evals - num_done
        rval = hyperopt.FMinIter(tpe_with_seed, domain, trials, max_evals=i)
        rval.exhaust()
        fh = open(state_filename, "w")
        cPickle.dump({"trials": trials, "domain": domain}, fh)
        fh.close()

    best = trials.argmin
    print "Best Value found for params:", best
コード例 #7
0
    def getSuggestions(self, search_space, trials, request_number):
        """
        Get the new suggested trials with the given algorithm.

        The given trials are replayed into a hyperopt ``Trials`` object, then
        ``self.hyperopt_algorithm`` is asked for ``request_number`` new points.
        Returns a list with one ``BaseHyperoptService.convert`` result per
        requested point.
        """
        # Build the hyperopt search space, e.g.
        # {"x": hyperopt.hp.uniform('x', -10, 10), "x2": hyperopt.hp.uniform('x2', -10, 10)}
        space = {}
        for param in search_space.params:
            if param.type == INTEGER:
                space[param.name] = hyperopt.hp.quniform(
                    param.name, float(param.min), float(param.max), 1)
            elif param.type == DOUBLE:
                space[param.name] = hyperopt.hp.uniform(
                    param.name, float(param.min), float(param.max))
            elif param.type == CATEGORICAL or param.type == DISCRETE:
                space[param.name] = hyperopt.hp.choice(param.name, param.list)

        rstate = np.random.RandomState()
        domain = hyperopt.Domain(None, space, pass_expr_memo_ctrl=None)

        # Parallel lists describing the already evaluated trials. A misc entry
        # looks like: {'tid': 0, 'idxs': {'learning_rate': [0], 'optimizer': [0]},
        # 'cmd': ('domain_attachment', 'FMinIter_Domain'),
        # 'vals': {'learning_rate': [0.1], 'optimizer': [1]}, 'workdir': None}
        ids, specs, results, miscs = [], [], [], []

        history = hyperopt.Trials()
        for trial in trials:
            tid = trial.name
            ids.append(tid)
            misc = dict(tid=tid, cmd=domain.cmd, workdir=domain.workdir)

            idxs = {}
            vals = {}
            for param in search_space.params:
                # Value of the first assignment carrying this parameter's
                # name, or None when the trial has no such assignment.
                assigned = next(
                    (assignment.value for assignment in trial.assignments
                     if assignment.name == param.name), None)

                if param.type == INTEGER or param.type == DOUBLE:
                    idxs[param.name] = [tid]
                    vals[param.name] = [assigned]
                elif param.type == DISCRETE or param.type == CATEGORICAL:
                    # Choice parameters are recorded by list position.
                    position = param.list.index(assigned)
                    idxs[param.name] = [tid]
                    vals[param.name] = [position]

            specs.append(None)
            misc["idxs"] = idxs
            misc["vals"] = vals
            miscs.append(misc)

            # TODO: Use negative objective value for loss or not
            loss = float(trial.target_metric.value)
            if search_space.goal == MAX_GOAL:
                # hyperopt only minimizes, so negate the objective when the
                # goal is maximization.
                loss = -1 * loss
            results.append({"loss": loss, "status": hyperopt.STATUS_OK})

        if len(trials) > 0:
            # Mark every replayed document as done before inserting it.
            docs = history.new_trial_docs(ids, specs, results, miscs)
            for doc in docs:
                doc["state"] = hyperopt.JOB_STATE_DONE
            history.insert_trial_docs(docs)
            history.refresh()

        fmin_iter = hyperopt.FMinIter(
            self.hyperopt_algorithm,
            domain,
            history,
            max_evals=-1,
            rstate=rstate,
            verbose=0,
        )
        fmin_iter.catch_eval_exceptions = False

        new_ids = fmin_iter.trials.new_trial_ids(request_number)
        fmin_iter.trials.refresh()

        seed = fmin_iter.rstate.randint(2**31 - 1)
        proposals = self.hyperopt_algorithm(new_ids, fmin_iter.domain, history,
                                            seed)
        fmin_iter.trials.refresh()

        # Convert each new hyperopt trial into the service's assignment format.
        return [
            BaseHyperoptService.convert(search_space,
                                        proposals[i]['misc']['vals'])
            for i in range(request_number)
        ]
コード例 #8
0
def find_hyperparameters(
        setting, path, space=None, max_evals=100, trials_per_point=30,
        parallelization="sequential",
        objective="max_reward", max_concurrent_jobs=100):
    """
    This function does hyperparameter optimization for RLPy experiments with the
    hyperopt library.
    At the end an instance of the optimization trials is stored in "path"/trials.pck

    :param setting: file specifying the experimental setup.
        It contains a make_experiment function and a dictionary
        named param_space if the argument space is not used.
        For each key of param_space there needs to be an optional
        argument in make_experiment
    :param path: directory used to store all intermediate results.
    :param space: (optional) an alternative specification of the hyperparameter
        space
    :param max_evals: maximum number of evaluations of a single hyperparameter
        setting
    :param trials_per_point: specifies the number of independent runs (with
        different seeds) of the experiment for evaluating a single hyperparameter
        setting.
    :param parallelization: either **sequential**, **joblib**, **condor_all**
        or **condor_full**, **condor**.
        the condor options can be used in a computing cluster with a HTCondor
        machine. The joblib option parallelizes runs on one machine and sequential
        runs every experiment in sequence.
    :param objective: (optional) string specifying the objective to optimize,
        possible values are *max_reward*, *min_steps*, *max_steps*
    :param max_concurrent_jobs: only relevant for condor_full parallelization.
        specifies the maximum number of jobs that should run at the same time.
    :return: a tuple containing the best hyperarameter settings and the hyperopt
        trials instance of the optimization procedure
    """
    if space is None:
        space = import_param_space(setting)

    def f(hyperparam):
        """function to optimize by hyperopt"""

        # "temporary" directory to use
        full_path = os.path.join(
            path,
            "-".join([str(v) for v in hyperparam.values()]))

        # execute experiment
        rt.run(setting, location=full_path, ids=range(1, trials_per_point + 1),
               parallelization=parallelization, force_rerun=False, block=True, **hyperparam)

        # all jobs should be done
        res = tres.load_results(full_path)

        if objective == "max_steps":
            m, s, n = tres.avg_quantity(res, "steps")
            val = -m
            std = s[-1]
        elif objective == "min_steps":
            m, s, n = tres.avg_quantity(res, "steps")
            val = m
            std = s[-1]
        elif objective == "max_reward":
            m, s, n = tres.avg_quantity(res, "return")
            val = -m
            std = s[-1]
        else:
            print "unknown objective"
        weights = (np.arange(len(val)) + 1) ** 2
        loss = (val * weights).sum() / weights.sum()
        print time.ctime()
        print "Parameters", hyperparam
        print "Loss", loss
        # use #steps/eps at the moment
        return {"loss": loss,
                "num_trials": n[-1],
                "status": hyperopt.STATUS_OK,
                "std_last_mean": std}

    if parallelization == "condor_all":
        trials = CondorTrials(path=path, ids=range(1, trials_per_point + 1),
                              setting=setting, objective=objective)
        domain = hyperopt.Domain(dummy_f, space, rseed=123)
        rval = hyperopt.FMinIter(hyperopt.rand.suggest, domain, trials,
                                 max_evals=30,
                                 max_queue_len=30)
        rval.exhaust()
        rval = hyperopt.FMinIter(hyperopt.tpe.suggest, domain, trials,
                                 max_evals=max_evals,
                                 max_queue_len=1)
        rval.exhaust()
        best = trials.argmin
    elif parallelization == "condor_full":
        trials = _search_condor_parallel(path=path, setting=setting,
                                         objective=objective,
                                         space=space, max_evals=max_evals,
                                         trials_per_point=trials_per_point)
        best = trials.argmin
    else:
        trials = hyperopt.Trials()
        best = hyperopt.fmin(f, space=space, algo=hyperopt.tpe.suggest,
                             max_evals=max_evals, trials=trials)

    with open(os.path.join(path, 'trials.pck'), 'w') as f:
        pickle.dump(trials, f)

    return best, trials
コード例 #9
0
ファイル: tpecall.py プロジェクト: bjkomer/HPOlib
def main():
    # Parse options and arguments
    parser = OptionParser()
    parser.add_option("-p",
                      "--space",
                      dest="spaceFile",
                      help="Where is the space.py located?")
    parser.add_option("-a",
                      "--algoExec",
                      dest="algoExec",
                      help="Which function to load located?")
    parser.add_option("-m",
                      "--maxEvals",
                      dest="maxEvals",
                      help="How many evaluations?")
    parser.add_option("-s",
                      "--seed",
                      dest="seed",
                      default="123",
                      type=int,
                      help="Seed for the TPE algorithm")
    parser.add_option("-r",
                      "--restore",
                      dest="restore",
                      action="store_true",
                      help="When this flag is set state.pkl is restored in " +
                      "the current working directory")
    parser.add_option("--random",
                      default=False,
                      dest="random",
                      action="store_true",
                      help="Use a random search")
    (options, args) = parser.parse_args()

    # First remove ".py"
    algo, ext = os.path.splitext(os.path.basename(options.algoExec))
    space, ext = os.path.splitext(os.path.basename(options.spaceFile))

    # Then load dict searchSpace and out function cv.py
    import sys
    sys.path.append("./")
    sys.path.append("")
    print os.getcwd()
    module = import_module(space)
    search_space = module.space
    fn = import_module(algo)
    fn = fn.doForTPE

    if options.random:
        # We use a random search
        suggest = hyperopt.tpe.rand.suggest
    else:
        suggest = hyperopt.tpe.suggest

    rstate = np.random.RandomState(options.seed)

    # Now run TPE, emulate fmin.fmin()
    state_filename = "state.pkl"
    if options.restore:
        # We do not need to care about the state of the trials object since it
        # is only serialized in a synchronized state, there will never be a save
        # with a running experiment
        fh = open(state_filename)
        tmp_dict = cPickle.load(fh)
        domain = tmp_dict['domain']
        trials = tmp_dict['trials']
        rstate = tmp_dict['rstate']
        print trials.__dict__
    else:
        domain = hyperopt.Domain(fn, search_space)
        trials = hyperopt.Trials()
        fh = open(state_filename, "w")
        # By this we probably loose the seed; not too critical for a restart
        cPickle.dump({
            "trials": trials,
            "domain": domain,
            "rstate": rstate
        }, fh)
        fh.close()

    for i in range(int(options.maxEvals) + 1):
        # in exhaust, the number of evaluations is max_evals - num_done
        rval = hyperopt.FMinIter(suggest,
                                 domain,
                                 trials,
                                 max_evals=i,
                                 rstate=rstate)
        rval.exhaust()
        fh = open(state_filename, "w")
        cPickle.dump({
            "trials": trials,
            "domain": domain,
            "rstate": rstate
        }, fh)
        fh.close()

    best = trials.argmin
    print "Best Value found for params:", best
コード例 #10
0
ファイル: lrcall.py プロジェクト: yuyuz/FLASH
def _split_coef_by_layer(coef, cum_ni):
    """Split a flat coefficient vector into its four per-layer slices."""
    return (coef[:cum_ni[0]],
            coef[cum_ni[0]:cum_ni[1]],
            coef[cum_ni[1]:cum_ni[2]],
            coef[cum_ni[2]:])


def _log_unit_rankings(lr, lr_time, cum_ni):
    """Log the per-layer coefficient argsort of the result and time models."""
    ebeta = _split_coef_by_layer(lr.coef_, cum_ni)
    logger.info('LR model estimated unit ranking: %s %s %s %s' %
                (str(ebeta[0].argsort()), str(ebeta[1].argsort()),
                 str(ebeta[2].argsort()), str(ebeta[3].argsort())))
    ebeta_time = _split_coef_by_layer(lr_time.coef_, cum_ni)
    logger.info(
        'LR Time model estimated unit ranking: %s %s %s %s' %
        (str(ebeta_time[0].argsort()), str(ebeta_time[1].argsort()),
         str(ebeta_time[2].argsort()), str(ebeta_time[3].argsort())))


def _fit_ridge(X, y, alpha):
    """Fit and return a fresh Ridge regression model on (X, y)."""
    model = linear_model.Ridge(alpha=alpha)
    model.fit(X, y)
    return model


def main():
    """Run the three-phase pipeline search driven by command-line options.

    Phases, in order:

    1. Initialization: ``--init_budget`` evaluations of pipelines picked
       either by optimal design (``--use_optimal_design 1``) or purely at
       random.
    2. EI-controlled period: ``--ei_budget`` evaluations, each choosing the
       next pipeline with ``get_next_by_EI`` from two Ridge models (one on
       the logged result, one on the log of the logged duration) that are
       refitted after every valid evaluation.
    3. Final optimization on the subspace returned by
       ``get_covered_units_by_ei``: for ``--algo SMAC`` the subspace is
       written to ``pickup.txt`` and ``params.pcs``; for ``--algo TPE``
       hyperopt's TPE runs ``--bopt_budget`` evaluations of ``cv.main``.

    Evaluations are performed by ``cv.main``; their outcomes are read back
    from ``lr.pkl`` in the current working directory.

    Exits with status 1 when the search-space file is missing or not given.
    """
    parser = ArgumentParser()

    parser.add_argument('-p',
                        '--space',
                        dest='spaceFile',
                        help='Where is the space.py located?')
    parser.add_argument(
        '--use_optimal_design',
        dest='use_optimal_design',
        help='Use optimal design or pure random initialization?')
    parser.add_argument('--init_budget',
                        dest='init_budget',
                        help='How many evaluations for random burning period?')
    parser.add_argument(
        '--ei_budget',
        dest='ei_budget',
        help='How many evaluations for EI controlled online period?')
    parser.add_argument(
        '--bopt_budget',
        dest='bopt_budget',
        help=
        'How many evaluations for Bayesian optimization after get subspace?')
    parser.add_argument(
        '--ei_xi',
        dest='ei_xi',
        help='What is the exploration parameter for computing EI?')
    parser.add_argument(
        '--top_k_pipelines',
        dest='top_k_pipelines',
        help='How many top (LR predicted) pipelines to cover in subspace?')
    parser.add_argument('-s',
                        '--seed',
                        default='1',
                        dest='seed',
                        type=int,
                        help='Seed for the algorithm')

    parser.add_argument(
        '-a',
        '--algo',
        default='SMAC',
        dest='algo',
        type=str,
        help='Specify the algorithm after LR, can be SMAC or TPE')

    parser.add_argument(
        '-r',
        '--restore',
        action='store_true',
        dest='restore',
        help='When this flag is set state.pkl is restored in ' +
        'the current working directory')
    parser.add_argument('--random',
                        default=False,
                        action='store_true',
                        dest='random',
                        help='Use a random search')
    parser.add_argument('--cwd',
                        help='Change the working directory before '
                        'optimizing.')

    # Unrecognized options are tolerated and ignored.
    args, _ = parser.parse_known_args()

    if args.cwd:
        os.chdir(args.cwd)

    # Guard against a missing --space as well: os.path.exists(None) would
    # raise TypeError instead of reaching the intended error exit.
    if not args.spaceFile or not os.path.exists(args.spaceFile):
        logger.critical('Search space not found: %s' % args.spaceFile)
        sys.exit(1)

    # First remove '.py'
    space, _ = os.path.splitext(os.path.basename(args.spaceFile))

    # Make the space module importable from the working directory.
    sys.path.append('./')
    sys.path.append('')

    module = import_module(space)
    # Number of units in each layer, and the cumulative offsets used to
    # slice the flat coefficient vectors back into layers.
    ni = [len(d) for d in module.layer_dict_list]
    cum_ni = np.cumsum(ni)

    log_filename = 'lr.pkl'

    # ------------------------------------------------------------------
    # Phase 1: random burning period as initialization
    # ------------------------------------------------------------------
    init_budget = int(args.init_budget)
    if args.use_optimal_design == '1':
        picks = get_random_picks_by_optimal_design(ni, init_budget)
    else:
        picks = get_pure_random_picks(ni, init_budget)
    for i in range(init_budget):
        times = get_num_of_trials(log_filename, filter_valid=False)
        valid_times = get_num_of_trials(log_filename, filter_valid=True)
        logger.info('IMPORTANT! YOU ARE RUNNING FLASH WITH: %s' % args.algo)
        logger.info('Total evaluation times: %d, valid times: %d' %
                    (times, valid_times))
        logger.info('Random burning period times: %d, valid times: %d' %
                    (times, valid_times))
        subspace = construct_subspace(module, picks[i])
        params = sample(subspace)
        cv.main(params)
    valid_times_in_random_period = get_num_of_trials(log_filename,
                                                     filter_valid=True)

    # ------------------------------------------------------------------
    # Train the first LR models before entering the EI controlled period
    # ------------------------------------------------------------------
    # NOTE(security): unpickling executes arbitrary code; lr.pkl must only
    # ever come from this experiment's own runs.
    with open(log_filename) as fh:
        log = cPickle.load(fh)
    trials = log['trials']

    X = []
    y = []
    y_time = []
    for trial in trials:
        result = trial['result']
        duration = trial['duration']
        # Only trials with a logged result of at most 100 are used for
        # training (evaluations that return exactly 100.0 are accepted).
        if result <= 100:
            params = trial['params']
            rescaling = params['-rescaling']
            balancing = params['-balancing']
            feat_pre = params['-feat_pre']
            clf = params['-classifier']
            # One-hot encode the chosen unit of each of the four layers.
            x = [[0] * n for n in ni]
            x[0][module.d_rescaling[rescaling]] = 1
            x[1][module.d_balancing[balancing]] = 1
            x[2][module.d_feat_pre[feat_pre]] = 1
            x[3][module.d_clf[clf]] = 1
            X.append(np.array(x[0] + x[1] + x[2] + x[3]))
            y.append(result)
            y_time.append(np.log(duration))
    X = np.array(X)
    alpha = 1.0
    lr = _fit_ridge(X, y, alpha)
    lr_time = _fit_ridge(X, y_time, alpha)

    # ------------------------------------------------------------------
    # Phase 2: online period controlled by EI
    # ------------------------------------------------------------------
    ei_budget = int(args.ei_budget)
    for i in range(ei_budget):
        times = get_num_of_trials(log_filename, filter_valid=False)
        valid_times = get_num_of_trials(log_filename, filter_valid=True)
        logger.info('Total evaluation times: %d, valid times: %d' %
                    (times, valid_times))
        logger.info(
            'EI controlled period times: %d, valid times: %d' %
            (times - init_budget, valid_times - valid_times_in_random_period))
        _log_unit_rankings(lr, lr_time, cum_ni)

        # Pick the best pipeline by EI and evaluate one sample from it.
        x_next = get_next_by_EI(ni, alpha, lr, lr_time, X, y,
                                float(args.ei_xi))
        pick = [[np.argmax(x_next_i)] for x_next_i in x_next]
        subspace = construct_subspace(module, pick)
        params = sample(subspace)
        cv.main(params)

        # Refit both models only when the new evaluation is valid.
        result, duration = get_last_run(log_filename)
        if result <= 100:
            x_next_flat = np.array(x_next[0] + x_next[1] + x_next[2] +
                                   x_next[3])
            X = np.vstack([X, x_next_flat])
            y.append(result)
            y_time.append(np.log(duration))
            lr = _fit_ridge(X, y, alpha)
            lr_time = _fit_ridge(X, y_time, alpha)
    valid_times_in_ei_period = get_num_of_trials(
        log_filename, filter_valid=True) - valid_times_in_random_period

    # ------------------------------------------------------------------
    # Construct the final subspace based on the LR prediction
    # ------------------------------------------------------------------
    final_pick = get_covered_units_by_ei(ni, alpha, lr, lr_time, X, y, 0,
                                         int(args.top_k_pipelines))
    final_subspace = construct_subspace(module, final_pick)

    _log_unit_rankings(lr, lr_time, cum_ni)
    logger.info('Selected pipelines: %s %s %s %s' %
                (final_pick[0], final_pick[1], final_pick[2], final_pick[3]))

    # ------------------------------------------------------------------
    # Phase 3 with SMAC: only export the subspace for SMAC to consume
    # ------------------------------------------------------------------
    if args.algo == 'SMAC':
        # One line per layer, unit indices separated by spaces.
        with open('pickup.txt', 'w') as fh:
            for layer_pick in final_pick:
                for i in layer_pick:
                    fh.write('%d ' % i)
                fh.write('\n')
        subspace = construct_subspace(module, final_pick)
        new_space = convert_tpe_to_smac_from_object(subspace)
        with open('params.pcs', 'w') as fh:
            fh.write(new_space)

    # ------------------------------------------------------------------
    # Phase 3 with TPE
    # ------------------------------------------------------------------
    elif args.algo == 'TPE':
        fn = cv.main
        domain = hyperopt.Domain(fn, final_subspace, rseed=int(args.seed))
        trials = hyperopt.Trials()
        bopt_budget = int(args.bopt_budget)
        tpe_with_seed = partial(hyperopt.tpe.suggest, seed=int(args.seed))
        for i in range(bopt_budget):
            times = get_num_of_trials(log_filename, filter_valid=False)
            valid_times = get_num_of_trials(log_filename, filter_valid=True)
            logger.info('Total evaluation times: %d, valid times: %d' %
                        (times, valid_times))
            logger.info(
                'TPE period times: %d, valid times: %d' %
                (times - init_budget - ei_budget, valid_times -
                 valid_times_in_random_period - valid_times_in_ei_period))
            _log_unit_rankings(lr, lr_time, cum_ni)
            logger.info(
                'Selected pipelines: %s %s %s %s' %
                (final_pick[0], final_pick[1], final_pick[2], final_pick[3]))
            # In exhaust, the number of evaluations is max_evals - num_done.
            # Using i + 1 makes every iteration run exactly one evaluation,
            # so the phase performs bopt_budget evaluations in total
            # (max_evals=i would make the first iteration a no-op).
            rval = hyperopt.FMinIter(tpe_with_seed,
                                     domain,
                                     trials,
                                     max_evals=i + 1)
            rval.exhaust()
コード例 #11
0
ファイル: tpe.py プロジェクト: 2793145003/advisor
    def get_new_suggestions(self, study_id, trials=[], number=1):
        """
        Create and save `number` new trials whose values are suggested by
        hyperopt's TPE algorithm.

        Parameters
        ----------
        study_id :
            Id of the Study the new trials belong to.
        trials : list
            Accepted for interface compatibility; not consulted. Suggestions
            are drawn from a fresh, empty hyperopt history.
        number : int
            How many trials to create.

        Returns
        -------
        list
            The newly created and saved Trial objects.
        """
        # Placeholder search space: a single dimension 'x' uniform in
        # [-10, 10], shared by every DOUBLE parameter.
        # TODO: build the space from the study configuration, support
        # INTEGER / DISCRETE / CATEGORICAL parameters, feed completed trial
        # results back into the hyperopt history, and allow choosing the
        # algorithm (tpe / random_search / anneal).
        search_space = hyperopt.hp.uniform('x', -10, 10)
        hp_trials = hyperopt.Trials()
        domain = hyperopt.Domain(None,
                                 search_space,
                                 pass_expr_memo_ctrl=None)
        rval = hyperopt.FMinIter(hyperopt.tpe.suggest,
                                 domain,
                                 hp_trials,
                                 max_evals=-1,
                                 rstate=np.random.RandomState(),
                                 verbose=0)
        rval.catch_eval_exceptions = False

        def suggest_x():
            # Ask the algorithm for exactly one new point and return its 'x'.
            new_ids = rval.trials.new_trial_ids(1)
            rval.trials.refresh()
            random_state = rval.rstate.randint(2**31 - 1)
            new_trials = rval.algo(new_ids, rval.domain, hp_trials,
                                   random_state)
            rval.trials.refresh()
            # Example of vals: {'x': [8.721658602103911]}
            return new_trials[0]['misc']['vals']["x"][0]

        study = Study.objects.get(id=study_id)
        study_configuration_json = json.loads(study.study_configuration)
        params = study_configuration_json["params"]

        # Only DOUBLE parameters receive a value for now. Other types are
        # skipped instead of being assigned an undefined or stale value.
        double_names = [
            param["parameterName"] for param in params
            if param["type"] == "DOUBLE"
        ]

        return_trial_list = []
        for _ in range(number):
            trial = Trial.create(study.id, "TpeTrial")
            parameter_values_json = {}

            if double_names:
                # Draw a fresh suggestion per trial so that the returned
                # trials are not all identical.
                suggest_value = suggest_x()
                for name in double_names:
                    parameter_values_json[name] = suggest_value

            trial.parameter_values = json.dumps(parameter_values_json)
            trial.save()
            return_trial_list.append(trial)

        return return_trial_list