def update_search_space(self, search_space):
    """Rebuild the hyperopt optimisation state from a new search space.

    Called when the experiment is first set up and whenever the search
    space is updated in the WebUI.

    Parameters
    ----------
    search_space : dict
        Search space definition. It is stored on ``self.json`` and
        converted to a hyperopt space with ``json2space``.
    """
    self.json = search_space
    space = json2space(self.json)
    random_state = np.random.RandomState()
    empty_trials = hp.Trials()
    # No objective function is attached: the domain is only used to sample.
    sampling_domain = hp.Domain(None, space, pass_expr_memo_ctrl=None)
    suggest = self._choose_tuner(self.algorithm_name)
    fmin_iter = hp.FMinIter(
        suggest,
        sampling_domain,
        empty_trials,
        max_evals=-1,
        rstate=random_state,
        verbose=0,
    )
    self.rval = fmin_iter
    self.rval.catch_eval_exceptions = False
def create_fmin(self):
    """Create a new ``hyperopt.FMinIter`` and store it on ``self.fmin``.

    The iterator is built from ``self.hyperopt_algorithm``,
    ``self.hyperopt_domain`` and ``self.hyperopt_rstate`` with an empty
    ``hyperopt.Trials`` history, and has ``catch_eval_exceptions``
    switched off.
    """
    fmin_kwargs = dict(
        trials=hyperopt.Trials(),
        max_evals=-1,
        rstate=self.hyperopt_rstate,
        verbose=False,
    )
    self.fmin = hyperopt.FMinIter(
        self.hyperopt_algorithm, self.hyperopt_domain, **fmin_kwargs)
    self.fmin.catch_eval_exceptions = False
def get_suggestions(self, configs: List[Dict] = None, metrics: List[float] = None) -> List[Dict]:
    """Return ``self.config.num_runs`` new parameter suggestions.

    Previous ``configs`` and ``metrics`` are replayed into a hyperopt
    trials object via ``self._get_previous_observations``; the configured
    algorithm is then run through ``self.run_algorithm`` and its sampled
    values are mapped back to parameter values.

    Args:
        configs: Previously evaluated configurations, if any.
        metrics: Metrics observed for ``configs``, if any.

    Returns:
        One dict per requested run, mapping parameter name to value.

    Raises:
        ValueError: If ``self.config.num_runs`` is falsy.
    """
    num_runs = self.config.num_runs
    if not num_runs:
        raise ValueError("This search strategy requires `num_runs`.")

    rand_generator = get_random_generator(seed=self.config.seed)
    domain = hyperopt.Domain(None, self._search_space, pass_expr_memo_ctrl=None)
    history = self._get_previous_observations(
        hyperopt_domain=domain, configs=configs, metrics=metrics)
    # Without both configs and metrics there is nothing to learn from yet.
    is_first = not (configs and metrics)

    minimize = hyperopt.FMinIter(
        self.config.algorithm,
        domain,
        history,
        max_evals=-1,
        rstate=rand_generator,
        verbose=0,
    )
    minimize.catch_eval_exceptions = False

    new_ids = minimize.trials.new_trial_ids(num_runs)
    minimize.trials.refresh()
    seed = minimize.rstate.randint(2**31 - 1)
    new_trials = self.run_algorithm(is_first, new_ids, minimize.domain, history, seed)
    minimize.trials.refresh()

    suggestions = []
    for new_trial in (new_trials[tid] for tid in range(num_runs)):
        sampled = new_trial["misc"]["vals"]
        suggestion = {}
        for name, observations in sampled.items():
            observed = observations[0]
            # Parameters listed in _param_to_value store an index/key into
            # that mapping; all others store the value itself.
            if name in self._param_to_value:
                suggestion[name] = self._param_to_value[name][observed]
            else:
                suggestion[name] = observed
        suggestions.append(suggestion)
    return suggestions
def update_search_space(self, search_space):
    """Validate a search space and rebuild the hyperopt state from it.

    Parameters
    ----------
    search_space : dict
        Search space definition. It is checked with
        ``validate_search_space``, stored on ``self.json`` and converted
        to a hyperopt space with ``json2space``.
    """
    validate_search_space(search_space)
    self.json = search_space
    space = json2space(self.json)
    random_state = np.random.RandomState()
    empty_trials = hp.Trials()
    # No objective function is attached: the domain is only used to sample.
    sampling_domain = hp.Domain(None, space, pass_expr_memo_ctrl=None)
    suggest = self._choose_tuner(self.algorithm_name)
    fmin_iter = hp.FMinIter(
        suggest,
        sampling_domain,
        empty_trials,
        max_evals=-1,
        rstate=random_state,
        verbose=0,
    )
    self.rval = fmin_iter
    self.rval.catch_eval_exceptions = False
def get_new_suggestions(self, study_name, input_trials=None, number=1):
    """Suggest ``number`` new trials for a study with ``self.hyperopt_algorithm``.

    The study's parameter configuration is translated into a hyperopt
    search space, every completed trial of the study is replayed into a
    ``hyperopt.Trials`` history, and the algorithm is then asked for one
    new point per requested trial.

    Parameters
    ----------
    study_name : str
        Name used to look up the ``Study`` and its completed ``Trial`` rows.
    input_trials : list, optional
        Not used by this implementation; kept for interface compatibility.
    number : int
        How many new trials to create.

    Returns
    -------
    list
        The objects returned by ``Trial.create``, each with
        ``parameter_values`` set to a JSON string.
    """

    def feasible_points(param):
        """Parse the comma-separated feasible points; DISCRETE ones become floats."""
        points = [value.strip() for value in param["feasiblePoints"].split(",")]
        if param["type"] == "DISCRETE":
            return [float(point) for point in points]
        return points

    study = Study.objects.get(name=study_name)
    study_configuration_json = json.loads(study.study_configuration)
    params = study_configuration_json["params"]

    # Construct search space, example:
    # {"x": hyperopt.hp.uniform('x', -10, 10), "x2": hyperopt.hp.uniform('x2', -10, 10)}
    hyperopt_search_space = {}
    for param in params:
        param_name = param["parameterName"]
        if param["type"] == "INTEGER":
            # TODO: Support int type of search space
            pass
        elif param["type"] == "DOUBLE":
            hyperopt_search_space[param_name] = hyperopt.hp.uniform(
                param_name, param["minValue"], param["maxValue"])
        elif param["type"] in ("DISCRETE", "CATEGORICAL"):
            # hp.choice samples an index, so the option labels stay strings here.
            feasible_point_list = [
                value.strip() for value in param["feasiblePoints"].split(",")
            ]
            hyperopt_search_space[param_name] = hyperopt.hp.choice(
                param_name, feasible_point_list)

    # New hyperopt variables
    hyperopt_rstate = np.random.RandomState()
    hyperopt_domain = hyperopt.Domain(
        None, hyperopt_search_space, pass_expr_memo_ctrl=None)

    hyperopt_trial_specs = []
    hyperopt_trial_results = []
    # Example: [{'tid': 0, 'idxs': {'learning_rate': [0], 'optimizer': [0]},
    #            'cmd': ('domain_attachment', 'FMinIter_Domain'),
    #            'vals': {'learning_rate': [0.1], 'optimizer': [1]}, 'workdir': None}]
    hyperopt_trial_miscs = []
    hyperopt_trial_new_ids = []

    # Replay the completed advisor trials into a hyperopt history.
    completed_hyperopt_trials = hyperopt.Trials()
    completed_advisor_trials = Trial.objects.filter(
        study_name=study_name, status="Completed")

    for index, advisor_trial in enumerate(completed_advisor_trials):
        # Example: {"learning_rate": 0.01, "optimizer": "ftrl"}
        parameter_values_json = json.loads(advisor_trial.parameter_values)

        # idxs example: {'learning_rate': [0], 'optimizer': [0]}
        # vals example: {'learning_rate': [0.1], 'optimizer': [1]}
        trial_idxs = {}
        trial_vals = {}
        for param in params:
            param_name = param["parameterName"]
            if param["type"] == "DOUBLE":
                trial_idxs[param_name] = [index]
                trial_vals[param_name] = [parameter_values_json[param_name]]
            elif param["type"] in ("DISCRETE", "CATEGORICAL"):
                # Choice parameters are recorded by the index of the chosen option.
                chosen = feasible_points(param).index(
                    parameter_values_json[param_name])
                trial_idxs[param_name] = [index]
                trial_vals[param_name] = [chosen]
            # INTEGER parameters are not part of the search space yet.

        hyperopt_trial_new_ids.append(index)
        hyperopt_trial_specs.append(None)
        hyperopt_trial_miscs.append(dict(
            tid=index,
            cmd=hyperopt_domain.cmd,
            workdir=hyperopt_domain.workdir,
            idxs=trial_idxs,
            vals=trial_vals))

        # TODO: Use negative objective value for loss or not
        loss_for_hyperopt = advisor_trial.objective_value
        if study_configuration_json["goal"] == "MAXIMIZE":
            # hyperopt only minimizes, so negate the objective for maximization.
            loss_for_hyperopt = -1 * advisor_trial.objective_value
        hyperopt_trial_results.append({
            "loss": loss_for_hyperopt,
            "status": hyperopt.STATUS_OK
        })

    if hyperopt_trial_new_ids:
        hyperopt_trials = completed_hyperopt_trials.new_trial_docs(
            hyperopt_trial_new_ids, hyperopt_trial_specs,
            hyperopt_trial_results, hyperopt_trial_miscs)
        for current_hyperopt_trial in hyperopt_trials:
            current_hyperopt_trial["state"] = hyperopt.JOB_STATE_DONE

        completed_hyperopt_trials.insert_trial_docs(hyperopt_trials)
        completed_hyperopt_trials.refresh()

    rval = hyperopt.FMinIter(
        self.hyperopt_algorithm,
        hyperopt_domain,
        completed_hyperopt_trials,
        max_evals=-1,
        rstate=hyperopt_rstate,
        verbose=0)
    rval.catch_eval_exceptions = False

    new_ids = rval.trials.new_trial_ids(number)
    rval.trials.refresh()

    # Construct return advisor trials from new hyperopt trials.
    return_trial_list = []
    for new_id in new_ids:
        # Ask for one point per id with a fresh seed: some hyperopt algorithms
        # return a single trial per call, and reading index 0 for every
        # requested trial would hand back identical suggestions.
        random_state = rval.rstate.randint(2**31 - 1)
        new_trial = self.hyperopt_algorithm(
            [new_id], rval.domain, completed_hyperopt_trials, random_state)[0]
        # Example: {'hidden2': [2], 'learning_rate': [0.0463], 'optimizer': [3]}
        vals = new_trial['misc']['vals']

        new_advisor_trial = Trial.create(study.name, "TpeTrial")
        parameter_values_json = {}
        for param in params:
            param_name = param["parameterName"]
            if param["type"] == "DOUBLE":
                parameter_values_json[param_name] = vals[param_name][0]
            elif param["type"] in ("DISCRETE", "CATEGORICAL"):
                suggest_index = vals[param_name][0]
                parameter_values_json[param_name] = feasible_points(param)[
                    suggest_index]
            # INTEGER parameters were never sampled, so nothing is written.

        new_advisor_trial.parameter_values = json.dumps(parameter_values_json)
        return_trial_list.append(new_advisor_trial)

    rval.trials.refresh()
    return return_trial_list
def main(): prog = "python statistics.py WhatIsThis <manyPickles> WhatIsThis <manyPickles> [WhatIsThis <manyPickles>]" description = "Return some statistical information" parser = ArgumentParser(description=description, prog=prog) parser.add_argument("-p", "--space", dest="spaceFile", help="Where is the space.py located?") parser.add_argument("-m", "--maxEvals", dest="maxEvals", help="How many evaluations?") parser.add_argument("-s", "--seed", default="1", dest="seed", type=int, help="Seed for the TPE algorithm") parser.add_argument( "-r", "--restore", action="store_true", dest="restore", help="When this flag is set state.pkl is restored in " + "the current working directory") parser.add_argument("--random", default=False, action="store_true", dest="random", help="Use a random search") parser.add_argument("--cwd", help="Change the working directory before " "optimizing.") args, unknown = parser.parse_known_args() if args.cwd: os.chdir(args.cwd) cfg = load_experiment_config_file() log_level = cfg.getint("HPOLIB", "HPOlib_loglevel") logging.basicConfig(format='[%(levelname)s] [%(asctime)s:%(name)s] %(' 'message)s', datefmt='%H:%M:%S') logger.setLevel(log_level) if not os.path.exists(args.spaceFile): logger.critical("Search space not found: %s" % args.spaceFile) sys.exit(1) # First remove ".py" space, ext = os.path.splitext(os.path.basename(args.spaceFile)) # Then load dict searchSpace and out function cv.py sys.path.append("./") sys.path.append("") module = import_module(space) search_space = module.space cli_target = "HPOlib.optimization_interceptor" fn = partial(command_line_function, cli_target=cli_target) if args.random: # We use a random search tpe_with_seed = partial(hyperopt.tpe.rand.suggest, seed=int(args.seed)) logger.info("Using Random Search") else: tpe_with_seed = partial(hyperopt.tpe.suggest, seed=int(args.seed)) # Now run TPE, emulate fmin.fmin() state_filename = "state.pkl" if args.restore: # We do not need to care about the state of the trials object 
since it # is only serialized in a synchronized state, there will never be a save # with a running experiment fh = open(state_filename) tmp_dict = cPickle.load(fh) domain = tmp_dict['domain'] trials = tmp_dict['trials'] print trials.__dict__ else: domain = hyperopt.Domain(fn, search_space, rseed=int(args.seed)) trials = hyperopt.Trials() fh = open(state_filename, "w") # By this we probably loose the seed; not too critical for a restart cPickle.dump({"trials": trials, "domain": domain}, fh) fh.close() for i in range(int(args.maxEvals) + 1): # in exhaust, the number of evaluations is max_evals - num_done rval = hyperopt.FMinIter(tpe_with_seed, domain, trials, max_evals=i) rval.exhaust() fh = open(state_filename, "w") cPickle.dump({"trials": trials, "domain": domain}, fh) fh.close() best = trials.argmin print "Best Value found for params:", best
def getSuggestions(self, search_space, trials, request_number):
    """Get ``request_number`` new suggestions from ``self.hyperopt_algorithm``.

    The parameters in ``search_space`` are translated into a hyperopt
    search space, the given ``trials`` are replayed into a
    ``hyperopt.Trials`` history, and the algorithm is then asked for one
    new point per requested suggestion.

    :param search_space: object exposing ``params`` (each with ``name``,
        ``type``, ``min``, ``max``, ``list``) and ``goal``.
    :param trials: evaluated trials; each exposes ``name``, ``assignments``
        and ``target_metric.value``.
    :param request_number: number of suggestions to return.
    :return: list with one ``BaseHyperoptService.convert(search_space, vals)``
        result per suggestion.
    """
    # Construct search space, example:
    # {"x": hyperopt.hp.uniform('x', -10, 10), "x2": hyperopt.hp.uniform('x2', -10, 10)}
    hyperopt_search_space = {}
    for param in search_space.params:
        if param.type == INTEGER:
            hyperopt_search_space[param.name] = hyperopt.hp.quniform(
                param.name, float(param.min), float(param.max), 1)
        elif param.type == DOUBLE:
            hyperopt_search_space[param.name] = hyperopt.hp.uniform(
                param.name, float(param.min), float(param.max))
        elif param.type == CATEGORICAL or param.type == DISCRETE:
            hyperopt_search_space[param.name] = hyperopt.hp.choice(
                param.name, param.list)

    # New hyperopt variables
    hyperopt_rstate = np.random.RandomState()
    hyperopt_domain = hyperopt.Domain(None,
                                      hyperopt_search_space,
                                      pass_expr_memo_ctrl=None)

    hyperopt_trial_specs = []
    hyperopt_trial_results = []
    # Example: [{'tid': 0, 'idxs': {'learning_rate': [0], 'optimizer': [0]},
    #            'cmd': ('domain_attachment', 'FMinIter_Domain'),
    #            'vals': {'learning_rate': [0.1], 'optimizer': [1]}, 'workdir': None}]
    hyperopt_trial_miscs = []
    hyperopt_trial_new_ids = []

    # Replay the evaluated trials into a hyperopt history.
    completed_hyperopt_trials = hyperopt.Trials()
    for trial in trials:
        # idxs example: {'learning_rate': [0], 'optimizer': [0]}
        # vals example: {'learning_rate': [0.1], 'optimizer': [1]}
        trial_idxs = {}
        trial_vals = {}
        new_id = trial.name
        hyperopt_trial_new_ids.append(new_id)

        for param in search_space.params:
            # First assignment with a matching name, or None when absent.
            parameter_value = next(
                (assignment.value for assignment in trial.assignments
                 if assignment.name == param.name),
                None)
            if param.type == INTEGER or param.type == DOUBLE:
                trial_idxs[param.name] = [new_id]
                trial_vals[param.name] = [parameter_value]
            elif param.type == DISCRETE or param.type == CATEGORICAL:
                # Choice parameters are recorded by the index of the chosen option.
                trial_idxs[param.name] = [new_id]
                trial_vals[param.name] = [param.list.index(parameter_value)]

        hyperopt_trial_specs.append(None)
        hyperopt_trial_miscs.append(dict(
            tid=new_id,
            cmd=hyperopt_domain.cmd,
            workdir=hyperopt_domain.workdir,
            idxs=trial_idxs,
            vals=trial_vals))

        # TODO: Use negative objective value for loss or not
        objective_for_hyperopt = float(trial.target_metric.value)
        if search_space.goal == MAX_GOAL:
            # hyperopt only minimizes, so negate the objective for maximization.
            objective_for_hyperopt = -1 * objective_for_hyperopt
        hyperopt_trial_results.append({
            "loss": objective_for_hyperopt,
            "status": hyperopt.STATUS_OK
        })

    if hyperopt_trial_new_ids:
        hyperopt_trials = completed_hyperopt_trials.new_trial_docs(
            hyperopt_trial_new_ids, hyperopt_trial_specs,
            hyperopt_trial_results, hyperopt_trial_miscs)
        for current_hyperopt_trial in hyperopt_trials:
            current_hyperopt_trial["state"] = hyperopt.JOB_STATE_DONE

        completed_hyperopt_trials.insert_trial_docs(hyperopt_trials)
        completed_hyperopt_trials.refresh()

    rval = hyperopt.FMinIter(self.hyperopt_algorithm,
                             hyperopt_domain,
                             completed_hyperopt_trials,
                             max_evals=-1,
                             rstate=hyperopt_rstate,
                             verbose=0)
    rval.catch_eval_exceptions = False

    new_ids = rval.trials.new_trial_ids(request_number)
    rval.trials.refresh()

    # Construct return advisor trials from new hyperopt trials.
    list_of_assignments = []
    for new_id in new_ids:
        # One call per id with a fresh seed: an algorithm that returns a
        # single trial per call would otherwise make new_trials[i] fail
        # for i >= 1.
        random_state = rval.rstate.randint(2**31 - 1)
        new_trial = self.hyperopt_algorithm([new_id], rval.domain,
                                            completed_hyperopt_trials,
                                            random_state)[0]
        vals = new_trial['misc']['vals']
        list_of_assignments.append(
            BaseHyperoptService.convert(search_space, vals))

    rval.trials.refresh()
    return list_of_assignments
def find_hyperparameters( setting, path, space=None, max_evals=100, trials_per_point=30, parallelization="sequential", objective="max_reward", max_concurrent_jobs=100): """ This function does hyperparameter optimization for RLPy experiments with the hyperopt library. At the end an instance of the optimization trials is stored in "path"/trials.pck :param setting: file specifying the experimental setup. It contains a make_experiment function and a dictionary named param_space if the argument space is not used. For each key of param_space there needs to be an optional argument in make_experiment :param path: directory used to store all intermediate results. :param space: (optional) an alternative specification of the hyperparameter space :param max_evals: maximum number of evaluations of a single hyperparameter setting :param trials_per_point: specifies the number of independent runs (with different seeds) of the experiment for evaluating a single hyperparameter setting. :param parallelization: either **sequential**, **joblib**, **condor_all** or **condor_full**, **condor**. the condor options can be used in a computing cluster with a HTCondor machine. The joblib option parallelizes runs on one machine and sequential runs every experiment in sequence. :param objective: (optional) string specifying the objective to optimize, possible values are *max_reward*, *min_steps*, *max_steps* :param max_concurrent_jobs: only relevant for condor_full parallelization. specifies the maximum number of jobs that should run at the same time. 
:return: a tuple containing the best hyperarameter settings and the hyperopt trials instance of the optimization procedure """ if space is None: space = import_param_space(setting) def f(hyperparam): """function to optimize by hyperopt""" # "temporary" directory to use full_path = os.path.join( path, "-".join([str(v) for v in hyperparam.values()])) # execute experiment rt.run(setting, location=full_path, ids=range(1, trials_per_point + 1), parallelization=parallelization, force_rerun=False, block=True, **hyperparam) # all jobs should be done res = tres.load_results(full_path) if objective == "max_steps": m, s, n = tres.avg_quantity(res, "steps") val = -m std = s[-1] elif objective == "min_steps": m, s, n = tres.avg_quantity(res, "steps") val = m std = s[-1] elif objective == "max_reward": m, s, n = tres.avg_quantity(res, "return") val = -m std = s[-1] else: print "unknown objective" weights = (np.arange(len(val)) + 1) ** 2 loss = (val * weights).sum() / weights.sum() print time.ctime() print "Parameters", hyperparam print "Loss", loss # use #steps/eps at the moment return {"loss": loss, "num_trials": n[-1], "status": hyperopt.STATUS_OK, "std_last_mean": std} if parallelization == "condor_all": trials = CondorTrials(path=path, ids=range(1, trials_per_point + 1), setting=setting, objective=objective) domain = hyperopt.Domain(dummy_f, space, rseed=123) rval = hyperopt.FMinIter(hyperopt.rand.suggest, domain, trials, max_evals=30, max_queue_len=30) rval.exhaust() rval = hyperopt.FMinIter(hyperopt.tpe.suggest, domain, trials, max_evals=max_evals, max_queue_len=1) rval.exhaust() best = trials.argmin elif parallelization == "condor_full": trials = _search_condor_parallel(path=path, setting=setting, objective=objective, space=space, max_evals=max_evals, trials_per_point=trials_per_point) best = trials.argmin else: trials = hyperopt.Trials() best = hyperopt.fmin(f, space=space, algo=hyperopt.tpe.suggest, max_evals=max_evals, trials=trials) with open(os.path.join(path, 
'trials.pck'), 'w') as f: pickle.dump(trials, f) return best, trials
def main(): # Parse options and arguments parser = OptionParser() parser.add_option("-p", "--space", dest="spaceFile", help="Where is the space.py located?") parser.add_option("-a", "--algoExec", dest="algoExec", help="Which function to load located?") parser.add_option("-m", "--maxEvals", dest="maxEvals", help="How many evaluations?") parser.add_option("-s", "--seed", dest="seed", default="123", type=int, help="Seed for the TPE algorithm") parser.add_option("-r", "--restore", dest="restore", action="store_true", help="When this flag is set state.pkl is restored in " + "the current working directory") parser.add_option("--random", default=False, dest="random", action="store_true", help="Use a random search") (options, args) = parser.parse_args() # First remove ".py" algo, ext = os.path.splitext(os.path.basename(options.algoExec)) space, ext = os.path.splitext(os.path.basename(options.spaceFile)) # Then load dict searchSpace and out function cv.py import sys sys.path.append("./") sys.path.append("") print os.getcwd() module = import_module(space) search_space = module.space fn = import_module(algo) fn = fn.doForTPE if options.random: # We use a random search suggest = hyperopt.tpe.rand.suggest else: suggest = hyperopt.tpe.suggest rstate = np.random.RandomState(options.seed) # Now run TPE, emulate fmin.fmin() state_filename = "state.pkl" if options.restore: # We do not need to care about the state of the trials object since it # is only serialized in a synchronized state, there will never be a save # with a running experiment fh = open(state_filename) tmp_dict = cPickle.load(fh) domain = tmp_dict['domain'] trials = tmp_dict['trials'] rstate = tmp_dict['rstate'] print trials.__dict__ else: domain = hyperopt.Domain(fn, search_space) trials = hyperopt.Trials() fh = open(state_filename, "w") # By this we probably loose the seed; not too critical for a restart cPickle.dump({ "trials": trials, "domain": domain, "rstate": rstate }, fh) fh.close() for i in 
range(int(options.maxEvals) + 1): # in exhaust, the number of evaluations is max_evals - num_done rval = hyperopt.FMinIter(suggest, domain, trials, max_evals=i, rstate=rstate) rval.exhaust() fh = open(state_filename, "w") cPickle.dump({ "trials": trials, "domain": domain, "rstate": rstate }, fh) fh.close() best = trials.argmin print "Best Value found for params:", best
def main():
    """Run the three-phase pipeline search driven by command-line options.

    Phase 1 evaluates ``init_budget`` pipelines picked either by
    ``get_random_picks_by_optimal_design`` or ``get_pure_random_picks``.
    Phase 2 fits two ridge regressions (result and log-duration) on a
    per-layer indicator encoding of the evaluated pipelines and evaluates
    ``ei_budget`` further pipelines chosen by ``get_next_by_EI``, refitting
    after each valid run. Phase 3 builds a subspace from
    ``get_covered_units_by_ei`` and either writes ``pickup.txt`` and
    ``params.pcs`` (``--algo SMAC``) or runs hyperopt TPE on that subspace
    (``--algo TPE``).

    Evaluation results are read back from the pickle file ``lr.pkl``.
    ``--restore`` and ``--random`` are parsed but not read again within this
    function. Exits with status 1 when the search space file does not exist.
    """
    parser = ArgumentParser()

    parser.add_argument('-p', '--space', dest='spaceFile',
                        help='Where is the space.py located?')
    parser.add_argument(
        '--use_optimal_design', dest='use_optimal_design',
        help='Use optimal design or pure random initialization?')
    parser.add_argument('--init_budget', dest='init_budget',
                        help='How many evaluations for random burning period?')
    parser.add_argument(
        '--ei_budget', dest='ei_budget',
        help='How many evaluations for EI controlled online period?')
    parser.add_argument(
        '--bopt_budget', dest='bopt_budget',
        help=
        'How many evaluations for Bayesian optimization after get subspace?')
    parser.add_argument(
        '--ei_xi', dest='ei_xi',
        help='What is the exploration parameter for computing EI?')
    parser.add_argument(
        '--top_k_pipelines', dest='top_k_pipelines',
        help='How many top (LR predicted) pipelines to cover in subspace?')
    parser.add_argument('-s', '--seed', default='1', dest='seed', type=int,
                        help='Seed for the algorithm')
    parser.add_argument(
        '-a', '--algo', default='SMAC', dest='algo', type=str,
        help='Specify the algorithm after LR, can be SMAC or TPE')
    parser.add_argument(
        '-r', '--restore', action='store_true', dest='restore',
        help='When this flag is set state.pkl is restored in ' +
        'the current working directory')
    parser.add_argument('--random', default=False, action='store_true',
                        dest='random', help='Use a random search')
    parser.add_argument('--cwd', help='Change the working directory before '
                        'optimizing.')

    # Unknown arguments are tolerated and ignored.
    args, unknown = parser.parse_known_args()

    if args.cwd:
        os.chdir(args.cwd)

    if not os.path.exists(args.spaceFile):
        logger.critical('Search space not found: %s' % args.spaceFile)
        sys.exit(1)

    # First remove '.py'
    space, ext = os.path.splitext(os.path.basename(args.spaceFile))

    # Then load dict searchSpace and out function cv.py
    sys.path.append('./')
    sys.path.append('')

    module = import_module(space)
    search_space = module.space
    ni = [len(d) for d in module.layer_dict_list]  # number of units in each layer
    cum_ni = np.cumsum(ni)  # cumulative offsets used to slice coefficients per layer

    log_filename = 'lr.pkl'

    # Random burning period as initialization
    init_budget = int(args.init_budget)
    if args.use_optimal_design == '1':
        picks = get_random_picks_by_optimal_design(ni, init_budget)
    else:
        picks = get_pure_random_picks(ni, init_budget)

    for i in range(init_budget):
        times = get_num_of_trials(log_filename, filter_valid=False)
        valid_times = get_num_of_trials(log_filename, filter_valid=True)
        logger.info('IMPORTANT! YOU ARE RUNNING FLASH WITH: %s' % args.algo)
        logger.info('Total evaluation times: %d, valid times: %d' %
                    (times, valid_times))
        logger.info('Random burning period times: %d, valid times: %d' %
                    (times, valid_times))
        subspace = construct_subspace(module, picks[i])
        params = sample(subspace)
        cv.main(params)
    valid_times_in_random_period = get_num_of_trials(log_filename,
                                                     filter_valid=True)

    # Train the first LR model before entering into EI controlled period
    fh = open(log_filename)
    log = cPickle.load(fh)
    trials = log['trials']
    fh.close()

    X = []
    y = []
    y_time = []
    for trial in trials:
        result = trial['result']
        time = trial['duration']
        # make sure the logged result is a number (accept evaluations return 100.0)
        if result <= 100:
            params = trial['params']
            rescaling = params['-rescaling']
            balancing = params['-balancing']
            feat_pre = params['-feat_pre']
            clf = params['-classifier']
            # One zero vector per layer with a single 1 at the chosen unit.
            x = [[0] * n for n in ni]
            x[0][module.d_rescaling[rescaling]] = 1
            x[1][module.d_balancing[balancing]] = 1
            x[2][module.d_feat_pre[feat_pre]] = 1
            x[3][module.d_clf[clf]] = 1
            x_flat = np.array(x[0] + x[1] + x[2] + x[3])
            X.append(x_flat)
            y.append(result)
            # The time model is fitted on the log of the duration.
            y_time.append(np.log(time))
    X = np.array(X)

    alpha = 1.0
    lr = linear_model.Ridge(alpha=alpha)
    lr.fit(X, y)
    lr_time = linear_model.Ridge(alpha=alpha)
    lr_time.fit(X, y_time)

    # Online period controlled by EI
    ei_budget = int(args.ei_budget)
    for i in range(ei_budget):
        times = get_num_of_trials(log_filename, filter_valid=False)
        valid_times = get_num_of_trials(log_filename, filter_valid=True)
        logger.info('Total evaluation times: %d, valid times: %d' %
                    (times, valid_times))
        logger.info(
            'EI controlled period times: %d, valid times: %d' %
            (times - init_budget, valid_times - valid_times_in_random_period))
        # Split the coefficient vector into one slice per layer.
        ebeta = lr.coef_[:cum_ni[0]], \
            lr.coef_[cum_ni[0]:cum_ni[1]], \
            lr.coef_[cum_ni[1]:cum_ni[2]], \
            lr.coef_[cum_ni[2]:]
        logger.info('LR model estimated unit ranking: %s %s %s %s' %
                    (str(ebeta[0].argsort()), str(ebeta[1].argsort()),
                     str(ebeta[2].argsort()), str(ebeta[3].argsort())))
        ebeta_time = lr_time.coef_[:cum_ni[0]], \
            lr_time.coef_[cum_ni[0]:cum_ni[1]], \
            lr_time.coef_[cum_ni[1]:cum_ni[2]], \
            lr_time.coef_[cum_ni[2]:]
        logger.info(
            'LR Time model estimated unit ranking: %s %s %s %s' %
            (str(ebeta_time[0].argsort()), str(ebeta_time[1].argsort()),
             str(ebeta_time[2].argsort()), str(ebeta_time[3].argsort())))
        # pick the best pipeline by EI
        x_next = get_next_by_EI(ni, alpha, lr, lr_time, X, y,
                                float(args.ei_xi))
        pick = [[np.argmax(x_next_i)] for x_next_i in x_next]
        subspace = construct_subspace(module, pick)
        params = sample(subspace)
        cv.main(params)
        result, time = get_last_run(log_filename)
        # Only valid runs (result <= 100) extend the training data and
        # trigger a refit of both models.
        if result <= 100:
            x_next_flat = np.array(x_next[0] + x_next[1] + x_next[2] +
                                   x_next[3])
            X = np.vstack([X, x_next_flat])
            y.append(result)
            y_time.append(np.log(time))
            lr = linear_model.Ridge(alpha=alpha)
            lr.fit(X, y)
            lr_time = linear_model.Ridge(alpha=alpha)
            lr_time.fit(X, y_time)
    valid_times_in_ei_period = get_num_of_trials(
        log_filename, filter_valid=True) - valid_times_in_random_period

    # Construct subspace based on LR prediction
    final_ebeta = lr.coef_[:cum_ni[0]], \
        lr.coef_[cum_ni[0]:cum_ni[1]], \
        lr.coef_[cum_ni[1]:cum_ni[2]], \
        lr.coef_[cum_ni[2]:]
    final_ebeta_time = lr_time.coef_[:cum_ni[0]], \
        lr_time.coef_[cum_ni[0]:cum_ni[1]], \
        lr_time.coef_[cum_ni[1]:cum_ni[2]], \
        lr_time.coef_[cum_ni[2]:]
    final_pick = get_covered_units_by_ei(ni, alpha, lr, lr_time, X, y, 0,
                                         int(args.top_k_pipelines))
    final_subspace = construct_subspace(module, final_pick)

    logger.info('LR model estimated unit ranking: %s %s %s %s' %
                (str(final_ebeta[0].argsort()), str(final_ebeta[1].argsort()),
                 str(final_ebeta[2].argsort()), str(final_ebeta[3].argsort())))
    logger.info(
        'LR Time model estimated unit ranking: %s %s %s %s' %
        (str(final_ebeta_time[0].argsort()), str(
            final_ebeta_time[1].argsort()), str(final_ebeta_time[2].argsort()),
         str(final_ebeta_time[3].argsort())))
    logger.info('Selected pipelines: %s %s %s %s' %
                (final_pick[0], final_pick[1], final_pick[2], final_pick[3]))

    # Phase 3 with SMAC
    if args.algo == 'SMAC':
        # Write the selected unit indices, one layer per line.
        fh = file('pickup.txt', 'w')
        for layer_pick in final_pick:
            for i in layer_pick:
                fh.write('%d ' % i)
            fh.write('\n')
        fh.close()
        subspace = construct_subspace(module, final_pick)
        new_space = convert_tpe_to_smac_from_object(subspace)
        fh = open('params.pcs', 'w')
        fh.write(new_space)
        fh.close()
    # Phase 3 with TPE
    elif args.algo == 'TPE':
        fn = cv.main
        domain = hyperopt.Domain(fn, final_subspace, rseed=int(args.seed))
        trials = hyperopt.Trials()
        bopt_budget = int(args.bopt_budget)
        # NOTE(review): max_evals=i starts at 0, so the first iteration
        # presumably evaluates nothing and only bopt_budget - 1 evaluations
        # run in total -- confirm whether range(bopt_budget + 1) was intended.
        for i in range(bopt_budget):
            times = get_num_of_trials(log_filename, filter_valid=False)
            valid_times = get_num_of_trials(log_filename, filter_valid=True)
            logger.info('Total evaluation times: %d, valid times: %d' %
                        (times, valid_times))
            logger.info(
                'TPE period times: %d, valid times: %d' %
                (times - init_budget - ei_budget, valid_times -
                 valid_times_in_random_period - valid_times_in_ei_period))
            logger.info(
                'LR model estimated unit ranking: %s %s %s %s' %
                (str(final_ebeta[0].argsort()), str(final_ebeta[1].argsort()),
                 str(final_ebeta[2].argsort()), str(final_ebeta[3].argsort())))
            logger.info('LR Time model estimated unit ranking: %s %s %s %s' %
                        (str(final_ebeta_time[0].argsort()),
                         str(final_ebeta_time[1].argsort()),
                         str(final_ebeta_time[2].argsort()),
                         str(final_ebeta_time[3].argsort())))
            logger.info(
                'Selected pipelines: %s %s %s %s' %
                (final_pick[0], final_pick[1], final_pick[2], final_pick[3]))
            # in exhaust, the number of evaluations is max_evals - num_done
            tpe_with_seed = partial(hyperopt.tpe.suggest, seed=int(args.seed))
            rval = hyperopt.FMinIter(tpe_with_seed, domain, trials,
                                     max_evals=i)
            rval.exhaust()
def get_new_suggestions(self, study_id, trials=None, number=1):
    """Create ``number`` new trials for a study from hyperopt samples.

    This is a placeholder implementation: the search space is hard-coded to
    a single uniform variable ``x`` in [-10, 10], sampled with
    ``hyperopt.tpe.suggest`` on an empty history, and every DOUBLE
    parameter of the study receives that sampled ``x``. Parameters of any
    other type are left out of ``parameter_values``.

    Parameters
    ----------
    study_id
        Primary key used to look up the ``Study``.
    trials : optional
        Not used by this implementation; kept for interface compatibility.
    number : int
        How many trials to create and save.

    Returns
    -------
    list
        The saved ``Trial`` objects, each with ``parameter_values`` set to
        a JSON string.
    """
    search_space = hyperopt.hp.uniform('x', -10, 10)
    rstate = np.random.RandomState()
    hyperopt_trials = hyperopt.Trials()
    domain = hyperopt.Domain(None, search_space, pass_expr_memo_ctrl=None)
    algorithm = hyperopt.tpe.suggest
    rval = hyperopt.FMinIter(algorithm,
                             domain,
                             hyperopt_trials,
                             max_evals=-1,
                             rstate=rstate,
                             verbose=0)
    rval.catch_eval_exceptions = False

    study = Study.objects.get(id=study_id)
    study_configuration_json = json.loads(study.study_configuration)
    params = study_configuration_json["params"]

    new_ids = rval.trials.new_trial_ids(number)
    rval.trials.refresh()

    return_trial_list = []
    for new_id in new_ids:
        # Sample once per trial with a fresh seed so the returned trials do
        # not all carry the same value.
        random_state = rval.rstate.randint(2**31 - 1)
        new_trial = algorithm([new_id], rval.domain, hyperopt_trials,
                              random_state)[0]
        # Example: {'x': [8.721658602103911]}
        vals = new_trial['misc']['vals']

        trial = Trial.create(study.id, "TpeTrial")
        parameter_values_json = {}
        for param in params:
            if param["type"] == "DOUBLE":
                # TODO: Get the specified value from hyperopt
                parameter_values_json[param["parameterName"]] = vals["x"][0]
            # INTEGER / DISCRETE / CATEGORICAL are not supported yet.

        trial.parameter_values = json.dumps(parameter_values_json)
        trial.save()
        return_trial_list.append(trial)

    rval.trials.refresh()
    return return_trial_list