def test_constant_liar_runs(strategy, surrogate, acq_func):
    """
    Tests whether the optimizer runs properly during the random
    initialization phase and beyond

    Parameters
    ----------
    * `strategy` [string]:
        Name of the strategy to use during optimization.

    * `surrogate` [scikit-optimize surrogate class]:
        A class of the scikit-optimize surrogate used in Optimizer.

    * `acq_func` [string]:
        Name of the acquisition function to use during optimization.
    """
    optimizer = Optimizer(
        base_estimator=surrogate(),
        dimensions=[Real(-5.0, 10.0), Real(0.0, 15.0)],
        acq_func=acq_func,
        acq_optimizer='sampling',
        random_state=0
    )

    # test arguments check: a misspelled strategy and invalid n_points
    # values should raise
    assert_raises(ValueError, optimizer.ask, {"strategy": "cl_maen"})
    assert_raises(ValueError, optimizer.ask, {"n_points": "0"})
    assert_raises(ValueError, optimizer.ask, {"n_points": 0})

    for i in range(n_steps):
        x = optimizer.ask(n_points=n_points, strategy=strategy)
        # check that exactly n_points points were generated
        assert_equal(len(x), n_points)

        if "ps" in acq_func:
            optimizer.tell(x, [[branin(v), 1.1] for v in x])
        else:
            optimizer.tell(x, [branin(v) for v in x])
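# --- Hedged example: the batched ask/tell pattern the test above exercises.
# A minimal, self-contained sketch assuming scikit-optimize is installed;
# branin comes from skopt.benchmarks and is defined on [-5, 10] x [0, 15].
from skopt import Optimizer
from skopt.benchmarks import branin
from skopt.space import Real

opt = Optimizer(dimensions=[Real(-5.0, 10.0), Real(0.0, 15.0)],
                base_estimator="GP", acq_optimizer="sampling", random_state=0)
for _ in range(3):
    # Constant liar: pending points get a placeholder objective (the min,
    # mean, or max of observed values) so one batch spreads out in space.
    batch = opt.ask(n_points=4, strategy="cl_min")
    opt.tell(batch, [branin(x) for x in batch])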
def test_reproducible_runs(strategy, surrogate):
    # two runs of the optimizer should yield exactly the same results
    optimizer = Optimizer(
        base_estimator=surrogate(random_state=1),
        dimensions=[Real(-5.0, 10.0), Real(0.0, 15.0)],
        acq_optimizer='sampling',
        random_state=1
    )
    points = []
    for i in range(n_steps):
        x = optimizer.ask(n_points, strategy)
        points.append(x)
        optimizer.tell(x, [branin(v) for v in x])

    # the x's should be exactly as they are in `points`
    optimizer = Optimizer(
        base_estimator=surrogate(random_state=1),
        dimensions=[Real(-5.0, 10.0), Real(0.0, 15.0)],
        acq_optimizer='sampling',
        random_state=1
    )
    for i in range(n_steps):
        x = optimizer.ask(n_points, strategy)

        assert points[i] == x

        optimizer.tell(x, [branin(v) for v in x])
def test_all_points_different(strategy, surrogate):
    """
    Tests whether the parallel optimizer always generates
    different points to evaluate.

    Parameters
    ----------
    * `strategy` [string]:
        Name of the strategy to use during optimization.

    * `surrogate` [scikit-optimize surrogate class]:
        A class of the scikit-optimize surrogate used in Optimizer.
    """
    optimizer = Optimizer(
        base_estimator=surrogate(),
        dimensions=[Real(-5.0, 10.0), Real(0.0, 15.0)],
        acq_optimizer='sampling',
        random_state=1
    )
    tolerance = 1e-3  # distance below which two points are considered identical
    for i in range(n_steps):
        x = optimizer.ask(n_points, strategy)
        optimizer.tell(x, [branin(v) for v in x])
        distances = pdist(x)
        assert all(distances > tolerance)
def test_same_set_of_points_ask(strategy, surrogate): """ For n_points not None, tests whether two consecutive calls to ask return the same sets of points. Parameters ---------- * `strategy` [string]: Name of the strategy to use during optimization. * `surrogate` [scikit-optimize surrogate class]: A class of the scikit-optimize surrogate used in Optimizer. """ optimizer = Optimizer( base_estimator=surrogate(), dimensions=[Real(-5.0, 10.0), Real(0.0, 15.0)], acq_optimizer='sampling', random_state=2 ) for i in range(n_steps): xa = optimizer.ask(n_points, strategy) xb = optimizer.ask(n_points, strategy) optimizer.tell(xa, [branin(v) for v in xa]) assert_equal(xa, xb) # check if the sets of points generated are equal
def test_names_dimensions():
    # Define objective
    def objective(x, noise_level=0.1):
        return np.sin(5 * x[0]) * (1 - np.tanh(x[0] ** 2)) +\
               np.random.randn() * noise_level

    # Initialize Optimizer
    opt = Optimizer([(-2.0, 2.0)], n_initial_points=1)

    # Optimize
    for i in range(2):
        next_x = opt.ask()
        f_val = objective(next_x)
        res = opt.tell(next_x, f_val)

    # Plotting this result should raise a ValueError
    assert_raises(ValueError, plots.plot_objective, res)
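# --- Hedged example: a configuration under which plotting is expected to
# succeed. A sketch only: dimensions are given explicit names (so the
# partial-dependence axes are labelled) and enough points are evaluated
# for a surrogate model to be fit.
import numpy as np
from skopt import Optimizer
from skopt.plots import plot_objective
from skopt.space import Real

opt = Optimizer([Real(-2.0, 2.0, name="x0"), Real(-2.0, 2.0, name="x1")],
                n_initial_points=5)
for _ in range(10):
    x = opt.ask()
    res = opt.tell(x, float(np.sin(5 * x[0]) * (1 - np.tanh(x[1] ** 2))))
_ = plot_objective(res)  # labelled partial-dependence plot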
class SkoptBackend(Backend): """The scikit-optimize backend uses scikit-optimize for black box optimization.""" backend_name = "scikit-optimize" implemented_funcs = ( "choice", "randrange", "uniform", ) def __init__(self, examples, params, base_estimator="GP", **options): self.init_fallback_backend() if not params: self.current_values = {} return if isinstance(base_estimator, str): base_estimator = py_str(base_estimator) self.optimizer = Optimizer(create_dimensions(params), base_estimator, **options) if examples: self.tell_examples(examples, params) else: self.current_values = {} def tell_examples(self, new_examples, params): """Special method that allows fast updating of the backend with new examples.""" data_points, losses = split_examples(new_examples, params) self.result = self.optimizer.tell(data_points, losses) current_point = self.optimizer.ask() self.current_values = make_values(params, current_point) @property def space(self): """The space over which optimization was performed.""" return self.optimizer.space @property def model(self): """Get the most recently fit model.""" return self.optimizer.models[-1]
class SkoptBackend(StandardBackend): """The scikit-optimize backend uses scikit-optimize for black box optimization.""" backend_name = "scikit-optimize" implemented_funcs = ("choice", "randrange", "uniform") @override def setup_backend(self, params, base_estimator="GP", n_initial_points=None, **options): """Special method to initialize the backend from params.""" self.params = params if isinstance(base_estimator, str): base_estimator = py_str(base_estimator) if n_initial_points is None: n_initial_points = guess_n_initial_points(params) self.optimizer = Optimizer(create_dimensions(params), base_estimator, n_initial_points=n_initial_points, **options) @override def tell_examples(self, new_examples): """Special method that allows fast updating of the backend with new examples.""" data_points, losses = split_examples(new_examples, self.params) self.result = self.optimizer.tell(data_points, losses) current_point = self.optimizer.ask() self.current_values = make_values(self.params, current_point) @property def space(self): """The space over which optimization was performed.""" return self.optimizer.space @property def model(self): """Get the most recently fit model.""" return self.optimizer.models[-1]
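# --- Hedged example: stripped of the backend plumbing, the warm-start
# pattern both backends rely on is plain skopt tell-then-ask. A sketch with
# made-up prior data; create_dimensions / split_examples / make_values above
# are project helpers, not skopt API.
from skopt import Optimizer
from skopt.space import Categorical, Integer, Real

opt = Optimizer([Real(1e-4, 1e-1, prior="log-uniform"),
                 Integer(16, 256),
                 Categorical(["adam", "sgd"])], base_estimator="GP")

prior_X = [[1e-2, 64, "adam"], [3e-3, 128, "sgd"]]  # hypothetical examples
prior_y = [0.42, 0.35]                              # losses to minimize
opt.tell(prior_X, prior_y)   # fast-forward the optimizer through old results
next_point = opt.ask()       # then ask for the next configuration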
results = train_model( mdl, optimizer, scheduler, hp_opt, train_loader=train_loader, valid_loader=valid_loader, device=device, output_dir=cfg.OUTPUT_DIR, iteration=iteration, resume=resume, best_final_acc=best_final_acc, num_epochs=cfg.TRAIN.NUM_EPOCHS, lr=lr, l2=l2, momentum=momentum, track_misclassified=cfg.TRAIN.TRACK_MISCLASSIFIED) if resume: resume = False # Update optimizer with best accuracy obtained. hp_opt.tell([lr, l2, momentum], results['best_acc']) # Save best results. if results['best_acc'] > best_final_acc: best_results = results best_final_acc = results['best_acc'] torch.save(best_results, os.path.join(cfg.OUTPUT_DIR, 'best_results.pth.tar'))
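# --- Hedged note and sketch: skopt minimizes by convention, so when the
# reported quantity is an accuracy to maximize, the negated value is usually
# told (the snippet above tells best_acc directly, which minimizes it unless
# best_acc is really an error measure). train_and_eval below is a
# hypothetical stand-in for the training call above.
from skopt import Optimizer
from skopt.space import Real

hp_opt = Optimizer([Real(1e-4, 1e-1, prior="log-uniform"),   # lr
                    Real(1e-6, 1e-2, prior="log-uniform"),   # l2
                    Real(0.5, 0.99)])                        # momentum

def train_and_eval(lr, l2, momentum):
    return 0.9  # pretend best validation accuracy

lr, l2, momentum = hp_opt.ask()
acc = train_and_eval(lr, l2, momentum)
hp_opt.tell([lr, l2, momentum], -acc)  # negate: higher accuracy, lower objective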
def main_rule_utils(config, main_loop_seed=MAIN_LOOP_INITIAL_SEED): rule_utils.store_config_on_first_run(config) experience_buffer = utils.ExperienceBuffer(config['max_experience_buffer']) config_keys = list(config['initial_config_ranges'].keys()) if deterministic_games: utils.set_seed(main_loop_seed) fixed_pool_mode = config['play_fixed_pool_only'] if fixed_pool_mode: fixed_opp_repeats = config['fixed_opponents_num_repeat_first_configs'] config_has_range = isinstance( list(config['initial_config_ranges'].values())[0], tuple) # Prepare the Bayesian optimizer if config_has_range: opt_range = [config['initial_config_ranges'][k][0] for k in config_keys] opt = Optimizer(opt_range) if config['play_fixed_pool_fit_prev_data']: fixed_pool_experience_path = rule_utils.get_self_play_experience_path( config['pool_name']) if os.path.exists(fixed_pool_experience_path): print('\nBayesian fit to earlier experiments') this_folder = os.path.dirname(__file__) agents_folder = os.path.join( this_folder, '../Rule agents/' + config['pool_name']) config_settings_path = os.path.join( agents_folder, CONFIG_SETTINGS_EXTENSION) if os.path.exists(config_settings_path): config_results = pd.read_csv(config_settings_path) suggested = config_results.iloc[:, :-1].values.tolist() target_scores = (-config_results.iloc[:, -1].values).tolist() opt.tell(suggested, target_scores) # import pdb; pdb.set_trace() # print(opt.get_result().x, opt.get_result().fun) # WRONG! else: opt = None next_fixed_opponent_suggested = None iteration_config_rewards = None experience_features_rewards_path = None while True: if deterministic_games: utils.set_seed(main_loop_seed) # Section 1: play games against agents of N previous pools if config['num_games_previous_pools'] and not fixed_pool_mode: print('\nPlay vs other rule based agents from the last {} pools'.format( config['max_pool_size'])) (self_play_experience, rules_config_path, avg_reward_sp, _) = rule_experience.play_games( pool_name=config['pool_name'], num_games=config['num_games_previous_pools'], max_pool_size=config['max_pool_size'], num_agents=config['num_agents_per_game'], exclude_current_from_opponents=False, record_videos_new_iteration=config['record_videos_new_iteration'], initial_config_ranges=config['initial_config_ranges'], use_multiprocessing=config['use_multiprocessing'], episode_steps_override=config['episode_steps_override'], early_episode_termination=config['early_episode_termination_steps'], ) experience_buffer.add(self_play_experience) # Section 2: play games against agents of the previous pool if config['num_games_evaluation'] and not fixed_pool_mode: print('\nPlay vs previous iteration') (evaluation_experience, rules_config_path, avg_reward_eval, _) = rule_experience.play_games( pool_name=config['pool_name'], num_games=config['num_games_evaluation'], max_pool_size=2, num_agents=config['num_agents_per_game'], exclude_current_from_opponents=True, use_multiprocessing=config['use_multiprocessing'], episode_steps_override=config['episode_steps_override'], early_episode_termination=config['early_episode_termination_steps'], ) # experience_buffer.add(evaluation_experience) if fixed_pool_mode: if iteration_config_rewards is not None: # Update the optimizer using the most recent fixed opponent pool # results target_scores = np.reshape(-iteration_config_rewards[ 'episode_reward'].values, [-1, fixed_opp_repeats]).mean(1).tolist() if config_has_range: opt.tell(next_fixed_opponent_suggested, target_scores) # Append the tried settings to the settings-scores file config_rewards = 
rule_utils.append_config_scores( next_fixed_opponent_suggested, target_scores, config_keys, config['pool_name'], CONFIG_SETTINGS_EXTENSION) # Update the plot of the tried settings and obtained scores rule_utils.plot_reward_versus_features( experience_features_rewards_path, config_rewards, target_col="Average win rate", include_all_targets=True, plot_name_suffix="config setting average win rate", all_scatter=True) # Select the next hyperparameters to try try: next_fixed_opponent_suggested, next_fixed_opponent_configs = ( rule_utils.get_next_config_settings( opt, config_keys, config['num_games_fixed_opponents_pool'], fixed_opp_repeats, config['initial_config_ranges']) ) except: import pdb; pdb.set_trace() # Section 3: play games against a fixed opponents pool if config['num_games_fixed_opponents_pool']: print('\nPlay vs the fixed opponents pool') (fixed_opponents_experience, rules_config_path, avg_reward_fixed_opponents, opponent_rewards) = ( rule_experience.play_games( pool_name=config['pool_name'], num_games=config['num_games_fixed_opponents_pool'], max_pool_size=1, # Any positive integer is fine num_agents=config['num_agents_per_game'], exclude_current_from_opponents=False, fixed_opponent_pool=True, initial_config_ranges=config['initial_config_ranges'], use_multiprocessing=config['use_multiprocessing'], num_repeat_first_configs=fixed_opp_repeats, first_config_overrides=next_fixed_opponent_configs, episode_steps_override=config['episode_steps_override'], early_episode_termination=config['early_episode_termination_steps'], ) ) experience_buffer.add(fixed_opponents_experience) # import pdb; pdb.set_trace() # Select the values that will be used to determine if a next iteration file # will be created serialized_raw_experience = fixed_opponents_experience if ( fixed_pool_mode) else self_play_experience # Optionally append the experience of interest to disk iteration_config_rewards = ( rule_utils.serialize_game_experience_for_learning( serialized_raw_experience, fixed_pool_mode, config_keys)) if config['save_experience_data_to_disk']: experience_features_rewards_path = rule_utils.write_experience_data( config['pool_name'], iteration_config_rewards) # Section 4: Update the iteration, store videos and record learning # progress. 
if fixed_pool_mode: update_config = {'Time stamp': str(datetime.now())} for i in range(len(opponent_rewards)): update_config['Reward ' + opponent_rewards[i][2]] = np.round( opponent_rewards[i][1]/(1e-10+opponent_rewards[i][0]), 2) rule_utils.update_learning_progress(config['pool_name'], update_config) config_override_agents = ( fixed_opponents_experience[0].config_game_agents) # import pdb; pdb.set_trace() rule_utils.record_videos( rules_config_path, config['num_agents_per_game'], extension_override=str(datetime.now())[:19], config_override_agents=config_override_agents, env_seed_deterministic=fixed_opponents_experience[0].env_random_seed, rng_action_seeds=fixed_opponents_experience[0].act_random_seeds, first_game_recording=fixed_opponents_experience[0].game_recording, deterministic_games=config['deterministic_games'], deterministic_extension=f" - Seed {main_loop_seed}") else: # Save a new iteration if it has significantly improved data_rules_path = rules_config_path if min(avg_reward_sp, avg_reward_eval) >= config[ 'min_new_iteration_win_rate']: original_rules_config_path = rules_config_path incremented_rules_path = utils.increment_iteration_id( rules_config_path, extension='.json') copyfile(rules_config_path, incremented_rules_path) rules_config_path = incremented_rules_path if config['record_videos_new_iteration']: rule_utils.record_videos( original_rules_config_path, config['num_agents_per_game']) elif config['record_videos_each_main_loop']: rule_utils.record_videos( rules_config_path, config['num_agents_per_game'], str(datetime.now())[:19]) # Record learning progress # import pdb; pdb.set_trace() rule_utils.update_learning_progress(config['pool_name'], { 'Time stamp': str(datetime.now()), 'Average reward self play': avg_reward_sp, 'Average evaluation reward': avg_reward_eval, 'Experience buffer size': experience_buffer.size(), 'Data rules path': data_rules_path, }) # Section 5: Update the latest config range using the data in the # experience buffer if rules_config_path is not None: if not fixed_pool_mode: # Evolve the config ranges in a very simple gradient free way. rule_utils.evolve_config( rules_config_path, iteration_config_rewards, config['initial_config_ranges']) # Create plot(s) of the terminal reward as a function of all serialized # features if config['save_experience_data_to_disk']: rule_utils.plot_reward_versus_features( experience_features_rewards_path, iteration_config_rewards, plot_name_suffix=str(datetime.now())[:19]) main_loop_seed += 1
                n_initial_points=10)
"""
kappa [float, default=1.96]:
    Controls how much of the variance in the predicted values should be
    taken into account. Used when the acquisition is "LCB" (lower
    confidence bound). If set high, then we are favouring exploration
    over exploitation and vice versa.

xi [float, default=0.01]:
    Controls how much improvement one wants over the previous best
    values. Used when the acquisition is either "EI" or "PI".
    To use this, I think I need a way to scale xi by the variance of the
    signal, which however will depend on the participant and will have
    to be adapted online...
"""

# run the experiment
for i in range(50):
    next_x = opt.ask()
    f_val = -1 * objective(next_x)  # hart6(next_x) ## branin(next_x)
    result = opt.tell(next_x, f_val)
    # print('iteration:', i, next_x, f_val)

with open('my_optimizer.pkl', 'wb') as f:
    pickle.dump(opt, f)

# Visualize evaluations and partial dependence plots
# (Friedman (2001), doi:10.1214/aos/1013203451, section 8.2).
# Good explanation: https://www.kaggle.com/dansbecker/partial-dependence-plots
# Visualize how the value of x_j influences the average predicted values y
# (marginalized over all other variables).
# Marginalize: find the probability distribution of a random variable
# REGARDLESS of the value taken by all other random variables. Concretely,
# sum the joint distribution over all possible values of the variables we
# want to marginalize out.
plot_evaluations(result, bins=10)
plot_objective(result)
print(result.x)
# save_fig('kappa_def')
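# --- Hedged example: the kappa/xi knobs described in the docstring above
# are passed through acq_func_kwargs. A minimal sketch contrasting
# exploratory and exploitative settings.
from skopt import Optimizer
from skopt.space import Real

dims = [Real(-2.0, 2.0)]
explore = Optimizer(dims, acq_func="LCB", acq_func_kwargs={"kappa": 10.0})  # trust the variance
exploit = Optimizer(dims, acq_func="LCB", acq_func_kwargs={"kappa": 0.1})   # trust the mean
greedy = Optimizer(dims, acq_func="EI", acq_func_kwargs={"xi": 0.0})        # accept tiny improvements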
class Optimizer: SEED = 12345 KAPPA = 1.96 def __init__(self, num_workers: int, space, learner, acq_func, liar_strategy, **kwargs): assert learner in [ "RF", "ET", "GBRT", "GP", "DUMMY" ], f"Unknown scikit-optimize base_estimator: {learner}" assert liar_strategy in "cl_min cl_mean cl_max".split() self.space = space self.learner = learner self.acq_func = acq_func self.liar_strategy = liar_strategy n_init = inf if learner == 'DUMMY' else num_workers self._optimizer = SkOptimizer(dimensions=self.space.dimensions, base_estimator=self.learner, acq_optimizer='sampling', acq_func=self.acq_func, acq_func_kwargs={'kappa': self.KAPPA}, random_state=self.SEED, n_initial_points=n_init) self.evals = {} self.counter = 0 logger.info("Using skopt.Optimizer with %s base_estimator" % self.learner) def _get_lie(self): if self.liar_strategy == "cl_min": return min(self._optimizer.yi) if self._optimizer.yi else 0.0 elif self.liar_strategy == "cl_mean": return np.mean(self._optimizer.yi) if self._optimizer.yi else 0.0 else: return max(self._optimizer.yi) if self._optimizer.yi else 0.0 def _xy_from_dict(self): XX = list(self.evals.keys()) YY = [self.evals[x] for x in XX] return XX, YY def to_dict(self, x: list) -> dict: return self.space.to_dict(x) def _ask(self): x = self._optimizer.ask() y = self._get_lie() key = tuple(x) if key not in self.evals: self.counter += 1 self._optimizer.tell(x, y) self.evals[key] = y logger.debug(f'_ask: {x} lie: {y}') else: logger.debug(f'Duplicate _ask: {x} lie: {y}') return self.to_dict(x) def ask(self, n_points=None, batch_size=20): if n_points is None: return self._ask() else: batch = [] for _ in range(n_points): batch.append(self._ask()) if len(batch) == batch_size: yield batch batch = [] if batch: yield batch def ask_initial(self, n_points): XX = self._optimizer.ask(n_points=n_points) for x in XX: y = self._get_lie() key = tuple(x) if key not in self.evals: self.counter += 1 self._optimizer.tell(x, y) self.evals[key] = y return [self.to_dict(x) for x in XX] def tell(self, xy_data): assert isinstance(xy_data, list), f"where type(xy_data)=={type(xy_data)}" maxval = max(self._optimizer.yi) if self._optimizer.yi else 0.0 for x, y in xy_data: key = tuple(x.values()) # * tuple(x[k] for k in self.space) assert key in self.evals, f"where key=={key} and self.evals=={self.evals}" logger.debug(f'tell: {x} --> {key}: evaluated objective: {y}') self.evals[key] = (y if y < float_info.max else maxval) self._optimizer.Xi = [] self._optimizer.yi = [] XX, YY = self._xy_from_dict() assert len(XX) == len(YY) == self.counter, ( f"where len(XX)=={len(XX)}," f"len(YY)=={len(YY)}, self.counter=={self.counter}") self._optimizer.tell(XX, YY) assert len(self._optimizer.Xi) == len( self._optimizer.yi) == self.counter, ( f"where len(self._optimizer.Xi)=={len(self._optimizer.Xi)}, " f"len(self._optimizer.yi)=={len(self._optimizer.yi)}," f"self.counter=={self.counter}")
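# --- Hedged example: what _ask() above does under the hood, written with
# plain skopt. A lie (here the running minimum) is told for each pending
# point so subsequent asks avoid it; once real objectives arrive, the
# optimizer's history is rebuilt, exactly as tell() above does by clearing
# Xi/yi. The quadratic objective is a stand-in.
import numpy as np
from skopt import Optimizer
from skopt.space import Real

opt = Optimizer([Real(-5.0, 10.0), Real(0.0, 15.0)],
                base_estimator="RF", acq_optimizer="sampling", random_state=0)

pending = []
for _ in range(4):
    x = opt.ask()
    lie = min(opt.yi) if opt.yi else 0.0
    opt.tell(x, lie)              # placeholder until the real value is known
    pending.append(x)

opt.Xi, opt.yi = [], []           # drop the lies...
real_y = [float(np.sum(np.square(x))) for x in pending]
opt.tell(pending, real_y)         # ...and refit on the true observations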
class opt_bo(object):
    def __init__(self, config, w_num, popsize, resample, search_space):
        self.popsize = popsize
        self.resample = resample
        self.optimizer = Optimizer(
            dimensions=[Real(search_space[0], search_space[1])] * w_num,
            random_state=1,  # use the same seed for repeatability
            # if self.resample > 1, then we will continue ask-tell cycles
            # self.resample times
            n_initial_points=self.popsize * self.resample,
            # 'n_jobs' slows down; 'noise' seems not to be used
            acq_optimizer_kwargs={'n_points': 10000}
        )

    def stop(self):
        return False

    def ask(self):
        x = self.optimizer.ask(n_points=self.popsize)
        x = [xx for xx in x for i in range(self.resample)]
        return x

    def tell(self, solutions, damage):
        solutions = solutions[::self.resample]
        damage = chunks_(damage, self.resample)
        X, Y, rejected = [], [], []
        for batch_id, batch in enumerate(damage):
            valid_batch = [x for x in batch if x is not None]
            valid_batch, outliers = dixon_test(valid_batch, pres=-1)
            if len(valid_batch) > 0:
                batch_mean = np.mean(valid_batch)
                X.append(solutions[batch_id])
                Y.append(batch_mean)
            # else: if all batch damages are None then we hope tell() will not
            # complain about the missing sample (according to the manual,
            # this should be ok)
            if outliers:
                outliers = [x + self.resample * batch_id for x in outliers]
                rejected.append(outliers)
        res = self.optimizer.tell(X, Y)
        # flatten list of rejected episodes' indexes
        rejected = [y for x in rejected for y in x]
        return res, rejected

    def reeval(self, g, solutions, damage, hp):
        pass

    def log(self, root, alg, g, damage_info, reeval_damage_info, rejected):
        ibest = np.argmin(self.optimizer.yi)
        with open('{}/opt_bo.txt'.format(root), 'w') as f:
            for p in zip(self.optimizer.Xi, self.optimizer.yi):
                f.write(str(p) + '\n')
        with open('{}/{}-g{:04}.txt'.format(root, alg, g), 'w') as f:
            f.write(str(self.optimizer.yi[ibest]) + '\n')
            f.write(str(self.optimizer.Xi[ibest]) + '\n\n\n')
            for i, di in enumerate(damage_info):
                rej_symb = '*' if i in rejected else ' '
                f.write('{:2d}'.format(i).rjust(3) + ': ' + rej_symb + str(di) + '\n')

    def save(self, root, fname):
        with open(root + '/' + fname, 'wb') as f:
            pickle.dump(self.optimizer, f)

    def load(self, root, fname):
        with open(root + '/' + fname, 'rb') as f:
            self.optimizer = pickle.load(f)
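# --- Hedged example: the resampling scheme above, minus the Dixon outlier
# test (dixon_test is project code, not skopt). Each suggested point is
# evaluated several times and the batch mean is told, which averages out
# evaluation noise.
import numpy as np
from skopt import Optimizer
from skopt.space import Real

popsize, resample = 4, 3
opt = Optimizer([Real(-1.0, 1.0)] * 2, random_state=1,
                n_initial_points=popsize * resample)

batch = opt.ask(n_points=popsize)
noisy = lambda x: float(np.sum(np.square(x)) + 0.05 * np.random.randn())
means = [float(np.mean([noisy(x) for _ in range(resample)])) for x in batch]
opt.tell(batch, means)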
class SkOptOptimizer(PhotonSlaveOptimizer): def __init__(self, n_configurations: int = 20, acq_func: str = 'gp_hedge', acq_func_kwargs: dict = None): self.optimizer = None self.hyperparameter_list = [] self.metric_to_optimize = '' self.ask = self.ask_generator() self.n_configurations = n_configurations self.acq_func = acq_func self.acq_func_kwargs = acq_func_kwargs self.maximize_metric = True self.constant_dictionary = {} def prepare(self, pipeline_elements: list, maximize_metric: bool): self.hyperparameter_list = [] self.maximize_metric = maximize_metric # build space space = [] for pipe_element in pipeline_elements: if pipe_element.__class__.__name__ == 'Switch': error_msg = 'Scikit-Optimize cannot operate in the specified hyperparameter space with a Switch ' \ 'element. We recommend the use of SMAC.' logger.error(error_msg) raise ValueError(error_msg) if hasattr(pipe_element, 'hyperparameters'): for name, value in pipe_element.hyperparameters.items(): # if we only have one value we do not need to optimize if isinstance(value, list) and len(value) < 2: self.constant_dictionary[name] = value[0] continue if isinstance(value, PhotonCategorical) and len(value.values) < 2: self.constant_dictionary[name] = value.values[0] continue skopt_param = self._convert_PHOTON_to_skopt_space( value, name) if skopt_param is not None: space.append(skopt_param) if len(space) == 0: logger.warning( "Did not find any hyperparameters to convert into skopt space") self.optimizer = None else: self.optimizer = Optimizer(space, "ET", acq_func=self.acq_func, acq_func_kwargs=self.acq_func_kwargs) self.ask = self.ask_generator() def _convert_PHOTON_to_skopt_space(self, hyperparam: object, name: str): if not hyperparam: return None self.hyperparameter_list.append(name) if isinstance(hyperparam, PhotonCategorical): return skoptCategorical(hyperparam.values, name=name) elif isinstance(hyperparam, list): return skoptCategorical(hyperparam, name=name) elif isinstance(hyperparam, FloatRange): if hyperparam.range_type == 'linspace': return Real(hyperparam.start, hyperparam.stop, name=name, prior='uniform') elif hyperparam.range_type == 'logspace': return Real(hyperparam.start, hyperparam.stop, name=name, prior='log-uniform') else: return Real(hyperparam.start, hyperparam.stop, name=name) elif isinstance(hyperparam, IntegerRange): return Integer(hyperparam.start, hyperparam.stop, name=name) def ask_generator(self): if self.optimizer is None: yield {} else: for i in range(self.n_configurations): next_config_list = self.optimizer.ask() next_config_dict = { self.hyperparameter_list[number]: self._convert_to_native(value) for number, value in enumerate(next_config_list) } yield next_config_dict def _convert_to_native(self, obj): # check if we have a numpy object, if so convert it to python native if type(obj).__module__ == np.__name__: return obj.item() else: return obj def tell(self, config, performance): # convert dictionary to list in correct order if self.optimizer is not None: config_values = [config[name] for name in self.hyperparameter_list] best_config_metric_performance = performance[1] if self.maximize_metric: best_config_metric_performance = -best_config_metric_performance # random_accuracy = np.random.randn(1)[0] self.optimizer.tell(config_values, best_config_metric_performance)
class AgEBO(RegularizedEvolution):
    """Aging evolution with Bayesian Optimization.

    This algorithm builds on the 'Regularized Evolution' from
    https://arxiv.org/abs/1802.01548. It combines hyperparameter optimization
    via Bayesian optimization with neural architecture search via regularized
    evolution.

    Args:
        problem (str): Module path to the Problem instance you want to use
            for the search (e.g. deephyper.benchmark.nas.linearReg.Problem).
        run (str): Module path to the run function you want to use for the
            search (e.g. deephyper.nas.run.quick).
        evaluator (str): value in ['balsam', 'subprocess', 'processPool',
            'threadPool'].
        population_size (int, optional): the number of individuals to keep
            in the population. Defaults to 100.
        sample_size (int, optional): the number of individuals that should
            participate in each tournament. Defaults to 10.
    """

    def __init__(
        self,
        problem,
        run,
        evaluator,
        population_size=100,
        sample_size=10,
        n_jobs=1,
        kappa=0.001,
        xi=0.000001,
        acq_func="LCB",
        **kwargs,
    ):
        super().__init__(
            problem=problem,
            run=run,
            evaluator=evaluator,
            population_size=population_size,
            sample_size=sample_size,
            **kwargs,
        )

        self.n_jobs = int(n_jobs)  # parallelism of BO surrogate model estimator

        # Initialize hyperparameter space
        self.hp_space = []
        # add the 'learning_rate' space to the HPO search space
        self.hp_space.append(
            self.problem.space["hyperparameters"]["learning_rate"])
        # add the 'batch_size' space to the HPO search space
        self.hp_space.append(
            self.problem.space["hyperparameters"]["batch_size"])
        # add the 'num_ranks_per_node' space to the HPO search space
        self.hp_space.append(
            self.problem.space["hyperparameters"]["ranks_per_node"])

        # Initialize optimizer of hyperparameter space
        acq_func_kwargs = {
            "xi": float(xi),
            "kappa": float(kappa)
        }  # tiny exploration
        self.n_initial_points = self.free_workers

        self.hp_opt = SkOptimizer(
            dimensions=self.hp_space,
            base_estimator=RandomForestRegressor(n_jobs=self.n_jobs),
            acq_func=acq_func,
            acq_optimizer="sampling",
            acq_func_kwargs=acq_func_kwargs,
            n_initial_points=self.n_initial_points,
        )

    @staticmethod
    def _extend_parser(parser):
        RegularizedEvolution._extend_parser(parser)
        add_arguments_from_signature(parser, AgEBO)
        return parser

    def saved_keys(self, val: dict):
        res = {
            "learning_rate": val["hyperparameters"]["learning_rate"],
            "batch_size": val["hyperparameters"]["batch_size"],
            "ranks_per_node": val["hyperparameters"]["ranks_per_node"],
            "arch_seq": str(val["arch_seq"]),
        }
        return res

    def main(self):
        num_evals_done = 0
        population = collections.deque(maxlen=self.population_size)

        # Filling available nodes at start
        self.evaluator.add_eval_batch(
            self.gen_random_batch(size=self.free_workers))

        # Main loop
        while num_evals_done < self.max_evals:

            # Collecting finished evaluations
            new_results = list(self.evaluator.get_finished_evals())

            if len(new_results) > 0:
                population.extend(new_results)
                stats = {
                    "num_cache_used": self.evaluator.stats["num_cache_used"]
                }
                dhlogger.info(jm(type="env_stats", **stats))
                self.evaluator.dump_evals(saved_keys=self.saved_keys)

                num_received = len(new_results)
                num_evals_done += num_received

                hp_results_X, hp_results_y = [], []

                # If the population is big enough evolve the population
                if len(population) == self.population_size:
                    children_batch = []
                    # For each new parent/result we create a child from it
                    for new_i in range(len(new_results)):
                        # select_sample
                        indexes = np.random.choice(self.population_size,
                                                   self.sample_size,
                                                   replace=False)
                        sample = [population[i] for i in indexes]
                        # select_parent
                        parent = self.select_parent(sample)
                        # copy_mutate_parent
                        child = self.copy_mutate_arch(parent)
                        # add child to batch
                        children_batch.append(child)

                        # collect infos for hp optimization
                        new_i_hps = new_results[new_i][0]["hyperparameters"]
                        new_i_y = new_results[new_i][1]
                        hp_new_i = [
                            new_i_hps["learning_rate"],
                            new_i_hps["batch_size"],
                            new_i_hps["ranks_per_node"],
                        ]
                        hp_results_X.append(hp_new_i)
                        hp_results_y.append(-new_i_y)

                    self.hp_opt.tell(hp_results_X, hp_results_y)  #! fit: costly
                    new_hps = self.hp_opt.ask(n_points=len(new_results))
                    for hps, child in zip(new_hps, children_batch):
                        child["hyperparameters"]["learning_rate"] = hps[0]
                        child["hyperparameters"]["batch_size"] = hps[1]
                        child["hyperparameters"]["ranks_per_node"] = hps[2]

                    # submit_childs
                    if len(new_results) > 0:
                        self.evaluator.add_eval_batch(children_batch)
                else:  # If the population is too small keep increasing it
                    # For each new parent/result we create a child from it
                    for new_i in range(len(new_results)):
                        new_i_hps = new_results[new_i][0]["hyperparameters"]
                        new_i_y = new_results[new_i][1]
                        hp_new_i = [
                            new_i_hps["learning_rate"],
                            new_i_hps["batch_size"],
                            new_i_hps["ranks_per_node"],
                        ]
                        hp_results_X.append(hp_new_i)
                        hp_results_y.append(-new_i_y)

                    self.hp_opt.tell(hp_results_X, hp_results_y)  #! fit: costly
                    new_hps = self.hp_opt.ask(n_points=len(new_results))

                    new_batch = self.gen_random_batch(size=len(new_results),
                                                      hps=new_hps)
                    self.evaluator.add_eval_batch(new_batch)

    def select_parent(self, sample: list) -> list:
        cfg, _ = max(sample, key=lambda x: x[1])
        return cfg["arch_seq"]

    def gen_random_batch(self, size: int, hps: list = None) -> list:
        batch = []
        if hps is None:
            points = self.hp_opt.ask(n_points=size)
            for point in points:
                #! DeepCopy is critical for nested lists or dicts
                cfg = copy.deepcopy(self.pb_dict)
                # hyperparameters
                cfg["hyperparameters"]["learning_rate"] = point[0]
                cfg["hyperparameters"]["batch_size"] = point[1]
                cfg["hyperparameters"]["ranks_per_node"] = point[2]
                # architecture DNA
                cfg["arch_seq"] = self.random_search_space()
                batch.append(cfg)
        else:  # passed hps are used
            assert size == len(hps)
            for point in hps:
                #! DeepCopy is critical for nested lists or dicts
                cfg = copy.deepcopy(self.pb_dict)
                # hyperparameters
                cfg["hyperparameters"]["learning_rate"] = point[0]
                cfg["hyperparameters"]["batch_size"] = point[1]
                cfg["hyperparameters"]["ranks_per_node"] = point[2]
                # architecture DNA
                cfg["arch_seq"] = self.random_search_space()
                batch.append(cfg)
        return batch

    def random_search_space(self) -> list:
        return [np.random.choice(b + 1) for (_, b) in self.space_list]

    def copy_mutate_arch(self, parent_arch: list) -> dict:
        """
        # ! Time performance is critical because called sequentially

        Args:
            parent_arch (list(int)): embedding of the parent's architecture.

        Returns:
            dict: embedding of the mutated architecture of the child.
        """
        i = np.random.choice(len(parent_arch))
        child_arch = parent_arch[:]

        range_upper_bound = self.space_list[i][1]
        elements = [
            j for j in range(range_upper_bound + 1) if j != child_arch[i]
        ]  # The mutation has to create a different search_space!

        sample = np.random.choice(elements, 1)[0]
        child_arch[i] = sample
        cfg = self.pb_dict.copy()
        cfg["arch_seq"] = child_arch
        return cfg
class SKoptSearcher(BaseSearcher):
    """SKopt Searcher that uses Bayesian optimization to suggest new
    hyperparameter configurations. Requires that the 'scikit-optimize'
    package is installed.

    Parameters
    ----------
    configspace: ConfigSpace.ConfigurationSpace
        The configuration space to sample from. It contains the full
        specification of the Hyperparameters with their priors
    kwargs: Optional arguments passed to skopt.optimizer.Optimizer class.
        Please see documentation at this link:
        `skopt.optimizer.Optimizer
        <http://scikit-optimize.github.io/optimizer/index.html#skopt.optimizer.Optimizer>`_
        These kwargs can be used to specify which surrogate model Bayesian
        optimization should rely on, which acquisition function to use, how
        to optimize the acquisition function, etc. The skopt library provides
        comprehensive Bayesian optimization functionality, where popular
        non-default kwargs options here might include:

        - base_estimator = 'GP' or 'RF' or 'ET' or 'GBRT' (to specify
          different surrogate models like Gaussian Processes, Random
          Forests, etc)
        - acq_func = 'LCB' or 'EI' or 'PI' or 'gp_hedge' (to specify
          different acquisition functions like Lower Confidence Bound,
          Expected Improvement, etc)

        For example, we can tell our Searcher to perform Bayesian
        optimization with a Random Forest surrogate model and use the
        Expected Improvement acquisition function by invoking:
        `SKoptSearcher(cs, base_estimator='RF', acq_func='EI')`

    Examples
    --------
    By default, the searcher is created along with the scheduler. For example:

    >>> import autogluon.core as ag
    >>> @ag.args(
    ...     lr=ag.space.Real(1e-3, 1e-2, log=True))
    >>> def train_fn(args, reporter):
    ...     reporter(accuracy = args.lr ** 2)
    >>> search_options = {'base_estimator': 'RF', 'acq_func': 'EI'}
    >>> scheduler = ag.scheduler.FIFOScheduler(
    ...     train_fn, searcher='skopt', search_options=search_options,
    ...     num_trials=10, reward_attr='accuracy')

    This would result in a SKoptSearcher with cs = train_fn.cs. You can also
    create a SKoptSearcher by hand:

    >>> import autogluon.core as ag
    >>> @ag.args(
    ...     lr=ag.space.Real(1e-3, 1e-2, log=True),
    ...     wd=ag.space.Real(1e-3, 1e-2))
    >>> def train_fn(args, reporter):
    ...     pass
    >>> searcher = ag.searcher.SKoptSearcher(train_fn.cs)
    >>> searcher.get_config()
    {'lr': 0.0031622777, 'wd': 0.0055}
    >>> searcher = SKoptSearcher(
    ...     train_fn.cs, reward_attribute='accuracy', base_estimator='RF',
    ...     acq_func='EI')
    >>> next_config = searcher.get_config()
    >>> searcher.update(next_config, accuracy=10.0)  # made-up value

    .. note::

        - get_config() cannot ensure valid configurations for conditional
          spaces since skopt does not contain this functionality, as it is
          not integrated with ConfigSpace. If an invalid config is produced,
          `SKoptSearcher.get_config()` will catch these Exceptions and
          revert to `random_config()` instead.

        - get_config(max_tries) uses skopt's batch BayesOpt functionality to
          query at most max_tries number of configs to try out. If all of
          these configs have already been scheduled to be tried (which might
          happen in an asynchronous setting), then get_config simply reverts
          to random search via random_config().
    """
    errors_tohandle = (ValueError, TypeError, RuntimeError)

    def __init__(self, configspace, **kwargs):
        super().__init__(configspace,
                         reward_attribute=kwargs.get('reward_attribute'))
        self.hp_ordering = configspace.get_hyperparameter_names()  # fix order of hyperparams in configspace
        skopt_hpspace = []
        for hp in self.hp_ordering:
            hp_obj = configspace.get_hyperparameter(hp)
            hp_type = str(type(hp_obj)).lower()  # type of hyperparam
            if 'integer' in hp_type:
                hp_dimension = Integer(low=int(hp_obj.lower),
                                       high=int(hp_obj.upper),
                                       name=hp)
            elif 'float' in hp_type:
                if hp_obj.log:  # log10-scale hyperparameter
                    hp_dimension = Real(low=float(hp_obj.lower),
                                        high=float(hp_obj.upper),
                                        prior='log-uniform',
                                        name=hp)
                else:
                    hp_dimension = Real(low=float(hp_obj.lower),
                                        high=float(hp_obj.upper),
                                        name=hp)
            elif 'categorical' in hp_type:
                hp_dimension = Categorical(hp_obj.choices, name=hp)
            elif 'ordinal' in hp_type:
                hp_dimension = Categorical(hp_obj.sequence, name=hp)
            else:
                raise ValueError("unknown hyperparameter type: %s" % hp)
            skopt_hpspace.append(hp_dimension)
        skopt_keys = {
            'base_estimator', 'n_random_starts', 'n_initial_points',
            'acq_func', 'acq_optimizer', 'random_state', 'model_queue_size',
            'acq_func_kwargs', 'acq_optimizer_kwargs'
        }
        skopt_kwargs = self._filter_skopt_kwargs(kwargs, skopt_keys)
        self.bayes_optimizer = Optimizer(dimensions=skopt_hpspace,
                                         **skopt_kwargs)

    @staticmethod
    def _filter_skopt_kwargs(kwargs, keys):
        return {k: v for k, v in kwargs.items() if k in keys}

    def configure_scheduler(self, scheduler):
        from ..scheduler import FIFOScheduler
        assert isinstance(scheduler, FIFOScheduler), \
            "This searcher requires FIFOScheduler scheduler"
        super().configure_scheduler(scheduler)

    def get_config(self, **kwargs):
        """Function to sample a new configuration

        This function is called to query a new configuration that has not
        yet been tried. Asks for one point at a time from skopt, up to
        max_tries. If an invalid hyperparameter configuration is proposed by
        skopt, then reverts to random search (since skopt cannot handle
        conditional spaces the way ConfigSpace can).
        TODO: may loop indefinitely due to no termination condition (like
        RandomSearcher.get_config())

        Parameters
        ----------
        max_tries: int, default = 1e2
            The maximum number of tries to ask for a unique config from
            skopt before reverting to random search.
""" max_tries = kwargs.get('max_tries', 1e2) if len( self._results ) == 0: # no hyperparams have been tried yet, first try default config return self.default_config() try: new_points = self.bayes_optimizer.ask( n_points=1) # initially ask for one new config new_config_cs = self.skopt2config( new_points[0]) # hyperparameter-config to evaluate try: new_config_cs.is_valid_configuration() new_config = new_config_cs.get_dictionary() if pickle.dumps(new_config) not in self._results.keys( ): # have not encountered this config self._results[pickle.dumps( new_config)] = self._reward_while_pending() return new_config except self.errors_tohandle: pass new_points = self.bayes_optimizer.ask( n_points=max_tries ) # ask skopt for many configs since first one was not new i = 1 # which new point to return as new_config, we already tried the first point above while i < max_tries: new_config_cs = self.skopt2config( new_points[i]) # hyperparameter-config to evaluate try: new_config_cs.is_valid_configuration() new_config = new_config_cs.get_dictionary() if (pickle.dumps(new_config) not in self._results.keys() ): # have not encountered this config self._results[pickle.dumps( new_config)] = self._reward_while_pending() return new_config except self.errors_tohandle: pass i += 1 except self.errors_tohandle: pass logger.info( "used random search instead of skopt to produce new hyperparameter configuration in this trial" ) return self.random_config() def default_config(self): """ Function to return the default configuration that should be tried first. Returns ------- returns: config """ new_config_cs = self.configspace.get_default_configuration() new_config = new_config_cs.get_dictionary() self._results[pickle.dumps(new_config)] = self._reward_while_pending() return new_config def random_config(self): """Function to randomly sample a new configuration (which is ensured to be valid in the case of conditional hyperparameter spaces). """ # TODO: may loop indefinitely due to no termination condition (like RandomSearcher.get_config() ) new_config = self.configspace.sample_configuration().get_dictionary() while pickle.dumps(new_config) in self._results.keys(): new_config = self.configspace.sample_configuration( ).get_dictionary() self._results[pickle.dumps(new_config)] = self._reward_while_pending() return new_config def update(self, config, **kwargs): """Update the searcher with the newest metric report. """ super().update(config, **kwargs) reward = kwargs[self._reward_attribute] try: self.bayes_optimizer.tell( self.config2skopt(config), -reward ) # provide negative reward since skopt performs minimization except self.errors_tohandle: logger.info("surrogate model not updated this trial") def config2skopt(self, config): """ Converts autogluon config (dict object) to skopt format (list object). Returns ------- Object of same type as: `skOpt.Optimizer.ask()` """ point = [] for hp in self.hp_ordering: point.append(config[hp]) return point def skopt2config(self, point): """ Converts skopt point (list object) to autogluon config format (dict object. Returns ------- Object of same type as: `RandomSampling.configspace.sample_configuration().get_dictionary()` """ config = self.configspace.sample_configuration() for i in range(len(point)): hp = self.hp_ordering[i] config[hp] = point[i] return config
class ScikitOptimizer(AbstractOptimizer):
    primary_import = "scikit-optimize"

    def __init__(self, api_config, base_estimator="GP", acq_func="gp_hedge",
                 n_initial_points=5):
        """Build wrapper class to use an optimizer in benchmark.

        Parameters
        ----------
        api_config : dict-like of dict-like
            Configuration of the optimization variables. See API description.
        base_estimator : {'GP', 'RF', 'ET', 'GBRT'}
            How to estimate the objective function.
        acq_func : {'LCB', 'EI', 'PI', 'gp_hedge', 'EIps', 'PIps'}
            Acquisition objective to decide next suggestion.
        n_initial_points : int
            Number of points to sample randomly before actual Bayes opt.
        """
        AbstractOptimizer.__init__(self, api_config)

        dimensions, self.round_to_values = ScikitOptimizer.get_sk_dimensions(
            api_config)

        # Older versions of skopt don't copy over the dimensions names during
        # normalization and hence the names are missing in
        # self.skopt.space.dimensions. Therefore, we save our own copy of the
        # dimensions list to be safe. If we can commit to using the newer
        # versions of skopt we can delete self.dimensions.
        self.dimensions_list = tuple(dd.name for dd in dimensions)

        self.skopt = SkOpt(
            dimensions,
            n_initial_points=n_initial_points,
            base_estimator=base_estimator,
            acq_func=acq_func,
            acq_optimizer="auto",
            acq_func_kwargs={},
            acq_optimizer_kwargs={},
        )

    @staticmethod
    def get_sk_dimensions(api_config, transform="normalize"):
        """Helper routine to set up the skopt search space in the constructor.

        Takes api_config as an argument so this can be static.
        """
        # The ordering of iteration probably makes no difference, but just to
        # be safe and consistent with space.py, iterate in sorted order.
        param_list = sorted(api_config.keys())

        sk_dims = []
        round_to_values = {}
        for param_name in param_list:
            param_config = api_config[param_name]

            param_type = param_config["type"]

            param_space = param_config.get("space", None)
            param_range = param_config.get("range", None)
            param_values = param_config.get("values", None)

            # Some setup for the case that a whitelist of values is provided:
            values_only_type = param_type in ("cat", "ordinal")
            if (param_values is not None) and (not values_only_type):
                assert param_range is None
                param_values = np.unique(param_values)
                param_range = (param_values[0], param_values[-1])
                round_to_values[param_name] = interp1d(
                    param_values, param_values, kind="nearest",
                    fill_value="extrapolate")

            if param_type == "int":
                # Integer space in skopt does not support any warping => need
                # to leave the warping as linear in skopt.
                sk_dims.append(
                    Integer(param_range[0], param_range[-1],
                            transform=transform, name=param_name))
            elif param_type == "bool":
                assert param_range is None
                assert param_values is None
                sk_dims.append(
                    Integer(0, 1, transform=transform, name=param_name))
            elif param_type in ("cat", "ordinal"):
                assert param_range is None
                # Leave the transform to one-hot as per the skopt default
                sk_dims.append(Categorical(param_values, name=param_name))
            elif param_type == "real":
                # skopt doesn't support all our warpings, so we need to pick
                # the closest substitute it does support.
                prior = "log-uniform" if param_space in ("log", "logit") \
                    else "uniform"
                sk_dims.append(
                    Real(param_range[0], param_range[-1], prior=prior,
                         transform=transform, name=param_name))
            else:
                assert False, "type %s not handled in API" % param_type
        return sk_dims, round_to_values

    def suggest(self, n_suggestions=1):
        """Get a suggestion from the optimizer.
        Parameters
        ----------
        n_suggestions : int
            Desired number of parallel suggestions in the output

        Returns
        -------
        next_guess : list of dict
            List of `n_suggestions` suggestions to evaluate the objective
            function. Each suggestion is a dictionary where each key
            corresponds to a parameter being optimized.
        """
        # First get a list of lists from skopt.ask()
        next_guess = self.skopt.ask(n_points=n_suggestions)
        # Then convert to a list of dicts
        next_guess = [dict(zip(self.dimensions_list, x)) for x in next_guess]

        # Now do the rounding; custom rounding is not supported in skopt.
        # Note that there is not necessarily a rounding function for each
        # dimension here.
        for param_name, round_f in self.round_to_values.items():
            for xx in next_guess:
                xx[param_name] = round_f(xx[param_name])
        return next_guess

    def observe(self, X, y):
        """Send an observation of a suggestion back to the optimizer.

        Parameters
        ----------
        X : list of dict-like
            Places where the objective function has already been evaluated.
            Each suggestion is a dictionary where each key corresponds to a
            parameter being optimized.
        y : array-like, shape (n,)
            Corresponding values where objective has been evaluated
        """
        # Supposedly skopt can handle batches, but the interface for that is
        # unclear. Just do a loop to be safe for now.
        for xx, yy in zip(X, y):
            # skopt needs lists instead of dicts
            xx = [xx[dim_name] for dim_name in self.dimensions_list]
            # Just ignore any non-finite observations we got; unclear if this
            # is the right thing to do.
            if np.isfinite(yy):
                self.skopt.tell(xx, yy)
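# --- Hedged example: the suggest()/observe() round trip above, reduced to
# plain skopt. Keep one fixed parameter ordering and convert dict <-> list
# at the boundary; names and values here are made up.
from skopt import Optimizer
from skopt.space import Integer, Real

names = ("max_depth", "learning_rate")
opt = Optimizer([Integer(1, 10, name=names[0]),
                 Real(1e-3, 1.0, prior="log-uniform", name=names[1])])

points = opt.ask(n_points=2)
suggestions = [dict(zip(names, x)) for x in points]   # list -> dict, as in suggest()

for s, y in zip(suggestions, [0.7, 0.4]):             # made-up objective values
    opt.tell([s[n] for n in names], y)                # dict -> list, as in observe()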
def gp_update_hyperparameter_optimization(
        eval_fn: Callable,
        hyperparams: Dict,
        search_key_ranges: Dict,
        n: int,
        save_results_to: Optional[str] = "gp_hyper_param_search_results.csv",
        m: int = 1,
        metric_should_increase: bool = True,
        metric_name: str = "mIoU",
        base: int = 2,
        n_initial_points: Optional[int] = None,
        prior: str = "log-uniform"):
    """
    Multitask hyperparameter search with Gaussian process regression of
    values in search_key_ranges.

    Calls `eval_fn` with `hyperparams`, replacing values in `hyperparams`
    with expected-improvement-maximizing values sampled from the ranges in
    `search_key_ranges` for the keys that are in both `hyperparams` and
    `search_key_ranges`.

    Args:
        eval_fn: The function to call with params that returns a metric.
        hyperparams: Dictionary of kwargs that must be specified to call
            eval_fn.
        search_key_ranges: Dictionary mapping a key in params to a range to
            sample from.
        n: number of hyperparameter configurations to sample.
        m: number of train-val splits to sample.
        metric_should_increase: If true, the metric is maximized (its value
            is negated before being handed to the minimizing optimizer).
        prior: Sample points from this distribution. E.g., "log-uniform"
            samples from a log-scaled uniform distribution.

    Returns:
        Tuple of the sampled values in a dictionary with the same keys as
        search_key_ranges and the resulting metric.
    """
    for key in search_key_ranges.keys():
        assert key in hyperparams, "key: {} not in hyperparams: {}".format(
            key, hyperparams)

    if n_initial_points is None:
        n_initial_points = int(n / 2)
        print("Sampling {} points initially at random.".format(n_initial_points))

    search_dim_types = {
        key: get_dim_type(val)
        for key, val in search_key_ranges.items()
    }
    dims = [
        search_dim_types[key](domain[0], domain[1], prior=prior, base=base,
                              name=key)
        for key, domain in search_key_ranges.items() if domain[0] != domain[1]
    ]
    dim_names = [dim.name for dim in dims]

    opt = Optimizer(
        dims,
        "GP",  # Estimate the metric as a function of the hyperparameters with a Gaussian Process.
        acq_func='EI',  # Use Expected Improvement as the acquisition function.
        acq_optimizer="lbfgs",  # Draw random samples from the GP, then optimize them to find the best point to suggest.
        n_initial_points=n_initial_points,  # The first points are completely random, to avoid exploiting too early.
    )

    results = []
    for i in range(n):
        print("Running configuration sample {} of {}.".format(i + 1, n))
        print("With sampled hyperparams:")
        sampled_list = opt.ask()
        sampled = {name: x for name, x in zip(dim_names, sampled_list)}
        print(sampled)
        hyperparams = insert_sampled_into_full_set_of_hyperparams(
            sampled, hyperparams)
        task_ids, num_steps, metrics = run_m(eval_fn, hyperparams, m)
        # Most recent metric observed for given params
        objective = np.nanmean(metrics)
        if metric_should_increase:
            objective *= -1
        print("Objective value at sample {} of {}: {}".format(
            i + 1, n, objective))
        opt_result = opt.tell(sampled_list, objective)
        results_i = (sampled, (task_ids, num_steps, metrics))
        results.append(results_i)
        log_opt_progress(hyperparams, results_i, task_ids, num_steps, metrics,
                         save_results_to)

    best_config, expected_best_step_num, best_metric = compute_best_configuration(
        results, metric_should_increase)
    return best_config, expected_best_step_num, best_metric, results
class AMBSMixed(NeuralArchitectureSearch):
    def __init__(
        self,
        problem,
        run,
        evaluator,
        surrogate_model="RF",
        acq_func="LCB",
        kappa=1.96,
        xi=0.001,
        liar_strategy="cl_min",
        n_jobs=1,
        **kwargs,
    ):
        super().__init__(
            problem=problem,
            run=run,
            evaluator=evaluator,
            **kwargs,
        )

        self.n_jobs = int(n_jobs)  # parallelism of BO surrogate model estimator
        self.kappa = float(kappa)
        self.xi = float(xi)
        self.n_initial_points = self.evaluator.num_workers
        self.liar_strategy = liar_strategy

        # Setup
        na_search_space = self.problem.build_search_space()

        self.hp_space = self.problem._hp_space  #! hyperparameters
        self.hp_size = len(self.hp_space.space.get_hyperparameter_names())
        self.na_space = HpProblem(self.problem.seed)
        for i, vnode in enumerate(na_search_space.variable_nodes):
            self.na_space.add_hyperparameter((0, vnode.num_ops - 1),
                                             name=f"vnode_{i:05d}")

        self.space = CS.ConfigurationSpace(seed=self.problem.seed)
        self.space.add_configuration_space(
            prefix="1", configuration_space=self.hp_space.space)
        self.space.add_configuration_space(
            prefix="2", configuration_space=self.na_space.space)

        # Initialize optimizer of hyperparameter space
        self.opt = SkOptimizer(
            dimensions=self.space,
            base_estimator=self.get_surrogate_model(surrogate_model,
                                                    self.n_jobs),
            acq_func=acq_func,
            acq_optimizer="sampling",
            acq_func_kwargs={
                "xi": self.xi,
                "kappa": self.kappa
            },
            n_initial_points=self.n_initial_points,
        )

    @staticmethod
    def _extend_parser(parser):
        NeuralArchitectureSearch._extend_parser(parser)
        add_arguments_from_signature(parser, AMBSMixed)
        return parser

    def saved_keys(self, val: dict):
        res = {"arch_seq": str(val["arch_seq"])}
        hp_names = self.hp_space._space.get_hyperparameter_names()

        for hp_name in hp_names:
            if hp_name == "loss":
                res["loss"] = val["loss"]
            else:
                res[hp_name] = val["hyperparameters"][hp_name]

        return res

    def main(self):
        num_evals_done = 0

        # Filling available nodes at start
        dhlogger.info(
            f"Generating {self.evaluator.num_workers} initial points...")
        self.evaluator.add_eval_batch(
            self.get_random_batch(size=self.n_initial_points))

        # Main loop
        while num_evals_done < self.max_evals:

            # Collecting finished evaluations
            new_results = list(self.evaluator.get_finished_evals())

            if len(new_results) > 0:
                stats = {
                    "num_cache_used": self.evaluator.stats["num_cache_used"]
                }
                dhlogger.info(jm(type="env_stats", **stats))
                self.evaluator.dump_evals(saved_keys=self.saved_keys)

                num_received = len(new_results)
                num_evals_done += num_received

                # Transform configurations to list to fit optimizer
                opt_X = []
                opt_y = []
                for cfg, obj in new_results:
                    arch_seq = cfg["arch_seq"]
                    hp_val = self.problem.extract_hp_values(cfg)
                    x = replace_nan(hp_val + arch_seq)
                    opt_X.append(x)
                    opt_y.append(-obj)  #! maximizing

                self.opt.tell(opt_X, opt_y)  #! fit: costly
                new_X = self.opt.ask(n_points=len(new_results),
                                     strategy=self.liar_strategy)

                new_batch = []
                for x in new_X:
                    new_cfg = self.problem.gen_config(x[self.hp_size:],
                                                      x[:self.hp_size])
                    new_batch.append(new_cfg)

                # submit_childs
                if len(new_results) > 0:
                    self.evaluator.add_eval_batch(new_batch)

    def get_surrogate_model(self, name: str, n_jobs: int = None):
        """Get a surrogate model from Scikit-Optimize.

        Args:
            name (str): name of the surrogate model.
            n_jobs (int): number of parallel processes to distribute the
                computation of the surrogate model.

        Raises:
            ValueError: when the name of the surrogate model is unknown.
        """
        accepted_names = ["RF", "ET", "GBRT", "GP", "DUMMY"]
        if name not in accepted_names:
            raise ValueError(
                f"Unknown surrogate model {name}, please choose among {accepted_names}."
) if name == "RF": surrogate = skopt.learning.RandomForestRegressor(n_jobs=n_jobs) elif name == "ET": surrogate = skopt.learning.ExtraTreesRegressor(n_jobs=n_jobs) elif name == "GBRT": surrogate = skopt.learning.GradientBoostingQuantileRegressor( n_jobs=n_jobs) else: # for DUMMY and GP surrogate = name return surrogate def get_random_batch(self, size: int) -> list: batch = [] n_points = max(0, size - len(batch)) if n_points > 0: points = self.opt.ask(n_points=n_points) for point in points: point_as_dict = self.problem.gen_config( point[self.hp_size:], point[:self.hp_size]) batch.append(point_as_dict) return batch def to_dict(self, x: list) -> dict: hp_x = x[:self.hp_size] arch_seq = x[self.hp_size:] cfg = self.problem.space.copy() cfg["arch_seq"] = arch_seq return cfg
gpr = skopt.learning.GaussianProcessRegressor(kernel=cov_amplitude * other_kernel, normalize_y=True, noise=noise_level, n_restarts_optimizer=2) opt = Optimizer(spaceg, base_estimator=gpr, acq_optimizer="sampling") g_vals = [] # We first query the first n (here 10) points randomly for _ in range(10): next_x = opt.ask() g_val = g(next_x) g_vals.append(g_val) diff_val = g_val - prior(next_x, noise_level=0) print(next_x, diff_val) opt.tell(next_x, diff_val) # We also give it the max from the 1D GP x0 = [(-5, -5), (-5, -4), (-4, -5), (-4, -4)] for next_x in x0: g_val = g(next_x) g_vals.append(g_val) diff_val = g_val - prior(next_x, noise_level=0) opt.tell(next_x, diff_val) x = np.arange(-8, 9) y = np.arange(-8, 9) # We use xy for the GP xy = np.array([[xi, yi] for xi in x for yi in y]) xy_model = opt.space.transform(xy.tolist()) priorxy = np.array([prior(xy_i) for xy_i in xy])
record_time = str(datetime.datetime.now())[:19] this_experiment_path = experiments_folder / ('Hyperparameter sweep ' + record_time + '.csv') experiment_path_override = experiments_folder / experiment_path_override_ext experiment_path = this_experiment_path if (experiment_path_override_ext == '') else experiment_path_override if experiment_path.is_file(): print('\nBayesian fit to earlier experiments') config_results = pd.read_csv(experiment_path) run_id = config_results.shape[0] suggested = config_results.iloc[:, :-(1 + len(fixed_config))].values.tolist() target_scores = (config_results.iloc[:, -1].values).tolist() opt.tell(suggested, target_scores) else: run_id = 0 while max_runs is None or run_id < max_runs: print(f"Experiment {run_id+1} - {str(datetime.datetime.now())[:19]}") run_ids = opt.ask(1)[0] config = {config_range_keys[i]: v for i, v in enumerate(run_ids)} config.update(fixed_config) all_outputs = non_parametric_wifi_utils.multiple_floors_train_predict( config, df, debug_floor, None,
print("Acquisition function: %s" % optimizer.acq_func) print("Acquisition function kwargs:") print(optimizer.acq_func_kwargs) print("Acquisition function optimizer kwargs:") print(optimizer.acq_optimizer_kwargs) print("Maximum iterations: %d " % runopts.max_iter) print("Points per iteration: %d " % runopts.num_points) with Logger(params=case.params, queue=files, logfile=runopts.logfilename, best_dir=runopts.output_dir) as log: if fvals is not None: print("Initializing metamodel with given points") log.log_points(x, fvals) optimizer.tell(x, fvals) x = optimizer.ask(runopts.num_points) y_pred = optimizer.models[-1].predict(x) else: y_pred = None while N_iter < runopts.max_iter: # evaluate points (in parallel) print("Evaluating {num:d} points.".format(num=len(x))) y = list(executor.map(fun, x)) log() if y_pred is not None: err = np.abs(np.array(y) - np.array(y_pred)) print("Metamodel prediction error:") print(" Min: {0:.2e} Max: {1:.2e} Mean: {2:.2e}".format( np.min(err), np.max(err), np.mean(err))) print("Updating metamodel and asking for points", flush=True)
running_proc = []
run_id = 0
while run_id < n_trials:
    # print("new run: " + str(run_id))
    while len(running_proc) < concurrent_train:
        print("new run: " + str(run_id))
        suggested = bayesian_opt.ask()
        print(suggested)
        p_i = train_model_async(model.build(suggested),
                                n_epochs=3,
                                n_nodes=2,
                                trial_id=run_id,
                                get_output=True,
                                model_name=model.get_name())
        print((run_id, suggested, p_i))
        running_proc.append((run_id, suggested, p_i))
        run_id += 1

    # block until some running configuration finishes
    run_completed, par_i, proc_id = get_finished_process(running_proc)
    running_proc.remove(
        (run_completed, par_i, proc_id))  # remove completed process
    fname = '_'.join(
        [model.get_name(), str(run_completed), "history.json"])
    y = get_fom_from_json_file(fname)
    # tell the parameters of the run that actually finished (par_i), not
    # the most recently asked `suggested`
    result = bayesian_opt.tell(par_i, y)
    print('completed iteration:', run_completed, par_i, y)

print("Best parameters: " + str(result.x))
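# --- Hedged example: bookkeeping for asynchronous runs. The objective must
# be told against the parameters of the run that finished, not whatever was
# asked most recently; keeping (params, handle) pairs makes the pairing
# explicit. Note that during the random-init phase each ask() is a fresh
# draw, but once the surrogate takes over, repeated ask() calls without an
# intervening tell() return the same point, which is what the liar
# strategies in earlier snippets address.
from skopt import Optimizer
from skopt.space import Real

opt = Optimizer([Real(1e-3, 1e-1, prior="log-uniform")])

in_flight = []
for run_id in range(3):
    params = opt.ask()
    in_flight.append((params, run_id))   # run_id stands in for an async handle

for params, handle in in_flight:         # as runs complete...
    y = float(params[0])                 # ...compute a stand-in objective...
    opt.tell(params, y)                  # ...and tell the matching params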
    def main(self):
        # Initializations and preliminaries
        comm = MPI.COMM_WORLD  # get MPI communicator object
        size = comm.size  # total number of processes
        rank = comm.rank  # rank of this process
        status = MPI.Status()  # get MPI status object

        comm.Barrier()
        start_time = time.time()

        # Master process executes code below
        if rank == 0:
            num_workers = size - 1
            closed_workers = 0
            space = [self.spaceDict[key] for key in self.params]
            print("space: ", space)
            eval_counter = 0

            parDict = {}
            evalDict = {}
            resultsList = []
            parDict['kappa'] = self.kappa
            init_x = []
            delta = 0.05
            # patience = max(100, 3 * num_workers - 1)
            patience = len(self.params) * self.patience_fac
            last_imp = 0
            curr_best = math.inf

            if self.base_estimator == 'NND':
                opt = Optimizer(
                    space,
                    base_estimator=NeuralNetworksDropoutRegressor(),
                    acq_optimizer='sampling',
                    acq_func=self.acq_func,
                    acq_func_kwargs=parDict,
                    random_state=seed)
            else:
                opt = Optimizer(space,
                                base_estimator=self.base_estimator,
                                acq_optimizer=self.acq_optimizer,
                                acq_func=self.acq_func,
                                acq_func_kwargs=parDict,
                                random_state=seed,
                                n_initial_points=self.n_initial_points)

            name2 = MPI.Get_processor_name()
            print('Master starting with %d workers %s' % (num_workers, name2))

            while closed_workers < num_workers:
                data = comm.recv(source=MPI.ANY_SOURCE,
                                 tag=MPI.ANY_TAG,
                                 status=status)
                source = status.Get_source()
                tag = status.Get_tag()
                elapsed_time = float(time.time() - start_time)
                print('elapsed_time:%1.3f' % elapsed_time)
                if tag == tags.READY:
                    if last_imp < patience and eval_counter < self.max_evals and elapsed_time < self.max_time:
                        if self.starting_point is not None:
                            x = self.starting_point
                            if num_workers - 1 > 0:
                                init_x = opt.ask(n_points=num_workers - 1)
                            self.starting_point = None
                        else:
                            if len(init_x) > 0:
                                x = init_x.pop(0)
                            else:
                                x = opt.ask(n_points=1, strategy='cl_min')[0]
                        key = str(x)
                        print('sample %s' % key)
                        if key in evalDict.keys():
                            print('%s already evaluated' % key)
                        evalDict[key] = None
                        task = {}
                        task['x'] = x
                        task['eval_counter'] = eval_counter
                        task['rank_master'] = rank
                        # task['start_time'] = elapsed_time
                        print('Sending task {} to worker {}'.format(
                            eval_counter, source))
                        comm.send(task, dest=source, tag=tags.START)
                        eval_counter = eval_counter + 1
                    else:
                        comm.send(None, dest=source, tag=tags.EXIT)
                elif tag == tags.DONE:
                    result = data
                    result['end_time'] = elapsed_time
                    print('Got data from worker {}'.format(source))
                    resultsList.append(result)
                    x = result['x']
                    y = result['cost']
                    opt.tell(x, y)
                    percent_improv = -100 * (
                        (y + 0.1) - (curr_best + 0.1)) / (curr_best + 0.1)
                    if y < curr_best:
                        if percent_improv >= delta or curr_best == math.inf:
                            curr_best = y
                            last_imp = 0
                    else:
                        last_imp = last_imp + 1
                    print('curr_best={} percent_improv={} patience={}/{}'.format(
                        curr_best, percent_improv, last_imp, patience))
                elif tag == tags.EXIT:
                    print('Worker {} exited.'.format(source))
                    closed_workers = closed_workers + 1
                    resultsList = data

            print('Search finished..')
            # resultsList = comm.recv(source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG, status=status)
            # comm.recv(source=MPI.ANY_SOURCE, tag=tags.EXIT)
            # print(resultsList)
            saveResults(resultsList, self.results_json_fname,
                        self.results_csv_fname)
            y_best = np.min(opt.yi)
            best_index = np.where(opt.yi == y_best)[0][0]
            x_best = opt.Xi[best_index]
            print('Best: x = {}; y={}'.format(y_best, x_best))
        else:
            # Worker processes execute code below
            name = MPI.Get_processor_name()
            print("worker with rank %d on %s."
% (rank, name)) resultsList = [] while True: comm.send(None, dest=0, tag=tags.READY) task = comm.recv(source=0, tag=MPI.ANY_TAG, status=status) tag = status.Get_tag() if tag == tags.START: result = self.evaluate(self.problem, task, self.jobs_dir, self.results_dir) elapsed_time = float(time.time() - start_time) result['elapsed_time'] = elapsed_time print(result) resultsList.append(result) comm.send(result, dest=0, tag=tags.DONE) elif tag == tags.EXIT: print(f'Exit rank={comm.rank}') break comm.send(resultsList, dest=0, tag=tags.EXIT)
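#############################################################################
# The master/worker loop above assumes a `tags` namespace with READY, START,
# DONE and EXIT members that is not shown in the excerpt. One common way to
# define it (an assumption, not part of the original source) is an IntEnum,
# since mpi4py message tags must be integers:

from enum import IntEnum

class tags(IntEnum):
    READY = 0   # worker is idle and requests a task
    START = 1   # master dispatches a task
    DONE = 2    # worker returns an evaluation result
    EXIT = 3    # shut down handshake in either direction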
class Optimizer:
    SEED = 12345

    def __init__(self,
                 problem,
                 num_workers,
                 learner='RF',
                 acq_func='gp_hedge',
                 acq_kappa=1.96,
                 liar_strategy='cl_max',
                 n_jobs=-1,
                 **kwargs):
        assert learner in [
            "RF", "ET", "GBRT", "GP", "DUMMY"
        ], f"Unknown scikit-optimize base_estimator: {learner}"

        if learner == "RF":
            base_estimator = RandomForestRegressor(n_jobs=n_jobs)
        elif learner == "ET":
            base_estimator = ExtraTreesRegressor(n_jobs=n_jobs)
        elif learner == "GBRT":
            base_estimator = GradientBoostingQuantileRegressor(n_jobs=n_jobs)
        else:
            base_estimator = learner

        self.space = problem.space
        cs_kwargs = self.space['create_search_space'].get('kwargs')
        if cs_kwargs is None:
            search_space = self.space['create_search_space']['func']()
        else:
            search_space = self.space['create_search_space']['func'](**cs_kwargs)

        # queue of remaining starting points
        # self.starting_points = problem.starting_point
        n_init = np.inf if learner == 'DUMMY' else num_workers
        self.starting_points = []  # empty for now, TODO

        # Build the search space for SkOptimizer
        skopt_space = [(0, vnode.num_ops - 1)
                       for vnode in search_space.variable_nodes]

        self._optimizer = SkOptimizer(skopt_space,
                                      base_estimator=base_estimator,
                                      acq_optimizer='sampling',
                                      acq_func=acq_func,
                                      acq_func_kwargs={'kappa': acq_kappa},
                                      random_state=self.SEED,
                                      n_initial_points=n_init)

        assert liar_strategy in "cl_min cl_mean cl_max".split()
        self.strategy = liar_strategy
        self.evals = {}
        self.counter = 0
        logger.info("Using skopt.Optimizer with %s base_estimator" % learner)

    def _get_lie(self):
        if self.strategy == "cl_min":
            return min(self._optimizer.yi) if self._optimizer.yi else 0.0
        elif self.strategy == "cl_mean":
            return np.mean(self._optimizer.yi) if self._optimizer.yi else 0.0
        else:  # self.strategy == "cl_max"
            return max(self._optimizer.yi) if self._optimizer.yi else 0.0

    def _xy_from_dict(self):
        XX = list(self.evals.keys())
        YY = [-self.evals[x] for x in XX]
        return XX, YY

    def to_dict(self, x):
        cfg = self.space.copy()
        cfg['arch_seq'] = list(x)
        return cfg

    def _ask(self):
        if len(self.starting_points) > 0:
            x = self.starting_points.pop()
        else:
            x = self._optimizer.ask()
        y = self._get_lie()
        key = tuple(x)
        if key not in self.evals:
            self.counter += 1
            self._optimizer.tell(x, y)
            self.evals[key] = y
            logger.debug(f'_ask: {x} lie: {y}')
        else:
            logger.debug(f'Duplicate _ask: {x} lie: {y}')
        return self.to_dict(x)

    def ask(self, n_points=None, batch_size=20):
        # The batched path must live in a separate generator function: a
        # `yield` anywhere in this body would turn `ask` itself into a
        # generator and break the single-point (n_points=None) path.
        if n_points is None:
            return self._ask()
        return self._ask_batches(n_points, batch_size)

    def _ask_batches(self, n_points, batch_size):
        batch = []
        for _ in range(n_points):
            batch.append(self._ask())
            if len(batch) == batch_size:
                yield batch
                batch = []
        if batch:
            yield batch

    def ask_initial(self, n_points):
        if len(self.starting_points) > 0:
            XX = [
                self.starting_points.pop()
                for i in range(min(n_points, len(self.starting_points)))
            ]
            if len(XX) < n_points:
                XX += self._optimizer.ask(n_points=n_points - len(XX))
        else:
            XX = self._optimizer.ask(n_points=n_points)
        for x in XX:
            y = self._get_lie()
            key = tuple(x)
            if key not in self.evals:
                self.counter += 1
                self._optimizer.tell(x, y)
                self.evals[key] = y
        return [self.to_dict(x) for x in XX]

    def tell(self, xy_data):
        assert isinstance(xy_data, list), f"where type(xy_data)=={type(xy_data)}"
        minval = min(self._optimizer.yi) if self._optimizer.yi else 0.0
        for x, y in xy_data:
            key = tuple(x['arch_seq'])
            assert key in self.evals, f"where key=={key} and self.evals=={self.evals}"
            logger.debug(f'tell: {x} --> {key}: evaluated objective: {y}')
            self.evals[key] = (y if y > float_info.min else minval)

        # rebuild the surrogate's data from scratch so lies are replaced
        # by the real objective values
        self._optimizer.Xi = []
        self._optimizer.yi = []
        XX, YY = self._xy_from_dict()
        assert len(XX) == len(YY) == self.counter, (
            f"where len(XX)=={len(XX)}, "
            f"len(YY)=={len(YY)}, self.counter=={self.counter}")
        self._optimizer.tell(XX, YY)
        assert len(self._optimizer.Xi) == len(self._optimizer.yi) == self.counter, (
            f"where len(self._optimizer.Xi)=={len(self._optimizer.Xi)}, "
            f"len(self._optimizer.yi)=={len(self._optimizer.yi)}, "
            f"self.counter=={self.counter}")
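#############################################################################
# What `_get_lie`/`_ask` implement is the "constant liar" trick: until a real
# objective value arrives, the wrapper tells the surrogate a fake value (the
# min/mean/max of what has been observed so far) so that repeated ask() calls
# move on to new points. A minimal sketch of the same idea with a bare skopt
# Optimizer (names and bounds are illustrative):

from skopt import Optimizer as SkOptimizer

opt = SkOptimizer([(0.0, 1.0)], random_state=0)
pending = []
for _ in range(4):  # ask four points before any real evaluation arrives
    x = opt.ask()
    lie = max(opt.yi) if opt.yi else 0.0  # the "cl_max" lie
    opt.tell(x, lie)  # register the lie so the next ask() explores elsewhere
    pending.append(x)

# Note that the lies now live in opt.Xi/opt.yi, which is exactly why the
# wrapper's tell() above resets Xi/yi and re-tells the true observations.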
class Optimizer:
    SEED = 12345

    def __init__(
        self,
        problem,
        num_workers,
        surrogate_model="RF",
        acq_func="gp_hedge",
        acq_kappa=1.96,
        liar_strategy="cl_max",
        n_jobs=1,
        **kwargs,
    ):
        assert surrogate_model in [
            "RF", "ET", "GBRT", "GP", "DUMMY",
        ], f"Unknown scikit-optimize base_estimator: {surrogate_model}"

        if surrogate_model == "RF":
            base_estimator = RandomForestRegressor(n_jobs=n_jobs)
        elif surrogate_model == "ET":
            base_estimator = ExtraTreesRegressor(n_jobs=n_jobs)
        elif surrogate_model == "GBRT":
            base_estimator = GradientBoostingQuantileRegressor(n_jobs=n_jobs)
        else:
            base_estimator = surrogate_model

        self.space = problem.space
        # queue of remaining starting points
        self.starting_points = problem.starting_point

        n_init = (inf if surrogate_model == "DUMMY" else max(
            num_workers, len(self.starting_points)))

        self._optimizer = SkOptimizer(
            dimensions=self.space,
            base_estimator=base_estimator,
            acq_optimizer="sampling",
            acq_func=acq_func,
            acq_func_kwargs={"kappa": acq_kappa},
            random_state=self.SEED,
            n_initial_points=n_init,
        )

        assert liar_strategy in "cl_min cl_mean cl_max".split()
        self.strategy = liar_strategy
        self.evals = {}
        self.counter = 0
        logger.info(f"Using skopt.Optimizer with {surrogate_model} base_estimator")

    def _get_lie(self):
        if self.strategy == "cl_min":
            return min(self._optimizer.yi) if self._optimizer.yi else 0.0
        elif self.strategy == "cl_mean":
            return np.mean(self._optimizer.yi) if self._optimizer.yi else 0.0
        else:  # "cl_max"
            return max(self._optimizer.yi) if self._optimizer.yi else 0.0

    def _xy_from_dict(self):
        XX = []
        for key in self.evals.keys():
            x = tuple(convert2np(k) for k in key)
            XX.append(x)
        # negate the objective: skopt minimizes, but we maximize
        YY = [-self.evals[x] for x in self.evals.keys()]
        return XX, YY

    def dict_to_xy(self, xy_dict: dict):
        XX = []
        for key in xy_dict.keys():
            x = [convert2np(k) for k in key]
            XX.append(x)
        # negate the objective: skopt minimizes, but we maximize
        YY = [-xy_dict[x] for x in xy_dict.keys()]
        return XX, YY

    def to_dict(self, x: list) -> dict:
        res = {}
        hps_names = self.space.get_hyperparameter_names()
        for i in range(len(x)):
            res[hps_names[i]] = "nan" if isnan(x[i]) else x[i]
        return res

    def _ask(self):
        if len(self.starting_points) > 0:
            x = self.starting_points.pop()
        else:
            x = self._optimizer.ask()
        y = self._get_lie()
        key = tuple(self.to_dict(x).values())
        if key not in self.evals:
            self.counter += 1
            self._optimizer.tell(x, y)
            self.evals[key] = y
            logger.debug(f"_ask: {x} lie: {y}")
        else:
            logger.debug(f"Duplicate _ask: {x} lie: {y}")
        return self.to_dict(x)

    def ask(self, n_points=None, batch_size=20):
        # Same caveat as above: keep `yield` out of this body so that the
        # n_points=None path really returns a single configuration.
        if n_points is None:
            return self._ask()
        return self._ask_batches(n_points, batch_size)

    def _ask_batches(self, n_points, batch_size):
        batch = []
        for _ in range(n_points):
            batch.append(self._ask())
            if len(batch) == batch_size:
                yield batch
                batch = []
        if batch:
            yield batch

    def ask_initial(self, n_points):
        if len(self.starting_points) > 0:
            XX = [
                self.starting_points.pop()
                for i in range(min(n_points, len(self.starting_points)))
            ]
            if len(XX) < n_points:
                XX += self._optimizer.ask(n_points=n_points - len(XX))
        else:
            XX = self._optimizer.ask(n_points=n_points)
        for x in XX:
            y = self._get_lie()
            x = [convert2np(xi) for xi in x]
            key = tuple(self.to_dict(x).values())
            if key not in self.evals:
                self.counter += 1
                self._optimizer.tell(x, y)
                self.evals[key] = y
        return [self.to_dict(x) for x in XX]

    def tell(self, xy_data):
        assert isinstance(xy_data, list), f"where type(xy_data)=={type(xy_data)}"
        minval = min(self._optimizer.yi) if self._optimizer.yi else 0.0
        xy_dict = {}
        for x, y in xy_data:
            key = tuple(x[k] for k in self.space)
            assert key in self.evals, f"where key=={key} and self.evals=={self.evals}"
            logger.debug(f"tell: {x} --> {key}: evaluated objective: {y}")
            self.evals[key] = y if y > np.finfo(np.float32).min else minval
            xy_dict[key] = y if y > np.finfo(np.float32).min else minval

        XX, YY = self.dict_to_xy(xy_dict)

        # drop the lies corresponding to the newly told points, keep the rest
        selection = [
            (xi, yi) for xi, yi in zip(self._optimizer.Xi, self._optimizer.yi)
            if not any(equal_list(xi, x) for x in XX)
        ]
        new_Xi, new_yi = list(zip(*selection)) if len(selection) > 0 else ([], [])
        new_Xi, new_yi = list(new_Xi), list(new_yi)
        self._optimizer.Xi = new_Xi
        self._optimizer.yi = new_yi

        self._optimizer.tell(XX, YY)

        assert len(self._optimizer.Xi) == len(self._optimizer.yi) == self.counter, (
            f"where len(self._optimizer.Xi)=={len(self._optimizer.Xi)}, "
            f"len(self._optimizer.yi)=={len(self._optimizer.yi)}, "
            f"self.counter=={self.counter}")
class AgeBO(RegularizedEvolution):
    """Aging (regularized) evolution with Bayesian optimization of
    hyperparameters. https://arxiv.org/abs/1802.01548

    Args:
        problem (str): Module path to the Problem instance you want to use
            for the search (e.g. deephyper.benchmark.nas.linearReg.Problem).
        run (str): Module path to the run function you want to use for the
            search (e.g. deephyper.nas.run.quick).
        evaluator (str): value in ['balsam', 'subprocess', 'processPool',
            'threadPool'].
        population_size (int, optional): the number of individuals to keep
            in the population. Defaults to 100.
        sample_size (int, optional): the number of individuals that should
            participate in each tournament. Defaults to 10.
    """

    def __init__(
        self,
        problem,
        run,
        evaluator,
        population_size=100,
        sample_size=10,
        plot="true",
        n_jobs=1,
        **kwargs,
    ):
        super().__init__(
            problem=problem,
            run=run,
            evaluator=evaluator,
            population_size=population_size,
            sample_size=sample_size,
            **kwargs,
        )

        self.do_plot = plot == "true"
        self.n_jobs = int(n_jobs)

        # Initialize hyperparameter space
        self.hp_space = []
        self.hp_space.append(self.problem.space["hyperparameters"]["learning_rate"])

        # plotting
        lr_range = self.problem.space["hyperparameters"]["learning_rate"][:2]
        self.domain_x = np.linspace(*lr_range, 400).reshape(-1, 1)

        # Initialize optimizer of hyperparameter space
        acq_func_kwargs = {"xi": 0.000001, "kappa": 0.001}  # tiny exploration
        self.n_initial_points = self.free_workers
        self.hp_opt = SkOptimizer(
            dimensions=self.hp_space,
            base_estimator=RandomForestRegressor(n_jobs=32),
            # base_estimator=RandomForestRegressor(n_jobs=self.n_jobs),
            acq_func="LCB",
            acq_optimizer="sampling",
            acq_func_kwargs=acq_func_kwargs,
            n_initial_points=self.n_initial_points,
            # model_queue_size=100,
        )

    @staticmethod
    def _extend_parser(parser):
        RegularizedEvolution._extend_parser(parser)
        add_arguments_from_signature(parser, AgeBO)
        return parser

    def saved_keys(self, val: dict):
        res = {
            "learning_rate": val["hyperparameters"]["learning_rate"],
            "batch_size": val["hyperparameters"]["batch_size"],
            "arch_seq": str(val["arch_seq"]),
        }
        return res

    def main(self):
        num_evals_done = 0
        it = 0
        population = collections.deque(maxlen=self.population_size)

        # Fill available nodes at start
        self.evaluator.add_eval_batch(self.gen_random_batch(size=self.free_workers))

        # Main loop
        while num_evals_done < self.max_evals:
            # Collect finished evaluations
            new_results = list(self.evaluator.get_finished_evals())

            if len(new_results) > 0:
                population.extend(new_results)
                stats = {"num_cache_used": self.evaluator.stats["num_cache_used"]}
                dhlogger.info(jm(type="env_stats", **stats))
                self.evaluator.dump_evals(saved_keys=self.saved_keys)

                num_received = len(new_results)
                num_evals_done += num_received

                hp_results_X, hp_results_y = [], []

                # If the population is big enough, evolve it
                if len(population) == self.population_size:
                    children_batch = []
                    # For each new parent/result we create a child from it
                    for new_i in range(len(new_results)):
                        # select_sample
                        indexes = np.random.choice(self.population_size,
                                                   self.sample_size,
                                                   replace=False)
                        sample = [population[i] for i in indexes]
                        # select_parent
                        parent = self.select_parent(sample)
                        # copy_mutate_parent
                        child = self.copy_mutate_arch(parent)
                        # add child to batch
                        children_batch.append(child)

                        # hpo: collect infos for hp optimization
                        new_i_hps = new_results[new_i][0]["hyperparameters"]
                        new_i_y = new_results[new_i][1]
                        hp_new_i = [new_i_hps["learning_rate"]]
                        hp_results_X.append(hp_new_i)
                        hp_results_y.append(-new_i_y)

                    self.hp_opt.tell(hp_results_X, hp_results_y)  # fit: costly
                    new_hps = self.hp_opt.ask(n_points=len(new_results))

                    for hps, child in zip(new_hps, children_batch):
                        child["hyperparameters"]["learning_rate"] = hps[0]

                    # submit_childs
                    if len(new_results) > 0:
                        self.evaluator.add_eval_batch(children_batch)
                else:  # If the population is too small, keep increasing it
                    # For each new parent/result we create a child from it
                    for new_i in range(len(new_results)):
                        new_i_hps = new_results[new_i][0]["hyperparameters"]
                        new_i_y = new_results[new_i][1]
                        hp_new_i = [new_i_hps["learning_rate"]]
                        hp_results_X.append(hp_new_i)
                        hp_results_y.append(-new_i_y)

                    self.hp_opt.tell(hp_results_X, hp_results_y)  # fit: costly
                    new_hps = self.hp_opt.ask(n_points=len(new_results))

                    new_batch = self.gen_random_batch(size=len(new_results),
                                                      hps=new_hps)
                    self.evaluator.add_eval_batch(new_batch)

                try:  # plotting is best-effort
                    self.plot_optimizer(x=self.domain_x, it=it)
                    it += 1
                except Exception:
                    pass

    def select_parent(self, sample: list) -> list:
        cfg, _ = max(sample, key=lambda x: x[1])
        return cfg["arch_seq"]

    def gen_random_batch(self, size: int, hps: list = None) -> list:
        batch = []
        if hps is None:
            points = self.hp_opt.ask(n_points=size)
            for point in points:
                # deepcopy is critical for nested lists or dicts
                cfg = copy.deepcopy(self.pb_dict)
                # hyperparameters
                cfg["hyperparameters"]["learning_rate"] = point[0]
                # architecture dna
                cfg["arch_seq"] = self.random_search_space()
                batch.append(cfg)
        else:  # passed hps are used
            assert size == len(hps)
            for point in hps:
                # deepcopy is critical for nested lists or dicts
                cfg = copy.deepcopy(self.pb_dict)
                # hyperparameters
                cfg["hyperparameters"]["learning_rate"] = point[0]
                # architecture dna
                cfg["arch_seq"] = self.random_search_space()
                batch.append(cfg)
        return batch

    def random_search_space(self) -> list:
        return [np.random.choice(b + 1) for (_, b) in self.space_list]

    def copy_mutate_arch(self, parent_arch: list) -> dict:
        """Mutate a single position of the parent architecture.

        Time performance is critical because this is called sequentially.

        Args:
            parent_arch (list(int)): embedding of the parent's architecture.

        Returns:
            dict: embedding of the mutated architecture of the child.
        """
        i = np.random.choice(len(parent_arch))
        child_arch = parent_arch[:]

        range_upper_bound = self.space_list[i][1]
        # the mutation has to create a different architecture!
        elements = [j for j in range(range_upper_bound + 1) if j != child_arch[i]]
        sample = np.random.choice(elements, 1)[0]

        child_arch[i] = sample
        cfg = self.pb_dict.copy()
        cfg["arch_seq"] = child_arch
        return cfg

    def plot_optimizer(self, x, it=0):
        opt = self.hp_opt
        model = opt.models[-1]
        x_model = opt.space.transform(x.tolist())

        plt.figure(figsize=(6.4 * 2, 4.8))

        plt.subplot(1, 2, 1)
        # Plot model(x) + confidence band
        y_pred, sigma = model.predict(x_model, return_std=True)
        y_pred *= -1
        plt.plot(x, y_pred, "g--", label=r"$\mu(x)$")
        plt.fill(
            np.concatenate([x, x[::-1]]),
            np.concatenate([y_pred - 1.9600 * sigma,
                            (y_pred + 1.9600 * sigma)[::-1]]),
            alpha=0.2,
            fc="g",
            ec="None",
        )

        # Plot sampled points
        W = 10
        yi = np.array(opt.yi)[-W:] * -1
        Xi = opt.Xi[-W:]
        plt.plot(Xi, yi, "r.", markersize=8, label="Observations")

        plt.grid()
        plt.legend(loc="best")
        plt.xlim(0.001, 0.1)
        plt.ylim(0, 1)
        plt.xlabel("Learning Rate")
        plt.ylabel("Objective")
        plt.xscale("log")
        ax = plt.gca()
        ax.xaxis.set_major_locator(ticker.FixedLocator([0.001, 0.01, 0.1]))
        ax.xaxis.set_major_formatter(ticker.FixedFormatter(["0.001", "0.01", "0.1"]))
        ax.yaxis.set_major_locator(ticker.MultipleLocator(0.2))

        # LCB
        plt.subplot(1, 2, 2)
        acq = gaussian_lcb(x_model, model) * -1
        plt.plot(x, acq, "b", label="UCB(x)")
        plt.fill_between(x.ravel(), 0.0, acq.ravel(), alpha=0.3, color="blue")
        plt.xlabel("Learning Rate")

        # Adjust plot layout
        plt.grid()
        plt.legend(loc="best")
        plt.xlim(0.001, 0.1)
        plt.ylim(0, 1)
        plt.xscale("log")
        ax = plt.gca()
        ax.xaxis.set_major_locator(ticker.FixedLocator([0.001, 0.01, 0.1]))
        ax.xaxis.set_major_formatter(ticker.FixedFormatter(["0.001", "0.01", "0.1"]))
        ax.yaxis.set_major_locator(ticker.MultipleLocator(0.2))

        # Save figure
        plt.savefig(f"opt-{it:05}.png", dpi=100)
        plt.close()
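#############################################################################
# `plot_optimizer` evaluates the LCB acquisition with skopt's helper on the
# surrogate's transformed space. A minimal standalone sketch of the same two
# calls (the quadratic objective and the bounds are illustrative):

import numpy as np
from skopt import Optimizer as SkOptimizer
from skopt.acquisition import gaussian_lcb

opt = SkOptimizer([(0.001, 0.1)], base_estimator="RF",
                  acq_func="LCB", acq_optimizer="sampling",
                  n_initial_points=3, random_state=0)
for _ in range(4):  # a few tell() calls so that opt.models is non-empty
    x = opt.ask()
    opt.tell(x, x[0] ** 2)

x_grid = np.linspace(0.001, 0.1, 100).reshape(-1, 1)
x_model = opt.space.transform(x_grid.tolist())  # acquisition lives in the transformed space
acq = gaussian_lcb(x_model, opt.models[-1], kappa=1.96)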
def fit(self, X, Y, total_duration=6e7, n_iter=100, cv_iter=None,
        optimizer=None, acq_func='gp_hedge', **kwargs):
    start = datetime.now()

    def splitter(itr):
        for train_idx, test_idx in itr:
            yield X[train_idx], Y[train_idx], X[test_idx], Y[test_idx]

    def splitter_dict(itr_dict):
        n_splits = len(list(itr_dict.values())[0])
        for i in range(n_splits):
            X_train = dict()
            Y_train = dict()
            X_test = dict()
            Y_test = dict()
            for n_obj, itr in itr_dict.items():
                train_idx = itr[i][0]
                test_idx = itr[i][1]
                X_train[n_obj] = np.copy(X[n_obj][train_idx])
                X_test[n_obj] = np.copy(X[n_obj][test_idx])
                Y_train[n_obj] = np.copy(Y[n_obj][train_idx])
                Y_test[n_obj] = np.copy(Y[n_obj][test_idx])
            yield X_train, Y_train, X_test, Y_test

    if cv_iter is None:
        cv_iter = ShuffleSplit(n_splits=3, test_size=0.1,
                               random_state=self.random_state)
    if isinstance(X, dict):
        splits = dict()
        for n_obj, arr in X.items():
            if arr.shape[0] == 1:
                splits[n_obj] = [([0], [0]) for i in range(cv_iter.n_splits)]
            else:
                splits[n_obj] = list(cv_iter.split(arr))
    else:
        splits = list(cv_iter.split(X))  # pre-compute splits for reuse

    # Here we fix a random seed for all simulations to correlate the random
    # streams:
    seed = self.random_state.randint(2**32, dtype='uint32')
    self.logger.debug('Random seed for the ranking algorithm: {}'.format(seed))
    opt_seed = self.random_state.randint(2**32, dtype='uint32')
    self.logger.debug('Random seed for the optimizer: {}'.format(opt_seed))
    gp_seed = self.random_state.randint(2**32, dtype='uint32')
    self.logger.debug('Random seed for the GP surrogate: {}'.format(gp_seed))

    if optimizer is not None:
        opt = optimizer
        self.logger.debug('Setting the provided optimizer')
        self.log_best_params(opt)
    else:
        transformed = []
        for param in self.parameter_ranges:
            transformed.append(check_dimension(param))
        self.logger.info("Parameter Space: {}".format(transformed))
        space = normalize_dimensions(transformed)
        self.logger.info("Parameter Space after transformation: {}".format(space))

        # TODO: make the base estimator passable
        base_estimator = cook_estimator("GP", space=space,
                                        random_state=gp_seed,
                                        noise="gaussian")
        opt = Optimizer(dimensions=self.parameter_ranges,
                        random_state=opt_seed,
                        base_estimator=base_estimator,
                        acq_func=acq_func,
                        **kwargs)
    self._callbacks_set_optimizer(opt)
    self._callbacks_on_optimization_begin()
    time_taken = duration_tillnow(start)
    total_duration -= time_taken
    max_fit_duration = -10000
    self.logger.info('Time left for {} iterations is {}'.format(
        n_iter, microsec_to_time(total_duration)))

    try:
        for t in range(n_iter):
            start = datetime.now()
            self._callbacks_on_iteration_begin(t)
            self.logger.info('Starting optimization iteration: {}'.format(t))
            if t > 0:
                self.log_best_params(opt)

            next_point = opt.ask()
            self.logger.info('Next parameters:\n{}'.format(next_point))
            results = []
            running_times = []
            if isinstance(X, dict):
                for X_train, Y_train, X_test, Y_test in splitter_dict(splits):
                    result, time_taken = self._fit_ranker(
                        X_train, Y_train, X_test, Y_test, next_point)
                    running_times.append(time_taken)
                    results.append(result)
            else:
                for X_train, Y_train, X_test, Y_test in splitter(splits):
                    result, time_taken = self._fit_ranker(
                        X_train, Y_train, X_test, Y_test, next_point)
                    running_times.append(time_taken)
                    results.append(result)

            results = np.array(results)
            running_times = np.array(running_times)
            mean_result = np.mean(results)
            mean_fitting_duration = np.mean(running_times)

            # Store the maximum time needed to fit and validate one
            # parameter setting across all splits, so that we can stop
            # before the remaining budget is too small for another round
            if max_fit_duration < np.sum(running_times):
                max_fit_duration = np.sum(running_times)

            self.logger.info('Validation error for the parameters is {:.4f}'.format(
                mean_result))
            self.logger.info('Time taken for the parameters is {}'.format(
                microsec_to_time(np.sum(running_times))))
            if "ps" in opt.acq_func:
                opt.tell(next_point, [mean_result, mean_fitting_duration])
            else:
                opt.tell(next_point, mean_result)
            self._callbacks_on_iteration_end(t)

            self.logger.info(
                "Main optimizer iterations done {}; saving the model".format(
                    np.array(opt.yi).shape[0]))
            dump(opt, self.optimizer_path)

            time_taken = duration_tillnow(start)
            total_duration -= time_taken
            self.logger.info('Time left for simulations is {}'.format(
                microsec_to_time(total_duration)))

            if (total_duration - max_fit_duration) < 0:
                self.logger.info(
                    'At iteration {} the maximum time required to validate one '
                    'parameter setting is {}'.format(
                        t, microsec_to_time(max_fit_duration)))
                self.logger.info(
                    'At iteration {} the simulation stops due to time '
                    'deficiency'.format(t))
                break
    except KeyboardInterrupt:
        self.logger.debug('Optimizer interrupted; saving the model at {}'.format(
            self.optimizer_path))
        self.log_best_params(opt)
    else:
        self.logger.debug(
            'Finally, fit a model on the complete training set and store it '
            'at {}'.format(self.optimizer_path))
        self._fit_params["epochs"] = self._fit_params.get("epochs", 1000)
        if "ps" in opt.acq_func:
            best_point = opt.Xi[np.argmin(np.array(opt.yi)[:, 0])]
        else:
            best_point = opt.Xi[np.argmin(opt.yi)]
        self._set_new_parameters(best_point)
        self.model = copy.copy(self.ranker)
        self.model.fit(X, Y, **self._fit_params)
    finally:
        self._callbacks_on_optimization_end()
        self.optimizer = opt
        if np.array(opt.yi).shape[0] != 0:
            dump(opt, self.optimizer_path)
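#############################################################################
# The fit() above persists the skopt Optimizer with dump() each iteration and
# can resume from it via the `optimizer` argument. A sketch of the round trip
# with skopt's own serialization helpers (the file name is illustrative):

from skopt import Optimizer, dump, load

opt = Optimizer([(0.0, 1.0)], base_estimator="GP", random_state=0)
opt.tell([[0.5]], [0.25])          # record one observation
dump(opt, "optimizer.pkl")         # persist the full optimizer state
opt_restored = load("optimizer.pkl")
next_x = opt_restored.ask()        # continue where the saved run stopped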
# (excerpt from the master loop: task dispatch and result handling)
                    task['eval_counter'] = eval_counter
                    task['start_time'] = elapsed_time
                    print('Sending task {} to worker {}'.format(eval_counter, source))
                    comm.send(task, dest=source, tag=tags.START)
                    eval_counter = eval_counter + 1
                else:
                    comm.send(None, dest=source, tag=tags.EXIT)
            elif tag == tags.DONE:
                result = data
                result['end_time'] = elapsed_time
                print('Got data from worker {}'.format(source))
                resultsList.append(result)
                x = result['x']
                y = result['cost']
                opt.tell(x, y)
                percent_improv = -100 * (y - curr_best) / curr_best
                if y < curr_best:
                    if percent_improv >= delta or curr_best == math.inf:
                        curr_best = y
                        last_imp = 0
                else:
                    last_imp = last_imp + 1
                print('curr_best={} percent_improv={} patience={}/{}'.format(
                    curr_best, percent_improv, last_imp, patience))
            elif tag == tags.EXIT:
                print('Worker {} exited.'.format(source))
                closed_workers = closed_workers + 1

        print('Search finished..')
        y_best = np.min(opt.yi)
        best_index = np.where(opt.yi == y_best)[0][0]
def run():
    start_time = time.time()
    print("run() start: {}".format(str(datetime.datetime.now())))
    comm = MPI.COMM_WORLD   # get MPI communicator object
    size = comm.size        # total number of processes
    rank = comm.rank        # rank of this process
    status = MPI.Status()   # get MPI status object
    print("ME rank is {}".format(rank))

    instance = problem.Problem()
    spaceDict = instance.space
    params = instance.params
    global problem_params
    problem_params = params
    starting_point = instance.starting_point

    # handshake to ensure working
    eqpy.OUT_put("Params")
    # initial parameter set telling us the number of times to run the loop
    initparams = eqpy.IN_get()
    (init_size, max_evals, num_workers, num_buffer, seed,
     max_threshold, n_jobs) = eval('{}'.format(initparams))

    space = [spaceDict[key] for key in params]
    print(space)

    parDict = {}
    resultsList = []
    parDict['kappa'] = 1.96
    parDict['n_jobs'] = n_jobs  # can be set to the number of cores
    init_x = []
    opt = Optimizer(space,
                    base_estimator='RF',
                    acq_optimizer='sampling',
                    acq_func='LCB',
                    acq_func_kwargs=parDict,
                    random_state=seed)

    eval_counter = 0
    askedDict = {}
    print("Master starting with {} init_size, {} max_evals, {} num_workers, "
          "{} num_buffer, {} max_threshold".format(
              init_size, max_evals, num_workers, num_buffer, max_threshold))

    x = opt.ask(n_points=init_size)
    res, resstring = create_list_of_json_strings(x)
    print("Initial design is {}".format(resstring))
    for r, xx in zip(res, x):
        askedDict[r] = xx
    eqpy.OUT_put(resstring)
    currently_out = init_size
    total_out = init_size
    results = []

    group = comm.Get_group()
    # Assumes only one adlb_server; num_workers + 1 = num_turbine_workers
    newgroup = group.Excl([num_workers + 1])
    newcomm = comm.Create_group(newgroup, 1)
    nrank = newcomm.rank

    counter_threshold = 1
    counter = 0
    end_iter_time = 0
    while eval_counter < max_evals:
        start_iter_time = time.time()
        print("\neval_counter = {}".format(eval_counter))
        data = newcomm.recv(source=MPI.ANY_SOURCE, status=status)
        counter = counter + 1
        xstring = data['x']
        x = askedDict[xstring]
        y = data['cost']
        if math.isnan(y):
            y = sys.float_info.max
        opt.tell(x, y)
        elapsed_time = float(time.time() - start_time)
        print('elapsed_time:%1.3f' % elapsed_time)
        results.append(str(data))
        eval_counter = eval_counter + 1
        currently_out = currently_out - 1

        # if jobs are finishing within 16 seconds of each other,
        # then batch the point production
        if start_iter_time - end_iter_time < 16:
            counter_threshold = max_threshold
            if max_evals - eval_counter < counter_threshold:
                counter_threshold = max_evals - eval_counter
            if counter_threshold > currently_out:
                counter_threshold = currently_out
        else:
            counter_threshold = 1
        print("counter_threshold: {}".format(counter_threshold))

        print("currently_out:{}, total_out:{}".format(currently_out, total_out))
        if (currently_out < num_workers + num_buffer
                and total_out < max_evals and counter >= counter_threshold):
            n_points = counter
            if n_points + total_out > max_evals:
                n_points = max_evals - total_out
            ts = time.time()
            x = opt.ask(n_points=n_points)
            res, resstring = create_list_of_json_strings(x)
            for r, xx in zip(res, x):
                askedDict[r] = xx
            eqpy.OUT_put(resstring)
            print('point production elapsed_time:%1.3f' % float(time.time() - ts))
            currently_out = currently_out + n_points
            total_out = total_out + n_points
            counter = 0
        end_iter_time = start_iter_time

    print('Search finishing')
    eqpy.OUT_put("DONE")
    eqpy.OUT_put(";".join(results))
# FILES FOR SAVING PROGRESS IN SKOPT FORMAT
checkpoint_callback = callbacks.CheckpointSaver("./result.pkl")
checkpoint_saver = CheckpointSaver("./checkpoint.pkl", compress=9)

# BOUNDS FOR THE FOUR VARIABLES
bounds = [(0.0, 10.0), (-30.0, 0.0), (-30.0, 0.0), (-30.0, 0.0)]
bounds2 = [(-30.0, 0.0), (-30.0, 0.0)]

# note: `noise` is a gp_minimize argument; skopt.Optimizer does not accept it
opt = Optimizer(bounds2, "ET", acq_optimizer="sampling")

for i in range(int(os.environ['OPT_STEPS'])):
    next_x = opt.ask()
    f_val = f2(next_x)
    opt.tell(next_x, f_val)
    with open('my-optimizer.pkl', 'wb') as file1:
        pickle.dump(opt, file1)

# RUN OPTIMIZATION VIA gp_minimize INSTEAD (KEPT FOR REFERENCE)
# gp_minimize(f2,
#             bounds2,
#             n_calls=int(os.environ['OPT_STEPS']),
#             random_state=int(os.environ['OPT_INIT_STEPS']),
#             callback=[checkpoint_callback],
#             noise=float(os.environ['NOISE'])**2
#             )
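#############################################################################
# The two savers above are only instantiated, never used. With skopt's
# minimize functions they are meant to be passed as callbacks and restored
# with skopt.load; a sketch, reusing f2/bounds2 and the env vars assumed
# above:
#
# res = gp_minimize(f2, bounds2,
#                   n_calls=int(os.environ['OPT_STEPS']),
#                   callback=[checkpoint_saver],
#                   noise=float(os.environ['NOISE'])**2)
#
# previous = skopt.load("./checkpoint.pkl")   # resume a later session
# gp_minimize(f2, bounds2,
#             x0=previous.x_iters, y0=previous.func_vals,
#             n_calls=10, callback=[checkpoint_saver])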
class SkOptOptimizer(PhotonBaseOptimizer):
    def __init__(
        self,
        n_configurations: int = 20,
        acq_func: str = "gp_hedge",
        acq_func_kwargs: dict = None,
    ):
        self.optimizer = None
        self.hyperparameter_list = []
        self.metric_to_optimize = ""
        self.ask = self.ask_generator()
        self.n_configurations = n_configurations
        self.acq_func = acq_func
        self.acq_func_kwargs = acq_func_kwargs
        self.maximize_metric = True
        self.constant_dictionary = {}

    def prepare(self, pipeline_elements: list, maximize_metric: bool):
        self.hyperparameter_list = []
        self.maximize_metric = maximize_metric

        # build the skopt search space
        space = []
        for pipe_element in pipeline_elements:
            if hasattr(pipe_element, "hyperparameters"):
                for name, value in pipe_element.hyperparameters.items():
                    # if there is only one value, there is nothing to optimize
                    if isinstance(value, list) and len(value) < 2:
                        self.constant_dictionary[name] = value[0]
                        continue
                    if isinstance(value, PhotonCategorical) and len(value.values) < 2:
                        self.constant_dictionary[name] = value.values[0]
                        continue
                    skopt_param = self._convert_PHOTON_to_skopt_space(value, name)
                    if skopt_param is not None:
                        space.append(skopt_param)
        if len(space) == 0:
            logger.warning("Did not find any hyperparameters to convert into skopt space")
            self.optimizer = None
        else:
            self.optimizer = Optimizer(
                space,
                "ET",
                acq_func=self.acq_func,
                acq_func_kwargs=self.acq_func_kwargs,
            )
        self.ask = self.ask_generator()

    def _convert_PHOTON_to_skopt_space(self, hyperparam: object, name: str):
        if not hyperparam:
            return None
        self.hyperparameter_list.append(name)
        if isinstance(hyperparam, PhotonCategorical):
            return skoptCategorical(hyperparam.values, name=name)
        elif isinstance(hyperparam, list):
            return skoptCategorical(hyperparam, name=name)
        elif isinstance(hyperparam, FloatRange):
            if hyperparam.range_type == "linspace":
                return Real(hyperparam.start, hyperparam.stop, name=name,
                            prior="uniform")
            elif hyperparam.range_type == "logspace":
                return Real(hyperparam.start, hyperparam.stop, name=name,
                            prior="log-uniform")
            else:
                return Real(hyperparam.start, hyperparam.stop, name=name)
        elif isinstance(hyperparam, IntegerRange):
            return Integer(hyperparam.start, hyperparam.stop, name=name)

    def ask_generator(self):
        if self.optimizer is None:
            yield {}
        else:
            for i in range(self.n_configurations):
                next_config_list = self.optimizer.ask()
                next_config_dict = {
                    self.hyperparameter_list[number]: self._convert_to_native(value)
                    for number, value in enumerate(next_config_list)
                }
                yield next_config_dict

    def _convert_to_native(self, obj):
        # convert numpy scalars to native python types
        if type(obj).__module__ == np.__name__:
            return obj.item()  # np.asscalar is deprecated
        else:
            return obj

    def tell(self, config, performance):
        # convert the dictionary to a list in the correct order
        if self.optimizer is not None:
            config_values = [config[name] for name in self.hyperparameter_list]
            best_config_metric_performance = performance[1]
            if self.maximize_metric:
                if isinstance(best_config_metric_performance, list):
                    print("BEST CONFIG METRIC PERFORMANCE: " +
                          str(best_config_metric_performance))
                    best_config_metric_performance = best_config_metric_performance[0]
                # skopt minimizes, so negate the metric we want to maximize
                best_config_metric_performance = -best_config_metric_performance
            self.optimizer.tell(config_values, best_config_metric_performance)

    def plot_evaluations(self):
        results = SkoptResults()
        results.space = self.optimizer.space
        results.x_iters = self.optimizer.Xi
        results = self._convert_categorical_hyperparameters(results)
        results.x = results.x_iters[np.argmin(self.optimizer.yi)]
        plt.figure(figsize=(10, 10))
        return plot_evaluations(results)

    def plot_objective(self):
        results = SkoptResults()
        results.space = self.optimizer.space
        results.x_iters = self.optimizer.Xi
        results = self._convert_categorical_hyperparameters(results)
        results.x = results.x_iters[np.argmin(self.optimizer.yi)]
        results.models = self.optimizer.models
        plt.figure(figsize=(10, 10))
        return plot_objective(results)

    def _convert_categorical_hyperparameters(self, results):
        parameter_types = list()
        for i, dim in enumerate(results.space.dimensions):
            if isinstance(dim, skoptCategorical):
                parameter_types.append(dim.transformer)
                setattr(results.space.dimensions[i], "categories",
                        dim.transformed_bounds)
            else:
                parameter_types.append(False)
        for i, xs in enumerate(results.x_iters):
            for k, xsk in enumerate(xs):
                if parameter_types[k]:
                    results.x_iters[i][k] = parameter_types[k].transform([xsk])
        return results
from skopt import Optimizer
from skopt.space import Real
from joblib import Parallel, delayed
# example objective taken from skopt
from skopt.benchmarks import branin

optimizer = Optimizer(
    dimensions=[Real(-5.0, 10.0), Real(0.0, 15.0)],
    random_state=1,
    base_estimator='gp'
)

for i in range(10):
    x = optimizer.ask(n_points=4)  # x is a list of n_points points
    y = Parallel(n_jobs=4)(delayed(branin)(v) for v in x)  # evaluate points in parallel
    optimizer.tell(x, y)

# takes ~ 20 sec to get here
print(min(optimizer.yi))  # print the best objective found

#############################################################################
# Note that if `n_points` is set to some integer > 0 for the `ask` method, the
# result will be a list of points, even for `n_points` = 1. If the argument is
# set to `None` (the default) then a single point (not a list of points) is
# returned.
#
# The default "minimum constant liar" [1]_ parallelization strategy is used in
# the example, which makes it possible to obtain multiple points for
# evaluation with a single call to the `ask` method with any surrogate or
# acquisition function. The parallelization strategy can be set with the
# "strategy" argument of `ask`; for the supported strategies, see the
# documentation of scikit-optimize.
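#############################################################################
# For example, the batch above could also be requested with the "constant
# liar maximum" strategy:

x = optimizer.ask(n_points=4, strategy='cl_max')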
class BayesianOptimizedExperimentQueue(ExperimentQueue):
    def __init__(self, dimensions_file: str, min_num_results_to_fit: int = 8,
                 lease_timout='2 days'):
        self.__all_experiments = pd.DataFrame()
        self.__all_experiments['status'] = [self.WAITING] * len(self.__all_experiments)
        self.__all_experiments['last_update'] = pd.Series(pd.Timestamp(float('NaN')))
        self.__all_experiments['client'] = [""] * len(self.__all_experiments)
        self.__lease_duration = pd.to_timedelta(lease_timout)
        self.__leased_experiments = []

        dims = self.__load_dimensions(dimensions_file)
        self.__dimension_names = list(dims.keys())
        self.__dimensions = list(dims.values())
        self.__min_num_results_to_fit = min_num_results_to_fit

        # Initialize
        dim_types = [check_dimension(d) for d in self.__dimensions]
        is_cat = all([isinstance(check_dimension(d), Categorical) for d in dim_types])
        if is_cat:
            transformed_dims = [check_dimension(d, transform="identity")
                                for d in self.__dimensions]
        else:
            transformed_dims = []
            for dim_type, dim in zip(dim_types, self.__dimensions):
                if isinstance(dim_type, Categorical):
                    transformed_dims.append(check_dimension(dim, transform="onehot"))
                else:
                    # make sure that the GP operates in the [0, 1] space
                    transformed_dims.append(check_dimension(dim, transform="normalize"))
        space = Space(transformed_dims)

        # Default GP
        cov_amplitude = ConstantKernel(1.0, (0.01, 1000.0))
        if is_cat:
            other_kernel = HammingKernel(length_scale=np.ones(space.transformed_n_dims))
            acq_optimizer = "lbfgs"
        else:
            other_kernel = Matern(
                length_scale=np.ones(space.transformed_n_dims),
                length_scale_bounds=[(0.01, 100)] * space.transformed_n_dims,
                nu=2.5)

        base_estimator = GaussianProcessRegressor(
            kernel=cov_amplitude * other_kernel,
            normalize_y=True, random_state=None, alpha=0.0,
            noise='gaussian', n_restarts_optimizer=2)
        self.__opt = Optimizer(self.__dimensions, base_estimator,
                               acq_optimizer="lbfgs",
                               n_random_starts=100,
                               acq_optimizer_kwargs=dict(n_points=10000))

    @property
    def all_experiments(self) -> pd.DataFrame:
        """
        :return: The DataFrame containing the details for all the experiments
            in the queue.
        """
        return self.__all_experiments

    @property
    def completed_percent(self) -> float:
        return 0.

    @property
    def leased_percent(self) -> float:
        return 0.

    @property
    def experiment_parameters(self) -> List:
        return self.__dimension_names

    def lease_new(self, client_name: str) -> Tuple[int, Dict]:
        """
        Lease a new experiment lock. Select any waiting experiments first,
        then re-lease expired ones.

        :param client_name: The name of the leasing client
        :return: a tuple (id, parameters) or None if nothing is available
        """
        experiment_params = self.__opt.ask()
        if experiment_params in self.__leased_experiments:
            experiment_params = self.__compute_alternative_params()
        self.__leased_experiments.append(experiment_params)
        # TODO: Add to all experiments, use ids

        def parse_dim_val(value, dim_type):
            if type(dim_type) is Real:
                return float(value)
            elif type(dim_type) is Integer:
                return int(value)
            return value

        return {name: parse_dim_val(value, dim_type)
                for name, dim_type, value in zip(self.__dimension_names,
                                                 self.__dimensions,
                                                 experiment_params)}, -1

    def __compute_alternative_params(self):
        # Copied directly from skopt
        transformed_bounds = np.array(self.__opt.space.transformed_bounds)
        est = clone(self.__opt.base_estimator)

        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            est.fit(self.__opt.space.transform(self.__opt.Xi), self.__opt.yi)

        X = self.__opt.space.transform(
            self.__opt.space.rvs(n_samples=self.__opt.n_points,
                                 random_state=self.__opt.rng))
        values = _gaussian_acquisition(
            X=X, model=est, y_opt=np.min(self.__opt.yi),
            acq_func='EI', acq_func_kwargs=dict(n_points=10000))
        print('original point ei: %s' % np.min(values))

        # widen or narrow the discount cone until some point still has
        # non-negligible expected improvement
        discount_width = .5
        values = self.__discount_leased_params(X, values, discount_width)
        while np.min(values) > -1e-5 and discount_width > 1e-2:
            discount_width *= .9
            values = _gaussian_acquisition(
                X=X, model=est, y_opt=np.min(self.__opt.yi),
                acq_func='EI', acq_func_kwargs=dict(n_points=10000))
            values = self.__discount_leased_params(X, values, discount_width)

        next_x = X[np.argmin(values)]
        print('new point ei: %s' % np.min(values))
        if not self.__opt.space.is_categorical:
            next_x = np.clip(next_x, transformed_bounds[:, 0],
                             transformed_bounds[:, 1])
        return self.__opt.space.inverse_transform(next_x.reshape((1, -1)))[0]

    @staticmethod
    def leased_discount(center, width, x_values):
        """Triangular (cone) discount."""
        distance_from_center = np.linalg.norm(x_values - center, 2, axis=1)
        discount = -distance_from_center / width + 1
        discount[discount < 0] = 0
        return discount

    def __discount_leased_params(self, X, values, discount_width_size):
        # the discount centers must live in the same transformed space as X
        transformed_leased_params = self.__opt.space.transform(
            np.array(self.__leased_experiments))
        discount_factor = reduce(
            lambda x, y: x * y,
            (self.leased_discount(p, discount_width_size, X)
             for p in transformed_leased_params),
            np.ones(values.shape[0]))
        out_vals = values * (1. - discount_factor)
        return out_vals

    def complete(self, experiment_id: int, parameters: Dict, client: str,
                 result: float = 0) -> None:
        """
        Declare an experiment to be completed.

        :param experiment_id: the id of the experiment or -1 if unknown
        :param client: the client id
        :param result: the output results of the experiment. This may be used
            in optimizing queues.
        """
        parameters = [parameters[n] for n in self.__dimension_names]
        if parameters in self.__leased_experiments:
            self.__leased_experiments.remove(parameters)

        do_fit_model = len(self.__opt.yi) >= self.__min_num_results_to_fit
        # Unfortunate hack: this depends on the internals.
        if do_fit_model:
            # enough results have arrived, so stop sampling at random
            self.__opt._n_random_starts = 0
        self.__opt.tell(parameters, result, fit=do_fit_model)

    def __load_dimensions(self, dimensions_file: str) -> Dict:
        with open(dimensions_file) as f:
            dimensions = json.load(f)

        def parse_dimension(specs: Dict[str, Any]):
            if specs['type'] == 'Real':
                return specs['name'], Real(specs['low'], specs['high'])
            elif specs['type'] == 'Integer':
                return specs['name'], Integer(specs['low'], specs['high'])
            elif specs['type'] == 'Categorical':
                return specs['name'], Categorical(specs['categories'])
            else:
                raise Exception('Unrecognized dimension type %s' % specs['type'])

        return OrderedDict([parse_dimension(d) for d in dimensions])
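#############################################################################
# __load_dimensions expects a JSON list of dimension specs. An illustrative
# dimensions file matching the parser above (file content, not Python; the
# names and bounds are made up):
#
# [
#   {"type": "Real", "name": "learning_rate", "low": 0.001, "high": 0.1},
#   {"type": "Integer", "name": "batch_size", "low": 8, "high": 256},
#   {"type": "Categorical", "name": "optimizer", "categories": ["adam", "sgd"]}
# ]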
class SkoptOptim(OptimBase):
    """Scikit-optimize Optimizer class."""

    def __init__(self, skopt_args=None, space=None):
        super().__init__(space)
        if skopt is None:
            raise ValueError('scikit-optimize is not installed')
        skopt_dims = []
        param_names = []
        for n, p in self.space.named_params():
            if isinstance(p, Numeric):
                if p.is_int():
                    sd = Integer(*p.bound, name=n)
                else:
                    sd = Real(*p.bound, name=n)
            elif isinstance(p, ParamCategorical):
                sd = Categorical(p.choices, name=n)
            else:
                continue
            skopt_dims.append(sd)
            param_names.append(n)
        skopt_args = skopt_args or {}
        skopt_args['dimensions'] = skopt_dims
        if 'random_state' not in skopt_args:
            skopt_args['random_state'] = int(time.time())
        self.param_names = param_names
        self.skoptim = Optimizer(**skopt_args)

    def has_next(self):
        """Return True if the Optimizer has a next set of parameters."""
        return True

    def convert_param(self, p):
        """Return the value converted from the scikit-optimize space."""
        # np.float is a deprecated alias of the builtin float; numpy float
        # scalars are instances of float, so this covers both
        if isinstance(p, float):
            return float(p)
        return p

    def _next(self):
        """Return the next set of parameters."""
        next_pt = self.skoptim.ask()
        next_params = OrderedDict()
        for n, p in zip(self.param_names, next_pt):
            next_params[n] = self.convert_param(p)
        return next_params

    def next(self, batch_size):
        """Return the next batch of parameter sets."""
        if batch_size == 1:
            return [self._next()]
        next_pts = self.skoptim.ask(n_points=batch_size)
        next_params = []
        for pt in next_pts:
            params = OrderedDict()
            for n, p in zip(self.param_names, pt):
                params[n] = self.convert_param(p)
            next_params.append(params)
        return next_params

    def step(self, estim):
        """Update Optimizer state using the Estimator's evaluation results."""
        def to_metrics(res):
            if isinstance(res, dict):
                return list(res.values())[0]
            if isinstance(res, (tuple, list)):
                return res[0]
            return res

        inputs, results = estim.get_last_results()
        skinputs = [list(inp.values()) for inp in inputs]
        skresults = [-to_metrics(r) for r in results]  # negate: skopt minimizes
        self.skoptim.tell(skinputs, skresults)
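#############################################################################
# skopt always minimizes, which is why `step` above negates the metric
# (`-to_metrics(r)`). A minimal sketch of the same convention with a bare
# Optimizer, assuming accuracy-like scores that should be maximized:

from skopt import Optimizer

opt = Optimizer([(0.0, 1.0)], random_state=0)
xs = opt.ask(n_points=3)
accs = [0.71, 0.65, 0.80]          # made-up scores for illustration
opt.tell(xs, [-a for a in accs])   # negate: maximizing acc == minimizing -acc
best_acc = -min(opt.yi)            # undo the negation when reporting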