def __init__(self, objective_function, config_space,
             sample_strategy='bo',
             time_limit_per_trial=180,
             max_runs=200,
             logging_dir='logs',
             initial_configurations=None,
             initial_batch=1,
             batch_size=3,
             task_id=None,
             rng=None):
    """Set up the bookkeeping and the BO components of a batch optimizer.

    Args:
        objective_function: callable stored for later evaluation.
        config_space: search space; it is re-seeded from ``rng`` here.
        sample_strategy: stored as-is; ``'local_penalization'`` selects the
            LPEI acquisition function, any other value selects EI.
        time_limit_per_trial: stored per-evaluation time limit.
        max_runs: stored as ``max_iterations``.
        logging_dir: forwarded to the parent constructor as ``output_dir``.
        initial_configurations: accepted but not used by this constructor.
        initial_batch: stored as ``init_batch``.
        batch_size: stored as ``batch_size``.
        task_id: forwarded to the parent constructor.
        rng: random generator; when ``None`` one is obtained from ``get_rng()``.
    """
    super().__init__(config_space, task_id, output_dir=logging_dir)
    self.logger = super()._get_logger(self.__class__.__name__)

    if rng is None:
        # get_rng() yields a (run_id, rng) pair; only the generator is needed.
        _, rng = get_rng()

    # Run-control settings.
    self.batch_size = batch_size
    self.init_batch = initial_batch
    self.max_iterations = max_runs
    self.iteration_id = 0
    self.time_limit_per_trial = time_limit_per_trial
    self.default_obj_value = MAXINT
    self.sample_strategy = sample_strategy

    # Settings handed to the acquisition optimizer below.
    self.sls_max_steps = None
    self.n_sls_iterations = 5
    self.sls_n_steps_plateau_walk = 10

    # Evaluation records.
    self.configurations = []
    self.failed_configurations = []
    self.perfs = []

    # Basic BO components. The three seeds are drawn from `rng` in this
    # exact order: config space, surrogate model, acquisition optimizer.
    self.config_space.seed(rng.randint(MAXINT))
    self.objective_function = objective_function
    types, bounds = get_types(config_space)
    # TODO: what is the feature array.
    self.model = RandomForestWithInstances(
        types=types, bounds=bounds, seed=rng.randint(MAXINT))

    use_penalization = self.sample_strategy == 'local_penalization'
    self.acquisition_function = LPEI(self.model) if use_penalization else EI(self.model)

    optimizer_rng = np.random.RandomState(seed=rng.randint(MAXINT))
    self.optimizer = InterleavedLocalAndRandomSearch(
        acquisition_function=self.acquisition_function,
        config_space=self.config_space,
        rng=optimizer_rng,
        max_steps=self.sls_max_steps,
        n_steps_plateau_walk=self.sls_n_steps_plateau_walk,
        n_sls_iterations=self.n_sls_iterations,
    )
    self._random_search = RandomSearch(
        self.acquisition_function, self.config_space, rng)
    self.random_configuration_chooser = ChooserProb(prob=0.25, rng=rng)
def build_surrogate(func_str='prf', config_space=None, rng=None, history_hpo_data=None):
    """Build a surrogate model selected by name.

    Args:
        func_str: case-insensitive selector. Recognised values are ``'prf'``,
            ``'lightgbm'``, anything starting with ``'gp'`` or ``'mfgpe'``,
            and anything starting with ``'tlbo'`` that contains ``'rgpe'``,
            ``'sgpr'`` or ``'topov3'``. For the ``tlbo`` family the text after
            the last underscore names the inner surrogate type.
        config_space: search space; must not be ``None``.
        rng: random generator providing ``randint``. When ``None`` a fresh
            ``numpy.random.RandomState`` is created, so the default no longer
            fails with ``AttributeError``.
        history_hpo_data: forwarded to the transfer-learning surrogates.

    Returns:
        The constructed surrogate object.

    Raises:
        AssertionError: if ``config_space`` is ``None``.
        ValueError: if ``func_str`` matches no known surrogate.
    """
    assert config_space is not None
    if rng is None:
        # The signature defaults rng to None, but the seed is drawn from it.
        import numpy as np
        rng = np.random.RandomState()

    func_str = func_str.lower()
    types, bounds = get_types(config_space)
    seed = rng.randint(MAXINT)

    if func_str == 'prf':
        return RandomForestWithInstances(types=types, bounds=bounds, seed=seed)
    elif func_str == 'lightgbm':
        return LightGBM(config_space, types=types, bounds=bounds, seed=seed)
    elif func_str.startswith('gp'):
        return create_gp_model(model_type=func_str,
                               config_space=config_space,
                               types=types,
                               bounds=bounds,
                               rng=rng)
    elif func_str.startswith('mfgpe'):
        from openbox.surrogate.tlbo.mfgpe import MFGPE
        inner_surrogate_type = 'prf'
        return MFGPE(config_space, history_hpo_data, seed,
                     surrogate_type=inner_surrogate_type, num_src_hpo_trial=-1)
    elif func_str.startswith('tlbo'):
        print('the current surrogate is', func_str)
        if 'rgpe' in func_str:
            from openbox.surrogate.tlbo.rgpe import RGPE
            inner_surrogate_type = func_str.split('_')[-1]
            return RGPE(config_space, history_hpo_data, seed,
                        surrogate_type=inner_surrogate_type, num_src_hpo_trial=-1)
        elif 'sgpr' in func_str:
            from openbox.surrogate.tlbo.stacking_gpr import SGPR
            inner_surrogate_type = func_str.split('_')[-1]
            return SGPR(config_space, history_hpo_data, seed,
                        surrogate_type=inner_surrogate_type, num_src_hpo_trial=-1)
        elif 'topov3' in func_str:
            from openbox.surrogate.tlbo.topo_variant3 import TOPO_V3
            inner_surrogate_type = func_str.split('_')[-1]
            return TOPO_V3(config_space, history_hpo_data, seed,
                           surrogate_type=inner_surrogate_type, num_src_hpo_trial=-1)
        else:
            raise ValueError('Invalid string %s for tlbo surrogate!' % func_str)
    else:
        raise ValueError('Invalid string %s for surrogate!' % func_str)
def __init__(self, types: np.ndarray, bounds: np.ndarray,
             s_max, eta, weight_list, fusion_method, **kwargs):
    """Create one random-forest surrogate per resource level.

    The resource levels are ``int(eta ** i)`` for ``i = 0 .. s_max``. Each
    level gets its own ``RandomForestWithInstances`` and the weight found at
    the same position in ``weight_list``.

    Args:
        types: forwarded to the parent and to every per-level forest.
        bounds: forwarded to the parent and to every per-level forest.
        s_max: highest exponent; ``s_max + 1`` levels are created.
        eta: base of the geometric progression of levels.
        weight_list: indexable weights, one per level.
        fusion_method: stored as ``self.fusion``.
        **kwargs: forwarded to the parent constructor only.
    """
    super().__init__(types=types, bounds=bounds, **kwargs)
    self.s_max = s_max
    self.eta = eta
    self.fusion = fusion_method
    self.surrogate_weight = {}
    self.surrogate_container = {}
    self.surrogate_r = []
    self.weight_list = weight_list

    budgets = np.logspace(0, self.s_max, self.s_max + 1, base=self.eta)
    for position, budget in enumerate(budgets):
        # Truncate the float budget to get the integer key of this level.
        level = int(budget)
        self.surrogate_r.append(level)
        self.surrogate_weight[level] = self.weight_list[position]
        self.surrogate_container[level] = RandomForestWithInstances(
            types=types, bounds=bounds)
class BatchBayesianOptimization(BaseFacade):
    """Bayesian optimizer that proposes and evaluates a batch of configs per iteration.

    NOTE(review): the default ``sample_strategy='bo'`` is not one of the values
    handled by ``choose_next`` (``'random'``, ``'median_imputation'``,
    ``'local_penalization'``); once the initial random batches are exhausted it
    raises ``ValueError``. Callers should pass a supported strategy explicitly.
    """

    def __init__(self, objective_function, config_space,
                 sample_strategy='bo',
                 time_limit_per_trial=180,
                 max_runs=200,
                 logging_dir='logs',
                 initial_configurations=None,
                 initial_batch=1,
                 batch_size=3,
                 task_id='default_task_id',
                 rng=None):
        """Store the run settings and build the surrogate, acquisition and optimizer.

        Args:
            objective_function: callable evaluated on each configuration.
            config_space: search space; re-seeded from ``rng`` here.
            sample_strategy: batch strategy; ``'local_penalization'`` selects
                LPEI as acquisition function, anything else selects EI.
            time_limit_per_trial: time limit passed to ``time_limit`` per evaluation.
            max_runs: number of iterations performed by ``run``.
            logging_dir: forwarded to the parent constructor as ``output_dir``.
            initial_configurations: accepted but not used by this constructor.
            initial_batch: number of initial batches sampled at random.
            batch_size: number of configurations per batch.
            task_id: forwarded to the parent constructor.
            rng: random generator; obtained from ``get_rng()`` when ``None``.
        """
        super().__init__(config_space, task_id, output_dir=logging_dir)
        self.logger = super()._get_logger(self.__class__.__name__)
        if rng is None:
            run_id, rng = get_rng()

        self.batch_size = batch_size
        self.init_batch = initial_batch
        self.max_iterations = max_runs
        self.iteration_id = 0
        self.sls_max_steps = None
        self.n_sls_iterations = 5
        self.sls_n_steps_plateau_walk = 10
        self.time_limit_per_trial = time_limit_per_trial
        self.default_obj_value = MAXINT
        self.sample_strategy = sample_strategy

        self.configurations = list()
        self.failed_configurations = list()
        self.perfs = list()

        # Initialize the basic component in BO.
        self.config_space.seed(rng.randint(MAXINT))
        self.objective_function = objective_function
        types, bounds = get_types(config_space)
        # TODO: what is the feature array.
        self.model = RandomForestWithInstances(types=types, bounds=bounds, seed=rng.randint(MAXINT))
        if self.sample_strategy == 'local_penalization':
            self.acquisition_function = LPEI(self.model)
        else:
            self.acquisition_function = EI(self.model)
        self.optimizer = InterleavedLocalAndRandomSearch(
            acquisition_function=self.acquisition_function,
            config_space=self.config_space,
            rng=np.random.RandomState(seed=rng.randint(MAXINT)),
            max_steps=self.sls_max_steps,
            n_steps_plateau_walk=self.sls_n_steps_plateau_walk,
            n_sls_iterations=self.n_sls_iterations
        )
        self._random_search = RandomSearch(
            self.acquisition_function, self.config_space, rng
        )
        self.random_configuration_chooser = ChooserProb(prob=0.25, rng=rng)

    def run(self):
        """Call ``iterate`` until ``max_iterations`` iterations have been performed."""
        while self.iteration_id < self.max_iterations:
            self.iterate()

    def iterate(self):
        """Propose one batch, evaluate every new configuration in it and record the results.

        Returns:
            A 4-tuple of parallel lists ``(configs, trial_states, perfs, trial_infos)``.
        """
        if len(self.configurations) == 0:
            X = np.array([])
        else:
            failed_configs = list() if self.max_y is None else self.failed_configurations.copy()
            X = convert_configurations_to_array(self.configurations + failed_configs)

        # Failed configurations are fed to the model with the worst observed value.
        failed_perfs = list() if self.max_y is None else [self.max_y] * len(self.failed_configurations)
        Y = np.array(self.perfs + failed_perfs, dtype=np.float64)

        config_list = self.choose_next(X, Y)
        trial_state_list = list()
        trial_info_list = list()
        perf_list = list()

        for i, config in enumerate(config_list):
            trial_state_list.append(SUCCESS)
            trial_info_list.append(None)
            perf_list.append(None)

            if config not in (self.configurations + self.failed_configurations):
                # Evaluate this configuration.
                try:
                    args, kwargs = (config,), dict()
                    timeout_status, _result = time_limit(self.objective_function,
                                                         self.time_limit_per_trial,
                                                         args=args, kwargs=kwargs)
                    if timeout_status:
                        raise TimeoutException(
                            'Timeout: time limit for this evaluation is %.1fs' % self.time_limit_per_trial)
                    else:
                        # A None result would break the `< MAXINT` comparison
                        # below; treat it as a failed evaluation, as the
                        # sequential BayesianOptimization facade does.
                        perf_list[i] = MAXINT if _result is None else _result
                except Exception as e:
                    if isinstance(e, TimeoutException):
                        trial_state_list[i] = TIMEOUT
                    else:
                        traceback.print_exc(file=sys.stdout)
                        trial_state_list[i] = FAILED
                    perf_list[i] = MAXINT
                    trial_info_list[i] = str(e)
                    self.logger.error(trial_info_list[i])

                if trial_state_list[i] == SUCCESS and perf_list[i] < MAXINT:
                    if len(self.configurations) == 0:
                        # The first successful evaluation is the improvement baseline.
                        self.default_obj_value = perf_list[i]

                    self.configurations.append(config)
                    self.perfs.append(perf_list[i])
                    self.history_container.add(config, perf_list[i])

                    self.perc = np.percentile(self.perfs, self.scale_perc)
                    self.min_y = np.min(self.perfs)
                    self.max_y = np.max(self.perfs)
                else:
                    self.failed_configurations.append(config)
            else:
                self.logger.debug('This configuration has been evaluated! Skip it.')
                if config in self.configurations:
                    config_idx = self.configurations.index(config)
                    trial_state_list[i], perf_list[i] = SUCCESS, self.perfs[config_idx]
                else:
                    trial_state_list[i], perf_list[i] = FAILED, MAXINT

        self.iteration_id += 1
        self.logger.info(
            'Iteration-%d, objective improvement: %.4f' % (
                self.iteration_id, max(0, self.default_obj_value - min(perf_list))))
        return config_list, trial_state_list, perf_list, trial_info_list

    def choose_next(self, X: np.ndarray, Y: np.ndarray):
        """Select a batch of ``batch_size`` configurations to evaluate next.

        Args:
            X: array of already evaluated configurations (one row each).
            Y: objective values aligned with ``X``.

        Returns:
            List of configurations.

        Raises:
            ValueError: if ``sample_strategy`` is not supported.
        """
        _config_num = X.shape[0]
        batch_configs_list = list()

        # Random sampling during the initial batches or when explicitly requested.
        if _config_num < self.init_batch * self.batch_size or self.sample_strategy == 'random':
            for i in range(self.batch_size):
                batch_configs_list.append(self.sample_config())
            return batch_configs_list

        if self.sample_strategy == 'median_imputation':
            # NOTE(review): despite the strategy name the imputed value is the
            # mean of Y, not the median - confirm this is intended.
            estimated_y = np.mean(Y)
            batch_history_container = copy.deepcopy(self.history_container)

            for i in range(self.batch_size):
                self.model.train(X, Y)

                incumbent_value = batch_history_container.get_incumbents()[0][1]
                self.acquisition_function.update(model=self.model, eta=incumbent_value,
                                                 num_data=len(batch_history_container.data))

                challengers = self.optimizer.maximize(
                    runhistory=batch_history_container,
                    num_points=5000,
                    random_configuration_chooser=self.random_configuration_chooser
                )

                # Walk the ranked challengers until one can be added to the
                # temporary history; a ValueError from add() is treated as
                # "already present". NOTE(review): running past the last
                # challenger raises IndexError.
                is_repeated_config = True
                repeated_time = 0
                curr_batch_config = None
                while is_repeated_config:
                    try:
                        curr_batch_config = challengers.challengers[repeated_time]
                        batch_history_container.add(curr_batch_config, estimated_y)
                    except ValueError:
                        is_repeated_config = True
                        repeated_time += 1
                    else:
                        is_repeated_config = False

                batch_configs_list.append(curr_batch_config)
                # Pretend the chosen config was observed with the imputed value.
                X = np.append(X, curr_batch_config.get_array().reshape(1, -1), axis=0)
                Y = np.append(Y, estimated_y)
                estimated_y = np.mean(Y)

        elif self.sample_strategy == 'local_penalization':
            self.model.train(X, Y)
            incumbent_value = self.history_container.get_incumbents()[0][1]
            # L = self.estimate_L(X)
            for i in range(self.batch_size):
                # The acquisition function is told which configs are already in the batch.
                self.acquisition_function.update(model=self.model, eta=incumbent_value,
                                                 num_data=len(self.history_container.data),
                                                 batch_configs=batch_configs_list)

                challengers = self.optimizer.maximize(
                    runhistory=self.history_container,
                    num_points=5000,
                    random_configuration_chooser=self.random_configuration_chooser
                )
                batch_configs_list.append(challengers.challengers[0])
        else:
            raise ValueError('Invalid sampling strategy - %s.' % self.sample_strategy)
        return batch_configs_list

    def sample_config(self):
        """Sample a configuration that has not been evaluated yet.

        After 10000 unsuccessful attempts one more sample is drawn and returned
        even if it may be a duplicate.
        """
        config = None
        _sample_cnt, _sample_limit = 0, 10000
        # Neither list changes inside the loop, so build the lookup list once.
        evaluated = self.configurations + self.failed_configurations
        while True:
            _sample_cnt += 1
            config = self.config_space.sample_configuration()
            if config not in evaluated:
                break
            if _sample_cnt >= _sample_limit:
                config = self.config_space.sample_configuration()
                break
        return config
def build_surrogate(func_str='gp', config_space=None, rng=None, history_hpo_data=None):
    """Build a surrogate model selected by name.

    Args:
        func_str: case-insensitive selector. Recognised values are ``'prf'``
            (falls back to the scikit-learn based forest when the primary
            implementation raises ``ModuleNotFoundError``), ``'sk_prf'``,
            ``'lightgbm'``, ``'random_forest'``, anything starting with
            ``'gp'`` or ``'mfgpe'``, and anything starting with ``'tlbo'``
            that contains ``'rgpe'``, ``'sgpr'`` or ``'topov3'``. For the
            ``tlbo`` family the text after the last underscore names the
            inner surrogate type.
        config_space: search space; must not be ``None``.
        rng: random generator providing ``randint``. When ``None`` a fresh
            ``numpy.random.RandomState`` is created, so the default no longer
            fails with ``AttributeError``.
        history_hpo_data: forwarded to the transfer-learning surrogates.

    Returns:
        The constructed surrogate object.

    Raises:
        AssertionError: if ``config_space`` is ``None``.
        ValueError: if ``func_str`` matches no known surrogate.
    """
    assert config_space is not None
    if rng is None:
        # The signature defaults rng to None, but the seed is drawn from it.
        import numpy as np
        rng = np.random.RandomState()

    func_str = func_str.lower()
    types, bounds = get_types(config_space)
    seed = rng.randint(MAXINT)

    if func_str == 'prf':
        try:
            from openbox.surrogate.base.rf_with_instances import RandomForestWithInstances
            return RandomForestWithInstances(types=types, bounds=bounds, seed=seed)
        except ModuleNotFoundError:
            from openbox.surrogate.base.rf_with_instances_sklearn import skRandomForestWithInstances
            print(
                '[Build Surrogate] Use probabilistic random forest based on scikit-learn. For better performance, '
                'please install pyrfr: '
                'https://open-box.readthedocs.io/en/latest/installation/install_pyrfr.html'
            )
            return skRandomForestWithInstances(types=types, bounds=bounds, seed=seed)
    elif func_str == 'sk_prf':
        from openbox.surrogate.base.rf_with_instances_sklearn import skRandomForestWithInstances
        return skRandomForestWithInstances(types=types, bounds=bounds, seed=seed)
    elif func_str == 'lightgbm':
        from openbox.surrogate.lightgbm import LightGBM
        return LightGBM(config_space, types=types, bounds=bounds, seed=seed)
    elif func_str == 'random_forest':
        from openbox.surrogate.skrf import RandomForestSurrogate
        return RandomForestSurrogate(config_space, types=types, bounds=bounds, seed=seed)
    elif func_str.startswith('gp'):
        from openbox.surrogate.base.build_gp import create_gp_model
        return create_gp_model(model_type=func_str,
                               config_space=config_space,
                               types=types,
                               bounds=bounds,
                               rng=rng)
    elif func_str.startswith('mfgpe'):
        from openbox.surrogate.tlbo.mfgpe import MFGPE
        inner_surrogate_type = 'prf'
        return MFGPE(config_space, history_hpo_data, seed,
                     surrogate_type=inner_surrogate_type, num_src_hpo_trial=-1)
    elif func_str.startswith('tlbo'):
        print('the current surrogate is', func_str)
        if 'rgpe' in func_str:
            from openbox.surrogate.tlbo.rgpe import RGPE
            inner_surrogate_type = func_str.split('_')[-1]
            return RGPE(config_space, history_hpo_data, seed,
                        surrogate_type=inner_surrogate_type, num_src_hpo_trial=-1)
        elif 'sgpr' in func_str:
            from openbox.surrogate.tlbo.stacking_gpr import SGPR
            inner_surrogate_type = func_str.split('_')[-1]
            return SGPR(config_space, history_hpo_data, seed,
                        surrogate_type=inner_surrogate_type, num_src_hpo_trial=-1)
        elif 'topov3' in func_str:
            from openbox.surrogate.tlbo.topo_variant3 import TOPO_V3
            inner_surrogate_type = func_str.split('_')[-1]
            return TOPO_V3(config_space, history_hpo_data, seed,
                           surrogate_type=inner_surrogate_type, num_src_hpo_trial=-1)
        else:
            raise ValueError('Invalid string %s for tlbo surrogate!' % func_str)
    else:
        raise ValueError('Invalid string %s for surrogate!' % func_str)
class BayesianOptimization(BaseFacade):
    """Sequential Bayesian optimizer: one configuration is proposed and evaluated per iteration."""

    def __init__(self, objective_function, config_space,
                 sample_strategy='bo',
                 time_limit_per_trial=180,
                 max_runs=200,
                 logging_dir='logs',
                 initial_configurations=None,
                 initial_runs=3,
                 task_id='default_task_id',
                 rng=None):
        """Store the run settings and build the surrogate, acquisition and optimizer.

        Args:
            objective_function: callable evaluated on each configuration.
            config_space: search space; re-seeded from ``rng`` here.
            sample_strategy: ``'bo'`` or ``'random'`` (see ``choose_next``).
            time_limit_per_trial: time limit passed to ``time_limit`` per evaluation.
            max_runs: number of iterations performed by ``run``.
            logging_dir: forwarded to the parent constructor as ``output_dir``.
            initial_configurations: accepted but not used by this constructor.
            initial_runs: number of observations gathered before the model is used.
            task_id: forwarded to the parent constructor.
            rng: random generator; obtained from ``get_rng()`` when ``None``.
        """
        super().__init__(config_space, task_id, output_dir=logging_dir)
        self.logger = super()._get_logger(self.__class__.__name__)

        if rng is None:
            # get_rng() yields a (run_id, rng) pair; only the generator is needed.
            _, rng = get_rng()

        # Run-control settings.
        self.init_num = initial_runs
        self.max_iterations = max_runs
        self.iteration_id = 0
        self.time_limit_per_trial = time_limit_per_trial
        self.default_obj_value = MAXINT
        self.sample_strategy = sample_strategy

        # Settings handed to the acquisition optimizer below.
        self.sls_max_steps = None
        self.n_sls_iterations = 5
        self.sls_n_steps_plateau_walk = 10

        # Evaluation records.
        self.configurations = []
        self.failed_configurations = []
        self.perfs = []

        # Basic BO components. Seeds are drawn from `rng` in this exact
        # order: config space, surrogate model, acquisition optimizer.
        self.config_space.seed(rng.randint(MAXINT))
        self.objective_function = objective_function
        types, bounds = get_types(config_space)
        # TODO: what is the feature array.
        self.model = RandomForestWithInstances(
            types=types, bounds=bounds, seed=rng.randint(MAXINT))
        self.acquisition_function = EI(self.model)

        optimizer_rng = np.random.RandomState(seed=rng.randint(MAXINT))
        self.optimizer = InterleavedLocalAndRandomSearch(
            acquisition_function=self.acquisition_function,
            config_space=self.config_space,
            rng=optimizer_rng,
            max_steps=self.sls_max_steps,
            n_steps_plateau_walk=self.sls_n_steps_plateau_walk,
            n_sls_iterations=self.n_sls_iterations,
        )
        self._random_search = RandomSearch(
            self.acquisition_function, self.config_space, rng)
        self.random_configuration_chooser = ChooserProb(prob=0.25, rng=rng)

    def run(self):
        """Keep iterating until ``max_iterations`` iterations have been performed."""
        while self.iteration_id < self.max_iterations:
            self.iterate()

    def iterate(self):
        """Propose one configuration, evaluate it if it is new, and record the outcome.

        Returns:
            Tuple ``(config, trial_state, perf, trial_info)``.
        """
        # Failed configurations only enter the training data once a worst
        # observed value exists to stand in for their result.
        has_worst = self.max_y is not None
        if not self.configurations:
            X = np.array([])
        else:
            known_failures = self.failed_configurations.copy() if has_worst else []
            X = convert_configurations_to_array(self.configurations + known_failures)
        penalties = [self.max_y] * len(self.failed_configurations) if has_worst else []
        Y = np.array(self.perfs + penalties, dtype=np.float64)

        config = self.choose_next(X, Y)

        trial_state = SUCCESS
        trial_info = None

        already_seen = config in (self.configurations + self.failed_configurations)
        if already_seen:
            self.logger.debug('This configuration has been evaluated! Skip it.')
            if config in self.configurations:
                position = self.configurations.index(config)
                trial_state, perf = SUCCESS, self.perfs[position]
            else:
                trial_state, perf = FAILED, MAXINT
        else:
            # Evaluate this configuration.
            try:
                call_args, call_kwargs = (config,), {}
                timeout_status, _result = time_limit(
                    self.objective_function,
                    self.time_limit_per_trial,
                    args=call_args, kwargs=call_kwargs)
                if timeout_status:
                    raise TimeoutException(
                        'Timeout: time limit for this evaluation is %.1fs' % self.time_limit_per_trial)
                perf = _result if _result is not None else MAXINT
            except Exception as e:
                timed_out = isinstance(e, TimeoutException)
                if not timed_out:
                    traceback.print_exc(file=sys.stdout)
                trial_state = TIMEOUT if timed_out else FAILED
                perf = MAXINT
                trial_info = str(e)
                self.logger.error(trial_info)

            succeeded = trial_state == SUCCESS and perf < MAXINT
            if not succeeded:
                self.failed_configurations.append(config)
            else:
                if not self.configurations:
                    # The first successful evaluation is the improvement baseline.
                    self.default_obj_value = perf

                self.configurations.append(config)
                self.perfs.append(perf)
                self.history_container.add(config, perf)

                self.perc = np.percentile(self.perfs, self.scale_perc)
                self.min_y = np.min(self.perfs)
                self.max_y = np.max(self.perfs)

        self.iteration_id += 1
        improvement = max(0, self.default_obj_value - perf)
        self.logger.info(
            'Iteration-%d, objective improvement: %.4f' % (self.iteration_id, improvement))
        return config, trial_state, perf, trial_info

    def choose_next(self, X: np.ndarray, Y: np.ndarray):
        """Pick the next configuration to evaluate.

        While fewer than ``init_num`` observations exist, the default
        configuration is returned if it is still unseen, otherwise the first
        result of the random search. Afterwards the choice follows
        ``sample_strategy``.

        Raises:
            ValueError: if ``sample_strategy`` is neither ``'random'`` nor ``'bo'``.
        """
        observed = X.shape[0]
        if observed < self.init_num:
            default_config = self.config_space.get_default_configuration()
            if default_config not in (self.configurations + self.failed_configurations):
                return default_config
            return self._random_search.maximize(
                runhistory=self.history_container, num_points=1)[0]

        strategy = self.sample_strategy
        if strategy == 'random':
            return self.sample_config()
        if strategy != 'bo':
            raise ValueError('Invalid sampling strategy - %s.' % self.sample_strategy)

        # Interleave random configurations as decided by the chooser.
        if self.random_configuration_chooser.check(self.iteration_id):
            return self.sample_config()

        self.model.train(X, Y)
        best_so_far = self.history_container.get_incumbents()[0][1]
        self.acquisition_function.update(
            model=self.model,
            eta=best_so_far,
            num_data=len(self.history_container.data))
        ranked = self.optimizer.maximize(
            runhistory=self.history_container,
            num_points=5000,
            random_configuration_chooser=self.random_configuration_chooser,
        )
        return ranked.challengers[0]

    def sample_config(self):
        """Sample a configuration that has not been evaluated yet.

        After 10000 unsuccessful attempts one more sample is drawn and
        returned even if it may be a duplicate.
        """
        max_attempts = 10000
        seen = self.configurations + self.failed_configurations
        attempts = 0
        while True:
            attempts += 1
            candidate = self.config_space.sample_configuration()
            if candidate not in seen:
                return candidate
            if attempts >= max_attempts:
                return self.config_space.sample_configuration()
def __init__(self, objective_func,
             config_space: ConfigurationSpace,
             R,
             eta=3,
             skip_outer_loop=0,
             rand_prob=0.3,
             use_bohb=False,
             init_weight=None,
             update_enable=True,
             weight_method='rank_loss_p_norm',
             fusion_method='idp',
             power_num=3,
             random_state=1,
             method_id='mqAsyncMFES',
             restart_needed=True,
             time_limit_per_trial=600,
             runtime_limit=None,
             seed=1,
             ip='',
             port=13579,
             authkey=b'abc'):
    """Initialise the asynchronous MFES optimizer.

    Args:
        objective_func, config_space, R, eta, skip_outer_loop, random_state,
        method_id, restart_needed, time_limit_per_trial, runtime_limit, ip,
        port, authkey: forwarded unchanged to the parent constructor.
        rand_prob: probability handed to the random configuration chooser.
        use_bohb: when true a single ``RandomForestWithInstances`` is used as
            surrogate instead of the ``RandomForestEnsemble``.
        init_weight: initial weights, one per resource level
            (``s_max + 1`` values). Defaults to a uniform weight on all levels
            except the last, which gets 0.
        update_enable: whether ``choose_next`` may call ``update_weight``.
        weight_method: name of the weight learning method.
        fusion_method: forwarded to ``RandomForestEnsemble``.
        power_num: parameter for weight method ``rank_loss_p_norm``.
        seed: seed for the surrogate (bohb mode) and for ``self.rng``.

    Raises:
        AssertionError: if ``init_weight`` does not have ``s_max + 1`` entries.

    NOTE(review): ``self.s_max``, ``self.logger``, ``self.method_name`` and
    ``self.config_space`` are read but not assigned here; presumably the
    parent constructor sets them. With ``s_max == 0`` the default weight
    expression raises ``ZeroDivisionError``.
    """
    super().__init__(objective_func, config_space, R, eta=eta,
                     skip_outer_loop=skip_outer_loop,
                     random_state=random_state,
                     method_id=method_id,
                     restart_needed=restart_needed,
                     time_limit_per_trial=time_limit_per_trial,
                     runtime_limit=runtime_limit,
                     ip=ip, port=port, authkey=authkey)
    self.seed = seed
    self.last_n_iteration = None
    self.use_bohb_strategy = use_bohb
    self.update_enable = update_enable
    self.fusion_method = fusion_method
    # Parameter for weight method `rank_loss_p_norm`.
    self.power_num = power_num
    # Specify the weight learning method.
    self.weight_method = weight_method
    self.weight_update_id = 0
    self.weight_changed_cnt = 0

    if init_weight is None:
        init_weight = [1. / self.s_max] * self.s_max + [0.]
    assert len(init_weight) == (self.s_max + 1)
    # Wrap the value in a 1-tuple: a tuple-typed init_weight would otherwise
    # be unpacked as several format arguments and raise TypeError.
    self.logger.info("Initialize weight to %s" % (init_weight[:self.s_max + 1],))
    types, bounds = get_types(config_space)

    if not self.use_bohb_strategy:
        self.surrogate = RandomForestEnsemble(types, bounds, self.s_max, self.eta,
                                              init_weight, self.fusion_method)
    else:
        self.surrogate = RandomForestWithInstances(types, bounds, seed=self.seed)
    self.acquisition_function = EI(model=self.surrogate)

    self.iterate_id = 0
    self.iterate_r = list()
    self.hist_weights = list()

    # Saving evaluation statistics in Hyperband.
    self.target_x = dict()
    self.target_y = dict()
    for index, item in enumerate(
            np.logspace(0, self.s_max, self.s_max + 1, base=self.eta)):
        # Resource levels are the truncated powers eta**0 .. eta**s_max.
        r = int(item)
        self.iterate_r.append(r)
        self.target_x[r] = list()
        self.target_y[r] = list()

    # BO optimizer settings.
    self.history_container = HistoryContainer(task_id=self.method_name)
    self.sls_max_steps = None
    self.n_sls_iterations = 5
    self.sls_n_steps_plateau_walk = 10
    self.rng = np.random.RandomState(seed=self.seed)
    self.acq_optimizer = InterleavedLocalAndRandomSearch(
        acquisition_function=self.acquisition_function,
        config_space=self.config_space,
        rng=self.rng,
        max_steps=self.sls_max_steps,
        n_steps_plateau_walk=self.sls_n_steps_plateau_walk,
        n_sls_iterations=self.n_sls_iterations,
        rand_prob=0.0,
    )
    self.random_configuration_chooser = ChooserProb(prob=rand_prob, rng=self.rng)
    self.random_check_idx = 0
class async_mqMFES(async_mqHyperband):
    """
    The implementation of Asynchronous MFES (combine ASHA and MFES)
    """

    def __init__(self, objective_func,
                 config_space: ConfigurationSpace,
                 R,
                 eta=3,
                 skip_outer_loop=0,
                 rand_prob=0.3,
                 use_bohb=False,
                 init_weight=None,
                 update_enable=True,
                 weight_method='rank_loss_p_norm',
                 fusion_method='idp',
                 power_num=3,
                 random_state=1,
                 method_id='mqAsyncMFES',
                 restart_needed=True,
                 time_limit_per_trial=600,
                 runtime_limit=None,
                 seed=1,
                 ip='',
                 port=13579,
                 authkey=b'abc'):
        """Initialise the asynchronous MFES optimizer.

        Args:
            objective_func, config_space, R, eta, skip_outer_loop,
            random_state, method_id, restart_needed, time_limit_per_trial,
            runtime_limit, ip, port, authkey: forwarded unchanged to the
            parent constructor.
            rand_prob: probability handed to the random configuration chooser.
            use_bohb: when true a single ``RandomForestWithInstances`` is used
                as surrogate instead of the ``RandomForestEnsemble``.
            init_weight: initial weights, one per resource level
                (``s_max + 1`` values). Defaults to a uniform weight on all
                levels except the last, which gets 0.
            update_enable: whether ``choose_next`` may call ``update_weight``.
            weight_method: ``'rank_loss_p_norm'`` or ``'rank_loss_prob'``.
            fusion_method: forwarded to ``RandomForestEnsemble``.
            power_num: parameter for weight method ``rank_loss_p_norm``.
            seed: seed for the surrogate (bohb mode) and for ``self.rng``.

        Raises:
            AssertionError: if ``init_weight`` does not have ``s_max + 1`` entries.

        NOTE(review): ``self.s_max``, ``self.logger``, ``self.method_name``
        and ``self.config_space`` are presumably set by the parent
        constructor. With ``s_max == 0`` the default weight expression raises
        ``ZeroDivisionError``.
        """
        super().__init__(objective_func, config_space, R, eta=eta,
                         skip_outer_loop=skip_outer_loop,
                         random_state=random_state,
                         method_id=method_id,
                         restart_needed=restart_needed,
                         time_limit_per_trial=time_limit_per_trial,
                         runtime_limit=runtime_limit,
                         ip=ip, port=port, authkey=authkey)
        self.seed = seed
        self.last_n_iteration = None
        self.use_bohb_strategy = use_bohb
        self.update_enable = update_enable
        self.fusion_method = fusion_method
        # Parameter for weight method `rank_loss_p_norm`.
        self.power_num = power_num
        # Specify the weight learning method.
        self.weight_method = weight_method
        self.weight_update_id = 0
        self.weight_changed_cnt = 0

        if init_weight is None:
            init_weight = [1. / self.s_max] * self.s_max + [0.]
        assert len(init_weight) == (self.s_max + 1)
        # Wrap the value in a 1-tuple: a tuple-typed init_weight would
        # otherwise be unpacked as several format arguments (TypeError).
        self.logger.info("Initialize weight to %s" % (init_weight[:self.s_max + 1],))
        types, bounds = get_types(config_space)

        if not self.use_bohb_strategy:
            self.surrogate = RandomForestEnsemble(types, bounds, self.s_max, self.eta,
                                                  init_weight, self.fusion_method)
        else:
            self.surrogate = RandomForestWithInstances(types, bounds, seed=self.seed)
        self.acquisition_function = EI(model=self.surrogate)

        self.iterate_id = 0
        self.iterate_r = list()
        self.hist_weights = list()

        # Saving evaluation statistics in Hyperband.
        self.target_x = dict()
        self.target_y = dict()
        for index, item in enumerate(
                np.logspace(0, self.s_max, self.s_max + 1, base=self.eta)):
            # Resource levels are the truncated powers eta**0 .. eta**s_max.
            r = int(item)
            self.iterate_r.append(r)
            self.target_x[r] = list()
            self.target_y[r] = list()

        # BO optimizer settings.
        self.history_container = HistoryContainer(task_id=self.method_name)
        self.sls_max_steps = None
        self.n_sls_iterations = 5
        self.sls_n_steps_plateau_walk = 10
        self.rng = np.random.RandomState(seed=self.seed)
        self.acq_optimizer = InterleavedLocalAndRandomSearch(
            acquisition_function=self.acquisition_function,
            config_space=self.config_space,
            rng=self.rng,
            max_steps=self.sls_max_steps,
            n_steps_plateau_walk=self.sls_n_steps_plateau_walk,
            n_sls_iterations=self.n_sls_iterations,
            rand_prob=0.0,
        )
        self.random_configuration_chooser = ChooserProb(prob=rand_prob, rng=self.rng)
        self.random_check_idx = 0

    def update_observation(self, config, perf, n_iteration):
        """Record a finished evaluation and refit the surrogate for its resource level.

        Args:
            config: the configuration that finished.
            perf: its observed objective value.
            n_iteration: resource level the configuration was evaluated with.

        Raises:
            AssertionError: if no RUNNING job for ``config`` exists in the rung.
        """
        rung_id = self.get_rung_id(self.bracket, n_iteration)

        # Mark the matching job as completed; jobs are mutable
        # [status, config, perf, extra_conf] records.
        updated = False
        for job in self.bracket[rung_id]['jobs']:
            _job_status, _config, _perf, _extra_conf = job
            if _config == config:
                assert _job_status == RUNNING
                job[0] = COMPLETED
                job[2] = perf
                updated = True
                break
        assert updated
        # print('=== bracket after update_observation:', self.get_bracket_status(self.bracket))

        # Configurations of this rung without a stored result get the median
        # of the results observed so far as imputed value.
        # NOTE(review): this runs before `config` is appended below, so the
        # finished config is also counted as running, and the median of an
        # empty list is NaN on the first observation - confirm intended.
        configs_running = list()
        for _config in self.bracket[rung_id]['configs']:
            if _config not in self.target_x[n_iteration]:
                configs_running.append(_config)
        value_imputed = np.median(self.target_y[n_iteration])

        n_iteration = int(n_iteration)
        self.target_x[n_iteration].append(config)
        self.target_y[n_iteration].append(perf)

        if n_iteration == self.R:
            self.incumbent_configs.append(config)
            self.incumbent_perfs.append(perf)
            # Update history container.
            self.history_container.add(config, perf)

        # Refit the ensemble surrogate model.
        configs_train = self.target_x[n_iteration] + configs_running
        results_train = self.target_y[n_iteration] + [value_imputed] * len(configs_running)
        results_train = np.array(std_normalization(results_train), dtype=np.float64)
        if not self.use_bohb_strategy:
            self.surrogate.train(
                convert_configurations_to_array(configs_train),
                results_train,
                r=n_iteration)
        else:
            # The single bohb-style surrogate only learns from full-resource results.
            if n_iteration == self.R:
                self.surrogate.train(
                    convert_configurations_to_array(configs_train),
                    results_train)

    def choose_next(self):
        """
        sample a config according to MFES. give iterations according to Hyperband strategy.

        Returns:
            Tuple ``(next_config, next_n_iteration, next_extra_conf)``.
        """
        next_config = None
        next_n_iteration = self.get_next_n_iteration()
        next_rung_id = self.get_rung_id(self.bracket, next_n_iteration)

        # Update weight when the inner loop of hyperband is finished
        if self.last_n_iteration != next_n_iteration and not self.use_bohb_strategy:
            if self.update_enable and self.weight_update_id > self.s_max:
                self.update_weight()
            self.weight_update_id += 1
        self.last_n_iteration = next_n_iteration

        # sample config
        excluded_configs = self.bracket[next_rung_id]['configs']
        if len(self.target_y[self.iterate_r[-1]]) == 0:
            # No result at the highest resource level yet: sample at random.
            next_config = sample_configuration(
                self.config_space, excluded_configs=excluded_configs)
        else:
            # Like BOHB, sample a fixed percentage of random configurations.
            self.random_check_idx += 1
            if self.random_configuration_chooser.check(self.random_check_idx):
                next_config = sample_configuration(
                    self.config_space, excluded_configs=excluded_configs)
            else:
                acq_configs = self.get_bo_candidates()
                for config in acq_configs:
                    if config not in self.bracket[next_rung_id]['configs']:
                        next_config = config
                        break
                if next_config is None:
                    self.logger.warning(
                        'Cannot get a non duplicate configuration from bo candidates. '
                        'Sample a random one.')
                    next_config = sample_configuration(
                        self.config_space, excluded_configs=excluded_configs)

        next_extra_conf = {}
        return next_config, next_n_iteration, next_extra_conf

    def get_bo_candidates(self):
        """Return the challengers ranked by the acquisition optimizer."""
        std_incumbent_value = np.min(
            std_normalization(self.target_y[self.iterate_r[-1]]))
        # Update surrogate model in acquisition function.
        self.acquisition_function.update(model=self.surrogate,
                                         eta=std_incumbent_value,
                                         num_data=len(self.incumbent_configs))

        challengers = self.acq_optimizer.maximize(
            runhistory=self.history_container,
            num_points=5000,
        )
        return challengers.challengers

    @staticmethod
    def calculate_preserving_order_num(y_pred, y_true):
        """Count the pairs whose ordering agrees between predictions and truth.

        Returns:
            Tuple ``(order_preserving_num, total_pair_num)`` over all index
            pairs ``i < j``.

        Raises:
            AssertionError: if the two sequences differ in length.
        """
        array_size = len(y_pred)
        assert len(y_true) == array_size

        total_pair_num, order_preserving_num = 0, 0
        for idx in range(array_size):
            for inner_idx in range(idx + 1, array_size):
                if bool(y_true[idx] > y_true[inner_idx]) == bool(
                        y_pred[idx] > y_pred[inner_idx]):
                    order_preserving_num += 1
                total_pair_num += 1
        return order_preserving_num, total_pair_num

    def update_weight(self):
        """Recompute the weight of every per-level surrogate and save the weight history.

        With fewer than 3 results at the highest resource level the current
        weights are kept. Otherwise each surrogate is scored by how well it
        preserves the ordering of those results; the highest level is scored
        through 5-fold cross validation (or 0 when fewer than 10 results exist).

        Raises:
            ValueError: if ``weight_method`` is not supported.
        """
        start_time = time.time()

        max_r = self.iterate_r[-1]
        incumbent_configs = self.target_x[max_r]
        test_x = convert_configurations_to_array(incumbent_configs)
        test_y = np.array(self.target_y[max_r], dtype=np.float64)

        r_list = self.surrogate.surrogate_r
        K = len(r_list)
        fold_num = 5

        if len(test_y) >= 3:
            # Get previous weights
            if self.weight_method == 'rank_loss_p_norm':
                preserving_order_p = list()
                for i, r in enumerate(r_list):
                    if i != K - 1:
                        mean, var = self.surrogate.surrogate_container[r].predict(test_x)
                        tmp_y = np.reshape(mean, -1)
                        preorder_num, pair_num = self.calculate_preserving_order_num(
                            tmp_y, test_y)
                        preserving_order_p.append(preorder_num / pair_num)
                    else:
                        if len(test_y) < 2 * fold_num:
                            preserving_order_p.append(0)
                        else:
                            # 5-fold cross validation.
                            kfold = KFold(n_splits=fold_num)
                            # Float buffer: an integer array would silently
                            # truncate the predictions on assignment.
                            cv_pred = np.zeros(len(test_y), dtype=np.float64)
                            types, bounds = get_types(self.config_space)
                            for train_idx, valid_idx in kfold.split(test_x):
                                train_configs, train_y = test_x[train_idx], test_y[train_idx]
                                valid_configs = test_x[valid_idx]
                                _surrogate = RandomForestWithInstances(
                                    types=types, bounds=bounds)
                                _surrogate.train(train_configs, train_y)
                                pred, _ = _surrogate.predict(valid_configs)
                                cv_pred[valid_idx] = pred.reshape(-1)
                            preorder_num, pair_num = self.calculate_preserving_order_num(
                                cv_pred, test_y)
                            preserving_order_p.append(preorder_num / pair_num)

                # Weights are the normalised p-th powers of the agreement ratios.
                trans_order_weight = np.array(preserving_order_p)
                power_sum = np.sum(np.power(trans_order_weight, self.power_num))
                new_weights = np.power(trans_order_weight, self.power_num) / power_sum

            elif self.weight_method == 'rank_loss_prob':
                # For basic surrogate i=1:K-1.
                mean_list, var_list = list(), list()
                for i, r in enumerate(r_list[:-1]):
                    mean, var = self.surrogate.surrogate_container[r].predict(test_x)
                    mean_list.append(np.reshape(mean, -1))
                    var_list.append(np.reshape(var, -1))

                sample_num = 100
                min_probability_array = [0] * K
                for _ in range(sample_num):
                    order_preseving_nums = list()

                    # For basic surrogate i=1:K-1.
                    # NOTE(review): the predicted variance is passed as the
                    # `scale` (standard deviation) of the normal - confirm.
                    for idx in range(K - 1):
                        sampled_y = self.rng.normal(mean_list[idx], var_list[idx])
                        _num, _ = self.calculate_preserving_order_num(sampled_y, test_y)
                        order_preseving_nums.append(_num)

                    # For basic surrogate i=K. cv
                    if len(test_y) < 2 * fold_num:
                        order_preseving_nums.append(0)
                    else:
                        # 5-fold cross validation.
                        kfold = KFold(n_splits=fold_num)
                        # Float buffer: see the note in the p-norm branch.
                        cv_pred = np.zeros(len(test_y), dtype=np.float64)
                        types, bounds = get_types(self.config_space)
                        for train_idx, valid_idx in kfold.split(test_x):
                            train_configs, train_y = test_x[train_idx], test_y[train_idx]
                            valid_configs = test_x[valid_idx]
                            _surrogate = RandomForestWithInstances(
                                types=types, bounds=bounds)
                            _surrogate.train(train_configs, train_y)
                            _pred, _var = _surrogate.predict(valid_configs)
                            sampled_pred = self.rng.normal(
                                _pred.reshape(-1), _var.reshape(-1))
                            cv_pred[valid_idx] = sampled_pred
                        _num, _ = self.calculate_preserving_order_num(cv_pred, test_y)
                        order_preseving_nums.append(_num)

                    # The surrogate with the best agreement wins this sample.
                    max_id = np.argmax(order_preseving_nums)
                    min_probability_array[max_id] += 1
                new_weights = np.array(min_probability_array) / sample_num
            else:
                raise ValueError('Invalid weight method: %s!' % self.weight_method)
        else:
            # Too few results: keep the current weights.
            new_weights = [self.surrogate.surrogate_weight[r] for r in r_list]

        self.logger.info(
            '[%s] %d-th Updating weights: %s' %
            (self.weight_method, self.weight_changed_cnt, str(new_weights)))

        # Assign the weight to each basic surrogate.
        for i, r in enumerate(r_list):
            self.surrogate.surrogate_weight[r] = new_weights[i]
        self.weight_changed_cnt += 1
        # Save the weight data.
        self.hist_weights.append(new_weights)
        dir_path = os.path.join(self.data_directory, 'saved_weights')
        file_name = 'mfes_weights_%s.npy' % (self.method_name, )
        os.makedirs(dir_path, exist_ok=True)
        np.save(os.path.join(dir_path, file_name), np.asarray(self.hist_weights))
        self.logger.info(
            'update_weight() cost %.2fs. new weights are saved to %s' %
            (time.time() - start_time, os.path.join(dir_path, file_name)))

    def get_weights(self):
        """Return the list of weight vectors recorded by ``update_weight``."""
        return self.hist_weights
def update_weight(self):
    """Recompute the weight of every per-level surrogate and save the weight history.

    With fewer than 3 results at the highest resource level the current
    weights are kept. Otherwise each surrogate is scored by how well it
    preserves the ordering of those results; the highest level is scored
    through 5-fold cross validation (or 0 when fewer than 10 results exist).

    Raises:
        ValueError: if ``weight_method`` is not supported.
    """
    start_time = time.time()

    max_r = self.iterate_r[-1]
    incumbent_configs = self.target_x[max_r]
    test_x = convert_configurations_to_array(incumbent_configs)
    test_y = np.array(self.target_y[max_r], dtype=np.float64)

    r_list = self.surrogate.surrogate_r
    K = len(r_list)
    fold_num = 5

    if len(test_y) >= 3:
        # Get previous weights
        if self.weight_method == 'rank_loss_p_norm':
            preserving_order_p = list()
            for i, r in enumerate(r_list):
                if i != K - 1:
                    mean, var = self.surrogate.surrogate_container[r].predict(test_x)
                    tmp_y = np.reshape(mean, -1)
                    preorder_num, pair_num = self.calculate_preserving_order_num(
                        tmp_y, test_y)
                    preserving_order_p.append(preorder_num / pair_num)
                else:
                    if len(test_y) < 2 * fold_num:
                        preserving_order_p.append(0)
                    else:
                        # 5-fold cross validation.
                        kfold = KFold(n_splits=fold_num)
                        # Float buffer: an integer array would silently
                        # truncate the predictions on assignment.
                        cv_pred = np.zeros(len(test_y), dtype=np.float64)
                        types, bounds = get_types(self.config_space)
                        for train_idx, valid_idx in kfold.split(test_x):
                            train_configs, train_y = test_x[train_idx], test_y[train_idx]
                            valid_configs = test_x[valid_idx]
                            _surrogate = RandomForestWithInstances(
                                types=types, bounds=bounds)
                            _surrogate.train(train_configs, train_y)
                            pred, _ = _surrogate.predict(valid_configs)
                            cv_pred[valid_idx] = pred.reshape(-1)
                        preorder_num, pair_num = self.calculate_preserving_order_num(
                            cv_pred, test_y)
                        preserving_order_p.append(preorder_num / pair_num)

            # Weights are the normalised p-th powers of the agreement ratios.
            trans_order_weight = np.array(preserving_order_p)
            power_sum = np.sum(np.power(trans_order_weight, self.power_num))
            new_weights = np.power(trans_order_weight, self.power_num) / power_sum

        elif self.weight_method == 'rank_loss_prob':
            # For basic surrogate i=1:K-1.
            mean_list, var_list = list(), list()
            for i, r in enumerate(r_list[:-1]):
                mean, var = self.surrogate.surrogate_container[r].predict(test_x)
                mean_list.append(np.reshape(mean, -1))
                var_list.append(np.reshape(var, -1))

            sample_num = 100
            min_probability_array = [0] * K
            for _ in range(sample_num):
                order_preseving_nums = list()

                # For basic surrogate i=1:K-1.
                # NOTE(review): the predicted variance is passed as the
                # `scale` (standard deviation) of the normal - confirm.
                for idx in range(K - 1):
                    sampled_y = self.rng.normal(mean_list[idx], var_list[idx])
                    _num, _ = self.calculate_preserving_order_num(sampled_y, test_y)
                    order_preseving_nums.append(_num)

                # For basic surrogate i=K. cv
                if len(test_y) < 2 * fold_num:
                    order_preseving_nums.append(0)
                else:
                    # 5-fold cross validation.
                    kfold = KFold(n_splits=fold_num)
                    # Float buffer: see the note in the p-norm branch.
                    cv_pred = np.zeros(len(test_y), dtype=np.float64)
                    types, bounds = get_types(self.config_space)
                    for train_idx, valid_idx in kfold.split(test_x):
                        train_configs, train_y = test_x[train_idx], test_y[train_idx]
                        valid_configs = test_x[valid_idx]
                        _surrogate = RandomForestWithInstances(
                            types=types, bounds=bounds)
                        _surrogate.train(train_configs, train_y)
                        _pred, _var = _surrogate.predict(valid_configs)
                        sampled_pred = self.rng.normal(
                            _pred.reshape(-1), _var.reshape(-1))
                        cv_pred[valid_idx] = sampled_pred
                    _num, _ = self.calculate_preserving_order_num(cv_pred, test_y)
                    order_preseving_nums.append(_num)

                # The surrogate with the best agreement wins this sample.
                max_id = np.argmax(order_preseving_nums)
                min_probability_array[max_id] += 1
            new_weights = np.array(min_probability_array) / sample_num
        else:
            raise ValueError('Invalid weight method: %s!' % self.weight_method)
    else:
        # Too few results: keep the current weights.
        new_weights = [self.surrogate.surrogate_weight[r] for r in r_list]

    self.logger.info(
        '[%s] %d-th Updating weights: %s' %
        (self.weight_method, self.weight_changed_cnt, str(new_weights)))

    # Assign the weight to each basic surrogate.
    for i, r in enumerate(r_list):
        self.surrogate.surrogate_weight[r] = new_weights[i]
    self.weight_changed_cnt += 1
    # Save the weight data.
    self.hist_weights.append(new_weights)
    dir_path = os.path.join(self.data_directory, 'saved_weights')
    file_name = 'mfes_weights_%s.npy' % (self.method_name, )
    os.makedirs(dir_path, exist_ok=True)
    np.save(os.path.join(dir_path, file_name), np.asarray(self.hist_weights))
    self.logger.info(
        'update_weight() cost %.2fs. new weights are saved to %s' %
        (time.time() - start_time, os.path.join(dir_path, file_name)))