def __init__(self,
             config_space,
             min_points_in_model=None,
             top_n_percent=15,
             num_samples=64,
             random_fraction=1 / 3,
             bandwidth_factor=3,
             min_bandwidth=1e-3,
             task_id=None,
             output_dir='logs',
             random_state=1):
    """Initialise the TPE advisor state for ``config_space``.

    Parameters
    ----------
    config_space
        Search space; it is seeded with ``random_state``.
    min_points_in_model
        Requested minimum number of observations before a model is built.
        Values that are missing or smaller than ``#hyperparameters + 1``
        are raised to that bound.
    top_n_percent, num_samples, random_fraction
        Stored unchanged on the instance.
    bandwidth_factor, min_bandwidth
        Stored as ``bw_factor`` and ``min_bandwidth``.
    task_id
        Forwarded to ``HistoryContainer``.
    output_dir
        Stored unchanged; nothing is created on disk here.
    random_state
        Seed for the configuration space and for ``self.rng``.
    """
    self.top_n_percent = top_n_percent
    self.config_space = config_space
    self.config_space.seed(random_state)
    self.bw_factor = bandwidth_factor
    self.min_bandwidth = min_bandwidth
    self.history_container = HistoryContainer(task_id,
                                              config_space=config_space)
    self.output_dir = output_dir

    hyperparameters = self.config_space.get_hyperparameters()

    # The model needs at least one more point than there are dimensions.
    lower_bound = len(hyperparameters) + 1
    if min_points_in_model is None or min_points_in_model < lower_bound:
        self.min_points_in_model = lower_bound
    else:
        self.min_points_in_model = min_points_in_model

    self.num_samples = num_samples
    self.random_fraction = random_fraction
    self.random_state = random_state
    self.rng = np.random.RandomState(random_state)

    # 'u' + number of choices for hyperparameters exposing ``choices``,
    # 'c' + 0 for everything else.
    kde_codes = []
    cardinalities = []
    for hp in hyperparameters:
        has_choices = hasattr(hp, 'choices')
        kde_codes.append('u' if has_choices else 'c')
        cardinalities.append(len(hp.choices) if has_choices else 0)
    self.kde_vartypes = ''.join(kde_codes)
    self.vartypes = np.array(cardinalities, dtype=int)

    # store precomputed probs for the categorical parameters
    self.cat_probs = []

    self.good_config_rankings = {}
    self.kde_models = {}
    self.logger = logging.getLogger(self.__class__.__name__)
def __init__(self,
             config_space,
             initial_trials=10,
             initial_configurations=None,
             init_strategy='random_explore_first',
             history_bo_data=None,
             optimization_strategy='bo',
             surrogate_type='prf',
             output_dir='logs',
             task_id=None,
             rng=None):
    """Initialise the advisor and its Bayesian-optimization components.

    Parameters
    ----------
    config_space
        Search space; seeded with an integer drawn from ``rng``.
    initial_trials
        Initial value of ``init_num``; replaced by the length of the
        initial design that is eventually used.
    initial_configurations
        Optional non-empty initial design. When missing or empty,
        ``create_initial_design(init_strategy)`` supplies one.
    init_strategy, history_bo_data, optimization_strategy, surrogate_type,
    output_dir
        Stored unchanged on the instance.
    task_id
        Forwarded to ``HistoryContainer``.
    rng
        Random generator; obtained from ``get_rng()`` when ``None``.
    """
    self.init_strategy = init_strategy
    self.output_dir = output_dir

    # Random generator: use the caller's, otherwise ask for a new one.
    if rng is None:
        _run_id, rng = get_rng()
    self.rng = rng
    self.logger = get_logger(self.__class__.__name__)

    # Bookkeeping of evaluated configurations and their results.
    self.optimization_strategy = optimization_strategy
    self.configurations = []
    self.failed_configurations = []
    self.perfs = []
    self.scale_perc = 5
    self.perc = None
    self.min_y = None
    self.max_y = None

    # Ingredients of Bayesian optimization.
    self.history_bo_data = history_bo_data
    self.surrogate_type = surrogate_type
    self.init_num = initial_trials
    self.config_space = config_space
    self.config_space.seed(rng.randint(MAXINT))

    user_design_given = (initial_configurations is not None
                         and len(initial_configurations) > 0)
    if user_design_given:
        self.initial_configurations = initial_configurations
    else:
        self.initial_configurations = self.create_initial_design(
            self.init_strategy)
    self.init_num = len(self.initial_configurations)

    self.history_container = HistoryContainer(task_id)

    # Filled in by setup_bo_basics().
    self.surrogate_model = None
    self.acquisition_function = None
    self.optimizer = None
    self.setup_bo_basics()
def __init__(self, config_space, task_id, output_dir):
    """Create the output directory and the shared facade state.

    Parameters
    ----------
    config_space
        Search space, stored unchanged.
    task_id
        Forwarded to ``HistoryContainer``.
    output_dir
        Directory that is created if it does not exist yet.
    """
    self.output_dir = output_dir
    # exist_ok avoids the check-then-create race when several processes
    # start with the same output directory.
    os.makedirs(self.output_dir, exist_ok=True)

    self.logger = None
    self.history_container = HistoryContainer(task_id)
    self.config_space = config_space
    self.scale_perc = 5
    self.perc = None
    self.min_y = None
    self.max_y = None
def __init__(self,
             config_space,
             num_objs=1,
             num_constraints=0,
             population_size=30,
             subset_size=20,
             epsilon=0.2,
             strategy='worst',  # 'worst', 'oldest'
             optimization_strategy='ea',
             batch_size=1,
             output_dir='logs',
             task_id='default_task_id',
             random_state=None):
    """Initialise the evolutionary-algorithm advisor.

    Only single-objective, unconstrained problems are accepted
    (``num_objs == 1`` and ``num_constraints == 0``); ``subset_size`` must
    lie in ``(0, population_size]`` and ``strategy`` must be ``'worst'`` or
    ``'oldest'``. Violations raise ``AssertionError``.

    ``random_state`` is turned into ``self.rng`` via ``check_random_state``
    and an integer drawn from it seeds ``config_space``.
    """
    self.num_objs = num_objs
    self.num_constraints = num_constraints
    assert self.num_objs == 1
    assert self.num_constraints == 0

    self.output_dir = output_dir

    # Random generator and search-space seeding.
    self.rng = check_random_state(random_state)
    self.config_space = config_space
    self.config_space_seed = self.rng.randint(MAXINT)
    self.config_space.seed(self.config_space_seed)
    self.logger = get_logger(self.__class__.__name__)

    # Parallel settings.
    self.batch_size = batch_size
    self.init_num = batch_size  # for compatibility in pSMBO
    self.running_configs = []

    self.optimization_strategy = optimization_strategy

    # Population bookkeeping.
    self.all_configs = set()
    self.age = 0
    self.population = []
    self.population_size = population_size
    self.subset_size = subset_size
    assert 0 < self.subset_size <= self.population_size
    self.epsilon = epsilon
    self.strategy = strategy
    assert self.strategy in ['worst', 'oldest']

    # History of all observations.
    self.history_container = HistoryContainer(
        task_id, self.num_constraints, config_space=self.config_space)
class BaseFacade(object, metaclass=abc.ABCMeta):
    """Abstract base for optimization facades.

    Holds the output directory, the search space and a ``HistoryContainer``;
    subclasses implement ``run`` and ``iterate``.
    """

    def __init__(self, config_space, task_id, output_dir):
        """Create the output directory and the shared facade state.

        Parameters
        ----------
        config_space
            Search space, stored unchanged.
        task_id
            Forwarded to ``HistoryContainer``.
        output_dir
            Directory that is created if it does not exist yet.
        """
        self.output_dir = output_dir
        # exist_ok avoids the check-then-create race when several
        # processes start with the same output directory.
        os.makedirs(self.output_dir, exist_ok=True)

        self.logger = None
        self.history_container = HistoryContainer(task_id)
        self.config_space = config_space
        self.scale_perc = 5
        self.perc = None
        self.min_y = None
        self.max_y = None

    @abc.abstractmethod
    def run(self):
        """Run the optimization; must be provided by subclasses."""
        raise NotImplementedError()

    @abc.abstractmethod
    def iterate(self):
        """Perform one optimization step; must be provided by subclasses."""
        raise NotImplementedError()

    def get_history(self):
        """Return the history container holding all observations."""
        return self.history_container

    def get_incumbent(self):
        """Return whatever ``history_container.get_incumbents()`` reports."""
        return self.history_container.get_incumbents()

    def _get_logger(self, name):
        """Set up file logging under ``output_dir`` and return the logger.

        The logger is named ``open-box-<name>`` and the log file is
        ``<output_dir>/open-box-<name>.log``.
        """
        logger_name = 'open-box-%s' % name
        log_path = os.path.join(self.output_dir, '%s.log' % str(logger_name))
        setup_logger(log_path)
        return get_logger(logger_name)
class EA_Advisor(object, metaclass=abc.ABCMeta):
    """
    Evolutionary Algorithm Advisor

    Keeps a bounded population of evaluated configurations. New suggestions
    are random samples until the population is full, and afterwards unseen
    one-exchange neighbours of a parent chosen by epsilon-greedy subset
    tournament.
    """

    def __init__(
            self,
            config_space,
            num_objs=1,
            num_constraints=0,
            population_size=30,
            subset_size=20,
            epsilon=0.2,
            strategy='worst',  # 'worst', 'oldest'
            optimization_strategy='ea',
            batch_size=1,
            output_dir='logs',
            task_id='default_task_id',
            random_state=None):
        """Initialise the advisor.

        Only ``num_objs == 1`` and ``num_constraints == 0`` are accepted,
        ``subset_size`` must lie in ``(0, population_size]`` and
        ``strategy`` must be ``'worst'`` or ``'oldest'``; violations raise
        ``AssertionError``. ``random_state`` is converted with
        ``check_random_state`` and also seeds ``config_space``.
        """
        self.num_objs = num_objs
        self.num_constraints = num_constraints
        assert self.num_objs == 1 and self.num_constraints == 0

        self.output_dir = output_dir
        self.rng = check_random_state(random_state)
        self.config_space = config_space
        self.config_space_seed = self.rng.randint(MAXINT)
        self.config_space.seed(self.config_space_seed)
        self.logger = get_logger(self.__class__.__name__)

        # Init parallel settings
        self.batch_size = batch_size
        self.init_num = batch_size  # for compatibility in pSMBO
        self.running_configs = list()

        # Basic components in Advisor.
        self.optimization_strategy = optimization_strategy

        # Init the basic ingredients
        self.all_configs = set()
        self.age = 0
        self.population = list()
        self.population_size = population_size
        self.subset_size = subset_size
        assert 0 < self.subset_size <= self.population_size
        self.epsilon = epsilon
        self.strategy = strategy
        assert self.strategy in ['worst', 'oldest']

        # init history container
        self.history_container = HistoryContainer(
            task_id, self.num_constraints, config_space=self.config_space)

    def get_suggestion(self, history_container=None):
        """
        Generate a configuration (suggestion) for this query.

        Parameters
        ----------
        history_container
            Accepted for interface compatibility; not used by this advisor.

        Returns
        -------
        A configuration. It is also recorded in ``all_configs`` and
        ``running_configs``.
        """
        if len(self.population) < self.population_size:
            # Initialize population
            next_config = self.sample_random_config(
                excluded_configs=self.all_configs)
        else:
            # Select a parent by subset tournament and epsilon greedy.
            # All draws go through self.rng so that random_state makes the
            # search reproducible.
            if self.rng.random() < self.epsilon:
                pick = self.rng.randint(len(self.population))
                parent_config = self.population[pick]['config']
            else:
                picks = self.rng.choice(len(self.population),
                                        size=self.subset_size,
                                        replace=False)
                subset = [self.population[i] for i in picks]
                # minimize
                parent_config = min(
                    subset, key=lambda member: member['perf'])['config']

            # Mutation to 1-step neighbors
            next_config = None
            neighbors_gen = get_one_exchange_neighbourhood(
                parent_config, seed=self.rng.randint(MAXINT))
            for neighbor in neighbors_gen:
                if neighbor not in self.all_configs:
                    next_config = neighbor
                    break
            if next_config is None:
                # If all the neighbors are evaluated, sample randomly!
                next_config = self.sample_random_config(
                    excluded_configs=self.all_configs)

        self.all_configs.add(next_config)
        self.running_configs.append(next_config)
        return next_config

    def get_suggestions(self, batch_size=None, history_container=None):
        """Return ``batch_size`` suggestions (default: ``self.batch_size``)."""
        if batch_size is None:
            batch_size = self.batch_size
        return [self.get_suggestion(history_container)
                for _ in range(batch_size)]

    def update_observation(self, observation: 'Observation'):
        """
        Update the current observations.

        Parameters
        ----------
        observation
            Must expose ``config``, ``objs`` and ``trial_state``; its config
            has to be one of the currently running configurations.

        Returns
        -------
        The result of ``history_container.update_observation(observation)``.
        """
        config = observation.config
        perf = observation.objs[0]
        trial_state = observation.trial_state

        assert config in self.running_configs
        self.running_configs.remove(config)

        # update population: only successful trials with a real performance
        if trial_state == SUCCESS and perf < MAXINT:
            self.population.append(
                dict(config=config, age=self.age, perf=perf))
            self.age += 1

        # Eliminate samples
        if len(self.population) > self.population_size:
            if self.strategy == 'oldest':
                self.population.sort(key=lambda x: x['age'])
                self.population.pop(0)
            elif self.strategy == 'worst':
                self.population.sort(key=lambda x: x['perf'])
                self.population.pop(-1)
            else:
                raise ValueError('Unknown strategy: %s' % self.strategy)

        return self.history_container.update_observation(observation)

    def sample_random_config(self, excluded_configs=None):
        """Sample a configuration that is not in ``excluded_configs``.

        After 1000 unsuccessful attempts a warning is logged and the last
        (duplicate) sample is returned.
        """
        if excluded_configs is None:
            excluded_configs = set()

        sample_cnt = 0
        max_sample_cnt = 1000
        while True:
            config = self.config_space.sample_configuration()
            sample_cnt += 1
            if config not in excluded_configs:
                break
            if sample_cnt >= max_sample_cnt:
                self.logger.warning(
                    'Cannot sample non duplicate configuration after %d iterations.'
                    % max_sample_cnt)
                break
        return config

    def get_history(self):
        """Return the history container holding all observations."""
        return self.history_container
def __init__(self,
             config_space,
             task_info,
             initial_trials=10,
             initial_configurations=None,
             init_strategy='random_explore_first',
             history_bo_data=None,
             optimization_strategy='bo',
             surrogate_type=None,
             acq_type=None,
             acq_optimizer_type='local_random',
             ref_point=None,
             output_dir='logs',
             task_id=None,
             random_state=None):
    """Initialise the advisor and its Bayesian-optimization components.

    Parameters
    ----------
    config_space
        Search space; seeded with an integer drawn from ``self.rng``.
    task_info
        Mapping providing ``'num_objs'`` and ``'num_constraints'``.
    initial_trials
        Initial value of ``init_num``; replaced by the length of the
        initial design that is eventually used.
    initial_configurations
        Optional non-empty initial design. When missing or empty,
        ``create_initial_design(init_strategy)`` supplies one.
    init_strategy, history_bo_data, optimization_strategy, surrogate_type,
    acq_type, acq_optimizer_type, ref_point
        Stored unchanged on the instance.
    output_dir
        Base directory; a ``bo_history`` sub-folder is created inside it.
    task_id
        Used for the history file name and the history container.
    random_state
        Seed for ``np.random.RandomState``.
    """
    self.task_info = task_info
    self.num_objs = task_info['num_objs']
    self.num_constraints = task_info['num_constraints']
    self.init_strategy = init_strategy
    self.output_dir = output_dir
    self.rng = np.random.RandomState(random_state)
    self.logger = get_logger(self.__class__.__name__)

    history_folder = os.path.join(self.output_dir, 'bo_history')
    # exist_ok avoids the check-then-create race between parallel advisors
    # sharing one output directory.
    os.makedirs(history_folder, exist_ok=True)
    self.history_file = os.path.join(history_folder,
                                     'bo_history_%s.json' % task_id)

    # Basic components in Advisor.
    self.optimization_strategy = optimization_strategy

    # Init the basic ingredients in Bayesian optimization.
    self.history_bo_data = history_bo_data
    self.surrogate_type = surrogate_type
    self.constraint_surrogate_type = None
    self.acq_type = acq_type
    self.acq_optimizer_type = acq_optimizer_type
    self.init_num = initial_trials
    self.config_space = config_space
    self.config_space_seed = self.rng.randint(MAXINT)
    self.config_space.seed(self.config_space_seed)
    self.ref_point = ref_point

    # init history container
    if self.num_objs == 1:
        self.history_container = HistoryContainer(
            task_id, self.num_constraints, config_space=self.config_space)
    else:  # multi-objectives
        self.history_container = MOHistoryContainer(
            task_id, self.num_objs, self.num_constraints, ref_point)

    # initial design
    if initial_configurations is not None and len(initial_configurations) > 0:
        self.initial_configurations = initial_configurations
        self.init_num = len(initial_configurations)
    else:
        self.initial_configurations = self.create_initial_design(
            self.init_strategy)
        self.init_num = len(self.initial_configurations)

    # Filled in by setup_bo_basics().
    self.surrogate_model = None
    self.constraint_models = None
    self.acquisition_function = None
    self.optimizer = None
    self.check_setup()
    self.setup_bo_basics()
class TPE_Advisor:
    """Advisor that suggests configurations with a TPE-style KDE model.

    Two kernel density estimators are fitted on the best ("good") and the
    remaining ("bad") observations; candidates are sampled around good
    points and the one minimising ``bad_pdf / good_pdf`` is suggested.
    """

    # TODO:Add warm start

    def __init__(self,
                 config_space,
                 min_points_in_model=None,
                 top_n_percent=15,
                 num_samples=64,
                 random_fraction=1 / 3,
                 bandwidth_factor=3,
                 min_bandwidth=1e-3,
                 task_id=None,
                 output_dir='logs',
                 random_state=1):
        """Initialise the advisor.

        Parameters
        ----------
        config_space
            Search space; it is seeded with ``random_state``.
        min_points_in_model
            Minimum number of observations before a model is built; raised
            to ``#hyperparameters + 1`` when missing or smaller.
        top_n_percent
            Percentage of observations treated as "good".
        num_samples
            Number of candidates drawn per suggestion.
        random_fraction
            Probability of returning a purely random configuration.
        bandwidth_factor, min_bandwidth
            Widening factor for continuous bandwidths when sampling and the
            lower bound applied to every bandwidth.
        task_id
            Forwarded to ``HistoryContainer``.
        output_dir
            Stored unchanged.
        random_state
            Seed for the configuration space and ``self.rng``.
        """
        self.top_n_percent = top_n_percent
        self.config_space = config_space
        self.config_space.seed(random_state)
        self.bw_factor = bandwidth_factor
        self.min_bandwidth = min_bandwidth
        self.history_container = HistoryContainer(task_id,
                                                  config_space=config_space)
        self.output_dir = output_dir

        hps = self.config_space.get_hyperparameters()
        lower_bound = len(hps) + 1
        if min_points_in_model is None or min_points_in_model < lower_bound:
            min_points_in_model = lower_bound
        self.min_points_in_model = min_points_in_model

        self.num_samples = num_samples
        self.random_fraction = random_fraction
        self.random_state = random_state
        self.rng = np.random.RandomState(random_state)

        # 'u' + number of choices for hyperparameters exposing ``choices``,
        # 'c' + 0 for everything else.
        self.kde_vartypes = ""
        vartypes = []
        for h in hps:
            if hasattr(h, 'choices'):
                self.kde_vartypes += 'u'
                vartypes.append(len(h.choices))
            else:
                self.kde_vartypes += 'c'
                vartypes.append(0)
        self.vartypes = np.array(vartypes, dtype=int)

        # store precomputed probs for the categorical parameters
        self.cat_probs = []

        self.good_config_rankings = dict()
        self.kde_models = dict()
        self.logger = logging.getLogger(self.__class__.__name__)

    def update_observation(self, observation: 'Observation'):
        """Record ``observation`` in the advisor's history container."""
        self.history_container.update_observation(observation)

    def get_suggestion(self, history_container=None):
        """Suggest the next configuration to evaluate.

        The default configuration is returned while the history is empty.
        A random configuration is returned when no model is available, with
        probability ``random_fraction``, or when model-based sampling fails.

        Parameters
        ----------
        history_container
            History to use; defaults to the advisor's own container.

        Returns
        -------
        A configuration of ``self.config_space``.
        """
        if history_container is None:
            history_container = self.history_container

        # use default as first config
        if len(history_container.configurations) == 0:
            return self.config_space.get_default_configuration()

        self.fit_kde_models(history_container)

        # If no model is available, sample random config
        if len(self.kde_models) == 0 or self.rng.rand() < self.random_fraction:
            return self.sample_random_configs(1, history_container)[0]

        best = np.inf
        best_vector = None
        try:
            kde_good = self.kde_models['good']
            kde_bad = self.kde_models['bad']
            good_pdf = kde_good.pdf
            bad_pdf = kde_bad.pdf

            def minimize_me(x):
                return max(1e-32, bad_pdf(x)) / max(good_pdf(x), 1e-32)

            for _ in range(self.num_samples):
                idx = self.rng.randint(0, len(kde_good.data))
                datum = kde_good.data[idx]
                vector = []

                for m, bw, t in zip(datum, kde_good.bw, self.vartypes):
                    bw = max(bw, self.min_bandwidth)
                    if t == 0:
                        bw = self.bw_factor * bw
                        try:
                            # Draw from self.rng so random_state controls
                            # the sampling.
                            vector.append(
                                sps.truncnorm.rvs(-m / bw, (1 - m) / bw,
                                                  loc=m, scale=bw,
                                                  random_state=self.rng))
                        except Exception:
                            self.logger.warning(
                                "Truncated Normal failed for:\ndatum=%s\nbandwidth=%s\nfor entry with value %s"
                                % (datum, kde_good.bw, m))
                            self.logger.warning("data in the KDE:\n%s" %
                                                kde_good.data)
                    else:
                        if self.rng.rand() < (1 - bw):
                            vector.append(int(m))
                        else:
                            vector.append(self.rng.randint(t))

                val = minimize_me(vector)

                if not np.isfinite(val):
                    self.logger.warning(
                        'sampled vector: %s has EI value %s' % (vector, val))
                    self.logger.warning("data in the KDEs:\n%s\n%s" %
                                        (kde_good.data, kde_bad.data))
                    self.logger.warning("bandwidth of the KDEs:\n%s\n%s" %
                                        (kde_good.bw, kde_bad.bw))
                    self.logger.warning("l(x) = %s" % (good_pdf(vector)))
                    self.logger.warning("g(x) = %s" % (bad_pdf(vector)))

                    # right now, this happens because a KDE does not contain
                    # all values for a categorical parameter. This cannot be
                    # fixed with the statsmodels KDE, so for now, we are just
                    # going to evaluate this one if the good_kde has a finite
                    # value, i.e. there is no config with that value in the
                    # bad kde, so it shouldn't be terrible.
                    if np.isfinite(good_pdf(vector)):
                        best_vector = vector
                        break

                if val < best:
                    best = val
                    best_vector = vector

            if best_vector is None:
                self.logger.debug(
                    "Sampling based optimization with %i samples failed -> using random configuration"
                    % self.num_samples)
                sample = self.sample_random_configs(
                    1, history_container)[0].get_dictionary()
            else:
                self.logger.debug('best_vector: {}, {}, {}, {}'.format(
                    best_vector, best, good_pdf(best_vector),
                    bad_pdf(best_vector)))
                for i in range(len(best_vector)):
                    hp = self.config_space.get_hyperparameter(
                        self.config_space.get_hyperparameter_by_idx(i))
                    if isinstance(
                            hp,
                            ConfigSpace.hyperparameters.CategoricalHyperparameter):
                        best_vector[i] = int(np.rint(best_vector[i]))
                sample = ConfigSpace.Configuration(
                    self.config_space, vector=best_vector).get_dictionary()

            # Convert on both paths so that a configuration object, not a
            # dictionary, is handed back.
            try:
                sample = ConfigSpace.util.deactivate_inactive_hyperparameters(
                    configuration_space=self.config_space,
                    configuration=sample)
            except Exception:
                self.logger.warning(("=" * 50 + "\n") * 3 + \
                                    "Error converting configuration:\n%s" % sample + \
                                    "\n here is a traceback:" + \
                                    traceback.format_exc())
                raise

        except Exception:
            self.logger.warning(
                "Sampling based optimization with %i samples failed\n %s \nUsing random configuration"
                % (self.num_samples, traceback.format_exc()))
            sample = self.sample_random_configs(1, history_container)[0]

        return sample

    def impute_conditional_data(self, array):
        """Return a copy of ``array`` with every NaN entry filled in.

        NaNs of a row are copied from a random row in which the first
        missing column is finite. If no row has that column, it is filled
        with a uniform random number (``vartypes`` entry 0) or a random
        integer below the ``vartypes`` entry.

        Parameters
        ----------
        array
            2-D float array, one configuration vector per row.
        """
        return_array = np.empty_like(array)

        for i in range(array.shape[0]):
            datum = np.copy(array[i])
            nan_indices = np.argwhere(np.isnan(datum)).flatten()

            # Test the number of missing entries, not their values:
            # np.any([0]) is False and would skip a NaN in column 0.
            while nan_indices.size > 0:
                nan_idx = nan_indices[0]
                valid_indices = np.argwhere(
                    np.isfinite(array[:, nan_idx])).flatten()

                if len(valid_indices) > 0:
                    # pick one of them at random and overwrite all NaN values
                    row_idx = self.rng.choice(valid_indices)
                    datum[nan_indices] = array[row_idx, nan_indices]
                else:
                    # no good point in the data has this value activated,
                    # so fill it with a valid but random value
                    t = self.vartypes[nan_idx]
                    if t == 0:
                        datum[nan_idx] = self.rng.rand()
                    else:
                        datum[nan_idx] = self.rng.randint(t)

                nan_indices = np.argwhere(np.isnan(datum)).flatten()
            return_array[i, :] = datum
        return return_array

    def fit_kde_models(self, history_container):
        """Refit the good/bad KDE models from ``history_container``.

        Nothing is changed when fewer than ``min_points_in_model``
        successful runs exist or when either split has no more rows than
        columns.
        """
        num_config_successful = len(history_container.successful_perfs)
        if num_config_successful <= self.min_points_in_model - 1:
            self.logger.debug(
                "Only %i run(s) available, need at least %s -> can't build model!"
                % (num_config_successful, self.min_points_in_model))
            return

        train_configs = convert_configurations_to_array(
            history_container.configurations)
        train_losses = history_container.get_transformed_perfs()

        num_points = train_configs.shape[0]
        n_good = max(self.min_points_in_model,
                     (self.top_n_percent * num_points) // 100)
        n_bad = max(self.min_points_in_model,
                    ((100 - self.top_n_percent) * num_points) // 100)

        # Refit KDE for the current budget
        idx = np.argsort(train_losses)

        train_data_good = self.impute_conditional_data(
            train_configs[idx[:n_good]])
        train_data_bad = self.impute_conditional_data(
            train_configs[idx[n_good:n_good + n_bad]])

        if train_data_good.shape[0] <= train_data_good.shape[1]:
            return
        if train_data_bad.shape[0] <= train_data_bad.shape[1]:
            return

        # quick rule of thumb ('cv_ls' is the more expensive
        # cross-validation alternative)
        bw_estimation = 'normal_reference'

        bad_kde = sm.nonparametric.KDEMultivariate(
            data=train_data_bad, var_type=self.kde_vartypes, bw=bw_estimation)
        good_kde = sm.nonparametric.KDEMultivariate(
            data=train_data_good, var_type=self.kde_vartypes, bw=bw_estimation)

        bad_kde.bw = np.clip(bad_kde.bw, self.min_bandwidth, None)
        good_kde.bw = np.clip(good_kde.bw, self.min_bandwidth, None)

        self.kde_models = {'good': good_kde, 'bad': bad_kde}

        # update probs for the categorical parameters for later sampling
        self.logger.debug(
            'done building a new model based on %i/%i split\nBest loss for this budget:%f\n\n\n\n\n'
            % (n_good, n_bad, np.min(train_losses)))

    def sample_random_configs(self, num_configs=1, history_container=None):
        """
        Sample a batch of random configurations.

        Configurations already in the history or in the batch are rejected;
        after 1000 consecutive rejections a warning is logged and the
        duplicate is accepted.

        Parameters
        ----------
        num_configs
            Number of configurations to return.
        history_container
            History to check against; defaults to the advisor's own.

        Returns
        -------
        A list of ``num_configs`` configurations.
        """
        if history_container is None:
            history_container = self.history_container

        configs = list()
        sample_cnt = 0
        max_sample_cnt = 1000
        while len(configs) < num_configs:
            config = self.config_space.sample_configuration()
            sample_cnt += 1
            if (config not in history_container.configurations
                    and config not in configs):
                configs.append(config)
                sample_cnt = 0
                continue
            if sample_cnt >= max_sample_cnt:
                self.logger.warning(
                    'Cannot sample non duplicate configuration after %d iterations.'
                    % max_sample_cnt)
                configs.append(config)
                sample_cnt = 0
        return configs
def __init__(self,
             objective_func,
             config_space: ConfigurationSpace,
             R,
             eta=3,
             skip_outer_loop=0,
             rand_prob=0.3,
             use_bohb=False,
             init_weight=None,
             update_enable=True,
             weight_method='rank_loss_p_norm',
             fusion_method='idp',
             power_num=3,
             random_state=1,
             method_id='mqAsyncMFES',
             restart_needed=True,
             time_limit_per_trial=600,
             runtime_limit=None,
             seed=1,
             ip='',
             port=13579,
             authkey=b'abc'):
    """Initialise asynchronous MFES on top of the parent facade.

    The scheduling arguments (``R``, ``eta``, ``skip_outer_loop``,
    ``random_state``, ``method_id``, limits and connection settings) are
    forwarded to the parent constructor. ``seed`` seeds the surrogate (BOHB
    mode) and ``self.rng``. ``init_weight`` must have ``s_max + 1`` entries;
    when ``None`` it is uniform over the first ``s_max`` entries with a
    trailing zero. ``rand_prob`` is the probability handed to the random
    configuration chooser.
    """
    super().__init__(objective_func,
                     config_space,
                     R,
                     eta=eta,
                     skip_outer_loop=skip_outer_loop,
                     random_state=random_state,
                     method_id=method_id,
                     restart_needed=restart_needed,
                     time_limit_per_trial=time_limit_per_trial,
                     runtime_limit=runtime_limit,
                     ip=ip,
                     port=port,
                     authkey=authkey)
    self.seed = seed
    self.last_n_iteration = None
    self.use_bohb_strategy = use_bohb
    self.update_enable = update_enable
    self.fusion_method = fusion_method

    # Exponent used by the `rank_loss_p_norm` weight method.
    self.power_num = power_num
    # Weight learning method and its counters.
    self.weight_method = weight_method
    self.weight_update_id = 0
    self.weight_changed_cnt = 0

    if init_weight is None:
        uniform = 1. / self.s_max
        init_weight = [uniform for _ in range(self.s_max)]
        init_weight.append(0.)
    assert len(init_weight) == (self.s_max + 1)
    self.logger.info("Initialize weight to %s" %
                     init_weight[:self.s_max + 1])

    types, bounds = get_types(config_space)
    if self.use_bohb_strategy:
        self.surrogate = RandomForestWithInstances(types, bounds,
                                                   seed=self.seed)
    else:
        self.surrogate = RandomForestEnsemble(types, bounds, self.s_max,
                                              self.eta, init_weight,
                                              self.fusion_method)
    self.acquisition_function = EI(model=self.surrogate)

    self.iterate_id = 0
    self.iterate_r = []
    self.hist_weights = []

    # Per-resource evaluation statistics (Hyperband rungs).
    self.target_x = {}
    self.target_y = {}
    for budget in np.logspace(0, self.s_max, self.s_max + 1, base=self.eta):
        r = int(budget)
        self.iterate_r.append(r)
        self.target_x[r] = []
        self.target_y[r] = []

    # BO optimizer settings.
    self.history_container = HistoryContainer(task_id=self.method_name)
    self.sls_max_steps = None
    self.n_sls_iterations = 5
    self.sls_n_steps_plateau_walk = 10
    self.rng = np.random.RandomState(seed=self.seed)
    self.acq_optimizer = InterleavedLocalAndRandomSearch(
        acquisition_function=self.acquisition_function,
        config_space=self.config_space,
        rng=self.rng,
        max_steps=self.sls_max_steps,
        n_steps_plateau_walk=self.sls_n_steps_plateau_walk,
        n_sls_iterations=self.n_sls_iterations,
        rand_prob=0.0,
    )
    self.random_configuration_chooser = ChooserProb(prob=rand_prob,
                                                    rng=self.rng)
    self.random_check_idx = 0
class async_mqMFES(async_mqHyperband):
    """
    The implementation of Asynchronous MFES (combine ASHA and MFES)
    """

    def __init__(self,
                 objective_func,
                 config_space: ConfigurationSpace,
                 R,
                 eta=3,
                 skip_outer_loop=0,
                 rand_prob=0.3,
                 use_bohb=False,
                 init_weight=None,
                 update_enable=True,
                 weight_method='rank_loss_p_norm',
                 fusion_method='idp',
                 power_num=3,
                 random_state=1,
                 method_id='mqAsyncMFES',
                 restart_needed=True,
                 time_limit_per_trial=600,
                 runtime_limit=None,
                 seed=1,
                 ip='',
                 port=13579,
                 authkey=b'abc'):
        """Initialise asynchronous MFES on top of the parent facade.

        The scheduling arguments (``R``, ``eta``, ``skip_outer_loop``,
        ``random_state``, ``method_id``, limits and connection settings) are
        forwarded to the parent constructor. ``seed`` seeds the surrogate
        (BOHB mode) and ``self.rng``. ``init_weight`` must have
        ``s_max + 1`` entries; when ``None`` it is uniform over the first
        ``s_max`` entries with a trailing zero. ``rand_prob`` is the
        probability handed to the random configuration chooser.
        """
        super().__init__(objective_func,
                         config_space,
                         R,
                         eta=eta,
                         skip_outer_loop=skip_outer_loop,
                         random_state=random_state,
                         method_id=method_id,
                         restart_needed=restart_needed,
                         time_limit_per_trial=time_limit_per_trial,
                         runtime_limit=runtime_limit,
                         ip=ip,
                         port=port,
                         authkey=authkey)
        self.seed = seed
        self.last_n_iteration = None
        self.use_bohb_strategy = use_bohb
        self.update_enable = update_enable
        self.fusion_method = fusion_method
        # Parameter for weight method `rank_loss_p_norm`.
        self.power_num = power_num
        # Specify the weight learning method.
        self.weight_method = weight_method
        self.weight_update_id = 0
        self.weight_changed_cnt = 0

        if init_weight is None:
            init_weight = [1. / self.s_max] * self.s_max + [0.]
        assert len(init_weight) == (self.s_max + 1)
        self.logger.info("Initialize weight to %s" %
                         init_weight[:self.s_max + 1])
        types, bounds = get_types(config_space)

        if not self.use_bohb_strategy:
            self.surrogate = RandomForestEnsemble(types, bounds, self.s_max,
                                                  self.eta, init_weight,
                                                  self.fusion_method)
        else:
            self.surrogate = RandomForestWithInstances(types, bounds,
                                                       seed=self.seed)
        self.acquisition_function = EI(model=self.surrogate)

        self.iterate_id = 0
        self.iterate_r = list()
        self.hist_weights = list()

        # Saving evaluation statistics in Hyperband.
        self.target_x = dict()
        self.target_y = dict()
        for item in np.logspace(0, self.s_max, self.s_max + 1, base=self.eta):
            r = int(item)
            self.iterate_r.append(r)
            self.target_x[r] = list()
            self.target_y[r] = list()

        # BO optimizer settings.
        self.history_container = HistoryContainer(task_id=self.method_name)
        self.sls_max_steps = None
        self.n_sls_iterations = 5
        self.sls_n_steps_plateau_walk = 10
        self.rng = np.random.RandomState(seed=self.seed)
        self.acq_optimizer = InterleavedLocalAndRandomSearch(
            acquisition_function=self.acquisition_function,
            config_space=self.config_space,
            rng=self.rng,
            max_steps=self.sls_max_steps,
            n_steps_plateau_walk=self.sls_n_steps_plateau_walk,
            n_sls_iterations=self.n_sls_iterations,
            rand_prob=0.0,
        )
        self.random_configuration_chooser = ChooserProb(prob=rand_prob,
                                                        rng=self.rng)
        self.random_check_idx = 0

    def update_observation(self, config, perf, n_iteration):
        """Record a finished evaluation and refit the surrogate.

        Parameters
        ----------
        config
            The evaluated configuration; must be a RUNNING job in the rung
            that corresponds to ``n_iteration``.
        perf
            Observed performance.
        n_iteration
            Resource used for the evaluation; converted with ``int``.
        """
        rung_id = self.get_rung_id(self.bracket, n_iteration)

        # Mark the matching job as completed.
        updated = False
        for job in self.bracket[rung_id]['jobs']:
            _job_status, _config, _perf, _extra_conf = job
            if _config == config:
                assert _job_status == RUNNING
                job[0] = COMPLETED
                job[2] = perf
                updated = True
                break
        assert updated

        # Configurations of this rung without a recorded result (this
        # includes `config` itself, which is appended only afterwards) get
        # the median of the results seen so far as imputed value.
        configs_running = list()
        for _config in self.bracket[rung_id]['configs']:
            if _config not in self.target_x[n_iteration]:
                configs_running.append(_config)
        value_imputed = np.median(self.target_y[n_iteration])

        n_iteration = int(n_iteration)
        self.target_x[n_iteration].append(config)
        self.target_y[n_iteration].append(perf)

        if n_iteration == self.R:
            self.incumbent_configs.append(config)
            self.incumbent_perfs.append(perf)
            # Update history container.
            self.history_container.add(config, perf)

        # Refit the ensemble surrogate model.
        configs_train = self.target_x[n_iteration] + configs_running
        results_train = self.target_y[n_iteration] + [value_imputed] * len(
            configs_running)
        results_train = np.array(std_normalization(results_train),
                                 dtype=np.float64)
        if not self.use_bohb_strategy:
            self.surrogate.train(
                convert_configurations_to_array(configs_train),
                results_train,
                r=n_iteration)
        else:
            # In BOHB mode only full-resource results train the model.
            if n_iteration == self.R:
                self.surrogate.train(
                    convert_configurations_to_array(configs_train),
                    results_train)

    def choose_next(self):
        """
        sample a config according to MFES. give iterations according to Hyperband strategy.

        Returns
        -------
        Tuple ``(next_config, next_n_iteration, next_extra_conf)`` where the
        last element is an empty dict.
        """
        next_config = None
        next_n_iteration = self.get_next_n_iteration()
        next_rung_id = self.get_rung_id(self.bracket, next_n_iteration)

        # Update weight when the inner loop of hyperband is finished
        if self.last_n_iteration != next_n_iteration and not self.use_bohb_strategy:
            if self.update_enable and self.weight_update_id > self.s_max:
                self.update_weight()
            self.weight_update_id += 1
        self.last_n_iteration = next_n_iteration

        # sample config
        excluded_configs = self.bracket[next_rung_id]['configs']
        if len(self.target_y[self.iterate_r[-1]]) == 0:
            # No full-resource result yet: nothing to model, sample randomly.
            next_config = sample_configuration(
                self.config_space, excluded_configs=excluded_configs)
        else:
            # Like BOHB, sample a fixed percentage of random configurations.
            self.random_check_idx += 1
            if self.random_configuration_chooser.check(self.random_check_idx):
                next_config = sample_configuration(
                    self.config_space, excluded_configs=excluded_configs)
            else:
                acq_configs = self.get_bo_candidates()
                for config in acq_configs:
                    if config not in self.bracket[next_rung_id]['configs']:
                        next_config = config
                        break
                if next_config is None:
                    self.logger.warning(
                        'Cannot get a non duplicate configuration from bo candidates. '
                        'Sample a random one.')
                    next_config = sample_configuration(
                        self.config_space, excluded_configs=excluded_configs)

        next_extra_conf = {}
        return next_config, next_n_iteration, next_extra_conf

    def get_bo_candidates(self):
        """Return challenger configurations proposed by the acquisition optimizer."""
        std_incumbent_value = np.min(
            std_normalization(self.target_y[self.iterate_r[-1]]))
        # Update surrogate model in acquisition function.
        self.acquisition_function.update(
            model=self.surrogate,
            eta=std_incumbent_value,
            num_data=len(self.incumbent_configs))

        challengers = self.acq_optimizer.maximize(
            runhistory=self.history_container,
            num_points=5000,
        )
        return challengers.challengers

    @staticmethod
    def calculate_preserving_order_num(y_pred, y_true):
        """Count index pairs whose ordering agrees between the two sequences.

        Returns
        -------
        Tuple ``(order_preserving_num, total_pair_num)``.
        """
        array_size = len(y_pred)
        assert len(y_true) == array_size

        total_pair_num, order_preserving_num = 0, 0
        for idx in range(array_size):
            for inner_idx in range(idx + 1, array_size):
                if bool(y_true[idx] > y_true[inner_idx]) == bool(
                        y_pred[idx] > y_pred[inner_idx]):
                    order_preserving_num += 1
                total_pair_num += 1
        return order_preserving_num, total_pair_num

    def _cross_validated_predictions(self, test_x, test_y, fold_num,
                                     sample=False):
        """Return out-of-fold predictions of a fresh forest for ``test_x``.

        With ``sample=True`` each prediction is replaced by a normal draw
        from ``self.rng`` around the predicted mean.
        """
        kfold = KFold(n_splits=fold_num)
        # Float buffer: an integer array would truncate the predictions and
        # destroy their ordering.
        cv_pred = np.zeros(len(test_y), dtype=np.float64)
        types, bounds = get_types(self.config_space)
        for train_idx, valid_idx in kfold.split(test_x):
            _surrogate = RandomForestWithInstances(types=types, bounds=bounds)
            _surrogate.train(test_x[train_idx], test_y[train_idx])
            _pred, _var = _surrogate.predict(test_x[valid_idx])
            _pred = _pred.reshape(-1)
            if sample:
                # NOTE(review): the second value is named variance but is
                # passed as the scale (std) of rng.normal - confirm intent.
                _pred = self.rng.normal(_pred, _var.reshape(-1))
            cv_pred[valid_idx] = _pred
        return cv_pred

    def update_weight(self):
        """Recompute the surrogate weights and save their history to disk.

        With fewer than three full-resource observations the current
        weights are kept. Otherwise the weights follow ``weight_method``
        (``'rank_loss_p_norm'`` or ``'rank_loss_prob'``).

        Raises
        ------
        ValueError
            If ``weight_method`` is not one of the two supported names.
        """
        start_time = time.time()

        max_r = self.iterate_r[-1]
        incumbent_configs = self.target_x[max_r]
        test_x = convert_configurations_to_array(incumbent_configs)
        test_y = np.array(self.target_y[max_r], dtype=np.float64)

        r_list = self.surrogate.surrogate_r
        K = len(r_list)
        fold_num = 5

        if len(test_y) >= 3:
            if self.weight_method == 'rank_loss_p_norm':
                preserving_order_p = list()
                for i, r in enumerate(r_list):
                    if i != K - 1:
                        # Lower-fidelity surrogates are scored directly.
                        mean, _ = self.surrogate.surrogate_container[
                            r].predict(test_x)
                        tmp_y = np.reshape(mean, -1)
                        preorder_num, pair_num = self.calculate_preserving_order_num(
                            tmp_y, test_y)
                        preserving_order_p.append(preorder_num / pair_num)
                    else:
                        # The last surrogate is scored by cross validation,
                        # which needs enough points for every fold.
                        if len(test_y) < 2 * fold_num:
                            preserving_order_p.append(0)
                        else:
                            cv_pred = self._cross_validated_predictions(
                                test_x, test_y, fold_num)
                            preorder_num, pair_num = self.calculate_preserving_order_num(
                                cv_pred, test_y)
                            preserving_order_p.append(preorder_num / pair_num)

                trans_order_weight = np.array(preserving_order_p)
                powered = np.power(trans_order_weight, self.power_num)
                new_weights = powered / np.sum(powered)

            elif self.weight_method == 'rank_loss_prob':
                # For basic surrogate i=1:K-1.
                mean_list, var_list = list(), list()
                for r in r_list[:-1]:
                    mean, var = self.surrogate.surrogate_container[r].predict(
                        test_x)
                    mean_list.append(np.reshape(mean, -1))
                    var_list.append(np.reshape(var, -1))

                sample_num = 100
                min_probability_array = [0] * K
                for _ in range(sample_num):
                    order_preseving_nums = list()

                    # For basic surrogate i=1:K-1.
                    for idx in range(K - 1):
                        # NOTE(review): variance is passed as the scale
                        # (std) of rng.normal - confirm intent.
                        sampled_y = self.rng.normal(mean_list[idx],
                                                    var_list[idx])
                        _num, _ = self.calculate_preserving_order_num(
                            sampled_y, test_y)
                        order_preseving_nums.append(_num)

                    # For basic surrogate i=K. cv
                    if len(test_y) < 2 * fold_num:
                        order_preseving_nums.append(0)
                    else:
                        cv_pred = self._cross_validated_predictions(
                            test_x, test_y, fold_num, sample=True)
                        _num, _ = self.calculate_preserving_order_num(
                            cv_pred, test_y)
                        order_preseving_nums.append(_num)

                    max_id = np.argmax(order_preseving_nums)
                    min_probability_array[max_id] += 1
                new_weights = np.array(min_probability_array) / sample_num
            else:
                raise ValueError('Invalid weight method: %s!' %
                                 self.weight_method)
        else:
            # Too few observations: keep the current weights.
            new_weights = [self.surrogate.surrogate_weight[r] for r in r_list]

        self.logger.info(
            '[%s] %d-th Updating weights: %s' %
            (self.weight_method, self.weight_changed_cnt, str(new_weights)))

        # Assign the weight to each basic surrogate.
        for i, r in enumerate(r_list):
            self.surrogate.surrogate_weight[r] = new_weights[i]
        self.weight_changed_cnt += 1

        # Save the weight data.
        self.hist_weights.append(new_weights)
        dir_path = os.path.join(self.data_directory, 'saved_weights')
        file_name = 'mfes_weights_%s.npy' % (self.method_name, )
        os.makedirs(dir_path, exist_ok=True)
        np.save(os.path.join(dir_path, file_name),
                np.asarray(self.hist_weights))
        self.logger.info(
            'update_weight() cost %.2fs. new weights are saved to %s' %
            (time.time() - start_time, os.path.join(dir_path, file_name)))

    def get_weights(self):
        """Return the list of weight vectors recorded by ``update_weight``."""
        return self.hist_weights
def __init__(self, objective_func,
             config_space: ConfigurationSpace,
             R,
             eta=3,
             num_iter=10000,
             rand_prob=0.3,
             init_weight=None, update_enable=True,
             weight_method='rank_loss_p_norm',
             fusion_method='idp',
             power_num=3,
             random_state=1,
             method_id='mqMFES',
             restart_needed=True,
             time_limit_per_trial=600,
             runtime_limit=None,
             ip='',
             port=13579,
             authkey=b'abc',):
    """Initialise the optimiser state, the weighted surrogate and the acquisition optimiser.

    Args:
        objective_func: Forwarded unchanged to the base-class constructor.
        config_space: Search space; it is seeded with ``random_state``.
        R: Resource budget from which the bracket count ``s_max`` and the
            per-bracket budget ``B = (s_max + 1) * R`` are derived
            (presumably the maximum resource per configuration -- confirm
            against the base facade). Also sizes the job queue (``3 * R``).
        eta: Base of the geometric resource schedule; the fidelity levels are
            ``eta ** 0 .. eta ** s_max``.
        num_iter: Stored as ``self.num_iter``.
        rand_prob: Probability handed to ``ChooserProb``.
        init_weight: Initial weights of the ``s_max + 1`` base surrogates.
            When ``None`` the lowest level gets 0 and the others share
            ``1 / s_max`` each (this default raises ``ZeroDivisionError``
            when ``s_max == 0``, i.e. when ``R < eta``).
        update_enable: Stored as ``self.update_enable``.
        weight_method: Name of the weight-learning method.
        fusion_method: Forwarded to ``WeightedRandomForestCluster``.
        power_num: Exponent used by the ``rank_loss_p_norm`` weight method.
        random_state: Seed for the configuration space and the NumPy RNG.
        method_id: Forwarded to the base class as ``method_name``.
        restart_needed, time_limit_per_trial, runtime_limit, ip, port,
        authkey: Forwarded unchanged to the base-class constructor.

    Raises:
        AssertionError: If ``init_weight`` does not have ``s_max + 1`` entries.
    """
    max_queue_len = 3 * R  # conservative design
    super().__init__(objective_func, method_name=method_id,
                     restart_needed=restart_needed, time_limit_per_trial=time_limit_per_trial,
                     runtime_limit=runtime_limit,
                     max_queue_len=max_queue_len, ip=ip, port=port, authkey=authkey)
    self.seed = random_state
    self.config_space = config_space
    self.config_space.seed(self.seed)

    self.R = R
    self.eta = eta
    self.logeta = lambda x: log(x) / log(self.eta)
    self.s_max = int(self.logeta(self.R))
    # log(R) / log(eta) may land just below an exact integer (R=243, eta=3
    # yields 4.999999999999999); int() would then drop the top bracket.
    if self.eta ** (self.s_max + 1) <= self.R:
        self.s_max += 1
    self.B = (self.s_max + 1) * self.R
    self.num_iter = num_iter

    self.update_enable = update_enable
    self.fusion_method = fusion_method
    # Parameter for weight method `rank_loss_p_norm`.
    self.power_num = power_num
    # Specify the weight learning method.
    self.weight_method = weight_method
    self.weight_update_id = 0
    self.weight_changed_cnt = 0

    if init_weight is None:
        init_weight = [0.]
        init_weight.extend([1. / self.s_max] * self.s_max)
    assert len(init_weight) == (self.s_max + 1)
    self.logger.info('Weight method & flag: %s-%s' % (self.weight_method, str(self.update_enable)))
    self.logger.info("Initial weight is: %s" % init_weight[:self.s_max + 1])
    types, bounds = get_types(config_space)

    self.weighted_surrogate = WeightedRandomForestCluster(
        types, bounds, self.s_max, self.eta, init_weight, self.fusion_method
    )
    self.acquisition_function = EI(model=self.weighted_surrogate)

    self.incumbent_configs = []
    self.incumbent_perfs = []

    self.iterate_id = 0
    self.iterate_r = []
    self.hist_weights = list()

    # Saving evaluation statistics in Hyperband.
    # One bucket of configurations / losses per fidelity level eta ** index.
    self.target_x = dict()
    self.target_y = dict()
    for index, item in enumerate(np.logspace(0, self.s_max, self.s_max + 1, base=self.eta)):
        r = int(item)
        self.iterate_r.append(r)
        self.target_x[r] = []
        self.target_y[r] = []

    # BO optimizer settings.
    self.configs = list()
    self.history_container = HistoryContainer(task_id=self.method_name)
    self.sls_max_steps = None
    self.n_sls_iterations = 5
    self.sls_n_steps_plateau_walk = 10
    self.rng = np.random.RandomState(seed=self.seed)
    self.acq_optimizer = InterleavedLocalAndRandomSearch(
        acquisition_function=self.acquisition_function,
        config_space=self.config_space,
        rng=self.rng,
        max_steps=self.sls_max_steps,
        n_steps_plateau_walk=self.sls_n_steps_plateau_walk,
        n_sls_iterations=self.n_sls_iterations,
        rand_prob=0.0,
    )
    self.random_configuration_chooser = ChooserProb(prob=rand_prob, rng=self.rng)
class mqMFES(mqBaseFacade):
    """
    MFES-HB: https://arxiv.org/abs/2012.03011

    Runs Hyperband-style brackets in parallel while fitting one random-forest
    surrogate per fidelity level; the surrogates are combined by a weighted
    ensemble whose weights are re-learned from the highest-fidelity data.
    """

    def __init__(self, objective_func,
                 config_space: ConfigurationSpace,
                 R,
                 eta=3,
                 num_iter=10000,
                 rand_prob=0.3,
                 init_weight=None, update_enable=True,
                 weight_method='rank_loss_p_norm',
                 fusion_method='idp',
                 power_num=3,
                 random_state=1,
                 method_id='mqMFES',
                 restart_needed=True,
                 time_limit_per_trial=600,
                 runtime_limit=None,
                 ip='',
                 port=13579,
                 authkey=b'abc',):
        """Initialise the optimiser state, the weighted surrogate and the acquisition optimiser.

        Args:
            objective_func: Forwarded unchanged to the base-class constructor.
            config_space: Search space; it is seeded with ``random_state``.
            R: Resource level at which results are recorded as incumbents;
                the bracket count ``s_max`` and the per-bracket budget
                ``B = (s_max + 1) * R`` are derived from it. Also sizes the
                job queue (``3 * R``).
            eta: Base of the geometric resource schedule; the fidelity levels
                are ``eta ** 0 .. eta ** s_max``.
            num_iter: Number of outer iterations executed by ``run``.
            rand_prob: Probability handed to ``ChooserProb`` for drawing a
                random configuration instead of an acquisition candidate.
            init_weight: Initial weights of the ``s_max + 1`` base surrogates.
                When ``None`` the lowest level gets 0 and the others share
                ``1 / s_max`` each (this default raises ``ZeroDivisionError``
                when ``s_max == 0``, i.e. when ``R < eta``).
            update_enable: Whether ``iterate`` re-learns the weights.
            weight_method: ``'rank_loss_p_norm'`` or ``'rank_loss_prob'``.
            fusion_method: Forwarded to ``WeightedRandomForestCluster``.
            power_num: Exponent used by the ``rank_loss_p_norm`` method.
            random_state: Seed for the configuration space and the NumPy RNG.
            method_id: Forwarded to the base class as ``method_name``.
            restart_needed, time_limit_per_trial, runtime_limit, ip, port,
            authkey: Forwarded unchanged to the base-class constructor.

        Raises:
            AssertionError: If ``init_weight`` does not have ``s_max + 1`` entries.
        """
        max_queue_len = 3 * R  # conservative design
        super().__init__(objective_func, method_name=method_id,
                         restart_needed=restart_needed, time_limit_per_trial=time_limit_per_trial,
                         runtime_limit=runtime_limit,
                         max_queue_len=max_queue_len, ip=ip, port=port, authkey=authkey)
        self.seed = random_state
        self.config_space = config_space
        self.config_space.seed(self.seed)

        self.R = R
        self.eta = eta
        self.logeta = lambda x: log(x) / log(self.eta)
        self.s_max = int(self.logeta(self.R))
        # log(R) / log(eta) may land just below an exact integer (R=243, eta=3
        # yields 4.999999999999999); int() would then drop the top bracket and
        # iterate() would look up a fidelity level that has no bucket.
        if self.eta ** (self.s_max + 1) <= self.R:
            self.s_max += 1
        self.B = (self.s_max + 1) * self.R
        self.num_iter = num_iter

        self.update_enable = update_enable
        self.fusion_method = fusion_method
        # Parameter for weight method `rank_loss_p_norm`.
        self.power_num = power_num
        # Specify the weight learning method.
        self.weight_method = weight_method
        self.weight_update_id = 0
        self.weight_changed_cnt = 0

        if init_weight is None:
            init_weight = [0.]
            init_weight.extend([1. / self.s_max] * self.s_max)
        assert len(init_weight) == (self.s_max + 1)
        self.logger.info('Weight method & flag: %s-%s' % (self.weight_method, str(self.update_enable)))
        self.logger.info("Initial weight is: %s" % init_weight[:self.s_max + 1])
        types, bounds = get_types(config_space)

        self.weighted_surrogate = WeightedRandomForestCluster(
            types, bounds, self.s_max, self.eta, init_weight, self.fusion_method
        )
        self.acquisition_function = EI(model=self.weighted_surrogate)

        self.incumbent_configs = []
        self.incumbent_perfs = []

        self.iterate_id = 0
        self.iterate_r = []
        self.hist_weights = list()

        # Saving evaluation statistics in Hyperband.
        # One bucket of configurations / losses per fidelity level eta ** index.
        self.target_x = dict()
        self.target_y = dict()
        for index, item in enumerate(np.logspace(0, self.s_max, self.s_max + 1, base=self.eta)):
            r = int(item)
            self.iterate_r.append(r)
            self.target_x[r] = []
            self.target_y[r] = []

        # BO optimizer settings.
        self.configs = list()
        self.history_container = HistoryContainer(task_id=self.method_name)
        self.sls_max_steps = None
        self.n_sls_iterations = 5
        self.sls_n_steps_plateau_walk = 10
        self.rng = np.random.RandomState(seed=self.seed)
        self.acq_optimizer = InterleavedLocalAndRandomSearch(
            acquisition_function=self.acquisition_function,
            config_space=self.config_space,
            rng=self.rng,
            max_steps=self.sls_max_steps,
            n_steps_plateau_walk=self.sls_n_steps_plateau_walk,
            n_sls_iterations=self.n_sls_iterations,
            rand_prob=0.0,
        )
        self.random_configuration_chooser = ChooserProb(prob=rand_prob, rng=self.rng)

    def iterate(self, skip_last=0):
        """Run every bracket once (``s = s_max .. 0``) and refit the surrogates.

        Args:
            skip_last: Number of final successive-halving stages to skip in
                each bracket.
        """
        for s in reversed(range(self.s_max + 1)):

            # Weights are only re-learned once the first full pass is over.
            if self.update_enable and self.weight_update_id > self.s_max:
                self.update_weight()
            self.weight_update_id += 1

            # Set initial number of configurations
            n = int(ceil(self.B / self.R / (s + 1) * self.eta ** s))
            # initial number of iterations per config
            # Exact division instead of R * eta ** (-s): the reciprocal form
            # can fall just below an integer and be truncated one too low.
            r = int(self.R / self.eta ** s)

            # Choose a batch of configurations in different mechanisms.
            start_time = time.time()
            T = self.choose_next(n)
            time_elapsed = time.time() - start_time
            self.logger.info("[%s] Choosing next configurations took %.2f sec." % (self.method_name, time_elapsed))

            extra_info = None
            last_run_num = None

            for i in range((s + 1) - int(skip_last)):  # changed from s + 1

                # Run each of the n configs for <iterations>
                # and keep best (n_configs / eta) configurations

                n_configs = n / self.eta ** i
                n_iteration = r * self.eta ** (i)

                # Without restarts only the additional resource is requested.
                n_iter = n_iteration
                if last_run_num is not None and not self.restart_needed:
                    n_iter -= last_run_num
                last_run_num = n_iteration

                self.logger.info("%s: %d configurations x %d iterations each" %
                                 (self.method_name, int(n_configs), int(n_iteration)))

                ret_val, early_stops = self.run_in_parallel(T, n_iter, extra_info)
                val_losses = [item['loss'] for item in ret_val]
                ref_list = [item['ref_id'] for item in ret_val]

                self.target_x[int(n_iteration)].extend(T)
                self.target_y[int(n_iteration)].extend(val_losses)

                if int(n_iteration) == self.R:
                    self.incumbent_configs.extend(T)
                    self.incumbent_perfs.extend(val_losses)
                    # Update history container.
                    for _config, _perf in zip(T, val_losses):
                        self.history_container.add(_config, _perf)

                # Select a number of best configurations for the next loop.
                # Filter out early stops, if any.
                indices = np.argsort(val_losses)
                if len(T) == sum(early_stops):
                    break
                if len(T) >= self.eta:
                    indices = [idx for idx in indices if not early_stops[idx]]
                    T = [T[idx] for idx in indices]
                    extra_info = [ref_list[idx] for idx in indices]
                    reduced_num = int(n_configs / self.eta)
                    T = T[0:reduced_num]
                    extra_info = extra_info[0:reduced_num]
                else:
                    T = [T[indices[0]]]     # todo: confirm no filter early stops?
                    extra_info = [ref_list[indices[0]]]
                val_losses = [val_losses[idx] for idx in indices][0:len(T)]  # update: sorted
                incumbent_loss = val_losses[0]
                self.add_stage_history(self.stage_id, min(self.global_incumbent, incumbent_loss))
                self.stage_id += 1
            # self.remove_immediate_model()

            # Refit every surrogate whose fidelity level received new data.
            for item in self.iterate_r[self.iterate_r.index(r):]:
                # NORMALIZE Objective value: normalization
                normalized_y = std_normalization(self.target_y[item])
                self.weighted_surrogate.train(convert_configurations_to_array(self.target_x[item]),
                                              np.array(normalized_y, dtype=np.float64), r=item)

    def run(self, skip_last=0):
        """Execute ``num_iter`` calls of ``iterate``, saving statistics after each.

        Any exception is printed and logged, then swallowed, so the caller
        always gets control back.
        """
        try:
            for iter_id in range(1, 1 + self.num_iter):
                self.logger.info('-' * 50)
                self.logger.info("%s algorithm: %d/%d iteration starts" % (self.method_name, iter_id, self.num_iter))
                start_time = time.time()
                self.iterate(skip_last=skip_last)
                time_elapsed = (time.time() - start_time) / 60
                self.logger.info("%d/%d-Iteration took %.2f min." % (iter_id, self.num_iter, time_elapsed))
                self.iterate_id += 1
                self.save_intemediate_statistics()
        except Exception as e:
            print(e)
            self.logger.error(str(e))
            # Clean the immediate results.
            # self.remove_immediate_model()

    def get_bo_candidates(self, num_configs):
        """Return the first ``num_configs`` challengers proposed by the acquisition optimiser."""
        # todo: parallel methods
        std_incumbent_value = np.min(std_normalization(self.target_y[self.iterate_r[-1]]))
        # Update surrogate model in acquisition function.
        self.acquisition_function.update(model=self.weighted_surrogate, eta=std_incumbent_value,
                                         num_data=len(self.history_container.data))

        challengers = self.acq_optimizer.maximize(
            runhistory=self.history_container,
            num_points=5000,
        )
        return challengers.challengers[:num_configs]

    def choose_next(self, num_config):
        """Pick up to ``num_config`` configurations for the next bracket.

        Samples purely at random until the highest fidelity level has data;
        afterwards mixes acquisition candidates with random draws and drops
        configurations that were already handed out.
        """
        if len(self.target_y[self.iterate_r[-1]]) == 0:
            configs = sample_configurations(self.config_space, num_config)
            self.configs.extend(configs)
            return configs

        config_candidates = list()
        acq_configs = self.get_bo_candidates(num_configs=2 * num_config)
        acq_idx = 0
        for idx in range(1, 1 + 2 * num_config):
            # Like BOHB, sample a fixed percentage of random configurations.
            if self.random_configuration_chooser.check(idx):
                _config = self.config_space.sample_configuration()
            else:
                _config = acq_configs[acq_idx]
                acq_idx += 1
            if _config not in config_candidates:
                config_candidates.append(_config)
            if len(config_candidates) >= num_config:
                break

        if len(config_candidates) < num_config:
            config_candidates = expand_configurations(config_candidates, self.config_space, num_config)

        _config_candidates = []
        for config in config_candidates:
            if config not in self.configs:  # Check if evaluated
                _config_candidates.append(config)
        self.configs.extend(_config_candidates)
        return _config_candidates

    @staticmethod
    def calculate_ranking_loss(y_pred, y_true):
        """Pairwise logistic ranking loss of ``y_pred`` against ``y_true``, divided by the sample count."""
        length = len(y_pred)
        y_pred = np.reshape(y_pred, -1)
        y_pred1 = np.tile(y_pred, (length, 1))
        y_pred2 = np.transpose(y_pred1)
        diff = y_pred1 - y_pred2
        y_true = np.reshape(y_true, -1)
        y_true1 = np.tile(y_true, (length, 1))
        y_true2 = np.transpose(y_true1)
        # Only pairs whose true values are strictly ordered contribute.
        y_mask = (y_true1 - y_true2 > 0) + 0
        loss = np.sum(np.log(1 + np.exp(-diff)) * y_mask) / length
        return loss

    @staticmethod
    def calculate_preserving_order_num(y_pred, y_true):
        """Count the pairs ``(i, j)``, ``i < j``, ordered identically in both arrays.

        Returns:
            ``(order_preserving_num, total_pair_num)``.
        """
        array_size = len(y_pred)
        assert len(y_true) == array_size

        total_pair_num, order_preserving_num = 0, 0
        for idx in range(array_size):
            for inner_idx in range(idx + 1, array_size):
                if bool(y_true[idx] > y_true[inner_idx]) == bool(y_pred[idx] > y_pred[inner_idx]):
                    order_preserving_num += 1
                total_pair_num += 1
        return order_preserving_num, total_pair_num

    def _cross_validated_predictions(self, test_x, test_y, fold_num, sample=False):
        """Out-of-fold predictions of a fresh random forest trained on ``(test_x, test_y)``.

        With ``sample=True`` each fold's prediction is drawn from
        ``self.rng.normal`` instead of using the predicted mean.
        """
        # Float buffer: an integer array would truncate every prediction on
        # assignment and destroy the ordering the caller is about to measure.
        cv_pred = np.zeros(len(test_y), dtype=np.float64)
        kfold = KFold(n_splits=fold_num)
        for train_idx, valid_idx in kfold.split(test_x):
            train_configs, train_y = test_x[train_idx], test_y[train_idx]
            valid_configs = test_x[valid_idx]
            types, bounds = get_types(self.config_space)
            _surrogate = RandomForestWithInstances(types=types, bounds=bounds)
            _surrogate.train(train_configs, train_y)
            _pred, _var = _surrogate.predict(valid_configs)
            if sample:
                # NOTE(review): the second value is passed as `scale`, which
                # NumPy treats as a standard deviation; if predict() returns a
                # variance this should probably be np.sqrt(_var) -- confirm.
                cv_pred[valid_idx] = self.rng.normal(_pred.reshape(-1), _var.reshape(-1))
            else:
                cv_pred[valid_idx] = _pred.reshape(-1)
        return cv_pred

    def update_weight(self):
        """Re-learn the surrogate weights from the highest-fidelity observations.

        With fewer than three such observations the current weights are kept.
        The resulting weights are assigned to the surrogates, appended to
        ``hist_weights`` and the whole history is saved as a ``.npy`` file
        under ``<data_directory>/saved_weights``.

        Raises:
            ValueError: If ``weight_method`` is not a supported method name.
        """
        start_time = time.time()

        max_r = self.iterate_r[-1]
        incumbent_configs = self.target_x[max_r]
        test_x = convert_configurations_to_array(incumbent_configs)
        test_y = np.array(self.target_y[max_r], dtype=np.float64)

        r_list = self.weighted_surrogate.surrogate_r
        K = len(r_list)
        fold_num = 5

        if len(test_y) >= 3:
            # Get previous weights
            if self.weight_method == 'rank_loss_p_norm':
                preserving_order_p = list()
                for i, r in enumerate(r_list):
                    if i != K - 1:
                        mean, _ = self.weighted_surrogate.surrogate_container[r].predict(test_x)
                        tmp_y = np.reshape(mean, -1)
                        preorder_num, pair_num = self.calculate_preserving_order_num(tmp_y, test_y)
                        preserving_order_p.append(preorder_num / pair_num)
                    elif len(test_y) < 2 * fold_num:
                        # Too little data to cross-validate the top surrogate.
                        preserving_order_p.append(0)
                    else:
                        # 5-fold cross validation.
                        cv_pred = self._cross_validated_predictions(test_x, test_y, fold_num)
                        preorder_num, pair_num = self.calculate_preserving_order_num(cv_pred, test_y)
                        preserving_order_p.append(preorder_num / pair_num)

                trans_order_weight = np.array(preserving_order_p)
                power_sum = np.sum(np.power(trans_order_weight, self.power_num))
                new_weights = np.power(trans_order_weight, self.power_num) / power_sum

            elif self.weight_method == 'rank_loss_prob':
                # For basic surrogate i=1:K-1.
                mean_list, var_list = list(), list()
                for r in r_list[:-1]:
                    mean, var = self.weighted_surrogate.surrogate_container[r].predict(test_x)
                    mean_list.append(np.reshape(mean, -1))
                    var_list.append(np.reshape(var, -1))
                sample_num = 100
                min_probability_array = [0] * K
                for _ in range(sample_num):
                    order_preseving_nums = list()

                    # For basic surrogate i=1:K-1.
                    for idx in range(K - 1):
                        # NOTE(review): var_list[idx] is used as the normal
                        # `scale` (a standard deviation in NumPy) -- confirm.
                        sampled_y = self.rng.normal(mean_list[idx], var_list[idx])
                        _num, _ = self.calculate_preserving_order_num(sampled_y, test_y)
                        order_preseving_nums.append(_num)

                    # For basic surrogate i=K. cv
                    if len(test_y) < 2 * fold_num:
                        order_preseving_nums.append(0)
                    else:
                        # 5-fold cross validation.
                        cv_pred = self._cross_validated_predictions(test_x, test_y, fold_num, sample=True)
                        _num, _ = self.calculate_preserving_order_num(cv_pred, test_y)
                        order_preseving_nums.append(_num)
                    # The surrogate that best preserves the order wins this draw.
                    max_id = np.argmax(order_preseving_nums)
                    min_probability_array[max_id] += 1
                new_weights = np.array(min_probability_array) / sample_num
            else:
                raise ValueError('Invalid weight method: %s!' % self.weight_method)
        else:
            old_weights = list()
            for r in r_list:
                _weight = self.weighted_surrogate.surrogate_weight[r]
                old_weights.append(_weight)
            new_weights = old_weights.copy()

        self.logger.info('[%s] %d-th Updating weights: %s' % (
            self.weight_method, self.weight_changed_cnt, str(new_weights)))

        # Assign the weight to each basic surrogate.
        for i, r in enumerate(r_list):
            self.weighted_surrogate.surrogate_weight[r] = new_weights[i]
        self.weight_changed_cnt += 1
        # Save the weight data.
        self.hist_weights.append(new_weights)
        dir_path = os.path.join(self.data_directory, 'saved_weights')
        file_name = 'mfes_weights_%s.npy' % (self.method_name,)
        # exist_ok avoids the check-then-create race between parallel runs.
        os.makedirs(dir_path, exist_ok=True)
        np.save(os.path.join(dir_path, file_name), np.asarray(self.hist_weights))
        self.logger.info('update_weight() cost %.2fs. new weights are saved to %s'
                         % (time.time()-start_time, os.path.join(dir_path, file_name)))

    def get_incumbent(self, num_inc=1):
        """Return the ``num_inc`` lowest-loss configurations and their losses."""
        assert (len(self.incumbent_perfs) == len(self.incumbent_configs))
        indices = np.argsort(self.incumbent_perfs)
        configs = [self.incumbent_configs[i] for i in indices[0:num_inc]]
        perfs = [self.incumbent_perfs[i] for i in indices[0: num_inc]]
        return configs, perfs

    def get_weights(self):
        """Return the history of learned weight vectors."""
        return self.hist_weights
def __init__(self, config_space,
             num_objs=1,
             num_constraints=0,
             initial_trials=3,
             initial_configurations=None,
             init_strategy='random_explore_first',
             history_bo_data=None,
             rand_prob=0.1,
             optimization_strategy='bo',
             surrogate_type='auto',
             acq_type='auto',
             acq_optimizer_type='auto',
             ref_point=None,
             output_dir='logs',
             task_id='default_task_id',
             random_state=None,
             **kwargs):
    """Store the advisor settings, prepare the initial design and build the BO components.

    Args:
        config_space: Search space; seeded here with a value drawn from the RNG.
        num_objs: Number of objectives; 1 selects ``HistoryContainer``,
            anything else ``MOHistoryContainer``.
        num_constraints: Number of constraints, forwarded to the history container.
        initial_trials: Initial value of ``init_num``; replaced afterwards by
            the actual length of the initial design.
        initial_configurations: Optional user-supplied initial design; when
            missing or empty one is created via ``create_initial_design``.
        init_strategy: Strategy name handed to ``create_initial_design``.
        history_bo_data: Stored as ``self.history_bo_data``.
        rand_prob: Stored as ``self.rand_prob``.
        optimization_strategy: Stored as ``self.optimization_strategy``.
        surrogate_type, acq_type, acq_optimizer_type: Stored component choices.
        ref_point: Stored and forwarded to the multi-objective history container.
        output_dir: Stored as ``self.output_dir``.
        task_id: Stored and forwarded to the history container.
        random_state: Passed through ``check_random_state`` to obtain ``self.rng``.
        **kwargs: Accepted but not used in this constructor.
    """
    # Random number source and logger.
    self.rng = check_random_state(random_state)
    self.logger = get_logger(self.__class__.__name__)

    # Problem description and bookkeeping.
    self.task_id = task_id
    self.output_dir = output_dir
    self.num_objs = num_objs
    self.num_constraints = num_constraints
    self.ref_point = ref_point
    self.init_strategy = init_strategy
    self.init_num = initial_trials

    # Choices for the Bayesian-optimization machinery.
    self.optimization_strategy = optimization_strategy
    self.rand_prob = rand_prob
    self.history_bo_data = history_bo_data
    self.surrogate_type = surrogate_type
    self.constraint_surrogate_type = None
    self.acq_type = acq_type
    self.acq_optimizer_type = acq_optimizer_type

    # Seed the search space from our own generator.
    self.config_space = config_space
    self.config_space_seed = self.rng.randint(MAXINT)
    self.config_space.seed(self.config_space_seed)

    # Observation store: a dedicated container for the multi-objective case.
    if self.num_objs != 1:
        self.history_container = MOHistoryContainer(
            task_id, self.num_objs, self.num_constraints, ref_point)
    else:
        self.history_container = HistoryContainer(
            task_id, self.num_constraints, config_space=self.config_space)

    # Initial design: the caller's list wins when it is non-empty.
    if initial_configurations is None or len(initial_configurations) == 0:
        self.initial_configurations = self.create_initial_design(self.init_strategy)
    else:
        self.initial_configurations = initial_configurations
    self.init_num = len(self.initial_configurations)

    # Placeholders for the components created by the setup calls below.
    self.surrogate_model = None
    self.constraint_models = None
    self.acquisition_function = None
    self.optimizer = None
    self.auto_alter_model = False

    self.algo_auto_selection()
    self.check_setup()
    self.setup_bo_basics()
class Advisor(object, metaclass=abc.ABCMeta):
    """Sequential advisor: suggests one configuration at a time and records the results.

    The first ``init_num`` suggestions come from an initial design; afterwards
    suggestions are either random or obtained by maximising an acquisition
    function over a surrogate model, depending on ``optimization_strategy``.
    """

    def __init__(self, config_space,
                 initial_trials=10,
                 initial_configurations=None,
                 init_strategy='random_explore_first',
                 history_bo_data=None,
                 optimization_strategy='bo',
                 surrogate_type='prf',
                 output_dir='logs',
                 task_id=None,
                 rng=None):
        """Store the settings, prepare the initial design and build the BO components.

        Args:
            config_space: Search space; seeded with a value drawn from ``rng``.
            initial_trials: Size of the generated initial design.
            initial_configurations: Optional user-supplied initial design;
                when non-empty it replaces the generated one.
            init_strategy: ``'random'`` or ``'random_explore_first'``.
            history_bo_data: Forwarded to ``build_surrogate``.
            optimization_strategy: ``'bo'`` or ``'random'``.
            surrogate_type: Surrogate identifier forwarded to ``build_surrogate``.
            output_dir: Stored as ``self.output_dir``.
            task_id: Forwarded to ``HistoryContainer``.
            rng: Random generator; obtained from ``get_rng()`` when ``None``.
        """
        # Create output (logging) directory.
        # Init logging module.
        # Random seed generator.
        self.init_strategy = init_strategy
        self.output_dir = output_dir
        if rng is None:
            _, rng = get_rng()
        self.rng = rng
        self.logger = get_logger(self.__class__.__name__)

        # Basic components in Advisor.
        self.optimization_strategy = optimization_strategy
        self.configurations = list()
        self.failed_configurations = list()
        self.perfs = list()
        self.scale_perc = 5
        self.perc = None
        self.min_y = None
        self.max_y = None

        # Init the basic ingredients in Bayesian optimization.
        self.history_bo_data = history_bo_data
        self.surrogate_type = surrogate_type
        self.init_num = initial_trials
        self.config_space = config_space
        self.config_space.seed(rng.randint(MAXINT))

        if initial_configurations is not None and len(initial_configurations) > 0:
            self.initial_configurations = initial_configurations
            self.init_num = len(initial_configurations)
        else:
            self.initial_configurations = self.create_initial_design(self.init_strategy)
            self.init_num = len(self.initial_configurations)
        self.history_container = HistoryContainer(task_id)

        self.surrogate_model = None
        self.acquisition_function = None
        self.optimizer = None
        self.setup_bo_basics()

    def setup_bo_basics(self, acq_type='ei', acq_optimizer_type='local_random'):
        """Build the surrogate model, the acquisition function and its optimiser."""
        self.surrogate_model = build_surrogate(func_str=self.surrogate_type,
                                               config_space=self.config_space,
                                               rng=self.rng,
                                               history_hpo_data=self.history_bo_data)

        self.acquisition_function = build_acq_func(func_str=acq_type, model=self.surrogate_model)

        self.optimizer = build_optimizer(func_str=acq_optimizer_type,
                                         acq_func=self.acquisition_function,
                                         config_space=self.config_space,
                                         rng=self.rng)

    def create_initial_design(self, init_strategy='random'):
        """Return the default configuration followed by ``init_num - 1`` further ones.

        Raises:
            ValueError: If ``init_strategy`` is not a known strategy name.
        """
        default_config = self.config_space.get_default_configuration()
        if init_strategy == 'random':
            num_random_config = self.init_num - 1
            initial_configs = [default_config] + self.sample_random_configs(num_random_config)
            return initial_configs
        elif init_strategy == 'random_explore_first':
            num_random_config = self.init_num - 1
            candidate_configs = self.sample_random_configs(100)
            return self.max_min_distance(default_config, candidate_configs, num_random_config)
        else:
            raise ValueError('Unknown initial design strategy: %s.' % init_strategy)

    def max_min_distance(self, default_config, src_configs, num):
        """Greedily pick ``num`` candidates that are far from everything chosen so far.

        Starting from ``default_config``, each step takes the candidate whose
        distance to its nearest already-chosen configuration is largest.

        Returns:
            ``[default_config]`` followed by the picked candidates, in pick order.
        """
        min_dis = list()
        initial_configs = list()
        initial_configs.append(default_config)

        for config in src_configs:
            dis = np.linalg.norm(config.get_array() - default_config.get_array())  # get_array may have NaN problems
            min_dis.append(dis)
        min_dis = np.array(min_dis)

        for _ in range(num):
            furthest_idx = np.argmax(min_dis)
            furthest_config = src_configs[furthest_idx]
            initial_configs.append(furthest_config)
            # Mark as taken so it can never be the arg-max again.
            min_dis[furthest_idx] = -1

            for j in range(len(src_configs)):
                if src_configs[j] in initial_configs:
                    continue
                updated_dis = np.linalg.norm(src_configs[j].get_array() - furthest_config.get_array())
                min_dis[j] = min(updated_dis, min_dis[j])

        return initial_configs

    def _build_training_data(self):
        """Assemble the surrogate training set ``(X, Y)`` from the observations so far.

        Failed trials are included with the worst observed performance
        (``max_y``) once at least one successful observation exists.
        """
        if len(self.configurations) == 0:
            X = np.array([])
        else:
            failed_configs = list() if self.max_y is None else self.failed_configurations.copy()
            X = convert_configurations_to_array(self.configurations + failed_configs)

        num_failed_trial = len(self.failed_configurations)
        failed_perfs = list() if self.max_y is None else [self.max_y] * num_failed_trial
        Y = np.array(self.perfs + failed_perfs, dtype=np.float64)
        return X, Y

    def get_suggestion(self):
        """Return the next configuration to evaluate.

        While fewer than ``init_num`` trials have been recorded the next entry
        of the initial design is returned. Afterwards the result depends on
        ``optimization_strategy``. In the ``'bo'`` case the first challenger
        that has not been evaluated yet is returned; if every challenger is a
        repeat, a random unseen configuration is returned instead.

        Raises:
            ValueError: If ``optimization_strategy`` is unknown.
        """
        num_config_evaluated = len(self.perfs + self.failed_configurations)
        if num_config_evaluated < self.init_num:
            return self.initial_configurations[num_config_evaluated]

        if self.optimization_strategy == 'random':
            return self.sample_random_configs(1)[0]
        elif self.optimization_strategy == 'bo':
            X, Y = self._build_training_data()
            self.surrogate_model.train(X, Y)
            incumbent_value = self.history_container.get_incumbents()[0][1]
            self.acquisition_function.update(model=self.surrogate_model, eta=incumbent_value,
                                             num_data=num_config_evaluated)
            challengers = self.optimizer.maximize(
                runhistory=self.history_container,
                num_points=5000
            )
            evaluated = self.configurations + self.failed_configurations
            for cur_config in challengers.challengers:
                if cur_config not in evaluated:
                    return cur_config
            # Every challenger was a repeat: unbounded indexing used to raise
            # IndexError here, so fall back to a random configuration.
            self.logger.warning('All challengers have been evaluated already; '
                                'falling back to a random configuration.')
            return self.sample_random_configs(1)[0]
        else:
            raise ValueError('Unknown optimization strategy: %s.' % self.optimization_strategy)

    def update_observation(self, observation):
        """Record one ``(config, perf, trial_state)`` result.

        Successful trials with ``perf < MAXINT`` update the history and the
        running statistics; everything else is stored as a failed configuration.
        """
        config, perf, trial_state = observation
        if trial_state == SUCCESS and perf < MAXINT:
            self.configurations.append(config)
            self.perfs.append(perf)
            self.history_container.add(config, perf)

            self.perc = np.percentile(self.perfs, self.scale_perc)
            self.min_y = np.min(self.perfs)
            self.max_y = np.max(self.perfs)
        else:
            self.failed_configurations.append(config)

    def sample_random_configs(self, num_configs=1):
        """Sample ``num_configs`` configurations, avoiding known ones when possible.

        A duplicate is accepted (with a warning) only after 1000 consecutive
        draws failed to produce an unseen configuration.
        """
        configs = list()
        sample_cnt = 0
        max_sample_cnt = 1000
        while len(configs) < num_configs:
            config = self.config_space.sample_configuration()
            sample_cnt += 1
            if config not in (self.configurations + self.failed_configurations + configs):
                configs.append(config)
                sample_cnt = 0
                continue
            if sample_cnt >= max_sample_cnt:
                self.logger.warning('Cannot sample non duplicate configuration after %d iterations.' % max_sample_cnt)
                configs.append(config)
                sample_cnt = 0
        return configs

    def get_suggestions(self):
        """Batch suggestion hook; not implemented in this class."""
        raise NotImplementedError