from collections import Counter
from copy import deepcopy
from time import time
from typing import List, Optional, Tuple

import numpy as np
from ConfigSpace import Configuration, ConfigurationSpace

# Note: `prefixed_name`, `add_configs_origin`, and `PartialConfig` are
# project-local helpers imported from modules not shown in this section.


def process_config_info_pair(self, config: Configuration, info_dict: dict, budget):
    # Lock the config's vector encoding for this budget so concurrent
    # proposals do not hand out the same configuration twice.
    self.budget2obvs[budget]["locks"].append(config.get_array().copy())
    info_dict = deepcopy(info_dict)
    if config.origin is None:
        config.origin = "unknown"
    info_dict.update({
        "origin": config.origin
    })
    return config.get_dictionary(), info_dict
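# The lock bookkeeping above pairs with `is_config_exist`, which is called
# throughout this section but not shown. Below is a minimal sketch of what such
# a check might look like, assuming `budget2obvs[budget]` also keeps a
# "configs" list of already-evaluated arrays (both that key and the layout are
# assumptions, not the library's actual API):
def is_config_exist_sketch(self, budget, config: Configuration) -> bool:
    # Compare the candidate's vector encoding against both the locked
    # (proposed-but-pending) and the already-evaluated arrays. NaN entries
    # encode inactive hyperparameters, hence equal_nan=True.
    candidate = config.get_array()
    observations = self.budget2obvs[budget]
    seen = observations["locks"] + observations.get("configs", [])
    return any(np.allclose(candidate, other, equal_nan=True) for other in seen)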
def merge_configurations(
        partial_configs,  # type: List[PartialConfig]
        cs: ConfigurationSpace
) -> Configuration:
    # Flatten every partial config into one dict, namespacing each
    # hyperparameter with its sub-space prefix to avoid collisions.
    complete = {}
    for partial_config in partial_configs:
        for param, value in partial_config.config.get_dictionary().items():
            param = prefixed_name(partial_config.name, param)
            complete[param] = value
    config = Configuration(cs, complete)
    # The merged config inherits the most common origin among the
    # non-empty partial configs.
    config.origin = Counter([
        p.config.origin for p in partial_configs if not p.is_empty()
    ]).most_common(1)[0][0]
    return config
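# `prefixed_name` joins a sub-space prefix with a hyperparameter name so that
# parameters from different PartialConfigs cannot collide. A minimal sketch,
# assuming a ":"-separated naming scheme (the separator and the None/empty
# handling are assumptions):
def prefixed_name_sketch(prefix: Optional[str], name: str) -> str:
    # A PartialConfig without a name contributes its parameters unprefixed.
    if prefix is None or prefix == "":
        return name
    return f"{prefix}:{name}"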
def get_config(self, budget) -> Tuple[dict, dict]:
    start_time = time()
    # Get the max budget for which an EPM is available (derived from budget2epm).
    max_budget = self.get_available_max_budget()
    # Try user-defined initial points first, skipping any that were
    # already evaluated for this budget.
    if self.initial_points is not None:
        while self.initial_points_index < len(self.initial_points):
            initial_point_dict = self.initial_points[self.initial_points_index]
            initial_point = Configuration(self.config_space, initial_point_dict)
            self.initial_points_index += 1
            initial_point.origin = "User Defined"
            if not self.is_config_exist(budget, initial_point):
                self.logger.debug(f"Using initial point [{self.initial_points_index - 1}]")
                return self.process_config_info_pair(initial_point, {}, budget)
    config, config_info = self._get_config(budget, max_budget)
    self.register_config(config, budget, start_time)
    return config, config_info
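# A hypothetical usage sketch of the initial-points mechanism above. Only the
# ConfigSpace setup is real API; `opt` stands in for an optimizer instance
# holding this method, and the "lr" hyperparameter is made up for illustration.
def initial_points_usage_sketch(opt):
    from ConfigSpace.hyperparameters import UniformFloatHyperparameter

    cs = ConfigurationSpace()
    cs.add_hyperparameter(UniformFloatHyperparameter("lr", 1e-4, 1e-1, log=True))
    opt.config_space = cs
    # These dicts are tried in order before any model-based pick; duplicates
    # for the given budget are skipped by the loop above.
    opt.initial_points = [{"lr": 1e-2}, {"lr": 1e-3}]
    opt.initial_points_index = 0
    config, info = opt.get_config(budget=1)  # info["origin"] == "User Defined"
    return config, info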
def get_config(self, budget):
    # Try user-defined initial points first.
    if self.initial_points is not None and \
            self.initial_points_index < len(self.initial_points):
        initial_point_dict = self.initial_points[self.initial_points_index]
        initial_point = Configuration(self.config_space, initial_point_dict)
        initial_point.origin = "User Defined"
        self.initial_points_index += 1
        return self.process_config_info_pair(initial_point, {}, budget)
    max_budget = self.get_available_max_budget()
    epm = self.budget2epm[max_budget]
    if epm is None:
        # No surrogate model fitted yet: fall back to random sampling.
        max_sample = 1000
        info_dict = {"model_based_pick": False}
        for i in range(1, max_sample + 1):
            config = self.config_space.sample_configuration()
            add_configs_origin(config, "Initial Design")
            if self.is_config_exist(max_budget, config):
                self.logger.info(
                    f"Sampled config already exists; resampling "
                    f"(attempt {i} of random sampling).")
            else:
                return self.process_config_info_pair(config, info_dict, budget)
        # todo: extract this duplicated block into a helper
        seed = self.rng.randint(1, 8888)
        self.config_space.seed(seed)  # bug fix: seed was sampled but never passed
        config = self.config_space.sample_configuration()
        add_configs_origin(config, "Initial Design")
        info_dict.update({
            "sampling_different_samples_failed": True,
            "seed": seed
        })
        return self.process_config_info_pair(config, info_dict, budget)
    info_dict = {"model_based_pick": True}
    # Thompson sampling
    if self.use_thompson_sampling:
        ts_config, ts_info_dict = self.thompson_sampling(max_budget, info_dict)
        if ts_config is not None:
            self.logger.info("Using Thompson sampling near the dominant samples.")
            return self.process_config_info_pair(ts_config, ts_info_dict, budget)
    # Let the config_evaluator score all the random samples.
    configs = self.config_space.sample_configuration(self.n_samples)
    losses, configs_sorted = self.evaluate(configs, max_budget, return_loss_config=True)
    add_configs_origin(configs_sorted, "Random Search (Sorted)")
    if self.use_local_search:
        start_points = self.get_local_search_initial_points(
            max_budget, 10, configs_sorted)
        # todo: finally, remove samples that have already been evaluated
        local_losses, local_configs = self.local_search(start_points, max_budget)
        # todo: work out the relationship between start_points and local_configs
        add_configs_origin(local_configs, "Local Search")
        concat_losses = np.hstack([losses.flatten(), local_losses.flatten()])
        # `losses` is aligned with `configs_sorted` (not the unsorted `configs`),
        # so concatenate the sorted list to keep losses and configs aligned.
        concat_configs = configs_sorted + local_configs
        # Sort by loss, breaking ties with a secondary random key.
        random_var = self.rng.rand(len(concat_losses))
        indexes = np.lexsort((random_var.flatten(), concat_losses))
        concat_configs_sorted = [concat_configs[i] for i in indexes]
        concat_losses = concat_losses[indexes]
    else:
        concat_losses, concat_configs_sorted = losses, configs_sorted
    # Pick the most promising configuration that has not been seen before.
    # todo: consider locking configs when multiple workers are running
    for i, config in enumerate(concat_configs_sorted):
        if self.is_config_exist(max_budget, config):
            self.logger.info(
                f"Sampled config already exists; resampling "
                f"(attempt {i} of bayesian sampling).")
            # After max_repeated_samples duplicates, fall back to Thompson sampling.
            if i >= self.max_repeated_samples and self.use_thompson_sampling:
                ts_config, ts_info_dict = self.thompson_sampling(
                    max_budget, info_dict, True)
                return self.process_config_info_pair(ts_config, ts_info_dict, budget)
        else:
            return self.process_config_info_pair(config, info_dict, budget)
    # todo: extract this duplicated block into a helper
    seed = self.rng.randint(1, 8888)
    self.config_space.seed(seed)
    config = self.config_space.sample_configuration()
    add_configs_origin(config, "Initial Design")
    info_dict.update({
        "sampling_different_samples_failed": True,
        "seed": seed
    })
    return self.process_config_info_pair(config, info_dict, budget)
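# Both branches of `get_config` end with the same reseed-and-accept fallback
# (the block the todos above flag for extraction). A sketch of that extraction,
# using only attributes already present above; the helper's name is made up:
def pick_fallback_config_sketch(self, info_dict: dict, budget):
    # Reseeding the space makes the fallback sample reproducible via "seed",
    # and the duplicate check is deliberately skipped: after repeated
    # collisions, any fresh sample is accepted.
    seed = self.rng.randint(1, 8888)
    self.config_space.seed(seed)
    config = self.config_space.sample_configuration()
    add_configs_origin(config, "Initial Design")
    info_dict = deepcopy(info_dict)
    info_dict.update({
        "sampling_different_samples_failed": True,
        "seed": seed
    })
    return self.process_config_info_pair(config, info_dict, budget)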