def build_single_surrogate(self, X: np.ndarray, y: np.array, normalize):
    """Train one surrogate on ``X``/``y`` over a meta-feature-extended space.

    A fresh ``ConfigurationSpace`` is assembled from the hyperparameters,
    conditions and forbidden clauses of ``self.config_space``.  Every column
    of ``X`` beyond the number of hyperparameters gets an additional
    ``meta_feature_<idx>`` float hyperparameter in ``[0, 1]``.

    Parameters
    ----------
    X : np.ndarray
        2-D feature matrix; ``X.shape[1]`` is read to derive the number of
        meta-feature columns.
    y : np.ndarray
        Target values.  The caller's array is never modified.
    normalize : str
        One of ``'standardize'``, ``'scale'`` or ``'none'``.

    Returns
    -------
    The model returned by ``build_model`` after ``model.train(X, y)``.
    """
    assert normalize in ['standardize', 'scale', 'none']

    # Construct hyperspace with meta-features.
    config_space = ConfigurationSpace()
    for hp in self.config_space.get_hyperparameters():
        config_space.add_hyperparameter(hp)
    for cond in self.config_space.get_conditions():
        config_space.add_condition(cond)
    for bid in self.config_space.get_forbiddens():
        config_space.add_forbidden_clause(bid)

    _meta_feature_size = X.shape[1] - len(
        self.config_space.get_hyperparameters())
    for _idx in range(_meta_feature_size):
        _meta_hp = UniformFloatHyperparameter("meta_feature_%d" % _idx,
                                              0., 1., default_value=0.,
                                              log=False)
        config_space.add_hyperparameter(_meta_hp)

    model = build_model(self.surrogate_type, config_space,
                        np.random.RandomState(self.random_seed))

    if normalize in ('standardize', 'scale'):
        # Work on a private float64 copy: the perturbation below must not
        # leak into the caller's array, and on an integer array the in-place
        # `+= 1e-4` would be truncated away, leaving `y` constant.
        y = np.array(y, dtype=np.float64)
        if (y == y[0]).all():
            # Break ties so the normalization sees a non-constant vector.
            y[0] += 1e-4
        if normalize == 'standardize':
            y, _, _ = zero_mean_unit_var_normalization(y)
        else:
            y, _, _ = zero_one_normalization(y)

    model.train(X, y)
    return model
def build_source_surrogates(self, normalize):
    """Fit one surrogate per entry of ``self.source_hpo_data``.

    For each source history the configurations and their performances are
    converted to arrays, truncated to the first ``self.num_src_hpo_trial``
    observations and normalized according to ``normalize``
    (``'standardize'`` or ``'scale'``; with ``'scale'`` the result is mapped
    through ``2 * y - 1``).  The minimum of the normalized targets is
    appended to ``self.eta_list`` and the trained model to
    ``self.source_surrogates``, which is reset at the start of the call.

    Raises
    ------
    ValueError
        If ``normalize`` is neither ``'standardize'`` nor ``'scale'``.
    """
    print('start to train base surrogates.')
    began_at = time.time()
    self.source_surrogates = []

    for task_history in self.source_hpo_data:
        print('.', end='')
        surrogate = build_model(self.surrogate_type, self.config_space,
                                np.random.RandomState(self.random_seed))

        observations = list(task_history.items())
        configs = [cfg for cfg, _ in observations]
        perfs = [perf for _, perf in observations]

        X = convert_configurations_to_array(configs)
        y = np.array(perfs, dtype=np.float64)
        X = X[:self.num_src_hpo_trial]
        y = y[:self.num_src_hpo_trial]

        if normalize not in ('standardize', 'scale'):
            raise ValueError('Invalid parameter in norm.')

        # A constant target vector is nudged before normalization.
        if (y == y[0]).all():
            y[0] += 1e-4

        if normalize == 'standardize':
            y, _, _ = zero_mean_unit_var_normalization(y)
        else:
            y, _, _ = zero_one_normalization(y)
            y = 2 * y - 1.

        self.eta_list.append(np.min(y))
        surrogate.train(X, y)
        self.source_surrogates.append(surrogate)

    print()
    elapsed = time.time() - began_at
    print('Building base surrogates took %.3fs.' % elapsed)
def build_single_surrogate(self, X: np.ndarray, y: np.array, normalize):
    """Train one surrogate model on ``X``/``y`` over ``self.config_space``.

    Parameters
    ----------
    X : np.ndarray
        Feature matrix handed to ``model.train`` unchanged.
    y : np.ndarray
        Target values.  The caller's array is never modified.
    normalize : str
        One of ``'standardize'``, ``'scale'`` or ``'none'``.

    Returns
    -------
    The model returned by ``build_model`` after ``model.train(X, y)``.
    """
    assert normalize in ['standardize', 'scale', 'none']

    model = build_model(self.surrogate_type, self.config_space,
                        np.random.RandomState(self.random_seed))

    if normalize in ('standardize', 'scale'):
        # Work on a private float64 copy: the perturbation below must not
        # leak into the caller's array, and on an integer array the in-place
        # `+= 1e-4` would be truncated away, leaving `y` constant.
        y = np.array(y, dtype=np.float64)
        if (y == y[0]).all():
            # Break ties so the normalization sees a non-constant vector.
            y[0] += 1e-4
        if normalize == 'standardize':
            y, _, _ = zero_mean_unit_var_normalization(y)
        else:
            y, _, _ = zero_one_normalization(y)

    model.train(X, y)
    return model
def build_classifier(self):
    """Fit one binary "promising region" classifier per source history.

    For every entry of ``self.source_hpo_data`` the first
    ``self.num_src_hpo_trial`` observations are normalized and labelled
    ``True`` where the target lies below its 30th percentile.  If that
    labelling yields a single class, the mean is used as the threshold
    instead.  A ``StandardScaler`` + ``SVC(gamma='auto')`` pipeline is
    fitted on the labels and appended to ``self.space_classifier``, which
    is reset at the start of the call.

    Raises
    ------
    ValueError
        If every normalized target equals the percentile threshold.
    """
    # Train the binary classifier.
    print('start to train binary classifier.')
    began_at = time.time()
    self.space_classifier = []
    norm_mode = 'standardize'

    for task_history in self.source_hpo_data:
        print('.', end='')
        observations = list(task_history.items())
        configs = [cfg for cfg, _ in observations]
        perfs = [perf for _, perf in observations]

        X = convert_configurations_to_array(configs)
        y = np.array(perfs, dtype=np.float64)
        X = X[:self.num_src_hpo_trial]
        y = y[:self.num_src_hpo_trial]

        if norm_mode not in ('standardize', 'scale'):
            raise ValueError('Invalid parameter in norm.')

        # A constant target vector is nudged before normalization.
        if (y == y[0]).all():
            y[0] += 1e-4

        if norm_mode == 'standardize':
            y, _, _ = zero_mean_unit_var_normalization(y)
        else:
            y, _, _ = zero_one_normalization(y)
            y = 2 * y - 1.

        threshold = np.percentile(y, 30)
        targets = np.array(y)
        space_label = np.array(targets < threshold)
        if (targets == threshold).all():
            raise ValueError('assertion violation: the same eval values!')

        single_class = (space_label[0] == space_label).all()
        if single_class:
            # Fall back to a mean split when the percentile split is
            # degenerate.
            space_label = np.array(targets < np.mean(y))
            print('Label treatment triggers!')

        from sklearn.pipeline import make_pipeline
        from sklearn.preprocessing import StandardScaler
        from sklearn.svm import SVC

        pipeline = make_pipeline(StandardScaler(), SVC(gamma='auto'))
        pipeline.fit(X, space_label)
        self.space_classifier.append(pipeline)

    elapsed = time.time() - began_at
    print('Building base classifier took %.3fs.' % elapsed)
def prepare_classifier(self, task_ids, percentiles):
    """Fit a binary "promising region" classifier for selected source tasks.

    ``self.space_classifier`` is reset to a list of ``None`` with one slot
    per source history; only the slots listed in ``task_ids`` are filled.

    Parameters
    ----------
    task_ids
        Iterable of indices into ``self.source_hpo_data``.
    percentiles
        Indexable by task id; ``percentiles[task_id]`` is the percentile
        (as passed to ``np.percentile``) used as the label threshold.

    Raises
    ------
    ValueError
        If all normalized targets equal the threshold, if neither the
        percentile split nor the mean split yields two classes, or if
        ``self.clf_type`` is not one of ``'svm'``, ``'rf'``, ``'knn'``,
        ``'gp'``.
    """
    # Train the binary classifier.
    print('Train binary classifiers.')
    start_time = time.time()
    self.space_classifier = [None] * len(self.source_hpo_data)
    normalize = 'standardize'

    for _task_id in task_ids:
        hpo_evaluation_data = self.source_hpo_data[_task_id]
        percentile_v = percentiles[_task_id]

        print('.', end='')
        _X, _y = list(), list()
        for _config, _config_perf in hpo_evaluation_data.items():
            _X.append(_config)
            _y.append(_config_perf)
        X = convert_configurations_to_array(_X)
        y = np.array(_y, dtype=np.float64)
        X = X[:self.num_src_hpo_trial]
        y = y[:self.num_src_hpo_trial]

        if normalize == 'standardize':
            if (y == y[0]).all():
                y[0] += 1e-4
            y, _, _ = zero_mean_unit_var_normalization(y)
        elif normalize == 'scale':
            if (y == y[0]).all():
                y[0] += 1e-4
            y, _, _ = zero_one_normalization(y)
            y = 2 * y - 1.
        else:
            raise ValueError('Invalid parameter in norm.')

        percentile = np.percentile(y, percentile_v)
        unique_ys = sorted(set(y))
        # With a threshold at (or below) the smallest value no sample would
        # be labelled positive; move it up to the second-smallest value.
        if len(unique_ys) >= 2 and percentile <= unique_ys[0]:
            percentile = unique_ys[1]

        space_label = np.array(np.array(y) < percentile)
        if (np.array(y) == percentile).all():
            raise ValueError('Assertion violation: The same eval values!')
        if (space_label[0] == space_label).all():
            # Degenerate percentile split: retry with the mean as threshold.
            space_label = np.array(np.array(y) < np.mean(y))
            if (space_label[0] == space_label).all():
                raise ValueError('Warning: Label treatment triggers!')
            else:
                print('Warning: Label treatment triggers!')

        if self.clf_type == 'svm':
            clf = make_pipeline(StandardScaler(), SVC(gamma='auto'))
        elif self.clf_type == 'rf':
            clf = make_pipeline(
                StandardScaler(),
                RandomForestClassifier(n_estimators=50, max_depth=4))
        elif self.clf_type == 'knn':
            clf = make_pipeline(StandardScaler(),
                                KNeighborsClassifier(n_neighbors=5))
        elif self.clf_type == 'gp':
            clf = make_pipeline(StandardScaler(),
                                GaussianProcessClassifier())
        else:
            # Previously this fell through and failed with an opaque
            # UnboundLocalError on `clf.fit` below.
            raise ValueError('Unsupported classifier type: %s.'
                             % self.clf_type)

        clf.fit(X, space_label)
        self.space_classifier[_task_id] = clf

    print('Building base classifier took %.3fs.' % (time.time() - start_time))
def choose_next(self, X: np.ndarray, Y: np.ndarray):
    """Pick the next configuration to evaluate.

    Step 1. obtain the candidate configs via ``self.get_X_candidate()``.
    Step 2. with probability ``self.get_random_prob(self.iteration_id)``,
            sample randomly from the configs outside the candidate set.
    Step 3. otherwise rank the candidates by the acquisition function and
            return the best one that has not been tried yet.

    While fewer than ``self.init_num`` observations exist, the surrogate is
    not used: the warm-start configuration at that index is returned if
    ``self.initial_configurations`` is set, otherwise the default
    configuration (or a random one if the default was already tried).

    Parameters
    ----------
    X : np.ndarray
        Observed configurations; ``X.shape[0]`` is the observation count.
    Y : np.ndarray
        Observed performances, one per row of ``X``.

    Returns
    -------
    The config to evaluate next.

    Raises
    ------
    ValueError
        If ``self.acq_func`` is not ``'ei'``.
    """
    _config_num = X.shape[0]
    if _config_num < self.init_num:
        if self.initial_configurations is None:
            default_config = self.config_space.get_default_configuration()
            if default_config not in self.configuration_list:
                default_config = self.configuration_list[0]
            if default_config not in (self.configurations +
                                      self.failed_configurations):
                config = default_config
            else:
                config = self.sample_random_config()[0]
            return config
        else:
            print('This is a config for warm-start!')
            return self.initial_configurations[_config_num]

    start_time = time.time()
    self.model.train(X, Y)
    print('Training surrogate model took %.3f' % (time.time() - start_time))

    # The incumbent must live on the same scale the surrogate uses for its
    # targets, which depends on the transfer method.
    if self.model.method_id in ['tst', 'tstm', 'pogpe']:
        y_, _, _ = zero_one_normalization(Y)
    elif self.model.method_id in ['scot']:
        y_ = Y.copy()
    else:
        y_, _, _ = zero_mean_unit_var_normalization(Y)
    incumbent_value = np.min(y_)

    if self.acq_func == 'ei':
        self.acquisition_function.update(
            model=self.model, eta=incumbent_value,
            num_data=len(self.history_container.data))
    else:
        raise ValueError('invalid acquisition function ~ %s.' % self.acq_func)

    # Select space
    X_candidate = self.get_X_candidate()

    # Check space
    self.check_space(X_candidate)

    if self.rng.rand() < self.get_random_prob(self.iteration_id):
        excluded_set = list()
        candidate_set = set(X_candidate)
        for _config in self.configuration_list:
            if _config not in candidate_set:
                excluded_set.append(_config)
        if len(excluded_set) == 0:
            excluded_set = self.configuration_list

        config = self.sample_random_config(config_set=excluded_set)[0]
        # Keep the weight history aligned with the iteration count even
        # though the surrogate was not consulted for this pick.
        if len(self.model.target_weight) == 0:
            self.model.target_weight.append(0.)
        else:
            self.model.target_weight.append(self.model.target_weight[-1])
        print('Config sampled randomly.')
        return config

    acq_optimizer = OfflineSearch(X_candidate,
                                  self.acquisition_function,
                                  self.config_space,
                                  rng=np.random.RandomState(self.random_seed))

    start_time = time.time()
    sorted_configs = acq_optimizer.maximize(
        runhistory=self.history_container,
        num_points=5000)
    print('Optimizing Acq. func took %.3f' % (time.time() - start_time))

    # Built once: the original concatenated both lists again for every
    # candidate inspected in the two loops below.
    evaluated = self.configurations + self.failed_configurations

    for _config in sorted_configs:
        if _config not in evaluated:
            return _config

    print('[Warning] Reach unexpected?')
    excluded_set = list()
    candidate_set = set(X_candidate)
    for _config in self.configuration_list:
        if _config not in candidate_set and _config not in evaluated:
            excluded_set.append(_config)
    if len(excluded_set) == 0:
        excluded_set = self.configuration_list
    return self.sample_random_config(config_set=excluded_set)[0]
def choose_next(self, X: np.ndarray, Y: np.ndarray):
    """Pick the next configuration to evaluate.

    While fewer than ``self.init_num`` observations exist, the warm-start
    configuration at that index is returned if
    ``self.initial_configurations`` is set, otherwise the default
    configuration (or a random one if the default was already tried).
    Afterwards, if ``self.random_configuration_chooser.check`` fires, a
    random configuration is returned; otherwise the surrogate is retrained
    and the best untried configuration according to the acquisition
    function is returned.

    Parameters
    ----------
    X : np.ndarray
        Observed configurations; ``X.shape[0]`` is the observation count.
    Y : np.ndarray
        Observed performances, one per row of ``X``.

    Returns
    -------
    The config to evaluate next.

    Raises
    ------
    ValueError
        If ``self.acq_func`` is neither ``'ei'`` nor ``'taf'``, or if every
        configuration proposed by the optimizer was already tried.
    """
    _config_num = X.shape[0]
    if _config_num < self.init_num:
        if self.initial_configurations is None:
            default_config = self.config_space.get_default_configuration()
            if default_config not in self.configuration_list:
                default_config = self.configuration_list[0]
            if default_config not in (self.configurations +
                                      self.failed_configurations):
                config = default_config
            else:
                config = self.sample_random_config()[0]
            return config
        else:
            print('This is a config for warm-start!')
            return self.initial_configurations[_config_num]

    if self.random_configuration_chooser.check(self.iteration_id):
        config = self.sample_random_config()[0]
        # Keep the weight history aligned with the iteration count even
        # though the surrogate was not consulted for this pick.
        if len(self.model.target_weight) == 0:
            self.model.target_weight.append(0.)
        else:
            self.model.target_weight.append(self.model.target_weight[-1])
        return config

    start_time = time.time()
    self.model.train(X, Y)
    print('Training surrogate model took %.3f' % (time.time() - start_time))

    # The incumbent is always taken from the normalized targets.  The
    # original also read it from the history container first, but that
    # value was overwritten on both branches, so the lookup is dropped.
    if self.model.method_id in ['tst', 'tstm']:
        y_, _, _ = zero_one_normalization(Y)
    else:
        y_, _, _ = zero_mean_unit_var_normalization(Y)
    incumbent_value = np.min(y_)

    if self.acq_func == 'ei':
        self.acquisition_function.update(
            model=self.model, eta=incumbent_value,
            num_data=len(self.history_container.data))
    elif self.acq_func == 'taf':
        self.acquisition_function.update_target_model(
            self.model.target_surrogate, incumbent_value,
            num_data=len(self.history_container.data),
            model_weights=self.model.w)
    else:
        raise ValueError('invalid acquisition function ~ %s.'
                         % self.acq_func)

    start_time = time.time()
    sorted_configs = self.acq_optimizer.maximize(
        runhistory=self.history_container,
        num_points=5000,
    )
    print('optimizing acq func took', time.time() - start_time)

    # Built once: the original concatenated both lists again for every
    # candidate inspected in the loop below.
    evaluated = self.configurations + self.failed_configurations
    for _config in sorted_configs:
        if _config not in evaluated:
            return _config
    raise ValueError('The configuration in the SET (%d) is over'
                     % len(self.configuration_list))