def _get_incumbent_value(self): ''' get incumbent value either from runhistory or from best predicted performance on configs in runhistory (depends on self.predict_incumbent)" Return ------ float ''' if self.predict_incumbent: configs = convert_configurations_to_array( self.runhistory.get_all_configs()) costs = list( map( lambda config: self.model. predict_marginalized_over_instances(config.reshape( (1, -1)))[0][0][0], configs, )) incumbent_value = np.min(costs) # won't need log(y) if EPM was already trained on log(y) else: if self.runhistory.empty(): raise ValueError("Runhistory is empty and the cost value of " "the incumbent is unknown.") incumbent_value = self.runhistory.get_cost(self.incumbent) if isinstance(self.rh2EPM, RunHistory2EPM4LogCost): incumbent_value = np.log(incumbent_value) return incumbent_value
def getPrediction(config):
    """Return the model prediction for ``config`` after exp and rescaling.

    The raw prediction of ``smac.solver.epm_chooser.model`` is passed through
    ``np.exp`` and then mapped with ``value * (max_y - min_y) + min_y``, where
    ``min_y`` / ``max_y`` come from ``smac.solver.epm_chooser.rh2EPM`` and
    ``min_y`` is adjusted as described in the comments below.

    ``smac`` is not a parameter; it is taken from the enclosing scope.
    """
    chooser = smac.solver.epm_chooser
    config_array = convert_configurations_to_array([config])
    raw_prediction = chooser.model.predict(config_array)[0][0][0]

    rh2epm = chooser.rh2EPM
    # Subtract the difference between the percentile and the minimum
    percentile_gap = rh2epm.perc - rh2epm.min_y
    lower_bound = rh2epm.min_y - percentile_gap
    # Minimal value to avoid numerical issues in the log scaling
    # (presumably mirrors the adjustment made when the responses were
    # log-scaled -- confirm against rh2EPM)
    lower_bound = lower_bound - constants.VERY_SMALL_NUMBER
    if lower_bound == rh2epm.max_y:
        # Keep the range (max_y - lower_bound) from collapsing to zero.
        lower_bound = lower_bound * (1 - 10**-10)

    return np.exp(raw_prediction) * (rh2epm.max_y - lower_bound) + lower_bound
def _get_x_best(self, predict: bool, X: np.ndarray) -> typing.Tuple[float, np.ndarray]: """Get value, configuration, and array representation of the "best" configuration. The definition of best varies depending on the argument ``predict``. If set to ``True``, this function will return the stats of the best configuration as predicted by the model, otherwise it will return the stats for the best observed configuration. Parameters ---------- predict : bool Whether to use the predicted or observed best. Returns ------- float np.ndarry Configuration """ if predict: costs = list( map( lambda x: ( self.model.predict_marginalized_over_instances( x.reshape((1, -1)))[0][0][0], x, ), X, )) costs = sorted(costs, key=lambda t: t[0]) x_best_array = costs[0][1] best_observation = costs[0][0] # won't need log(y) if EPM was already trained on log(y) else: all_configs = self.runhistory.get_all_configs_per_budget( budget_subset=self.currently_considered_budgets) x_best = self.incumbent x_best_array = convert_configurations_to_array(all_configs) best_observation = self.runhistory.get_cost(x_best) best_observation_as_array = np.array(best_observation).reshape( (1, 1)) # It's unclear how to do this for inv scaling and potential future scaling. # This line should be changed if necessary best_observation = self.rh2EPM.transform_response_values( best_observation_as_array) best_observation = best_observation[0][0] return x_best_array, best_observation
def __call__(self, configurations: List[Configuration]):
    """Computes the acquisition value for a given X

    Parameters
    ----------
    configurations : list
        The configurations where the acquisition
        function should be evaluated.

    Returns
    -------
    np.ndarray(N, 1)
        acquisition values for X; rows in which ``self._compute`` produced
        NaN are overwritten with the most negative finite float.
    """
    X = convert_configurations_to_array(configurations)
    # A single configuration may come back as a 1-D vector; promote it to a
    # one-row matrix before handing it to _compute.
    if len(X.shape) == 1:
        X = X[np.newaxis, :]

    acq = self._compute(X)
    nan_mask = np.isnan(acq)
    if np.any(nan_mask):
        idx = np.where(nan_mask)[0]
        # ``np.float`` was only an alias of the builtin ``float`` and has been
        # removed from NumPy; the builtin yields the same finfo.
        acq[idx, :] = -np.finfo(float).max
    return acq
def to_str(self) -> str:
    """Convert the ConfigHistory object to a string.

    Returns
    -------
    return : str
        A string containing the configuration followed by its run history:
        the configuration values, the number of run-history entries, and
        then the entries themselves, all separated by single spaces.
        For example, "0.8(config) 1(#runhistory) 0.6 1.2 1234"
    """
    # Fetch the run history that belongs to this configuration.
    history_entries = self.runhistory.get_history_for_config(self.config)

    # Convert the configuration to its array form, one string per value.
    config_vector = convert_configurations_to_array([self.config])[0]
    tokens = [str(value) for value in config_vector]

    # Turn every history entry into one string; per the original note each
    # entry has the form "$cost $time $seed".
    history_tokens = [" ".join(map(str, entry)) for entry in history_entries]

    # Configuration values, then the entry count, then the entries.
    tokens.append(str(len(history_tokens)))
    tokens.extend(history_tokens)
    return " ".join(tokens)
def _preprocess(self, runhistory):
    """
    Marginalize over instances so that fANOVA can determine the parameter
    importance without having to deal with instance features.

    Stores the converted configurations in ``self.X`` and the model's
    marginalized predictions for them in ``self.y``.

    :param runhistory: RunHistory that knows all configurations that were run.
        For all these configurations the instance features are marginalized
        away before fANOVA makes its predictions.
    """
    self.logger.info(
        'PREPROCESSING PREPROCESSING PREPROCESSING PREPROCESSING PREPROCESSING PREPROCESSING'
    )
    self.logger.info('Marginalizing away all instances!')

    all_configs = runhistory.get_all_configs()
    config_array = np.array(convert_configurations_to_array(all_configs))
    # The first element of the model's return value holds the predictions
    # that become the training targets.
    marginal_output = self.model.predict_marginalized_over_instances(config_array)
    target_array = np.array(marginal_output[0])

    self.X = config_array
    self.y = target_array

    x_shape_text = str(self.X.shape)
    self.logger.info('Size of training X after preprocessing: %s' % x_shape_text)
    y_shape_text = str(self.y.shape)
    self.logger.info('Size of training y after preprocessing: %s' % y_shape_text)
    self.logger.info('Finished Preprocessing')
def _get_incumbent_value(self): ''' get incumbent value either from runhistory or from best predicted performance on configs in runhistory (depends on self.predict_incumbent)" Return ------ float ''' if self.predict_incumbent: configs = convert_configurations_to_array( self.runhistory.get_all_configs()) costs = list( map( lambda config: self.model. predict_marginalized_over_instances(config.reshape( (1, -1)))[0][0][0], configs, )) incumbent_value = np.min(costs) # won't need log(y) if EPM was already trained on log(y) else: if self.runhistory.empty(): raise ValueError("Runhistory is empty and the cost value of " "the incumbent is unknown.") incumbent_value = self.runhistory.get_cost(self.incumbent) # It's unclear how to do this for inv scaling and potential future scaling. This line should be changed if # necessary incumbent_value_as_array = np.array(incumbent_value).reshape( (1, 1)) incumbent_value = self.rh2EPM.transform_response_values( incumbent_value_as_array) incumbent_value = incumbent_value[0][0] return incumbent_value
def build_matrix(feature_pd: pd.DataFrame, perf_pd: pd.DataFrame, configs: list,
                 cs: ConfigurationSpace, n_insts: int = None):
    """Build (configuration + instance feature) matrices split into four parts.

    Every (instance, configuration) pair becomes one row consisting of the
    one-hot encoded configuration vector concatenated with the instance's
    feature vector; the matching performance value becomes the target.
    Half of the configurations and half of the instances are randomly marked
    as "train", and the pairs are distributed as follows:

    * I   -- train instance and train configuration
    * II  -- non-train instance and train configuration
    * III -- train instance and non-train configuration
    * IV  -- non-train instance and non-train configuration

    Part I is further divided into four sub-sets by a second random split
    ("valid") of the train instances and train configurations:
    index 0 = neither is valid, 1 = only the instance is valid,
    2 = only the configuration is valid, 3 = both are valid.

    The shapes of all returned arrays are printed before returning.

    Parameters
    ----------
    feature_pd : pd.DataFrame
        Instance features, indexed by instance.
    perf_pd : pd.DataFrame
        Performance values, indexed by instance; column ``c`` is read as the
        performance of ``configs[c]``.
    configs : list
        Configurations to convert and encode.
    cs : ConfigurationSpace
        Configuration space used to find the categorical hyperparameters.
    n_insts : int, optional
        If given and smaller than the number of instances, only a random
        sample of this many instances is used.

    Returns
    -------
    tuple
        ``(X_I, X_II, X_III, X_IV, y_I, y_II, y_III, y_IV)`` where ``X_I`` and
        ``y_I`` each hold the four sub-sets described above.
    """
    insts = list(feature_pd.index)
    if n_insts is not None and n_insts < len(insts):
        insts = random.sample(insts, n_insts)

    config_matrix = convert_configurations_to_array(configs)

    # one hot encode categorical parameters
    n_values = []
    mask_array = []
    for param in cs.get_hyperparameters():
        is_categorical = isinstance(param, CategoricalHyperparameter)
        if is_categorical:
            n_values.append(len(param.choices))
        mask_array.append(is_categorical)
    n_values = np.array(n_values)
    mask_array = np.array(mask_array)

    # NOTE(review): ``n_values`` and ``categorical_features`` are legacy
    # OneHotEncoder arguments that newer scikit-learn releases no longer
    # accept -- confirm against the pinned scikit-learn version.
    ohe = OneHotEncoder(n_values=n_values, categorical_features=mask_array, sparse=False)
    config_matrix = ohe.fit_transform(config_matrix)

    # The order of these four draws is kept as-is so that a seeded ``random``
    # yields the same splits as before.
    train_config_indices = random.sample(range(len(configs)), len(configs) // 2)
    valid_config_indices = random.sample(train_config_indices, len(train_config_indices) // 2)
    train_inst_indices = random.sample(range(len(insts)), len(insts) // 2)
    valid_inst_indices = random.sample(train_inst_indices, len(train_inst_indices) // 2)

    # Sets give O(1) membership tests inside the instance x config loop.
    train_configs = set(train_config_indices)
    valid_configs = set(valid_config_indices)
    train_insts = set(train_inst_indices)
    valid_insts = set(valid_inst_indices)

    # convert in X matrix and y vector
    X_I, X_II, X_III, X_IV = [[], [], [], []], [], [], []
    y_I, y_II, y_III, y_IV = [[], [], [], []], [], [], []
    for i_idx, inst in enumerate(insts):
        feat_vector = feature_pd.loc[inst].values
        perf_vector = perf_pd.loc[inst].values
        inst_is_train = i_idx in train_insts
        inst_is_valid = i_idx in valid_insts
        for c_idx in range(len(configs)):
            config_vec = config_matrix[c_idx, :]
            perf = perf_vector[c_idx]
            row = np.concatenate((config_vec, feat_vector))
            config_is_train = c_idx in train_configs

            if inst_is_train and config_is_train:
                # Sub-set index inside part I:
                # +1 for a valid instance, +2 for a valid configuration.
                config_is_valid = c_idx in valid_configs
                subset = 2 * int(config_is_valid) + int(inst_is_valid)
                X_I[subset].append(row)
                y_I[subset].append(perf)
            elif config_is_train:
                # non-train instance, train configuration
                X_II.append(row)
                y_II.append(perf)
            elif inst_is_train:
                # train instance, non-train configuration
                X_III.append(row)
                y_III.append(perf)
            else:
                X_IV.append(row)
                y_IV.append(perf)

    X_II, X_III, X_IV = np.array(X_II), np.array(X_III), np.array(X_IV)
    y_II, y_III, y_IV = np.array(y_II), np.array(y_III), np.array(y_IV)
    # NOTE(review): the four sub-sets can have different lengths; recent NumPy
    # versions refuse to build such a ragged array without ``dtype=object`` --
    # confirm against the NumPy version in use.
    X_I = np.array([np.array(X_I[0]), np.array(X_I[1]), np.array(X_I[2]), np.array(X_I[3])])
    y_I = np.array([np.array(y_I[0]), np.array(y_I[1]), np.array(y_I[2]), np.array(y_I[3])])

    print(X_I.shape, X_II.shape, X_III.shape, X_IV.shape)
    print(y_I.shape, y_II.shape, y_III.shape, y_IV.shape)

    return X_I, X_II, X_III, X_IV, y_I, y_II, y_III, y_IV