def get_id_of_config(config: Configuration):
    """Return an MD5 hex digest that identifies *config* by its vector form.

    The configuration's numeric array representation is hashed together with
    its shape string, so equal byte buffers with different shapes do not
    collide.

    :param config: configuration to identify; only ``get_array()`` is used.
    :return: hexadecimal MD5 digest string.
    """
    # todo:, instance="", seed=0 — instance/seed were once meant to be part
    # of the key as well.
    arr: np.ndarray = config.get_array()
    if not arr.flags['C_CONTIGUOUS']:
        # Hash a C-ordered copy of the transpose when the buffer is not
        # already contiguous (cheap for Fortran-ordered data).
        arr = np.ascontiguousarray(arr.T)
    digest = hashlib.md5()
    digest.update(arr.data)
    digest.update(str(arr.shape).encode('utf8'))
    return digest.hexdigest()
def register_result(self, config: Configuration, loss: float,
                    status: StatusType, update_model: bool = True,
                    **kwargs) -> None:
    """Record one evaluation result and refit the good/bad KDE pair.

    The observation is appended to ``self.kde``; once enough points are
    available, the top ``top_n_percent`` configurations (by loss) are fit as
    the "good" density and the remainder as the "bad" density, BOHB-style.

    :param config: evaluated configuration.
    :param loss: observed loss; ``None`` or non-finite values are replaced
        by ``self.worst_score``.
    :param status: evaluation status, forwarded to the superclass.
    :param update_model: accepted for interface compatibility; currently not
        read by this implementation.
    """
    super().register_result(config, loss, status)

    # noinspection PyUnresolvedReferences
    vector_size = config.get_array().size
    # Ignore configurations whose vector length does not match this space.
    if vector_size != self.expected_size:
        return

    # Non-finite / missing losses would poison the density estimates.
    if loss is None or not np.isfinite(loss):
        loss = self.worst_score

    self.kde.losses.append(loss)
    # noinspection PyTypeChecker
    self.kde.configs.append(config.get_array())

    n_hyperparameters = len(self.configspace.get_hyperparameters())
    min_points = max(int(1.5 * n_hyperparameters) + 1,
                     self.min_points_in_model)

    # Skip model building while too few observations are available.
    if len(self.kde.losses) < min_points:
        return

    losses = np.array(self.kde.losses)
    n_observations = losses.shape[0]
    n_good = max(min_points, (self.top_n_percent * n_observations) // 100)
    n_bad = max(min_points,
                ((100 - self.top_n_percent) * n_observations) // 100)

    order = np.argsort(losses)
    configs = np.array(self.kde.configs)

    good_data = self._fix_identical_cat_input(configs[order[:n_good]])
    bad_data = self._fix_identical_cat_input(configs[order[-n_bad:]])

    good_data = self._impute_conditional_data(good_data, self.kde.vartypes)
    bad_data = self._impute_conditional_data(bad_data, self.kde.vartypes)

    # KDE fitting needs strictly more samples than dimensions.
    if good_data.shape[0] <= good_data.shape[1]:
        return
    if bad_data.shape[0] <= bad_data.shape[1]:
        return

    # Quick rule of thumb; 'cv_ls' would be the more expensive
    # cross-validation alternative.
    bw_estimation = 'normal_reference'

    bad_kde = sm.nonparametric.KDEMultivariate(
        data=bad_data, var_type=self.kde.kde_vartypes, bw=bw_estimation)
    good_kde = sm.nonparametric.KDEMultivariate(
        data=good_data, var_type=self.kde.kde_vartypes, bw=bw_estimation)

    # Enforce a lower bound on the bandwidths to avoid degenerate kernels.
    bad_kde.bw = np.clip(bad_kde.bw, self.min_bandwidth, None)
    good_kde.bw = np.clip(good_kde.bw, self.min_bandwidth, None)

    self.kde.kde_models = {
        'good': good_kde,
        'bad': bad_kde,
    }