def setUp(self):
    self.configspace = CS.ConfigurationSpace(42)

    self.add_hyperparameters()

    x_train_confs = [
        self.configspace.sample_configuration() for _ in range(self.n_train)
    ]
    self.x_train = np.array([c.get_array() for c in x_train_confs])

    x_test_confs = [
        self.configspace.sample_configuration() for _ in range(self.n_test)
    ]
    self.x_test = np.array([c.get_array() for c in x_test_confs])

    self.sm_x_train = self.sm_transform_data(self.x_train)
    self.sm_x_test = self.sm_transform_data(self.x_test)

    self.sm_kde = sm.nonparametric.KDEMultivariate(data=self.sm_x_train,
                                                   var_type=self.var_types,
                                                   bw='cv_ml')

    self.hp_kde_full = MultivariateKDE(self.configspace,
                                       fully_dimensional=True,
                                       fix_boundary=False)
    self.hp_kde_factor = MultivariateKDE(self.configspace,
                                         fully_dimensional=False,
                                         fix_boundary=False)
    self.hp_kde_full.fit(self.x_train, bw_estimator='mlcv')
    self.hp_kde_factor.fit(self.x_train, bw_estimator='mlcv')
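
# A minimal sanity-check sketch that could follow this fixture (not part of
# the original suite). It assumes MultivariateKDE exposes one positive
# bandwidth per hyperparameter via `.bandwidths`, as its use elsewhere in
# this code suggests, and that statsmodels' KDEMultivariate reports the same
# number of bandwidths in `.bw`.
def test_bandwidth_shapes(self):
    n_dims = len(self.configspace.get_hyperparameters())
    # one bandwidth per (transformed) hyperparameter for every estimator
    self.assertEqual(len(self.sm_kde.bw), n_dims)
    self.assertEqual(len(self.hp_kde_full.bandwidths), n_dims)
    self.assertEqual(len(self.hp_kde_factor.bandwidths), n_dims)
    # all bandwidths should be strictly positive
    self.assertTrue(np.all(self.hp_kde_full.bandwidths > 0))
    self.assertTrue(np.all(self.hp_kde_factor.bandwidths > 0))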
def new_result(self, job, update_model=True):
    """
    Function to register finished runs.

    Every time a run has finished, this function should be called
    to register it with the result logger. If overridden, make sure
    to call this method from the base class to ensure proper logging.

    Parameters:
    -----------
    job: hpbandster.distributed.dispatcher.Job object
        contains all the info about the run
    """
    super().new_result(job)

    if job.result is None:
        # One could skip crashed results, but we decided to assign
        # a +inf loss and count them as bad configurations
        loss = np.inf
    else:
        loss = job.result["loss"]

    budget = job.kwargs["budget"]

    if budget not in self.configs.keys():
        self.configs[budget] = []
        self.losses[budget] = []

    if len(self.configs.keys()) == 1:
        min_num_points = 6
    else:
        min_num_points = self.min_points_in_model

    # skip model building if we already have a bigger model
    if max(list(self.kde_models.keys()) + [-np.inf]) > budget:
        return

    # We want to get a numerical representation of the configuration in the original space
    conf = ConfigSpace.Configuration(
        self.configspace, job.kwargs["config"]).get_array().tolist()

    if conf in self.configs[budget]:
        i = self.configs[budget].index(conf)
        self.losses[budget][i].append(loss)
        print('-' * 50)
        print('ran config %s with loss %f again' % (conf, loss))
    else:
        self.configs[budget].append(conf)
        self.losses[budget].append([loss])

    # skip model building:
    # a) if not enough points are available
    if len(self.configs[budget]) < min_num_points:
        self.logger.debug(
            "Only %i run(s) for budget %f available, need at least %i -> can't build model!"
            % (len(self.configs[budget]), budget, min_num_points))
        return

    # b) during warm starting, when we feed in previous results and only update once
    if not update_model:
        return

    if budget not in self.kde_models.keys():
        self.kde_models[budget] = {
            'good': MultivariateKDE(self.configspace,
                                    min_bandwidth=self.min_bandwidth,
                                    fully_dimensional=self.fully_dimensional),
            'bad': MultivariateKDE(self.configspace,
                                   min_bandwidth=self.min_bandwidth,
                                   fully_dimensional=self.fully_dimensional)
        }

    num_configs = len(self.losses[budget])
    train_configs = np.array(self.configs[budget][-num_configs:])
    train_losses = np.array(
        list(map(np.mean, self.losses[budget][-num_configs:])))

    n_good = max(3, (num_configs * self.top_n_percent) // 100)
    n_bad = num_configs - n_good

    # refit the KDEs for the current budget
    idx = np.argsort(train_losses)

    train_data_good = self.impute_conditional_data(
        train_configs[idx[:n_good]])
    train_data_bad = self.impute_conditional_data(
        train_configs[idx[n_good:n_good + n_bad + 1]])

    self.kde_models[budget]['bad'].fit(train_data_bad,
                                       bw_estimator=self.bw_estimator)
    self.kde_models[budget]['good'].fit(train_data_good,
                                        bw_estimator=self.bw_estimator)

    # with too few good points, MLCV bandwidths are unreliable; reuse the
    # ones estimated for the 'bad' model instead
    if self.bw_estimator in ['mlcv'] and n_good < 3:
        self.kde_models[budget]['good'].bandwidths[:] = \
            self.kde_models[budget]['bad'].bandwidths

    # debug output of the refitted models
    print('=' * 50)
    print(self.kde_models[budget]['good'].bandwidths)
    print(self.kde_models[budget]['good'].data.mean(axis=0))
    print(self.kde_models[budget]['good'].data.std(axis=0))
    print((train_losses[idx])[:n_good])
    print(self.kde_models[budget]['bad'].bandwidths)

    # update probs for the categorical parameters for later sampling
    self.logger.debug(
        'done building a new model for budget %f based on %i/%i split\n'
        'Best loss for this budget: %f\n\n\n\n\n'
        % (budget, n_good, n_bad, np.min(train_losses)))
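
# A standalone illustration (hypothetical, not part of this module) of the
# good/bad split that new_result performs before refitting the two KDEs:
# the top_n_percent lowest-loss configurations feed the 'good' model, the
# rest the 'bad' one. The loss values and top_n_percent=15 below are made-up
# examples; the sketch reuses the module-level `numpy as np` import.
def _split_good_bad_example():
    losses = np.array([0.31, 0.12, 0.45, 0.09, 0.27, 0.50, 0.18, 0.33])
    top_n_percent = 15
    n_good = max(3, (len(losses) * top_n_percent) // 100)  # max(3, 1) -> 3
    n_bad = len(losses) - n_good                           # -> 5
    idx = np.argsort(losses)
    # indices of the 3 best and the 5 remaining configurations:
    # [3 1 6] and [4 0 7 2 5]
    return idx[:n_good], idx[n_good:n_good + n_bad]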