예제 #1
0
    def setUp(self):
        """
        Build the fixtures shared by the KDE tests.

        Creates the configuration space, lets ``add_hyperparameters`` populate
        it, samples ``n_train`` training and ``n_test`` test configurations
        (stored as arrays in ``x_train`` / ``x_test``), converts both with
        ``sm_transform_data`` and finally sets up three estimators on the
        training data: a statsmodels ``KDEMultivariate`` (``sm_kde``) and two
        ``MultivariateKDE`` instances, one fully dimensional (``hp_kde_full``)
        and one not (``hp_kde_factor``).
        """
        # NOTE(review): 42 is presumably a seed -- confirm against the
        # ConfigSpace version in use.
        self.configspace = CS.ConfigurationSpace(42)
        self.add_hyperparameters()

        def draw_vectors(count):
            # Sample all configurations first, then convert them to vectors.
            drawn = []
            for _ in range(count):
                drawn.append(self.configspace.sample_configuration())
            return np.array([cfg.get_array() for cfg in drawn])

        # Training data is drawn before test data.
        self.x_train = draw_vectors(self.n_train)
        self.x_test = draw_vectors(self.n_test)

        self.sm_x_train = self.sm_transform_data(self.x_train)
        self.sm_x_test = self.sm_transform_data(self.x_test)

        # Reference estimator from statsmodels.
        self.sm_kde = sm.nonparametric.KDEMultivariate(
            data=self.sm_x_train,
            var_type=self.var_types,
            bw='cv_ml',
        )

        # The two estimators differ only in ``fully_dimensional``.
        self.hp_kde_full = MultivariateKDE(
            self.configspace,
            fully_dimensional=True,
            fix_boundary=False,
        )
        self.hp_kde_factor = MultivariateKDE(
            self.configspace,
            fully_dimensional=False,
            fix_boundary=False,
        )

        for estimator in (self.hp_kde_full, self.hp_kde_factor):
            estimator.fit(self.x_train, bw_estimator='mlcv')
예제 #2
0
    def new_result(self, job, update_model=True):
        """
        Register a finished run and refit the KDE models of its budget.

        Every time a run has finished, this function should be called
        to register it with the result logger. If overwritten, make
        sure to call this method from the base class to ensure proper
        logging.

        The observation is stored under the run's budget. Repeated runs of
        the same configuration are collected in one entry and their losses
        are averaged when the model is fitted. If a model for a larger budget
        already exists, the run is not recorded at all.

        Parameters:
        -----------
        job: hpbandster.distributed.dispatcher.Job object
            contains all the info about the run. ``job.result`` provides the
            loss (``None`` is treated as a crashed run), ``job.kwargs``
            provides ``"budget"`` and ``"config"``.
        update_model: bool
            if False, the observation is only recorded and no model is
            fitted, e.g. during warm starting when previous results are fed
            in and the model is only updated once.
        """

        super().new_result(job)

        # Crashed runs are not skipped: they get a +inf loss and are counted
        # as bad configurations.
        loss = np.inf if job.result is None else job.result["loss"]
        budget = job.kwargs["budget"]

        if budget not in self.configs:
            self.configs[budget] = []
            self.losses[budget] = []

        # While only a single budget has been seen, a fixed threshold of 6
        # observations is used; afterwards the configured minimum applies.
        if len(self.configs) == 1:
            min_num_points = 6
        else:
            min_num_points = self.min_points_in_model

        # skip model building if we already have a bigger model
        if max(self.kde_models.keys(), default=-np.inf) > budget:
            return

        # We want to get a numerical representation of the configuration in
        # the original space
        conf = ConfigSpace.Configuration(
            self.configspace, job.kwargs["config"]).get_array().tolist()

        def same_vector(left, right):
            # NaN never compares equal to itself, so a plain ``conf in list``
            # test cannot recognise a repeated vector that contains NaN.
            # Inactive (conditional) hyperparameters are presumably encoded
            # as NaN -- confirm against ConfigSpace. ``a != a`` is True only
            # for NaN.
            return len(left) == len(right) and all(
                a == b or (a != a and b != b) for a, b in zip(left, right))

        budget_configs = self.configs[budget]
        budget_losses = self.losses[budget]
        repeat_index = next(
            (i for i, seen in enumerate(budget_configs)
             if same_vector(seen, conf)),
            None)

        if repeat_index is None:
            budget_configs.append(conf)
            budget_losses.append([loss])
        else:
            budget_losses[repeat_index].append(loss)
            self.logger.debug(
                'ran config %s with loss %f again' % (conf, loss))

        # skip model building:
        #   a) if not enough points are available
        if len(budget_configs) < min_num_points:
            self.logger.debug(
                "Only %i run(s) for budget %f available, need at least %s -> can't build model!"
                % (len(budget_configs), budget, min_num_points))
            return

        #   b) during warm starting when we feed previous results in and
        #      only update once
        if not update_model:
            return

        if budget not in self.kde_models.keys():
            self.kde_models[budget] = {
                'good':
                MultivariateKDE(self.configspace,
                                min_bandwidth=self.min_bandwidth,
                                fully_dimensional=self.fully_dimensional),
                'bad':
                MultivariateKDE(self.configspace,
                                min_bandwidth=self.min_bandwidth,
                                fully_dimensional=self.fully_dimensional)
            }
        good_kde = self.kde_models[budget]['good']
        bad_kde = self.kde_models[budget]['bad']

        num_configs = len(budget_losses)
        train_configs = np.array(budget_configs)
        # One loss per configuration: the mean over all of its runs.
        train_losses = np.array(
            [np.mean(run_losses) for run_losses in budget_losses])

        # At least 3 configurations are always treated as good; everything
        # else is bad. int() keeps the value usable as a slice bound even
        # if top_n_percent is a float.
        n_good = int(max(3, (num_configs * self.top_n_percent) // 100))
        n_bad = num_configs - n_good

        # Refit KDE for the current budget
        idx = np.argsort(train_losses)

        train_data_good = self.impute_conditional_data(
            train_configs[idx[:n_good]])
        train_data_bad = self.impute_conditional_data(
            train_configs[idx[n_good:]])

        bad_kde.fit(train_data_bad, bw_estimator=self.bw_estimator)
        good_kde.fit(train_data_good, bw_estimator=self.bw_estimator)

        # NOTE: an earlier fallback copied the bad model's bandwidths to the
        # good model when bw_estimator was 'mlcv' and n_good < 3. Since
        # n_good is always >= 3 it could never run and has been removed.

        self.logger.debug(
            'good KDE: bandwidths %s, data mean %s, data std %s, losses %s\n'
            'bad KDE: bandwidths %s'
            % (good_kde.bandwidths,
               good_kde.data.mean(axis=0),
               good_kde.data.std(axis=0),
               (train_losses[idx])[:n_good],
               bad_kde.bandwidths))

        self.logger.debug(
            'done building a new model for budget %f based on %i/%i split\nBest loss for this budget:%f\n\n\n\n\n'
            % (budget, n_good, n_bad, np.min(train_losses)))