Example #1
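The snippets below rely on a common set of module-level imports. The block is a reconstruction from the names used in the code; the import path of MultivariateKDE and the default of the rapid_development flag are assumptions, not confirmed by the source.

import unittest

import numpy as np
import ConfigSpace
import ConfigSpace as CS
import statsmodels.api as sm

# assumed import path for the class under test; adjust to your layout
from hpbandster.optimizers.kde.mvkde import MultivariateKDE

# flag read by the @unittest.skipIf decorators in the test classes below
rapid_development = False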
    def setUp(self):
        self.configspace = CS.ConfigurationSpace(42)

        self.add_hyperparameters()

        x_train_confs = [
            self.configspace.sample_configuration()
            for i in range(self.n_train)
        ]
        self.x_train = np.array([c.get_array() for c in x_train_confs])

        x_test_confs = [
            self.configspace.sample_configuration() for i in range(self.n_test)
        ]
        self.x_test = np.array([c.get_array() for c in x_test_confs])

        self.sm_x_train = self.sm_transform_data(self.x_train)
        self.sm_x_test = self.sm_transform_data(self.x_test)

        # reference KDE from statsmodels; bandwidths picked by maximum
        # likelihood cross-validation ('cv_ml')
        self.sm_kde = sm.nonparametric.KDEMultivariate(data=self.sm_x_train,
                                                       var_type=self.var_types,
                                                       bw='cv_ml')
        # the KDEs under test: one fully dimensional, one factorized into
        # independent per-dimension densities
        self.hp_kde_full = MultivariateKDE(self.configspace,
                                           fully_dimensional=True,
                                           fix_boundary=False)
        self.hp_kde_factor = MultivariateKDE(self.configspace,
                                             fully_dimensional=False,
                                             fix_boundary=False)
        self.hp_kde_full.fit(self.x_train, bw_estimator='mlcv')
        self.hp_kde_factor.fit(self.x_train, bw_estimator='mlcv')
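This setUp calls an add_hyperparameters helper and reads var_types, n_train and n_test from the surrounding test class (Examples #3 and #4 show the full classes). A minimal hypothetical version for a single continuous parameter might look as follows; the hyperparameter name 'x' is made up for illustration:

    n_train = 128
    n_test = 512
    var_types = 'c'  # statsmodels var_type code: one continuous variable

    def add_hyperparameters(self):
        self.configspace.add_hyperparameter(
            CS.UniformFloatHyperparameter('x', lower=0, upper=1))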
Example #2
    def new_result(self, job, update_model=True):
        """
			function to register finished runs

			Every time a run has finished, this function should be called
			to register it with the result logger. If overwritten, make
			sure to call this method from the base class to ensure proper
			logging.


			Parameters:
			-----------
			job: hpbandster.distributed.dispatcher.Job object
				contains all the info about the run
		"""

        super().new_result(job)

        if job.result is None:
            # One could skip crashed runs, but we decided to assign
            # a +inf loss and count them as bad configurations
            loss = np.inf
        else:
            loss = job.result["loss"]

        budget = job.kwargs["budget"]

        if budget not in self.configs:
            self.configs[budget] = []
            self.losses[budget] = []

        if len(self.configs) == 1:
            min_num_points = 6
        else:
            min_num_points = self.min_points_in_model

        # skip model building if we already have a bigger model
        if max(list(self.kde_models.keys()) + [-np.inf]) > budget:
            return

        # We want to get a numerical representation of the configuration in the original space

        conf = ConfigSpace.Configuration(
            self.configspace, job.kwargs["config"]).get_array().tolist()

        if conf in self.configs[budget]:
            i = self.configs[budget].index(conf)
            self.losses[budget][i].append(loss)
            self.logger.debug('-' * 50)
            self.logger.debug('ran config %s with loss %f again', conf, loss)
        else:
            self.configs[budget].append(conf)
            self.losses[budget].append([loss])

        # skip model building:
        #		a) if not enough points are available
        if len(self.configs[budget]) < min_num_points:
            self.logger.debug(
                "Only %i run(s) for budget %f available, need at least %i -> can't build model!"
                % (len(self.configs[budget]), budget, min_num_points))
            return

        #		b) during warm starting, when we feed in previous results and only update once
        if not update_model:
            return

        if budget not in self.kde_models:
            self.kde_models[budget] = {
                'good':
                MultivariateKDE(self.configspace,
                                min_bandwidth=self.min_bandwidth,
                                fully_dimensional=self.fully_dimensional),
                'bad':
                MultivariateKDE(self.configspace,
                                min_bandwidth=self.min_bandwidth,
                                fully_dimensional=self.fully_dimensional)
            }

        num_configs = len(self.losses[budget])

        train_configs = np.array(self.configs[budget][-num_configs:])
        train_losses = np.array(
            list(map(np.mean, self.losses[budget][-num_configs:])))

        n_good = max(3, (num_configs * self.top_n_percent) // 100)
        n_bad = num_configs - n_good

        # Refit KDE for the current budget
        idx = np.argsort(train_losses)

        train_data_good = self.impute_conditional_data(
            train_configs[idx[:n_good]])
        train_data_bad = self.impute_conditional_data(
            train_configs[idx[n_good:]])

        self.kde_models[budget]['bad'].fit(train_data_bad,
                                           bw_estimator=self.bw_estimator)
        self.kde_models[budget]['good'].fit(train_data_good,
                                            bw_estimator=self.bw_estimator)

        # with very few 'good' points, mlcv bandwidths are unreliable, so
        # fall back to the bandwidths of the 'bad' model in that case
        if self.bw_estimator == 'mlcv' and n_good < 3:
            self.kde_models[budget]['good'].bandwidths[:] = self.kde_models[
                budget]['bad'].bandwidths

        self.logger.debug('=' * 50)
        self.logger.debug(self.kde_models[budget]['good'].bandwidths)
        self.logger.debug(self.kde_models[budget]['good'].data.mean(axis=0))
        self.logger.debug(self.kde_models[budget]['good'].data.std(axis=0))
        self.logger.debug((train_losses[idx])[:n_good])

        self.logger.debug(self.kde_models[budget]['bad'].bandwidths)

        self.logger.debug(
            'done building a new model for budget %f based on %i/%i split. Best loss for this budget: %f'
            % (budget, n_good, n_bad, np.min(train_losses)))
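new_result only reads three things from job: job.result['loss'], job.kwargs['budget'], and job.kwargs['config']. When warm-starting from logged runs, a minimal stand-in with just those attributes suffices. The sketch below is hypothetical; FakeJob, logged_runs and config_generator are made-up names, not part of the hpbandster API:

class FakeJob(object):
    """Hypothetical stand-in exposing only what new_result reads."""

    def __init__(self, config, budget, loss):
        self.kwargs = {'config': config, 'budget': budget}
        # loss=None mimics a crashed run, which new_result maps to +inf
        self.result = None if loss is None else {'loss': loss}


# replay logged results without refitting the KDEs after every run
for config, budget, loss in logged_runs:
    config_generator.new_result(FakeJob(config, budget, loss),
                                update_model=False)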
Example #3
class BaseNdTest(object):
	n_train = 128
	n_test = 512
	def setUp(self):
		self.configspace = CS.ConfigurationSpace(42)
		
		self.add_hyperparameters()

		x_train_confs = [self.configspace.sample_configuration() for i in range(self.n_train)]
		self.x_train = np.array([c.get_array() for c in x_train_confs])

		x_test_confs = [self.configspace.sample_configuration() for i in range(self.n_test)]
		self.x_test = np.array([c.get_array() for c in x_test_confs])
		
		self.sm_x_train = self.sm_transform_data(self.x_train)
		self.sm_x_test = self.sm_transform_data(self.x_test)

		self.sm_kde = sm.nonparametric.KDEMultivariate(data=self.sm_x_train, var_type=self.var_types, bw='cv_ml')

		self.sm_1d_kdes = [sm.nonparametric.KDEMultivariate(data=self.sm_x_train[:, i], var_type=self.var_types[i], bw='cv_ml') for i in range(len(self.var_types))]

		self.hp_kde_full = MultivariateKDE(self.configspace, fully_dimensional=True, fix_boundary=False)
		self.hp_kde_factor = MultivariateKDE(self.configspace, fully_dimensional=False, fix_boundary=False)
		self.hp_kde_full.fit(self.x_train, bw_estimator='mlcv')
		self.hp_kde_factor.fit(self.x_train, bw_estimator='mlcv')

	def sm_transform_data(self, data):
		# identity by default; subclasses may override to map the data
		# into the encoding statsmodels expects
		return data
	
	def tearDown(self):
		self.configspace = None
		self.x_train = None
		self.x_test = None
		self.sm_kde = None
		self.sm_1d_kdes = None
		self.hp_kde_full = None
		self.hp_kde_factor = None

	@unittest.skipIf(rapid_development, "test skipped to accelerate developing new tests")
	def test_bandwidths_estimation(self):
		# This test sometimes fails, as statsmodels uses a different optimizer with a larger tolerance
		
		for d in range(len(self.var_types)):
			self.assertAlmostEqual(self.sm_kde.bw[d], self.hp_kde_full.bandwidths[d], delta=5e-2)
			self.assertAlmostEqual(self.sm_1d_kdes[d].bw[0], self.hp_kde_factor.bandwidths[d], delta=5e-2)


	@unittest.skipIf(rapid_development, "test skipped to accelerate developing new tests")
	def test_pdfs(self):
		for bw in np.logspace(-0.5, -0.1, 5):
			self.sm_kde.bw = np.array([bw]*len(self.var_types))
			self.hp_kde_full.set_bandwidths(np.array([bw]*len(self.var_types)))
			self.hp_kde_factor.set_bandwidths(np.array([bw]*len(self.var_types)))

			p1 = self.sm_kde.pdf(self.sm_x_test)
			p2 = self.hp_kde_full.pdf(self.x_test)
			p3 = self.hp_kde_factor.pdf(self.x_test)
			
			p4_tmp = []
			for i, kde in enumerate(self.sm_1d_kdes):
				kde.bw = np.array([bw])
				p4_tmp.append(kde.pdf(self.sm_x_test[:, i]))

			# product of the independent 1d densities
			p4 = np.array(p4_tmp).prod(axis=0)
			
			self.assertTrue(np.allclose(p1, p2))
			self.assertTrue(np.allclose(p3, p4))

	@unittest.skipIf(rapid_development, "test skipped to accelerate developing new tests")
	def test_loo_likelihood(self):
		for bw in np.logspace(-1, -0.1, 5):
			self.sm_kde.bw = np.array([bw]*len(self.var_types))
			self.hp_kde_full.set_bandwidths(np.array([bw]*len(self.var_types)))
			self.hp_kde_factor.set_bandwidths(np.array([bw]*len(self.var_types)))
			
			sm_full_ll = self.sm_kde.loo_likelihood(bw=np.array([bw]*len(self.var_types)), func=np.log)
			hp_full_ll = self.hp_kde_full.loo_negloglikelihood()
			hp_factor_ll = self.hp_kde_factor.loo_negloglikelihood()

			sm_factor_ll = []
			for i, kde in enumerate(self.sm_1d_kdes):
				kde.bw = np.array([bw])
				sm_factor_ll.append(kde.loo_likelihood(bw=np.array([bw]), func=np.log))

			
			sm_factor_ll = np.array(sm_factor_ll)
			n = self.x_train.shape[0]
			delta = 1e-2 * np.abs((sm_full_ll + hp_full_ll)/2)
			# note: statsmodels' ll is not normalized, so we have to transform our result to get the same number!
			self.assertAlmostEqual(sm_full_ll, n*(hp_full_ll - np.log(n-1)), delta=delta)
			# same here, but it is easier to apply the normalization to the SM KDE's likelihoods
			delta = 1e-2 * np.abs(hp_factor_ll)
			self.assertAlmostEqual(np.sum((sm_factor_ll/n) + np.log(n-1)), hp_factor_ll , delta=delta)
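The rescaling in these assertions exists because the two libraries report the leave-one-out likelihood on different scales. A hypothetical helper (the name sm_to_hp_loo is made up) that factors out the conversion used above:

def sm_to_hp_loo(sm_ll, n):
	# statsmodels returns an unnormalized sum of leave-one-out log
	# densities over all n training points, while MultivariateKDE reports
	# a per-point value including the 1/(n-1) kernel normalization.
	# Rearranged from the assertion sm_ll == n * (hp_ll - np.log(n - 1)):
	return sm_ll / n + np.log(n - 1)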
Example #4
class Base1dTest(object):
	n_train = 128
	n_test = 1024
	def setUp(self):
		self.configspace = CS.ConfigurationSpace(42)
		
		self.add_hyperparameters()

		x_train_confs = [self.configspace.sample_configuration() for i in range(self.n_train)]
		self.x_train = np.array([c.get_array() for c in x_train_confs])

		x_test_confs = [self.configspace.sample_configuration() for i in range(self.n_test)]
		self.x_test = np.array([c.get_array() for c in x_test_confs])
		
		self.sm_x_train = self.sm_transform_data(self.x_train)
		self.sm_x_test = self.sm_transform_data(self.x_test)

		self.sm_kde = sm.nonparametric.KDEMultivariate(data=self.sm_x_train, var_type=self.var_types, bw='cv_ml')
		self.hp_kde_full = MultivariateKDE(self.configspace, fully_dimensional=True, fix_boundary=False)
		self.hp_kde_factor = MultivariateKDE(self.configspace, fully_dimensional=False, fix_boundary=False)
		self.hp_kde_full.fit(self.x_train, bw_estimator='mlcv')
		self.hp_kde_factor.fit(self.x_train, bw_estimator='mlcv')

	def sm_transform_data(self, data):
		# identity by default; subclasses may override to map the data
		# into the encoding statsmodels expects
		return data
	
	def tearDown(self):
		self.configspace = None
		self.x_train = None
		self.x_test = None
		self.sm_kde = None
		self.hp_kde_full = None
		self.hp_kde_factor = None

	@unittest.skipIf(rapid_development, "test skipped to accelerate developing new tests")
	def test_bandwidths_estimation(self):
		# This test sometimes fails, as statsmodels uses a different optimizer with a larger tolerance
		self.assertAlmostEqual(self.sm_kde.bw[0], self.hp_kde_full.bandwidths[0], delta=2e-3)
		self.assertAlmostEqual(self.sm_kde.bw[0], self.hp_kde_factor.bandwidths[0], delta=2e-3)

	@unittest.skipIf(rapid_development, "test skipped to accelerate developing new tests")
	def test_pdfs(self):
		for bw in np.logspace(-0.5, -0.1, 5):
			self.sm_kde.bw = np.array([bw])
			self.hp_kde_full.set_bandwidths(np.array([bw]))
			self.hp_kde_factor.set_bandwidths(np.array([bw]))

			p1 = self.sm_kde.pdf(self.sm_x_test)
			p2 = self.hp_kde_full.pdf(self.x_test)
			p3 = self.hp_kde_factor.pdf(self.x_test)

			self.assertTrue(np.allclose(p1, p2))
			self.assertTrue(np.allclose(p1, p3))

	@unittest.skipIf(rapid_development, "test skipped to accelerate developing new tests")
	def test_loo_likelihood(self):
		for bw in np.logspace(-1, -0.1, 5):
			self.sm_kde.bw = np.array([bw])
			self.hp_kde_full.set_bandwidths(np.array([bw]))
			self.hp_kde_factor.set_bandwidths(np.array([bw]))
			
			sm_ll = self.sm_kde.loo_likelihood(bw=np.array([bw]), func=np.log)
			hp_full_ll = self.hp_kde_full.loo_negloglikelihood()
			hp_factor_ll = self.hp_kde_factor.loo_negloglikelihood()
			
			n = self.x_train.shape[0]
			delta = 1e-3 * np.abs((sm_ll + hp_full_ll)/2)
			# note: statsmodels' ll is not normalized, so we have to transform our result to get the same number!
			self.assertAlmostEqual(sm_ll, n*(hp_full_ll - np.log(n-1)), delta=delta)
			self.assertAlmostEqual(sm_ll, n*(hp_factor_ll - np.log(n-1)), delta=delta)
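Both Base1dTest and BaseNdTest are mixins: concrete test cases are expected to also inherit from unittest.TestCase and to supply add_hyperparameters and var_types. A hypothetical concrete test case (the class and hyperparameter names are made up):

class Continuous1dTest(Base1dTest, unittest.TestCase):
	var_types = 'c'  # one continuous variable, in statsmodels notation

	def add_hyperparameters(self):
		self.configspace.add_hyperparameter(
			CS.UniformFloatHyperparameter('x', lower=0, upper=1))


if __name__ == '__main__':
	unittest.main()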