class TestGaussianProcessMCMC(unittest.TestCase):
    """Tests for GaussianProcessMCMC without an explicit hyperparameter prior."""

    def setUp(self):
        # Small random regression problem: 10 points in 2 dimensions.
        self.X = np.random.randn(10, 2)
        self.y = np.sinc(self.X * 10 - 5).sum(axis=1)
        kernel = george.kernels.Matern52Kernel(np.ones(self.X.shape[1]),
                                               ndim=self.X.shape[1])
        self.model = GaussianProcessMCMC(kernel, n_hypers=6,
                                         burnin_steps=100,
                                         chain_length=200)
        self.model.train(self.X, self.y, do_optimize=True)

    def test_predict(self):
        # Predictive mean and variance must be 1-D, one entry per test point.
        X_test = np.random.rand(10, 2)
        m, v = self.model.predict(X_test)
        assert len(m.shape) == 1
        assert m.shape[0] == X_test.shape[0]
        assert len(v.shape) == 1
        assert v.shape[0] == X_test.shape[0]

    def test_loglikelihood(self):
        theta = np.array([0.2, 0.2, 0.001])
        ll = self.model.loglikelihood(theta)
        # BUG FIX: the result was previously computed but never checked,
        # so the test could not fail. At minimum the log likelihood of a
        # concrete hyperparameter vector must be a finite scalar.
        assert np.isfinite(ll)

    def test_get_incumbent(self):
        # The incumbent must be the observed point with the lowest y value.
        inc, inc_val = self.model.get_incumbent()
        b = np.argmin(self.y)
        np.testing.assert_almost_equal(inc, self.X[b], decimal=5)
        assert inc_val == self.y[b]
def suggest_configuration(self):
    """Propose the next configuration to evaluate.

    While fewer than two observations are available a configuration is
    drawn uniformly at random. Otherwise a GP with MCMC-sampled
    hyperparameters is fit to the data and the marginalized log-EI
    acquisition function is maximized with DIRECT.

    Returns
    -------
    The chosen configuration, converted to a sacred configuration.
    """
    no_data_yet = self.X is None and self.y is None
    if no_data_yet or self.X.shape[0] == 1:
        # We need at least 2 data points to train a GP.
        new_x = init_random_uniform(self.lower, self.upper,
                                    n_points=1, rng=self.rng)[0, :]
    else:
        n_dims = self.lower.shape[0]

        # Matern-5/2 kernel with unit initial length scales, scaled by a
        # covariance amplitude of 1.
        matern = george.kernels.Matern52Kernel(np.ones([n_dims]), ndim=n_dims)
        kernel = 1 * matern
        prior = DefaultPrior(len(kernel) + 1)

        model = GaussianProcessMCMC(kernel,
                                    prior=prior,
                                    n_hypers=self.n_hypers,
                                    chain_length=self.chain_length,
                                    burnin_steps=self.burnin,
                                    normalize_input=False,
                                    normalize_output=True,
                                    rng=self.rng,
                                    lower=self.lower,
                                    upper=self.upper)

        # Marginalize log-EI over the sampled GP hyperparameters.
        acquisition_func = MarginalizationGPMCMC(LogEI(model))
        max_func = Direct(acquisition_func, self.lower, self.upper,
                          verbose=False)

        model.train(self.X, self.y)
        acquisition_func.update(model)
        new_x = max_func.maximize()

    next_config = Configuration(self.config_space, vector=new_x)

    # Transform to sacred configuration
    return configspace_config_to_sacred(next_config)
class TestGaussianProcessMCMC(unittest.TestCase):
    """Tests for GaussianProcessMCMC with a TophatPrior on the hyperparameters."""

    def setUp(self):
        # Small random regression problem: 10 points in 2 dimensions.
        self.X = np.random.randn(10, 2)
        self.y = np.sinc(self.X * 10 - 5).sum(axis=1)
        kernel = george.kernels.Matern52Kernel(np.ones(self.X.shape[1]),
                                               ndim=self.X.shape[1])
        prior = TophatPrior(-2, 2)
        self.model = GaussianProcessMCMC(kernel, prior=prior, n_hypers=6,
                                         burnin_steps=100,
                                         chain_length=200)
        self.model.train(self.X, self.y, do_optimize=True)

    def test_predict(self):
        # Predictive mean and variance must be 1-D, one entry per test point.
        X_test = np.random.rand(10, 2)
        m, v = self.model.predict(X_test)
        assert len(m.shape) == 1
        assert m.shape[0] == X_test.shape[0]
        assert len(v.shape) == 1
        assert v.shape[0] == X_test.shape[0]

    def test_loglikelihood(self):
        theta = np.array([0.2, 0.2, 0.001])
        ll = self.model.loglikelihood(theta)
        # BUG FIX: the result was previously computed but never checked,
        # so the test could not fail. At minimum the log likelihood of a
        # concrete hyperparameter vector must be a finite scalar.
        assert np.isfinite(ll)

    def test_get_incumbent(self):
        # The incumbent must be the observed point with the lowest y value.
        inc, inc_val = self.model.get_incumbent()
        b = np.argmin(self.y)
        np.testing.assert_almost_equal(inc, self.X[b], decimal=5)
        assert inc_val == self.y[b]
def mtbo(objective_function, lower, upper, n_tasks=2, n_init=2,
         num_iterations=30, burnin=100, chain_length=200, rng=None):
    """
    Interface to MTBO[1] which uses an auxiliary cheaper task to speed up
    the optimization of a more expensive but similar task.

    [1] Multi-Task Bayesian Optimization
        K. Swersky and J. Snoek and R. Adams
        Proceedings of the 27th International Conference on Advances
        in Neural Information Processing Systems (NIPS'13)

    Parameters
    ----------
    objective_function: function
        Objective function that will be optimized
    lower: np.array(D,)
        Lower bound of the input space
    upper: np.array(D,)
        Upper bound of the input space
    n_tasks: int
        Number of task
    n_init: int
        Number of initial design points
    num_iterations: int
        Number of iterations
    chain_length : int
        The length of the MCMC chain for each walker.
    burnin : int
        The number of burnin steps before the actual MCMC sampling starts.
    rng: numpy.random.RandomState
        Random number generator

    Returns
    -------
    dict with all results
    """
    assert n_init <= num_iterations, "Number of initial design point has to be <= than the number of iterations"
    assert lower.shape[0] == upper.shape[
        0], "Dimension miss match between upper and lower bound"

    time_start = time.time()
    if rng is None:
        rng = np.random.RandomState(np.random.randint(0, 10000))

    n_dims = lower.shape[0]

    # Bookkeeping
    time_func_eval = []
    time_overhead = []
    incumbents = []
    runtime = []

    X = []
    y = []
    c = []

    # Define model for the objective function
    cov_amp = 1  # Covariance amplitude
    kernel = cov_amp

    # ARD Kernel for the configuration space
    # One 1-D Matern-5/2 kernel per input dimension; ndim is n_dims + 1
    # because the input is later extended by a task variable.
    for d in range(n_dims):
        kernel *= george.kernels.Matern52Kernel(np.ones([1]) * 0.01,
                                                ndim=n_dims + 1,
                                                dim=d)

    # Task kernel correlates observations across the n_tasks tasks
    # (the task index lives in the last input dimension).
    task_kernel = george.kernels.TaskKernel(n_dims + 1, n_dims, n_tasks)
    kernel *= task_kernel

    # Take 3 times more samples than we have hyperparameters
    n_hypers = 3 * len(kernel)
    if n_hypers % 2 == 1:
        # emcee-style samplers need an even number of walkers.
        n_hypers += 1

    prior = MTBOPrior(len(kernel) + 1,
                      n_ls=n_dims,
                      n_kt=len(task_kernel),
                      rng=rng)

    model_objective = GaussianProcessMCMC(kernel,
                                          prior=prior,
                                          burnin_steps=burnin,
                                          chain_length=chain_length,
                                          n_hypers=n_hypers,
                                          normalize_input=False,
                                          lower=lower,
                                          upper=upper,
                                          rng=rng)

    # Define model for the cost function
    # Same kernel structure as the objective model, trained on cost values.
    cost_cov_amp = 1
    cost_kernel = cost_cov_amp

    # ARD Kernel for the configuration space
    for d in range(n_dims):
        cost_kernel *= george.kernels.Matern52Kernel(np.ones([1]) * 0.01,
                                                     ndim=n_dims + 1,
                                                     dim=d)

    cost_task_kernel = george.kernels.TaskKernel(n_dims + 1, n_dims, n_tasks)
    cost_kernel *= cost_task_kernel

    # NOTE(review): n_kt reuses len(task_kernel) from the objective model
    # rather than len(cost_task_kernel); both have the same length here,
    # but this looks like a copy-paste slip — confirm intent.
    cost_prior = MTBOPrior(len(cost_kernel) + 1,
                           n_ls=n_dims,
                           n_kt=len(task_kernel),
                           rng=rng)

    model_cost = GaussianProcessMCMC(cost_kernel,
                                     prior=cost_prior,
                                     burnin_steps=burnin,
                                     chain_length=chain_length,
                                     n_hypers=n_hypers,
                                     normalize_input=False,
                                     lower=lower,
                                     upper=upper,
                                     rng=rng)

    # Extend input space by task variable
    extend_lower = np.append(lower, 0)
    extend_upper = np.append(upper, n_tasks - 1)
    is_env = np.zeros(extend_lower.shape[0])
    is_env[-1] = 1  # only the appended task dimension is environmental

    # Define acquisition function and maximizer
    # Information gain per unit cost, marginalized over the GP
    # hyperparameter samples; maximized with DIRECT on the extended space.
    ig = InformationGainPerUnitCost(model_objective,
                                    model_cost,
                                    extend_lower,
                                    extend_upper,
                                    is_env_variable=is_env,
                                    n_representer=50)
    acquisition_func = MarginalizationGPMCMC(ig)

    # `transformation` adapts the acquisition call signature for DIRECT.
    wrapper_func = partial(transformation, acq=acquisition_func,
                           lower=lower, upper=upper)
    maximizer = Direct(wrapper_func, extend_lower, extend_upper, verbose=True)

    # Initial Design
    for _ in range(n_init):
        logger.info("Initial design")
        start_time_overhead = time.time()
        # Draw random configuration and evaluate it just on the auxiliary task
        task = 0
        x = init_random_uniform(lower, upper, 1, rng)[0]
        logger.info("Evaluate candidate %s", str(x))
        st = time.time()
        func_val, cost = objective_function(x, task)
        time_func_eval.append(time.time() - st)

        logger.info("Configuration achieved a performance of %f with cost %f",
                    func_val, cost)
        logger.info("Evaluation of this configuration took %f seconds",
                    time_func_eval[-1])

        # Bookkeeping
        config = np.append(x, task)
        X.append(config)
        y.append(func_val)
        c.append(cost)

        # Estimate incumbent as the best observed value so far
        # NOTE(review): X[best_idx] already contains the task entry, so this
        # appends n_tasks - 1 on top of it, producing a vector of length
        # n_dims + 2 — later incumbents (built from X[:, :-1] plus the
        # projection value) have length n_dims + 1. Verify intended.
        best_idx = np.argmin(y)
        incumbents.append(np.append(
            X[best_idx],
            n_tasks - 1))  # Incumbent is always on the task of interest

        time_overhead.append(time.time() - start_time_overhead)
        runtime.append(time.time() - time_start)

    X = np.array(X)
    y = np.array(y)
    c = np.array(c)

    # Main Bayesian-optimization loop.
    for it in range(n_init, num_iterations):
        logger.info("Start iteration %d ... ", it)
        start_time = time.time()

        # Train models
        # `transform` presumably maps X into the models' normalized input
        # space (normalize_input=False was passed above) — confirm.
        model_objective.train(transform(X, lower, upper), y, do_optimize=True)
        model_cost.train(transform(X, lower, upper), c, do_optimize=True)

        # Estimate incumbent by projecting all observed points to the task of interest and
        # pick the point with the lowest mean prediction
        incumbent, incumbent_value = projected_incumbent_estimation(
            model_objective, transform(X, lower, upper)[:, :-1],
            proj_value=n_tasks - 1)
        # Map incumbent (without the task entry) back to the original scale.
        incumbent[:-1] = normalization.zero_one_unnormalization(
            incumbent[:-1], lower, upper)
        incumbents.append(incumbent)
        logger.info("Current incumbent %s with estimated performance %f",
                    str(incumbent), incumbent_value)

        # Maximize acquisition function
        acquisition_func.update(model_objective, model_cost)

        new_x = maximizer.maximize()
        new_x[-1] = np.rint(
            new_x[-1])  # Map float value to discrete task variable

        time_overhead.append(time.time() - start_time)
        logger.info("Optimization overhead was %f seconds", time_overhead[-1])

        # Evaluate the chosen configuration
        logger.info("Evaluate candidate %s", str(new_x))
        start_time = time.time()
        new_y, new_c = objective_function(new_x[:-1], new_x[-1])
        time_func_eval.append(time.time() - start_time)

        logger.info("Configuration achieved a performance of %f with cost %f",
                    new_y, new_c)
        logger.info("Evaluation of this configuration took %f seconds",
                    time_func_eval[-1])

        # Add new observation to the data
        X = np.concatenate((X, new_x[None, :]), axis=0)
        y = np.concatenate((y, np.array([new_y])), axis=0)
        c = np.concatenate((c, np.array([new_c])), axis=0)

        runtime.append(time.time() - time_start)

    # Estimate the final incumbent
    model_objective.train(transform(X, lower, upper), y)
    incumbent, incumbent_value = projected_incumbent_estimation(
        model_objective, transform(X, lower, upper)[:, :-1],
        proj_value=n_tasks - 1)
    incumbent[:-1] = normalization.zero_one_unnormalization(
        incumbent[:-1], lower, upper)
    incumbents.append(incumbent)

    logger.info("Final incumbent %s with estimated performance %f",
                str(incumbent), incumbent_value)

    # Collect all bookkeeping into the result dict.
    results = dict()
    results["x_opt"] = incumbent[:-1]
    results["trajectory"] = [inc for inc in incumbents]
    results["runtime"] = runtime
    results["overhead"] = time_overhead
    results["time_func_eval"] = time_func_eval

    return results