def test_cmaes(self):
    X_lower = np.array([0, 0])
    X_upper = np.array([1, 1])
    X = init_random_uniform(X_lower, X_upper, 10)
    Y = np.sin(X[:, 0])[:, np.newaxis]

    # One length scale per input dimension for the 2-d kernel
    kernel = george.kernels.Matern52Kernel(np.ones([2]), ndim=2)
    prior = TophatPrior(-2, 2)
    model = GaussianProcess(kernel, prior=prior)
    model.train(X, Y)

    rec = PosteriorMeanOptimization(model, X_lower, X_upper, method="cmaes")
    startpoints = init_random_uniform(X_lower, X_upper, 10)
    inc, inc_val = rec.estimate_incumbent(startpoints)

    # Check shapes
    assert len(inc.shape) == 2
    assert inc.shape[0] == 1
    assert inc.shape[1] == X_lower.shape[0]
    assert len(inc_val.shape) == 2
    assert inc_val.shape[0] == 1
    assert inc_val.shape[1] == 1

    # Check if the incumbent is within the bounds
    assert not np.any([np.any(inc[:, i] < X_lower[i])
                       for i in range(X_lower.shape[0])])
    assert not np.any([np.any(inc[:, i] > X_upper[i])
                       for i in range(X_upper.shape[0])])
class WithinModelComparison(BaseTask):

    def __init__(self, seed=42):
        X_lower = np.array([0, 0])
        X_upper = np.array([1, 1])
        rng = np.random.RandomState(seed)

        cov_amp = 1.0
        mat_kernel = george.kernels.Matern52Kernel(np.ones([2]) * 0.1, ndim=2)
        kernel = cov_amp * mat_kernel

        # Draw a random function from the GP prior: color i.i.d. normals with
        # the lower Cholesky factor of the kernel matrix
        self.xstar = rng.rand(1000, 2)
        K = kernel.value(self.xstar)
        L = sla.cholesky(K, lower=True)
        sigma = rng.randn(1000)
        self.f = np.dot(L, sigma)

        self.gp = GaussianProcess(kernel, yerr=0.0)
        self.gp.train(self.xstar, self.f[:, np.newaxis], do_optimize=False)

        best = np.argmin(self.f)
        fopt = self.f[best]
        opt = self.xstar[best]

        super(WithinModelComparison, self).__init__(X_lower, X_upper, opt, fopt)

    def objective_function(self, x):
        noise = 1e-3 * np.random.randn()
        mu, _ = self.gp.predict(x)
        return mu + noise

    def evaluate_test(self, x):
        return self.objective_function(x)
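# The __init__ above samples a test function from the GP prior by coloring
# standard normal draws with the Cholesky factor of the kernel matrix. A
# minimal self-contained sketch of the same trick, using a squared-exponential
# kernel as a stand-in for the Matern-5/2 (all names here are illustrative,
# not part of RoBO):
import numpy as np

rng = np.random.RandomState(0)
x = rng.rand(50, 2)

# Squared-exponential kernel matrix with length scale 0.1; the small diagonal
# jitter keeps the Cholesky factorization numerically stable
sq_dists = ((x[:, None, :] - x[None, :, :]) ** 2).sum(axis=-1)
K = np.exp(-0.5 * sq_dists / 0.1 ** 2) + 1e-10 * np.eye(x.shape[0])

# f ~ N(0, K): color i.i.d. standard normals with the lower Cholesky factor
f = np.dot(np.linalg.cholesky(K), rng.randn(x.shape[0]))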
def test_cmaes(self):
    X_lower = np.array([0, 0])
    X_upper = np.array([1, 1])
    X = init_random_uniform(X_lower, X_upper, 10)
    Y = np.sin(X[:, 0])[:, np.newaxis]

    # One length scale per input dimension for the 2-d kernel
    kernel = george.kernels.Matern52Kernel(np.ones([2]), ndim=2)
    prior = TophatPrior(-2, 2)
    model = GaussianProcess(kernel, prior=prior)
    model.train(X, Y)

    rec = PosteriorMeanAndStdOptimization(model, X_lower, X_upper, method="cmaes")
    startpoints = init_random_uniform(X_lower, X_upper, 10)
    inc, inc_val = rec.estimate_incumbent(startpoints)

    # Check shapes
    assert len(inc.shape) == 2
    assert inc.shape[0] == 1
    assert inc.shape[1] == X_lower.shape[0]
    assert len(inc_val.shape) == 2
    assert inc_val.shape[0] == 1
    assert inc_val.shape[1] == 1

    # Check if the incumbent is within the bounds
    assert not np.any([np.any(inc[:, i] < X_lower[i])
                       for i in range(X_lower.shape[0])])
    assert not np.any([np.any(inc[:, i] > X_upper[i])
                       for i in range(X_upper.shape[0])])
def test(self):
    X_lower = np.array([0])
    X_upper = np.array([1])
    X = init_random_uniform(X_lower, X_upper, 10)
    Y = np.sin(X)

    kernel = george.kernels.Matern52Kernel(np.ones([1]), ndim=1)
    prior = TophatPrior(-2, 2)
    model = GaussianProcess(kernel, prior=prior)
    model.train(X, Y)

    rec = BestObservation(model, X_lower, X_upper)
    startpoints = init_random_uniform(X_lower, X_upper, 10)
    inc, inc_val = rec.estimate_incumbent(startpoints)

    # Check shapes
    assert len(inc.shape) == 2
    assert inc.shape[0] == 1
    assert inc.shape[1] == X_lower.shape[0]
    assert len(inc_val.shape) == 2
    assert inc_val.shape[0] == 1
    assert inc_val.shape[1] == 1

    # Check if the incumbent is within the bounds
    assert not np.any([np.any(inc[:, i] < X_lower[i])
                       for i in range(X_lower.shape[0])])
    assert not np.any([np.any(inc[:, i] > X_upper[i])
                       for i in range(X_upper.shape[0])])
def setUp(self):
    self.X = np.random.rand(10, 2)
    self.y = np.sinc(self.X * 10 - 5).sum(axis=1)
    kernel = george.kernels.Matern52Kernel(np.array([0.1, 0.1]), ndim=2)
    self.model = GaussianProcess(kernel)
    self.model.train(self.X, self.y)
    self.acquisition_func = InformationGain(self.model,
                                            np.zeros([2]), np.ones([2]))
    self.acquisition_func.update(self.model)
def setUp(self):
    self.X = np.random.rand(10, 2)
    self.y = np.sinc(self.X * 10 - 5).sum(axis=1)
    kernel = george.kernels.Matern52Kernel(np.ones(self.X.shape[1]),
                                           ndim=self.X.shape[1])
    prior = TophatPrior(-2, 2)
    self.model = GaussianProcess(kernel, prior=prior)
    self.model.train(self.X, self.y, do_optimize=False)
class Test(unittest.TestCase):

    def setUp(self):
        self.X = np.random.rand(10, 2)
        self.y = np.sinc(self.X * 10 - 5).sum(axis=1)
        self.c = np.exp(self.X[:, 1])
        self.n_dims = 2
        self.lower = np.zeros(self.n_dims)
        self.upper = np.ones(self.n_dims)
        self.is_env = np.array([0, 1])

        kernel = george.kernels.Matern52Kernel(np.array([0.1, 0.1]), ndim=2)
        self.model = GaussianProcess(kernel)
        self.model.train(self.X, self.y)

        kernel = george.kernels.Matern52Kernel(np.ones([self.n_dims]) * 0.01,
                                               ndim=self.n_dims)
        kernel = 3000 * kernel
        prior = default_priors.TophatPrior(-2, 2)
        model = GaussianProcess(kernel, prior=prior)

        cost_kernel = george.kernels.Matern52Kernel(np.ones([self.n_dims]) * 0.01,
                                                    ndim=self.n_dims)
        cost_kernel = 3000 * cost_kernel
        prior = default_priors.TophatPrior(-2, 2)
        cost_model = GaussianProcess(cost_kernel, prior=prior)

        model.train(self.X, self.y, do_optimize=False)
        cost_model.train(self.X, self.c, do_optimize=False)

        self.acquisition_func = InformationGainPerUnitCost(model,
                                                           cost_model,
                                                           self.lower,
                                                           self.upper,
                                                           self.is_env)
        self.acquisition_func.update(model, cost_model)

    def test_sampling_representer_points(self):
        # Check if representer points are inside the configuration subspace
        assert np.any(self.acquisition_func.zb[:, self.is_env == 1] ==
                      self.acquisition_func.upper[self.is_env == 1])

    def test_compute(self):
        X_test = np.random.rand(5, 2)
        a = self.acquisition_func.compute(X_test, derivative=False)
        assert a.shape[0] == X_test.shape[0]
        assert len(a.shape) == 1
def setUp(self):
    self.task = TestTask()

    kernel = george.kernels.Matern52Kernel(np.ones([self.task.n_dims]) * 0.01,
                                           ndim=self.task.n_dims)
    noise_kernel = george.kernels.WhiteKernel(1e-9, ndim=self.task.n_dims)
    kernel = 3000 * (kernel + noise_kernel)

    prior = default_priors.TophatPrior(-2, 2)
    model = GaussianProcess(kernel, prior=prior)

    cost_kernel = george.kernels.Matern52Kernel(
        np.ones([self.task.n_dims]) * 0.01, ndim=self.task.n_dims)
    cost_noise_kernel = george.kernels.WhiteKernel(1e-9, ndim=self.task.n_dims)
    cost_kernel = 3000 * (cost_kernel + cost_noise_kernel)

    prior = default_priors.TophatPrior(-2, 2)
    cost_model = GaussianProcess(cost_kernel, prior=prior)

    X = init_random_uniform(self.task.X_lower, self.task.X_upper, 3)
    Y, C = self.task.evaluate(X)

    model.train(X, Y, do_optimize=False)
    cost_model.train(X, C, do_optimize=False)

    self.acquisition_func = InformationGainPerUnitCost(model,
                                                       cost_model,
                                                       self.task.X_lower,
                                                       self.task.X_upper,
                                                       self.task.is_env)
    self.acquisition_func.update(model, cost_model)
def setUp(self):
    lower = np.zeros([1])
    upper = np.ones([1])

    kernel = george.kernels.Matern52Kernel(np.array([1]), dim=1, ndim=1)
    model = GaussianProcess(kernel)
    lcb = LCB(model)
    maximizer = RandomSampling(lcb, lower, upper)
    self.solver = BayesianOptimization(objective_func, lower, upper,
                                       lcb, model, maximizer)
def benchmark_function(
    function,
    seed,
    n_eval=20,
    n_initial_points=5,
    model_class=None,
    model_kwargs=None,
):
    lower = np.array([-10])
    upper = np.array([10])

    rng1 = np.random.RandomState(seed)
    rng2 = np.random.RandomState(seed)

    cov_amp = 2
    n_dims = lower.shape[0]
    initial_ls = np.ones([n_dims])
    exp_kernel = george.kernels.Matern52Kernel(initial_ls, ndim=n_dims)
    kernel = cov_amp * exp_kernel

    prior = DefaultPrior(len(kernel) + 1)

    if model_class is None:
        model = GaussianProcess(
            kernel,
            prior=prior,
            rng=rng1,
            normalize_output=True,
            normalize_input=True,
            lower=lower,
            upper=upper,
            noise=1e-3,
        )
    else:
        model = model_class(rng=rng1, **model_kwargs)

    acq = LogEI(model)
    max_func = SciPyOptimizer(acq, lower, upper, n_restarts=50, rng=rng2)

    bo = BayesianOptimization(
        objective_func=function,
        lower=lower,
        upper=upper,
        acquisition_func=acq,
        model=model,
        initial_points=n_initial_points,
        initial_design=init_latin_hypercube_sampling,
        rng=rng2,
        maximize_func=max_func,
    )
    bo.run(n_eval)

    # Running minimum of the observed function values (the incumbent trace)
    rval = np.minimum.accumulate(bo.y)
    return rval
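# A hypothetical call of benchmark_function above, sketching how one might
# average the running-best traces it returns over several seeds; the quadratic
# objective is illustrative, not from RoBO:
import numpy as np

traces = [benchmark_function(lambda x: float((x[0] - 2.0) ** 2), seed=s)
          for s in range(3)]
mean_trace = np.mean(traces, axis=0)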
def setUp(self):
    self.task = SinFunction()

    kernel = george.kernels.Matern52Kernel(np.ones([self.task.n_dims]) * 0.01,
                                           ndim=self.task.n_dims)
    noise_kernel = george.kernels.WhiteKernel(1e-9, ndim=self.task.n_dims)
    kernel = 3000 * (kernel + noise_kernel)

    prior = default_priors.TophatPrior(-2, 2)
    model = GaussianProcess(kernel, prior=prior)

    X = init_random_uniform(self.task.X_lower, self.task.X_upper, 3)
    Y = self.task.evaluate(X)
    model.train(X, Y, do_optimize=False)

    self.acquisition_func = InformationGainMC(model,
                                              X_upper=self.task.X_upper,
                                              X_lower=self.task.X_lower)
    self.acquisition_func.update(model)
def test_json_base_solver(self):
    task = Levy()
    kernel = george.kernels.Matern52Kernel([1.0], ndim=1)
    model = GaussianProcess(kernel)
    ei = EI(model, task.X_lower, task.X_upper)
    maximizer = Direct(ei, task.X_lower, task.X_upper)
    solver = BayesianOptimization(acquisition_func=ei,
                                  model=model,
                                  maximize_func=maximizer,
                                  task=task)
    solver.run(1, X=None, Y=None)

    iteration = 0
    data = solver.get_json_data(it=iteration)
    assert data['iteration'] == iteration
def test(self):
    X_lower = np.array([0, 0])
    X_upper = np.array([1, 1])
    is_env = np.array([0, 1])
    X = init_random_uniform(X_lower, X_upper, 10)
    Y = np.sin(X)[:, None, 0]

    kernel = george.kernels.Matern52Kernel(np.ones([2]), ndim=2)
    prior = TophatPrior(-2, 2)
    model = GaussianProcess(kernel, prior=prior)
    model.train(X, Y)

    rec = BestProjectedObservation(model, X_lower, X_upper, is_env)
    inc, inc_val = rec.estimate_incumbent()

    # Check shapes
    assert len(inc.shape) == 2
    assert inc.shape[0] == 1
    assert inc.shape[1] == X_lower.shape[0]
    assert len(inc_val.shape) == 2
    assert inc_val.shape[0] == 1
    assert inc_val.shape[1] == 1

    # Check if the incumbent is within the bounds
    assert not np.any([np.any(inc[:, i] < X_lower[i])
                       for i in range(X_lower.shape[0])])
    assert not np.any([np.any(inc[:, i] > X_upper[i])
                       for i in range(X_upper.shape[0])])

    # Check if the incumbent is the correct point
    b = np.argmin(Y)
    x_best = X[b, None, :]
    assert np.all(inc[:, is_env == 0] == x_best[:, is_env == 0])
'''
Created on Mar 16, 2016

@author: Aaron Klein
'''
import george

from robo.maximizers.direct import Direct
from robo.models.gaussian_process import GaussianProcess
from robo.task.synthetic_functions.levy import Levy
from robo.acquisition.ei import EI
from robo.solver.bayesian_optimization import BayesianOptimization

task = Levy()
kernel = george.kernels.Matern52Kernel([1.0], ndim=1)
model = GaussianProcess(kernel)
ei = EI(model, task.X_lower, task.X_upper)
maximizer = Direct(ei, task.X_lower, task.X_upper)
bo = BayesianOptimization(acquisition_func=ei,
                          model=model,
                          maximize_func=maximizer,
                          task=task)
print(bo.run(10))
class TestGaussianProcess(unittest.TestCase):

    def setUp(self):
        self.X = np.random.rand(10, 2)
        self.y = np.sinc(self.X * 10 - 5).sum(axis=1)
        self.kernel = george.kernels.Matern52Kernel(np.ones(self.X.shape[1]),
                                                    ndim=self.X.shape[1])
        prior = TophatPrior(-2, 2)
        self.model = GaussianProcess(self.kernel,
                                     prior=prior,
                                     normalize_input=False,
                                     normalize_output=False)
        self.model.train(self.X, self.y, do_optimize=False)

    def test_predict(self):
        X_test = np.random.rand(10, 2)

        m, v = self.model.predict(X_test)
        assert len(m.shape) == 1
        assert m.shape[0] == X_test.shape[0]
        assert len(v.shape) == 1
        assert v.shape[0] == X_test.shape[0]

        m, v = self.model.predict(X_test, full_cov=True)
        assert len(m.shape) == 1
        assert m.shape[0] == X_test.shape[0]
        assert len(v.shape) == 2
        assert v.shape[0] == X_test.shape[0]
        assert v.shape[1] == X_test.shape[0]

        # Compare against the analytic GP posterior covariance
        K_zz = self.kernel.value(X_test)
        K_zx = self.kernel.value(X_test, self.X)
        K_nz = self.kernel.value(self.X) + self.model.noise * np.eye(self.X.shape[0])
        inv = spla.inv(K_nz)
        K_zz_x = K_zz - np.dot(K_zx, np.inner(inv, K_zx))
        assert np.mean((K_zz_x - v) ** 2) < 10e-5

    def test_sample_function(self):
        X_test = np.random.rand(8, 2)
        n_funcs = 3
        funcs = self.model.sample_functions(X_test, n_funcs=n_funcs)
        assert len(funcs.shape) == 2
        assert funcs.shape[0] == n_funcs
        assert funcs.shape[1] == X_test.shape[0]

    def test_predict_variance(self):
        x_test1 = np.random.rand(1, 2)
        x_test2 = np.random.rand(10, 2)
        var = self.model.predict_variance(x_test1, x_test2)
        assert len(var.shape) == 2
        assert var.shape[0] == x_test2.shape[0]
        assert var.shape[1] == x_test1.shape[0]

    def test_nll(self):
        theta = np.array([0.2, 0.2, 0.001])
        nll = self.model.nll(theta)

    def test_optimize(self):
        theta = self.model.optimize()
        # Hyperparameters are 2 length scales + noise
        assert theta.shape[0] == 3

    def test_get_incumbent(self):
        inc, inc_val = self.model.get_incumbent()
        b = np.argmin(self.y)
        np.testing.assert_almost_equal(inc, self.X[b], decimal=5)
        assert inc_val == self.y[b]
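# For reference, test_predict above compares the model's full covariance
# against the standard GP posterior covariance
#     Sigma_* = K(X_*, X_*) - K(X_*, X) [K(X, X) + sigma_n^2 I]^{-1} K(X, X_*)
# which is exactly what the K_zz_x expression in the test computes.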
def train(self, X, Y, do_optimize=True, **kwargs):
    """
    Performs MCMC sampling to sample hyperparameter configurations from
    the likelihood and trains for each sample a GP on X and Y

    Parameters
    ----------
    X: np.ndarray (N, D)
        Input data points. The dimensionality of X is (N, D),
        with N as the number of points and D is the number of features.
    Y: np.ndarray (N, 1)
        The corresponding target values.
    do_optimize: boolean
        If set to true we perform MCMC sampling otherwise we just use the
        hyperparameter specified in the kernel.
    """
    self.X = X

    # For EnvES we transform s to (1 - s)^2
    if self.basis_func is not None:
        self.X = deepcopy(X)
        self.X[:, self.dim] = self.basis_func(self.X[:, self.dim])

    self.Y = Y

    # Use the mean of the data as mean for the GP
    mean = np.mean(Y, axis=0)
    self.gp = george.GP(self.kernel, mean=mean)

    # Precompute the covariance; if the Cholesky decomposition fails,
    # retry with ten times more noise on the diagonal
    yerr = 1e-25
    while True:
        try:
            self.gp.compute(self.X, yerr=yerr)
            break
        except np.linalg.LinAlgError:
            yerr *= 10
            logging.error("Cholesky decomposition for the covariance matrix "
                          "of the GP failed. "
                          "Added %s noise on the diagonal." % yerr)

    if do_optimize:
        # We have one walker for each hyperparameter configuration
        self.sampler = emcee.EnsembleSampler(self.n_hypers,
                                             len(self.kernel.pars),
                                             self.loglikelihood)

        # Do a burn-in in the first iteration
        if not self.burned:
            # Initialize the walkers by sampling from the prior
            self.p0 = self.prior.sample_from_prior(self.n_hypers)
            # Run MCMC sampling
            self.p0, _, _ = self.sampler.run_mcmc(self.p0, self.burnin_steps)
            self.burned = True

        # Start sampling
        pos, _, _ = self.sampler.run_mcmc(self.p0, self.chain_length)

        # Save the current position, it will be the start point in
        # the next iteration
        self.p0 = pos

        # Take the last samples from each walker
        self.hypers = self.sampler.chain[:, -1]

        self.models = []
        logging.info("Hypers: %s" % self.hypers)
        for sample in self.hypers:
            # Instantiate a model for each hyperparameter configuration
            kernel = deepcopy(self.kernel)
            kernel.pars = np.exp(sample)

            model = GaussianProcess(kernel,
                                    basis_func=self.basis_func,
                                    dim=self.dim)
            model.train(self.X, self.Y, do_optimize=False)
            self.models.append(model)
    else:
        self.hypers = self.gp.kernel[:]
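# The retry loop in train() above is a common pattern: escalate diagonal
# jitter until the kernel matrix factorizes. A standalone sketch of the same
# idea (robust_cholesky is a hypothetical helper, not part of RoBO):
import numpy as np

def robust_cholesky(K, jitter=1e-25, max_jitter=1.0):
    # Retry the factorization with ten times more diagonal jitter each time
    while jitter < max_jitter:
        try:
            return np.linalg.cholesky(K + jitter * np.eye(K.shape[0]))
        except np.linalg.LinAlgError:
            jitter *= 10
    raise np.linalg.LinAlgError("matrix is not positive definite")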
def test(self):
    X_lower = np.array([0])
    X_upper = np.array([1])
    X = init_random_uniform(X_lower, X_upper, 10)
    Y = np.sin(X)

    kernel = george.kernels.Matern52Kernel(np.ones([1]), ndim=1)
    prior = TophatPrior(-2, 2)
    model = GaussianProcess(kernel, prior=prior)
    model.train(X, Y)

    x_test = init_random_uniform(X_lower, X_upper, 3)

    # Shape matching predict
    m, v = model.predict(x_test)
    assert len(m.shape) == 2
    assert m.shape[0] == x_test.shape[0]
    assert m.shape[1] == 1
    assert len(v.shape) == 2
    assert v.shape[0] == x_test.shape[0]
    assert v.shape[1] == x_test.shape[0]

    # TODO: check gradients

    # Shape matching function sampling
    x_ = np.linspace(X_lower, X_upper, 10)
    x_ = x_[:, np.newaxis]
    funcs = model.sample_functions(x_, n_funcs=2)
    assert len(funcs.shape) == 2
    assert funcs.shape[0] == 2
    assert funcs.shape[1] == x_.shape[0]

    # Shape matching predict variance
    x_test1 = np.array([np.random.rand(1)])
    x_test2 = np.random.rand(10)[:, np.newaxis]
    var = model.predict_variance(x_test1, x_test2)
    assert len(var.shape) == 2
    assert var.shape[0] == x_test2.shape[0]
    assert var.shape[1] == 1

    # Check compatibility with all acquisition functions
    acq_func = EI(model, X_upper=X_upper, X_lower=X_lower)
    acq_func.update(model)
    acq_func(x_test)

    acq_func = PI(model, X_upper=X_upper, X_lower=X_lower)
    acq_func.update(model)
    acq_func(x_test)

    acq_func = LCB(model, X_upper=X_upper, X_lower=X_lower)
    acq_func.update(model)
    acq_func(x_test)

    acq_func = InformationGain(model, X_upper=X_upper, X_lower=X_lower)
    acq_func.update(model)
    acq_func(x_test)

    # Check compatibility with all incumbent estimation methods
    rec = BestObservation(model, X_lower, X_upper)
    inc, inc_val = rec.estimate_incumbent(None)
    assert len(inc.shape) == 2
    assert inc.shape[0] == 1
    assert inc.shape[1] == X_upper.shape[0]
    assert len(inc_val.shape) == 2
    assert inc_val.shape[0] == 1
    assert inc_val.shape[1] == 1

    rec = PosteriorMeanOptimization(model, X_lower, X_upper)
    startpoints = init_random_uniform(X_lower, X_upper, 4)
    inc, inc_val = rec.estimate_incumbent(startpoints)
    assert len(inc.shape) == 2
    assert inc.shape[0] == 1
    assert inc.shape[1] == X_upper.shape[0]
    assert len(inc_val.shape) == 2
    assert inc_val.shape[0] == 1
    assert inc_val.shape[1] == 1

    rec = PosteriorMeanAndStdOptimization(model, X_lower, X_upper)
    startpoints = init_random_uniform(X_lower, X_upper, 4)
    inc, inc_val = rec.estimate_incumbent(startpoints)
    assert len(inc.shape) == 2
    assert inc.shape[0] == 1
    assert inc.shape[1] == X_upper.shape[0]
    assert len(inc_val.shape) == 2
    assert inc_val.shape[0] == 1
    assert inc_val.shape[1] == 1
def bayesian_optimization(objective_function, lower, upper, num_iterations=30,
                          X_init=None, Y_init=None, maximizer="random",
                          acquisition_func="log_ei", model_type="gp_mcmc",
                          n_init=3, rng=None, output_path=None, kernel=None,
                          sampling_method="origin", distance="cosine",
                          replacement=True, pool=None, best=None):
    """
    General interface for Bayesian optimization for global black box
    optimization problems.

    Parameters
    ----------
    objective_function: function
        The objective function that is minimized. This function gets a numpy
        array (D,) as input and returns the function value (scalar)
    lower: np.ndarray (D,)
        The lower bound of the search space
    upper: np.ndarray (D,)
        The upper bound of the search space
    num_iterations: int
        The number of iterations (initial design + BO)
    X_init: np.ndarray(N, D)
        Initial points to warmstart BO
    Y_init: np.ndarray(N, 1)
        Function values of the initial points
    maximizer: {"random", "scipy", "differential_evolution"}
        The optimizer for the acquisition function.
    acquisition_func: {"ei", "log_ei", "lcb", "pi"}
        The acquisition function
    model_type: {"gp", "gp_mcmc", "rf", "bohamiann", "dngo"}
        The model for the objective function.
    n_init: int
        Number of points for the initial design. Make sure that it
        is <= num_iterations.
    output_path: string
        Specifies the path where the intermediate output after each iteration
        will be saved. If None no output will be saved to disk.
    rng: numpy.random.RandomState
        Random number generator
    kernel: george kernel object
        {"constant", "polynomial", "linear", "dotproduct", "exp",
        "expsquared", "matern32", "matern52", "rationalquadratic",
        "cosine", "expsine2", "heuristic"}
        Specifies the kernel for the Gaussian process.
    sampling_method: {"origin", "approx", "exact"}
        Specifies the method to choose the next sample to update the model.
        approx: choose the sample in the candidate pool that is closest
        (measured by the distance argument) to the one returned from
        maximizing the acquisition function.
        exact: evaluate all samples in the candidate pool on the acquisition
        function and choose the one with the maximum output.
    distance: {"cosine", "euclidean"}
        The distance measure for approximate sampling.
    replacement: boolean
        Whether to sample from the pool with replacement.
    pool: np.ndarray(N, D)
        Candidate pool containing the possible x
    best: float
        Stop training when the best point is sampled.

    Returns
    -------
    dict with all results
    """
    assert upper.shape[0] == lower.shape[0], "Dimension mismatch"
    assert np.all(lower < upper), "Lower bound >= upper bound"
    assert n_init <= num_iterations, \
        "Number of initial design points has to be <= the number of iterations"

    if rng is None:
        rng = np.random.RandomState(np.random.randint(0, 10000))

    cov_amp = 2

    # n_dims = lower.shape[0]
    # initial_ls = np.ones([n_dims])
    # if kernel == "constant":
    #     exp_kernel = george.kernels.ConstantKernel(1, ndim=n_dims)
    # elif kernel == "polynomial":
    #     exp_kernel = george.kernels.PolynomialKernel(log_sigma2=1, order=3, ndim=n_dims)
    # elif kernel == "linear":
    #     exp_kernel = george.kernels.LinearKernel(log_gamma2=1, order=3, ndim=n_dims)
    # elif kernel == "dotproduct":
    #     exp_kernel = george.kernels.DotProductKernel(ndim=n_dims)
    # elif kernel == "exp":
    #     exp_kernel = george.kernels.ExpKernel(initial_ls, ndim=n_dims)
    # elif kernel == "expsquared":
    #     exp_kernel = george.kernels.ExpSquaredKernel(initial_ls, ndim=n_dims)
    # elif kernel == "matern32":
    #     exp_kernel = george.kernels.Matern32Kernel(initial_ls, ndim=n_dims)
    # elif kernel == "matern52":
    #     exp_kernel = george.kernels.Matern52Kernel(initial_ls, ndim=n_dims)
    # elif kernel == "rationalquadratic":
    #     exp_kernel = george.kernels.RationalQuadraticKernel(log_alpha=1, metric=initial_ls, ndim=n_dims)
    # elif kernel == "cosine":
    #     exp_kernel = george.kernels.CosineKernel(4, ndim=n_dims)
    # elif kernel == "expsine2":
    #     exp_kernel = george.kernels.ExpSine2Kernel(1, 2, ndim=n_dims)
    # elif kernel == "heuristic":
    #     exp_kernel = george.kernels.PythonKernel(heuristic_kernel_function, ndim=n_dims)
    # else:
    #     raise ValueError("'{}' is not a valid kernel".format(kernel))
    kernel = cov_amp * kernel

    prior = DefaultPrior(len(kernel) + 1)

    n_hypers = 3 * len(kernel)
    if n_hypers % 2 == 1:
        n_hypers += 1

    if model_type == "gp":
        model = GaussianProcess(kernel, prior=prior, rng=rng,
                                normalize_output=False, normalize_input=True,
                                lower=lower, upper=upper)
    elif model_type == "gp_mcmc":
        model = GaussianProcessMCMC(kernel, prior=prior,
                                    n_hypers=n_hypers,
                                    chain_length=200,
                                    burnin_steps=100,
                                    normalize_input=True,
                                    normalize_output=False,
                                    rng=rng, lower=lower, upper=upper)
    elif model_type == "rf":
        model = RandomForest(rng=rng)
    elif model_type == "bohamiann":
        model = WrapperBohamiann()
    elif model_type == "dngo":
        model = DNGO()
    else:
        raise ValueError("'{}' is not a valid model".format(model_type))

    if acquisition_func == "ei":
        a = EI(model)
    elif acquisition_func == "log_ei":
        a = LogEI(model)
    elif acquisition_func == "pi":
        a = PI(model)
    elif acquisition_func == "lcb":
        a = LCB(model)
    else:
        raise ValueError("'{}' is not a valid acquisition function".format(
            acquisition_func))

    if model_type == "gp_mcmc":
        acquisition_func = MarginalizationGPMCMC(a)
    else:
        acquisition_func = a

    if maximizer == "random":
        max_func = RandomSampling(acquisition_func, lower, upper, rng=rng)
    elif maximizer == "scipy":
        max_func = SciPyOptimizer(acquisition_func, lower, upper, rng=rng)
    elif maximizer == "differential_evolution":
        max_func = DifferentialEvolution(acquisition_func, lower, upper, rng=rng)
    else:
        raise ValueError("'{}' is not a valid function to maximize the "
                         "acquisition function".format(maximizer))

    if sampling_method == "exact":
        max_func = ExactSampling(acquisition_func, lower, upper,
                                 pool, replacement, rng=rng)
        init_design = init_exact_random
    elif sampling_method == "approx":
        max_func = ApproxSampling(acquisition_func, lower, upper,
                                  pool, replacement, distance, rng=rng)
        init_design = init_exact_random
    else:
        init_design = init_latin_hypercube_sampling

    bo = BayesianOptimization(objective_function, lower, upper,
                              acquisition_func, model, max_func,
                              pool, best, sampling_method,
                              distance, replacement,
                              initial_points=n_init, rng=rng,
                              initial_design=init_design,
                              output_path=output_path)

    x_best, f_min = bo.run(num_iterations, X=X_init, y=Y_init)

    results = dict()
    results["x_opt"] = x_best
    results["f_opt"] = f_min
    results["incumbents"] = [inc for inc in bo.incumbents]
    results["incumbent_values"] = [val for val in bo.incumbents_values]
    results["runtime"] = bo.runtime
    results["overhead"] = bo.time_overhead
    results["X"] = [x.tolist() for x in bo.X]
    results["y"] = [y for y in bo.y]

    return results
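# A hypothetical invocation of the interface above, assuming a george kernel
# object is passed in; the toy objective and settings are illustrative:
import numpy as np
import george

def toy_objective(x):
    # bayesian_optimization expects a (D,) array in and a scalar out
    return float(np.sum((x - 0.5) ** 2))

lower = np.zeros(2)
upper = np.ones(2)
kernel = george.kernels.Matern52Kernel(np.ones(2), ndim=2)

results = bayesian_optimization(toy_objective, lower, upper,
                                num_iterations=10, model_type="gp",
                                kernel=kernel, n_init=3)
print(results["x_opt"], results["f_opt"])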
def train(self, X, y, do_optimize=True, **kwargs):
    """
    Performs MCMC sampling to sample hyperparameter configurations from
    the likelihood and trains for each sample a GP on X and y

    Parameters
    ----------
    X: np.ndarray (N, D)
        Input data points. The dimensionality of X is (N, D),
        with N as the number of points and D is the number of features.
    y: np.ndarray (N,)
        The corresponding target values.
    do_optimize: boolean
        If set to true we perform MCMC sampling otherwise we just use the
        hyperparameter specified in the kernel.
    """
    if self.normalize_input:
        # Normalize input to be in [0, 1]
        self.X, self.lower, self.upper = \
            normalization.zero_one_normalization(X, self.lower, self.upper)
    else:
        self.X = X

    if self.normalize_output:
        # Normalize output to have zero mean and unit standard deviation
        self.y, self.y_mean, self.y_std = \
            normalization.zero_mean_unit_var_normalization(y)
        if self.y_std == 0:
            raise ValueError("Cannot normalize output. "
                             "All targets have the same value")
    else:
        self.y = y

    # Use the mean of the data as mean for the GP
    self.mean = np.mean(self.y, axis=0)
    self.gp = george.GP(self.kernel, mean=self.mean)

    if do_optimize:
        # We have one walker for each hyperparameter configuration
        sampler = emcee.EnsembleSampler(self.n_hypers,
                                        len(self.kernel.pars) + 1,
                                        self.loglikelihood)
        sampler.random_state = self.rng.get_state()

        # Do a burn-in in the first iteration
        if not self.burned:
            # Initialize the walkers by sampling from the prior
            if self.prior is None:
                self.p0 = self.rng.rand(self.n_hypers,
                                        len(self.kernel.pars) + 1)
            else:
                self.p0 = self.prior.sample_from_prior(self.n_hypers)

            # Run MCMC sampling
            self.p0, _, _ = sampler.run_mcmc(self.p0,
                                             self.burnin_steps,
                                             rstate0=self.rng)
            self.burned = True

        # Start sampling
        pos, _, _ = sampler.run_mcmc(self.p0,
                                     self.chain_length,
                                     rstate0=self.rng)

        # Save the current position, it will be the start point in
        # the next iteration
        self.p0 = pos

        # Take the last samples from each walker
        self.hypers = sampler.chain[:, -1]
    else:
        self.hypers = self.gp.kernel[:].tolist()
        self.hypers.append(self.noise)
        self.hypers = [self.hypers]

    self.models = []
    for sample in self.hypers:
        # Instantiate a GP for each hyperparameter configuration
        kernel = deepcopy(self.kernel)
        kernel.pars = np.exp(sample[:-1])
        noise = np.exp(sample[-1])

        model = GaussianProcess(kernel,
                                normalize_output=self.normalize_output,
                                normalize_input=self.normalize_input,
                                noise=noise,
                                lower=self.lower,
                                upper=self.upper,
                                rng=self.rng)
        model.train(X, y, do_optimize=False)
        self.models.append(model)

    self.is_trained = True
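# train() above leaves one GP per hyperparameter sample in self.models. A
# sketch of how predictions are typically marginalized over such an ensemble
# (marginalized_predict is a hypothetical helper; it assumes each model's
# predict returns per-point means and variances, as in the tests above):
import numpy as np

def marginalized_predict(models, X_test):
    # Average the predictive means; combine variances with the law of
    # total variance across the hyperparameter samples
    means, variances = zip(*(m.predict(X_test) for m in models))
    means = np.array(means)
    variances = np.array(variances)
    mu = means.mean(axis=0)
    var = variances.mean(axis=0) + means.var(axis=0)
    return mu, var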
def entropy_search(objective_function, lower, upper, num_iterations=30,
                   maximizer="random", model="gp_mcmc",
                   n_init=3, output_path=None, rng=None):
    """
    Entropy search for global black box optimization problems. This is a
    reimplementation of the entropy search algorithm by Hennig and Schuler[1].

    [1] Entropy search for information-efficient global optimization.
        P. Hennig and C. Schuler.
        JMLR, (1), 2012.

    Parameters
    ----------
    objective_function: function
        The objective function that is minimized. This function gets a numpy
        array (D,) as input and returns the function value (scalar)
    lower: np.ndarray (D,)
        The lower bound of the search space
    upper: np.ndarray (D,)
        The upper bound of the search space
    num_iterations: int
        The number of iterations (initial design + BO)
    maximizer: {"random", "scipy", "differential_evolution"}
        Defines how the acquisition function is maximized.
    model: {"gp", "gp_mcmc"}
        The model for the objective function.
    n_init: int
        Number of points for the initial design. Make sure that it
        is <= num_iterations.
    output_path: string
        Specifies the path where the intermediate output after each iteration
        will be saved. If None no output will be saved to disk.
    rng: numpy.random.RandomState
        Random number generator

    Returns
    -------
    dict with all results
    """
    assert upper.shape[0] == lower.shape[0], "Dimension mismatch"
    assert np.all(lower < upper), "Lower bound >= upper bound"
    assert n_init <= num_iterations, \
        "Number of initial design points has to be <= the number of iterations"

    if rng is None:
        rng = np.random.RandomState(np.random.randint(0, 10000))

    cov_amp = 2
    n_dims = lower.shape[0]

    initial_ls = np.ones([n_dims])
    exp_kernel = george.kernels.Matern52Kernel(initial_ls, ndim=n_dims)
    kernel = cov_amp * exp_kernel

    prior = DefaultPrior(len(kernel) + 1)

    n_hypers = 3 * len(kernel)
    if n_hypers % 2 == 1:
        n_hypers += 1

    if model == "gp":
        gp = GaussianProcess(kernel, prior=prior, rng=rng,
                             normalize_output=False, normalize_input=True,
                             lower=lower, upper=upper)
    elif model == "gp_mcmc":
        gp = GaussianProcessMCMC(kernel, prior=prior,
                                 n_hypers=n_hypers,
                                 chain_length=200,
                                 burnin_steps=100,
                                 normalize_input=True,
                                 normalize_output=False,
                                 rng=rng, lower=lower, upper=upper)
    else:
        print("ERROR: %s is not a valid model!" % model)
        return

    a = InformationGain(gp, lower=lower, upper=upper, sampling_acquisition=EI)

    if model == "gp":
        acquisition_func = a
    elif model == "gp_mcmc":
        acquisition_func = MarginalizationGPMCMC(a)

    if maximizer == "random":
        max_func = RandomSampling(acquisition_func, lower, upper, rng=rng)
    elif maximizer == "scipy":
        max_func = SciPyOptimizer(acquisition_func, lower, upper, rng=rng)
    elif maximizer == "differential_evolution":
        max_func = DifferentialEvolution(acquisition_func, lower, upper, rng=rng)
    else:
        print("ERROR: %s is not a valid function to maximize the "
              "acquisition function!" % maximizer)
        return

    bo = BayesianOptimization(objective_function, lower, upper,
                              acquisition_func, gp, max_func,
                              initial_design=init_latin_hypercube_sampling,
                              initial_points=n_init,
                              rng=rng, output_path=output_path)

    x_best, f_min = bo.run(num_iterations)

    results = dict()
    results["x_opt"] = x_best
    results["f_opt"] = f_min
    results["incumbents"] = [inc for inc in bo.incumbents]
    results["incumbent_values"] = [val for val in bo.incumbents_values]
    results["runtime"] = bo.runtime
    results["overhead"] = bo.time_overhead
    results["X"] = [x.tolist() for x in bo.X]
    results["y"] = [y for y in bo.y]

    return results
class TestInformationGain(unittest.TestCase):

    def setUp(self):
        self.X = np.random.rand(10, 2)
        self.y = np.sinc(self.X * 10 - 5).sum(axis=1)
        kernel = george.kernels.Matern52Kernel(np.array([0.1, 0.1]), ndim=2)
        self.model = GaussianProcess(kernel)
        self.model.train(self.X, self.y)
        self.acquisition_func = InformationGain(self.model,
                                                np.zeros([2]), np.ones([2]))
        self.acquisition_func.update(self.model)

    def test_compute(self):
        X_test = np.random.rand(5, 2)
        a = self.acquisition_func.compute(X_test, derivative=False)
        assert a.shape[0] == X_test.shape[0]
        assert len(a.shape) == 1

    def test_sampling_representer_points(self):
        # Check if representer points are inside the bounds
        assert np.any(self.acquisition_func.zb >= self.acquisition_func.lower)
        assert np.any(self.acquisition_func.zb <= self.acquisition_func.upper)

    def test_compute_pmin(self):
        # Uniform distribution
        m = np.ones([self.acquisition_func.Nb])
        v = np.eye(self.acquisition_func.Nb)

        pmin = epmgp.joint_min(m, v)
        pmin = np.exp(pmin)
        uprob = 1. / self.acquisition_func.Nb

        assert pmin.shape[0] == self.acquisition_func.Nb
        assert np.any(pmin < (uprob + 0.03)) and np.any(pmin > uprob - 0.01)

        # Dirac delta
        m = np.ones([self.acquisition_func.Nb]) * 1000
        m[0] = 1
        v = np.eye(self.acquisition_func.Nb)

        pmin = epmgp.joint_min(m, v)
        pmin = np.exp(pmin)
        uprob = 1. / self.acquisition_func.Nb

        assert pmin[0] == 1.0
        assert np.any(pmin[:1] > 1e-10)

    def test_innovations(self):
        # Case 1: Assume no influence of the test point on the representer points
        rep = np.array([[1.0]])
        x = np.array([[0.0]])
        dm, dv = self.acquisition_func.innovations(x, rep)
        assert np.any(np.abs(dm) < 1e-3)
        assert np.any(np.abs(dv) < 1e-3)

        # Case 2: Test point is close to the representer points
        rep = np.array([[1.0]])
        x = np.array([[0.99]])
        dm, dv = self.acquisition_func.innovations(x, rep)
        assert np.any(np.abs(dm) > 1e-3)
        assert np.any(np.abs(dv) > 1e-3)
def train(self, X, Y, do_optimize=True, **kwargs):
    """
    Performs MCMC sampling to sample hyperparameter configurations from
    the likelihood and trains for each sample a GP on X and Y

    Parameters
    ----------
    X: np.ndarray (N, D)
        Input data points. The dimensionality of X is (N, D),
        with N as the number of points and D is the number of features.
    Y: np.ndarray (N, 1)
        The corresponding target values.
    do_optimize: boolean
        If set to true we perform MCMC sampling otherwise we just use the
        hyperparameter specified in the kernel.
    """
    self.X = X

    # For Fabolas we transform s to (1 - s)^2
    if self.basis_func is not None:
        self.X = deepcopy(X)
        self.X[:, self.dim] = self.basis_func(self.X[:, self.dim])

    self.Y = Y
    if self.normalize_output:
        self.Y_mean = np.mean(Y)
        self.Y_std = np.std(Y)
        self.Y = (Y - self.Y_mean) / self.Y_std

    # Use the mean of the data as mean for the GP
    mean = np.mean(self.Y, axis=0)
    self.gp = george.GP(self.kernel, mean=mean)

    if do_optimize:
        # We have one walker for each hyperparameter configuration
        self.sampler = emcee.EnsembleSampler(self.n_hypers,
                                             len(self.kernel.pars) + 1,
                                             self.loglikelihood)

        # Do a burn-in in the first iteration
        if not self.burned:
            # Initialize the walkers by sampling from the prior
            self.p0 = self.prior.sample_from_prior(self.n_hypers)
            # Run MCMC sampling
            self.p0, _, _ = self.sampler.run_mcmc(self.p0, self.burnin_steps)
            self.burned = True

        # Start sampling
        pos, _, _ = self.sampler.run_mcmc(self.p0, self.chain_length)

        # Save the current position, it will be the start point in
        # the next iteration
        self.p0 = pos

        # Take the last samples from each walker
        self.hypers = self.sampler.chain[:, -1]
    else:
        self.hypers = [self.gp.kernel[:]]

    self.models = []
    for sample in self.hypers:
        # Instantiate a model for each hyperparameter configuration
        kernel = deepcopy(self.kernel)
        kernel.pars = np.exp(sample[:-1])
        noise = np.exp(sample[-1])

        model = GaussianProcess(kernel,
                                basis_func=self.basis_func,
                                dim=self.dim,
                                normalize_output=self.normalize_output,
                                noise=noise)
        model.train(X, Y, do_optimize=False)
        self.models.append(model)
def build_model(lower, upper, model_type="gp_mcmc", model_seed=1, prior_seed=1):
    """
    General interface for building the model used for Bayesian optimization
    of global black box optimization problems.

    Parameters
    ----------
    lower: numpy.ndarray (D,)
        The lower bound of the search space
    upper: numpy.ndarray (D,)
        The upper bound of the search space
    model_type: {"gp", "gp_mcmc", "rf", "bohamiann", "dngo"}
        The model for the objective function.
    model_seed: int
        Seed for the random number generator of the model
    prior_seed: int
        Seed for the random number generator of the prior

    Returns
    -------
    Model
    """
    assert upper.shape[0] == lower.shape[0], "Dimension mismatch"
    assert numpy.all(lower < upper), "Lower bound >= upper bound"

    cov_amp = 2
    n_dims = lower.shape[0]

    initial_ls = numpy.ones([n_dims])
    exp_kernel = george.kernels.Matern52Kernel(initial_ls, ndim=n_dims)
    kernel = cov_amp * exp_kernel

    prior = DefaultPrior(len(kernel) + 1, numpy.random.RandomState(prior_seed))

    n_hypers = 3 * len(kernel)
    if n_hypers % 2 == 1:
        n_hypers += 1

    # NOTE: Some models do not support RNG properly and rely on global RNG
    # state, so we need to seed here as well...
    numpy.random.seed(model_seed)
    model_rng = numpy.random.RandomState(model_seed)
    if model_type == "gp":
        model = GaussianProcess(kernel, prior=prior, rng=model_rng,
                                normalize_output=False, normalize_input=True,
                                lower=lower, upper=upper)
    elif model_type == "gp_mcmc":
        model = GaussianProcessMCMC(kernel, prior=prior,
                                    n_hypers=n_hypers,
                                    chain_length=200,
                                    burnin_steps=100,
                                    normalize_input=True,
                                    normalize_output=False,
                                    rng=model_rng, lower=lower, upper=upper)
    elif model_type == "rf":
        model = RandomForest(rng=model_rng)
    elif model_type == "bohamiann":
        model = WrapperBohamiann()
    elif model_type == "dngo":
        from pybnn.dngo import DNGO
        model = DNGO()
    else:
        raise ValueError("'{}' is not a valid model".format(model_type))
    return model
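# A hypothetical usage of build_model above, assuming the RoBO-style
# train/predict API shown in the tests elsewhere in this file; the data and
# bounds are illustrative:
import numpy as np

lower = np.zeros(2)
upper = np.ones(2)
model = build_model(lower, upper, model_type="gp", model_seed=1, prior_seed=1)

X = np.random.RandomState(0).rand(5, 2)
y = np.sin(X).sum(axis=1)
model.train(X, y, do_optimize=True)
mean, var = model.predict(np.random.rand(3, 2))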
def bayesian_optimization(objective_function, lower, upper, num_iterations=30,
                          maximizer="random", acquisition_func="log_ei",
                          model_type="gp_mcmc", n_init=3, rng=None,
                          output_path=None):
    """
    General interface for Bayesian optimization for global black box
    optimization problems.

    Parameters
    ----------
    objective_function: function
        The objective function that is minimized. This function gets a numpy
        array (D,) as input and returns the function value (scalar)
    lower: np.ndarray (D,)
        The lower bound of the search space
    upper: np.ndarray (D,)
        The upper bound of the search space
    num_iterations: int
        The number of iterations (initial design + BO)
    maximizer: {"direct", "cmaes", "random", "scipy"}
        The optimizer for the acquisition function.
        NOTE: "cmaes" only works in D > 1 dimensions
    acquisition_func: {"ei", "log_ei", "lcb", "pi"}
        The acquisition function
    model_type: {"gp", "gp_mcmc", "rf"}
        The model for the objective function.
    n_init: int
        Number of points for the initial design. Make sure that it
        is <= num_iterations.
    output_path: string
        Specifies the path where the intermediate output after each iteration
        will be saved. If None no output will be saved to disk.
    rng: numpy.random.RandomState
        Random number generator

    Returns
    -------
    dict with all results
    """
    assert upper.shape[0] == lower.shape[0], "Dimension mismatch"
    assert np.all(lower < upper), "Lower bound >= upper bound"
    assert n_init <= num_iterations, \
        "Number of initial design points has to be <= the number of iterations"

    if rng is None:
        rng = np.random.RandomState(np.random.randint(0, 10000))

    cov_amp = 2
    n_dims = lower.shape[0]

    initial_ls = np.ones([n_dims])
    exp_kernel = george.kernels.Matern52Kernel(initial_ls, ndim=n_dims)
    kernel = cov_amp * exp_kernel

    prior = DefaultPrior(len(kernel) + 1)

    n_hypers = 3 * len(kernel)
    if n_hypers % 2 == 1:
        n_hypers += 1

    if model_type == "gp":
        model = GaussianProcess(kernel, prior=prior, rng=rng,
                                normalize_output=False, normalize_input=True,
                                lower=lower, upper=upper)
    elif model_type == "gp_mcmc":
        model = GaussianProcessMCMC(kernel, prior=prior,
                                    n_hypers=n_hypers,
                                    chain_length=200,
                                    burnin_steps=100,
                                    normalize_input=True,
                                    normalize_output=True,
                                    rng=rng, lower=lower, upper=upper)
    elif model_type == "rf":
        model = RandomForest(rng=rng)
    else:
        raise ValueError("'{}' is not a valid model".format(model_type))

    if acquisition_func == "ei":
        a = EI(model)
    elif acquisition_func == "log_ei":
        a = LogEI(model)
    elif acquisition_func == "pi":
        a = PI(model)
    elif acquisition_func == "lcb":
        a = LCB(model)
    else:
        raise ValueError("'{}' is not a valid acquisition function".format(
            acquisition_func))

    if model_type == "gp_mcmc":
        acquisition_func = MarginalizationGPMCMC(a)
    else:
        acquisition_func = a

    if maximizer == "cmaes":
        max_func = CMAES(acquisition_func, lower, upper, verbose=False, rng=rng)
    elif maximizer == "direct":
        max_func = Direct(acquisition_func, lower, upper, verbose=True)
    elif maximizer == "random":
        max_func = RandomSampling(acquisition_func, lower, upper, rng=rng)
    elif maximizer == "scipy":
        max_func = SciPyOptimizer(acquisition_func, lower, upper, rng=rng)
    else:
        raise ValueError("'{}' is not a valid function to maximize the "
                         "acquisition function".format(maximizer))

    bo = BayesianOptimization(objective_function, lower, upper,
                              acquisition_func, model, max_func,
                              initial_points=n_init, rng=rng,
                              output_path=output_path)

    x_best, f_min = bo.run(num_iterations)

    results = dict()
    results["x_opt"] = x_best
    results["f_opt"] = f_min
    results["incumbents"] = [inc for inc in bo.incumbents]
    results["incumbent_values"] = [val for val in bo.incumbents_values]
    results["runtime"] = bo.runtime
    results["overhead"] = bo.time_overhead
    results["X"] = [x.tolist() for x in bo.X]
    results["y"] = [y for y in bo.y]

    return results
def bayesian_optimization(objective_function, lower, upper, num_iterations=30,
                          maximizer="direct", acquisition_func="log_ei",
                          model="gp_mcmc", n_init=3, rng=None):
    """
    General interface for Bayesian optimization for global black box
    optimization problems.

    Parameters
    ----------
    objective_function: function
        The objective function that is minimized. This function gets a numpy
        array (D,) as input and returns the function value (scalar)
    lower: np.ndarray (D,)
        The lower bound of the search space
    upper: np.ndarray (D,)
        The upper bound of the search space
    num_iterations: int
        The number of iterations (initial design + BO)
    maximizer: {"direct", "cmaes"}
        Defines how the acquisition function is maximized.
        NOTE: "cmaes" only works in D > 1 dimensions
    acquisition_func: {"ei", "log_ei", "lcb", "pi"}
        The acquisition function
    model: {"gp", "gp_mcmc"}
        The model for the objective function.
    n_init: int
        Number of points for the initial design. Make sure that it
        is <= num_iterations.
    rng: numpy.random.RandomState
        Random number generator

    Returns
    -------
    dict with all results
    """
    assert upper.shape[0] == lower.shape[0]
    assert n_init <= num_iterations, \
        "Number of initial design points has to be <= the number of iterations"

    if rng is None:
        rng = np.random.RandomState(np.random.randint(0, 10000))

    cov_amp = 2
    n_dims = lower.shape[0]

    initial_ls = np.ones([n_dims])
    exp_kernel = george.kernels.Matern52Kernel(initial_ls, ndim=n_dims)
    kernel = cov_amp * exp_kernel

    prior = DefaultPrior(len(kernel) + 1)

    n_hypers = 3 * len(kernel)
    if n_hypers % 2 == 1:
        n_hypers += 1

    if model == "gp":
        gp = GaussianProcess(kernel, prior=prior, rng=rng,
                             normalize_output=True, normalize_input=True,
                             lower=lower, upper=upper)
    elif model == "gp_mcmc":
        gp = GaussianProcessMCMC(kernel, prior=prior,
                                 n_hypers=n_hypers,
                                 chain_length=200,
                                 burnin_steps=100,
                                 normalize_input=True,
                                 normalize_output=True,
                                 rng=rng, lower=lower, upper=upper)
    else:
        print("ERROR: %s is not a valid model!" % model)
        return

    if acquisition_func == "ei":
        a = EI(gp)
    elif acquisition_func == "log_ei":
        a = LogEI(gp)
    elif acquisition_func == "pi":
        a = PI(gp)
    elif acquisition_func == "lcb":
        a = LCB(gp)
    else:
        print("ERROR: %s is not a valid acquisition function!"
              % acquisition_func)
        return

    if model == "gp":
        acquisition_func = a
    elif model == "gp_mcmc":
        acquisition_func = MarginalizationGPMCMC(a)

    if maximizer == "cmaes":
        max_func = CMAES(acquisition_func, lower, upper, verbose=False, rng=rng)
    elif maximizer == "direct":
        max_func = Direct(acquisition_func, lower, upper, verbose=False)
    else:
        print("ERROR: %s is not a valid function to maximize the "
              "acquisition function!" % maximizer)
        return

    bo = BayesianOptimization(objective_function, lower, upper,
                              acquisition_func, gp, max_func,
                              initial_points=n_init, rng=rng)

    x_best, f_min = bo.run(num_iterations)

    results = dict()
    results["x_opt"] = x_best
    results["f_opt"] = f_min
    results["incumbents"] = [inc for inc in bo.incumbents]
    results["incumbent_values"] = [val for val in bo.incumbents_values]
    results["runtime"] = bo.runtime
    results["overhead"] = bo.time_overhead

    return results
def __init__(self, objective_func, X_lower, X_upper,
             maximizer="direct", acquisition="LogEI",
             par=None, n_func_evals=4000, n_iters=500):
    self.objective_func = objective_func
    self.X_lower = X_lower
    self.X_upper = X_upper
    assert self.X_upper.shape[0] == self.X_lower.shape[0]

    self.task = Task(self.X_lower, self.X_upper, self.objective_func)

    cov_amp = 2
    initial_ls = np.ones([self.task.n_dims])
    exp_kernel = george.kernels.Matern32Kernel(initial_ls,
                                               ndim=self.task.n_dims)
    kernel = cov_amp * exp_kernel
    # kernel = GPy.kern.Matern52(input_dim=task.n_dims)

    prior = DefaultPrior(len(kernel) + 1)
    n_hypers = 3 * len(kernel)
    if n_hypers % 2 == 1:
        n_hypers += 1

    # self.model = GaussianProcessMCMC(kernel, prior=prior, n_hypers=n_hypers,
    #                                  chain_length=500, burnin_steps=100)
    self.model = GaussianProcess(kernel, prior=prior,
                                 dim=self.X_lower.shape[0], noise=1e-3)
    # self.model = GPyModel(kernel)  # MAP estimate

    if acquisition == "EI":
        if par is not None:
            self.a = EI(self.model, X_upper=self.task.X_upper,
                        X_lower=self.task.X_lower, par=par)
        else:
            self.a = EI(self.model, X_upper=self.task.X_upper,
                        X_lower=self.task.X_lower)
    elif acquisition == "LogEI":
        if par is not None:
            self.a = LogEI(self.model, X_upper=self.task.X_upper,
                           X_lower=self.task.X_lower, par=par)
        else:
            self.a = LogEI(self.model, X_upper=self.task.X_upper,
                           X_lower=self.task.X_lower)
    elif acquisition == "PI":
        self.a = PI(self.model, X_upper=self.task.X_upper,
                    X_lower=self.task.X_lower)
    elif acquisition == "UCB":
        if par is not None:
            self.a = LCB(self.model, X_upper=self.task.X_upper,
                         X_lower=self.task.X_lower, par=par)
        else:
            self.a = LCB(self.model, X_upper=self.task.X_upper,
                         X_lower=self.task.X_lower)
    elif acquisition == "UCB_GP":
        if par is not None:
            self.a = LCB_GP(self.model, X_upper=self.task.X_upper,
                            X_lower=self.task.X_lower, par=par)
        else:
            self.a = LCB_GP(self.model, X_upper=self.task.X_upper,
                            X_lower=self.task.X_lower)
    elif acquisition == "InformationGain":
        self.a = InformationGain(self.model, X_upper=self.task.X_upper,
                                 X_lower=self.task.X_lower)
    elif acquisition == "InformationGainMC":
        self.a = InformationGainMC(self.model, X_upper=self.task.X_upper,
                                   X_lower=self.task.X_lower)
    else:
        logger.error("ERROR: %s is not a valid acquisition function!"
                     % acquisition)
        return None

    # self.acquisition_func = IntegratedAcquisition(self.model, self.a,
    #                                               self.task.X_lower,
    #                                               self.task.X_upper)
    self.acquisition_func = self.a

    if maximizer == "cmaes":
        self.max_fkt = cmaes.CMAES(self.acquisition_func,
                                   self.task.X_lower, self.task.X_upper)
    elif maximizer == "direct":
        # default is n_func_evals=400, n_iters=200
        self.max_fkt = direct.Direct(self.acquisition_func,
                                     self.task.X_lower, self.task.X_upper,
                                     n_func_evals=n_func_evals,
                                     n_iters=n_iters)
    elif maximizer == "stochastic_local_search":
        self.max_fkt = stochastic_local_search.StochasticLocalSearch(
            self.acquisition_func, self.task.X_lower, self.task.X_upper)
    elif maximizer == "grid_search":
        self.max_fkt = grid_search.GridSearch(self.acquisition_func,
                                              self.task.X_lower,
                                              self.task.X_upper)
    else:
        logger.error("ERROR: %s is not a valid function to maximize the "
                     "acquisition function!" % maximizer)
        return None

    self.bo = BayesianOptimization(acquisition_func=self.acquisition_func,
                                   model=self.model,
                                   maximize_func=self.max_fkt,
                                   task=self.task)
def train(self, X, y, do_optimize=True, **kwargs):
    """
    Performs MCMC sampling to sample hyperparameter configurations from
    the likelihood and trains for each sample a GP on X and y

    Parameters
    ----------
    X: np.ndarray (N, D)
        Input data points. The dimensionality of X is (N, D),
        with N as the number of points and D is the number of features.
    y: np.ndarray (N,)
        The corresponding target values.
    do_optimize: boolean
        If set to true we perform MCMC sampling otherwise we just use the
        hyperparameter specified in the kernel.
    """
    if self.normalize_input:
        # Normalize input to be in [0, 1]
        self.X, self.lower, self.upper = \
            normalization.zero_one_normalization(X, self.lower, self.upper)
    else:
        self.X = X

    if self.normalize_output:
        # Normalize output to have zero mean and unit standard deviation
        self.y, self.y_mean, self.y_std = \
            normalization.zero_mean_unit_var_normalization(y)
        if self.y_std == 0:
            raise ValueError("Cannot normalize output. "
                             "All targets have the same value")
    else:
        self.y = y

    # Use the mean of the data as mean for the GP
    self.mean = np.mean(self.y, axis=0)
    self.gp = george.GP(self.kernel, mean=self.mean)

    if do_optimize:
        # We have one walker for each hyperparameter configuration
        sampler = emcee.EnsembleSampler(self.n_hypers,
                                        len(self.kernel.pars) + 1,
                                        self.loglikelihood)

        # Do a burn-in in the first iteration
        if not self.burned:
            # Initialize the walkers by sampling from the prior
            if self.prior is None:
                self.p0 = np.random.rand(self.n_hypers,
                                         len(self.kernel.pars) + 1)
            else:
                self.p0 = self.prior.sample_from_prior(self.n_hypers)

            # Run MCMC sampling
            self.p0, _, _ = sampler.run_mcmc(self.p0,
                                             self.burnin_steps,
                                             rstate0=self.rng)
            self.burned = True

        # Start sampling
        pos, _, _ = sampler.run_mcmc(self.p0,
                                     self.chain_length,
                                     rstate0=self.rng)

        # Save the current position, it will be the start point in
        # the next iteration
        self.p0 = pos

        # Take the last samples from each walker
        self.hypers = sampler.chain[:, -1]
    else:
        self.hypers = self.gp.kernel[:].tolist()
        self.hypers.append(self.noise)
        self.hypers = [self.hypers]

    self.models = []
    for sample in self.hypers:
        # Instantiate a GP for each hyperparameter configuration
        kernel = deepcopy(self.kernel)
        kernel.pars = np.exp(sample[:-1])
        noise = np.exp(sample[-1])

        model = GaussianProcess(kernel,
                                normalize_output=self.normalize_output,
                                normalize_input=self.normalize_input,
                                noise=noise,
                                lower=self.lower,
                                upper=self.upper,
                                rng=self.rng)
        model.train(X, y, do_optimize=False)
        self.models.append(model)

    self.is_trained = True