def __init__(self, objective_func, X_lower, X_upper,
             maximizer="direct", acquisition="LogEI", par=None,
             n_func_evals=4000, n_iters=500):
    self.objective_func = objective_func
    self.X_lower = X_lower
    self.X_upper = X_upper
    assert self.X_upper.shape[0] == self.X_lower.shape[0]

    self.task = Task(self.X_lower, self.X_upper, self.objective_func)

    cov_amp = 2
    initial_ls = np.ones([self.task.n_dims])
    exp_kernel = george.kernels.Matern32Kernel(initial_ls,
                                               ndim=self.task.n_dims)
    kernel = cov_amp * exp_kernel
    #kernel = GPy.kern.Matern52(input_dim=task.n_dims)

    prior = DefaultPrior(len(kernel) + 1)

    n_hypers = 3 * len(kernel)
    if n_hypers % 2 == 1:
        n_hypers += 1

    #self.model = GaussianProcessMCMC(kernel, prior=prior,
    #                                 n_hypers=n_hypers,
    #                                 chain_length=500, burnin_steps=100)
    self.model = GaussianProcess(kernel, prior=prior,
                                 dim=self.X_lower.shape[0], noise=1e-3)
    #self.model = GPyModel(kernel)  # MAP estimate

    if acquisition == "EI":
        if par is not None:
            self.a = EI(self.model, X_upper=self.task.X_upper,
                        X_lower=self.task.X_lower, par=par)
        else:
            self.a = EI(self.model, X_upper=self.task.X_upper,
                        X_lower=self.task.X_lower)
    elif acquisition == "LogEI":
        if par is not None:
            self.a = LogEI(self.model, X_upper=self.task.X_upper,
                           X_lower=self.task.X_lower, par=par)
        else:
            self.a = LogEI(self.model, X_upper=self.task.X_upper,
                           X_lower=self.task.X_lower)
    elif acquisition == "PI":
        self.a = PI(self.model, X_upper=self.task.X_upper,
                    X_lower=self.task.X_lower)
    elif acquisition == "UCB":
        if par is not None:
            self.a = LCB(self.model, X_upper=self.task.X_upper,
                         X_lower=self.task.X_lower, par=par)
        else:
            self.a = LCB(self.model, X_upper=self.task.X_upper,
                         X_lower=self.task.X_lower)
    elif acquisition == "UCB_GP":
        if par is not None:
            self.a = LCB_GP(self.model, X_upper=self.task.X_upper,
                            X_lower=self.task.X_lower, par=par)
        else:
            self.a = LCB_GP(self.model, X_upper=self.task.X_upper,
                            X_lower=self.task.X_lower)
    elif acquisition == "InformationGain":
        self.a = InformationGain(self.model, X_upper=self.task.X_upper,
                                 X_lower=self.task.X_lower)
    elif acquisition == "InformationGainMC":
        self.a = InformationGainMC(self.model, X_upper=self.task.X_upper,
                                   X_lower=self.task.X_lower)
    else:
        # Raising here avoids leaving a half-initialized object behind
        raise ValueError("%s is not a valid acquisition function!"
                         % acquisition)

    #self.acquisition_func = IntegratedAcquisition(self.model, self.a,
    #                                              self.task.X_lower,
    #                                              self.task.X_upper)
    self.acquisition_func = self.a

    if maximizer == "cmaes":
        self.max_fkt = cmaes.CMAES(self.acquisition_func,
                                   self.task.X_lower, self.task.X_upper)
    elif maximizer == "direct":
        # Direct's defaults are n_func_evals=400, n_iters=200
        self.max_fkt = direct.Direct(self.acquisition_func,
                                     self.task.X_lower,
                                     self.task.X_upper,
                                     n_func_evals=n_func_evals,
                                     n_iters=n_iters)
    elif maximizer == "stochastic_local_search":
        self.max_fkt = stochastic_local_search.StochasticLocalSearch(
            self.acquisition_func, self.task.X_lower, self.task.X_upper)
    elif maximizer == "grid_search":
        self.max_fkt = grid_search.GridSearch(self.acquisition_func,
                                              self.task.X_lower,
                                              self.task.X_upper)
    else:
        raise ValueError("%s is not a valid function to maximize the "
                         "acquisition function!" % maximizer)

    self.bo = BayesianOptimization(acquisition_func=self.acquisition_func,
                                   model=self.model,
                                   maximize_func=self.max_fkt,
                                   task=self.task)
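# --- Usage sketch ---
# The name of the class owning the __init__ above is not visible in this
# excerpt, so "BayesianOptimizer" below is a hypothetical placeholder;
# substitute the real class name. The objective is assumed to follow the
# library convention of mapping an (N, D) array to an (N, 1) array.
if __name__ == "__main__":
    def quadratic(x):
        # vectorized over the rows of x; returns shape (N, 1)
        return np.sum((x - 0.5) ** 2, axis=1, keepdims=True)

    opt = BayesianOptimizer(quadratic,  # hypothetical class name
                            X_lower=np.array([0.0]),
                            X_upper=np.array([1.0]),
                            maximizer="direct",
                            acquisition="LogEI")
    # self.bo is a BayesianOptimization instance; run() returns the
    # incumbent and its value, as in fmin below.
    best_x, f_min = opt.bo.run(10)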
def fmin(objective_func, X_lower, X_upper, num_iterations=30,
         maximizer="direct", acquisition="LogEI", initX=None, initY=None):

    assert X_upper.shape[0] == X_lower.shape[0]

    class Task(BaseTask):
        def __init__(self, X_lower, X_upper, objective_fkt):
            super(Task, self).__init__(X_lower, X_upper)
            self.objective_function = objective_fkt

    task = Task(X_lower, X_upper, objective_func)

    cov_amp = 2
    initial_ls = np.ones([task.n_dims])
    exp_kernel = george.kernels.Matern52Kernel(initial_ls,
                                               ndim=task.n_dims)
    kernel = cov_amp * exp_kernel

    prior = DefaultPrior(len(kernel) + 1)

    n_hypers = 3 * len(kernel)
    if n_hypers % 2 == 1:
        n_hypers += 1

    model = GaussianProcessMCMC(kernel, prior=prior, n_hypers=n_hypers,
                                chain_length=200, burnin_steps=100)

    if acquisition == "EI":
        a = EI(model, X_upper=task.X_upper, X_lower=task.X_lower)
    elif acquisition == "LogEI":
        a = LogEI(model, X_upper=task.X_upper, X_lower=task.X_lower)
    elif acquisition == "PI":
        a = PI(model, X_upper=task.X_upper, X_lower=task.X_lower)
    elif acquisition == "UCB":
        a = LCB(model, X_upper=task.X_upper, X_lower=task.X_lower)
    elif acquisition == "InformationGain":
        a = InformationGain(model, X_upper=task.X_upper,
                            X_lower=task.X_lower)
    elif acquisition == "InformationGainMC":
        a = InformationGainMC(model, X_upper=task.X_upper,
                              X_lower=task.X_lower)
    else:
        logger.error("ERROR: %s is not a valid acquisition function!"
                     % acquisition)
        return None

    acquisition_func = IntegratedAcquisition(model, a, task.X_lower,
                                             task.X_upper)

    if maximizer == "cmaes":
        max_fkt = cmaes.CMAES(acquisition_func, task.X_lower, task.X_upper)
    elif maximizer == "direct":
        max_fkt = direct.Direct(acquisition_func, task.X_lower,
                                task.X_upper)
    elif maximizer == "stochastic_local_search":
        max_fkt = stochastic_local_search.StochasticLocalSearch(
            acquisition_func, task.X_lower, task.X_upper)
    elif maximizer == "grid_search":
        max_fkt = grid_search.GridSearch(acquisition_func, task.X_lower,
                                         task.X_upper)
    else:
        logger.error("ERROR: %s is not a valid function to maximize the "
                     "acquisition function!" % maximizer)
        return None

    bo = BayesianOptimization(acquisition_func=acquisition_func,
                              model=model,
                              maximize_func=max_fkt,
                              task=task)

    best_x, f_min = bo.run(num_iterations, X=initX, Y=initY)
    return task.retransform(best_x), f_min, model, acquisition_func, max_fkt
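# A minimal usage sketch for fmin, assuming the objective follows the
# library convention of mapping an (N, D) numpy array to an (N, 1) array.
if __name__ == "__main__":
    def objective(x):
        # one-dimensional toy function, vectorized over the rows of x
        return np.sin(3.0 * x) * 4.0 * (x - 1.0) * (x + 2.0)

    result = fmin(objective,
                  X_lower=np.array([0.0]),
                  X_upper=np.array([6.0]),
                  num_iterations=20,
                  acquisition="LogEI")
    if result is not None:
        best_x, f_min, model, acquisition_func, max_fkt = result
        print("x* = %s, f(x*) = %s" % (best_x, f_min))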
class InformationGainTestCase(unittest.TestCase):

    def setUp(self):
        self.task = SinFunction()

        kernel = george.kernels.Matern52Kernel(
            np.ones([self.task.n_dims]) * 0.01,
            ndim=self.task.n_dims)
        noise_kernel = george.kernels.WhiteKernel(1e-9,
                                                  ndim=self.task.n_dims)
        kernel = 3000 * (kernel + noise_kernel)

        prior = default_priors.TophatPrior(-2, 2)
        model = GaussianProcess(kernel, prior=prior)

        X = init_random_uniform(self.task.X_lower, self.task.X_upper, 3)
        Y = self.task.evaluate(X)
        model.train(X, Y, do_optimize=False)

        self.acquisition_func = InformationGain(model,
                                                X_upper=self.task.X_upper,
                                                X_lower=self.task.X_lower)
        self.acquisition_func.update(model)

    def test_sampling_representer_points(self):
        # Check that all representer points lie inside the bounds
        assert np.all(self.acquisition_func.zb >=
                      self.acquisition_func.X_lower)
        assert np.all(self.acquisition_func.zb <=
                      self.acquisition_func.X_upper)

    def test_compute_pmin(self):
        # Uniform case: with identical means, every representer point
        # should be the minimum with roughly equal probability
        m = np.ones([self.acquisition_func.Nb, 1])
        v = np.eye(self.acquisition_func.Nb)

        pmin = np.exp(epmgp.joint_min(m, v))
        uprob = 1. / self.acquisition_func.Nb

        assert pmin.shape[0] == self.acquisition_func.Nb
        assert np.all(pmin < uprob + 0.03)
        assert np.all(pmin > uprob - 0.01)

        # Dirac delta case: the first point has a far lower mean, so
        # essentially all probability mass should concentrate on it
        m = np.ones([self.acquisition_func.Nb, 1]) * 1000
        m[0] = 1
        v = np.eye(self.acquisition_func.Nb)

        pmin = np.exp(epmgp.joint_min(m, v))

        assert np.isclose(pmin[0], 1.0)
        assert np.all(pmin[1:] < 1e-10)

    def test_innovations(self):
        # Case 1: a test point far from the representer point should
        # barely change the posterior there
        rep = np.array([[1.0]])
        x = np.array([[0.0]])
        dm, dv = self.acquisition_func.innovations(x, rep)
        assert np.all(np.abs(dm) < 1e-3)
        assert np.all(np.abs(dv) < 1e-3)

        # Case 2: a test point close to the representer point should
        # have a noticeable effect
        rep = np.array([[1.0]])
        x = np.array([[0.99]])
        dm, dv = self.acquisition_func.innovations(x, rep)
        assert np.any(np.abs(dm) > 1e-3)
        assert np.any(np.abs(dv) > 1e-3)

    def test_general_interface(self):
        X_test = init_random_uniform(self.task.X_lower,
                                     self.task.X_upper, 1)
        a = self.acquisition_func(X_test, False)

        assert len(a.shape) == 2
        assert a.shape[0] == X_test.shape[0]
        assert a.shape[1] == 1

    def test_check_grads(self):
        x_ = np.array([[0.1]])
        assert check_grad(self.acquisition_func,
                          lambda x: -self.acquisition_func(x, True)[1],
                          x_) < 1e-3
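# Standard unittest entry point so the test case above can be executed
# directly (e.g. via `python -m unittest <module>` or by running the file).
if __name__ == "__main__":
    unittest.main()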
def test(self):
    X_lower = np.array([0])
    X_upper = np.array([1])
    X = init_random_uniform(X_lower, X_upper, 10)
    Y = np.sin(X)

    kernel = george.kernels.Matern52Kernel(np.ones([1]), ndim=1)
    prior = TophatPrior(-2, 2)
    model = GaussianProcess(kernel, prior=prior)
    model.train(X, Y)

    x_test = init_random_uniform(X_lower, X_upper, 3)

    # Shape matching predict
    m, v = model.predict(x_test)

    assert len(m.shape) == 2
    assert m.shape[0] == x_test.shape[0]
    assert m.shape[1] == 1
    assert len(v.shape) == 2
    assert v.shape[0] == x_test.shape[0]
    assert v.shape[1] == x_test.shape[0]

    # TODO: check gradients (a hedged sketch follows after this test)

    # Shape matching function sampling
    x_ = np.linspace(X_lower, X_upper, 10)
    x_ = x_[:, np.newaxis]
    funcs = model.sample_functions(x_, n_funcs=2)
    assert len(funcs.shape) == 2
    assert funcs.shape[0] == 2
    assert funcs.shape[1] == x_.shape[0]

    # Shape matching predict variance
    x_test1 = np.array([np.random.rand(1)])
    x_test2 = np.random.rand(10)[:, np.newaxis]
    var = model.predict_variance(x_test1, x_test2)
    assert len(var.shape) == 2
    assert var.shape[0] == x_test2.shape[0]
    assert var.shape[1] == 1

    # Check compatibility with all acquisition functions
    acq_func = EI(model, X_upper=X_upper, X_lower=X_lower)
    acq_func.update(model)
    acq_func(x_test)

    acq_func = PI(model, X_upper=X_upper, X_lower=X_lower)
    acq_func.update(model)
    acq_func(x_test)

    acq_func = LCB(model, X_upper=X_upper, X_lower=X_lower)
    acq_func.update(model)
    acq_func(x_test)

    acq_func = InformationGain(model, X_upper=X_upper, X_lower=X_lower)
    acq_func.update(model)
    acq_func(x_test)

    # Check compatibility with all incumbent estimation methods
    rec = BestObservation(model, X_lower, X_upper)
    inc, inc_val = rec.estimate_incumbent(None)
    assert len(inc.shape) == 2
    assert inc.shape[0] == 1
    assert inc.shape[1] == X_upper.shape[0]
    assert len(inc_val.shape) == 2
    assert inc_val.shape[0] == 1
    assert inc_val.shape[1] == 1

    rec = PosteriorMeanOptimization(model, X_lower, X_upper)
    startpoints = init_random_uniform(X_lower, X_upper, 4)
    inc, inc_val = rec.estimate_incumbent(startpoints)
    assert len(inc.shape) == 2
    assert inc.shape[0] == 1
    assert inc.shape[1] == X_upper.shape[0]
    assert len(inc_val.shape) == 2
    assert inc_val.shape[0] == 1
    assert inc_val.shape[1] == 1

    rec = PosteriorMeanAndStdOptimization(model, X_lower, X_upper)
    startpoints = init_random_uniform(X_lower, X_upper, 4)
    inc, inc_val = rec.estimate_incumbent(startpoints)
    assert len(inc.shape) == 2
    assert inc.shape[0] == 1
    assert inc.shape[1] == X_upper.shape[0]
    assert len(inc_val.shape) == 2
    assert inc_val.shape[0] == 1
    assert inc_val.shape[1] == 1
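# A hedged, self-contained sketch addressing the TODO above: compare the
# model's analytic predictive-mean gradients against central finite
# differences. It assumes the george-backed GaussianProcess exposes
# predictive_gradients(x) -> (dm, dv) like the GPy-backed model tested
# below; the hasattr guard skips the check if that assumption is wrong.
def _check_mean_gradients_sketch():
    X = init_random_uniform(np.array([0]), np.array([1]), 10)
    Y = np.sin(X)
    kernel = george.kernels.Matern52Kernel(np.ones([1]), ndim=1)
    model = GaussianProcess(kernel, prior=TophatPrior(-2, 2))
    model.train(X, Y)

    if not hasattr(model, "predictive_gradients"):
        return  # analytic gradients not implemented for this model

    eps = 1e-6
    x0 = np.array([[0.5]])
    dm, _ = model.predictive_gradients(x0)
    m_plus, _ = model.predict(x0 + eps)
    m_minus, _ = model.predict(x0 - eps)
    fd = (m_plus - m_minus) / (2.0 * eps)
    assert np.allclose(np.asarray(dm).ravel(), fd.ravel(), atol=1e-3)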
def test(self):
    X_lower = np.array([0])
    X_upper = np.array([1])
    X = init_random_uniform(X_lower, X_upper, 10)
    Y = np.sin(X)

    kernel = GPy.kern.Matern52(input_dim=1)
    model = GPyModel(kernel)
    model.train(X, Y)

    x_test = init_random_uniform(X_lower, X_upper, 3)

    # Shape matching predict
    m, v = model.predict(x_test, full_cov=True)

    assert len(m.shape) == 2
    assert m.shape[0] == x_test.shape[0]
    assert m.shape[1] == 1
    assert len(v.shape) == 2
    assert v.shape[0] == x_test.shape[0]
    assert v.shape[1] == x_test.shape[0]

    # Check gradients
    dm, dv = model.predictive_gradients(x_test)
    assert len(dm.shape) == 2
    assert dm.shape[0] == x_test.shape[0]
    assert dm.shape[1] == x_test.shape[1]
    assert len(dv.shape) == 2
    assert dv.shape[0] == x_test.shape[0]
    assert dv.shape[1] == 1

    # Shape matching function sampling
    x_ = np.linspace(X_lower, X_upper, 10)
    x_ = x_[:, np.newaxis]
    funcs = model.sample_functions(x_, n_funcs=2)
    assert len(funcs.shape) == 2
    assert funcs.shape[0] == 2
    assert funcs.shape[1] == x_.shape[0]

    # Shape matching predict variance
    x_test2 = np.array([np.random.rand(1)])
    x_test1 = np.random.rand(10)[:, np.newaxis]
    var = model.predict_variance(x_test1, x_test2)
    assert len(var.shape) == 2
    assert var.shape[0] == x_test1.shape[0]
    assert var.shape[1] == 1

    # Check compatibility with all acquisition functions
    acq_func = EI(model, X_upper=X_upper, X_lower=X_lower)
    acq_func.update(model)
    acq_func(x_test)

    acq_func = PI(model, X_upper=X_upper, X_lower=X_lower)
    acq_func.update(model)
    acq_func(x_test)

    acq_func = LCB(model, X_upper=X_upper, X_lower=X_lower)
    acq_func.update(model)
    acq_func(x_test)

    acq_func = InformationGain(model, X_upper=X_upper, X_lower=X_lower)
    acq_func.update(model)
    acq_func(x_test)

    # Check compatibility with all incumbent estimation methods
    rec = BestObservation(model, X_lower, X_upper)
    inc, inc_val = rec.estimate_incumbent(None)
    assert len(inc.shape) == 2
    assert inc.shape[0] == 1
    assert inc.shape[1] == X_upper.shape[0]
    assert len(inc_val.shape) == 2
    assert inc_val.shape[0] == 1
    assert inc_val.shape[1] == 1

    rec = PosteriorMeanOptimization(model, X_lower, X_upper)
    startpoints = init_random_uniform(X_lower, X_upper, 4)
    inc, inc_val = rec.estimate_incumbent(startpoints)
    assert len(inc.shape) == 2
    assert inc.shape[0] == 1
    assert inc.shape[1] == X_upper.shape[0]
    assert len(inc_val.shape) == 2
    assert inc_val.shape[0] == 1
    assert inc_val.shape[1] == 1

    rec = PosteriorMeanAndStdOptimization(model, X_lower, X_upper)
    startpoints = init_random_uniform(X_lower, X_upper, 4)
    inc, inc_val = rec.estimate_incumbent(startpoints)
    assert len(inc.shape) == 2
    assert inc.shape[0] == 1
    assert inc.shape[1] == X_upper.shape[0]
    assert len(inc_val.shape) == 2
    assert inc_val.shape[0] == 1
    assert inc_val.shape[1] == 1