import unittest

import numpy as np
import george

# Import path assumes the RoBO package layout
from robo.models.gaussian_process_mcmc import GaussianProcessMCMC


class TestGaussianProcessMCMC(unittest.TestCase):

    def setUp(self):
        self.X = np.random.randn(10, 2)
        self.y = np.sinc(self.X * 10 - 5).sum(axis=1)
        kernel = george.kernels.Matern52Kernel(np.ones(self.X.shape[1]),
                                               ndim=self.X.shape[1])
        self.model = GaussianProcessMCMC(kernel, n_hypers=6,
                                         burnin_steps=100,
                                         chain_length=200)
        self.model.train(self.X, self.y, do_optimize=True)

    def test_predict(self):
        X_test = np.random.rand(10, 2)
        m, v = self.model.predict(X_test)

        assert len(m.shape) == 1
        assert m.shape[0] == X_test.shape[0]
        assert len(v.shape) == 1
        assert v.shape[0] == X_test.shape[0]

    def test_loglikelihood(self):
        theta = np.array([0.2, 0.2, 0.001])
        ll = self.model.loglikelihood(theta)

    def test_get_incumbent(self):
        inc, inc_val = self.model.get_incumbent()
        b = np.argmin(self.y)

        np.testing.assert_almost_equal(inc, self.X[b], decimal=5)
        assert inc_val == self.y[b]

def suggest_configuration(self):
    if self.X is None and self.y is None:
        new_x = init_random_uniform(self.lower, self.upper,
                                    n_points=1, rng=self.rng)[0, :]

    elif self.X.shape[0] == 1:
        # We need at least 2 data points to train a GP
        new_x = init_random_uniform(self.lower, self.upper,
                                    n_points=1, rng=self.rng)[0, :]

    else:
        cov_amp = 1
        n_dims = self.lower.shape[0]

        initial_ls = np.ones([n_dims])
        exp_kernel = george.kernels.Matern52Kernel(initial_ls, ndim=n_dims)
        kernel = cov_amp * exp_kernel

        prior = DefaultPrior(len(kernel) + 1)

        model = GaussianProcessMCMC(kernel, prior=prior,
                                    n_hypers=self.n_hypers,
                                    chain_length=self.chain_length,
                                    burnin_steps=self.burnin,
                                    normalize_input=False,
                                    normalize_output=True,
                                    rng=self.rng,
                                    lower=self.lower,
                                    upper=self.upper)

        a = LogEI(model)
        acquisition_func = MarginalizationGPMCMC(a)

        max_func = Direct(acquisition_func, self.lower, self.upper,
                          verbose=False)

        model.train(self.X, self.y)
        acquisition_func.update(model)

        new_x = max_func.maximize()

    next_config = Configuration(self.config_space, vector=new_x)

    # Transform to sacred configuration
    result = configspace_config_to_sacred(next_config)

    return result

# Variant of the test above that places a TophatPrior on the hyperparameters;
# import path assumes the RoBO package layout
from robo.priors.base_prior import TophatPrior


class TestGaussianProcessMCMC(unittest.TestCase):

    def setUp(self):
        self.X = np.random.randn(10, 2)
        self.y = np.sinc(self.X * 10 - 5).sum(axis=1)
        kernel = george.kernels.Matern52Kernel(np.ones(self.X.shape[1]),
                                               ndim=self.X.shape[1])
        prior = TophatPrior(-2, 2)
        self.model = GaussianProcessMCMC(kernel, prior=prior, n_hypers=6,
                                         burnin_steps=100,
                                         chain_length=200)
        self.model.train(self.X, self.y, do_optimize=True)

    def test_predict(self):
        X_test = np.random.rand(10, 2)
        m, v = self.model.predict(X_test)

        assert len(m.shape) == 1
        assert m.shape[0] == X_test.shape[0]
        assert len(v.shape) == 1
        assert v.shape[0] == X_test.shape[0]

    def test_loglikelihood(self):
        theta = np.array([0.2, 0.2, 0.001])
        ll = self.model.loglikelihood(theta)

    def test_get_incumbent(self):
        inc, inc_val = self.model.get_incumbent()
        b = np.argmin(self.y)

        np.testing.assert_almost_equal(inc, self.X[b], decimal=5)
        assert inc_val == self.y[b]

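# Standard unittest entry point so the test classes above can be run directly.
if __name__ == "__main__":
    unittest.main()
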
def fabolas_fmin(objective_func, X_lower, X_upper, num_iterations=100,
                 n_init=40, burnin=100, chain_length=200, Nb=50,
                 initX=None, initY=None):
    """
    Interface to Fabolas [1], which models loss and training time as a
    function of dataset size and automatically trades off high information
    gain about the global optimum against computational cost.

    [1] Fast Bayesian Optimization of Machine Learning Hyperparameters
        on Large Datasets
        A. Klein and S. Falkner and S. Bartels and P. Hennig and F. Hutter
        http://arxiv.org/abs/1605.07079

    Parameters
    ----------
    objective_func : func
        Function handle for the objective function that gets a configuration x
        and the training data subset size s and returns the validation error
        of x. See the example_fmin_fabolas.py script for how the interface to
        this function should look.
    X_lower : np.ndarray(D)
        Lower bound of the input space
    X_upper : np.ndarray(D)
        Upper bound of the input space
    num_iterations : int
        Number of iterations for the Bayesian optimization loop
    n_init : int
        Number of points for the initial design that is run before BO starts
    burnin : int
        Determines the length of the burnin phase of the MCMC sampling
        for the GP hyperparameters
    chain_length : int
        Specifies the chain length of the MCMC sampling for the GP
        hyperparameters
    Nb : int
        The number of representer points for approximating pmin

    Returns
    -------
    x : (1, D) numpy array
        The estimated global optimum, also called the incumbent
    """

    assert X_upper.shape[0] == X_lower.shape[0]

    def f(x):
        x_ = x[:, :-1]
        s = x[:, -1]
        return objective_func(x_, s)

    class Task(BaseTask):
        def __init__(self, X_lower, X_upper, f):
            super(Task, self).__init__(X_lower, X_upper)
            self.objective_function = f
            is_env = np.zeros([self.n_dims])
            # Assume the last dimension to be the system size
            is_env[-1] = 1
            self.is_env = is_env

    task = Task(X_lower, X_upper, f)

    def basis_function(x):
        return (1 - x) ** 2

    # Define model for the objective function

    # Covariance amplitude
    cov_amp = 1
    kernel = cov_amp

    # ARD kernel for the configuration space
    for d in range(task.n_dims - 1):
        kernel *= george.kernels.Matern52Kernel(np.ones([1]) * 0.01,
                                                ndim=task.n_dims, dim=d)

    # Kernel for the environmental variable
    # We use (1 - s)**2 as basis function for the Bayesian linear kernel
    degree = 1
    env_kernel = george.kernels.BayesianLinearRegressionKernel(
        task.n_dims, dim=task.n_dims - 1, degree=degree)
    env_kernel[:] = np.ones([degree + 1]) * 0.1

    kernel *= env_kernel

    n_hypers = 3 * len(kernel)
    if n_hypers % 2 == 1:
        n_hypers += 1

    # Define the prior of the kernel's hyperparameters
    prior = EnvPrior(len(kernel) + 1, n_ls=task.n_dims - 1,
                     n_lr=(degree + 1))

    model = GaussianProcessMCMC(kernel, prior=prior,
                                burnin=burnin,
                                chain_length=chain_length,
                                n_hypers=n_hypers,
                                basis_func=basis_function,
                                dim=task.n_dims - 1)

    # Define model for the cost function
    cost_cov_amp = 3000
    cost_kernel = cost_cov_amp

    for d in range(task.n_dims - 1):
        cost_kernel *= george.kernels.Matern52Kernel(np.ones([1]) * 0.1,
                                                     ndim=task.n_dims, dim=d)

    cost_degree = 1
    cost_env_kernel = george.kernels.BayesianLinearRegressionKernel(
        task.n_dims, dim=task.n_dims - 1, degree=cost_degree)
    cost_env_kernel[:] = np.ones([cost_degree + 1]) * 0.1

    cost_kernel *= cost_env_kernel

    cost_prior = EnvPrior(len(cost_kernel) + 1, n_ls=task.n_dims - 1,
                          n_lr=(cost_degree + 1))

    cost_model = GaussianProcessMCMC(cost_kernel, prior=cost_prior,
                                     burnin=burnin,
                                     chain_length=chain_length,
                                     n_hypers=n_hypers)

    # Define acquisition function and maximizer
    es = InformationGainPerUnitCost(model, cost_model,
                                    task.X_lower, task.X_upper,
                                    task.is_env, Nb=Nb)
    acquisition_func = IntegratedAcquisition(model, es,
                                             task.X_lower, task.X_upper,
                                             cost_model)

    maximizer = cmaes.CMAES(acquisition_func, task.X_lower, task.X_upper)

    rec = BestProjectedObservation(model, task.X_lower, task.X_upper,
                                   task.is_env)

    bo = Fabolas(acquisition_func=acquisition_func,
                 model=model,
                 cost_model=cost_model,
                 maximize_func=maximizer,
                 task=task,
                 initial_points=n_init,
                 incumbent_estimation=rec)

    best_x, f_min = bo.run(num_iterations, X=initX, Y=initY)

    return task.retransform(best_x), f_min, model, acquisition_func, maximizer

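# A hypothetical usage sketch for fabolas_fmin above. The toy objective and
# the bounds are illustrative assumptions: per the docstring, the last input
# dimension is treated as the training-data subset size s.
import numpy as np

def toy_objective(x, s):
    # Pretend validation error: quadratic in the configuration, plus a
    # penalty that shrinks as the subset size s grows
    return np.sum((x - 0.3) ** 2, axis=1) + 100.0 / np.maximum(s, 1.0)

X_lower = np.array([0.0, 0.0, 64.0])     # last entry: smallest subset size
X_upper = np.array([1.0, 1.0, 50000.0])  # last entry: full dataset size
x_best, f_min, model, acq, maximizer = fabolas_fmin(
    toy_objective, X_lower, X_upper, num_iterations=50)
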
def bayesian_optimization(objective_function, lower, upper, num_iterations=30,
                          maximizer="random", acquisition_func="log_ei",
                          model_type="gp_mcmc", n_init=3, rng=None,
                          output_path=None):
    """
    General interface for Bayesian optimization for global black box
    optimization problems.

    Parameters
    ----------
    objective_function: function
        The objective function that is minimized. This function gets a numpy
        array (D,) as input and returns the function value (scalar)
    lower: np.ndarray (D,)
        The lower bound of the search space
    upper: np.ndarray (D,)
        The upper bound of the search space
    num_iterations: int
        The number of iterations (initial design + BO)
    maximizer: {"direct", "cmaes", "random", "scipy"}
        The optimizer for the acquisition function.
        NOTE: "cmaes" only works in D > 1 dimensions
    acquisition_func: {"ei", "log_ei", "lcb", "pi"}
        The acquisition function
    model_type: {"gp", "gp_mcmc", "rf"}
        The model for the objective function.
    n_init: int
        Number of points for the initial design. Make sure that it
        is <= num_iterations.
    output_path: string
        Specifies the path where the intermediate output after each iteration
        will be saved. If None no output will be saved to disk.
    rng: numpy.random.RandomState
        Random number generator

    Returns
    -------
    dict with all results
    """
    assert upper.shape[0] == lower.shape[0], "Dimension mismatch"
    assert np.all(lower < upper), "Lower bound >= upper bound"
    assert n_init <= num_iterations, \
        "Number of initial design points has to be <= the number of iterations"

    if rng is None:
        rng = np.random.RandomState(np.random.randint(0, 10000))

    cov_amp = 2
    n_dims = lower.shape[0]

    initial_ls = np.ones([n_dims])
    exp_kernel = george.kernels.Matern52Kernel(initial_ls, ndim=n_dims)
    kernel = cov_amp * exp_kernel

    prior = DefaultPrior(len(kernel) + 1)

    n_hypers = 3 * len(kernel)
    if n_hypers % 2 == 1:
        n_hypers += 1

    if model_type == "gp":
        model = GaussianProcess(kernel, prior=prior, rng=rng,
                                normalize_output=False,
                                normalize_input=True,
                                lower=lower, upper=upper)
    elif model_type == "gp_mcmc":
        model = GaussianProcessMCMC(kernel, prior=prior,
                                    n_hypers=n_hypers,
                                    chain_length=200,
                                    burnin_steps=100,
                                    normalize_input=True,
                                    normalize_output=True,
                                    rng=rng, lower=lower, upper=upper)
    elif model_type == "rf":
        model = RandomForest(rng=rng)
    else:
        raise ValueError("'{}' is not a valid model".format(model_type))

    if acquisition_func == "ei":
        a = EI(model)
    elif acquisition_func == "log_ei":
        a = LogEI(model)
    elif acquisition_func == "pi":
        a = PI(model)
    elif acquisition_func == "lcb":
        a = LCB(model)
    else:
        raise ValueError("'{}' is not a valid acquisition function".format(
            acquisition_func))

    if model_type == "gp_mcmc":
        acquisition_func = MarginalizationGPMCMC(a)
    else:
        acquisition_func = a

    if maximizer == "cmaes":
        max_func = CMAES(acquisition_func, lower, upper, verbose=False,
                         rng=rng)
    elif maximizer == "direct":
        max_func = Direct(acquisition_func, lower, upper, verbose=True)
    elif maximizer == "random":
        max_func = RandomSampling(acquisition_func, lower, upper, rng=rng)
    elif maximizer == "scipy":
        max_func = SciPyOptimizer(acquisition_func, lower, upper, rng=rng)
    else:
        raise ValueError("'{}' is not a valid function to maximize the "
                         "acquisition function".format(maximizer))

    bo = BayesianOptimization(objective_function, lower, upper,
                              acquisition_func, model, max_func,
                              initial_points=n_init, rng=rng,
                              output_path=output_path)

    x_best, f_min = bo.run(num_iterations)

    results = dict()
    results["x_opt"] = x_best
    results["f_opt"] = f_min
    results["incumbents"] = [inc for inc in bo.incumbents]
    results["incumbent_values"] = [val for val in bo.incumbents_values]
    results["runtime"] = bo.runtime
    results["overhead"] = bo.time_overhead
    results["X"] = [x.tolist() for x in bo.X]
    results["y"] = [y for y in bo.y]

    return results

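# A minimal usage sketch of bayesian_optimization above, mirroring the RoBO
# README example; the synthetic objective is illustrative only.
import numpy as np

def objective(x):
    # One-dimensional synthetic function; x is a numpy array of shape (1,)
    return np.sin(3 * x[0]) * 4 * (x[0] - 1) * (x[0] + 2)

lower = np.array([0])
upper = np.array([6])
results = bayesian_optimization(objective, lower, upper, num_iterations=20)
print(results["x_opt"], results["f_opt"])
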
def bayesian_optimization(objective_function, lower, upper, num_iterations=30,
                          X_init=None, Y_init=None, maximizer="random",
                          acquisition_func="log_ei", model_type="gp_mcmc",
                          n_init=3, rng=None, output_path=None, kernel=None,
                          sampling_method="origin", distance="cosine",
                          replacement=True, pool=None, best=None):
    """
    General interface for Bayesian optimization for global black box
    optimization problems.

    Parameters
    ----------
    objective_function: function
        The objective function that is minimized. This function gets a numpy
        array (D,) as input and returns the function value (scalar)
    lower: np.ndarray (D,)
        The lower bound of the search space
    upper: np.ndarray (D,)
        The upper bound of the search space
    num_iterations: int
        The number of iterations (initial design + BO)
    X_init: np.ndarray(N,D)
        Initial points to warmstart BO
    Y_init: np.ndarray(N,1)
        Function values of the initial points
    maximizer: {"random", "scipy", "differential_evolution"}
        The optimizer for the acquisition function.
    acquisition_func: {"ei", "log_ei", "lcb", "pi"}
        The acquisition function
    model_type: {"gp", "gp_mcmc", "rf", "bohamiann", "dngo"}
        The model for the objective function.
    n_init: int
        Number of points for the initial design. Make sure that it
        is <= num_iterations.
    output_path: string
        Specifies the path where the intermediate output after each iteration
        will be saved. If None no output will be saved to disk.
    rng: numpy.random.RandomState
        Random number generator
    kernel: george.kernels.ConstantKernel
        {"constant", "polynomial", "linear", "dotproduct", "exp",
         "expsquared", "matern32", "matern52", "rationalquadratic",
         "cosine", "expsine2", "heuristic"}
        Specify the kernel for the Gaussian process.
    sampling_method: {"origin", "approx", "exact"}
        Specify the method to choose the next sample to update the model.
        approx: choose the sample in the candidate pool that is closest
        (measured by the distance arg) to the one returned from maximizing
        the acquisition function.
        exact: evaluate all samples in the candidate pool on the acquisition
        function and choose the one with maximum output.
    distance: {"cosine", "euclidean"}
        The distance measurement for approximation sampling.
    replacement: boolean
        Whether to sample from the pool with replacement.
    pool: np.ndarray(N,D)
        Candidate pool containing possible x
    best: float
        Stop training when the best point is sampled.
    Returns
    -------
    dict with all results
    """
    assert upper.shape[0] == lower.shape[0], "Dimension mismatch"
    assert np.all(lower < upper), "Lower bound >= upper bound"
    assert n_init <= num_iterations, \
        "Number of initial design points has to be <= the number of iterations"

    if rng is None:
        rng = np.random.RandomState(np.random.randint(0, 10000))

    cov_amp = 2
    # n_dims = lower.shape[0]
    # initial_ls = np.ones([n_dims])

    # if kernel == "constant":
    #     exp_kernel = george.kernels.ConstantKernel(1, ndim=n_dims)
    # elif kernel == "polynomial":
    #     exp_kernel = george.kernels.PolynomialKernel(log_sigma2=1, order=3,
    #                                                  ndim=n_dims)
    # elif kernel == "linear":
    #     exp_kernel = george.kernels.LinearKernel(log_gamma2=1, order=3,
    #                                              ndim=n_dims)
    # elif kernel == "dotproduct":
    #     exp_kernel = george.kernels.DotProductKernel(ndim=n_dims)
    # elif kernel == "exp":
    #     exp_kernel = george.kernels.ExpKernel(initial_ls, ndim=n_dims)
    # elif kernel == "expsquared":
    #     exp_kernel = george.kernels.ExpSquaredKernel(initial_ls, ndim=n_dims)
    # elif kernel == "matern32":
    #     exp_kernel = george.kernels.Matern32Kernel(initial_ls, ndim=n_dims)
    # elif kernel == "matern52":
    #     exp_kernel = george.kernels.Matern52Kernel(initial_ls, ndim=n_dims)
    # elif kernel == "rationalquadratic":
    #     exp_kernel = george.kernels.RationalQuadraticKernel(log_alpha=1,
    #                                                         metric=initial_ls,
    #                                                         ndim=n_dims)
    # elif kernel == "cosine":
    #     exp_kernel = george.kernels.CosineKernel(4, ndim=n_dims)
    # elif kernel == "expsine2":
    #     exp_kernel = george.kernels.ExpSine2Kernel(1, 2, ndim=n_dims)
    # elif kernel == "heuristic":
    #     exp_kernel = george.kernels.PythonKernel(heuristic_kernel_function,
    #                                              ndim=n_dims)
    # else:
    #     raise ValueError("'{}' is not a valid kernel".format(kernel))

    # NOTE: the string-based kernel selection above is commented out, so a
    # george kernel object must be passed via the `kernel` argument.
    kernel = cov_amp * kernel

    prior = DefaultPrior(len(kernel) + 1)

    n_hypers = 3 * len(kernel)
    if n_hypers % 2 == 1:
        n_hypers += 1

    if model_type == "gp":
        model = GaussianProcess(kernel, prior=prior, rng=rng,
                                normalize_output=False,
                                normalize_input=True,
                                lower=lower, upper=upper)
    elif model_type == "gp_mcmc":
        model = GaussianProcessMCMC(kernel, prior=prior,
                                    n_hypers=n_hypers,
                                    chain_length=200,
                                    burnin_steps=100,
                                    normalize_input=True,
                                    normalize_output=False,
                                    rng=rng, lower=lower, upper=upper)
    elif model_type == "rf":
        model = RandomForest(rng=rng)
    elif model_type == "bohamiann":
        model = WrapperBohamiann()
    elif model_type == "dngo":
        model = DNGO()
    else:
        raise ValueError("'{}' is not a valid model".format(model_type))

    if acquisition_func == "ei":
        a = EI(model)
    elif acquisition_func == "log_ei":
        a = LogEI(model)
    elif acquisition_func == "pi":
        a = PI(model)
    elif acquisition_func == "lcb":
        a = LCB(model)
    else:
        raise ValueError("'{}' is not a valid acquisition function".format(
            acquisition_func))

    if model_type == "gp_mcmc":
        acquisition_func = MarginalizationGPMCMC(a)
    else:
        acquisition_func = a

    if maximizer == "random":
        max_func = RandomSampling(acquisition_func, lower, upper, rng=rng)
    elif maximizer == "scipy":
        max_func = SciPyOptimizer(acquisition_func, lower, upper, rng=rng)
    elif maximizer == "differential_evolution":
        max_func = DifferentialEvolution(acquisition_func, lower, upper,
                                         rng=rng)
    else:
        raise ValueError("'{}' is not a valid function to maximize the "
                         "acquisition function".format(maximizer))

    if sampling_method == "exact":
        max_func = ExactSampling(acquisition_func, lower, upper, pool,
                                 replacement, rng=rng)
        init_design = init_exact_random
    elif sampling_method == "approx":
        max_func = ApproxSampling(acquisition_func, lower, upper, pool,
                                  replacement, distance, rng=rng)
        init_design = init_exact_random
    else:
        init_design = init_latin_hypercube_sampling
    bo = BayesianOptimization(objective_function, lower, upper,
                              acquisition_func, model, max_func,
                              pool, best, sampling_method,
                              distance, replacement,
                              initial_points=n_init, rng=rng,
                              initial_design=init_design,
                              output_path=output_path)

    x_best, f_min = bo.run(num_iterations, X=X_init, y=Y_init)

    results = dict()
    results["x_opt"] = x_best
    results["f_opt"] = f_min
    results["incumbents"] = [inc for inc in bo.incumbents]
    results["incumbent_values"] = [val for val in bo.incumbents_values]
    results["runtime"] = bo.runtime
    results["overhead"] = bo.time_overhead
    results["X"] = [x.tolist() for x in bo.X]
    results["y"] = [y for y in bo.y]

    return results

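# A hypothetical call to the pool-based variant above. Because the
# string-based kernel selection is commented out, a george kernel object is
# passed explicitly; the candidate pool is a random matrix for illustration.
import numpy as np
import george

n_dims = 2
pool = np.random.rand(100, n_dims)  # assumed candidate pool of possible x
kernel = george.kernels.Matern52Kernel(np.ones(n_dims), ndim=n_dims)

def objective(x):
    return float(np.sum((x - 0.5) ** 2))

results = bayesian_optimization(objective, np.zeros(n_dims), np.ones(n_dims),
                                num_iterations=20, kernel=kernel,
                                sampling_method="exact", pool=pool,
                                replacement=False)
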
def entropy_search(objective_function, lower, upper, num_iterations=30,
                   maximizer="random", model="gp_mcmc", n_init=3,
                   output_path=None, rng=None):
    """
    Entropy search for global black box optimization problems. This is a
    reimplementation of the entropy search algorithm by Hennig and
    Schuler [1].

    [1] Entropy search for information-efficient global optimization.
        P. Hennig and C. Schuler.
        JMLR, (1), 2012.

    Parameters
    ----------
    objective_function: function
        The objective function that is minimized. This function gets a numpy
        array (D,) as input and returns the function value (scalar)
    lower: np.ndarray (D,)
        The lower bound of the search space
    upper: np.ndarray (D,)
        The upper bound of the search space
    num_iterations: int
        The number of iterations (initial design + BO)
    maximizer: {"random", "scipy", "differential_evolution"}
        Defines how the acquisition function is maximized.
    model: {"gp", "gp_mcmc"}
        The model for the objective function.
    n_init: int
        Number of points for the initial design. Make sure that it
        is <= num_iterations.
    output_path: string
        Specifies the path where the intermediate output after each iteration
        will be saved. If None no output will be saved to disk.
    rng: numpy.random.RandomState
        Random number generator

    Returns
    -------
    dict with all results
    """
    assert upper.shape[0] == lower.shape[0], "Dimension mismatch"
    assert np.all(lower < upper), "Lower bound >= upper bound"
    assert n_init <= num_iterations, \
        "Number of initial design points has to be <= the number of iterations"

    if rng is None:
        rng = np.random.RandomState(np.random.randint(0, 10000))

    cov_amp = 2
    n_dims = lower.shape[0]

    initial_ls = np.ones([n_dims])
    exp_kernel = george.kernels.Matern52Kernel(initial_ls, ndim=n_dims)
    kernel = cov_amp * exp_kernel

    prior = DefaultPrior(len(kernel) + 1)

    n_hypers = 3 * len(kernel)
    if n_hypers % 2 == 1:
        n_hypers += 1

    if model == "gp":
        gp = GaussianProcess(kernel, prior=prior, rng=rng,
                             normalize_output=False,
                             normalize_input=True,
                             lower=lower, upper=upper)
    elif model == "gp_mcmc":
        gp = GaussianProcessMCMC(kernel, prior=prior,
                                 n_hypers=n_hypers,
                                 chain_length=200,
                                 burnin_steps=100,
                                 normalize_input=True,
                                 normalize_output=False,
                                 rng=rng, lower=lower, upper=upper)
    else:
        print("ERROR: %s is not a valid model!" % model)
        return

    a = InformationGain(gp, lower=lower, upper=upper,
                        sampling_acquisition=EI)

    if model == "gp":
        acquisition_func = a
    elif model == "gp_mcmc":
        acquisition_func = MarginalizationGPMCMC(a)

    if maximizer == "random":
        max_func = RandomSampling(acquisition_func, lower, upper, rng=rng)
    elif maximizer == "scipy":
        max_func = SciPyOptimizer(acquisition_func, lower, upper, rng=rng)
    elif maximizer == "differential_evolution":
        max_func = DifferentialEvolution(acquisition_func, lower, upper,
                                         rng=rng)
    else:
        print("ERROR: %s is not a valid function to maximize the "
              "acquisition function!" % maximizer)
        return

    bo = BayesianOptimization(objective_function, lower, upper,
                              acquisition_func, gp, max_func,
                              initial_design=init_latin_hypercube_sampling,
                              initial_points=n_init,
                              rng=rng, output_path=output_path)

    x_best, f_min = bo.run(num_iterations)

    results = dict()
    results["x_opt"] = x_best
    results["f_opt"] = f_min
    results["incumbents"] = [inc for inc in bo.incumbents]
    results["incumbent_values"] = [val for val in bo.incumbents_values]
    results["runtime"] = bo.runtime
    results["overhead"] = bo.time_overhead
    results["X"] = [x.tolist() for x in bo.X]
    results["y"] = [y for y in bo.y]

    return results

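# A short illustrative sketch for entropy_search above; usage mirrors
# bayesian_optimization, with a synthetic objective.
import numpy as np

def objective(x):
    return float(np.sum(x ** 2))

results = entropy_search(objective,
                         lower=np.array([-2.0, -2.0]),
                         upper=np.array([2.0, 2.0]),
                         num_iterations=20)
print(results["x_opt"])
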
def fmin(objective_func, X_lower, X_upper, num_iterations=30,
         maximizer="direct", acquisition="LogEI", initX=None, initY=None):

    assert X_upper.shape[0] == X_lower.shape[0]

    class Task(BaseTask):
        def __init__(self, X_lower, X_upper, objective_fkt):
            super(Task, self).__init__(X_lower, X_upper)
            self.objective_function = objective_fkt

    task = Task(X_lower, X_upper, objective_func)

    cov_amp = 2
    initial_ls = np.ones([task.n_dims])
    exp_kernel = george.kernels.Matern52Kernel(initial_ls, ndim=task.n_dims)
    kernel = cov_amp * exp_kernel

    prior = DefaultPrior(len(kernel) + 1)

    n_hypers = 3 * len(kernel)
    if n_hypers % 2 == 1:
        n_hypers += 1

    model = GaussianProcessMCMC(kernel, prior=prior,
                                n_hypers=n_hypers,
                                chain_length=200,
                                burnin_steps=100)

    if acquisition == "EI":
        a = EI(model, X_upper=task.X_upper, X_lower=task.X_lower)
    elif acquisition == "LogEI":
        a = LogEI(model, X_upper=task.X_upper, X_lower=task.X_lower)
    elif acquisition == "PI":
        a = PI(model, X_upper=task.X_upper, X_lower=task.X_lower)
    elif acquisition == "UCB":
        a = LCB(model, X_upper=task.X_upper, X_lower=task.X_lower)
    elif acquisition == "InformationGain":
        a = InformationGain(model, X_upper=task.X_upper, X_lower=task.X_lower)
    elif acquisition == "InformationGainMC":
        a = InformationGainMC(model, X_upper=task.X_upper,
                              X_lower=task.X_lower)
    else:
        logger.error("ERROR: %s is not a "
                     "valid acquisition function!" % acquisition)
        return None

    acquisition_func = IntegratedAcquisition(model, a,
                                             task.X_lower, task.X_upper)

    if maximizer == "cmaes":
        max_fkt = cmaes.CMAES(acquisition_func, task.X_lower, task.X_upper)
    elif maximizer == "direct":
        max_fkt = direct.Direct(acquisition_func, task.X_lower, task.X_upper)
    elif maximizer == "stochastic_local_search":
        max_fkt = stochastic_local_search.StochasticLocalSearch(
            acquisition_func, task.X_lower, task.X_upper)
    elif maximizer == "grid_search":
        max_fkt = grid_search.GridSearch(acquisition_func,
                                         task.X_lower, task.X_upper)
    else:
        logger.error("ERROR: %s is not a valid function "
                     "to maximize the acquisition function!" % maximizer)
        return None

    bo = BayesianOptimization(acquisition_func=acquisition_func,
                              model=model,
                              maximize_func=max_fkt,
                              task=task)

    best_x, f_min = bo.run(num_iterations, X=initX, Y=initY)

    return task.retransform(best_x), f_min, model, acquisition_func, max_fkt

def fmin(objective_fkt, X_lower, X_upper, num_iterations=30,
         maximizer="direct", acquisition_fkt="EI"):

    assert X_upper.shape[0] == X_lower.shape[0]

    class Task(BaseTask):
        def __init__(self, X_lower, X_upper, objective_fkt):
            super(Task, self).__init__(X_lower, X_upper)
            self.objective_function = objective_fkt

    task = Task(X_lower, X_upper, objective_fkt)

    noise = 1.0
    cov_amp = 2
    initial_ls = np.ones([task.n_dims])
    exp_kernel = george.kernels.Matern52Kernel(initial_ls, ndim=task.n_dims)
    noise_kernel = george.kernels.WhiteKernel(noise, ndim=task.n_dims)
    kernel = cov_amp * (exp_kernel + noise_kernel)

    prior = DefaultPrior(len(kernel))

    model = GaussianProcessMCMC(kernel, prior=prior,
                                n_hypers=20,
                                chain_length=100,
                                burnin_steps=50)

    if acquisition_fkt == "EI":
        a = EI(model, X_upper=task.X_upper, X_lower=task.X_lower)
    elif acquisition_fkt == "PI":
        a = PI(model, X_upper=task.X_upper, X_lower=task.X_lower)
    elif acquisition_fkt == "UCB":
        a = LCB(model, X_upper=task.X_upper, X_lower=task.X_lower)
    elif acquisition_fkt == "Entropy":
        a = Entropy(model, X_upper=task.X_upper, X_lower=task.X_lower)
    elif acquisition_fkt == "EntropyMC":
        a = EntropyMC(model, X_upper=task.X_upper, X_lower=task.X_lower)
    else:
        logger.error("ERROR: %s is not a "
                     "valid acquisition function!" % acquisition_fkt)
        return None

    if maximizer == "cmaes":
        max_fkt = cmaes.CMAES(a, task.X_lower, task.X_upper)
    elif maximizer == "direct":
        max_fkt = direct.Direct(a, task.X_lower, task.X_upper)
    elif maximizer == "stochastic_local_search":
        max_fkt = stochastic_local_search.StochasticLocalSearch(
            a, task.X_lower, task.X_upper)
    elif maximizer == "grid_search":
        max_fkt = grid_search.GridSearch(a, task.X_lower, task.X_upper)
    else:
        logger.error("ERROR: %s is not a valid function "
                     "to maximize the acquisition function!" % maximizer)
        return None

    bo = BayesianOptimization(acquisition_func=a,
                              model=model,
                              maximize_func=max_fkt,
                              task=task)

    best_x, f_min = bo.run(num_iterations)

    return best_x, f_min

burnin = 100  # not defined in the original snippet; a typical value
chain_length = 200
n_hypers = 20

task = Branin()

cov_amp = 1.0
config_kernel = george.kernels.Matern52Kernel(np.ones([task.n_dims]),
                                              ndim=task.n_dims)
kernel = cov_amp * config_kernel

prior = MyPrior(len(kernel) + 1)

model = GaussianProcessMCMC(kernel, prior=prior,
                            burnin=burnin,
                            chain_length=chain_length,
                            n_hypers=n_hypers)

ei = EI(model, X_upper=task.X_upper, X_lower=task.X_lower)
acquisition_func = IntegratedAcquisition(model, ei,
                                         task.X_lower, task.X_upper)

maximizer = Direct(acquisition_func, task.X_lower, task.X_upper)

# The original snippet breaks off after `model=model,`; the call is completed
# here following the pattern of the other scripts in this collection.
bo = BayesianOptimization(acquisition_func=acquisition_func,
                          model=model,
                          maximize_func=maximizer,
                          task=task)

import numpy as np
import george

# Import path for GaussianProcessMCMC assumed from the RoBO package layout
from robo.models.gaussian_process_mcmc import GaussianProcessMCMC
from robo.acquisition.ei import EI
from robo.maximizers.direct import Direct
from robo.task.controlling_tasks.walker import Walker
from robo.solver.bayesian_optimization import BayesianOptimization
from robo.priors.default_priors import DefaultPrior
from robo.acquisition.integrated_acquisition import IntegratedAcquisition

task = Walker()
test = '/test'

kernel = 1 * george.kernels.Matern52Kernel(np.ones([task.n_dims]),
                                           ndim=task.n_dims)

prior = DefaultPrior(len(kernel) + 1)
model = GaussianProcessMCMC(kernel, prior=prior,
                            chain_length=100,
                            burnin_steps=200,
                            n_hypers=8)

ei = EI(model, task.X_lower, task.X_upper)
acquisition_func = IntegratedAcquisition(model, ei,
                                         task.X_lower, task.X_upper)

maximizer = Direct(acquisition_func, task.X_lower, task.X_upper)

bo = BayesianOptimization(acquisition_func=acquisition_func,
                          model=model,
                          maximize_func=maximizer,
                          task=task,
                          save_dir=test)

print(bo.run(2))

def bayesian_optimization(objective_function, lower, upper, num_iterations=30,
                          maximizer="direct", acquisition_func="log_ei",
                          model="gp_mcmc", n_init=3, rng=None):
    """
    General interface for Bayesian optimization for global black box
    optimization problems.

    Parameters
    ----------
    objective_function: function
        The objective function that is minimized. This function gets a numpy
        array (D,) as input and returns the function value (scalar)
    lower: np.ndarray (D,)
        The lower bound of the search space
    upper: np.ndarray (D,)
        The upper bound of the search space
    num_iterations: int
        The number of iterations (initial design + BO)
    maximizer: {"direct", "cmaes"}
        Defines how the acquisition function is maximized.
        NOTE: "cmaes" only works in D > 1 dimensions
    acquisition_func: {"ei", "log_ei", "lcb", "pi"}
        The acquisition function
    model: {"gp", "gp_mcmc"}
        The model for the objective function.
    n_init: int
        Number of points for the initial design. Make sure that it
        is <= num_iterations.
    rng: numpy.random.RandomState
        Random number generator

    Returns
    -------
    dict with all results
    """
    assert upper.shape[0] == lower.shape[0]
    assert n_init <= num_iterations, \
        "Number of initial design points has to be <= the number of iterations"

    if rng is None:
        rng = np.random.RandomState(np.random.randint(0, 10000))

    cov_amp = 2
    n_dims = lower.shape[0]

    initial_ls = np.ones([n_dims])
    exp_kernel = george.kernels.Matern52Kernel(initial_ls, ndim=n_dims)
    kernel = cov_amp * exp_kernel

    prior = DefaultPrior(len(kernel) + 1)

    n_hypers = 3 * len(kernel)
    if n_hypers % 2 == 1:
        n_hypers += 1

    if model == "gp":
        gp = GaussianProcess(kernel, prior=prior, rng=rng,
                             normalize_output=True,
                             normalize_input=True,
                             lower=lower, upper=upper)
    elif model == "gp_mcmc":
        gp = GaussianProcessMCMC(kernel, prior=prior,
                                 n_hypers=n_hypers,
                                 chain_length=200,
                                 burnin_steps=100,
                                 normalize_input=True,
                                 normalize_output=True,
                                 rng=rng, lower=lower, upper=upper)
    else:
        print("ERROR: %s is not a valid model!" % model)
        return

    if acquisition_func == "ei":
        a = EI(gp)
    elif acquisition_func == "log_ei":
        a = LogEI(gp)
    elif acquisition_func == "pi":
        a = PI(gp)
    elif acquisition_func == "lcb":
        a = LCB(gp)
    else:
        print("ERROR: %s is not a valid acquisition function!"
              % acquisition_func)
        return

    if model == "gp":
        acquisition_func = a
    elif model == "gp_mcmc":
        acquisition_func = MarginalizationGPMCMC(a)

    if maximizer == "cmaes":
        max_func = CMAES(acquisition_func, lower, upper, verbose=False,
                         rng=rng)
    elif maximizer == "direct":
        max_func = Direct(acquisition_func, lower, upper, verbose=False)
    else:
        print("ERROR: %s is not a valid function to maximize the "
              "acquisition function!" % maximizer)
        return

    bo = BayesianOptimization(objective_function, lower, upper,
                              acquisition_func, gp, max_func,
                              initial_points=n_init, rng=rng)

    x_best, f_min = bo.run(num_iterations)

    results = dict()
    results["x_opt"] = x_best
    results["f_opt"] = f_min
    results["incumbents"] = [inc for inc in bo.incumbents]
    results["incumbent_values"] = [val for val in bo.incumbents_values]
    results["runtime"] = bo.runtime
    results["overhead"] = bo.time_overhead

    return results

def build_model(lower, upper, model_type="gp_mcmc", model_seed=1,
                prior_seed=1):
    """
    Build the surrogate model used for Bayesian optimization of global
    black box optimization problems.

    Parameters
    ----------
    lower: numpy.ndarray (D,)
        The lower bound of the search space
    upper: numpy.ndarray (D,)
        The upper bound of the search space
    model_type: {"gp", "gp_mcmc", "rf", "bohamiann", "dngo"}
        The model for the objective function.
    model_seed: int
        Seed for the random number generator of the model
    prior_seed: int
        Seed for the random number generator of the prior

    Returns
    -------
    Model
    """
    assert upper.shape[0] == lower.shape[0], "Dimension mismatch"
    assert numpy.all(lower < upper), "Lower bound >= upper bound"

    cov_amp = 2
    n_dims = lower.shape[0]

    initial_ls = numpy.ones([n_dims])
    exp_kernel = george.kernels.Matern52Kernel(initial_ls, ndim=n_dims)
    kernel = cov_amp * exp_kernel

    prior = DefaultPrior(len(kernel) + 1,
                         numpy.random.RandomState(prior_seed))

    n_hypers = 3 * len(kernel)
    if n_hypers % 2 == 1:
        n_hypers += 1

    # NOTE: Some models do not support RNG properly and rely on global RNG
    # state, so we need to seed here as well...
    numpy.random.seed(model_seed)
    model_rng = numpy.random.RandomState(model_seed)

    if model_type == "gp":
        model = GaussianProcess(kernel, prior=prior, rng=model_rng,
                                normalize_output=False,
                                normalize_input=True,
                                lower=lower, upper=upper)
    elif model_type == "gp_mcmc":
        model = GaussianProcessMCMC(kernel, prior=prior,
                                    n_hypers=n_hypers,
                                    chain_length=200,
                                    burnin_steps=100,
                                    normalize_input=True,
                                    normalize_output=False,
                                    rng=model_rng,
                                    lower=lower, upper=upper)
    elif model_type == "rf":
        model = RandomForest(rng=model_rng)
    elif model_type == "bohamiann":
        model = WrapperBohamiann()
    elif model_type == "dngo":
        from pybnn.dngo import DNGO
        model = DNGO()
    else:
        raise ValueError("'{}' is not a valid model".format(model_type))

    return model

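# A small sketch of build_model on synthetic data; the shapes follow the
# train/predict conventions used in the tests above.
import numpy

lower = numpy.zeros(2)
upper = numpy.ones(2)
model = build_model(lower, upper, model_type="gp", model_seed=1, prior_seed=1)

X = numpy.random.rand(20, 2)
y = numpy.sinc(X * 10 - 5).sum(axis=1)
model.train(X, y)
mean, var = model.predict(numpy.random.rand(5, 2))
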
def mtbo(objective_function, lower, upper, n_tasks=2, n_init=2,
         num_iterations=30, burnin=100, chain_length=200, rng=None):
    """
    Interface to MTBO [1], which uses an auxiliary cheaper task to speed up
    the optimization of a more expensive but similar task.

    [1] Multi-Task Bayesian Optimization
        K. Swersky and J. Snoek and R. Adams
        Proceedings of the 27th International Conference on Advances in
        Neural Information Processing Systems (NIPS'13)

    Parameters
    ----------
    objective_function: function
        Objective function that will be optimized
    lower: np.array(D,)
        Lower bound of the input space
    upper: np.array(D,)
        Upper bound of the input space
    n_tasks: int
        Number of tasks
    n_init: int
        Number of initial design points
    num_iterations: int
        Number of iterations
    chain_length : int
        The length of the MCMC chain for each walker.
    burnin : int
        The number of burnin steps before the actual MCMC sampling starts.
    rng: numpy.random.RandomState
        Random number generator

    Returns
    -------
    dict with all results
    """
    assert n_init <= num_iterations, \
        "Number of initial design points has to be <= the number of iterations"
    assert lower.shape[0] == upper.shape[0], \
        "Dimension mismatch between upper and lower bound"

    time_start = time.time()
    if rng is None:
        rng = np.random.RandomState(np.random.randint(0, 10000))

    n_dims = lower.shape[0]

    # Bookkeeping
    time_func_eval = []
    time_overhead = []
    incumbents = []
    runtime = []

    X = []
    y = []
    c = []

    # Define model for the objective function
    cov_amp = 1  # Covariance amplitude
    kernel = cov_amp

    # ARD kernel for the configuration space
    for d in range(n_dims):
        kernel *= george.kernels.Matern52Kernel(np.ones([1]) * 0.01,
                                                ndim=n_dims + 1, dim=d)

    task_kernel = george.kernels.TaskKernel(n_dims + 1, n_dims, n_tasks)
    kernel *= task_kernel

    # Take 3 times more samples than we have hyperparameters
    n_hypers = 3 * len(kernel)
    if n_hypers % 2 == 1:
        n_hypers += 1

    prior = MTBOPrior(len(kernel) + 1,
                      n_ls=n_dims,
                      n_kt=len(task_kernel),
                      rng=rng)

    model_objective = GaussianProcessMCMC(kernel,
                                          prior=prior,
                                          burnin_steps=burnin,
                                          chain_length=chain_length,
                                          n_hypers=n_hypers,
                                          normalize_input=False,
                                          lower=lower,
                                          upper=upper,
                                          rng=rng)

    # Define model for the cost function
    cost_cov_amp = 1
    cost_kernel = cost_cov_amp

    # ARD kernel for the configuration space
    for d in range(n_dims):
        cost_kernel *= george.kernels.Matern52Kernel(np.ones([1]) * 0.01,
                                                     ndim=n_dims + 1, dim=d)

    cost_task_kernel = george.kernels.TaskKernel(n_dims + 1, n_dims, n_tasks)
    cost_kernel *= cost_task_kernel

    cost_prior = MTBOPrior(len(cost_kernel) + 1,
                           n_ls=n_dims,
                           n_kt=len(task_kernel),
                           rng=rng)

    model_cost = GaussianProcessMCMC(cost_kernel,
                                     prior=cost_prior,
                                     burnin_steps=burnin,
                                     chain_length=chain_length,
                                     n_hypers=n_hypers,
                                     normalize_input=False,
                                     lower=lower,
                                     upper=upper,
                                     rng=rng)

    # Extend input space by task variable
    extend_lower = np.append(lower, 0)
    extend_upper = np.append(upper, n_tasks - 1)
    is_env = np.zeros(extend_lower.shape[0])
    is_env[-1] = 1

    # Define acquisition function and maximizer
    ig = InformationGainPerUnitCost(model_objective,
                                    model_cost,
                                    extend_lower,
                                    extend_upper,
                                    is_env_variable=is_env,
                                    n_representer=50)
    acquisition_func = MarginalizationGPMCMC(ig)

    wrapper_func = partial(transformation, acq=acquisition_func,
                           lower=lower, upper=upper)
    maximizer = Direct(wrapper_func, extend_lower, extend_upper, verbose=True)

    # Initial design
    for _ in range(n_init):
        logger.info("Initial design")
        start_time_overhead = time.time()
        # Draw random configuration and evaluate it just on the auxiliary task
        task = 0
        x = init_random_uniform(lower, upper, 1, rng)[0]
logger.info("Evaluate candidate %s", str(x)) st = time.time() func_val, cost = objective_function(x, task) time_func_eval.append(time.time() - st) logger.info("Configuration achieved a performance of %f with cost %f", func_val, cost) logger.info("Evaluation of this configuration took %f seconds", time_func_eval[-1]) # Bookkeeping config = np.append(x, task) X.append(config) y.append(func_val) c.append(cost) # Estimate incumbent as the best observed value so far best_idx = np.argmin(y) incumbents.append(np.append( X[best_idx], n_tasks - 1)) # Incumbent is always on the task of interest time_overhead.append(time.time() - start_time_overhead) runtime.append(time.time() - time_start) X = np.array(X) y = np.array(y) c = np.array(c) for it in range(n_init, num_iterations): logger.info("Start iteration %d ... ", it) start_time = time.time() # Train models model_objective.train(transform(X, lower, upper), y, do_optimize=True) model_cost.train(transform(X, lower, upper), c, do_optimize=True) # Estimate incumbent by projecting all observed points to the task of interest and # pick the point with the lowest mean prediction incumbent, incumbent_value = projected_incumbent_estimation( model_objective, transform(X, lower, upper)[:, :-1], proj_value=n_tasks - 1) incumbent[:-1] = normalization.zero_one_unnormalization( incumbent[:-1], lower, upper) incumbents.append(incumbent) logger.info("Current incumbent %s with estimated performance %f", str(incumbent), incumbent_value) # Maximize acquisition function acquisition_func.update(model_objective, model_cost) new_x = maximizer.maximize() new_x[-1] = np.rint( new_x[-1]) # Map float value to discrete task variable time_overhead.append(time.time() - start_time) logger.info("Optimization overhead was %f seconds", time_overhead[-1]) # Evaluate the chosen configuration logger.info("Evaluate candidate %s", str(new_x)) start_time = time.time() new_y, new_c = objective_function(new_x[:-1], new_x[-1]) time_func_eval.append(time.time() - start_time) logger.info("Configuration achieved a performance of %f with cost %f", new_y, new_c) logger.info("Evaluation of this configuration took %f seconds", time_func_eval[-1]) # Add new observation to the data X = np.concatenate((X, new_x[None, :]), axis=0) y = np.concatenate((y, np.array([new_y])), axis=0) c = np.concatenate((c, np.array([new_c])), axis=0) runtime.append(time.time() - time_start) # Estimate the final incumbent model_objective.train(transform(X, lower, upper), y) incumbent, incumbent_value = projected_incumbent_estimation( model_objective, transform(X, lower, upper)[:, :-1], proj_value=n_tasks - 1) incumbent[:-1] = normalization.zero_one_unnormalization( incumbent[:-1], lower, upper) incumbents.append(incumbent) logger.info("Final incumbent %s with estimated performance %f", str(incumbent), incumbent_value) results = dict() results["x_opt"] = incumbent[:-1] results["trajectory"] = [inc for inc in incumbents] results["runtime"] = runtime results["overhead"] = time_overhead results["time_func_eval"] = time_func_eval return results