def __init__(self, model, X_lower, X_upper, par=0.0, **kwargs):
    r"""
    Set up the Probability of Improvement (PI) acquisition function.

    PI is defined as
    :math:`PI(X) := \mathbb{P}\left( f(\mathbf{X^+}) - f_{t+1}(\mathbf{X}) > \xi\right)`,
    where :math:`f(X^+)` is the best function value found so far.

    Parameters
    ----------
    model: Model object
        A model that implements at least
             - predict(X)
        If derivatives are needed it should also support
             - predictive_gradients(X)
    X_lower: np.ndarray (D)
        Lower bounds of the input space
    X_upper: np.ndarray (D)
        Upper bounds of the input space
    par: float
        Controls the balance between exploration and exploitation of the
        acquisition function. Default is 0.0
    kwargs:
        Additional keyword arguments are accepted but not used here.
    """
    super(PI, self).__init__(model, X_lower, X_upper)

    # The estimator is built from the attributes read back off the instance
    # after the base-class initialiser has run.
    estimator_args = (self.model, self.X_lower, self.X_upper)
    self.rec = BestObservation(*estimator_args)
    self.par = par
def __init__(self, model, X_lower, X_upper, par=0.0, **kwargs):
    r"""
    Set up the Expected Improvement (EI) acquisition function.

    EI is defined as
    :math:`EI(X) := \mathbb{E}\left[ \max\{0, f(\mathbf{X^+}) - f_{t+1}(\mathbf{X}) - \xi \} \right]`,
    with :math:`f(X^+)` as the incumbent.

    Parameters
    ----------
    model: Model object
        A model that implements at least
             - predict(X)
        If derivatives are needed it should also support
             - predictive_gradients(X)
    X_lower: np.ndarray (D)
        Lower bounds of the input space
    X_upper: np.ndarray (D)
        Upper bounds of the input space
    par: float
        Controls the balance between exploration and exploitation of the
        acquisition function. Default is 0.0
    kwargs:
        Additional keyword arguments are accepted but not used here.
    """
    super(EI, self).__init__(model, X_lower, X_upper)

    # The estimator is built from the attributes read back off the instance
    # after the base-class initialiser has run.
    estimator_args = (self.model, self.X_lower, self.X_upper)
    self.rec = BestObservation(*estimator_args)
    self.par = par
def test(self):
    """Check shape and bounds of the incumbent returned by BestObservation."""
    lower = np.array([0])
    upper = np.array([1])
    n_dims = lower.shape[0]

    # Train a GP on ten random points of a sine curve.
    train_x = init_random_uniform(lower, upper, 10)
    train_y = np.sin(train_x)

    kernel = george.kernels.Matern52Kernel(np.ones([1]), ndim=1)
    prior = TophatPrior(-2, 2)
    model = GaussianProcess(kernel, prior=prior)
    model.train(train_x, train_y)

    rec = BestObservation(model, lower, upper)
    startpoints = init_random_uniform(lower, upper, 10)
    inc, inc_val = rec.estimate_incumbent(startpoints)

    # Check shapes: one incumbent row with one column per input dimension,
    # and a single function value.
    assert inc.shape == (1, n_dims)
    assert inc_val.shape == (1, 1)

    # Check if incumbent is in the bounds (broadcast over the D columns).
    assert not np.any(inc < lower)
    assert not np.any(inc > upper)
def __init__(self, model, X_lower, X_upper, par=0.01, **kwargs):
    r"""
    Set up the logarithmic Expected Improvement (LogEI) acquisition function.

    Parameters
    ----------
    model: Model object
        A model that implements at least
             - predict(X)
        If derivatives are needed it should also support
             - predictive_gradients(X)
    X_lower: np.ndarray (D)
        Lower bounds of the input space
    X_upper: np.ndarray (D)
        Upper bounds of the input space
    par: float
        Controls the balance between exploration and exploitation of the
        acquisition function. Default is 0.01
    kwargs:
        Additional keyword arguments are accepted but not used here.
    """
    super(LogEI, self).__init__(model, X_lower, X_upper)

    # The estimator is built from the attributes read back off the instance
    # after the base-class initialiser has run.
    estimator_args = (self.model, self.X_lower, self.X_upper)
    self.rec = BestObservation(*estimator_args)
    self.par = par
def update(self, model):
    """
    Refresh the acquisition function after the model has been updated.

    Delegates to the base-class ``update`` and then replaces ``self.rec``
    with a newly constructed BestObservation estimator.

    Parameters
    ----------
    model : Model object
        Models the objective function.
    """
    super(PI, self).update(model)

    estimator_args = (self.model, self.X_lower, self.X_upper)
    self.rec = BestObservation(*estimator_args)
def __init__(self, task=None, save_dir=None, num_save=1, rng=None):
    """
    Random Search [1]: evaluates points drawn uniformly at random, since no
    prior knowledge about the search space is assumed.

    [1] J. Bergstra and Y. Bengio.
        Random search for hyper-parameter optimization.
        JMLR, 2012.

    Parameters
    ----------
    task: TaskObject
        Task object that contains the objective function and additional
        meta information such as the lower and upper bound of the search
        space. Its X_lower / X_upper attributes are read here, so the
        default of None is not usable.
    save_dir: String
        Output path. When given, create_save_dir() is called.
    num_save: int
        Defines after how many iteration the output is saved.
    rng: numpy.random.RandomState
        Random number generator. When omitted, a RandomState seeded with 42
        is created.
    """
    self.rng = np.random.RandomState(42) if rng is None else rng

    self.task = task
    self.save_dir = save_dir

    # Observations collected so far (filled in by run()).
    self.X = None
    self.Y = None

    # The solver itself is handed to the estimator in the model slot.
    lower, upper = self.task.X_lower, self.task.X_upper
    self.estimator = BestObservation(self, lower, upper)

    # Timing bookkeeping.
    self.time_func_eval = None
    self.time_overhead = None

    self.num_save = num_save
    self.model_untrained = True

    # Incumbent bookkeeping.
    self.incumbent = None
    self.incumbents = []
    self.incumbent_values = []
    self.runtime = []

    if self.save_dir is None:
        return
    self.create_save_dir()
def __init__(self, model, X_lower, X_upper, par=0.0, **kwargs):
    r"""
    Set up the logarithmic Expected Improvement (LogEI) acquisition function.

    Parameters
    ----------
    model: Model object
        A model that implements at least
             - predict(X)
        If derivatives are needed it should also support
             - predictive_gradients(X)
    X_lower: np.ndarray (D)
        Lower bounds of the input space
    X_upper: np.ndarray (D)
        Upper bounds of the input space
    par: float
        Controls the balance between exploration and exploitation of the
        acquisition function. Default is 0.0
    kwargs:
        Additional keyword arguments are accepted but not used here.
    """
    super(LogEI, self).__init__(model, X_lower, X_upper)

    # The estimator is built from the attributes read back off the instance
    # after the base-class initialiser has run.
    estimator_args = (self.model, self.X_lower, self.X_upper)
    self.rec = BestObservation(*estimator_args)
    self.par = par
def update(self, model):
    """
    Refresh the acquisition function after the model has been updated.

    Delegates to the base-class ``update`` and then replaces ``self.rec``
    with a newly constructed BestObservation estimator.

    Parameters
    ----------
    model : Model object
        Models the objective function.
    """
    super(EI, self).update(model)

    estimator_args = (self.model, self.X_lower, self.X_upper)
    self.rec = BestObservation(*estimator_args)
class EI(BaseAcquisitionFunction):
    """Expected Improvement acquisition function using the best observation as incumbent."""

    def __init__(self, model, X_lower, X_upper, par=0.0, **kwargs):
        r"""
        Computes for a given x the expected improvement as
        acquisition value.
        :math:`EI(X) := \mathbb{E}\left[ \max\{0, f(\mathbf{X^+}) - f_{t+1}(\mathbf{X}) - \xi \} \right]`,
        with :math:`f(X^+)` as the incumbent.

        Parameters
        ----------
        model: Model object
            A model that implements at least
                 - predict(X)
            If you want to calculate derivatives then it should also support
                 - predictive_gradients(X)
        X_lower: np.ndarray (D)
            Lower bounds of the input space
        X_upper: np.ndarray (D)
            Upper bounds of the input space
        par: float
            Controls the balance between exploration and exploitation of
            the acquisition function (the :math:`\xi` subtracted from the
            improvement in ``compute``). Default is 0.0
        kwargs:
            Additional keyword arguments are accepted but not used.
        """
        super(EI, self).__init__(model, X_lower, X_upper)

        self.par = par
        self.rec = BestObservation(self.model, self.X_lower, self.X_upper)

    def update(self, model):
        """
        This method will be called if the model is updated.

        Parameters
        ----------
        model : Model object
            Models the objective function.
        """
        super(EI, self).update(model)
        # Rebuild the incumbent estimator so it refers to the current model.
        self.rec = BestObservation(self.model, self.X_lower, self.X_upper)

    def compute(self, X, derivative=False, **kwargs):
        """
        Computes the EI value and its derivatives.

        Parameters
        ----------
        X: np.ndarray(1, D)
            The single input point where the acquisition function is
            evaluated. A flat array of length D is interpreted as one
            D-dimensional point.
        derivative: Boolean
            If set to true, the derivative of the acquisition function at
            X is returned as well.

        Returns
        -------
        np.ndarray(1,1)
            Expected Improvement of X
        np.ndarray(1,D)
            Derivative of Expected Improvement at X
            (only if derivative=True)

        Raises
        ------
        ValueError
            If more than one point is passed, or if the computed
            Expected Improvement is negative.
        """
        # Promote a flat vector to a single row *before* the single-point
        # check; otherwise a D-dimensional point given as a 1-D array would
        # be mistaken for D separate points.
        if len(X.shape) == 1:
            X = X[np.newaxis, :]

        if X.shape[0] > 1:
            raise ValueError("EI is only for single test points")

        n_dims = X.shape[1]

        # Points outside the box get zero improvement and a zero gradient.
        if np.any(X < self.X_lower) or np.any(X > self.X_upper):
            f = np.array([[0]])
            if derivative:
                # Shape (1, D), matching the documented return shape.
                return f, np.zeros((1, n_dims))
            return f

        m, v = self.model.predict(X)

        # Use the best seen observation as incumbent
        _, eta = self.rec.estimate_incumbent(None)

        s = np.sqrt(v)

        if (s == 0).any():
            # No predictive uncertainty: avoid dividing by zero.
            f = np.array([[0]])
            df = np.zeros((1, n_dims))
        else:
            z = (eta - m - self.par) / s
            # Equivalent to (eta - m - par) * cdf(z) + s * pdf(z)
            f = s * (z * norm.cdf(z) + norm.pdf(z))

            if derivative:
                dmdx, ds2dx = self.model.predictive_gradients(X)
                dmdx = dmdx[0]
                ds2dx = ds2dx[0][:, None]
                # Chain rule: d(sqrt(v))/dx = (dv/dx) / (2 * sqrt(v))
                dsdx = ds2dx / (2 * s)
                df = (-dmdx * norm.cdf(z) + (dsdx * norm.pdf(z))).T

            if (f < 0).any():
                logger.error("Expected Improvement is smaller than 0!")
                raise ValueError("Expected Improvement is smaller than 0!")

        if derivative:
            return f, df
        return f
def test(self):
    """Check GPyModel output shapes and its compatibility with acquisition functions and incumbent estimators."""
    lower = np.array([0])
    upper = np.array([1])

    def check_incumbent(inc, inc_val):
        # One incumbent row with one column per dimension, one value.
        assert inc.shape == (1, upper.shape[0])
        assert inc_val.shape == (1, 1)

    train_x = init_random_uniform(lower, upper, 10)
    train_y = np.sin(train_x)

    kernel = GPy.kern.Matern52(input_dim=1)
    model = GPyModel(kernel)
    model.train(train_x, train_y)

    x_test = init_random_uniform(lower, upper, 3)
    n_test = x_test.shape[0]

    # Shape matching predict
    m, v = model.predict(x_test, full_cov=True)
    assert m.shape == (n_test, 1)
    assert v.shape == (n_test, n_test)

    # Check gradients
    dm, dv = model.predictive_gradients(x_test)
    assert dm.shape == (n_test, x_test.shape[1])
    assert dv.shape == (n_test, 1)

    # Shape matching function sampling
    grid = np.linspace(lower, upper, 10)
    grid = grid[:, np.newaxis]
    funcs = model.sample_functions(grid, n_funcs=2)
    assert funcs.shape == (2, grid.shape[0])

    # Shape matching predict variance
    x_test2 = np.array([np.random.rand(1)])
    x_test1 = np.random.rand(10)[:, np.newaxis]
    var = model.predict_variance(x_test1, x_test2)
    assert var.shape == (x_test1.shape[0], 1)

    # Check compatibility with all acquisition functions
    for acquisition_cls in (EI, PI, LCB, InformationGain):
        acq_func = acquisition_cls(model, X_upper=upper, X_lower=lower)
        acq_func.update(model)
        acq_func(x_test)

    # Check compatibility with all incumbent estimation methods
    rec = BestObservation(model, lower, upper)
    inc, inc_val = rec.estimate_incumbent(None)
    check_incumbent(inc, inc_val)

    for estimator_cls in (PosteriorMeanOptimization,
                          PosteriorMeanAndStdOptimization):
        rec = estimator_cls(model, lower, upper)
        startpoints = init_random_uniform(lower, upper, 4)
        inc, inc_val = rec.estimate_incumbent(startpoints)
        check_incumbent(inc, inc_val)
class PI(BaseAcquisitionFunction):
    """Probability of Improvement acquisition function using the best observation as incumbent."""

    def __init__(self, model, X_lower, X_upper, par=0.0, **kwargs):
        r"""
        Probability of Improvement solves the following equation
        :math:`PI(X) := \mathbb{P}\left( f(\mathbf{X^+}) - f_{t+1}(\mathbf{X}) > \xi\right)`,
        where :math:`f(X^+)` is the best input found so far.

        Parameters
        ----------
        model: Model object
            A model that implements at least
                 - predict(X)
            If you want to calculate derivatives then it should also support
                 - predictive_gradients(X)
        X_lower: np.ndarray (D)
            Lower bounds of the input space
        X_upper: np.ndarray (D)
            Upper bounds of the input space
        par: float
            Controls the balance between exploration and exploitation of
            the acquisition function (the :math:`\xi` used in ``compute``).
            Default is 0.0
        kwargs:
            Additional keyword arguments are accepted but not used.
        """
        super(PI, self).__init__(model, X_lower, X_upper)

        self.par = par
        self.rec = BestObservation(self.model, self.X_lower, self.X_upper)

    def update(self, model):
        """
        This method will be called if the model is updated.

        Parameters
        ----------
        model : Model object
            Models the objective function.
        """
        super(PI, self).update(model)
        # Rebuild the incumbent estimator so it refers to the current model.
        self.rec = BestObservation(self.model, self.X_lower, self.X_upper)

    def compute(self, X, derivative=False, **kwargs):
        """
        Computes the PI value and its derivatives.

        Parameters
        ----------
        X: np.ndarray(1, D)
            The single input point where the acquisition function is
            evaluated.
        derivative: Boolean
            If set to true, the derivative of the acquisition function at
            X is returned as well.

        Returns
        -------
        np.ndarray(1,1)
            Probability of Improvement of X
        np.ndarray(1,D)
            Derivative of Probability of Improvement at X
            (only if derivative=True)

        If more than one point is passed, an error is logged and None is
        returned.
        """
        if X.shape[0] > 1:
            logger.error("PI is only for single x inputs")
            return

        # Points outside the box get zero probability and a zero gradient.
        if np.any(X < self.X_lower) or np.any(X > self.X_upper):
            f = np.array([[0]])
            if derivative:
                # Shape (1, D), matching the documented return shape.
                return f, np.zeros((1, X.shape[1]))
            return f

        m, v = self.model.predict(X)
        # Use the best seen observation as incumbent
        _, eta = self.rec.estimate_incumbent(None)
        s = np.sqrt(v)
        z = (eta - m - self.par) / s
        f = norm.cdf(z)

        if not derivative:
            return f

        dmdx, ds2dx = self.model.predictive_gradients(X)
        dmdx = dmdx[0]
        ds2dx = ds2dx[0][:, None]
        # Chain rule: d(sqrt(v))/dx = (dv/dx) / (2 * sqrt(v))
        dsdx = ds2dx / (2 * s)
        # d/dx cdf(z) = pdf(z) * dz/dx with dz/dx = -(dmdx + z * dsdx) / s.
        # The previous double negation returned the gradient with the
        # opposite sign.
        df = (-(norm.pdf(z) / s) * (dmdx + dsdx * z)).T
        return f, df
class RandomSearch(BaseSolver):
    """Solver that evaluates uniformly random points and tracks the best observation."""

    def __init__(self, task=None, save_dir=None, num_save=1, rng=None):
        """
        Random Search [1] that simply evaluates random points. We do not have
        any priors thus we sample points uniformly at random.

        [1] J. Bergstra and Y. Bengio.
            Random search for hyper-parameter optimization.
            JMLR, 2012.

        Parameters
        ----------
        task: TaskObject
            Task object that contains the objective function and additional
            meta information such as the lower and upper bound of the search
            space. Its X_lower / X_upper attributes are read here, so the
            default of None is not usable.
        num_save: int
            Defines after how many iteration the output is saved.
        save_dir: String
            Output path
        rng: numpy.random.RandomState
            Random number generator. When omitted, a RandomState seeded
            with 42 is created.
        """
        if rng is None:
            self.rng = np.random.RandomState(42)
        else:
            self.rng = rng

        self.task = task
        self.save_dir = save_dir

        self.X = None
        self.Y = None

        # The solver itself is handed to the estimator in the model slot.
        self.estimator = BestObservation(self,
                                         self.task.X_lower,
                                         self.task.X_upper)
        # Timing histories stay None until the first iteration of run().
        self.time_func_eval = None
        self.time_overhead = None

        self.num_save = num_save

        self.model_untrained = True

        self.incumbent = None
        self.incumbents = []
        self.incumbent_values = []
        self.runtime = []
        if self.save_dir is not None:
            self.create_save_dir()

    @staticmethod
    def _append_timing(history, seconds):
        """Return `history` extended by `seconds`; start a new array if `history` is None."""
        if history is None:
            # np.append(None, ...) would keep the None as first element and
            # turn the history into an object array.
            return np.array([seconds])
        return np.append(history, np.array([seconds]))

    def run(self, num_iterations=10):
        """
        The main optimization loop

        Parameters
        ----------
        num_iterations: int
            The number of iterations

        Returns
        -------
        np.ndarray(1,D)
            Incumbent
        np.ndarray(1,1)
            (Estimated) function value of the incumbent
        """
        self.time_start = time.time()

        for it in range(num_iterations):
            logger.info("Start iteration %d ... ", it)

            start_time = time.time()
            # Choose next point to evaluate
            new_x = self.choose_next()

            time_overhead = time.time() - start_time
            self.time_overhead = self._append_timing(self.time_overhead,
                                                     time_overhead)

            logger.info("Optimization overhead was %f seconds" %
                        (self.time_overhead[-1]))

            logger.info("Evaluate candidate %s" % (str(new_x)))
            start_time = time.time()
            new_y = self.task.evaluate(new_x)
            time_func_eval = time.time() - start_time
            self.time_func_eval = self._append_timing(self.time_func_eval,
                                                      time_func_eval)

            logger.info("Configuration achieved a performance of %f " %
                        (new_y[0, 0]))

            logger.info("Evaluation of this configuration took %f seconds" %
                        (self.time_func_eval[-1]))
            self.runtime.append(time.time() - self.time_start)

            # Update the data
            if self.X is None and self.Y is None:
                self.X = new_x
                self.Y = new_y
            else:
                self.X = np.append(self.X, new_x, axis=0)
                self.Y = np.append(self.Y, new_y, axis=0)

            # The incumbent is just the best observation we have seen so far
            start_time_inc = time.time()
            self.incumbent, self.incumbent_value = \
                self.estimator.estimate_incumbent(None)
            self.incumbents.append(self.incumbent)
            self.incumbent_values.append(self.incumbent_value)
            logger.info("New incumbent %s found in %f seconds with "
                        "estimated performance %f",
                        str(self.incumbent), time.time() - start_time_inc,
                        self.incumbent_value)

            if self.save_dir is not None and (it) % self.num_save == 0:
                self.save_iteration(it)

        logger.info("Return %s as incumbent with predicted performance %f" %
                    (str(self.incumbent), self.incumbent_value))

        return self.incumbent, self.incumbent_value

    def choose_next(self):
        """
        Sample a new point uniformly at random.

        Returns
        -------
        np.ndarray(1,D)
            Suggested point to evaluate
        """
        x = np.asarray(self.rng.uniform(self.task.X_lower, self.task.X_upper))
        if x.ndim == 0:
            # Scalar bounds yield a scalar sample, which cannot be indexed
            # with [np.newaxis, :]; wrap it as a 1x1 point instead.
            return x.reshape(1, 1)
        return x[np.newaxis, :]
def test(self):
    """Check RandomForest output shapes and its compatibility with acquisition functions and incumbent estimators."""
    lower = np.array([0])
    upper = np.array([1])

    def check_incumbent(inc, inc_val):
        # One incumbent row with one column per dimension, one value.
        assert inc.shape == (1, upper.shape[0])
        assert inc_val.shape == (1, 1)

    train_x = init_random_uniform(lower, upper, 10)
    train_y = np.sin(train_x)

    model = RandomForest(types=np.zeros([lower.shape[0]]))
    model.train(train_x, train_y)

    x_test = init_random_uniform(lower, upper, 3)
    n_test = x_test.shape[0]

    # Shape matching predict
    m, v = model.predict(x_test)
    assert m.shape == (n_test, 1)
    assert v.shape == (n_test, 1)

    # Shape matching function sampling (the checks themselves are disabled)
    grid = np.linspace(lower, upper, 10)
    grid = grid[:, np.newaxis]
    #funcs = model.sample_functions(x_, n_funcs=2)
    #assert len(funcs.shape) == 2
    #assert funcs.shape[0] == 2
    #assert funcs.shape[1] == x_.shape[0]

    # Check compatibility with all acquisition functions
    for acquisition_cls in (EI, PI, LCB):
        acq_func = acquisition_cls(model, X_upper=upper, X_lower=lower)
        acq_func.update(model)
        acq_func(x_test)

    # Check compatibility with all incumbent estimation methods
    rec = BestObservation(model, lower, upper)
    inc, inc_val = rec.estimate_incumbent(None)
    check_incumbent(inc, inc_val)

    for estimator_cls in (PosteriorMeanOptimization,
                          PosteriorMeanAndStdOptimization):
        rec = estimator_cls(model, lower, upper)
        startpoints = init_random_uniform(lower, upper, 4)
        inc, inc_val = rec.estimate_incumbent(startpoints)
        check_incumbent(inc, inc_val)
def __init__(self, acquisition_func, model, maximize_func, task, save_dir=None,
             initial_design=None, initial_points=3, incumbent_estimation=None,
             num_save=1, train_intervall=1, n_restarts=1):
    """
    Implementation of the standard Bayesian optimization loop that uses
    an acquisition function and a model to optimize a given task.
    This module keeps track of additional information such as runtime,
    optimization overhead, evaluated points and saves the output
    in a csv file.

    Parameters
    ----------
    acquisition_func: BaseAcquisitionFunctionObject
        The acquisition function which will be maximized.
    model: ModelObject
        Model (i.e. GaussianProcess, RandomForest) that models our current
        belief of the objective function.
    maximize_func:
        Forwarded unchanged to the base-class initialiser.
    task: TaskObject
        Task object that contains the objective function and additional
        meta information such as the lower and upper bound of the search
        space.
    save_dir: String
        Output path
    initial_design: function
        Function that returns some points which will be evaluated before
        the Bayesian optimization loop is started. This allows to
        initialize the model. Defaults to init_random_uniform.
    initial_points: int
        Defines the number of initial points that are evaluated before the
        actual Bayesian optimization.
    incumbent_estimation: IncumbentEstimationObject,
        Object to estimate the incumbent based on the current model. The
        incumbent is the current best guess of the global optimum and is
        estimated in each iteration. Defaults to a BestObservation
        estimator built from the model and the task bounds.
    num_save: int
        Defines after how many iteration the output is saved.
    train_intervall: int
        Specifies after how many iterations the model is retrained.
    n_restarts: int
        How often the incumbent estimation is repeated.
    """
    super(BayesianOptimization, self).__init__(acquisition_func, model,
                                               maximize_func, task, save_dir)
    self.start_time = time.time()

    # Identity check: `== None` would invoke a custom __eq__ on the argument.
    if initial_design is None:
        self.initial_design = init_random_uniform
    else:
        self.initial_design = initial_design

    self.X = None
    self.Y = None
    self.time_func_eval = None
    self.time_overhead = None
    self.train_intervall = train_intervall

    self.num_save = num_save
    self.time_start = None

    self.model_untrained = True

    if incumbent_estimation is None:
        self.estimator = BestObservation(self.model,
                                         self.task.X_lower,
                                         self.task.X_upper)
    else:
        self.estimator = incumbent_estimation

    self.incumbent = None
    self.incumbents = []
    self.incumbent_values = []
    self.n_restarts = n_restarts
    self.init_points = initial_points
    self.runtime = []
class RandomSearch(BaseSolver):
    """Solver that evaluates uniformly random points and tracks the best observation."""

    def __init__(self, task=None, save_dir=None, num_save=1, rng=None):
        """
        Random Search [1] that simply evaluates random points. We do not have
        any priors thus we sample points uniformly at random.

        [1] J. Bergstra and Y. Bengio.
            Random search for hyper-parameter optimization.
            JMLR, 2012.

        Parameters
        ----------
        task: TaskObject
            Task object that contains the objective function and additional
            meta information such as the lower and upper bound of the search
            space. Its X_lower / X_upper attributes are read here, so the
            default of None is not usable.
        num_save: int
            Defines after how many iteration the output is saved.
        save_dir: String
            Output path
        rng: numpy.random.RandomState
            Random number generator. When omitted, a RandomState with a
            randomly drawn seed in [0, 10000) is created.
        """
        if rng is None:
            self.rng = np.random.RandomState(np.random.randint(0, 10000))
        else:
            self.rng = rng

        self.task = task
        self.save_dir = save_dir

        self.X = None
        self.Y = None

        # The solver itself is handed to the estimator in the model slot.
        self.estimator = BestObservation(self,
                                         self.task.X_lower,
                                         self.task.X_upper)
        self.time_func_eval = []
        self.time_overhead = []

        self.num_save = num_save

        self.model_untrained = True

        self.incumbent = None
        self.incumbents = []
        self.incumbent_values = []
        self.runtime = []
        if self.save_dir is not None:
            self.create_save_dir()

    def run(self, num_iterations=10):
        """
        The main optimization loop

        Parameters
        ----------
        num_iterations: int
            The number of iterations

        Returns
        -------
        np.ndarray(1,D)
            Incumbent
        np.ndarray(1,1)
            (Estimated) function value of the incumbent
        """
        self.time_start = time.time()

        for it in range(num_iterations):
            logger.info("Start iteration %d ... ", it)

            start_time = time.time()
            # Choose next point to evaluate
            new_x = self.choose_next()

            time_overhead = time.time() - start_time
            self.time_overhead.append(time_overhead)

            logger.info("Optimization overhead was %f seconds" %
                        (self.time_overhead[-1]))

            logger.info("Evaluate candidate %s" % (str(new_x)))
            start_time = time.time()
            new_y = self.task.evaluate(new_x)
            time_func_eval = time.time() - start_time
            self.time_func_eval.append(time_func_eval)

            logger.info("Configuration achieved a performance of %f " %
                        (new_y[0, 0]))

            logger.info("Evaluation of this configuration took %f seconds" %
                        (self.time_func_eval[-1]))
            self.runtime.append(time.time() - self.time_start)

            # Update the data
            if self.X is None and self.Y is None:
                self.X = new_x
                self.Y = new_y
            else:
                self.X = np.append(self.X, new_x, axis=0)
                self.Y = np.append(self.Y, new_y, axis=0)

            # The incumbent is just the best observation we have seen so far
            start_time_inc = time.time()
            self.incumbent, self.incumbent_value = \
                self.estimator.estimate_incumbent(None)
            self.incumbents.append(self.incumbent)
            self.incumbent_values.append(self.incumbent_value)
            logger.info(
                "New incumbent %s found in %f seconds with "
                "estimated performance %f",
                str(self.incumbent), time.time() - start_time_inc,
                self.incumbent_value)

            if self.save_dir is not None and (it) % self.num_save == 0:
                self.save_iteration(it)

        logger.info("Return %s as incumbent with predicted performance %f" %
                    (str(self.incumbent), self.incumbent_value))

        return self.incumbent, self.incumbent_value

    def choose_next(self):
        """
        Sample a new point uniformly at random.

        Returns
        -------
        np.ndarray(1,D)
            Suggested point to evaluate
        """
        x = np.asarray(self.rng.uniform(self.task.X_lower, self.task.X_upper))
        # A scalar sample (scalar bounds) becomes a 1x1 point. The previous
        # check relied on the `np.float` alias, which no longer exists in
        # current NumPy releases.
        if x.ndim == 0:
            return x.reshape(1, 1)
        return x[np.newaxis, :]
class BayesianOptimization(BaseSolver):
    """Standard Bayesian optimization loop built on a model and an acquisition function."""

    def __init__(self, acquisition_func, model, maximize_func, task, save_dir=None,
                 initial_design=None, initial_points=3, incumbent_estimation=None,
                 num_save=1, train_intervall=1, n_restarts=1):
        """
        Implementation of the standard Bayesian optimization loop that uses
        an acquisition function and a model to optimize a given task.
        This module keeps track of additional information such as runtime,
        optimization overhead, evaluated points and saves the output
        in a csv file.

        Parameters
        ----------
        acquisition_func: AcquisitionFunctionObject
            The acquisition function which will be maximized.
        model: ModelObject
            Model (i.e. GaussianProcess, RandomForest) that models our current
            belief of the objective function.
        maximize_func:
            Object whose maximize() method is called in choose_next to
            obtain the next candidate.
        task: TaskObject
            Task object that contains the objective function and additional
            meta information such as the lower and upper bound of the search
            space.
        save_dir: String
            Output path
        initial_design: function
            Function that returns some points which will be evaluated before
            the Bayesian optimization loop is started. This allows to
            initialize the model. Defaults to init_random_uniform.
        initial_points: int
            Defines the number of initial points that are evaluated before the
            actual Bayesian optimization.
        incumbent_estimation: IncumbentEstimationObject,
            Object to estimate the incumbent based on the current model. The
            incumbent is the current best guess of the global optimum and is
            estimated in each iteration. Defaults to a BestObservation
            estimator built from the model and the task bounds.
        num_save: int
            Defines after how many iteration the output is saved.
        train_intervall: int
            The model is trained in every iteration; do_optimize=True is
            passed to the training only in iterations divisible by this
            value.
        n_restarts: int
            Number of random start points handed to the incumbent
            estimator in each iteration.
        """
        super(BayesianOptimization, self).__init__(acquisition_func, model,
                                                   maximize_func, task,
                                                   save_dir)
        self.start_time = time.time()

        # Identity check: `== None` would invoke a custom __eq__.
        if initial_design is None:
            self.initial_design = init_random_uniform
        else:
            self.initial_design = initial_design

        self.X = None
        self.Y = None
        self.time_func_eval = None
        self.time_overhead = None
        self.train_intervall = train_intervall

        self.num_save = num_save

        self.model_untrained = True

        if incumbent_estimation is None:
            self.estimator = BestObservation(self.model,
                                             self.task.X_lower,
                                             self.task.X_upper)
        else:
            self.estimator = incumbent_estimation

        self.incumbent = None
        self.incumbents = []
        self.incumbent_values = []
        self.n_restarts = n_restarts
        self.init_points = initial_points
        self.runtime = []

    def run(self, num_iterations=10, X=None, Y=None):
        """
        The main Bayesian optimization loop

        Parameters
        ----------
        num_iterations: int
            The number of iterations
        X: np.ndarray(N,D)
            Initial points that are already evaluated
        Y: np.ndarray(N,1)
            Function values of the already evaluated points

        Returns
        -------
        np.ndarray(1,D)
            Incumbent
        np.ndarray(1,1)
            (Estimated) function value of the incumbent
        """
        # Save the time where we start the Bayesian optimization procedure
        self.time_start = time.time()

        if X is None and Y is None:
            self.time_func_eval = np.zeros([self.init_points])
            self.time_overhead = np.zeros([self.init_points])
            self.X = np.zeros([self.init_points, self.task.n_dims])
            self.Y = np.zeros([self.init_points, 1])

            init = self.initial_design(self.task.X_lower,
                                       self.task.X_upper,
                                       N=self.init_points)

            for i, x in enumerate(init):
                x = x[np.newaxis, :]

                logger.info("Evaluate: %s" % x)

                start_time = time.time()
                y = self.task.evaluate(x)

                self.X[i] = x[0, :]
                self.Y[i] = y[0, :]
                self.time_func_eval[i] = time.time() - start_time
                self.time_overhead[i] = 0.0

                logger.info("Configuration achieved a performance "
                            "of %f in %f seconds" %
                            (self.Y[i, 0], self.time_func_eval[i]))

                # Use best point seen so far as incumbent. Only the rows
                # filled in so far are considered: the remaining rows are
                # still zero placeholders and would win the argmin whenever
                # all observed values are positive.
                best_idx = np.argmin(self.Y[:i + 1])
                self.incumbent = np.array([self.X[best_idx]])
                self.incumbent_value = np.array([self.Y[best_idx]])

                self.incumbents.append(self.incumbent)
                self.incumbent_values.append(self.incumbent_value)

                # NOTE(review): runtime is measured from construction
                # (start_time), not from the start of run() (time_start);
                # confirm that this is intended.
                self.runtime.append(time.time() - self.start_time)

                if self.save_dir is not None and (i) % self.num_save == 0:
                    self.save_iteration(i, hyperparameters=None,
                                        acquisition_value=0)
        else:
            self.X = X
            self.Y = Y
            self.time_func_eval = np.zeros([self.X.shape[0]])
            self.time_overhead = np.zeros([self.X.shape[0]])

        for it in range(self.init_points, num_iterations):
            logger.info("Start iteration %d ... ", it)

            start_time = time.time()

            # Choose next point to evaluate
            do_optimize = (it % self.train_intervall == 0)
            new_x = self.choose_next(self.X, self.Y, do_optimize)

            # Estimate current incumbent
            start_time_inc = time.time()
            startpoints = init_random_uniform(self.task.X_lower,
                                              self.task.X_upper,
                                              self.n_restarts)
            self.incumbent, self.incumbent_value = \
                self.estimator.estimate_incumbent(startpoints)

            self.incumbents.append(self.incumbent)
            self.incumbent_values.append(self.incumbent_value)
            logger.info("New incumbent %s found in %f seconds with "
                        "estimated performance %f",
                        str(self.incumbent), time.time() - start_time_inc,
                        self.incumbent_value)

            time_overhead = time.time() - start_time
            self.time_overhead = np.append(self.time_overhead,
                                           np.array([time_overhead]))

            logger.info("Optimization overhead was %f seconds" %
                        (self.time_overhead[-1]))

            logger.info("Evaluate candidate %s" % (str(new_x)))
            start_time = time.time()
            new_y = self.task.evaluate(new_x)
            time_func_eval = time.time() - start_time
            self.time_func_eval = np.append(self.time_func_eval,
                                            np.array([time_func_eval]))

            logger.info("Configuration achieved a performance of %f " %
                        (new_y[0, 0]))

            logger.info("Evaluation of this configuration took %f seconds" %
                        (self.time_func_eval[-1]))

            # Update the data
            self.X = np.append(self.X, new_x, axis=0)
            self.Y = np.append(self.Y, new_y, axis=0)

            self.runtime.append(time.time() - self.start_time)

            if self.save_dir is not None and (it) % self.num_save == 0:
                hypers = self.model.hypers
                self.save_iteration(
                    it,
                    hyperparameters=hypers,
                    acquisition_value=self.acquisition_func(new_x))

        # TODO: Retrain model and then return the incumbent
        logger.info("Return %s as incumbent with predicted performance %f" %
                    (str(self.incumbent), self.incumbent_value))

        return self.incumbent, self.incumbent_value

    def choose_next(self, X=None, Y=None, do_optimize=True):
        """
        Suggests a new point to evaluate.

        Parameters
        ----------
        X: np.ndarray(N,D)
            Initial points that are already evaluated
        Y: np.ndarray(N,1)
            Function values of the already evaluated points
        do_optimize: bool
            If true the hyperparameters of the model are
            optimized before the acquisition function is
            maximized.

        Returns
        -------
        np.ndarray(1,D)
            Suggested point
        """
        if X is None and Y is None:
            x = self.initial_design(self.task.X_lower,
                                    self.task.X_upper,
                                    N=1)

        elif X.shape[0] == 1:
            # We need at least 2 data points to train a GP
            x = self.initial_design(self.task.X_lower,
                                    self.task.X_upper,
                                    N=1)

        else:
            try:
                logger.info("Train model...")
                t = time.time()
                self.model.train(X, Y, do_optimize=do_optimize)
                logger.info("Time to train the model: %f", (time.time() - t))
            except Exception:
                # The message needs placeholders for its arguments;
                # otherwise the logging module reports a formatting error
                # instead of emitting the record.
                logger.error("Model could not be trained with X=%s, Y=%s",
                             X, Y)
                raise
            self.model_untrained = False
            self.acquisition_func.update(self.model)

            logger.info("Maximize acquisition function...")
            t = time.time()
            x = self.maximize_func.maximize()
            logger.info("Time to maximize the acquisition function: %f",
                        (time.time() - t))

        return x
def __init__(self, acquisition_func, model, maximize_func, task,
             save_dir=None, initial_design=None, initial_points=3,
             incumbent_estimation=None, num_save=1, train_intervall=1,
             n_restarts=1):
    """
    Implementation of the standard Bayesian optimization loop that uses
    an acquisition function and a model to optimize a given task.
    This module keeps track of additional information such as runtime,
    optimization overhead, evaluated points and saves the output
    in a csv file.

    Parameters
    ----------
    acquisition_func: AcquisitionFunctionObject
        The acquisition function which will be maximized.
    model: ModelObject
        Model (i.e. GaussianProcess, RandomForest) that models our current
        belief of the objective function.
    maximize_func: object
        Forwarded unchanged to the base-class constructor.
        NOTE(review): presumably the optimizer of the acquisition
        function -- confirm against the base class.
    task: TaskObject
        Task object that contains the objective function and additional
        meta information such as the lower and upper bound of the search
        space.
    save_dir: String
        Output path
    initial_design: function
        Function that returns some points which will be evaluated before
        the Bayesian optimization loop is started. This allows to
        initialize the model. Falls back to ``init_random_uniform`` when
        None.
    initial_points: int
        Defines the number of initial points that are evaluated before the
        actual Bayesian optimization.
    incumbent_estimation: IncumbentEstimationObject,
        Object to estimate the incumbent based on the current model. The
        incumbent is the current best guess of the global optimum and is
        estimated in each iteration. Falls back to a ``BestObservation``
        built from the model and the task bounds when None.
    num_save: int
        Defines after how many iteration the output is saved.
    train_intervall: int
        The model hyperparameters are optimized in every iteration whose
        index is a multiple of this value.
    n_restarts: int
        Number of random start points handed to the incumbent estimator.
    """
    super(BayesianOptimization, self).__init__(acquisition_func, model,
                                               maximize_func, task,
                                               save_dir)
    self.start_time = time.time()

    # Identity check: ``== None`` would dispatch to a user-defined __eq__.
    if initial_design is None:
        self.initial_design = init_random_uniform
    else:
        self.initial_design = initial_design

    # Evaluated points and per-evaluation timings; allocated once the
    # optimization is started.
    self.X = None
    self.Y = None
    self.time_func_eval = None
    self.time_overhead = None

    self.train_intervall = train_intervall
    self.num_save = num_save
    self.model_untrained = True

    if incumbent_estimation is None:
        self.estimator = BestObservation(self.model,
                                         self.task.X_lower,
                                         self.task.X_upper)
    else:
        self.estimator = incumbent_estimation

    # Initialize both halves of the incumbent so that reading
    # ``incumbent_value`` before the first estimate does not raise
    # AttributeError.
    self.incumbent = None
    self.incumbent_value = None
    self.incumbents = []
    self.incumbent_values = []

    self.n_restarts = n_restarts
    self.init_points = initial_points
    self.runtime = []
class LogEI(BaseAcquisitionFunction):

    def __init__(self, model, X_lower, X_upper, par=0.0, **kwargs):
        r"""
        Computes for a given x the logarithm expected improvement as
        acquisition value.

        Parameters
        ----------
        model: Model object
            A model that implements at least
                 - predict(X)
            If you want to calculate derivatives than it should also support
                 - predictive_gradients(X)
        X_lower: np.ndarray (D)
            Lower bounds of the input space
        X_upper: np.ndarray (D)
            Upper bounds of the input space
        par: float
            Controls the balance between exploration
            and exploitation of the acquisition function. It is subtracted
            from the incumbent value before the improvement is computed.
            Default is 0.0
        """
        super(LogEI, self).__init__(model, X_lower, X_upper)

        self.par = par
        self.rec = BestObservation(self.model, self.X_lower, self.X_upper)

    def update(self, model):
        """
        This method will be called if the model is updated.

        Parameters
        ----------
        model : Model object
            Models the objective function.
        """
        super(LogEI, self).update(model)
        # Rebuild the incumbent estimator so that it refers to the
        # updated model.
        self.rec = BestObservation(self.model, self.X_lower, self.X_upper)

    def compute(self, X, derivative=False, **kwargs):
        """
        Computes the Log EI value.

        Parameters
        ----------
        X: np.ndarray(N, D)
            The input points where the acquisition function
            should be evaluated, with N as the number of points to evaluate
            at and D as the number of dimensions of one X.
        derivative: Boolean
            Derivatives are not implemented yet! If set to True an error
            is logged and None is returned.

        Returns
        -------
        np.ndarray(N,1)
            Log Expected Improvement of X. If any entry of X lies outside
            of [X_lower, X_upper] a single (1,1) array holding the most
            negative finite float is returned instead.
        """
        if derivative:
            # Implicit string concatenation instead of a backslash inside
            # the literal, which embedded the source indentation into the
            # logged message.
            logger.error("LogEI does not support derivative "
                         "calculation until now")
            return

        if np.any(X < self.X_lower) or np.any(X > self.X_upper):
            # ``np.float`` was only an alias of the builtin and has been
            # removed from NumPy; the builtin works on every version.
            return np.array([[-np.finfo(float).max]])

        m, v = self.model.predict(X)

        _, eta = self.rec.estimate_incumbent(None)

        f_min = eta - self.par

        s = np.sqrt(v)

        # A zero standard deviation is handled explicitly in the loop
        # below (z is never read for it), so the division warning carries
        # no information here.
        with np.errstate(divide="ignore", invalid="ignore"):
            z = (f_min - m) / s

        log_ei = np.zeros((m.size, 1))
        for i in range(m.size):
            mu, sigma = m[i], s[i]

            # Degenerate case 1: first term vanishes
            if not np.any(f_min - mu):
                if sigma > 0:
                    log_ei[i] = np.log(sigma) + norm.logpdf(z[i])
                else:
                    log_ei[i] = -np.inf
            # Degenerate case 2: second term vanishes and first term
            # has a special form.
            elif sigma == 0:
                # Compare against f_min itself; the previous
                # ``mu < np.any(f_min)`` compared mu with a boolean.
                if np.any(mu < f_min):
                    log_ei[i] = np.log(f_min - mu)
                else:
                    log_ei[i] = -np.inf
            # Normal case
            else:
                b = np.log(sigma) + norm.logpdf(z[i])
                # log(y+z) is tricky, we distinguish two cases:
                if np.any(f_min > mu):
                    # When y>0, z>0, we define a=ln(y), b=ln(z).
                    # Then y+z = exp[ max(a,b) + ln(1 + exp(-|b-a|)) ],
                    # and thus log(y+z) = max(a,b) + ln(1 + exp(-|b-a|))
                    a = np.log(f_min - mu) + norm.logcdf(z[i])

                    log_ei[i] = max(a, b) + np.log(1 + np.exp(-abs(b - a)))
                else:
                    # When y<0, z>0, we define a=ln(-y), b=ln(z),
                    # and it has to be true that b >= a in
                    # order to satisfy y+z>=0.
                    # Then y+z = exp(b) - exp(a)
                    #          = exp[ b + ln(1 - exp(a-b)) ],
                    # and thus log(y+z) = b + ln(1 - exp(a-b))
                    a = np.log(mu - f_min) + norm.logcdf(z[i])
                    if a >= b:
                        # a>b can only happen due to numerical inaccuracies
                        # or approximation errors
                        log_ei[i] = -np.inf
                    else:
                        log_ei[i] = b + np.log(1 - np.exp(a - b))

        return log_ei
def test(self):
    """
    Smoke test: trains a GaussianProcess on ten random 1-D points with
    sine targets and checks the output shapes of prediction, function
    sampling and predictive variance, then runs every acquisition
    function and every incumbent estimation method on the trained model.
    """

    def check_2d(arr, n_rows, n_cols):
        # The array has to be two dimensional with exactly this shape.
        assert len(arr.shape) == 2
        assert arr.shape[0] == n_rows
        assert arr.shape[1] == n_cols

    X_lower = np.array([0])
    X_upper = np.array([1])
    X = init_random_uniform(X_lower, X_upper, 10)
    Y = np.sin(X)

    kernel = george.kernels.Matern52Kernel(np.ones([1]), ndim=1)
    prior = TophatPrior(-2, 2)
    model = GaussianProcess(kernel, prior=prior)
    model.train(X, Y)

    x_test = init_random_uniform(X_lower, X_upper, 3)
    n_test = x_test.shape[0]

    # Shape matching predict
    m, v = model.predict(x_test)
    check_2d(m, n_test, 1)
    check_2d(v, n_test, n_test)

    #TODO: check gradients

    # Shape matching function sampling
    x_ = np.linspace(X_lower, X_upper, 10)
    x_ = x_[:, np.newaxis]
    funcs = model.sample_functions(x_, n_funcs=2)
    check_2d(funcs, 2, x_.shape[0])

    # Shape matching predict variance
    x_test1 = np.array([np.random.rand(1)])
    x_test2 = np.random.rand(10)[:, np.newaxis]
    var = model.predict_variance(x_test1, x_test2)
    check_2d(var, x_test2.shape[0], 1)

    # Check compatibility with all acquisition functions
    for acq_cls in (EI, PI, LCB, InformationGain):
        acq_func = acq_cls(model, X_upper=X_upper, X_lower=X_lower)
        acq_func.update(model)
        acq_func(x_test)

    # Check compatibility with all incumbent estimation methods
    n_dims = X_upper.shape[0]

    rec = BestObservation(model, X_lower, X_upper)
    inc, inc_val = rec.estimate_incumbent(None)
    check_2d(inc, 1, n_dims)
    check_2d(inc_val, 1, 1)

    for rec_cls in (PosteriorMeanOptimization,
                    PosteriorMeanAndStdOptimization):
        rec = rec_cls(model, X_lower, X_upper)
        startpoints = init_random_uniform(X_lower, X_upper, 4)
        inc, inc_val = rec.estimate_incumbent(startpoints)
        check_2d(inc, 1, n_dims)
        check_2d(inc_val, 1, 1)
class BayesianOptimization(BaseSolver):

    def __init__(self, acquisition_func, model, maximize_func, task,
                 save_dir=None, initial_design=None, initial_points=3,
                 incumbent_estimation=None, num_save=1, train_intervall=1,
                 n_restarts=1):
        """
        Implementation of the standard Bayesian optimization loop that uses
        an acquisition function and a model to optimize a given task.
        This module keeps track of additional information such as runtime,
        optimization overhead, evaluated points and saves the output
        in a csv file.

        Parameters
        ----------
        acquisition_func: BaseAcquisitionFunctionObject
            The acquisition function which will be maximized.
        model: ModelObject
            Model (i.e. GaussianProcess, RandomForest) that models our
            current belief of the objective function.
        maximize_func: object
            Forwarded to the base-class constructor; ``choose_next`` calls
            ``self.maximize_func.maximize()`` to obtain the next candidate.
        task: TaskObject
            Task object that contains the objective function and additional
            meta information such as the lower and upper bound of the search
            space.
        save_dir: String
            Output path
        initial_design: function
            Function that returns some points which will be evaluated before
            the Bayesian optimization loop is started. This allows to
            initialize the model. Falls back to ``init_random_uniform``
            when None.
        initial_points: int
            Defines the number of initial points that are evaluated before
            the actual Bayesian optimization.
        incumbent_estimation: IncumbentEstimationObject,
            Object to estimate the incumbent based on the current model. The
            incumbent is the current best guess of the global optimum and is
            estimated in each iteration. Falls back to a ``BestObservation``
            built from the model and the task bounds when None.
        num_save: int
            Defines after how many iteration the output is saved.
        train_intervall: int
            The model hyperparameters are optimized in every iteration
            whose index is a multiple of this value.
        n_restarts: int
            Number of random start points handed to the incumbent
            estimator in each iteration.
        """
        super(BayesianOptimization, self).__init__(acquisition_func, model,
                                                   maximize_func, task,
                                                   save_dir)
        self.start_time = time.time()

        # Identity check: ``== None`` would dispatch to a user-defined
        # __eq__.
        if initial_design is None:
            self.initial_design = init_random_uniform
        else:
            self.initial_design = initial_design

        # Evaluated points and per-evaluation timings; allocated in run().
        self.X = None
        self.Y = None
        self.time_func_eval = None
        self.time_overhead = None

        self.train_intervall = train_intervall
        self.num_save = num_save
        self.time_start = None
        self.model_untrained = True

        if incumbent_estimation is None:
            self.estimator = BestObservation(self.model,
                                             self.task.X_lower,
                                             self.task.X_upper)
        else:
            self.estimator = incumbent_estimation

        # get_json_data() checks ``incumbent_value is None``, so the
        # attribute has to exist before the first incumbent is estimated.
        self.incumbent = None
        self.incumbent_value = None
        self.incumbents = []
        self.incumbent_values = []

        self.n_restarts = n_restarts
        self.init_points = initial_points
        self.runtime = []

    def run(self, num_iterations=10, X=None, Y=None):
        """
        The main Bayesian optimization loop

        Parameters
        ----------
        num_iterations: int
            The number of iterations. The evaluations of the initial
            design (or the rows of a passed X) count towards this number.
        X: np.ndarray(N,D)
            Initial points that are already evaluated
        Y: np.ndarray(N,1)
            Function values of the already evaluated points

        Returns
        -------
        np.ndarray(1,D)
            Incumbent
        np.ndarray(1,1)
            (Estimated) function value of the incumbent

        Raises
        ------
        Exception
            If a KeyboardInterrupt occurs inside an iteration.
        """
        # Save the time where we start the Bayesian optimization procedure
        self.time_start = time.time()

        if X is None and Y is None:
            self.time_func_eval = np.zeros([self.init_points])
            self.time_overhead = np.zeros([self.init_points])
            self.X = np.zeros([self.init_points, self.task.n_dims])
            self.Y = np.zeros([self.init_points, 1])

            init = self.initial_design(self.task.X_lower,
                                       self.task.X_upper,
                                       N=self.init_points)

            for i, x in enumerate(init):
                x = x[np.newaxis, :]

                logger.info("Evaluate: %s" % x)

                start_time = time.time()
                y = self.task.evaluate(x)

                self.X[i] = x[0, :]
                self.Y[i] = y[0, :]
                self.time_func_eval[i] = time.time() - start_time
                self.time_overhead[i] = 0.0

                logger.info("Configuration achieved a performance "
                            "of %f in %f seconds" %
                            (self.Y[i], self.time_func_eval[i]))

                # Use best point seen so far as incumbent. Only the rows
                # evaluated up to now are considered: the remaining rows
                # of the preallocated self.Y still hold zeros and would
                # win the argmin whenever all observed values are positive.
                best_idx = np.argmin(self.Y[:i + 1])
                self.incumbent = np.array([self.X[best_idx]])
                self.incumbent_value = np.array([self.Y[best_idx]])

                self.incumbents.append(self.incumbent)
                self.incumbent_values.append(self.incumbent_value)

                self.runtime.append(time.time() - self.start_time)

                if self.save_dir is not None and (i) % self.num_save == 0:
                    self.save_iteration(i, hyperparameters=None,
                                        acquisition_value=0)
                    self.save_json(i)
        else:
            self.X = X
            self.Y = Y
            self.time_func_eval = np.zeros([self.X.shape[0]])
            self.time_overhead = np.zeros([self.X.shape[0]])
            # The passed points replace the initial design.
            self.init_points = X.shape[0]

            # Single-argument print calls produce the same text under
            # Python 2 and Python 3.
            print("%s %s" % (X.shape, Y.shape))
            for i in range(Y.shape[0]):
                print("Score: %s %s" % (Y[i][0], X[i]))

        it = self.init_points
        while it < num_iterations:
            self.acquisition_func.update_time(it)
            logger.info("Start iteration %d ... ", it)

            start_time = time.time()

            # Choose next point to evaluate; hyperparameters are only
            # optimized in every train_intervall-th iteration.
            do_optimize = it % self.train_intervall == 0

            try:
                new_x = self.choose_next(self.X, self.Y, do_optimize)

                # Estimate current incumbent
                start_time_inc = time.time()
                startpoints = init_random_uniform(self.task.X_lower,
                                                  self.task.X_upper,
                                                  self.n_restarts)
                self.incumbent, self.incumbent_value = \
                    self.estimator.estimate_incumbent(startpoints)

                self.incumbents.append(self.incumbent)
                self.incumbent_values.append(self.incumbent_value)
                logger.info(
                    "New incumbent %s found in %f seconds with "
                    "estimated performance %f",
                    str(self.incumbent), time.time() - start_time_inc,
                    self.incumbent_value)

                time_overhead = time.time() - start_time
                self.time_overhead = np.append(self.time_overhead,
                                               np.array([time_overhead]))

                logger.info("Optimization overhead was %f seconds" %
                            (self.time_overhead[-1]))

                logger.info("Evaluate candidate %s" % (str(new_x)))
                start_time = time.time()
                new_y = self.task.evaluate(new_x)
                time_func_eval = time.time() - start_time
                self.time_func_eval = np.append(self.time_func_eval,
                                                np.array([time_func_eval]))

                logger.info("Configuration achieved a performance of %f " %
                            (new_y[0, 0]))

                logger.info(
                    "Evaluation of this configuration took %f seconds" %
                    (self.time_func_eval[-1]))

                # Update the data
                self.X = np.append(self.X, new_x, axis=0)
                self.Y = np.append(self.Y, new_y, axis=0)

                self.runtime.append(time.time() - self.start_time)

                if self.save_dir is not None and (it) % self.num_save == 0:
                    hypers = self.model.hypers
                    self.save_iteration(
                        it, hyperparameters=hypers,
                        acquisition_value=self.acquisition_func(new_x))
                    self.save_json(it)

                # Only a fully completed iteration advances the counter.
                it += 1
            except KeyboardInterrupt:
                # Kept as a plain Exception so that callers see the same
                # exception type as before.
                raise Exception
            except Exception:
                # Best effort: the same iteration is started again.
                # SystemExit is no longer swallowed, and the cause of the
                # failure is logged instead of being dropped.
                # NOTE(review): entries appended before the failure
                # (incumbents, time_overhead) are not rolled back, and a
                # deterministic failure retries forever.
                logger.exception("Iteration %d failed", it)
                print("experiment failed, retrying")

        # TODO: Retrain model and then return the incumbent
        logger.info("Return %s as incumbent with predicted performance %f" %
                    (str(self.incumbent), self.incumbent_value))

        return self.incumbent, self.incumbent_value

    def choose_next(self, X=None, Y=None, do_optimize=True):
        """
        Suggests a new point to evaluate.

        Parameters
        ----------
        X: np.ndarray(N,D)
            Initial points that are already evaluated
        Y: np.ndarray(N,1)
            Function values of the already evaluated points
        do_optimize: bool
            If true the hyperparameters of the model are
            optimized before the acquisition function is
            maximized.

        Returns
        -------
        np.ndarray(1,D)
            Suggested point
        """
        if X is None and Y is None:
            x = self.initial_design(self.task.X_lower,
                                    self.task.X_upper,
                                    N=1)

        elif X.shape[0] == 1:
            # We need at least 2 data points to train a GP
            x = self.initial_design(self.task.X_lower,
                                    self.task.X_upper,
                                    N=1)

        else:
            try:
                logger.info("Train model...")
                t = time.time()
                self.model.train(X, Y, do_optimize=do_optimize)
                logger.info("Time to train the model: %f",
                            (time.time() - t))
            except Exception:
                # The message needs placeholders for its arguments;
                # without them the logging module reports a formatting
                # error instead of the message.
                logger.error("Model could not be trained on X=%s, Y=%s",
                             X, Y)
                raise
            self.model_untrained = False
            self.acquisition_func.update(self.model)

            logger.info("Maximize acquisition function...")
            t = time.time()
            x = self.maximize_func.maximize()
            logger.info("Time to maximize the acquisition function: %f",
                        (time.time() - t))

        return x

    def get_json_data(self, it):
        """
        Overrides method in BaseSolver.

        Collects the overhead, the runtime since run() was started, the
        current incumbent with its value and the function evaluation time
        of iteration ``it``. Entries whose source attribute is still None
        are reported as None.

        Parameters
        ----------
        it: int
            Index of the iteration whose timings are reported.

        Returns
        -------
        dict
            The collected values.
        """
        return {
            "optimization_overhead":
                None if self.time_overhead is None
                else self.time_overhead[it],
            "runtime":
                None if self.time_start is None
                else time.time() - self.time_start,
            "incumbent":
                None if self.incumbent is None
                else self.incumbent.tolist(),
            "incumbent_fval":
                None if self.incumbent_value is None
                else self.incumbent_value.tolist(),
            "time_func_eval":
                None if self.time_func_eval is None
                else self.time_func_eval[it],
            "iteration": it
        }
class LogEI(AcquisitionFunction):

    def __init__(self, model, X_lower, X_upper, par=0.01, **kwargs):
        r"""
        Computes for a given x the logarithm expected improvement as
        acquisition value.

        Parameters
        ----------
        model: Model object
            A model that implements at least
                 - predict(X)
            If you want to calculate derivatives than it should also support
                 - predictive_gradients(X)
        X_lower: np.ndarray (D)
            Lower bounds of the input space
        X_upper: np.ndarray (D)
            Upper bounds of the input space
        par: float
            Controls the balance between exploration
            and exploitation of the acquisition function. It is subtracted
            from the incumbent value before the improvement is computed.
            Default is 0.01
        """
        super(LogEI, self).__init__(model, X_lower, X_upper)

        self.par = par
        self.rec = BestObservation(self.model, self.X_lower, self.X_upper)

    def update(self, model):
        """
        This method will be called if the model is updated.

        Parameters
        ----------
        model : Model object
            Models the objective function.
        """
        super(LogEI, self).update(model)
        # Rebuild the incumbent estimator so that it refers to the
        # updated model.
        self.rec = BestObservation(self.model, self.X_lower, self.X_upper)

    def compute(self, X, derivative=False, **kwargs):
        """
        Computes the Log EI value.

        Parameters
        ----------
        X: np.ndarray(N, D)
            The input points where the acquisition function
            should be evaluated, with N as the number of points to evaluate
            at and D as the number of dimensions of one X.
        derivative: Boolean
            Derivatives are not implemented yet! If set to True an error
            is logged and None is returned.

        Returns
        -------
        np.ndarray(N,1)
            Log Expected Improvement of X. If any entry of X lies outside
            of [X_lower, X_upper] a single (1,1) array holding the most
            negative finite float is returned instead.
        """
        if derivative:
            # Implicit string concatenation instead of a backslash inside
            # the literal, which embedded the source indentation into the
            # logged message.
            logger.error("LogEI does not support derivative "
                         "calculation until now")
            return

        if np.any(X < self.X_lower) or np.any(X > self.X_upper):
            # ``np.float`` was only an alias of the builtin and has been
            # removed from NumPy; the builtin works on every version.
            return np.array([[-np.finfo(float).max]])

        m, v = self.model.predict(X)

        _, eta = self.rec.estimate_incumbent(None)

        f_min = eta - self.par

        s = np.sqrt(v)

        # A zero standard deviation is handled explicitly in the loop
        # below (z is never read for it), so the division warning carries
        # no information here.
        with np.errstate(divide="ignore", invalid="ignore"):
            z = (f_min - m) / s

        log_ei = np.zeros((m.size, 1))
        for i in range(m.size):
            mu, sigma = m[i], s[i]

            # Degenerate case 1: first term vanishes
            if not np.any(f_min - mu):
                if sigma > 0:
                    log_ei[i] = np.log(sigma) + norm.logpdf(z[i])
                else:
                    log_ei[i] = -np.inf
            # Degenerate case 2: second term vanishes and first term
            # has a special form.
            elif sigma == 0:
                # Compare against f_min itself; the previous
                # ``mu < np.any(f_min)`` compared mu with a boolean.
                if np.any(mu < f_min):
                    log_ei[i] = np.log(f_min - mu)
                else:
                    log_ei[i] = -np.inf
            # Normal case
            else:
                b = np.log(sigma) + norm.logpdf(z[i])
                # log(y+z) is tricky, we distinguish two cases:
                if np.any(f_min > mu):
                    # When y>0, z>0, we define a=ln(y), b=ln(z).
                    # Then y+z = exp[ max(a,b) + ln(1 + exp(-|b-a|)) ],
                    # and thus log(y+z) = max(a,b) + ln(1 + exp(-|b-a|))
                    a = np.log(f_min - mu) + norm.logcdf(z[i])

                    log_ei[i] = max(a, b) + np.log(1 + np.exp(-abs(b - a)))
                else:
                    # When y<0, z>0, we define a=ln(-y), b=ln(z),
                    # and it has to be true that b >= a in
                    # order to satisfy y+z>=0.
                    # Then y+z = exp(b) - exp(a)
                    #          = exp[ b + ln(1 - exp(a-b)) ],
                    # and thus log(y+z) = b + ln(1 - exp(a-b))
                    a = np.log(mu - f_min) + norm.logcdf(z[i])
                    if a >= b:
                        # a>b can only happen due to numerical inaccuracies
                        # or approximation errors
                        log_ei[i] = -np.inf
                    else:
                        log_ei[i] = b + np.log(1 - np.exp(a - b))

        return log_ei
class EI(BaseAcquisitionFunction):

    def __init__(self, model, X_lower, X_upper, par=0.0, **kwargs):
        r"""
        Expected improvement acquisition function.

        :math:`EI(X) := \mathbb{E}\left[ \max\{0, f(\mathbf{X^+}) -
        f_{t+1}(\mathbf{X}) - \xi \} \right]`, with
        :math:`f(X^+)` as the incumbent.

        Parameters
        ----------
        model: Model object
            A model that implements at least
                 - predict(X)
            If you want to calculate derivatives than it should also support
                 - predictive_gradients(X)
        X_lower: np.ndarray (D)
            Lower bounds of the input space
        X_upper: np.ndarray (D)
            Upper bounds of the input space
        par: float
            Controls the balance between exploration
            and exploitation of the acquisition function. It is subtracted
            from the improvement over the incumbent.
            Default is 0.0
        """
        super(EI, self).__init__(model, X_lower, X_upper)

        self.par = par
        self.rec = BestObservation(self.model, self.X_lower, self.X_upper)

    def update(self, model):
        """
        Called whenever the model has been updated.

        Parameters
        ----------
        model : Model object
            Models the objective function.
        """
        super(EI, self).update(model)
        # A fresh incumbent estimator is built on top of the new model.
        self.rec = BestObservation(self.model, self.X_lower, self.X_upper)

    def compute(self, X, derivative=False, **kwargs):
        """
        Evaluates EI (and optionally its gradient) at a single point.

        Parameters
        ----------
        X: np.ndarray(1, D)
            The input point where the acquisition function is evaluated.
            An array with more than one entry along its first axis is
            rejected.
        derivative: Boolean
            If set to True the derivative of the acquisition function
            at X is returned as well.

        Returns
        -------
        np.ndarray(1,1)
            Expected Improvement of X; zero if X violates the bounds or
            the predicted standard deviation is zero.
        np.ndarray
            Derivative of Expected Improvement at X
            (only if derivative=True). Zeros of shape (1,D) for a zero
            standard deviation; for a point outside of the bounds those
            zeros are wrapped in one more leading axis.

        Raises
        ------
        ValueError
            If X holds more than one point or if the computed
            improvement is negative.
        """
        if X.shape[0] > 1:
            raise ValueError("EI is only for single test points")

        if len(X.shape) == 1:
            X = X[:, np.newaxis]

        # Points outside of the box get no improvement at all.
        out_of_bounds = np.any(X < self.X_lower) or np.any(X > self.X_upper)
        if out_of_bounds:
            if not derivative:
                return np.array([[0]])
            zero_grad = np.zeros((1, X.shape[1]))
            return np.array([[0]]), np.array([zero_grad])

        mean, var = self.model.predict(X)

        # Use the best seen observation as incumbent
        _, best = self.rec.estimate_incumbent(None)

        std = np.sqrt(var)

        # Without predictive uncertainty there is nothing to gain.
        if (std == 0).any():
            ei = np.array([[0]])
            grad = np.zeros((1, X.shape[1]))
            if derivative:
                return ei, grad
            return ei

        u = (best - mean - self.par) / std
        ei = std * (u * norm.cdf(u) + norm.pdf(u))

        if derivative:
            grad_mean, grad_var = self.model.predictive_gradients(X)
            grad_mean = grad_mean[0]
            grad_var = grad_var[0][:, None]
            # Chain rule: d sqrt(v) / dx = (dv / dx) / (2 * sqrt(v))
            grad_std = grad_var / (2 * std)
            grad = (-grad_mean * norm.cdf(u) + (grad_std * norm.pdf(u))).T

        if (ei < 0).any():
            logger.error("Expected Improvement is smaller than 0!")
            raise ValueError

        if derivative:
            return ei, grad
        return ei