def gp_fit(M=108, seed=0):
    # Make data
    np.random.seed(seed)
    I = np.random.permutation(X.shape[0])[:M]
    gp = GaussianProcess()
    gp.fit(X[I, :], fX[I])
    return gp
def test_gaussian_process():
    space = Space({
        "x": [[-3.0, 3.0] for _ in range(2)],
        "y": [[-3.0, 3.0] for _ in range(2)]
    })
    GaussianProcess(to_maximize, space).maximize().plot(path="history_maximize.png")
    GaussianProcess(to_minimize, space).minimize().plot(path="history_minimize.png")
def run_demo(args):
    """
    @brief a Gaussian Process regression example using an input covariance model
    """
    np.random.seed(1)

    def f(x):
        """
        @brief the function to predict.
        """
        return x * np.sin(x)

    # the input data points
    X = np.linspace(0.1, 9.9, 20)

    # make the observations with added noise
    y = f(X).ravel()
    dy = 0.5 + 1.0 * np.random.random(y.shape)
    noise = np.random.normal(0, dy)
    y += noise

    # mesh the input space for evaluations of the prediction
    x = np.linspace(-2, 12, 2 * len(X))

    # instantiate a Gaussian Process model, allowing all params to vary
    gp = GaussianProcess(theta0=[0.5, 2.0, 1.0],
                         covfunction=args.covariance,
                         verbose=True,
                         fixed=[False, False, False],
                         random_start=10)

    # fit to data using Maximum Likelihood Estimation of the parameters
    gp.fit(X, y, dy)

    # make the prediction on the meshed x-axis
    y_pred, sigma = gp.predict(x)

    # plot the function, the prediction and the 95% confidence interval
    # (1.96 standard deviations)
    fig = pl.figure()
    pl.plot(x, f(x), 'r:', label=r'$f(x) = x \ \mathrm{sin}(x)$')
    pl.errorbar(X.ravel(), y, dy, label='Observations')
    pl.plot(x, y_pred, label='Prediction')
    pl.fill(np.concatenate([x, x[::-1]]),
            np.concatenate([y_pred - 1.9600 * sigma,
                            (y_pred + 1.9600 * sigma)[::-1]]),
            alpha=.2, fc='DarkGoldenRod', ec="None",
            label='95% confidence interval')
    pl.xlabel('$x$', fontsize=16)
    pl.ylabel('$f(x)$', fontsize=16)
    pl.ylim(-15, 20)
    pl.legend(loc='upper left')
    pl.show()
def get_estimated_mean_and_std(gp: GaussianProcess, array_samples_parameters, X):
    X = X.reshape((-1, gp.array_dataset.shape[1]))
    for num_samples, sample_gp_parameter in enumerate(array_samples_parameters):
        gp.set_kernel_parameters(*sample_gp_parameter.flatten())
        # yield one GP function sample per sampled kernel-parameter vector
        yield gp.get_sample(X)
        if num_samples % 50 == 0:
            print(f'num samples: {num_samples}', end='\r')
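A minimal consumer sketch for the generator above, assuming `gp`, `array_samples_parameters` and `X` match the signature documented there: stack the yielded draws and reduce them to a Monte Carlo mean and standard deviation per test point.

import numpy as np

def reduce_gp_samples(gp, array_samples_parameters, X):
    # Sketch (assumed usage): aggregate the per-parameter-sample GP draws
    # into a Monte Carlo estimate of the posterior mean and std.
    samples = np.stack(list(get_estimated_mean_and_std(gp, array_samples_parameters, X)))
    return samples.mean(axis=0), samples.std(axis=0)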
def __init__(self, discount=0.9, perf_weight=1., depth=0, n_sim=10):
    # Settings
    self.discount = discount
    self.n_queries = 20
    self.depth = depth
    self.n_sim = n_sim
    self.n_reward_samples = 200
    self.perf_sample_std = 0.001
    self.runtime_sample_std = 0.001
    self.perf_weight = perf_weight
    batch_size = 10

    # create bayesian optimizer and gaussian process
    self.kernel = SquaredExponential(n_dim=1,  # assuming 1 hyperparameter
                                     init_scale_range=(.01, .1),
                                     init_amp=1.)
    self.gp = GaussianProcess(n_epochs=100,
                              batch_size=batch_size,
                              n_dim=1,  # assuming 1 hyperparameter
                              kernel=self.kernel,
                              noise=0.01,
                              train_noise=False,
                              optimizer=tf.train.AdagradOptimizer(0.01),
                              # optimizer=tf.train.GradientDescentOptimizer(0.01),
                              verbose=0)
    self.bo = BayesianOptimizer(self.gp,
                                region=np.array([[0., 1.]]),  # assuming 1 hyperparameter
                                iters=100,
                                tries=2,
                                optimizer=tf.train.GradientDescentOptimizer(0.1),
                                verbose=0)
def _evaluate(self,
              gaussian_process: GaussianProcess,
              data_points: np.ndarray) -> np.ndarray:
    """
    Evaluates the acquisition function at all the data points
    :param gaussian_process:
    :param data_points: numpy array of dimension n x m where n is the number of elements to evaluate
    and m is the number of variables used to calculate the objective function
    :return: a numpy array of shape n x 1 (or a float) representing the estimation of the acquisition
    function at each point
    """
    mean_data_points, std_data_points = gaussian_process.get_gp_mean_std(
        data_points.reshape((-1, gaussian_process.array_dataset.shape[1])))
    mean = mean_data_points.flatten()
    mean_opt = np.min(gaussian_process.array_objective_function_values)
    # expected improvement for minimisation
    difference = mean_opt - mean
    Z = difference / std_data_points
    ei = difference * norm.cdf(Z) + std_data_points * norm.pdf(Z)
    ei[std_data_points == 0.0] = 0.0
    return ei
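For reference, a standalone numpy/scipy sketch of the same expected-improvement formula computed above, EI(x) = (f* - mu(x)) Phi(Z) + sigma(x) phi(Z) with Z = (f* - mu(x)) / sigma(x); the function name and signature here are illustrative, not part of the class.

import numpy as np
from scipy.stats import norm

def expected_improvement(mean, std, best_value):
    # EI for minimisation: (f* - mu) * Phi(Z) + sigma * phi(Z), Z = (f* - mu) / sigma
    mean = np.asarray(mean, dtype=float)
    std = np.asarray(std, dtype=float)
    improvement = best_value - mean
    # suppress the divide-by-zero warning; points with std == 0 are zeroed below
    with np.errstate(divide='ignore', invalid='ignore'):
        z = improvement / std
        ei = improvement * norm.cdf(z) + std * norm.pdf(z)
    ei[std == 0.0] = 0.0
    return ei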
def rui_1d():
    from kernels import SquaredExponential
    from gaussian_process import GaussianProcess
    import matplotlib.pyplot as plt

    batch_size = 4
    new_samples = 1000
    n_dim = 1

    # Set up the modules for bayesian optimizer
    kernel = SquaredExponential(n_dim=n_dim,
                                init_scale_range=(.1, .5),
                                init_amp=1.)
    gp = GaussianProcess(n_epochs=100,
                         batch_size=10,
                         n_dim=n_dim,
                         kernel=kernel,
                         noise=0.05,
                         train_noise=False,
                         optimizer=tf.train.GradientDescentOptimizer(0.001),
                         verbose=0)
    bo = BayesianOptimizer(gp,
                           region=np.array([[0., 1.]]),
                           iters=100,
                           tries=20,
                           optimizer=tf.train.GradientDescentOptimizer(0.1),
                           verbose=1)

    X = np.array([1.0, 0.000335462627903, 0.0314978449076, 2980.95798704]).reshape(-1, 1)
    X = np.log(X) / 8
    y = np.array([0.864695262443, 0.5, 0.860244469176, 0.862691649896]).reshape(-1, 1)
    X_old = X
    y_old = y

    bo.fit(X, y)
    x_next, y_next, acq_next = bo.select()

    x = np.linspace(-1, 1).reshape(-1, 1)
    y_pred, var = gp.np_predict(x)
    ci = 2 * np.sqrt(var)  # ~95% confidence interval
    plt.plot(x, y_pred)
    plt.plot(x, y_pred + ci, 'g--')
    plt.plot(x, y_pred - ci, 'g--')
    plt.scatter(X_old, y_old)
    plt.plot([x_next[0, 0], x_next[0, 0]], plt.ylim(), 'r--')
    plt.show()
def plot_vi_gp(obj, mu, Sigma, X, y):
    gp = GaussianProcess(GaussianLinearKernel(0., 0., 0., 0., 0., 0.), X, y)
    xlim, = obj.boundaries
    x_gt = np.linspace(xlim[0], xlim[1], 100)
    xx = np.linspace(xlim[0] - 2, xlim[1] + 2, 200)
    plt.plot(x_gt, obj.evaluate_without_noise(x_gt), c='c')
    plt.title("Gaussian Process Regression")
    mu = mu.flatten()
    for _ in range(500):
        sample_gp_parameter = onp.random.multivariate_normal(mu, Sigma)
        gp.set_kernel_parameters(*sample_gp_parameter)
        function_sample = gp.get_sample(xx.reshape((-1, 1)))
        plt.plot(xx, function_sample, alpha=0.3, c='C0')
    plt.scatter(gp.array_dataset, gp.array_objective_function_values,
                c='m', marker="+", zorder=1000, s=(30,))
    plt.pause(0.01)
    plt.show()
def get_log_upper_proba_distribution_gp(gaussian_process: GaussianProcess,
                                        theta: np.ndarray):
    """
    This function evaluates log( p_1(theta | X, y) ) where:
    - p_1 = Z * p
    - p is the posterior distribution
    - p_1 is easy to calculate

    There are 2 methods that you might find useful in the class GaussianProcess:
    - get_log_marginal_likelihood
    - get_log_prior_at

    :param gaussian_process:
    :param theta: parameters at which we evaluate p_1. In our example, it is a numpy array
    (row vector) of shape (6,), as our linear + gaussian kernel depends on 6 real numbers.
    :return: log( p_1(theta | X, y) )
    """
    log_marginal_likelihood = gaussian_process.get_log_marginal_likelihood(*theta)
    log_prior = gaussian_process.get_log_prior_at(*theta)
    return log_marginal_likelihood + log_prior
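A minimal random-walk Metropolis-Hastings sketch built on the unnormalised log-posterior above; the step size and starting point are illustrative assumptions, and the GaussianProcess instance is assumed to carry the 6-parameter linear + gaussian kernel described in the docstring.

import numpy as np

def sample_posterior_mh(gaussian_process, n_steps=1000, sigma_proposal=0.4):
    # Sketch: random-walk MH over the 6 kernel parameters, using
    # log p_1(theta | X, y) defined above as the (unnormalised) target.
    theta = np.zeros(6)  # illustrative starting point
    log_p = get_log_upper_proba_distribution_gp(gaussian_process, theta)
    samples = []
    for _ in range(n_steps):
        proposal = theta + sigma_proposal * np.random.randn(6)
        log_p_proposal = get_log_upper_proba_distribution_gp(gaussian_process, proposal)
        # Accept with probability min(1, p_1(proposal) / p_1(theta));
        # the unknown normalising constant Z cancels in the ratio.
        if np.log(np.random.rand()) < log_p_proposal - log_p:
            theta, log_p = proposal, log_p_proposal
        samples.append(theta)
    return np.array(samples)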
def gp_fit(M=108, seed=0, obj='lift'):
    # Make data
    np.random.seed(seed)
    I = np.random.permutation(X.shape[0])[:M]
    gp = GaussianProcess()
    if obj == 'lift':
        gp.fit(X[I, :], f_lift[I])
    elif obj == 'drag':
        gp.fit(X[I, :], f_drag[I])
    return gp
def _evaluate(self,
              gaussian_process: GaussianProcess,
              data_points: np.ndarray) -> np.ndarray:
    """
    Evaluates the acquisition function at all the data points
    :param gaussian_process:
    :param data_points: numpy array of dimension n x m where n is the number of elements to evaluate
    and m is the number of variables used to calculate the objective function
    :return: a numpy array of shape n x 1 (or a float) representing the estimation of the acquisition
    function at each point
    """
    array_objective_function_values = gaussian_process.array_objective_function_values
    best_objective_function_value = np.min(array_objective_function_values)
    mean_data_points, std_data_points = gaussian_process.get_gp_mean_std(data_points)
    mean_data_points = mean_data_points.reshape((-1, 1))
    std_data_points = std_data_points.reshape((-1, 1))
    gamma = (best_objective_function_value - mean_data_points) / std_data_points
    return std_data_points * (gamma * norm.cdf(gamma) + norm.pdf(gamma))
a_kri = np.zeros((nSamples, nSamples))
b_kri = np.zeros((nSamples, nSamples))
for i in range(nSamples):
    for j in range(nSamples):
        a_kri[j, i] = np.dot(U_comp[:, j].T, uV[:, i])
        b_kri[j, i] = np.dot(G_comp[:, j].T, gV[:, i])
np.save("../results/Offline/a_kri", a_kri)
np.save("../results/Offline/b_kri", b_kri)

# Trained solution
mean = "constant"
covariance = "squared_exponential"
theta_U = np.array([100000.0] * 6)
theta_L = np.array([0.001] * 6)
theta_0 = np.array([1.0] * 6)
for i in range(nSamples):
    GP_u = GaussianProcess(regr=mean, corr=covariance,
                           theta0=theta_0, thetaL=theta_L, thetaU=theta_U)
    GP_u.fit(pCandMax.T, a_kri[:, i])
    GP_g = GaussianProcess(regr=mean, corr=covariance,
                           theta0=theta_0, thetaL=theta_L, thetaU=theta_U)
    GP_g.fit(pCandMax.T, b_kri[:, i])
    joblib.dump(GP_u, "../results/Offline/GP_alpha_" + str(i) + ".pkl")
    joblib.dump(GP_g, "../results/Offline/GP_beta_" + str(i) + ".pkl")

toc = timeit.default_timer()
print("KRIGING COMPUTATION TIME: " + str(toc - tic) + " s")
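A possible online-stage counterpart to this offline script, sketched under assumptions: each saved regressor predicts one modal coefficient at a new parameter point `p_new`, the coefficients are recombined with the POD bases `U_comp` / `G_comp` from above, and `predict` follows the legacy scikit-learn GaussianProcess API used by the training code.

import numpy as np
import joblib

def predict_fields(p_new, n_modes, U_comp, G_comp):
    # Sketch of the online stage: evaluate each saved GP at the new
    # parameter point and recombine the predicted modal coefficients
    # with the offline POD bases (names assumed from the script above).
    a_pred = np.empty(n_modes)
    b_pred = np.empty(n_modes)
    for i in range(n_modes):
        GP_u = joblib.load("../results/Offline/GP_alpha_" + str(i) + ".pkl")
        GP_g = joblib.load("../results/Offline/GP_beta_" + str(i) + ".pkl")
        a_pred[i] = GP_u.predict(p_new.reshape(1, -1))
        b_pred[i] = GP_g.predict(p_new.reshape(1, -1))
    return U_comp[:, :n_modes] @ a_pred, G_comp[:, :n_modes] @ b_pred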
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm as stats_norm

from gaussian_process import GaussianProcess, SquaredDistanceKernel

BOUNDS = [0, 10, -10, 10]
PLOT_POINT_COUNT = 1000


def func(v):
    return v * np.sin(v)


gp = GaussianProcess(kernel=SquaredDistanceKernel(kernel_param=0.01),
                     noise_variance=1E-3)

fig = plt.figure()
ax_data = fig.add_subplot(311)
ax_acquisition = fig.add_subplot(312)
ax_func = fig.add_subplot(313)

l_mu = None
l_data = None
l_stddev = None
l_acquisition = None
l_acquisition_area = None
l_func = None

xx = np.linspace(BOUNDS[0], BOUNDS[1], PLOT_POINT_COUNT).reshape(-1, 1)
"--mu_memory={mu_memory}".format(mu_memory=mu_memory), "--mu_data={mu_data}".format(mu_data=mu_data), "--ant_system" ]) df = pd.read_csv(filename, index_col=0) score = int(-df["mean"].values.flatten()[0]) return score space = Space({ "oblivion": (0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0), "mu_memory": [0, 1000], "mu_data": [0, 1000] }) n_calls = 1000 gp = GaussianProcess(score, space) with Notipy(): results = gp.minimize(n_calls=n_calls, n_random_starts=10, callback=[ TQDMGaussianProcess(n_calls=n_calls), DeltaYStopper(**{ "delta": 1, "n_best": 100 }) ], random_state=42, n_jobs=cpu_count()) print(gp.best_parameters) pd.DataFrame(dict(gp.best_parameters),
def gaussian_process(kernel, nb_training_points=6, nb_samples=10,
                     plot_dist=False, plot_gt=True, save=None):
    """
    Fit and plot a Gaussian process.

    # Parameters
    * kernel: an object deriving from gaussian_process.Kernel
        Kernel used to fit the GP
    * nb_training_points: int, default 6
        How many points to fit
    * nb_samples: int, default 10
        How many samples to draw
    * plot_dist: bool, default False
        If True, plot the mean and 95% confidence interval of the GP
    * plot_gt: bool, default True
        If True, plot the true underlying function
    * save: str, default None
        If not None, save the figure under `save`
    """
    # Prepare training and test set
    def f(x):
        return 2 * np.sin(2 * x) / x

    rs = np.random.RandomState(5)
    x = np.linspace(0.1, 10., 50)
    rs.shuffle(x)
    x_train = x[:nb_training_points]
    y_train = f(x_train)
    x_pred = np.linspace(-1, 12., 1000)
    y = f(x_pred)

    # Fit and predict
    gp = GaussianProcess(kernel, random_state=rs)
    if nb_training_points > 0:
        gp.fit(x_train.reshape(-1, 1), y_train)
    y_pred, std_pred = gp.predict(x_pred.reshape(-1, 1), return_std=True)
    y_pred = y_pred.squeeze()
    std_pred += 1e-15  # Nobody likes 0

    # Configure plot settings
    color = sns.diverging_palette(15, 255, n=9, s=90, l=40)
    fig = plt.figure(figsize=(12, 4))
    sns.set_style("dark")
    ax = plt.Axes(fig, [0., 0., 1., 1.])
    fig.add_axes(ax)

    # Plot ground truth if required
    if plot_gt:
        plt.plot(x_pred, y, c=color[1], lw=3, label=u'Truth')

    # Plot mean and 95% prediction interval if required
    if plot_dist:
        plt.plot(x_pred, y_pred, c=color[8], lw=3, label=u'Prediction', zorder=4)
        plt.fill(np.concatenate([x_pred, x_pred[::-1]]),
                 np.concatenate([y_pred - 1.9600 * std_pred,
                                 (y_pred + 1.9600 * std_pred)[::-1]]),
                 alpha=.4, fc=color[7], ec='None',
                 label="95% prediction interval")

    # Plot required number of samples
    if nb_samples > 0:
        samples = gp.sample_y(x_pred.reshape(-1, 1), nb_samples)
        plt.plot(x_pred, samples)

    # Plot training set
    plt.scatter(x_train, y_train, facecolors=color[0], s=80, zorder=5)

    # More plot settings
    plt.xlim([-1, 12.])
    plt.ylim([-5, 5.])
    ax.xaxis.set_visible(False)
    ax.yaxis.set_visible(False)
    if save is None:
        plt.show()
    else:
        plt.savefig(save, bbox_inches='tight', pad_inches=0)
    plt.close()
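A possible invocation of this helper; the kernel class name below is an assumption about the accompanying gaussian_process module, not a documented API.

# Hypothetical usage; `RBF` stands in for whatever Kernel subclass the
# accompanying gaussian_process module actually provides.
# from gaussian_process import RBF
# gaussian_process(RBF(length_scale=1.0), nb_training_points=6,
#                  nb_samples=10, plot_dist=True, save="gp_fit.png")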
def run_demo(args):
    """
    @brief a Gaussian Process regression example that fits several supernovae spectra
    """
    # read in the relevant files in correct order
    f1 = glob("../data/SN2011fe/11feM*")
    f1.sort(reverse=True)
    f2 = glob("../data/SN2011fe/11feP*")
    f2.sort()
    files = f1 + f2

    pl.ion()

    # fit each supernova spectrum in serial
    for i, f in enumerate(files):
        file_root = os.path.splitext(os.path.basename(f))[0]
        time = float(file_root[-3:]) / 10.
        if 'M' in file_root:
            time *= -1.

        # load the data from inputdata.txt
        X, Y, Yerr = np.loadtxt(f, unpack=True)

        # save these for later
        X_tot = X.copy()
        Y_tot = Y.copy()
        Yerr_tot = Yerr.copy()

        n_eval = len(X)
        batch_size = args.batch_size
        resolution = args.resolution

        xrec_full = []
        yrec_full = []
        yerr_full = []

        # instantiate a Gaussian Process model, allowing all params to vary
        gp = GaussianProcess(theta0=[1e-13, 2.0, 1e-13],
                             covfunction=args.covariance,
                             verbose=True,
                             fixed=[False, False, False])

        nbatches = max(1, n_eval // batch_size + 1)

        # fit the spectra in batches along the x axis
        for k in range(nbatches):
            batch_from = k * batch_size
            batch_to = min((k + 1) * batch_size + 1, n_eval + 1)
            if k == nbatches - 1:
                batch_to = len(X_tot)

            xmin = np.amin(X_tot[batch_from:batch_to])
            xmax = np.amax(X_tot[batch_from:batch_to])
            nstar = len(X_tot[batch_from:batch_to]) * resolution

            # pad the batch edges by 10% so neighbouring fits overlap
            batch_to = int(batch_to + 0.1 * batch_size)
            batch_from = max(0, int(batch_from - 0.1 * batch_size))

            X = X_tot[batch_from:batch_to]
            Y = Y_tot[batch_from:batch_to]
            Yerr = Yerr_tot[batch_from:batch_to]

            # mesh the input space for evaluations of the prediction
            x = np.linspace(xmin, xmax, nstar)

            # fit to data using Maximum Likelihood Estimation of the parameters
            gp.fit(X, Y, Yerr)

            # make the prediction on the meshed x-axis
            y_pred, sigma = gp.predict(x)

            xrec_full += list(x)
            yrec_full += list(y_pred)
            yerr_full += list(sigma)

        yerr_full = np.array(yerr_full)
        yrec_full = np.array(yrec_full)

        # plot the function, the prediction and the 95% confidence interval
        # (1.96 standard deviations)
        pl.cla()
        pl.plot(X_tot, Y_tot, label='Observations')
        pl.plot(xrec_full, yrec_full, label='Prediction')
        pl.fill(np.concatenate([xrec_full, xrec_full[::-1]]),
                np.concatenate([yrec_full - 1.9600 * yerr_full,
                                (yrec_full + 1.9600 * yerr_full)[::-1]]),
                alpha=0.5, fc='DarkGoldenRod', ec="None",
                label='95% confidence interval')
        pl.xlabel(r'$\lambda \ (\AA)$', fontsize=16)
        pl.ylabel(r'$\mathrm{Flux \ (erg/s/cm^2/\AA)}$', fontsize=16)
        pl.legend(loc='upper right')
        pl.title("SNe 2011fe %+.1f days relative to B-band max" % time)
        pl.ylim(-0.2e-12, 1.2e-12)
        pl.savefig("figures/SN11fe_%02d.png" % i)
        pl.draw()
class BayesianOptimization(object):

    def __init__(self, score_func, bounds, policy='ei', epsilon=1e-7,
                 lambda_val=1.5, gp_params=None):
        assert policy == 'ei' or policy == 'ucb'
        self.score_func = score_func
        self.bounds = bounds
        self.policy = policy
        self.epsilon = epsilon
        self.lambda_val = lambda_val  # for ucb policy only

        if gp_params is not None:
            self.gp = GaussianProcess(**gp_params)
        else:
            n_params = bounds.shape[0]
            length_scale = 0.5 * np.ones(n_params)
            bounds = np.tile(np.array([1e-2, 1e2]), (n_params, 1))
            kernel = RBFKernel(length_scale=length_scale, length_scale_bounds=bounds)
            self.gp = GaussianProcess(kernel, alpha=0.03)

    def clone(self):
        cloned_obj = BayesianOptimization(self.score_func, self.bounds,
                                          self.policy, self.epsilon,
                                          self.lambda_val)
        cloned_obj.gp = self.gp.clone()
        return cloned_obj

    def fit(self, n_iter=10, x0=None, n_pre_samples=5, random_search=False):
        """
        Apply Bayesian Optimization to find the optimal parameter
        """
        if x0 is None:
            assert n_pre_samples is not None and n_pre_samples > 0
        if random_search:
            assert random_search > 1

        n_params = self.bounds.shape[0]
        x_list = []
        y_list = []
        if x0 is None:
            for params in np.random.uniform(self.bounds[:, 0], self.bounds[:, 1],
                                            size=(n_pre_samples, n_params)):
                x_list.append(params)
                y_list.append(self.score_func(params))
        else:
            for params in x0:
                x_list.append(params)
                y_list.append(self.score_func(params))

        X = np.atleast_2d(np.array(x_list))
        y = np.array(y_list)

        for i in range(n_iter):
            self.gp.fit(X, y)
            if random_search:
                x_candidates = np.random.uniform(self.bounds[:, 0], self.bounds[:, 1],
                                                 size=(random_search, n_params))
                acquisitions = -self.acquisition_function(x_candidates, y,
                                                          n_params, self.policy)
                next_sample = x_candidates[np.argmax(acquisitions)]
            else:
                next_sample = self.sample_next_hyperparameter(
                    self.acquisition_function, y, n_restart=10, policy=self.policy)

            # if the proposed sample (nearly) duplicates an existing one,
            # fall back to a random point
            if np.any(np.abs(next_sample - X) <= self.epsilon):
                next_sample = np.random.uniform(self.bounds[:, 0], self.bounds[:, 1])

            x_list.append(next_sample)
            y_list.append(self.score_func(next_sample))
            X = np.atleast_2d(np.array(x_list))
            y = np.array(y_list)

        self.X_search = X
        self.y_search = y

    def optimal(self):
        return self.X_search[np.argmax(self.y_search)], np.max(self.y_search)

    def get_iteration_history(self):
        return self.X_search, self.y_search

    def acquisition_function(self, X, y, n_params, policy):
        if policy == 'ei':
            return self.negative_expected_improvement(X, y, n_params)
        elif policy == 'ucb':
            return self.negative_upper_confidence_bound(X, y, n_params)
        else:
            raise ValueError("unknown policy {0:}".format(policy))

    def negative_expected_improvement(self, X, y, n_params):
        X = np.reshape(X, (-1, n_params))
        mu, Sigma = self.gp.predict(X, return_cov=True)
        sigma = np.sqrt(np.diag(Sigma))
        mu = mu.ravel()
        sigma = sigma.ravel()
        f_best = np.max(y)
        Z = (mu - f_best) / sigma
        ei = (mu - f_best) * norm.cdf(Z) + sigma * norm.pdf(Z)
        ei[sigma == 0.0] = 0.0
        return -ei

    def negative_upper_confidence_bound(self, X, y, n_params):
        X = np.reshape(X, (-1, n_params))
        mu, Sigma = self.gp.predict(X, return_cov=True)
        sigma = np.sqrt(np.diag(Sigma))
        mu = mu.ravel()
        sigma = sigma.ravel()
        ucb = mu + self.lambda_val * sigma
        return -ucb

    def sample_next_hyperparameter(self, acquisition_function, y, n_restart, policy):
        n_params = self.bounds.shape[0]
        best_x = None
        best_acquisition_value = np.inf
        for initial_value in np.random.uniform(self.bounds[:, 0], self.bounds[:, 1],
                                               size=(n_restart, n_params)):
            res = minimize(fun=acquisition_function,
                           x0=initial_value,
                           bounds=self.bounds,
                           method='L-BFGS-B',
                           args=(y, n_params, policy))
            if res.fun < best_acquisition_value:
                best_acquisition_value = res.fun
                best_x = res.x
        return best_x
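A minimal usage sketch for the class above: maximise a toy 1-D score function with the default GP and the EI policy (the toy function and its bounds are illustrative).

import numpy as np

def toy_score(params):
    # Illustrative 1-D score to maximise; optimum at x = 2.
    x = params[0]
    return -(x - 2.0) ** 2

bo = BayesianOptimization(toy_score, bounds=np.array([[-5.0, 5.0]]), policy='ei')
bo.fit(n_iter=10, n_pre_samples=5)
x_best, y_best = bo.optimal()
print(x_best, y_best)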
class BayesianOptimization:

    def __init__(self, opt_fun, bounds, kernel, acquisition,
                 n_random_samples=None, X_train=None,
                 X_pre_calc=None, Y_pre_calc=None):
        """
        Find optima within bounds using bayesian optimization

        Parameters
        ----------
        opt_fun : function call
            function to be optimized.
        bounds : list of tuples
            lower and upper limit for each variable.
        kernel : kernel call
            initialized kernel class.
        acquisition : function call
            function used for determining the next point.
        n_random_samples : int, optional
            number of randomly generated samples within the bounds
        X_train : numpy array, optional
            sample points to estimate
        X_pre_calc : numpy array, optional
            sample points already estimated
        Y_pre_calc : numpy array, optional
            function values for an already calculated sample
        """
        self.opt_fun = opt_fun
        self.bounds = bounds
        self.n_vals = len(bounds)
        self.kernel = kernel
        self.acquisition = acquisition
        self.construct_sample(n_random_samples, X_train, X_pre_calc, Y_pre_calc)
        self.gpr = GaussianProcess(self.X_sample, self.Y_sample, kernel)

    def opt_fun_iter(self, x):
        return np.array([self.opt_fun(x[i, :]) for i in range(x.shape[0])])

    def construct_sample(self, n_random_samples, X_train, X_pre_calc, Y_pre_calc):
        """
        Function for combining all sample values and creating random input
        values and their corresponding outputs.
        """
        if isinstance(X_train, np.ndarray):
            Y_train = self.opt_fun_iter(X_train)
        elif X_train is None:
            X_train = np.empty((0, self.n_vals))
            Y_train = np.empty(0)
        else:
            raise ValueError("X_train needs to be a numpy array.")

        if type(n_random_samples) in [int, float]:
            X_rand = np.random.random((n_random_samples, self.n_vals))
            for i, val in enumerate(self.bounds):
                X_rand[:, i] *= val[1] - val[0]
                X_rand[:, i] += val[0]
            # implement checking for duplicates
            Y_rand = self.opt_fun_iter(X_rand)
        else:
            X_rand = np.empty((0, self.n_vals))
            Y_rand = np.empty(0)

        if not isinstance(X_pre_calc, np.ndarray):
            X_pre_calc = np.empty((0, self.n_vals))
            Y_pre_calc = np.empty(0)
        else:
            Y_pre_calc = Y_pre_calc.reshape(-1)

        self.X_sample = np.concatenate((X_pre_calc, X_train, X_rand), axis=0)
        self.Y_sample = np.concatenate((Y_pre_calc, Y_train, Y_rand), axis=0)

    def next_location(self, n_restarts):
        """
        Determine the next location for evaluation by minimizing the negative
        acquisition function

        Parameters
        ----------
        n_restarts : int
            number of times the minimization algorithm should be run with
            random initial values.

        Returns
        -------
        numpy array with next values
        """
        dim = self.X_sample.shape[1]
        min_val = np.inf
        min_x = None

        def min_obj(X):
            return -self.acquisition(X, self.X_sample, self.Y_sample, self.gpr)

        # Find the best optimum by starting from n_restarts different random points.
        for x0 in np.random.uniform(np.asarray(self.bounds)[:, 0],
                                    np.asarray(self.bounds)[:, 1],
                                    size=(n_restarts, dim)):
            try:
                res = minimize(min_obj, x0=x0, bounds=self.bounds, method='L-BFGS-B')
                val = res.fun[0]
            except ValueError:
                val = np.inf
            if val < min_val:
                min_val = val
                min_x = res.x

        return min_x  # .reshape(1, -1)

    def search(self, n_iter, re_opt_gpr, n_restarts, print_progress=True):
        """
        Search for the best function value

        Parameters
        ----------
        n_iter : int
            how many new function evaluations are made
        re_opt_gpr : int
            after how many trials the kernel hyperparameters should be optimized
        n_restarts : int
            number of times the minimization algorithm should be run with
            random initial values when determining the next location.
        print_progress : bool, optional
            True if the current iteration values should be printed

        Returns
        -------
        ind : int
            index of the best location.
        X_opt : numpy array
            best location.
        Y_opt : float
            best function value.
        """
        for i in range(n_iter):
            # re-optimize the kernel hyperparameters every re_opt_gpr trials
            if i % re_opt_gpr == 0:
                self.gpr.kernel.hyper = np.exp(self.gpr.opt_kernel_hyper())
            X_new = self.next_location(n_restarts=n_restarts)
            Y_new = self.opt_fun(X_new).reshape(1,)
            self.X_sample = np.concatenate((self.X_sample, X_new.reshape(1, -1)), axis=0)
            self.Y_sample = np.concatenate((self.Y_sample, Y_new), axis=0)
            self.gpr.update_train_data(self.X_sample, self.Y_sample)
            if print_progress:
                print(f"Iteration {i}: current function value {Y_new[0]}, "
                      f"max value so far {np.max(self.Y_sample)}")

        ind = self.Y_sample.argmax()
        Y_opt = self.Y_sample[ind]
        X_opt = self.X_sample[ind, :]
        return ind, X_opt, Y_opt
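A usage sketch for this class; the kernel object and acquisition function below are hypothetical placeholders for whatever the surrounding project provides, and the toy objective is illustrative.

import numpy as np

def toy_opt_fun(x):
    # Illustrative 2-D objective to maximise; optimum at (0.5, 0.5).
    return -np.sum((x - 0.5) ** 2)

bounds = [(0.0, 1.0), (0.0, 1.0)]
bo = BayesianOptimization(toy_opt_fun, bounds,
                          kernel=some_kernel,            # hypothetical kernel instance
                          acquisition=some_acquisition,  # hypothetical acquisition function
                          n_random_samples=5)
ind, X_opt, Y_opt = bo.search(n_iter=20, re_opt_gpr=5, n_restarts=10)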
X, observations = load_air()

# Separate the values we're regressing on from the dates
indexes = np.array(X[3], dtype=np.int32)
dates = X[4]
X = np.array(X[0:3], dtype=np.float64)

# Make the data be zero-mean
y_mean = observations.mean()
y = observations - y_mean

# Create a partial function so we can try multiple data points easily
sqexp = partial(squared_exponential, BANDWIDTH, y.std(), TAU2)

# Create our Gaussian process with zero mean and squared exponential covariance
gp = GaussianProcess(lambda x: np.zeros(x.shape[0]), sqexp)

# Calculate the initial features considering only features with lower indices
error = np.array(y)
features = []
for i, x in enumerate(X):
    f_i = gp.predict(x, x, error, error.std(), percentile=None)[0]
    features.append(f_i)
    error -= f_i
features = np.array(features)

# Track the squared error of the estimates
mse = (error * error).mean()
mse_delta = mse
print('Initial mean squared error: {0}'.format(mse))
import numpy as np
import matplotlib.pyplot as plt

from gaussian_process import GaussianProcess


def func(x):
    return np.sin(0.9 * x)


g = GaussianProcess(noise_variance=1E-5)


def sample_and_plot():
    xtest = np.linspace(-5, 5, 100).reshape(-1, 1)
    ytest = func(xtest)
    mu, cov_posterior, s = g.predict(xtest)

    plt.figure()
    plt.clf()
    plt.plot(g.X, g.Y, 'r+', ms=20)
    plt.plot(xtest, ytest, 'b-')
    plt.gca().fill_between(xtest.flat, mu[:, 0] - 3 * s, mu[:, 0] + 3 * s,
                           color="#dddddd")
    plt.plot(xtest, mu, 'r--', lw=2)
    # plt.savefig('predictive.png', bbox_inches='tight')
    plt.title('Mean predictions plus 3 st.deviations')
    plt.axis([-5, 5, -3, 3])

    # draw samples from the posterior at our test points.
    L = np.linalg.cholesky(cov_posterior)
    f_post = mu + np.dot(L, np.random.normal(size=(xtest.shape[0], 10)))

    plt.figure()
save_dir = 'gp_saves'
env = gym.make('CartPole-v0')

discount_factors = [0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.99]
learning_rates = [0.001, 0.005, 0.01, 0.05, 0.1, 0.15, 0.2]
memory_sizes = [1000, 10000, 100000]
update_frequencies = [1, 10, 50, 100]
n_layers = [0, 1, 2]
n_units = [4, 8, 16, 32, 64]
param_space = [
    discount_factors,
    learning_rates,
    memory_sizes,
    update_frequencies,
    n_layers,
    n_units
]

gp = GaussianProcess(space_dim=len(param_space),
                     length_scale=0.5,
                     noise=0.1,
                     standardize=True)

# Uncomment this to start from saved values
# known_points, known_values = load(save_dir)
# gp.add_points(known_points, known_values)
# eval_point = gp.most_likely_max(param_space)

eval_point = [
    0.99,   # df
    0.005,  # lr
    1000,   # memsize
    1,      # freq
    0,      # n layers
    32      # n units
]

while True:
import numpy as np  # needed below for np.linspace and np.sin
import matplotlib.pyplot as plt
from scipy.stats import norm as stats_norm

from gaussian_process import GaussianProcess, SquaredDistanceKernel, Matern52Kernel

BOUNDS = [0, 10, -10, 10]
PLOT_POINT_COUNT = 1000


def func(v):
    return v * np.sin(v)


# gp = GaussianProcess(kernel=SquaredDistanceKernel(kernel_param=0.01),
#                      noise_variance=1E-3)
gp = GaussianProcess(kernel=Matern52Kernel(kernel_param=0.01),
                     noise_variance=1E-3)

fig = plt.figure()
ax_data = fig.add_subplot(311)
ax_acquisition = fig.add_subplot(312)
ax_func = fig.add_subplot(313)

l_mu = None
l_data = None
l_stddev = None
l_acquisition = None
l_acquisition_area = None
l_func = None

xx = np.linspace(BOUNDS[0], BOUNDS[1], PLOT_POINT_COUNT).reshape(-1, 1)
class BayesianOptimisation(object):

    def __init__(self,
                 kernel: Kernel,
                 objective_function: objective_functions.abstract_objective_function.ObjectiveFunction,
                 acquisition_function: AcquisitionFunction,
                 ):
        """
        :param kernel: Kernel object used by the gaussian process to perform a regression.
        :param objective_function: ObjectiveFunction object which we will try to minimise
        :param acquisition_function: AcquisitionFunction object
        """
        self._initial_kernel = copy.deepcopy(kernel)
        self._gaussian_process = GaussianProcess(kernel)
        self._objective_function = objective_function
        self._acquisition_function = acquisition_function

    def _initialise_gaussian_process(self,
                                     array_initial_dataset: np.ndarray,
                                     array_initial_objective_function_values: np.ndarray
                                     ) -> None:
        """
        Initialise the gaussian process with its initial dataset

        :param array_initial_dataset: array representing all the data points used to
        calculate the posterior mean and variance of the GP. Its dimension is n x l, where:
        - there are n elements in the dataset. Each row corresponds to a data point x_i
          (with 1 <= i <= n), at which the objective function can be evaluated
        - each one of them is of dimension l (the number of variables required by the
          objective function)
        :param array_initial_objective_function_values: array of the evaluations for all
        the elements in array_dataset. Its shape is hence n x 1 (a column vector)
        """
        self._gaussian_process.initialise_dataset(array_initial_dataset,
                                                  array_initial_objective_function_values)

    def run(self,
            number_steps: int,
            array_initial_dataset: np.ndarray,
            array_initial_objective_function_values: np.ndarray,
            ) -> None:
        """
        Generator that performs a bayesian optimisation

        This method is a generator: at every step, it yields a tuple containing 3 elements:
        - the current up-to-date gaussian process
        - the acquisition function
        - the last computed argmax of the acquisition function

        Hence, in order to use this method, you need to put it in a for loop,

            for gp, af, arg_max in bo.run():  # Here, bo is a BayesianOptimisation object
                # some code here

        :param number_steps: number of steps to execute in the Bayesian Optimisation procedure.
        :param array_initial_dataset: array representing all the data points used to
        calculate the posterior mean and variance of the GP. Its dimension is n x l, where:
        - there are n elements in the dataset. Each row corresponds to a data point x_i
          (with 1 <= i <= n), at which the objective function can be evaluated
        - each one of them is of dimension l (the number of variables required by the
          objective function)
        :param array_initial_objective_function_values: array of the evaluations for all
        the elements in array_dataset. Its shape is hence n x 1 (a column vector)
        """
        print(f"Step 0/{number_steps} - Initialise Gaussian Process on Provided Dataset")
        self._initialise_gaussian_process(array_initial_dataset,
                                          array_initial_objective_function_values)
        arg_max_acquisition_function = self.compute_arg_max_acquisition_function()
        for index_step in range(number_steps):
            print(f"Step {index_step}/{number_steps} - Evaluating Objective Function "
                  f"at position {arg_max_acquisition_function.tolist()}")
            arg_max_acquisition_function = self._bayesian_optimisation_step(
                arg_max_acquisition_function)

            # The yield keyword makes the method behave like a generator
            yield self._gaussian_process, self._acquisition_function, arg_max_acquisition_function

    def _bayesian_optimisation_step(self,
                                    arg_max_acquisition_function: np.ndarray
                                    ) -> np.ndarray:
        """
        :param arg_max_acquisition_function: the previously computed argmax of the
        acquisition function
        :return: the next computed argmax of the acquisition function after having
        updated the Gaussian Process
        """
        # Add new data point
        self._gaussian_process.add_data_point(
            arg_max_acquisition_function,
            self._objective_function.evaluate(arg_max_acquisition_function))

        # Update gaussian process and optimise parameters
        self.reinitialise_kernel()
        self._gaussian_process.optimise_parameters()

        # compute argmax
        return self.compute_arg_max_acquisition_function()

    def get_best_data_point(self) -> np.ndarray:
        index_best_data_point = np.argmin(
            self._gaussian_process.array_objective_function_values)
        return self._gaussian_process.array_dataset[index_best_data_point]

    def compute_arg_max_acquisition_function(self) -> np.ndarray:
        return self._acquisition_function.compute_arg_max(
            gaussian_process=self._gaussian_process,
            objective_function=self._objective_function
        )

    def reinitialise_kernel(self) -> None:
        self._gaussian_process.set_kernel_parameters(self._initial_kernel.log_amplitude,
                                                     self._initial_kernel.log_length_scale,
                                                     self._initial_kernel.log_noise_scale)
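A usage sketch following the generator pattern documented in run(); the kernel, objective function, and acquisition function objects are assumed to be constructed as elsewhere in this project.

# Assumed setup: `kernel`, `objective_function` and `acquisition_function`
# are built as in the surrounding project.
bo = BayesianOptimisation(kernel, objective_function, acquisition_function)
X0 = objective_function.get_uniform_dataset(5).reshape((-1, 1))
y0 = objective_function(X0).reshape((-1, 1))
for gp, af, arg_max in bo.run(number_steps=20,
                              array_initial_dataset=X0,
                              array_initial_objective_function_values=y0):
    pass  # e.g. plot the posterior or log arg_max at each step
print(bo.get_best_data_point())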
def main_1d():
    from kernels import SquaredExponential
    from gaussian_process import GaussianProcess
    import matplotlib.pyplot as plt
    import time

    np.random.seed(10)
    tf.set_random_seed(10)

    # Settings
    n_samples = 3
    batch_size = 4
    new_samples = 1000
    n_dim = 1

    # Set up the modules for bayesian optimizer
    kernel = SquaredExponential(n_dim=n_dim,
                                init_scale_range=(.1, .5),
                                init_amp=1.)
    gp = GaussianProcess(n_epochs=100,
                         batch_size=10,
                         n_dim=n_dim,
                         kernel=kernel,
                         noise=0.01,
                         train_noise=False,
                         optimizer=tf.train.GradientDescentOptimizer(0.001),
                         verbose=0)
    bo = BayesianOptimizer(gp,
                           region=np.array([[0., 1.]]),
                           iters=100,
                           tries=2,
                           optimizer=tf.train.GradientDescentOptimizer(0.1),
                           verbose=1)

    # Define the latent function + noise
    def observe(X):
        y = np.float32(1 * (-(X - 0.5)**2 + 1) +
                       np.random.normal(0, .1, [X.shape[0], 1]))
        # y = np.float32((np.sin(X.sum(1)).reshape([X.shape[0], 1]) +
        #                 np.random.normal(0, .1, [X.shape[0], 1])))
        return y

    # Get data
    X = np.float32(np.random.uniform(0, 1, [n_samples, n_dim]))
    y = observe(X)
    plt.axis((-0.1, 1.1, 0, 1.5))

    # Fit the gp
    bo.fit(X, y)
    for i in range(5):
        # print("Iteration {0:3d}".format(i) + "*" * 80)
        t0 = time.time()
        max_acq = -np.inf
        # Inner loop to allow for gd with random initializations multiple times
        x_next, y_next, acq_next = bo.select()

        # Plot the selected point
        plt.plot([x_next[0, 0], x_next[0, 0]], plt.ylim(), 'r--')
        plt.scatter(x_next, y_next, c='r', linewidths=0, s=50)
        plt.scatter(x_next, acq_next, c='g', linewidths=0, s=50)

        # Observe and add point to observed data
        y_obs = observe(x_next)
        X = np.vstack((X, x_next))
        y = np.vstack((y, y_obs))
        t2 = time.time()

        # Fit again
        bo.fit(X, y)
        print("BOFitDuration: {0:.5f}".format(time.time() - t2))
        print("BOTotalDuration: {0:.5f}".format(time.time() - t0))

    # Get the final posterior mean and variance for the entire domain space
    X_new = np.float32(np.linspace(0, 1, new_samples).reshape(-1, 1))
    X_new = np.sort(X_new, axis=0)
    y_pred, var = gp.np_predict(X_new)

    # Compute the confidence interval
    ci = np.sqrt(var) * 2
    plt.plot(X_new, y_pred)
    plt.plot(X_new, y_pred + ci, 'g--')
    plt.plot(X_new, y_pred - ci, 'g--')
    plt.scatter(X, y)
    plt.show()
        plt.plot(xx, function_sample, alpha=0.3, c='C0')
    plt.scatter(gp.array_dataset, gp.array_objective_function_values,
                c='m', marker="+", zorder=1000, s=(30,))
    plt.pause(0.05)
    plt.show()


if __name__ == '__main__':
    # obj = UnivariateObjectiveFunction()
    np.random.seed(207)
    obj = LinearSin(0.5)

    initial_dataset = obj.get_uniform_dataset(21).reshape((-1, 1))
    evaluations = obj(initial_dataset)

    # we use a linear combination of a gaussian and a linear kernel:
    # k = k_gaussian + k_linear
    # Then, there are 6 parameters to sample from in the posterior distribution
    kernel = GaussianLinearKernel(0., 0., 0., 0., 0., 0.)
    gp = GaussianProcess(kernel, initial_dataset, evaluations)
    test_metropolis_hastings(obj, gp, 100,
                             sigma_exploration_mh=0.4,
                             number_hyperparameters_gaussian_process=6)