def run_demo(args):
    """
    @brief a Gaussian Process regression example using an input covariance model
    """
    np.random.seed(1)

    def f(x):
        """
        @brief the function to predict.
        """
        return x * np.sin(x)

    # the input data points
    X = np.linspace(0.1, 9.9, 20)

    # make the observations with added noise
    y = f(X).ravel()
    dy = 0.5 + 1.0 * np.random.random(y.shape)
    noise = np.random.normal(0, dy)
    y += noise

    # mesh the input space for evaluations of the prediction
    x = np.linspace(-2, 12, 2*len(X))

    # instantiate a Gaussian Process model, allowing all params to vary
    gp = GaussianProcess(theta0=[0.5, 2.0, 1.0], covfunction=args.covariance,
                         verbose=True, fixed=[False, False, False],
                         random_start=10)

    # fit to data using Maximum Likelihood Estimation of the parameters
    gp.fit(X, y, dy)

    # make the prediction on the meshed x-axis
    y_pred, sigma = gp.predict(x)

    # plot the function, the prediction, and the 95% confidence interval
    # based on the standard deviation
    fig = pl.figure()
    pl.plot(x, f(x), 'r:', label=r'$f(x) = x \ \mathrm{sin}(x)$')
    pl.errorbar(X.ravel(), y, dy, label='Observations')
    pl.plot(x, y_pred, label='Prediction')
    pl.fill(np.concatenate([x, x[::-1]]),
            np.concatenate([y_pred - 1.9600 * sigma,
                            (y_pred + 1.9600 * sigma)[::-1]]),
            alpha=.2, fc='DarkGoldenRod', ec="None",
            label='95% confidence interval')
    pl.xlabel('$x$', fontsize=16)
    pl.ylabel('$f(x)$', fontsize=16)
    pl.ylim(-15, 20)
    pl.legend(loc='upper left')
    pl.show()
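# Usage sketch (not from the original source): run_demo expects an argparse-style
# namespace with a `covariance` attribute; the parser below and the default
# covariance name are illustrative assumptions about the surrounding driver script.
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser(description="GP regression demo with noisy x*sin(x) data")
    parser.add_argument('--covariance', default='squared_exponential',
                        help="name of the covariance model passed to GaussianProcess (assumed)")
    run_demo(parser.parse_args())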
import numpy as np
from scipy.stats import norm
from scipy.optimize import minimize
# GaussianProcess and RBFKernel are expected to come from the accompanying GP module


class BayesianOptimization(object):

    def __init__(self, score_func, bounds, policy='ei', epsilon=1e-7,
                 lambda_val=1.5, gp_params=None):
        assert policy == 'ei' or policy == 'ucb'
        self.score_func = score_func
        self.bounds = bounds
        self.policy = policy
        self.epsilon = epsilon
        self.lambda_val = lambda_val  # for ucb policy only

        if gp_params is not None:
            self.gp = GaussianProcess(**gp_params)
        else:
            n_params = bounds.shape[0]
            length_scale = 0.5 * np.ones(n_params)
            length_scale_bounds = np.tile(np.array([1e-2, 1e2]), (n_params, 1))
            kernel = RBFKernel(length_scale=length_scale,
                               length_scale_bounds=length_scale_bounds)
            self.gp = GaussianProcess(kernel, alpha=0.03)

    def clone(self):
        cloned_obj = BayesianOptimization(self.score_func, self.bounds,
                                          self.policy, self.epsilon,
                                          self.lambda_val)
        cloned_obj.gp = self.gp.clone()
        return cloned_obj

    def fit(self, n_iter=10, x0=None, n_pre_samples=5, random_search=False):
        """
        Apply Bayesian Optimization to find the optimal parameters.
        """
        if x0 is None:
            assert n_pre_samples is not None and n_pre_samples > 0
        if random_search:
            assert random_search > 1

        n_params = self.bounds.shape[0]

        # evaluate the initial design: either user-supplied points or random samples
        x_list = []
        y_list = []
        if x0 is None:
            for params in np.random.uniform(self.bounds[:, 0],
                                            self.bounds[:, 1],
                                            size=(n_pre_samples, n_params)):
                x_list.append(params)
                y_list.append(self.score_func(params))
        else:
            for params in x0:
                x_list.append(params)
                y_list.append(self.score_func(params))

        X = np.atleast_2d(np.array(x_list))
        y = np.array(y_list)

        for i in range(n_iter):
            self.gp.fit(X, y)

            if random_search:
                x_candidates = np.random.uniform(self.bounds[:, 0],
                                                 self.bounds[:, 1],
                                                 size=(random_search, n_params))
                acquisitions = -self.acquisition_function(x_candidates, y,
                                                          n_params, self.policy)
                next_sample = x_candidates[np.argmax(acquisitions)]
            else:
                next_sample = self.sample_next_hyperparameter(
                    self.acquisition_function, y, n_restart=10,
                    policy=self.policy)

            # if the proposed sample duplicates an existing point, draw a
            # random point instead to avoid a singular covariance matrix
            if np.any(np.abs(next_sample - X) <= self.epsilon):
                next_sample = np.random.uniform(self.bounds[:, 0],
                                                self.bounds[:, 1])

            x_list.append(next_sample)
            y_list.append(self.score_func(next_sample))

            X = np.atleast_2d(np.array(x_list))
            y = np.array(y_list)

        self.X_search = X
        self.y_search = y

    def optimal(self):
        return self.X_search[np.argmax(self.y_search)], np.max(self.y_search)

    def get_iteration_history(self):
        return self.X_search, self.y_search

    def acquisition_function(self, X, y, n_params, policy):
        if policy == 'ei':
            return self.negative_expected_improvement(X, y, n_params)
        elif policy == 'ucb':
            return self.negative_upper_confidence_bound(X, y, n_params)
        else:
            raise ValueError("unknown policy {0:}".format(policy))

    def negative_expected_improvement(self, X, y, n_params):
        X = np.reshape(X, (-1, n_params))
        mu, Sigma = self.gp.predict(X, return_cov=True)
        sigma = np.sqrt(np.diag(Sigma))
        mu = mu.ravel()
        sigma = sigma.ravel()

        f_best = np.max(y)
        Z = (mu - f_best) / sigma
        ei = (mu - f_best) * norm.cdf(Z) + sigma * norm.pdf(Z)
        ei[sigma == 0.0] = 0.0

        return -ei

    def negative_upper_confidence_bound(self, X, y, n_params):
        X = np.reshape(X, (-1, n_params))
        mu, Sigma = self.gp.predict(X, return_cov=True)
        sigma = np.sqrt(np.diag(Sigma))
        mu = mu.ravel()
        sigma = sigma.ravel()

        ucb = mu + self.lambda_val * sigma
        return -ucb

    def sample_next_hyperparameter(self, acquisition_function, y, n_restart, policy):
        n_params = self.bounds.shape[0]
        best_x = None
        best_acquisition_value = np.inf

        # restart the local optimizer from several random points and keep the best
        for initial_value in np.random.uniform(self.bounds[:, 0],
                                               self.bounds[:, 1],
                                               size=(n_restart, n_params)):
            res = minimize(fun=acquisition_function,
                           x0=initial_value,
                           bounds=self.bounds,
                           method='L-BFGS-B',
                           args=(y, n_params, policy))
            if res.fun < best_acquisition_value:
                best_acquisition_value = res.fun
                best_x = res.x

        return best_x
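# Usage sketch (not part of the original class): maximize a simple 1-D score
# function with the expected-improvement policy. The toy objective and bounds
# below are illustrative assumptions.
if __name__ == '__main__':

    def toy_score(params):
        # concave objective with its maximum at x = 2
        x = params[0]
        return -(x - 2.0) ** 2

    toy_bounds = np.array([[-5.0, 5.0]])
    bo = BayesianOptimization(toy_score, toy_bounds, policy='ei')
    bo.fit(n_iter=20, n_pre_samples=5)
    best_x, best_y = bo.optimal()
    print('best x: {0}, best score: {1}'.format(best_x, best_y))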
# Make the data zero-mean
y_mean = observations.mean()
y = observations - y_mean

# Create a partial function so we can try multiple data points easily
sqexp = partial(squared_exponential, BANDWIDTH, y.std(), TAU2)

# Create our Gaussian process with zero mean and squared exponential covariance
gp = GaussianProcess(lambda x: np.zeros(x.shape[0]), sqexp)

# Calculate the initial features, considering only features with lower indices
error = np.array(y)
features = []
for i, x in enumerate(X):
    f_i = gp.predict(x, x, error, error.std(), percentile=None)[0]
    features.append(f_i)
    error -= f_i
features = np.array(features)

# Track the squared error of the estimates
mse = (error * error).mean()
mse_delta = mse
print('Initial mean squared error: {0}'.format(mse))

# Iterate until convergence is reached
iteration = 1
while mse_delta >= CONVERGENCE_THRESHOLD:
    # Calculate the features using the previous step's values where necessary
    for i, x in enumerate(X):
def run_demo(args):
    """
    @brief a Gaussian Process regression example that fits several supernovae spectra
    """
    # read in the relevant files in correct order
    f1 = glob("../data/SN2011fe/11feM*")
    f1.sort(reverse=True)
    f2 = glob("../data/SN2011fe/11feP*")
    f2.sort()
    files = f1 + f2

    pl.ion()

    # fit each supernova spectrum in serial
    for i, f in enumerate(files):

        file_root = os.path.splitext(os.path.basename(f))[0]
        time = float(file_root[-3:]) / 10.
        if 'M' in file_root:
            time *= -1.

        # load the data from inputdata.txt
        X, Y, Yerr = np.loadtxt(f, unpack=True)

        # save these for later
        X_tot = X.copy()
        Y_tot = Y.copy()
        Yerr_tot = Yerr.copy()

        n_eval = len(X)
        batch_size = args.batch_size
        resolution = args.resolution

        xrec_full = []
        yrec_full = []
        yerr_full = []

        # instantiate a Gaussian Process model, allowing all params to vary
        gp = GaussianProcess(theta0=[1e-13, 2.0, 1e-13],
                             covfunction=args.covariance,
                             verbose=True, fixed=[False, False, False])

        nbatches = max(1, n_eval // batch_size + 1)

        # fit the spectra in batches along the x axis
        for k in range(nbatches):

            batch_from = k * batch_size
            batch_to = min([(k + 1) * batch_size + 1, n_eval + 1])
            if k == nbatches - 1:
                batch_to = len(X_tot)

            xmin = np.amin(X_tot[batch_from:batch_to])
            xmax = np.amax(X_tot[batch_from:batch_to])
            nstar = len(X_tot[batch_from:batch_to]) * resolution

            # pad the batch on both sides so neighboring fits overlap
            batch_to = int(batch_to + 0.1 * batch_size)
            batch_from = int(batch_from - 0.1 * batch_size)
            if batch_from < 0:
                batch_from = 0

            X = X_tot[batch_from:batch_to]
            Y = Y_tot[batch_from:batch_to]
            Yerr = Yerr_tot[batch_from:batch_to]

            # mesh the input space for evaluations of the prediction
            x = np.linspace(xmin, xmax, nstar)

            # fit to data using Maximum Likelihood Estimation of the parameters
            gp.fit(X, Y, Yerr)

            # make the prediction on the meshed x-axis
            y_pred, sigma = gp.predict(x)

            xrec_full += list(x)
            yrec_full += list(y_pred)
            yerr_full += list(sigma)

        yerr_full = np.array(yerr_full)
        yrec_full = np.array(yrec_full)

        # plot the observations, the prediction, and the 95% confidence interval
        # based on the standard deviation
        pl.cla()
        pl.plot(X_tot, Y_tot, label='Observations')
        pl.plot(xrec_full, yrec_full, label='Prediction')
        pl.fill(np.concatenate([xrec_full, xrec_full[::-1]]),
                np.concatenate([yrec_full - 1.9600 * yerr_full,
                                (yrec_full + 1.9600 * yerr_full)[::-1]]),
                alpha=0.5, fc='DarkGoldenRod', ec="None",
                label='95% confidence interval')
        pl.xlabel(r'$\lambda \ (\AA)$', fontsize=16)
        pl.ylabel(r'$\mathrm{Flux \ (erg/s/cm^2/\AA)}$', fontsize=16)
        pl.legend(loc='upper right')
        pl.title("SNe 2011fe %+.1f days relative to B-band max" % time)
        pl.ylim(-0.2e-12, 1.2e-12)
        pl.savefig("figures/SN11fe_%02d.png" % i)
        pl.draw()
def gaussian_process(kernel, nb_training_points=6, nb_samples=10,
                     plot_dist=False, plot_gt=True, save=None):
    """
    Fit and plot a Gaussian process.

    # Parameters
    * kernel: an object deriving from gaussian_process.Kernel
        Kernel used to fit the GP
    * nb_training_points: int, default 6
        How many points to fit
    * nb_samples: int, default 10
        How many samples to draw
    * plot_dist: bool, default False
        If True, plot the mean and 95% confidence interval of the GP
    * plot_gt: bool, default True
        If True, plot the true underlying function
    * save: str, default None
        If not None, save the figure under `save`
    """
    # Prepare training and test sets
    def f(x):
        return 2 * np.sin(2*x) / x

    rs = np.random.RandomState(5)
    x = np.linspace(0.1, 10., 50)
    rs.shuffle(x)
    x_train = x[:nb_training_points]
    y_train = f(x_train)

    x_pred = np.linspace(-1, 12., 1000)
    y = f(x_pred)

    # Fit and predict
    gp = GaussianProcess(kernel, random_state=rs)
    if nb_training_points > 0:
        gp.fit(x_train.reshape(-1, 1), y_train)
    y_pred, std_pred = gp.predict(x_pred.reshape(-1, 1), return_std=True)
    y_pred = y_pred.squeeze()
    std_pred += 1e-15  # avoid an exactly zero standard deviation

    # Configure plot settings
    color = sns.diverging_palette(15, 255, n=9, s=90, l=40)
    fig = plt.figure(figsize=(12, 4))
    sns.set_style("dark")
    ax = plt.Axes(fig, [0., 0., 1., 1.])
    fig.add_axes(ax)

    # Plot ground truth if required
    if plot_gt:
        plt.plot(x_pred, y, c=color[1], lw=3, label=u'Truth')

    # Plot mean and 95% prediction interval if required
    if plot_dist:
        plt.plot(x_pred, y_pred, c=color[8], lw=3, label=u'Prediction', zorder=4)
        plt.fill(np.concatenate([x_pred, x_pred[::-1]]),
                 np.concatenate([y_pred - 1.9600 * std_pred,
                                 (y_pred + 1.9600 * std_pred)[::-1]]),
                 alpha=.4, fc=color[7], ec='None',
                 label="95% prediction interval")

    # Plot the required number of samples
    if nb_samples > 0:
        samples = gp.sample_y(x_pred.reshape(-1, 1), nb_samples)
        plt.plot(x_pred, samples)

    # Plot the training set
    plt.scatter(x_train, y_train, facecolors=color[0], s=80, zorder=5)

    # More plot settings
    plt.xlim([-1, 12.])
    plt.ylim([-5, 5.])
    ax.xaxis.set_visible(False)
    ax.yaxis.set_visible(False)

    if save is None:
        plt.show()
    else:
        plt.savefig(save, bbox_inches='tight', pad_inches=0)
    plt.close()
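# Usage sketch (not from the original source): plot a GP fit with an RBF-style
# kernel. The `RBF` import below is an assumption about the local gaussian_process
# module; any object deriving from gaussian_process.Kernel would do.
if __name__ == '__main__':
    from gaussian_process import RBF  # hypothetical kernel class
    gaussian_process(RBF(length_scale=1.0), nb_training_points=6,
                     nb_samples=5, plot_dist=True, save='gp_rbf_demo.png')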
plt.clf()
plt.plot(g.X, g.Y, 'r+', ms=20)
plt.plot(xtest, ytest, 'b-')
plt.gca().fill_between(xtest.flat, mu[:, 0] - 3*s, mu[:, 0] + 3*s,
                       color="#dddddd")
plt.plot(xtest, mu, 'r--', lw=2)
# plt.savefig('predictive.png', bbox_inches='tight')
plt.title('Mean predictions plus 3 st.deviations')
plt.axis([-5, 5, -3, 3])

# draw samples from the posterior at our test points
L = np.linalg.cholesky(cov_posterior)
f_post = mu + np.dot(L, np.random.normal(size=(xtest.shape[0], 10)))
plt.figure()
plt.clf()
plt.plot(xtest, f_post)
plt.title('Ten samples from the GP posterior')
plt.axis([-5, 5, -3, 3])
# plt.savefig('post.png', bbox_inches='tight')

sample_and_plot()

x = np.random.uniform(-5, 5, 10).reshape(-1, 1)
g.predict(x, func(x))
sample_and_plot()

x = np.random.uniform(-5, 5, 10).reshape(-1, 1)
g.predict(x, func(x))
sample_and_plot()

plt.show()