def quadrature(self):
    from bayesquad.quadrature import compute_mean_gp_prod_gpy_2

    budget = self.options['budget']
    phis = np.empty((budget, 1))
    rs = np.empty((budget, ))
    qs = np.empty((budget, ))

    phi_init = self.p.mean.reshape(1, 1)
    r_init = np.array(self.r.sample(phi_init))
    q_init = np.array(self.q.sample(phi_init))

    kern = GPy.kern.RBF(input_dim=1, variance=1., lengthscale=1.)
    # WSABI-L square-root warping: the GPs model sqrt(2 * f) rather than f.
    r_gp = GPy.models.GPRegression(phi_init,
                                   np.sqrt(2 * r_init).reshape(1, 1), kern)
    q_gp = GPy.models.GPRegression(phi_init,
                                   np.sqrt(2 * q_init).reshape(1, 1), kern)
    r_model = WarpedIntegrandModel(WsabiLGP(r_gp), self.p)
    q_model = WarpedIntegrandModel(WsabiLGP(q_gp), self.p)

    phis[0, :] = phi_init
    rs[0] = r_init
    qs[0] = q_init

    for i in range(1, budget):
        # Actively select the next sample under the r surrogate.
        phi = select_batch(r_model, 1)[0].reshape(1, 1)
        r = self.r.sample(phi)
        q = self.q.sample(phi)
        r_model.update(phi, r.reshape(1, 1))
        q_model.update(phi, q.reshape(1, 1))
        r_gp.optimize()
        q_gp.optimize()
        rs[i] = r
        qs[i] = q
        phis[i, :] = phi

        # GP over the product of the warped observations, used for the fourth
        # term of the numerator decomposition below.
        rq_gp = GPy.models.GPRegression(phis[:i + 1, :],
                                        q_model.gp._gp.Y * r_model.gp._gp.Y,
                                        kern)
        rq_gp.optimize()

        alpha_q = q_model.gp._alpha
        alpha_r = r_model.gp._alpha
        r_int_mean = r_model.integral_mean()[0]

        n1 = alpha_r * alpha_q
        n2 = 0.5 * alpha_r * compute_mean_gp_prod_gpy_2(
            self.p, q_model.gp._gp, q_model.gp._gp)
        n3 = 0.5 * alpha_q * compute_mean_gp_prod_gpy_2(
            self.p, r_model.gp._gp, r_model.gp._gp)
        n4 = 0.25 * compute_mean_gp_prod_gpy_2(self.p, rq_gp, rq_gp)
        rq_int_mean = n1 + n2 + n3 + n4

        self.results[i] = rq_int_mean / r_int_mean
        if i % self.options['display_step'] == 0:
            print('Samples', phi, "Numerator: ", rq_int_mean,
                  "Denominator", r_int_mean)
    return self.results[-1]
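# A note on the numerator decomposition above (a sketch of the algebra, not a
# project-specific API): under the WSABI-L warping each integrand is modelled
# as f(phi) ~ alpha_f + 0.5 * f_tilde(phi)^2, where f_tilde is the warped GP.
# Multiplying the models for r and q and taking the prior expectation then
# splits into four terms:
#
#   E_p[r q] ~ alpha_r * alpha_q                  (n1)
#            + 0.5 * alpha_r * E_p[q_tilde^2]     (n2)
#            + 0.5 * alpha_q * E_p[r_tilde^2]     (n3)
#            + 0.25 * E_p[(r_tilde * q_tilde)^2]  (n4)
#
# which is exactly what n1-n4 compute, with rq_gp fitted to the product of the
# warped observations to approximate r_tilde * q_tilde.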
def quadrature(self):
    budget = self.options['budget']
    phis = np.empty((budget, 1))
    rs = np.empty((budget, ))
    rqs = np.empty((budget, ))

    phi_init = self.p.mean.reshape(1, 1)
    r_init = self.r.sample(phi_init)
    kern = GPy.kern.RBF(input_dim=1, variance=1., lengthscale=1.)
    rq_init = r_init * self.q.sample(phi_init)

    r_gp = GPy.models.GPRegression(phi_init,
                                   np.sqrt(2 * r_init).reshape(1, 1), kern)
    rq_gp = GPy.models.GPRegression(phi_init,
                                    np.sqrt(2 * rq_init).reshape(1, 1), kern)
    r_model = WarpedIntegrandModel(WsabiLGP(r_gp), self.p)
    rq_model = WarpedIntegrandModel(WsabiLGP(rq_gp), self.p)

    phis[0, :] = phi_init
    rs[0] = r_init
    rqs[0] = rq_init

    for i in range(1, budget):
        phi = (select_batch(r_model, 1)[0]).reshape(1, 1)  # phi is 1 dimensional!
        phis[i, :] = phi
        rs[i] = self.r.sample(phi)
        rqs[i] = rs[i] * self.q.sample(phi)
        r_model.update(phi, rs[i].reshape(1, 1))
        rq_model.update(phi, rqs[i].reshape(1, 1))
        r_gp.optimize()
        rq_gp.optimize()

        rq_int_mean = rq_model.integral_mean()[0]
        r_int_mean = r_model.integral_mean()[0]
        self.results[i] = rq_int_mean / r_int_mean
        if i % 10 == 1:
            print('Samples', phi, "Numerator: ", rq_int_mean,
                  "Denominator", r_int_mean)
        if self.options['plot_iterations']:
            self.draw_samples(i, phis, rs, rqs, r_model, rq_model)
            plt.show()
    return self.results[-1]
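# Design note: this second variant models the product r(phi) * q(phi) with a
# single warped GP (rq_model) instead of decomposing the product integral into
# the four WSABI cross-terms as above. It appears simpler and cheaper per
# iteration, at the cost of not capturing the correlation structure that the
# n1-n4 decomposition models explicitly.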
def wsabi(self, verbose=True):
    # Allocating number of maximum evaluations
    start = time.time()
    budget = self.options['wsabi_budget']
    batch_count = 1
    test_x = self.model.X_test
    test_y = np.squeeze(self.model.Y_test)

    # Allocate memory for the samples and results
    log_phi = np.zeros((budget * batch_count, self.dimensions))  # The log-hyperparameter sampling points
    log_r = np.zeros((budget * batch_count, ))  # The log-likelihood function
    q = np.zeros((test_x.shape[0], budget * batch_count))  # Prediction

    # Set prior mean to the MAP value
    log_phi_initial = self.options['prior_mean'].reshape(1, -1)
    log_r_initial = np.sqrt(2 * np.exp(
        self.model.log_sample(phi=np.exp(log_phi_initial).reshape(-1))[0]))
    pred = np.zeros((test_x.shape[0], ))

    # Setting up the kernel - note we only marginalise over the lengthscale
    # terms; other hyperparameters are set to the MAP values.
    kern = GPy.kern.RBF(self.dimensions, variance=.1, lengthscale=.1)
    log_r_gp = GPy.models.GPRegression(log_phi_initial,
                                       log_r_initial.reshape(1, -1), kern)
    log_r_model = WarpedIntegrandModel(WsabiLGP(log_r_gp), self.prior)

    # First, within the given allowance, compute an estimate of the model
    # evidence - the common denominator for all predictive distributions.
    for i_a in range(budget):
        log_phi_i = np.array(
            select_batch(log_r_model, batch_count,
                         "Kriging Believer")).reshape(batch_count, -1)
        log_r_i = self.model.log_sample(phi=np.exp(log_phi_i))[0]
        if verbose:
            logging.info('phi: ' + str(log_phi_i) + ' log_lik: ' + str(log_r_i))
        log_r[i_a:i_a + batch_count] = log_r_i
        log_phi[i_a:i_a + batch_count, :] = log_phi_i
        log_r_model.update(log_phi_i, np.exp(log_r_i).reshape(1, -1))

    quad_time = time.time()

    # Exponentiate-and-rescale: subtract the maximum log-likelihood before
    # exponentiating to avoid numerical under/overflow, then add it back in
    # log space after integration.
    max_log_r = max(log_r)
    r = np.exp(log_r - max_log_r)
    r_gp = GPy.models.GPRegression(log_phi[:1, :],
                                   np.sqrt(2 * r[0].reshape(1, 1)), kern)
    r_model = WarpedIntegrandModel(WsabiLGP(r_gp), self.prior)
    r_model.update(log_phi[1:, :], r[1:].reshape(-1, 1))
    r_gp.optimize()

    r_int = np.exp(np.log(r_model.integral_mean()[0]) + max_log_r)  # Model evidence
    log_r_int = np.log(r_int)  # Model log-evidence
    print("Estimate of model evidence: ", r_int)
    print("Model log-evidence ", log_r_int)

    # Second, compute and marginalise the predictive distribution for each
    # individual test point.
    for i_x in range(test_x.shape[0]):
        # Note that we do not actively sample again for q; we reuse the
        # samples drawn while computing the log-evidence.
        _, q_initial = self.model.log_sample(phi=np.exp(log_phi_initial),
                                             x=test_x[i_x, :])

        # Sample the q values at the stored hyperparameter samples
        for i_b in range(budget * batch_count):
            log_phi_i = log_phi[i_b, :]
            log_r_i, q_i = self.model.log_sample(phi=np.exp(log_phi_i),
                                                 x=test_x[i_x, :])
            q[i_x, i_b] = q_i

        # Enforce positivity in q
        q_x = q[i_x, :]
        q_min = np.min(q_x)
        if q_min < 0:
            q_x = q_x - q_min
        else:
            q_min = 0

        # The same exponentiation and rescaling trick for q
        log_rq_x = log_r + np.log(q_x)
        max_log_rq = np.max(log_rq_x)
        rq = np.exp(log_rq_x - max_log_rq)
        rq_gp = GPy.models.GPRegression(log_phi[:1, :],
                                        np.sqrt(2 * rq[0].reshape(1, 1)), kern)
        rq_model = WarpedIntegrandModel(WsabiLGP(rq_gp), self.prior)
        rq_model.update(log_phi[1:, :], rq[1:].reshape(-1, 1))
        rq_gp.optimize()

        # Now estimate the posterior predictive mean
        rq_int = np.exp(np.log(rq_model.integral_mean()[0]) +
                        max_log_rq) + q_min * r_int
        pred[i_x] = rq_int / r_int
        logging.info('Progress: ' + str(i_x + 1) + '/' + str(test_x.shape[0]))
        if verbose:
            logging.info('Prediction: ' + str(pred[i_x]) + '. Label: ' +
                         str(test_y[i_x]))

    # Threshold the predictive probabilities into class labels.
    labels = pred.copy()
    labels[labels < 0.5] = 0
    labels[labels >= 0.5] = 1
    labels = np.squeeze(labels)

    accuracy, precision, recall, f1 = self.model.score(
        np.squeeze(test_y), labels)
    non_zero = np.count_nonzero(np.squeeze(test_y) - np.squeeze(labels))
    print("------ WSABI Summary -----------")
    print("Number of mismatches: " + str(non_zero))
    print("Accuracy: " + str(accuracy))
    print("Precision: " + str(precision))
    print("Recall: " + str(recall))
    print("F1 score: " + str(f1))
    if verbose:
        print("Ground truth labels: " + str(test_y))
        print("Predictions: " + str(labels))
        print('Predictive Probabilities: ' + str(pred))

    end = time.time()
    print("Active Sampling Time: ", quad_time - start)
    print("Total Time: ", end - start)

    # Save the results (columns: predicted label, predictive probability,
    # ground-truth label)
    labels = labels.reshape(-1)
    pred = pred.reshape(-1)
    test_y = test_y.reshape(-1)
    res = np.vstack([labels, pred, test_y]).T
    res = pd.DataFrame(res, columns=['pred', 'pred_prob', 'labels'])
    res.to_csv('svm_wsabi.csv')
    return accuracy, precision, recall, f1
def plot_integrand_posterior(integrand_model: WarpedIntegrandModel):
    z = integrand_model.posterior_mean_and_variance(PLOTTING_DOMAIN)[0]
    plot_data(z, 132, "Posterior Mean")
def true_function(x):  # header restored; implied by the calls below
    x = np.atleast_2d(x)
    return np.atleast_2d((((np.sin(x) + 0.5 * np.cos(3 * x))**2) /
                          ((x / 2)**2 + 0.3)).prod(axis=1))


initial_x = np.array([[0, 0]])
initial_y = np.sqrt(2 * true_function(initial_x))

k = GPy.kern.RBF(2, variance=2, lengthscale=2)
lik = GPy.likelihoods.Gaussian(variance=1e-10)
prior = Gaussian(mean=np.array([0, 0]), covariance=2 * np.eye(2))

gpy_gp = GPy.core.GP(initial_x, initial_y, kernel=k, likelihood=lik)
warped_gp = WsabiLGP(gpy_gp)
model = WarpedIntegrandModel(warped_gp, prior)


def true_integrand(x):
    return true_function(x) * prior(x)


# Set up plotting.
LOWER_LIMIT = -4
UPPER_LIMIT = 4
PLOTTING_RESOLUTION = 200
COLOUR_MAP = "summer"


def get_plotting_domain(lower_limit, upper_limit, resolution):
    # Body restored (assumed): a regular 2-D grid flattened to an (N, 2)
    # array of query points, matching the 2-D integrand above.
    grid = np.mgrid[lower_limit:upper_limit:resolution * 1j,
                    lower_limit:upper_limit:resolution * 1j]
    return grid.reshape(2, -1).T
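# Assumed usage (not shown in the original snippet): the PLOTTING_DOMAIN
# referenced by plot_integrand_posterior above would be built from this
# helper, e.g.
# PLOTTING_DOMAIN = get_plotting_domain(LOWER_LIMIT, UPPER_LIMIT, PLOTTING_RESOLUTION)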
def wsabi_bqr(self, verbose=True, compute_var=True):
    from bayesquad.quadrature import compute_mean_gp_prod_gpy_2

    # Allocating number of maximum evaluations
    start = time.time()
    budget = self.options['wsabi_budget']
    test_x = self.gpr.X_test
    test_y = self.gpr.Y_test

    # Allocate memory for the samples and results
    log_phi = np.zeros((budget, self.dimensions))  # The log-hyperparameter sampling points
    log_r = np.zeros((budget, ))  # The log-likelihood function
    q = np.zeros((test_x.shape[0], budget))  # Prediction
    var = np.zeros((test_x.shape[0], budget))  # Posterior variance

    # Initial points - note that, as per GPML convention, the hyperparameters
    # are expressed in log scale. Initialise to the MAP estimate.
    map_model, _, _ = self.maximum_a_posterior(num_restarts=1,
                                               max_iters=1000,
                                               verbose=False)
    if isinstance(self.gpr, PeriodicGPRegression):
        self.gpr.set_params(
            variance=map_model.std_periodic.variance,
            gaussian_noise=map_model.Gaussian_noise.variance)
    elif isinstance(self.gpr, RBFGPRegression):
        self.gpr.set_params(
            variance=map_model.rbf.variance,
            gaussian_noise=map_model.Gaussian_noise.variance)

    # Set prior mean to the MAP value
    log_phi_initial = self.options['prior_mean'].reshape(1, -1)
    log_r_initial = np.sqrt(2 * np.exp(
        self.gpr.log_sample(phi=np.exp(log_phi_initial.reshape(-1)))[0]))

    pred = np.zeros((test_x.shape[0], ))
    pred_var = np.zeros((test_x.shape[0], ))

    # Setting up the kernel - note we only marginalise over the lengthscale
    # terms; other hyperparameters are set to the MAP values.
    kern = GPy.kern.RBF(self.dimensions, variance=1., lengthscale=1.)
    log_r_gp = GPy.models.GPRegression(log_phi_initial,
                                       log_r_initial.reshape(1, -1), kern)
    log_r_model = WarpedIntegrandModel(WsabiLGP(log_r_gp), self.prior)

    # First, within the given allowance, compute an estimate of the model
    # evidence - the common denominator for all predictive distributions.
    for i_a in range(budget):
        log_phi_i = np.array(
            select_batch(log_r_model, 1, "Kriging Believer")).reshape(1, -1)
        try:
            log_r_i = self.gpr.log_sample(phi=np.exp(log_phi_i))[0]
            log_r[i_a:i_a + 1] = log_r_i
            log_phi[i_a:i_a + 1, :] = log_phi_i
            log_r_model.update(log_phi_i, np.exp(log_r_i).reshape(1, -1))
            print(np.exp(log_phi_i), log_r_i)
        except np.linalg.LinAlgError:
            print('Error!')
            continue

    max_log_r = np.max(log_r)
    r = np.exp(log_r - max_log_r)
    r_gp = GPy.models.GPRegression(log_phi[:1, :],
                                   np.sqrt(2 * r[0].reshape(1, 1)), kern)
    r_model = WarpedIntegrandModel(WsabiLGP(r_gp), self.prior)
    r_model.update(log_phi[1:, :], r[1:].reshape(-1, 1))
    r_gp.optimize()

    r_int_prime = r_model.integral_mean()[0]  # Model evidence (rescaled)
    r_int = np.exp(np.log(r_int_prime) + max_log_r)  # Model evidence
    r = np.exp(np.log(r) + max_log_r)  # Undo the rescaling (was np.log(log_r), a bug)
    print("Estimate of model evidence: ", r_int)

    # Second, compute and marginalise the predictive distribution for each
    # individual test point.
    for i_x in range(test_x.shape[0]):
        # Note that we do not actively sample again for q; we reuse the
        # samples drawn while computing the log-evidence.
        _, q_initial, var_initial = self.gpr.log_sample(
            phi=np.exp(log_phi_initial), x=test_x[i_x, :])

        # Initialise a GPy GP surrogate for q(phi). This is the BQR approach:
        # r and q are modelled by separate GPs, rather than modelling rq with
        # a single GP, so as to account for their correlation.
        for i_b in range(budget):
            log_phi_i = log_phi[i_b, :]
            _, q[i_x, i_b], var[i_x, i_b] = self.gpr.log_sample(
                phi=np.exp(log_phi_i), x=test_x[i_x, :])

        # Enforce positivity in q
        q_x = q[i_x, :].copy()
        var_x = var[i_x, :].copy()
        q_min = np.min(q_x)
        if q_min < 0:
            q_x = q_x - q_min
        else:
            q_min = 0

        q_gp = GPy.models.GPRegression(
            log_phi[:1, :], np.sqrt(2 * q_x[0].reshape(1, 1)),
            GPy.kern.RBF(self.dimensions, variance=2., lengthscale=2.))
        q_model = WarpedIntegrandModel(WsabiLGP(q_gp), self.prior)
        q_model.update(log_phi[1:, :], q_x[1:].reshape(-1, 1))
        q_gp.optimize()

        # Evaluate the numerator via the four-term WSABI decomposition
        alpha_q = q_model.gp._alpha
        alpha_r = r_model.gp._alpha
        rq_gp = GPy.models.GPRegression(
            log_phi, q_model.gp._gp.Y * r_model.gp._gp.Y,
            GPy.kern.RBF(self.dimensions, variance=1., lengthscale=1.))
        rq_gp.optimize()

        n1 = alpha_r * alpha_q
        n2 = 0.5 * alpha_q * compute_mean_gp_prod_gpy_2(self.prior, r_gp, r_gp)
        n3 = 0.5 * alpha_r * compute_mean_gp_prod_gpy_2(self.prior, q_gp, q_gp)
        n4 = 0.25 * compute_mean_gp_prod_gpy_2(self.prior, rq_gp, rq_gp)
        n = n1 + n2 + n3 + n4
        res = n / r_int_prime + q_min
        print('res', n1, n2, n3, n4, res)
        pred[i_x] = res

        if compute_var:
            var_gp = GPy.models.GPRegression(
                log_phi[:1, :], np.sqrt(2 * var_x[0].reshape(1, 1)), kern)
            var_model = WarpedIntegrandModel(WsabiLGP(var_gp), self.prior)
            var_model.update(log_phi[1:, :], var_x[1:].reshape(-1, 1))
            var_gp.optimize()

            rvar_gp = GPy.models.GPRegression(
                log_phi, var_model.gp._gp.Y * r_model.gp._gp.Y, kern)
            rvar_gp.optimize()

            alpha_var = var_model.gp._alpha
            var_num = alpha_r * alpha_var + \
                0.5 * alpha_r * compute_mean_gp_prod_gpy_2(
                    self.prior, var_model.gp._gp, var_model.gp._gp) + \
                0.5 * alpha_var * compute_mean_gp_prod_gpy_2(
                    self.prior, r_model.gp._gp, r_model.gp._gp) + \
                0.25 * compute_mean_gp_prod_gpy_2(self.prior, rvar_gp, rvar_gp)
            # The numerator is built from the rescaled r model, so divide by
            # the rescaled evidence (the original divided by r_int).
            pred_var[i_x] = var_num / r_int_prime

        logging.info('Progress: ' + str(i_x + 1) + '/' + str(test_x.shape[0]))

    rmse = self.compute_rmse(pred, test_y)
    print('Root Mean Squared Error:', rmse)
    ll, cs = None, None
    if compute_var:
        ll, cs = self.compute_ll_cs(pred, pred_var, test_y)
        print('Log-likelihood', ll)
        print('Calibration score', cs)

    end = time.time()
    print("Total Time: ", end - start)
    print("Estimate of model evidence: ", r_int)
    print("Model log-evidence ", np.log(r_int))
    if verbose:
        self.visualise(pred, pred_var, test_y)
    return rmse, ll, None, np.log(r_int)
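# The posterior-variance marginalisation above mirrors the mean case: the
# predictive variance var(phi) plays the role of q(phi), so the same
# four-term decomposition applies with (alpha_var, var_tilde) in place of
# (alpha_q, q_tilde).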
def wsabi(self, verbose=True):
    # Allocating number of maximum evaluations
    start = time.time()
    budget = self.options['wsabi_budget']
    batch_count = 1
    test_x = self.gpr.X_test
    test_y = self.gpr.Y_test

    # Allocate memory for the samples and results
    log_phi = np.zeros((budget * batch_count, self.dimensions))  # The log-hyperparameter sampling points
    log_r = np.zeros((budget * batch_count, ))  # The log-likelihood function
    q = np.zeros((test_x.shape[0], budget * batch_count))  # Prediction
    var = np.zeros((test_x.shape[0], budget * batch_count))  # Posterior variance

    # Initial points - note that, as per GPML convention, the hyperparameters
    # are expressed in log scale. Initialise to the MAP estimate.
    map_model, _, _ = self.maximum_a_posterior(num_restarts=1,
                                               max_iters=1000,
                                               verbose=False)
    display(map_model)
    if isinstance(self.gpr, PeriodicGPRegression):
        pass
        # self.gpr.set_params(variance=map_model.std_periodic.variance,
        #                     gaussian_noise=map_model.Gaussian_noise.variance)
    elif isinstance(self.gpr, RBFGPRegression):
        self.gpr.set_params(
            variance=map_model.rbf.variance,
            gaussian_noise=map_model.Gaussian_noise.variance)

    # Set prior mean to the MAP value
    log_phi_initial = self.options['prior_mean'].reshape(1, -1)
    log_r_initial = np.sqrt(2 * np.exp(
        self.gpr.log_sample(phi=np.exp(log_phi_initial.reshape(-1)))[0]))

    pred = np.zeros((test_x.shape[0], ))
    pred_var = np.zeros((test_x.shape[0], ))

    # Setting up the kernel - note we only marginalise over the lengthscale
    # terms; other hyperparameters are set to the MAP values.
    kern = GPy.kern.RBF(self.dimensions, variance=1., lengthscale=1.)
    log_r_gp = GPy.models.GPRegression(log_phi_initial,
                                       log_r_initial.reshape(1, -1), kern)
    log_r_model = WarpedIntegrandModel(WsabiLGP(log_r_gp), self.prior)
    print(self.prior.mean)

    # First, within the given allowance, compute an estimate of the model
    # evidence - the common denominator for all predictive distributions.
    # Note: the loop runs budget - 1 times, so the final row of
    # log_phi/log_r keeps its zero initialisation.
    for i_a in range(budget - 1):
        log_phi_i = np.array(
            select_batch(log_r_model, batch_count,
                         "Kriging Believer")).reshape(batch_count, -1)
        try:
            log_r_i = self.gpr.log_sample(phi=np.exp(log_phi_i))[0]
            log_r[i_a:i_a + batch_count] = log_r_i
            log_phi[i_a:i_a + batch_count, :] = log_phi_i
            log_r_model.update(log_phi_i, np.exp(log_r_i).reshape(1, -1))
            print(np.exp(log_phi_i), log_r_i)
        except np.linalg.LinAlgError:
            continue

    quad_time = time.time()
    max_log_r = max(log_r)

    # Save the evaluations
    log_r_pd = pd.Series(log_r, name='lml')
    log_r_pd.to_csv('lml_soton.csv')

    r = np.exp(log_r - max_log_r)
    r_gp = GPy.models.GPRegression(log_phi[:1, :],
                                   np.sqrt(2 * r[0].reshape(1, 1)), kern)
    r_model = WarpedIntegrandModel(WsabiLGP(r_gp), self.prior)
    r_model.update(log_phi[1:, :], r[1:].reshape(-1, 1))
    r_gp.optimize()

    r_int = np.exp(np.log(r_model.integral_mean()[0]) + max_log_r)  # Model evidence
    log_r_int = np.log(r_int)  # Model log-evidence

    # Visualise the model parameter posterior (disabled):
    # neg_log_post[i] = log_r[i] + self.prior.log_eval(log_phi[i, :]) - log_r_int
    # then train a GP on the log-posterior surface.

    # Second, compute and marginalise the predictive distribution for each
    # individual test point.
    for i_x in range(test_x.shape[0]):
        # Note that we do not actively sample again for q; we reuse the
        # samples drawn while computing the log-evidence.
        _, q_initial, var_initial = self.gpr.log_sample(
            phi=np.exp(log_phi_initial), x=test_x[i_x, :])

        # Sample the q and variance values at the stored hyperparameter samples
        for i_b in range(budget * batch_count):
            log_phi_i = log_phi[i_b, :]
            log_r_i, q_i, var_i = self.gpr.log_sample(phi=np.exp(log_phi_i),
                                                      x=test_x[i_x, :])
            q[i_x, i_b] = q_i
            var[i_x, i_b] = var_i

        # Enforce positivity in q
        q_x = q[i_x, :]
        var_x = var[i_x, :]
        q_min = np.min(q_x)
        if q_min < 0:
            q_x = q_x - q_min
        else:
            q_min = 0

        # The same exponentiation and rescaling trick for q
        log_rq_x = log_r + np.log(q_x)
        max_log_rq = np.max(log_rq_x)
        rq = np.exp(log_rq_x - max_log_rq)
        rq_gp = GPy.models.GPRegression(log_phi[:1, :],
                                        np.sqrt(2 * rq[0].reshape(1, 1)), kern)
        rq_model = WarpedIntegrandModel(WsabiLGP(rq_gp), self.prior)
        rq_model.update(log_phi[1:, :], rq[1:].reshape(-1, 1))
        rq_gp.optimize()

        # Now estimate the posterior predictive mean
        rq_int = np.exp(np.log(rq_model.integral_mean()[0]) +
                        max_log_rq) + q_min * r_int

        # Similarly for the predictive variance
        log_rvar_x = log_r + np.log(var_x)
        max_log_rvar = np.max(log_rvar_x)
        rvar = np.exp(log_rvar_x - max_log_rvar)
        rvar_gp = GPy.models.GPRegression(log_phi[:1, :],
                                          np.sqrt(2 * rvar[0].reshape(1, 1)),
                                          kern)
        rvar_model = WarpedIntegrandModel(WsabiLGP(rvar_gp), self.prior)
        rvar_model.update(log_phi[1:, :], rvar[1:].reshape(-1, 1))
        rvar_gp.optimize()
        rvar_int = np.exp(np.log(rvar_model.integral_mean()[0]) + max_log_rvar)

        pred[i_x] = rq_int / r_int
        pred_var[i_x] = rvar_int / r_int
        print(pred_var[i_x])
        logging.info('Progress: ' + str(i_x + 1) + '/' + str(test_x.shape[0]))

    rmse = self.compute_rmse(pred, test_y)
    ll, cs = self.compute_ll_cs(pred, pred_var, test_y)
    print('Root Mean Squared Error:', rmse)
    print('Log-likelihood', ll)
    print('Calibration score', cs)

    end = time.time()
    print("Active Sampling Time: ", quad_time - start)
    print("Total Time: ", end - start)
    print("Estimate of model evidence: ", r_int)
    print("Model log-evidence ", log_r_int)
    if verbose:
        self.visualise(pred, pred_var, test_y)
    return rmse, ll, quad_time - start, log_r_int
def bq(self, verbose=True):
    """
    Marginalisation using vanilla Bayesian Quadrature - we use the Amazon
    Emukit interface for this purpose.
    :return:
    """

    def _rp_emukit(x: np.ndarray) -> np.ndarray:
        n, d = x.shape
        res = np.exp(
            self.gpr.log_sample(phi=np.exp(x))[0] + np.log(self.prior(x)))
        logging.info("Query point " + str(x) + ". Log likelihood: " +
                     str(-np.log(res)))
        return np.array(res).reshape(n, 1)

    def rp_emukit():
        # Wrap around the Emukit interface
        from emukit.core.loop.user_function import UserFunctionWrapper
        return UserFunctionWrapper(_rp_emukit), _rp_emukit

    start = time.time()
    budget = self.options['naive_bq_budget']
    test_x = self.gpr.X_test
    test_y = self.gpr.Y_test
    q = np.zeros((test_x.shape[0], budget))
    var = np.zeros((test_x.shape[0], budget))

    # Initial points - note that, as per GPML convention, the hyperparameters
    # are expressed in log scale. Initialise to the MAP estimate.
    map_model, _, _ = self.maximum_a_posterior(num_restarts=1,
                                               max_iters=500,
                                               verbose=False)
    if isinstance(self.gpr, RBFGPRegression):
        self.gpr.set_params(
            variance=map_model.rbf.variance,
            gaussian_noise=map_model.Gaussian_noise.variance)
    elif isinstance(self.gpr, PeriodicGPRegression):
        self.gpr.set_params(
            variance=map_model.std_periodic.variance,
            gaussian_noise=map_model.Gaussian_noise.variance)

    log_phi_initial = self.prior.mean.reshape(1, -1)
    r = np.empty((budget, ))
    samples = np.empty((budget, self.dimensions))
    pred = np.zeros((test_x.shape[0], ))
    pred_var = np.zeros((test_x.shape[0], ))
    log_r_initial = np.sqrt(2 * np.exp(
        self.gpr.log_sample(phi=np.exp(log_phi_initial.reshape(-1)))[0]))

    # Setting up the kernel - note we only marginalise over the lengthscale
    # terms; other hyperparameters are set to the MAP values.
    kern = GPy.kern.RBF(self.dimensions, variance=1., lengthscale=1.)
    log_r_gp = GPy.models.GPRegression(log_phi_initial,
                                       log_r_initial.reshape(1, -1), kern)
    log_r_model = WarpedIntegrandModel(WsabiLGP(log_r_gp), self.prior)

    # First, within the given allowance, compute an estimate of the model
    # evidence - the common denominator for all predictive distributions.
    for i_a in range(budget):
        log_phi_i = np.array(
            select_batch(log_r_model, 1, "Kriging Believer")).reshape(1, -1)
        try:
            log_r_i = self.gpr.log_sample(phi=np.exp(log_phi_i))[0]
            r[i_a] = log_r_i  # stores the log-likelihood; exponentiated below
            samples[i_a, :] = log_phi_i
            log_r_model.update(log_phi_i, np.exp(log_r_i).reshape(1, -1))
            print(np.exp(log_phi_i), log_r_i)
        except np.linalg.LinAlgError:
            continue

    r = np.exp(r + np.log(self.prior(samples)))  # r(phi) * p(phi)
    r_gp = GPy.models.GPRegression(samples, r.reshape(-1, 1), kern)
    r_model = self._wrap_emukit(r_gp)
    # Alternative: drive the sampling with Emukit's own loop instead.
    # r_loop = VanillaBayesianQuadratureLoop(model=r_model)
    # r_loop.run_loop(user_function=rp_emukit()[0], stopping_condition=budget)
    # log_phi = r_loop.loop_state.X
    # r = r_loop.loop_state.Y.reshape(-1)
    # np.savetxt('bq_sotonmet_samples.csv', r)

    quad_time = time.time()
    r_int = r_model.integrate()[0]  # Model evidence
    print("Estimate of model evidence: ", r_int)
    print("Model log-evidence ", np.log(r_int))

    for i_x in range(test_x.shape[0]):
        # Note that we do not actively sample again for q; we reuse the
        # samples drawn while computing the log-evidence.
        _, q_initial, var_initial = self.gpr.log_sample(
            phi=np.exp(log_phi_initial), x=test_x[i_x, :])

        # Sample the q and variance values
        q[i_x, 0] = q_initial
        var[i_x, 0] = var_initial
        for i_b in range(budget):
            log_phi_i = samples[i_b, :]
            _, q_i, var_i = self.gpr.log_sample(phi=np.exp(log_phi_i),
                                                x=test_x[i_x, :])
            q[i_x, i_b] = q_i
            var[i_x, i_b] = var_i

        # Construct the rq and r*var vectors and integrate
        q_x = q[i_x, :]
        var_x = var[i_x, :]
        rq = r * q_x
        rq_gp = GPy.models.GPRegression(samples, rq.reshape(-1, 1), kern)
        rq_model = self._wrap_emukit(rq_gp)
        rq_int = rq_model.integrate()[0]

        rvar = r * var_x
        rvar_gp = GPy.models.GPRegression(samples, rvar.reshape(-1, 1), kern)
        rvar_model = self._wrap_emukit(rvar_gp)
        rvar_int = rvar_model.integrate()[0]

        # Now estimate the posterior
        pred[i_x] = rq_int / r_int
        pred_var[i_x] = rvar_int / r_int
        logging.info('Progress: ' + str(i_x + 1) + '/' + str(test_x.shape[0]))

    rmse = self.compute_rmse(pred, test_y)
    ll, cs = self.compute_ll_cs(pred, pred_var, test_y)
    logging.info(str(pred) + ' ' + str(test_y))  # was logging.info(pred, test_y)
    print('Root Mean Squared Error:', rmse)
    print('Log-likelihood:', ll)
    print('Calibration Score:', cs)

    end = time.time()
    print("Active Sampling Time: ", quad_time - start)
    print("Total Time elapsed: ", end - start)
    if verbose:
        self.visualise(pred, pred_var, test_y)
    return rmse, ll, quad_time - start, np.log(r_int)
def bmc(self):
    # Allocating number of maximum evaluations
    start = time.time()
    budget = self.options['wsabi_budget']
    batch_count = 1
    test_x = self.gpr.X_test
    test_y = self.gpr.Y_test

    # Allocate memory for the samples and results
    samples = np.zeros((budget * batch_count, self.dimensions))  # The log-hyperparameter sampling points
    log_r = np.zeros((budget * batch_count, ))  # The log-likelihood function

    # Initial points - note that, as per GPML convention, the hyperparameters
    # are expressed in log scale. Initialise to the MAP estimate.
    map_model, _, _ = self.maximum_a_posterior(num_restarts=1,
                                               max_iters=1000,
                                               verbose=False)
    display(map_model)
    if isinstance(self.gpr, PeriodicGPRegression):
        pass
        # self.gpr.set_params(variance=map_model.std_periodic.variance,
        #                     gaussian_noise=map_model.Gaussian_noise.variance)
    elif isinstance(self.gpr, RBFGPRegression):
        self.gpr.set_params(
            variance=map_model.rbf.variance,
            gaussian_noise=map_model.Gaussian_noise.variance)

    # Set prior mean to the MAP value
    log_phi_initial = self.options['prior_mean'].reshape(1, -1)
    log_r_initial = np.sqrt(2 * np.exp(
        self.gpr.log_sample(phi=np.exp(log_phi_initial.reshape(-1)))[0]))

    pred = np.zeros((budget, test_x.shape[0]))
    pred_var = np.zeros((budget, test_x.shape[0]))

    # Setting up the kernel - note we only marginalise over the lengthscale
    # terms; other hyperparameters are set to the MAP values.
    kern = GPy.kern.RBF(self.dimensions, variance=1., lengthscale=1.)
    log_r_gp = GPy.models.GPRegression(log_phi_initial,
                                       log_r_initial.reshape(1, -1), kern)
    log_r_model = WarpedIntegrandModel(WsabiLGP(log_r_gp), self.prior)

    # First, within the given allowance, draw hyperparameter samples under
    # the model-evidence surrogate.
    for i_a in range(budget - 1):
        log_phi_i = np.array(
            select_batch(log_r_model, batch_count,
                         "Kriging Believer")).reshape(batch_count, -1)
        try:
            log_r_i = self.gpr.log_sample(phi=np.exp(log_phi_i))[0]
            log_r[i_a:i_a + batch_count] = log_r_i
            samples[i_a:i_a + batch_count, :] = log_phi_i
            log_r_model.update(log_phi_i, np.exp(log_r_i).reshape(1, -1))
            print(np.exp(log_phi_i), log_r_i)
        except np.linalg.LinAlgError:
            continue

    for i in range(budget):
        if isinstance(self.gpr, PeriodicGPRegression):
            try:
                self.gpr.set_params(lengthscale=np.exp(samples[i, 0]),
                                    period=np.exp(samples[i, 1]),
                                    variance=np.exp(samples[i, 2]),
                                    gaussian_noise=np.exp(samples[i, 3]))
            except np.linalg.LinAlgError:
                print('LinAlg Error')
                pass
        else:
            self.gpr.set_params(samples[i, :])
        tmp_res = self.gpr.model.predict(test_x)
        pred[i, :] = tmp_res[0].reshape(-1)
        pred_var[i, :] = tmp_res[1].reshape(-1)
        logging.info("Progress: " + str(i) + " / " + str(budget))

    # Discard the first 50 samples as burn-in, then average the remaining
    # predictions (this assumes budget > 50).
    pred = pred[50:, :]
    pred_var = pred_var[50:, :]
    pred = pred.sum(axis=0) / (budget - 50)
    pred_var = pred_var.sum(axis=0) / (budget - 50)

    rmse = self.compute_rmse(pred, test_y)
    ll, cs = self.compute_ll_cs(pred, pred_var, test_y)
    print("Root Mean Squared Error", rmse)
    print("Log-likelihood", ll)
    print("Calibration Score", cs)
    self.visualise(pred, pred_var, test_y)
    return rmse, ll, None, None
def wsabi(self, same_query_pts=True):
    budget = self.options['wsabi_bq_budget']
    samples = np.zeros((budget, self.gpr.dimensions))  # x locations of the samples
    lik = np.zeros((budget, ))  # likelihoods evaluated at x

    # Initial points
    initial_x = np.zeros((self.dimensions, 1)).reshape(1, -1)  # Set the initial sample to the prior mean
    initial_y = np.array(self.gpr.sample(initial_x)).reshape(1, -1)

    # Setting up kernel
    kern = GPy.kern.RBF(
        self.dimensions,
        variance=self.options['naive_bq_kern_variance'],
        lengthscale=self.options['naive_bq_kern_lengthscale'])

    # Initial guess for the GP for BQ
    gpy_gp_lik = GPy.models.GPRegression(initial_x, initial_y, kernel=kern)
    warped_gp = WsabiLGP(gpy_gp_lik)
    model = WarpedIntegrandModel(warped_gp, prior=self.prior)

    for i in range(budget):
        samples[i, :] = np.array(
            select_batch(model, 1, 'Local Penalisation')).reshape(1, -1)
        lik[i] = np.array(self.gpr.sample(samples[i, :])).reshape(1, -1)
        model.update(samples[i, :], lik[i])
        gpy_gp_lik.optimize()

    intv = model.integral_mean()[0]
    print("Integral mean estimated:", intv)

    # Generate query points for the posterior distribution and evaluate the
    # prior at these points.
    if same_query_pts:
        query_points = np.empty((budget + 1, self.gpr.dimensions))
        query_points[0] = initial_x
        query_points[1:] = samples
        prior_query_points = self.prior(query_points)
        unwarped_y = np.squeeze(warped_gp._unwarped_Y)
    else:
        query_points, prior_query_points = self._gen_meshgrid_query_points()
        # Evaluate the GP surrogate of the likelihood surface at the query
        # points, then unwarp the warped outputs.
        warped_lik = gpy_gp_lik.predict(query_points)[0]
        unwarped_y = self.unwarp(warped_lik, warped_gp._alpha)

    posterior_query_points = (unwarped_y * prior_query_points /
                              intv).reshape(-1, 1)

    # Initialise another GP for the posterior distribution
    gpy_gp_post = GPy.models.GPRegression(query_points,
                                          posterior_query_points,
                                          kernel=kern)
    gpy_gp_post.optimize()
    # plt.subplot(211); gpy_gp_lik.plot(); plt.subplot(212)
    gpy_gp_post.plot(plot_limits=[[-5, -5], [5, 5]])
    plt.show()
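# `self.unwarp` is referenced above but not defined in this snippet. Given
# the WSABI-L warping y_tilde = sqrt(2 * y), its inverse is presumably the
# following (a sketch only; the name, signature and alpha offset are assumed
# from how WsabiLGP is used elsewhere in this file):
#
# def unwarp(self, warped_y, alpha):
#     # Invert the square-root warping: y = alpha + 0.5 * y_tilde ** 2
#     return alpha + 0.5 * np.asarray(warped_y) ** 2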
def wsabi(X_pred,
          y_grd,
          log_lik_handle,
          param_dim=5,
          prior_mean=np.zeros((5, 1)),
          prior_var=100 * np.eye(5)):
    # Allocating number of maximum evaluations
    start = time.time()
    prior = Gaussian(mean=prior_mean.reshape(-1), covariance=prior_var)

    # Initial grid sampling
    log_phis = np.mgrid[-1:1.1:1, -1:1.1:1, -1:1.1:1, -1:1.1:1,
                        0:25:5].reshape(5, -1).T
    n = log_phis.shape[0]
    phis = log_phis.copy()
    phis[:, :-1] = np.exp(phis[:, :-1])  # last column is left in linear scale

    # Allocate memory for the samples and results
    log_r = np.zeros((n, 1))  # The log-likelihood function
    q = np.zeros((n, 1))  # Prediction

    for i in range(n):
        log_r[i, :], q[i, :], _ = log_lik_handle(phi=phis[i, :], x_pred=X_pred)
        print(phis[i, :], log_r[i, :], q[i, :])
    r = np.exp(log_r)

    # Setting up the kernel - note we only marginalise over the lengthscale
    # terms; other hyperparameters are set to the MAP values.
    kern = GPy.kern.RBF(param_dim, variance=1., lengthscale=1.)
    # WSABI-L warping: the GP is initialised on sqrt(2 * r); the original
    # passed the unwarped r here, which looks like a bug.
    r_gp = GPy.models.GPRegression(phis[:1, :], np.sqrt(2 * r[:1, :]), kern)
    r_model = WarpedIntegrandModel(WsabiLGP(r_gp), prior)
    r_model.update(phis[1:, :], r[1:, :])
    r_gp.optimize()

    r_int = r_model.integral_mean()[0]  # Model evidence
    log_r_int = np.log(r_int)  # Model log-evidence
    print("Estimate of model evidence: ", r_int)
    print("Model log-evidence ", log_r_int)

    # Enforce positivity in q
    q_min = np.min(q)
    if q_min < 0:
        q -= q_min
    else:
        q_min = 0

    # The same exponentiation and rescaling trick for q
    log_rq_x = log_r + np.log(q)
    max_log_rq = np.max(log_rq_x)
    rq = np.exp(log_rq_x - max_log_rq)
    rq_gp = GPy.models.GPRegression(phis[:1, :], np.sqrt(2 * rq[:1, :]), kern)
    rq_model = WarpedIntegrandModel(WsabiLGP(rq_gp), prior)
    # was rq_model.update(phis, rq), which double-counted the first point
    rq_model.update(phis[1:, :], rq[1:, :])
    rq_gp.optimize()

    # Now estimate the posterior
    rq_int = np.exp(np.log(rq_model.integral_mean()[0]) +
                    max_log_rq) + q_min * r_int
    print("rq_int", rq_int)

    # The same trick would apply to the predictive variance (disabled here):
    # log_rvar_x = log_r + np.log(var); max_log_rvar = np.max(log_rvar_x)
    # rvar = np.exp(log_rvar_x - max_log_rvar); ...; pred_var = rvar_int / r_int

    pred = rq_int / r_int
    print('pred', pred)
    print('actual', y_grd)
    end = time.time()
    print("Total Time: ", end - start)
    return pred, None
def wsabi_bq(self, rebase=False):
    """
    Marginalise the marginal log-likelihood using WSABI Bayesian Quadrature.
    :return:
    """
    budget = self.options['naive_bq_budget']
    samples = np.zeros((budget, self.gpr.dimensions))  # x locations of the samples
    yv = np.zeros((budget, ))  # log-likelihoods evaluated at x
    yv_scaled = np.zeros((budget, ))
    intv = np.zeros((budget, ))  # Running estimate of the marginalised integral
    log_intv = np.zeros((budget, ))

    # Initial points
    initial_x = np.zeros((self.dimensions, 1)).reshape(1, -1) + 1e-6  # Set the initial sample to the prior mean
    if rebase:
        initial_y = np.array([1]).reshape(1, -1)
    else:
        initial_y = np.array(self.gpr.sample(initial_x)).reshape(1, -1)

    # Prior in log space
    prior_mean = self.options['prior_mean'].reshape(-1)
    prior_cov = self.options['prior_variance']
    prior = Gaussian(mean=prior_mean, covariance=prior_cov)

    # Setting up the kernel - noting the log-transformation
    kern = GPy.kern.RBF(
        self.dimensions,
        variance=self.options['naive_bq_kern_variance'],
        lengthscale=self.options['naive_bq_kern_lengthscale'])

    # Initial guess for the GP for BQ
    gpy_gp = GPy.models.GPRegression(initial_x, initial_y, kernel=kern)
    warped_gp = WsabiLGP(gpy_gp)
    model = WarpedIntegrandModel(warped_gp, prior=prior)

    if rebase:
        for i in range(budget):
            samples[i, :] = np.array(
                select_batch(model, 1, LOCAL_PENALISATION)).reshape(1, -1)
            yv[i] = np.array(self.gpr.log_sample(samples[i, :])).reshape(1, -1)

            # Rescale all observed log-likelihoods relative to the running
            # maximum before exponentiating.
            scaling = np.max(yv[:i + 1])
            yv_scaled[:i + 1] = np.exp(yv[:i + 1] - scaling)
            x = samples[:i + 1]
            y = yv_scaled[:i + 1].reshape(-1, 1)

            if i % 20 == 0 and i > 0:
                # Create a new model, since all x and y data have been
                # replaced due to rebasing
                gpy_gp = GPy.models.GPRegression(x, y, kernel=kern)
                warped_gp = WsabiLGP(gpy_gp)
                model = WarpedIntegrandModel(warped_gp, prior)
                gpy_gp.optimize()
            log_intv[i] = np.log(model.integral_mean()[0]) + scaling
            intv[i] = np.exp(log_intv[i])
            if i % 100 == 0:
                self.plot_iterations(i, log_intv,
                                     true_val=self.gpr.grd_log_evidence)
    else:
        for i in range(budget):
            samples[i, :] = np.array(
                select_batch(model, 1, LOCAL_PENALISATION)).reshape(1, -1)
            yv[i] = np.array(self.gpr.sample(samples[i, :])).reshape(1, -1)
            model.update(samples[i, :], yv[i])
            gpy_gp.optimize()
            intv[i] = model.integral_mean()[0]
            log_intv[i] = np.log(intv[i])
            if i % 100 == 0:
                self.plot_iterations(i, log_intv,
                                     true_val=self.gpr.grd_log_evidence)

    # gpy_gp.plot()
    plt.show()
    return yv[-1], intv[-1]
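# Note on the `rebase` branch above: because every stored log-likelihood is
# re-exponentiated relative to the running maximum, all previously observed
# y-values change whenever that maximum does, so the surrogate GP cannot
# simply be updated in place; it is rebuilt from scratch (here every 20
# iterations) on the rescaled data.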
def wsabi(gpy_gp: GPy.core.GP, x, kern=None, noise=None):
    """Initial grid sampling, followed by WSABI quadrature."""
    from ratio.posterior_mc_inference import PosteriorMCSampler

    budget = 50
    x = np.array(x).reshape(1, 1)
    # Alternative sampling schemes (disabled):
    # sampler = PosteriorMCSampler(gpy_gp)
    # log_params = np.log(sampler.hmc(num_iters=budget, mode='gpy'))
    # for i in range(budget):
    #     log_params[i, :] = scipy.stats.multivariate_normal.rvs(
    #         mean=np.array([0, 0, 0]), cov=4 * np.eye(3))
    log_params = np.mgrid[0:4.1:1, 0:4.1:1, -4:-0.1:1.5].reshape(3, -1).T
    budget = log_params.shape[0]

    if kern is None:
        kern = GPy.kern.RBF(input_dim=1, variance=1., lengthscale=1.)
    if noise is None:
        noise = 1e-3
    prior = Gaussian(mean=np.array([0, 0, 0]), covariance=4 * np.eye(3))

    log_phis = np.empty((budget, 3))
    log_liks = np.empty((budget, ))
    pred_means = np.empty((budget, ))
    pred_vars = np.empty((budget, ))

    for i in range(log_params.shape[0]):
        log_phi = log_params[i, :]
        _set_model(gpy_gp, np.exp(log_phi))
        log_lik = gpy_gp.log_likelihood()
        log_phis[i] = log_phi
        log_liks[i] = log_lik
        pred_means[i], pred_vars[i] = gpy_gp.predict_noiseless(x)

    if np.max(log_liks) > 15.:
        # For highly peaked likelihoods we do not use quadrature and simply
        # use the MAP estimate.
        idx = log_liks.argmax()
        return pred_means[idx], pred_vars[idx], kern, noise

    r_gp = GPy.models.GPRegression(
        log_params[:1, :],
        np.sqrt(2 * np.exp(log_liks[0])).reshape(1, -1), kern)
    r_gp.Gaussian_noise.variance = noise
    r_model = WarpedIntegrandModel(WsabiLGP(r_gp), prior)
    # The updates below reshape to (-1, 1) (one observation per row); the
    # original used .reshape(1, -1), which mis-shapes multi-point updates.
    r_model.update(log_phis[1:, :], np.exp(log_liks[1:]).reshape(-1, 1))
    r_gp.optimize()
    r_int = r_model.integral_mean()[0]

    # Enforce positivity in the predictive means
    q_min = np.min(pred_means)
    pred_means -= q_min

    rq = np.exp(log_liks) * pred_means
    rq_gp = GPy.models.GPRegression(log_phis[:1, :],
                                    np.sqrt(2 * rq[0]).reshape(1, -1), kern)
    rq_model = WarpedIntegrandModel(WsabiLGP(rq_gp), prior)
    rq_model.update(log_phis[1:, :], rq[1:].reshape(-1, 1))
    rq_gp.optimize()
    rq_int = rq_model.integral_mean()[0] + q_min * r_int

    rvar = np.exp(log_liks) * pred_vars
    rvar_gp = GPy.models.GPRegression(log_phis[:1, :],
                                      np.sqrt(2 * rvar[0]).reshape(1, -1),
                                      kern)
    rvar_model = WarpedIntegrandModel(WsabiLGP(rvar_gp), prior)
    rvar_model.update(log_phis[1:, :], rvar[1:].reshape(-1, 1))
    rvar_gp.optimize()
    rvar_int = rvar_model.integral_mean()[0]

    return (rq_int / r_int, rvar_int / r_int, r_gp.kern,
            r_gp.Gaussian_noise.variance)
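# `_set_model` is used above but not defined in this snippet. A sketch of the
# assumed behaviour, setting an RBF GP's hyperparameters from the
# exponentiated sample (hypothetical helper; the parameter order is guessed
# from the log_params grid, where the last, negative-ranged column looks like
# the noise):
#
# def _set_model(gpy_gp: GPy.core.GP, params):
#     variance, lengthscale, noise = params
#     gpy_gp.kern.variance = variance
#     gpy_gp.kern.lengthscale = lengthscale
#     gpy_gp.Gaussian_noise.variance = noise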