def param_maximum_likelihood(gpy_gp: GPy.models.GPRegression,
                             test_model: bool = True,
                             test_X: np.ndarray = None,
                             test_Y: np.ndarray = None,
                             variance_prior: GPy.priors.Prior = None,
                             lengthscale_prior: GPy.priors.Prior = None,
                             noise_prior: GPy.priors.Prior = None,
                             fix_noise_params=None):
    r"""
    Fit the GP hyperparameters by optimising the model objective.

    With no priors attached this is the maximum likelihood (MLE-II) estimate

        $ \theta^* = argmin(-log(P(Y|\theta))) $

    If any of ``variance_prior``, ``lengthscale_prior`` or ``noise_prior`` is
    supplied, it is attached to the corresponding parameter before
    optimisation, so the result is the MAP-II estimate

        $ \theta^* = argmin(-log(P(Y|\theta)) - log(P(\theta))) $

    The model is optimised in place: one ``optimize`` call capped at
    ``MLE_optimisation_iteration`` iterations, followed by
    ``optimize_restarts`` with ``MLE_optimisation_restart`` restarts.

    :param gpy_gp: An initialised GPy GPRegression object (modified in place)
    :param test_model: if True, print a report, display the fitted model and
        evaluate it on ``test_X`` / ``test_Y`` with ``test_gp``
    :param test_X: test inputs, required when ``test_model`` is True
    :param test_Y: test targets, required when ``test_model`` is True
    :param variance_prior: optional prior for the kernel variance
    :param lengthscale_prior: optional prior for the kernel lengthscale
    :param noise_prior: optional prior for the Gaussian noise variance
    :param fix_noise_params: if not None, the Gaussian noise parameters are
        set to this value and the noise variance is fixed during optimisation
    :return: tuple ``(gpy_gp, res, rmse, ll, elapsed)`` where ``res`` is the
        list ``[kernel variance, kernel lengthscale, noise variance]`` of the
        model's parameter objects (the noise hyperparameter is last),
        ``rmse`` / ``ll`` are the ``test_gp`` results (NaN when
        ``test_model`` is False) and ``elapsed`` is the optimisation wall
        time in seconds
    :raises ValueError: if ``test_model`` is True but test data is missing
    """
    if test_model and (test_X is None or test_Y is None):
        raise ValueError(
            "test_X and test_Y must both be provided when test_model is True")

    if variance_prior is not None:
        gpy_gp.kern.variance.set_prior(variance_prior)
    if lengthscale_prior is not None:
        gpy_gp.kern.lengthscale.set_prior(lengthscale_prior)
    if noise_prior is not None:
        gpy_gp.Gaussian_noise.variance.set_prior(noise_prior)
    if fix_noise_params is not None:
        gpy_gp.Gaussian_noise[:] = fix_noise_params
        gpy_gp.Gaussian_noise.variance.fix()

    start = time.time()
    gpy_gp.optimize(messages=True, max_iters=MLE_optimisation_iteration)
    gpy_gp.optimize_restarts(num_restarts=MLE_optimisation_restart)
    res = [
        gpy_gp.kern.variance,
        gpy_gp.kern.lengthscale,
        gpy_gp.Gaussian_noise.variance,
    ]
    elapsed = time.time() - start

    rmse, ll = np.nan, np.nan
    if test_model:
        # Any prior passed to this call turns the estimate into a MAP
        # estimate, not only a prior on the noise variance.
        supplied_priors = (variance_prior, lengthscale_prior, noise_prior)
        has_prior = any(prior is not None for prior in supplied_priors)
        model = 'MAP' if has_prior else 'MLE'
        print("----------------- Testing " + model + " -------------------")
        print("Fix noise hyperparameter ?", fix_noise_params)
        print("Clock time: ", elapsed)
        display(gpy_gp)
        rmse, ll = test_gp(gpy_gp, test_X, test_Y)
    return gpy_gp, res, rmse, ll, elapsed
def create_slice_1d(i: int, experiment: bopt.Experiment, resolution: int,
                    n_dims: int, x_slice: List[float],
                    model: GPy.models.GPRegression, sample: bopt.Sample,
                    show_marginal: int) -> Slice1D:
    """Build a one-dimensional slice of the GP prediction along parameter ``i``.

    The hyperparameter at index ``i`` is swept over its grid of
    ``resolution`` points while every other input dimension is pinned to the
    matching entry of ``x_slice``. When ``show_marginal == 1`` a new GP is
    fitted with ``create_gp_for_data`` on column ``i`` of the samples that
    precede ``sample`` and is evaluated on the grid alone; that model then
    replaces ``model`` for the rest of the function.

    Values of a log-scale parameter are mapped back with ``10 ** value``
    before they are stored in the result.

    :return: a ``Slice1D`` built from the parameter, grid values, slice
        position, predictive mean, predictive standard deviation, expected
        improvement, the recorded samples and the model that was used
    """
    param = experiment.hyperparameters[i]
    grid = param.range.grid(resolution)

    # Column ``i`` follows the grid; the remaining columns stay constant.
    design = np.zeros([resolution, n_dims], dtype=np.float32)
    for col in range(n_dims):
        if col != i:
            design[:, col] = np.full([resolution],
                                     x_slice[col],
                                     dtype=np.float32)
        else:
            design[:, col] = grid

    grid_column = grid.reshape(-1, 1)

    if show_marginal != 1:
        mu, var = model.predict(design)
    else:
        earlier = experiment.predictive_samples_before(sample)
        X_m, Y_m = bopt.SampleCollection(earlier).to_xy()
        X_m = X_m[:, i].reshape(-1, 1)
        model = create_gp_for_data(experiment, [param], X_m, Y_m)
        mu, var = model.predict(grid_column)

    mu = mu.reshape(-1)
    sigma = np.sqrt(var).reshape(-1)

    improvement = bopt.ExpectedImprovement().raw_call(mu, sigma,
                                                      model.Y.max())
    acq = improvement.reshape(-1)

    # Coordinates (along dimension ``i``) and targets of the earlier samples
    # plus the current one.
    other_samples: Dict[str, List[float]] = defaultdict(list)
    history = experiment.predictive_samples_before(sample) + [sample]
    for past in history:
        coords, target = past.to_xy()
        coord = float(coords.tolist()[i])
        if param.range.is_logscale():
            coord = 10.0**coord
        other_samples["x"].append(coord)
        other_samples["y"].append(target)

    if not param.range.is_logscale():
        x_slice_at, x = x_slice[i], grid
    else:
        x_slice_at, x = 10.0**x_slice[i], 10.0**grid

    return Slice1D(param, x.tolist(), x_slice_at, mu.tolist(),
                   sigma.tolist(), acq.tolist(), other_samples, model)
def predict(model: GPy.models.GPRegression,
            X: np.array) -> Tuple[np.array, np.array]:
    """Predict with a GPy regression model and report standard deviations.

    Thin wrapper around ``model.predict``: the mean is passed through
    unchanged and the second output is replaced by its element-wise square
    root, i.e. the standard deviation instead of the variance.

    :param model: the model to query; must be a ``GPy.models.GPRegression``
    :param X: the query inputs, forwarded as-is to ``model.predict``
    :return: tuple ``(mean, standard deviation)``
    """
    is_gp_regression = isinstance(model, GPy.models.GPRegression)
    assert is_gp_regression, \
        "This wrapper function is written for GPy.models.GPRegression"

    mean, variance = model.predict(X)
    std = np.sqrt(variance)
    return mean, std
def param_hmc(gpy_gp: GPy.models.GPRegression,
              test_model: bool = True,
              variance_prior: GPy.priors.Prior = None,
              lengthscale_prior: GPy.priors.Prior = None,
              noise_prior: GPy.priors.Prior = None,
              test_X: np.ndarray = None,
              test_Y: np.ndarray = None,
              plot_distributions: bool = False):
    """
    Sample the hyperparameter posterior with hybrid (Hamiltonian) Monte Carlo
    and set the Gaussian noise variance to its posterior sample mean.

    ``MCMC_samples`` samples are drawn with ``GPy.inference.mcmc.HMC``
    (step size 5e-2) and the first ``MCMC_burn_in`` are discarded. Only the
    noise variance is overwritten, using the mean of the last column of the
    remaining samples; the kernel variance and lengthscale are not assigned
    by this function.

    :param gpy_gp: GPy regression model (modified in place)
    :param test_model: if True, print a report, display the model and
        evaluate it on ``test_X`` / ``test_Y`` with ``test_gp``
    :param variance_prior: optional prior for the kernel variance
    :param lengthscale_prior: optional prior for the kernel lengthscale
    :param noise_prior: optional prior for the Gaussian noise variance
    :param test_X: test inputs, required when ``test_model`` is True
    :param test_Y: test targets, required when ``test_model`` is True
    :param plot_distributions: if True, plot the distribution of the last
        sampled parameter (all samples, before burn-in removal)
    :return: tuple ``(gpy_gp, res, rmse, ll, elapsed)`` where ``res`` is the
        list ``[kernel variance, kernel lengthscale, noise variance]`` of the
        model's parameter objects, ``rmse`` / ``ll`` are the ``test_gp``
        results (NaN when ``test_model`` is False) and ``elapsed`` is the
        sampling wall time in seconds
    :raises ValueError: if ``test_model`` is True but test data is missing
    """
    if test_model and (test_X is None or test_Y is None):
        raise ValueError(
            "test_X and test_Y must both be provided when test_model is True")

    if variance_prior is not None:
        gpy_gp.kern.variance.set_prior(variance_prior)
    if lengthscale_prior is not None:
        gpy_gp.kern.lengthscale.set_prior(lengthscale_prior)
    if noise_prior is not None:
        gpy_gp.Gaussian_noise.variance.set_prior(noise_prior)
    display(gpy_gp)

    start = time.time()
    hmc = GPy.inference.mcmc.HMC(gpy_gp, stepsize=5e-2)
    t = hmc.sample(num_samples=MCMC_samples)

    if plot_distributions:
        # One column per flattened model parameter; plot the last one.
        df = pd.DataFrame(t, columns=gpy_gp.parameter_names_flat())
        sns.distplot(
            df.iloc[:, -1],
            color='r',
        )
        plt.show()

    # Drop the burn-in samples, then use the last column (treated here as
    # the Gaussian noise variance) for the point estimate.
    samples = t[MCMC_burn_in:, :]
    gpy_gp.Gaussian_noise.variance[:] = samples[:, -1].mean()

    res = [
        gpy_gp.kern.variance,
        gpy_gp.kern.lengthscale,
        gpy_gp.Gaussian_noise.variance,
    ]
    elapsed = time.time() - start

    rmse, ll = np.nan, np.nan
    if test_model:
        print("----------------- Testing HMC -------------------")
        display(gpy_gp)
        print("Clock time: ", elapsed)
        rmse, ll = test_gp(gpy_gp, test_X, test_Y)
    return gpy_gp, res, rmse, ll, elapsed
def test_gp(gpy_gp: GPy.models.GPRegression,
            data_X: np.ndarray,
            data_Y: np.ndarray,
            display_model: bool = False) -> float:
    """
    Evaluate the goodness of the model fit by its RMSE on held-out data.

    :param gpy_gp: the GPy regression model
    :param data_X: the query points, typically the x of a validation set;
        its second dimension must equal ``gpy_gp.input_dim``
    :param data_Y: the labels, typically the y of a validation set; its last
        axis must have length 1 (it is squeezed away)
    :param display_model: if True, print the RMSE
    :return: the root mean squared error between predictive mean and labels
    """
    assert data_X.shape[0] == data_Y.shape[
        0], "Lengths of x and labels mismatch"
    assert data_X.shape[
        1] == gpy_gp.input_dim, "Dimension of x and the model dimension mismatch"

    mean_pred, _ = gpy_gp.predict(Xnew=data_X)

    # Flatten the predictive mean before subtracting: a column-shaped mean
    # (N, 1) minus the squeezed labels (N,) would broadcast to an N x N
    # matrix and average the error over every prediction/label pair.
    residuals = mean_pred.reshape(-1) - np.squeeze(data_Y, -1)
    rmse = np.sqrt((residuals**2).mean())

    if display_model:
        print("Root Mean Squared Error (RMSE): ", str(rmse))
    return rmse
def test_gp(gpy_gp: GPy.models.GPRegression,
            data_X: np.ndarray,
            data_Y: np.ndarray,
            display_model: bool = False):
    """
    Evaluate the model fit by RMSE and Gaussian log-likelihood on test data.

    Predictions come from ``predict_noiseless``. The log-likelihood is the
    sum over test points of the normal log-density of each label under the
    predictive mean and the square root of the predictive variance. Both
    metrics are always printed.

    :param gpy_gp: the GPy regression model
    :param data_X: the query points, typically the x of a validation set;
        its second dimension must equal ``gpy_gp.input_dim``
    :param data_Y: the labels, typically the y of a validation set; its last
        axis must have length 1 (it is squeezed away)
    :param display_model: if True, also plot predictions against the labels
    :return: tuple ``(rmse, ll)``; ``ll`` is a plain float
    """
    assert data_X.shape[0] == data_Y.shape[
        0], "Lengths of x and labels mismatch"
    assert data_X.shape[
        1] == gpy_gp.input_dim, "Dimension of x and the model dimensions mismatch"

    data_Y = np.squeeze(data_Y, -1)
    mean_pred, var_pred = gpy_gp.predict_noiseless(Xnew=data_X)

    # Work on flat vectors so every operation below is element-wise and the
    # log-likelihood comes out as a scalar rather than a one-element array.
    mean_flat = mean_pred.reshape(-1)
    std_flat = np.sqrt(var_pred).reshape(-1)

    rmse = np.sqrt(mean_squared_error(data_Y, mean_flat))
    ll = float(norm.logpdf(data_Y, loc=mean_flat, scale=std_flat).sum())

    print("Root Mean Squared Error (RMSE) for Testing: ", str(rmse))
    print("Log-likelihood (LL): ", str(ll))

    if display_model:
        plt.plot(mean_flat, marker=".", color="red", label='Prediction')
        plt.plot(np.squeeze(data_Y),
                 marker=".",
                 color='blue',
                 label='Ground Truth')
        plt.legend()
        plt.show()
    return rmse, ll
def create_slice_2d(i: int, j: int, experiment: bopt.Experiment,
                    resolution: int, n_dims: int, x_slice: List[float],
                    model: GPy.models.GPRegression, sample: bopt.Sample,
                    show_marginal: int) -> Slice2D:
    """Build a two-dimensional slice of the GP prediction over parameters
    ``i`` and ``j``.

    The two hyperparameters are swept over a ``resolution`` x ``resolution``
    mesh while every other input dimension is pinned to the matching entry
    of ``x_slice``. When ``show_marginal == 1`` a new GP is fitted with
    ``create_gp_for_data`` on columns ``i`` and ``j`` of the samples that
    precede ``sample`` and evaluated on those two columns only; that model
    then replaces ``model`` for the rest of the function.

    Values of a log-scale parameter are mapped back with ``10 ** value``
    before they are stored in the result.

    :return: a ``Slice2D`` built from both parameters, their grid values,
        the slice position, the predictive mean, the recorded samples and
        the model that was used
    """
    p1 = experiment.hyperparameters[i]
    p2 = experiment.hyperparameters[j]
    d1 = p1.range.grid(resolution)
    d2 = p2.range.grid(resolution)
    g1, g2 = np.meshgrid(d1, d2)

    # One mesh-shaped array per input dimension: the two swept dimensions
    # take the mesh, the rest are constant at the slice position.
    columns = [0.0] * len(x_slice)
    columns[i], columns[j] = g1, g2
    for dim, fixed in enumerate(x_slice):
        if dim != i and dim != j:
            columns[dim] = np.full(g1.shape, fixed)

    stacked = np.stack(columns, axis=-1)
    X_pred = stacked.reshape(resolution * resolution, -1)

    if show_marginal != 1:
        mu, var = model.predict(X_pred)
    else:
        pair = [i, j]
        earlier = experiment.predictive_samples_before(sample)
        X_m, Y_m = bopt.SampleCollection(earlier).to_xy()
        X_m = X_m[:, pair].reshape(-1, 2)
        model = create_gp_for_data(experiment, [p1, p2], X_m, Y_m)
        mu, var = model.predict(X_pred[:, pair])

    mu = mu.reshape(-1)
    sigma = np.sqrt(var).reshape(-1)
    improvement = bopt.ExpectedImprovement().raw_call(mu, sigma,
                                                      model.Y.max())
    acq = improvement.reshape(-1)

    # NOTE(review): sigma and acq are reshaped to the mesh like mu, but only
    # mu is handed to Slice2D below.
    mesh_shape = (resolution, resolution)
    mu = mu.reshape(mesh_shape)
    sigma = sigma.reshape(mesh_shape)
    acq = acq.reshape(mesh_shape)

    # Coordinates (along both swept dimensions) and targets of the earlier
    # samples plus the current one.
    other_samples: Dict[str, List[float]] = defaultdict(list)
    history = experiment.predictive_samples_before(sample) + [sample]
    for past in history:
        coords, target = past.to_xy()
        first = float(coords.tolist()[i])
        second = float(coords.tolist()[j])
        if p1.range.is_logscale():
            first = 10.0**first
        if p2.range.is_logscale():
            second = 10.0**second
        other_samples["x1"].append(first)
        other_samples["x2"].append(second)
        other_samples["y"].append(target)

    if not p1.range.is_logscale():
        x1, x1_slice_at = d1.tolist(), x_slice[i]
    else:
        x1, x1_slice_at = (10.0**d1).tolist(), 10.0**x_slice[i]

    if not p2.range.is_logscale():
        x2, x2_slice_at = d2.tolist(), x_slice[j]
    else:
        x2, x2_slice_at = (10.0**d2).tolist(), 10.0**x_slice[j]

    return Slice2D(p1, p2, x1, x2, x1_slice_at, x2_slice_at, mu.tolist(),
                   other_samples, model)
def lin_shrinkage(gpy_gp: GPy.models.GPRegression,
                  test_model: bool = True,
                  test_X: np.ndarray = None,
                  test_Y: np.ndarray = None,
                  c: float = 1e-6,
                  plot_stuff=False):
    """
    Use a linear shrinkage method to estimate the Gaussian noise
    hyperparameter.

    The noise variance is fixed (excluded from the optimiser) and the kernel
    hyperparameters get a LogGaussian(0, 2) prior. Each inner iteration then

    1. takes one optimiser step on the kernel hyperparameters,
    2. computes the sorted eigenvalues of the kernel matrix on the training
       inputs,
    3. drops the leading "outlier" eigenvalues (selected from the eigenvalue
       gaps whose magnitude exceeds ``c``) and takes the median ``lambda_b``
       of the remaining bulk,
    4. sets ``alpha = 1 / (1 + lambda_b)``, the noise variance to
       ``1 - alpha`` and scales the kernel variance by ``alpha``.

    After the restarts, the parameters of the restart with the highest log
    marginal likelihood are written back to the model. Before returning, the
    noise is unfixed and all priors are removed.

    :param gpy_gp: GPy regression model (modified in place)
    :param test_model: if True, print a report, display the model and
        evaluate it on ``test_X`` / ``test_Y`` with ``test_gp``
    :param test_X: test inputs, required when ``test_model`` is True
    :param test_Y: test targets, required when ``test_model`` is True
    :param c: threshold on the absolute eigenvalue gap used to pick outliers
    :param plot_stuff: if True, plot the eigen-spectrum after each restart
    :return: tuple ``(gpy_gp, final_params, rmse, ll, elapsed)`` where
        ``final_params`` is the stored parameter vector of the best restart
    :raises ValueError: if ``test_model`` is True but test data is missing
    """
    if test_model and (test_X is None or test_Y is None):
        raise ValueError(
            "test_X and test_Y must both be provided when test_model is True")

    n_restarts = 1
    n_inner_iters = 1000

    start = time.time()
    train_x = gpy_gp.X
    priors = GPy.priors.LogGaussian(mu=0., sigma=2.)
    gpy_gp.kern.variance.set_prior(priors)
    gpy_gp.kern.lengthscale.set_prior(priors)

    kern_dim = 1 + gpy_gp.input_dim
    # One row per restart: parameter vector followed by the final LML.
    # Sized to the number of restarts so the argmax below can never select
    # an uninitialised row.
    metrics = np.empty((n_restarts, 3 + gpy_gp.input_dim))
    gpy_gp.Gaussian_noise.fix()
    lml = -np.inf

    for j in range(n_restarts):
        # Random log-normal starting point for the kernel hyperparameters.
        theta = np.exp(
            multivariate_normal(mean=np.zeros((kern_dim, )),
                                cov=4 * np.eye(kern_dim)).rvs())
        for _ in range(n_inner_iters):
            gpy_gp.optimize(start=theta, max_iters=1)
            K = gpy_gp.kern.K(train_x)
            eig = np.sort(np.linalg.eigvals(K).real)[::-1]
            delta_lambda = np.diff(eig)
            try:
                n_outlier = delta_lambda[np.abs(delta_lambda) > c].argmax()
            except ValueError:
                # No gap exceeds the threshold: treat every eigenvalue as
                # part of the bulk.
                n_outlier = 0
            eig_bulk = eig[n_outlier:]
            lambda_b = np.median(eig_bulk)

            alpha = 1. / (1. + lambda_b)
            gpy_gp.Gaussian_noise.variance[:] = (1 - alpha)
            gpy_gp.kern.variance[:] *= alpha
            # Next step starts from the current parameters minus the last
            # entry of the parameter array.
            theta = gpy_gp.param_array[:-1]
            lml = gpy_gp.log_likelihood()

        if plot_stuff:
            plt.subplot(211)
            plt.axvline(int(len(eig) / 2))
            plt.plot(np.log(eig), marker='.', linestyle='None')
            plt.subplot(212)
            plt.hist(eig[int(len(eig) / 5):], bins=80)
            plt.axvline(lambda_b)
            plt.show()
        print('LML After Optimisation Restart', gpy_gp.log_likelihood())
        print('Eig lambda_b/avg', lambda_b)
        metrics[j, :-1] = gpy_gp.param_array
        metrics[j, -1] = lml

    best_iter = metrics[:, -1].argmax()
    gpy_gp.kern.variance[:] = metrics[best_iter, 0]
    gpy_gp.kern.lengthscale[:] = metrics[best_iter, 1:-2]
    gpy_gp.Gaussian_noise.variance[:] = metrics[best_iter, -2]
    final_params = metrics[best_iter, :-1]
    elapsed = time.time() - start

    rmse, ll = np.nan, np.nan
    if test_model:
        print("----------------- Testing Linear Shrinkage -------------------")
        display(gpy_gp)
        print("Clock time: ", elapsed)
        rmse, ll = test_gp(gpy_gp, test_X, test_Y)

    gpy_gp.Gaussian_noise.unfix()
    gpy_gp.unset_priors()
    return gpy_gp, final_params, rmse, ll, elapsed
def param_thikonov(gpy_gp: GPy.models.GPRegression,
                   test_model: bool = True,
                   test_X: np.ndarray = None,
                   test_Y: np.ndarray = None,
                   c: float = 1e-3,
                   plot_stuff=False,
                   save_stuff=True):
    """
    Use a Thikonov regularisation method to estimate the Gaussian noise
    hyperparameter.

    The noise variance is fixed (excluded from the optimiser) and the kernel
    hyperparameters get a LogGaussian(0, 2) prior. For each of 10 random
    restarts, 25 inner iterations are run, each of which

    1. optimises the kernel hyperparameters for at most 20 iterations,
    2. computes the sorted eigenvalues of the kernel matrix on the training
       inputs,
    3. drops the leading "outlier" eigenvalues, selected from the gaps
       between consecutive eigenvalues (relative to the largest eigenvalue)
       whose magnitude exceeds ``c``,
    4. sets the noise variance to the median of the remaining bulk.

    After the restarts, the parameters of the restart with the highest log
    marginal likelihood are written back to the model. Before returning, the
    noise is unfixed and all priors are removed.

    :param gpy_gp: GPy regression model (modified in place)
    :param test_model: if True, print a report, display the model and
        evaluate it on ``test_X`` / ``test_Y`` with ``test_gp``
    :param test_X: test inputs, required when ``test_model`` is True
    :param test_Y: test targets, required when ``test_model`` is True
    :param c: threshold on the relative eigenvalue gap used to pick outliers
    :param plot_stuff: if True, plot the eigen-spectrum after each restart
    :param save_stuff: if True, record the bulk median of every inner
        iteration and write the table to ``output/7Mar/beta_.txt``
    :return: tuple ``(gpy_gp, final_params, rmse, ll, elapsed)`` where
        ``final_params`` is the stored parameter vector of the best restart
    :raises ValueError: if ``test_model`` is True but test data is missing
    """
    import os

    if test_model and (test_X is None or test_Y is None):
        raise ValueError(
            "test_X and test_Y must both be provided when test_model is True")

    n_restarts = 10
    n_inner_iters = 25

    start = time.time()
    train_x = gpy_gp.X
    priors = GPy.priors.LogGaussian(mu=0., sigma=2.)
    gpy_gp.kern.variance.set_prior(priors)
    gpy_gp.kern.lengthscale.set_prior(priors)

    kern_dim = 1 + gpy_gp.input_dim
    # One row per restart: parameter vector followed by the final LML.
    metrics = np.empty((n_restarts, 3 + gpy_gp.input_dim))
    gpy_gp.Gaussian_noise.fix()
    lml = -np.inf
    # Bulk median recorded for every (restart, inner iteration) pair.
    C = np.zeros((n_restarts, n_inner_iters))

    for j in range(n_restarts):
        # Random log-normal starting point for the kernel hyperparameters.
        theta = np.exp(
            multivariate_normal(mean=np.zeros((kern_dim, )),
                                cov=4 * np.eye(kern_dim)).rvs())
        for i in range(n_inner_iters):
            gpy_gp.optimize(start=theta, max_iters=20)
            K = gpy_gp.kern.K(train_x)
            eig = np.sort(np.linalg.eigvals(K).real)[::-1]
            # Gaps between consecutive eigenvalues, relative to the largest.
            delta_lambda = (eig[1:] - eig[:-1]) / eig[0]
            try:
                n_outlier = delta_lambda[np.abs(delta_lambda) > c].argmax()
            except ValueError:
                # No gap exceeds the threshold: treat every eigenvalue as
                # part of the bulk.
                n_outlier = 0
            eig_bulk = eig[n_outlier:]
            med = np.median(eig_bulk)

            gpy_gp.Gaussian_noise.variance[:] = med
            # Next step starts from the current parameters minus the last
            # entry of the parameter array.
            theta = gpy_gp.param_array[:-1]
            lml = gpy_gp.log_likelihood()
            if save_stuff:
                C[j, i] = med

        if plot_stuff:
            plt.subplot(211)
            plt.axvline(int(len(eig) / 2))
            plt.plot(np.log(eig), marker='.', linestyle='None')
            plt.subplot(212)
            plt.hist(eig[int(len(eig) / 5):], bins=80)
            plt.axvline(med)
            plt.show()
        print('LML After Optimisation Restart', gpy_gp.log_likelihood(), j)
        print('Eig med/avg', med)
        metrics[j, :-1] = gpy_gp.param_array
        metrics[j, -1] = lml

    best_iter = metrics[:, -1].argmax()
    gpy_gp.kern.variance[:] = metrics[best_iter, 0]
    gpy_gp.kern.lengthscale[:] = metrics[best_iter, 1:-2]
    gpy_gp.Gaussian_noise.variance[:] = metrics[best_iter, -2]

    if save_stuff:
        # Only write the table when recording was requested, and make sure
        # the target directory exists first.
        os.makedirs('output/7Mar', exist_ok=True)
        np.savetxt('output/7Mar/beta_.txt', C)

    final_params = metrics[best_iter, :-1]
    elapsed = time.time() - start

    rmse, ll = np.nan, np.nan
    if test_model:
        print("----------------- Testing Thikonov -------------------")
        display(gpy_gp)
        print("Clock time: ", elapsed)
        rmse, ll = test_gp(gpy_gp, test_X, test_Y)

    gpy_gp.Gaussian_noise.unfix()
    gpy_gp.unset_priors()
    return gpy_gp, final_params, rmse, ll, elapsed