import numpy as np
from GPy.kern import RBF
from GPy.util.linalg import jitchol


def compute_covariance(x: np.ndarray, kernel: RBF) -> tuple:
    """Compute the kernel covariance matrix and its inverse."""
    assert x.ndim <= 2
    if x.ndim == 1:
        x = x.reshape(-1, 1)
    K_xx = kernel.K(x)
    # Invert K_xx via a jittered Cholesky factor for numerical stability:
    # K = L L^T, so K^-1 = L^-T L^-1.
    K_xx_cho = jitchol(K_xx)
    cholesky_inv = np.linalg.inv(K_xx_cho)
    K_xx_inv = cholesky_inv.T @ cholesky_inv
    return K_xx, K_xx_inv
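# A minimal usage sketch of compute_covariance; the grid and RBF hyperparameters
# are illustrative, not taken from the original code. jitchol adds jitter to the
# diagonal until the Cholesky factorization succeeds, so the inverse is well
# defined even for nearly singular kernel matrices.
x = np.linspace(0, 10, 25)
K_xx, K_xx_inv = compute_covariance(x, RBF(input_dim=1, lengthscale=1.0))
print(K_xx.shape, K_xx_inv.shape)  # (25, 25), (25, 25)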
import numpy as np
from GPy.core.parameterization import Param
from GPy.kern import Kern, RBF


class ChangepointRBF(Kern):
    """RBF-based kernel with a changepoint at xc and separate hyperparameters on either side."""

    def __init__(self, input_dim, variance1=1., variance2=1., lengthscale1=1.,
                 lengthscale2=1., xc=1, active_dims=None):
        super(ChangepointRBF, self).__init__(input_dim, active_dims, 'chngpt')
        assert input_dim == 1, "For this kernel we assume input_dim = 1"
        self.variance1 = Param('variance1', variance1)
        self.variance2 = Param('variance2', variance2)
        self.lengthscale1 = Param('lengthscale1', lengthscale1)
        self.lengthscale2 = Param('lengthscale2', lengthscale2)
        self.rbf = RBF(input_dim=input_dim, lengthscale=1., variance=1.)
        self.xc = Param('xc', xc)
        self.add_parameters(self.variance1, self.variance2,
                            self.lengthscale1, self.lengthscale2, self.xc)

    def parameters_changed(self):
        pass

    def K(self, X, X2=None):
        """Covariance matrix"""
        u1 = self.u(X)
        a1 = self.a(X)
        if X2 is None:
            u2 = u1
            a2 = a1
        else:
            u2 = self.u(X2)
            a2 = self.a(X2)
        # a1 is (n, 1) and a2 is (m, 1); transpose a2 so the outer product
        # a(x) * a(x') scales the (n, m) RBF covariance elementwise.
        return a1 * a2.T * self.rbf.K(X=u1, X2=u2)

    def Kdiag(self, X):
        """Diagonal of covariance matrix"""
        u = self.u(X)
        a = self.a(X)
        # diag of a(x) * a(x') * K(u(x), u(x')) is a(x)^2 * Kdiag(u(x))
        return a.flatten() ** 2 * self.rbf.Kdiag(u)

    def u(self, X: np.ndarray):
        """u operation in the paper"""
        u = np.empty(X.shape)
        for i in range(X.shape[0]):
            if X[i] < self.xc:
                u[i] = X[i] / self.variance1
            else:
                u[i] = self.xc / self.variance1 + (X[i] - self.xc) / self.variance2
        return u

    def a(self, X: np.ndarray):
        """a operation in the paper"""
        a = np.empty(X.shape)
        for i in range(X.shape[0]):
            if X[i] < self.xc:
                a[i] = self.lengthscale1
            else:
                a[i] = self.lengthscale2
        return a
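# A minimal usage sketch of ChangepointRBF; the hyperparameter values are
# illustrative, not taken from the original code.
x = np.linspace(0, 10, 100)[:, None]
k = ChangepointRBF(input_dim=1, variance1=0.5, variance2=2.0,
                   lengthscale1=1.0, lengthscale2=0.5, xc=5.0)
K = k.K(x)  # behaviour of the covariance changes at the changepoint x = 5
sample = np.random.multivariate_normal(np.zeros(100), K + 1e-8 * np.eye(100))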
import numpy as np
from GPy.kern import (RBF, Brownian, Matern32, Cosine, Exponential, Linear,
                      GridRBF, MLP, PeriodicMatern32, Spline, White, StdPeriodic)


def get_data(kernel_name, variance_value=1.0, n_traces=3, lengthscale=1.0):
    n_dims = 100
    n_frames = 20
    x = np.linspace(0, 10, n_dims)[:, np.newaxis]
    if kernel_name == "RBF":
        kernel = RBF(input_dim=1, variance=variance_value, lengthscale=lengthscale)
    elif kernel_name == "Brownian":
        kernel = Brownian(input_dim=1, variance=variance_value)
    elif kernel_name == "Matern32":
        kernel = Matern32(input_dim=1, variance=variance_value)
    elif kernel_name == "Cosine":
        kernel = Cosine(input_dim=1, variance=variance_value)
    elif kernel_name == "Exponential":
        kernel = Exponential(input_dim=1, variance=variance_value)
    elif kernel_name == "Linear":
        kernel = Linear(input_dim=1)
    elif kernel_name == "GridRBF":
        kernel = GridRBF(input_dim=1, variance=variance_value)
    elif kernel_name == "MLP":
        kernel = MLP(input_dim=1, variance=variance_value)
    elif kernel_name == "PeriodicMatern32":
        kernel = PeriodicMatern32(input_dim=1, variance=variance_value)
    elif kernel_name == "Spline":
        kernel = Spline(input_dim=1, variance=variance_value)
    elif kernel_name == "White":
        kernel = White(input_dim=1, variance=variance_value)
    elif kernel_name == "StdPeriodic":
        kernel = StdPeriodic(input_dim=1, variance=variance_value)
    else:
        raise ValueError("Unknown kernel name: " + kernel_name)
    kernel_matrix = kernel.K(x, x)
    # GaussianProcessAnimation is a project-local helper that draws animated
    # traces from the given covariance matrix.
    gaussian_process_animation = GaussianProcessAnimation(
        kernel_matrix, n_dims=n_dims, n_frames=n_frames)
    frames = gaussian_process_animation.get_traces(n_traces)
    data = np.stack(frames).transpose((2, 0, 1))
    return data
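# A minimal usage sketch of get_data; the exact output shape depends on what
# the project-local GaussianProcessAnimation.get_traces returns.
data = get_data("RBF", variance_value=2.0, n_traces=5, lengthscale=0.5)
print(data.shape)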
from GPy.kern import RBF, Brownian, Cosine
import numpy as np
from numpy.linalg import eig
import matplotlib.pyplot as plt

# Track how the largest kernel eigenvalue scales with the number of grid points.
kernel = RBF(input_dim=1, variance=2.0)
# kernel = Brownian(input_dim=1, variance=2.0)
# kernel = Cosine(input_dim=1, variance=2.0)

values = []
r = range(10, 300)
for n_dims in r:
    x = np.linspace(0, 10, n_dims)[:, np.newaxis]
    k = kernel.K(x, x)
    eigenvalues, eigenvectors = eig(k)  # eigh would also work: k is symmetric
    first_eigenvalue = np.max(np.abs(eigenvalues))
    values.append(first_eigenvalue / n_dims)

plt.plot(list(r), values)
plt.xlabel("n_dims")
plt.ylabel("largest eigenvalue / n_dims")
plt.show()
import numpy as np
import matplotlib.pyplot as plt
from GPy.kern import RBF
from GPy.models import GPRegression
from visualization import plot_gp, model_output

N = 50
noise_var = 0.01

X = np.zeros((N, 1))
x_half = int(N / 2)
X[:x_half, :] = np.linspace(0, 2, x_half)[:, None]   # First cluster of inputs/covariates
X[x_half:, :] = np.linspace(8, 10, x_half)[:, None]  # Second cluster of inputs/covariates

rbf = RBF(input_dim=1)
mu = np.zeros(N)
# Add the noise *variance* (not its square root) to the covariance of the draw.
cov = rbf.K(X) + np.eye(N) * noise_var
y = np.random.multivariate_normal(mu, cov).reshape(-1, 1)

# plt.scatter(X, y)
# plt.show()

gp_regression = GPRegression(X, y)
gp_regression.optimize(messages=True)
log_likelihood1 = gp_regression.log_likelihood()
model_output(gp_regression,
             title="GP Regression with log likelihood: " + str(log_likelihood1))

#################################
# Inducing variables u: each inducing variable has an associated input location Z,
# which lives in the same space as X.
Z = np.hstack((np.linspace(2.5, 4., 3), np.linspace(7, 8.5, 3)))[:, None]
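# A hedged sketch completing the inducing-point setup above with a sparse GP,
# using GPy.models.SparseGPRegression (a standard GPy model). Comparing its
# log likelihood (a variational bound) against the full GP is my addition,
# not part of the original code.
from GPy.models import SparseGPRegression

sparse_gp = SparseGPRegression(X, y, kernel=RBF(input_dim=1), Z=Z)
sparse_gp.optimize(messages=True)
log_likelihood2 = sparse_gp.log_likelihood()
model_output(sparse_gp,
             title="Sparse GP Regression with log likelihood: " + str(log_likelihood2))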
import numpy as np
import matplotlib.pyplot as plt
from numpy import newaxis as na
from numpy.linalg import det, inv, solve
from scipy.linalg import cho_factor, cho_solve
from scipy.optimize import minimize
from scipy.stats import multivariate_normal
from GPy.kern import RBF
# maha (pairwise Mahalanobis distances) and BayesianQuadratureTransform
# come from the surrounding project.


class GPQuad(BayesianQuadratureTransform):
    def __init__(self, dim, unit_sp=None, hypers=None):
        super(GPQuad, self).__init__(dim, unit_sp, hypers)
        # GPy RBF kernel with given hypers
        self.kern = RBF(self.d, variance=self.hypers['sig_var'],
                        lengthscale=self.hypers['lengthscale'], ARD=True)

    def weights_rbf(self, unit_sp, hypers):
        # BQ weights for the RBF kernel with given hypers; computations adopted
        # from the GP-ADF code [Deisenroth] with the following assumptions:
        #   (A1) the uncertain input is zero-mean with unit covariance
        #   (A2) one set of hyper-parameters is used for all output dimensions
        #        (one GP models all outputs)
        d, n = unit_sp.shape
        # GP kernel hyper-parameters
        alpha, el, jitter = hypers['sig_var'], hypers['lengthscale'], hypers['noise_var']
        assert len(el) == d
        # pre-allocation for convenience
        eye_d, eye_n = np.eye(d), np.eye(n)
        iLam1 = np.atleast_2d(np.diag(el ** -1))  # sqrt(Lambda^-1)
        iLam2 = np.atleast_2d(np.diag(el ** -2))
        inp = unit_sp.T.dot(iLam1)  # (x - m)^T * sqrt(Lambda^-1); (numSP, xdim)
        K = np.exp(2 * np.log(alpha) - 0.5 * maha(inp, inp))
        iK = cho_solve(cho_factor(K + jitter * eye_n), eye_n)
        B = iLam2 + eye_d  # (D, D)
        c = alpha ** 2 / np.sqrt(det(B))
        t = inp.dot(inv(B))  # inp * (P + Lambda)^-1
        l = np.exp(-0.5 * np.sum(inp * t, 1))  # (N,)
        zet = 2 * np.log(alpha) - 0.5 * np.sum(inp * inp, 1)
        inp = inp.dot(iLam1)
        R = 2 * iLam2 + eye_d
        t = 1 / np.sqrt(det(R))
        L = np.exp((zet[:, na] + zet[:, na].T) + maha(inp, -inp, V=0.5 * inv(R)))
        q = c * l  # evaluations of the kernel mean map (from the viewpoint of RKHS methods)
        # mean weights
        wm_f = q.dot(iK)
        iKQ = iK.dot(t * L)
        # covariance weights
        wc_f = iKQ.dot(iK)
        # cross-covariance "weights"
        wc_fx = np.diag(q).dot(iK)
        # used for self.D.dot(x - mean).dot(wc_fx).dot(fx)
        self.D = inv(eye_d + np.diag(el ** 2))  # S(S+Lam)^-1; for S=I, (I+Lam)^-1
        # model variance; to be added to the covariance
        # this diagonal form assumes independent GP outputs (cov(f^a, f^b) = 0 for all a != b)
        # np.ones(d) (not np.ones((d, 1))) so np.diag builds the (d, d) diagonal matrix
        self.model_var = np.diag((alpha ** 2 - np.trace(iKQ)) * np.ones(d))
        return wm_f, wc_f, wc_fx

    def plot_gp_model(self, f, unit_sp, args, test_range=(-5, 5, 50), plot_dims=(0, 0)):
        # plot out_dim vs. in_dim
        in_dim, out_dim = plot_dims
        # test input must have the same dimension as specified in kernel
        test = np.linspace(*test_range)
        test_pts = np.zeros((self.d, len(test)))
        test_pts[in_dim, :] = test
        # function value observations at training points (unit sigma-points)
        y = np.apply_along_axis(f, 0, unit_sp, args)
        fx = np.apply_along_axis(f, 0, test_pts, args)  # function values at test points
        K = self.kern.K(unit_sp.T)  # covariances between sigma-points
        k = self.kern.K(test_pts.T, unit_sp.T)  # covariance between test inputs and sigma-points
        kxx = self.kern.Kdiag(test_pts.T)  # prior predictive variance
        k_iK = cho_solve(cho_factor(K), k.T).T
        gp_mean = k_iK.dot(y[out_dim, :])  # GP mean
        gp_var = np.diag(np.diag(kxx) - k_iK.dot(k.T))  # GP predictive variance
        # plot the GP mean, predictive variance and the true function
        plt.figure()
        plt.plot(test, fx[out_dim, :], color='r', ls='--', lw=2, label='true')
        plt.plot(test, gp_mean, color='b', ls='-', lw=2, label='GP mean')
        plt.fill_between(test, gp_mean + 2 * np.sqrt(gp_var),
                         gp_mean - 2 * np.sqrt(gp_var),
                         color='b', alpha=0.25, label='GP variance')
        plt.plot(unit_sp[in_dim, :], y[out_dim, :],
                 color='k', ls='', marker='o', ms=8, label='data')
        plt.legend()
        plt.show()

    def _weights(self, sigma_points, hypers):
        return self.weights_rbf(sigma_points, hypers)

    def _fcn_eval(self, fcn, x, fcn_pars):
        return np.apply_along_axis(fcn, 0, x, fcn_pars)

    def _mean(self, weights, fcn_evals):
        return fcn_evals.dot(weights)

    def _covariance(self, weights, fcn_evals, mean_out):
        return fcn_evals.dot(weights).dot(fcn_evals.T) - np.outer(mean_out, mean_out.T) + self.model_var

    def _cross_covariance(self, weights, fcn_evals, x, mean_out, mean_in):
        return fcn_evals.dot(weights.T).dot((x - mean_in).T).dot(self.D)

    def _int_var_rbf(self, X, hyp, jitter=1e-8):
        """
        Posterior integral variance of the Gaussian process quadrature.
        X - vector (1, 2*xdim**2+xdim)
        hyp - kernel hyperparameters [s2, el_1, ..., el_d]
        """
        # reshape X to SP matrix
        X = np.reshape(X, (self.n, self.d))
        # set kernel hyper-parameters
        s2, el = hyp[0], hyp[1:]
        self.kern.param_array[0] = s2  # variance
        self.kern.param_array[1:] = el  # lengthscale
        K = self.kern.K(X)
        L = np.diag(el ** 2)
        # posterior variance of the integral
        ks = s2 * np.sqrt(det(L + np.eye(self.d))) * multivariate_normal(
            mean=np.zeros(self.d), cov=L).pdf(X)
        postvar = -ks.dot(solve(K + jitter * np.eye(self.n), ks.T))
        return postvar

    def _int_var_rbf_hyp(self, hyp, X, jitter=1e-8):
        """
        Posterior integral variance as a function of hyper-parameters
        :param hyp: RBF kernel hyper-parameters [s2, el_1, ..., el_d]
        :param X: sigma-points
        :param jitter: numerical jitter (for stabilizing computations)
        :return: posterior integral variance
        """
        # reshape X to SP matrix
        X = np.reshape(X, (self.n, self.d))
        # set kernel hyper-parameters
        s2, el = 1, hyp  # sig_var hyper always set to 1
        self.kern.param_array[0] = s2  # variance
        self.kern.param_array[1:] = el  # lengthscale
        K = self.kern.K(X)
        L = np.diag(el ** 2)
        # posterior variance of the integral
        ks = s2 * np.sqrt(det(L + np.eye(self.d))) * multivariate_normal(
            mean=np.zeros(self.d), cov=L).pdf(X)
        postvar = s2 * np.sqrt(det(2 * inv(L) + np.eye(self.d))) ** -1 - ks.dot(
            solve(K + jitter * np.eye(self.n), ks.T))
        return postvar

    def _min_var_sigmas(self):
        # solver options
        op = {'disp': True}
        # bounds based on input unit Gaussian (-2*std, +2*std)
        bnds = tuple((-2, 2) for i in range(self.n * self.d))
        hyp = np.hstack((self.hypers['sig_var'], self.hypers['lengthscale']))
        # unconstrained
        # res = minimize(self._gpq_postvar, self.X0, method='Nelder-Mead', options=op)
        # res = minimize(self._gpq_postvar, self.X0, method='SLSQP', bounds=bnds, options=op)
        res = minimize(self._int_var_rbf, self.unit_sp, args=(hyp,),  # args must be a tuple
                       method='L-BFGS-B', bounds=bnds, options=op)
        return res.x

    def _min_var_hypers(self):
        """
        Finds kernel hyper-parameters minimizing the posterior integral variance.
        :return: optimized kernel hyper-parameters
        """
        # solver options
        op = {'disp': True}
        hyp = self.hypers['lengthscale']  # np.hstack((self.hypers['sig_var'], self.hypers['lengthscale']))
        # bounds based on input unit Gaussian (-2*std, +2*std)
        bnds = tuple((1e-3, 1000) for i in range(len(hyp)))
        # unconstrained
        # res = minimize(self._gpq_postvar, self.X0, method='Nelder-Mead', options=op)
        # res = minimize(self._gpq_postvar, self.X0, method='SLSQP', bounds=bnds, options=op)
        res = minimize(self._int_var_rbf_hyp, hyp, args=(self.unit_sp,),  # args must be a tuple
                       method='L-BFGS-B', bounds=bnds, options=op)
        return res.x

    def _min_logmarglik_hypers(self):
        # finds hypers by maximizing the marginal likelihood (empirical Bayes);
        # the multiple output dimensions should be reflected in the log marglik
        pass

    def _min_intvar_logmarglik_hypers(self):
        # finds hypers by minimizing the sum of the log-marginal likelihood and
        # the integral variance objectives
        pass
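# A hedged usage sketch of weights_rbf. The method reads nothing from self
# except attribute writes, so it can be exercised unbound on a stand-in object
# without the project-local BayesianQuadratureTransform base class. The
# unscented-transform-style sigma-points and hyperparameter values below are
# illustrative assumptions, and the project's maha helper (pairwise Mahalanobis
# distances) must be importable for this to run.
d = 2
unit_sp = np.hstack((np.zeros((d, 1)), np.sqrt(d) * np.eye(d), -np.sqrt(d) * np.eye(d)))
hypers = {'sig_var': 1.0, 'lengthscale': np.ones(d), 'noise_var': 1e-8}

class _Stub:
    pass  # receives the self.D and self.model_var attribute writes

wm_f, wc_f, wc_fx = GPQuad.weights_rbf(_Stub(), unit_sp, hypers)
print(wm_f.shape, wc_f.shape, wc_fx.shape)  # (5,), (5, 5), (5, 5)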
import numpy as np
from typing import Union
from GPy.kern import RBF
# Gaussian, Gaussian1D, GP, norm and multivariate_normal (with the
# project-specific _pdf_point_est helper) come from the surrounding project.


def _compute_mean(
        prior: Union[Gaussian, Gaussian1D],
        gp: GP,
        kernel: RBF,
        X_D: np.ndarray = None,
        Y_D: Union[np.ndarray, int] = None,
):
    """
    Compute the mean (i.e. expectation) of the integral.

    :param prior: prior
    :param gp: GP
    :param kernel: type of kernel - for now only the RBF kernel is supported
    :param X_D: query points - if this argument is not supplied, the evaluated
        points of the Gaussian process will be used
    :param Y_D: the function values at X_D. Note that -1 is a special value: if
        Y_D == -1, we are not interested in the integral expectation itself but
        only in the inverse of the covariance matrix and the vector n_s
    :return: mean: mean value of the integral; K_xx_inv: inverse of the full
        covariance matrix; n_s: the vector defined in Equation 7.1.7 of Mike's
        DPhil dissertation
    """
    from GPy.util.linalg import jitchol

    # w, h are the lengthscale and variance of the RBF kernel - see Equation 7.1.4
    # in Mike's DPhil dissertation
    w = kernel.lengthscale.values
    h = kernel.variance.values[0]
    if X_D is None:
        X_D = gp._gpy_gp.X
    if Y_D is None:
        Y_D = gp._gpy_gp.Y
    n, d = X_D.shape  # n: number of samples, d: dimensionality of each sample

    if isinstance(prior, Gaussian1D):
        mu = prior.matrix_mean
        sigma = prior.matrix_variance
    else:
        mu = prior.mean
        sigma = prior.covariance

    # Defined in Equation 7.1.7
    n_s = np.zeros((n,))
    if d == 1:
        W = float(sigma) + w ** 2
        mu = float(mu)
        for i in range(n):
            n_s[i] = h * norm._pdf_point_est(X_D[i, :], loc=mu, scale=np.sqrt(W))
    else:
        if len(w) > 1:
            assert len(w) == d
            w = np.diag(w)
        else:
            w = np.diag(np.array([w] * d))
        W = sigma + w
        for i in range(n):
            n_s[i] = h * multivariate_normal._pdf_point_est(X_D[i, :], mean=mu, cov=W)

    K_xx = kernel.K(X_D)
    # Find the inverse of the K_xx matrix via Cholesky decomposition (with jitter)
    K_xx_cho = jitchol(K_xx)
    cholesky_inverse = np.linalg.inv(K_xx_cho)
    K_xx_inv = cholesky_inverse.T @ cholesky_inverse

    if isinstance(Y_D, int) and Y_D == -1:
        return np.nan, K_xx_inv, n_s
    else:
        mean = n_s.T @ K_xx_inv @ Y_D
        return mean, K_xx_inv, n_s
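# A hedged usage sketch of _compute_mean, assuming the surrounding project's
# Gaussian1D prior (the constructor arguments shown are illustrative guesses,
# not the project's confirmed signature). gp=None is safe here because the GP
# wrapper is only consulted when X_D or Y_D are omitted.
X_D = np.random.randn(20, 1)
Y_D = np.sin(X_D)
kern = RBF(input_dim=1, variance=1.0, lengthscale=0.8)
prior = Gaussian1D(mean=0.0, variance=1.0)  # hypothetical constructor
mean, K_xx_inv, n_s = _compute_mean(prior, gp=None, kernel=kern, X_D=X_D, Y_D=Y_D)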