def test_cglb_predict():
    """
    Test that
    1.) The predict method returns the same variance estimate as SGPR.
    2.) The predict method returns the same mean as SGPR for v=0.
    3.) The predict method returns a mean very similar to GPR when CG is run to low tolerance.
    """
    rng: np.random.RandomState = np.random.RandomState(999)
    train, z, xs = data(rng)
    noise = 0.2

    gpr = GPR(train, kernel=SquaredExponential(), noise_variance=noise)
    sgpr = SGPR(train, kernel=SquaredExponential(), inducing_variable=z, noise_variance=noise)
    cglb = CGLB(
        train,
        kernel=SquaredExponential(),
        inducing_variable=z,
        noise_variance=noise,
    )

    gpr_mean, _ = gpr.predict_y(xs, full_cov=False)
    sgpr_mean, sgpr_cov = sgpr.predict_y(xs, full_cov=False)
    # Set tolerance high so v stays at 0.
    cglb_mean, cglb_cov = cglb.predict_y(xs, full_cov=False, cg_tolerance=1e6)

    assert np.allclose(sgpr_cov, cglb_cov)
    assert np.allclose(sgpr_mean, cglb_mean)

    cglb_mean, _ = cglb.predict_y(xs, full_cov=False, cg_tolerance=1e-12)
    assert np.allclose(gpr_mean, cglb_mean)
def compute_residual_eig(Y: np.ndarray, Kx: np.ndarray) -> np.ndarray:
    """Residual of Y based on Kx, a kernel matrix of X"""
    assert len(Y) == len(Kx)

    eig_Kx, eix = truncated_eigen(*eigdec(Kx, min(100, len(Kx) // 4)))
    phi_X = eix @ np.diag(np.sqrt(eig_Kx))  # phi_X @ phi_X.T is close to Kx

    n_feats = phi_X.shape[1]
    linear_kernel = Linear(n_feats, ARD=True)
    gp_model = GPR(phi_X, Y, linear_kernel + White(n_feats))
    gp_model.optimize()

    new_Kx = linear_kernel.compute_K_symm(phi_X)
    sigma_squared = gp_model.kern.white.variance.value[0]

    return (pdinv(np.eye(len(Kx)) + new_Kx / sigma_squared) @ Y).squeeze()
def residual_kernel(K_Y: np.ndarray, K_X: np.ndarray,
                    use_expectation=True, with_gp=True,
                    sigma_squared=1e-3, return_learned_K_X=False):
    """Kernel matrix of the residual of Y given X based on their kernel matrices, Y = f(X)"""
    import gpflow
    from gpflow.kernels import White, Linear
    from gpflow.models import GPR

    K_Y, K_X = centering(K_Y), centering(K_X)
    T = len(K_Y)

    if with_gp:
        eig_Ky, eiy = truncated_eigen(*eigdec(K_Y, min(100, T // 4)))
        eig_Kx, eix = truncated_eigen(*eigdec(K_X, min(100, T // 4)))

        X = eix @ diag(sqrt(eig_Kx))  # X @ X.T is close to K_X
        Y = eiy @ diag(sqrt(eig_Ky))
        n_feats = X.shape[1]

        linear = Linear(n_feats, ARD=True)
        white = White(n_feats)
        gp_model = GPR(X, Y, linear + white)
        gpflow.train.ScipyOptimizer().minimize(gp_model)

        K_X = linear.compute_K_symm(X)
        sigma_squared = white.variance.value

    P = pdinv(np.eye(T) + K_X / sigma_squared)  # == I - K @ inv(K + Sigma) in Zhang et al. 2011

    if use_expectation:
        # Flaxman et al. 2016, "Gaussian Processes for Independence Tests with Non-iid Data in Causal Inference"
        RK = (K_X + P @ K_Y) @ P
    else:
        # Zhang et al. 2011, "Kernel-based Conditional Independence Test and Application in Causal Discovery"
        RK = P @ K_Y @ P

    if return_learned_K_X:
        return RK, K_X
    else:
        return RK
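# Usage sketch for residual_kernel, assuming a GPflow 1.x environment (the function fits a GPR
# via gpflow.train.ScipyOptimizer when with_gp=True) and this module's helpers (centering,
# eigdec, truncated_eigen, pdinv). The median-heuristic RBF kernel matrices below are
# illustrative inputs, not part of the original code.
import numpy as np
from scipy.spatial.distance import pdist, squareform


def _rbf_kernel_matrix(data: np.ndarray) -> np.ndarray:
    """Gaussian kernel matrix with a median-heuristic bandwidth (illustrative helper only)."""
    sq_dists = squareform(pdist(data, 'sqeuclidean'))
    bandwidth = np.median(sq_dists[sq_dists > 0])
    return np.exp(-sq_dists / bandwidth)


rng = np.random.RandomState(0)
X = rng.randn(200, 1)
Y = np.sin(X) + 0.1 * rng.randn(200, 1)  # Y = f(X) + noise

K_X = _rbf_kernel_matrix(X)
K_Y = _rbf_kernel_matrix(Y)
RK = residual_kernel(K_Y, K_X, use_expectation=True, with_gp=True)  # kernel matrix of Y's residual given X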
def residualize(Y, X=None, gp_kernel=None):
    """Residual of Y given X, i.e. Y_i - E[Y_i | X_i]"""
    import gpflow
    from gpflow.models import GPR

    if X is None:
        return Y - np.mean(Y)  # nothing is residualized!

    if gp_kernel is None:
        gp_kernel = default_gp_kernel(X)

    m = GPR(X, Y, gp_kernel)
    gpflow.train.ScipyOptimizer().minimize(m)

    Yhat, _ = m.predict_y(X)
    return Y - Yhat
def regression_distance_k(Kx: np.ndarray, Ky: np.ndarray):
    warnings.warn('not tested yet!')
    import gpflow
    from gpflow.kernels import White, Linear
    from gpflow.models import GPR

    T = len(Kx)
    eig_Ky, eiy = truncated_eigen(*eigdec(Ky, min(100, T // 4)))
    eig_Kx, eix = truncated_eigen(*eigdec(Kx, min(100, T // 4)))

    X = eix @ diag(sqrt(eig_Kx))  # X @ X.T is close to Kx
    Y = eiy @ diag(sqrt(eig_Ky))
    n_feats = X.shape[1]

    linear = Linear(n_feats, ARD=True)
    white = White(n_feats)
    gp_model = GPR(X, Y, linear + white)
    gpflow.train.ScipyOptimizer().minimize(gp_model)

    Kx = linear.compute_K_symm(X)
    sigma_squared = white.variance.value

    P = Kx @ pdinv(Kx + sigma_squared * np.eye(T))
    M = P @ Ky @ P
    O = np.ones((T, 1))
    # np.diag(M) is 1-D, so reshape it to a row vector before the outer product
    # (the original `np.diag(M).T` is a no-op on a 1-D array and breaks the matmul).
    N = O @ np.diag(M)[None, :]
    D = np.sqrt(N + N.T - 2 * M)
    return D
def _test_cg_gpr(config: ConfigDense, model: GPR, Xnew: tf.Tensor) -> tf.Tensor:
    """
    Sample generation subroutine common to each unit test
    """
    # Prepare preconditioner for CG
    X, y = model.data
    Kff = model.kernel(X, full_cov=True)
    max_rank = config.num_cond // (2 if config.num_cond > 1 else 1)
    preconditioner = get_default_preconditioner(Kff,
                                                diag=model.likelihood.variance,
                                                max_rank=max_rank)

    count = 0
    L_joint = None
    samples = []
    while count < config.num_samples:
        # Sample $u ~ N(q_mu, q_sqrt q_sqrt^{T})$
        size = min(config.shard_size, config.num_samples - count)

        # Generate draws from the joint distribution $p(f(X), f(Xnew))$
        (f, fnew), L_joint = common.sample_joint(model.kernel,
                                                 X,
                                                 Xnew,
                                                 num_samples=size,
                                                 L=L_joint)

        # Solve for update functions
        update_fns = cg_update(model.kernel,
                               X,
                               y,
                               f + model.mean_function(X),
                               tol=1e-6,
                               diag=model.likelihood.variance,
                               max_iter=config.num_cond,
                               preconditioner=preconditioner)

        samples.append(fnew + update_fns(Xnew))
        count += size

    samples = tf.concat(samples, axis=0)
    if model.mean_function is not None:
        samples += model.mean_function(Xnew)
    return samples
def _test_exact_gpr(config: ConfigDense, model: GPR, Xnew: tf.Tensor) -> tf.Tensor:
    """
    Sample generation subroutine common to each unit test
    """
    # Precompute Cholesky factor (optional)
    X, y = model.data
    Kyy = model.kernel(X, full_cov=True)
    Kyy = tf.linalg.set_diag(Kyy, tf.linalg.diag_part(Kyy) + model.likelihood.variance)
    Lyy = tf.linalg.cholesky(Kyy)

    count = 0
    L_joint = None
    samples = []
    while count < config.num_samples:
        # Sample $u ~ N(q_mu, q_sqrt q_sqrt^{T})$
        size = min(config.shard_size, config.num_samples - count)

        # Generate draws from the joint distribution $p(f(X), f(Xnew))$
        (f, fnew), L_joint = common.sample_joint(model.kernel,
                                                 X,
                                                 Xnew,
                                                 num_samples=size,
                                                 L=L_joint)

        # Solve for update functions
        update_fns = exact_update(model.kernel,
                                  X,
                                  y,
                                  f + model.mean_function(X),
                                  L=Lyy,
                                  diag=model.likelihood.variance)

        samples.append(fnew + update_fns(Xnew))
        count += size

    samples = tf.concat(samples, axis=0)
    if model.mean_function is not None:
        samples += model.mean_function(Xnew)
    return samples
def main(config):
    assert config is not None, ValueError
    tf.random.set_seed(config.seed)
    gpflow_config.set_default_float(config.floatx)
    gpflow_config.set_default_jitter(config.jitter)

    X = tf.random.uniform([config.num_cond, config.input_dims], dtype=floatx())
    Xnew = tf.random.uniform([config.num_test, config.input_dims], dtype=floatx())

    for cls in SupportedBaseKernels:
        minval = config.rel_lengthscales_min * (config.input_dims ** 0.5)
        maxval = config.rel_lengthscales_max * (config.input_dims ** 0.5)
        lenscales = tf.random.uniform(shape=[config.input_dims],
                                      minval=minval,
                                      maxval=maxval,
                                      dtype=floatx())

        kern = cls(lengthscales=lenscales, variance=config.kernel_variance)
        const = tf.random.normal([1], dtype=floatx())

        K = kern(X, full_cov=True)
        K = tf.linalg.set_diag(K, tf.linalg.diag_part(K) + config.noise_variance)
        L = tf.linalg.cholesky(K)
        y = L @ tf.random.normal([L.shape[-1], 1], dtype=floatx()) + const

        model = GPR(kernel=kern,
                    noise_variance=config.noise_variance,
                    data=(X, y),
                    mean_function=mean_functions.Constant(c=const))

        mf, Sff = subroutine(config, model, Xnew)
        mg, Sgg = model.predict_f(Xnew, full_cov=True)

        tol = config.error_tol
        assert allclose(mf, mg, tol, tol)
        assert allclose(Sff, Sgg, tol, tol)
def compute_analytic_GP_predictions(X, y, kernel, noise_variance, X_star):
    """
    Identify the mean and covariance of an analytic GPR posterior for test point locations.

    :param X: The train point locations, with a shape of [N x D].
    :param y: The train targets, with a shape of [N x 1].
    :param kernel: The kernel object.
    :param noise_variance: The variance of the observation model.
    :param X_star: The test point locations, with a shape of [N* x D].
    :return: The mean and covariance of the noise-free predictions,
        with a shape of [N*] and [N* x N*] respectively.
    """
    gpr_model = GPR(data=(X, y), kernel=kernel, noise_variance=noise_variance)

    f_mean, f_var = gpr_model.predict_f(X_star, full_cov=True)
    f_mean, f_var = f_mean[..., 0], f_var[0]

    assert f_mean.shape == (X_star.shape[0],)
    assert f_var.shape == (X_star.shape[0], X_star.shape[0])

    return f_mean, f_var
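# Usage sketch for compute_analytic_GP_predictions, assuming GPflow 2.x; the toy data and the
# SquaredExponential kernel below are illustrative, not part of the original helper.
import numpy as np
import gpflow

rng = np.random.RandomState(0)
X = rng.rand(50, 2)
y = np.sin(X[:, :1]) + 0.1 * rng.randn(50, 1)
X_star = rng.rand(10, 2)

f_mean, f_cov = compute_analytic_GP_predictions(
    X, y, kernel=gpflow.kernels.SquaredExponential(), noise_variance=0.1, X_star=X_star
)
# f_mean has shape [10]; f_cov is the full [10 x 10] posterior covariance of the noise-free function.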
def test_single_layer(self):
    kern = RBF(1, lengthscales=0.1)
    layers = init_layers_linear(self.X, self.Y, self.X, [kern])

    lik = Gaussian()
    lik.variance = self.lik_var

    last_layer = SGPR_Layer(layers[-1].kern,
                            layers[-1].feature.Z.read_value(),
                            self.D_Y,
                            layers[-1].mean_function)
    layers = layers[:-1] + [last_layer]

    m_dgp = DGP_Collapsed(self.X, self.Y, lik, layers)
    L_dgp = m_dgp.compute_log_likelihood()
    mean_dgp, var_dgp = m_dgp.predict_f_full_cov(self.Xs, 1)

    m_exact = GPR(self.X, self.Y, kern)
    m_exact.likelihood.variance = self.lik_var
    L_exact = m_exact.compute_log_likelihood()
    mean_exact, var_exact = m_exact.predict_f_full_cov(self.Xs)

    assert_allclose(L_dgp, L_exact, atol=1e-5, rtol=1e-5)
    assert_allclose(mean_dgp[0], mean_exact, atol=1e-5, rtol=1e-5)
    assert_allclose(var_dgp[0], var_exact, atol=1e-5, rtol=1e-5)
def residual_kernel_matrix_kernel_real(Kx, Z, num_eig, ARD=True):
    """K_X|Z"""
    assert len(Kx) == len(Z)
    assert num_eig <= len(Kx)

    T = len(Kx)
    D = Z.shape[1]
    I = eye(T)
    eig_Kx, eix = truncated_eigen(*eigdec(Kx, num_eig))

    rbf = RBF(D, ARD=ARD)
    white = White(D)
    gp_model = GPR(Z, 2 * sqrt(T) * eix @ diag(sqrt(eig_Kx)) / sqrt(eig_Kx[0]), rbf + white)
    gpflow.train.ScipyOptimizer().minimize(gp_model)

    sigma_squared = white.variance.value
    Kz_x = rbf.compute_K_symm(Z)

    P = I - Kz_x @ pdinv(Kz_x + sigma_squared * I)
    return P @ Kx @ P.T
def test_vs_single_layer(self):
    lik = Gaussian()
    lik_var = 0.01
    lik.variance = lik_var
    N, Ns, D_Y, D_X = self.X.shape[0], self.Xs.shape[0], self.D_Y, self.X.shape[1]
    Y = np.random.randn(N, D_Y)
    Ys = np.random.randn(Ns, D_Y)

    kern = Matern52(self.X.shape[1], lengthscales=0.5)
    # mf = Linear(A=np.random.randn(D_X, D_Y), b=np.random.randn(D_Y))
    mf = Zero()
    m_gpr = GPR(self.X, Y, kern, mean_function=mf)
    m_gpr.likelihood.variance = lik_var
    mean_gpr, var_gpr = m_gpr.predict_y(self.Xs)
    test_lik_gpr = m_gpr.predict_density(self.Xs, Ys)
    pred_m_gpr, pred_v_gpr = m_gpr.predict_f(self.Xs)
    pred_mfull_gpr, pred_vfull_gpr = m_gpr.predict_f_full_cov(self.Xs)

    kerns = []
    kerns.append(Matern52(self.X.shape[1], lengthscales=0.5, variance=1e-1))
    kerns.append(kern)

    layer0 = GPMC_Layer(kerns[0], self.X.copy(), D_X, Identity())
    layer1 = GPR_Layer(kerns[1], mf, D_Y)
    m_dgp = DGP_Heinonen(self.X, Y, lik, [layer0, layer1])

    mean_dgp, var_dgp = m_dgp.predict_y(self.Xs, 1)
    test_lik_dgp = m_dgp.predict_density(self.Xs, Ys, 1)
    pred_m_dgp, pred_v_dgp = m_dgp.predict_f(self.Xs, 1)
    pred_mfull_dgp, pred_vfull_dgp = m_dgp.predict_f_full_cov(self.Xs, 1)

    tol = 1e-4
    assert_allclose(mean_dgp[0], mean_gpr, atol=tol, rtol=tol)
    assert_allclose(test_lik_dgp, test_lik_gpr, atol=tol, rtol=tol)
    assert_allclose(pred_m_dgp[0], pred_m_gpr, atol=tol, rtol=tol)
    assert_allclose(pred_mfull_dgp[0], pred_mfull_gpr, atol=tol, rtol=tol)
    assert_allclose(pred_vfull_dgp[0], pred_vfull_gpr, atol=tol, rtol=tol)
def get_model(model_enum, data, noise_variance, covariance_function, max_parallel=10000):
    if not isinstance(model_enum, ModelEnum):
        model_enum = ModelEnum(model_enum)
    if model_enum == ModelEnum.GP:
        gp_model = GPR(data, covariance_function, None, noise_variance)
    elif model_enum == ModelEnum.SSGP:
        gp_model = StateSpaceGP(data, covariance_function, noise_variance, parallel=False)
    elif model_enum == ModelEnum.PSSGP:
        gp_model = StateSpaceGP(data,
                                covariance_function,
                                noise_variance,
                                parallel=True,
                                max_parallel=max_parallel)
    else:
        raise ValueError("model not supported")
    return gp_model
def generate_gp_models(
    model_or_kernel: Union[GPModel, Kernel],
    data_list: List[RegressionData]
):
    """
    Generates a list of GPModel objects with the same length as data_list.
    If a GPModel object was passed, the list will consist of deep copies of the GPModel,
    with the data reassigned. If a Kernel was passed, the list will consist of GPR models
    (all containing the Kernel) instead.

    :param model_or_kernel: GPModel or Kernel object used to generate the list of models.
    :param data_list: List of RegressionData. Each model will get one element.
    :return: List of GPModel objects, one per element of data_list.
    """
    assert isinstance(model_or_kernel, (Kernel, GPModel)), \
        "model_or_kernel needs to be an instance of either a Kernel or a GPModel"
    assert all(map(lambda data: type(data) is tuple and len(data) == 2, data_list)), \
        "data_list should be a list of tuples of length 2 (i.e. a list of RegressionData)"

    is_kernel = isinstance(model_or_kernel, Kernel)

    models = list()
    for data in data_list:
        # Ensures both the InputData and OutputData are in a format usable by tensorflow
        data = tuple(map(util.ensure_tf_matrix, data))
        if is_kernel:
            # Appends a GPR object to the list of models if a Kernel was passed instead of a GPModel
            models.append(GPR(data, model_or_kernel))
        else:
            # Appends a deepcopy of the passed GPModel to the list of models
            model = gf.utilities.deepcopy(model_or_kernel)
            model.data = data
            models.append(model)
    return models
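# Usage sketch for generate_gp_models, assuming GPflow 2.x; the toy datasets are illustrative.
# Passing a Kernel yields one GPR per dataset; passing an existing GPModel would instead
# yield deep copies of that model with the data swapped out.
import numpy as np
import gpflow

rng = np.random.RandomState(0)
data_list = [
    (rng.rand(20, 1), rng.rand(20, 1)),
    (rng.rand(30, 1), rng.rand(30, 1)),
]
models = generate_gp_models(gpflow.kernels.SquaredExponential(), data_list)
assert len(models) == len(data_list)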
def regression_distance(Y: np.ndarray, Z: np.ndarray, ard=True):
    """d(z, z') = |f(z) - f(z')| where Y = f(Z) + noise and f ~ GP"""
    import gpflow
    from gpflow.kernels import White, RBF
    from gpflow.models import GPR

    n, dims = Z.shape

    rbf = RBF(dims, ARD=ard)
    rbf_white = rbf + White(dims)
    gp_model = GPR(Z, Y, rbf_white)
    gpflow.train.ScipyOptimizer().minimize(gp_model)

    Kz_y = rbf.compute_K_symm(Z)
    Ry = pdinv(rbf_white.compute_K_symm(Z))
    Fy = Y.T @ Ry @ Kz_y  # F(z)
    M = Fy.T @ Fy

    O = np.ones((n, 1))
    N = O @ (np.diag(M)[:, None]).T
    D = np.sqrt(N + N.T - 2 * M)
    return D, Kz_y
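# Usage sketch for regression_distance, assuming a GPflow 1.x environment (the function fits a
# GPR with gpflow.train.ScipyOptimizer internally); the toy data is illustrative.
import numpy as np

rng = np.random.RandomState(0)
Z = rng.randn(100, 2)
Y = np.sin(Z[:, :1]) + 0.1 * rng.randn(100, 1)  # Y = f(Z) + noise

D, Kz = regression_distance(Y, Z)
# D[i, j] approximates |f(Z[i]) - f(Z[j])| under the fitted GP; Kz is the learned RBF kernel matrix.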
data = (x, y)
inducing_variable = tf.random.uniform((M, D))
adam_learning_rate = 0.01
iterations = ci_niter(5)

# %% [markdown]
# ### VGP is a GPR

# %% [markdown]
# The following section demonstrates how natural gradients can turn VGP into GPR *in a single step, if the likelihood is Gaussian*.

# %% [markdown]
# Let's start by first creating a standard GPR model with Gaussian likelihood:

# %%
gpr = GPR(data, kernel=gpflow.kernels.Matern52())

# %% [markdown]
# The log marginal likelihood of the exact GP model is:

# %%
gpr.log_marginal_likelihood().numpy()

# %% [markdown]
# Now we will create an approximate model which approximates the true posterior via a variational Gaussian distribution.<br>We initialize the distribution to be zero mean and unit variance.

# %%
vgp = VGP(data, kernel=gpflow.kernels.Matern52(), likelihood=gpflow.likelihoods.Gaussian())
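# %% [markdown]
# A minimal sketch of the single natural-gradient step, assuming GPflow's `NaturalGradient` optimizer
# with step size `gamma=1.0`: one update of the variational parameters `(q_mu, q_sqrt)` should bring
# the VGP ELBO up to the GPR log marginal likelihood.

# %%
natgrad_opt = gpflow.optimizers.NaturalGradient(gamma=1.0)
natgrad_opt.minimize(vgp.training_loss, var_list=[(vgp.q_mu, vgp.q_sqrt)])

# After the step, the ELBO should (approximately) match gpr.log_marginal_likelihood().
vgp.elbo().numpy()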
def _gpr(x: tf.Tensor, y: tf.Tensor) -> GPR:
    return GPR((x, y), gpflow.kernels.Linear())
# %% [markdown]
# The CGLB model introduces less bias in comparison to the SGPR model.
# We can show empirically that CGLB has a lower bias by plotting the objective landscape with respect to different values of the lengthscale hyperparameter.

# %%
x, y = data
n = x.shape[0]
m = 10

iv_indices = np.random.choice(range(n), size=m, replace=False)
iv = x[iv_indices, :]
noise = 0.1

gpr = GPR(data, kernel=SquaredExponential(), noise_variance=noise)
cglb = CGLB(data, kernel=SquaredExponential(), noise_variance=noise, inducing_variable=iv)
sgpr = SGPR(data, kernel=SquaredExponential(), noise_variance=noise, inducing_variable=iv)


def loss_with_changed_parameter(model, parameter, value: float):
    original = parameter.numpy()
    parameter.assign(value)
    loss = model.training_loss()
    parameter.assign(original)
    return loss
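# %% [markdown]
# A minimal sketch of the lengthscale sweep, assuming `matplotlib.pyplot` is imported as `plt`
# and using an illustrative grid of lengthscale values: evaluate each model's objective via
# `loss_with_changed_parameter` and compare the curves. The CGLB curve should sit closer to the
# exact GPR objective than the SGPR curve.

# %%
ls_values = np.linspace(0.1, 3.0, 20)
gpr_losses = [loss_with_changed_parameter(gpr, gpr.kernel.lengthscales, v) for v in ls_values]
sgpr_losses = [loss_with_changed_parameter(sgpr, sgpr.kernel.lengthscales, v) for v in ls_values]
cglb_losses = [loss_with_changed_parameter(cglb, cglb.kernel.lengthscales, v) for v in ls_values]

plt.plot(ls_values, gpr_losses, label="GPR")
plt.plot(ls_values, sgpr_losses, label="SGPR")
plt.plot(ls_values, cglb_losses, label="CGLB")
plt.xlabel("lengthscale")
plt.ylabel("training loss (negative objective)")
plt.legend()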
def build_model(data, mean_function):
    model = GPR(data, kernel=RBF(), mean_function=mean_function)
    set_trainable(model.kernel, False)
    model.likelihood.variance.assign(1e-2)
    set_trainable(model.likelihood, False)
    return model
def __init__(self, X, Y, kern):
    GPR.__init__(self, X, Y, kern)
def __init__(self, *args, paths: AbstractSampler = None, **kwargs):
    GPR.__init__(self, *args, **kwargs)
    self._paths = paths