def predict_f(
    self, Xnew: InputData, full_cov: bool = False, full_output_cov: bool = False
) -> MeanAndVariance:
    """
    Compute the mean and variance of the latent function at some new points.
    Note that this is very similar to the SGPR prediction, for which there
    are notes in the SGPR notebook.

    Note: This model does not allow full output covariances.

    :param Xnew: points at which to predict
    """
    if full_output_cov:
        raise NotImplementedError

    pX = DiagonalGaussian(self.X_data_mean, self.X_data_var)

    Y_data = self.data
    num_inducing = self.inducing_variable.num_inducing
    psi1 = expectation(pX, (self.kernel, self.inducing_variable))
    psi2 = tf.reduce_sum(
        expectation(
            pX,
            (self.kernel, self.inducing_variable),
            (self.kernel, self.inducing_variable),
        ),
        axis=0,
    )
    jitter = default_jitter()
    Kus = covariances.Kuf(self.inducing_variable, self.kernel, Xnew)
    sigma2 = self.likelihood.variance
    sigma = tf.sqrt(sigma2)
    L = tf.linalg.cholesky(
        covariances.Kuu(self.inducing_variable, self.kernel, jitter=jitter)
    )

    A = tf.linalg.triangular_solve(L, tf.transpose(psi1), lower=True) / sigma
    tmp = tf.linalg.triangular_solve(L, psi2, lower=True)
    AAT = tf.linalg.triangular_solve(L, tf.transpose(tmp), lower=True) / sigma2
    B = AAT + tf.eye(num_inducing, dtype=default_float())
    LB = tf.linalg.cholesky(B)
    c = tf.linalg.triangular_solve(LB, tf.linalg.matmul(A, Y_data), lower=True) / sigma
    tmp1 = tf.linalg.triangular_solve(L, Kus, lower=True)
    tmp2 = tf.linalg.triangular_solve(LB, tmp1, lower=True)
    mean = tf.linalg.matmul(tmp2, c, transpose_a=True)
    if full_cov:
        var = (
            self.kernel(Xnew)
            + tf.linalg.matmul(tmp2, tmp2, transpose_a=True)
            - tf.linalg.matmul(tmp1, tmp1, transpose_a=True)
        )
        shape = tf.stack([1, 1, tf.shape(Y_data)[1]])
        var = tf.tile(tf.expand_dims(var, 2), shape)
    else:
        var = (
            self.kernel(Xnew, full_cov=False)
            + tf.reduce_sum(tf.square(tmp2), axis=0)
            - tf.reduce_sum(tf.square(tmp1), axis=0)
        )
        shape = tf.stack([1, tf.shape(Y_data)[1]])
        var = tf.tile(tf.expand_dims(var, 1), shape)
    return mean + self.mean_function(Xnew), var
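# A minimal usage sketch for the method above, assuming the surrounding class
# is GPflow 2.x's BayesianGPLVM. The data shapes, kernel choice, and
# inducing-point count are illustrative assumptions, not taken from the source.
import numpy as np
import gpflow

Y = np.random.randn(50, 5)  # 50 observations, 5 output dimensions
latent_dim = 2
X_mean = gpflow.utilities.ops.pca_reduce(Y, latent_dim)  # initial latent means
X_var = np.ones((50, latent_dim))  # initial latent variances

model = gpflow.models.BayesianGPLVM(
    Y,
    X_data_mean=X_mean,
    X_data_var=X_var,
    kernel=gpflow.kernels.SquaredExponential(),
    num_inducing_variables=10,
)

Xnew = np.random.randn(3, latent_dim)  # query points in latent space
mean, var = model.predict_f(Xnew)  # mean and var each have shape [3, 5]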
def __call__(self, Xnew, full_cov=False, full_output_cov=False):
    q_mu = self.q_mu  # M x K x O
    q_sqrt = self.q_sqrt  # K x O x M x M
    Kuu = covariances.Kuu(
        self.inducing_variables, self.kernel, jitter=default_jitter()
    )  # K x M x M
    Kuf = covariances.Kuf(self.inducing_variables, self.kernel, Xnew)  # K x M x N
    Knn = self.kernel.K(Xnew, full_output_cov=False)
def conditional_ND(self, X, full_cov=False):
    """
    Computes q(f | m, S; X, Z) as defined in eqs. 6-8 of the paper.
    Remember that the inducing-point prior means are set to 0.

    :param X: input points at which to compute the conditional
    :param full_cov: whether to return the full covariance over X
    :return: conditional mean and (co-)variance
    """
    self.build_cholesky_if_needed()

    Kuf = covs.Kuf(self.feature, self.kern, X)

    # Compute the alpha term: Lu^{-1} Kuf (whitened) or Kuu^{-1} Kuf (unwhitened)
    alpha = tf.linalg.triangular_solve(self.Lu, Kuf, lower=True)
    if not self.white:
        alpha = tf.linalg.triangular_solve(
            tf.transpose(self.Lu), alpha, lower=False
        )

    f_mean = tf.matmul(alpha, self.q_mu, transpose_a=True)
    f_mean = f_mean + self.mean_function(X)

    alpha_tiled = tf.tile(alpha[None, :, :], [self.num_outputs, 1, 1])

    if self.white:
        f_cov = -tf.eye(self.num_inducing, dtype=gpflow.default_float())[None, :, :]
    else:
        f_cov = -self.Ku_tiled

    if self.q_sqrt is not None:
        # Variational posterior covariance of the inducing outputs
        S = tf.matmul(self.q_sqrt, self.q_sqrt, transpose_b=True)
        f_cov += S

    f_cov = tf.matmul(f_cov, alpha_tiled)

    if full_cov:
        # Shape [num_latent, num_X, num_X]
        delta_cov = tf.matmul(alpha_tiled, f_cov, transpose_a=True)
        Kff = self.kern.K(X)
    else:
        # Shape [num_latent, num_X]
        delta_cov = tf.reduce_sum(alpha_tiled * f_cov, 1)
        Kff = self.kern.K_diag(X)

    # Shapes either [1, num_X] + [num_latent, num_X] or
    # [1, num_X, num_X] + [num_latent, num_X, num_X]
    f_cov = tf.expand_dims(Kff, 0) + delta_cov
    f_cov = tf.transpose(f_cov)

    return f_mean, f_cov
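# For reference, a hedged reconstruction of what conditional_ND computes (my
# reading of the code above, not a quotation of the paper's eqs. 6-8). In the
# unwhitened case, with \alpha = K_{ZZ}^{-1} K_{ZX} and the variational
# posterior q(u) = N(m, S), S = q_sqrt q_sqrt^\top:
#
#   \mu_f    = \alpha^\top m + \mu(X)
#   \Sigma_f = K_{XX} - \alpha^\top (K_{ZZ} - S)\, \alpha
#
# In the whitened case \alpha = L_u^{-1} K_{ZX} and K_{ZZ} is replaced by the
# identity, which is exactly the switch between -self.Ku_tiled and
# -tf.eye(...) in the f_cov initialisation above.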
def _conditional_with_precompute(self, Xnew, full_cov, full_output_cov):
    if full_output_cov:
        raise NotImplementedError

    Kuf = cov.Kuf(self.inducing_variable, self.kernel, Xnew)  # still a Tensor

    # construct the conditional mean
    fmean = tf.matmul(Kuf, self.alpha, transpose_a=True)

    num_func = tf.shape(self.alpha)[1]  # K
    Qinv_Kuf = tf.matmul(self.Qinv, Kuf)

    # compute the covariance due to the conditioning
    if full_cov:
        fvar = self.kernel(Xnew) - tf.matmul(Kuf, Qinv_Kuf, transpose_a=True)
    else:
        KufT_Qinv_Kuf_diag = tf.reduce_sum(Kuf * Qinv_Kuf, axis=-2)
        fvar = self.kernel(Xnew, full_cov=False) - KufT_Qinv_Kuf_diag

    fvar = tf.transpose(fvar)
    return fmean, fvar
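# The method above assumes self.alpha and self.Qinv were cached in an earlier
# precompute step. Below is a hedged sketch of that step, reconstructed from
# the identities the conditional uses; precompute_alpha_qinv is a hypothetical
# helper name, not the library's API. For q(u) = N(q_mu, S):
#   fmean = Kuf^T Kuu^{-1} q_mu           ->  alpha = Kuu^{-1} q_mu
#   fvar  = Knn - Kuf^T Qinv Kuf,  Qinv = Kuu^{-1} - Kuu^{-1} S Kuu^{-1}
import tensorflow as tf

def precompute_alpha_qinv(Kuu, q_mu, q_sqrt):
    L = tf.linalg.cholesky(Kuu)  # [M, M]
    alpha = tf.linalg.cholesky_solve(L, q_mu)  # [M, K]
    eye = tf.eye(tf.shape(Kuu)[-1], dtype=Kuu.dtype)
    Kuu_inv = tf.linalg.cholesky_solve(L, eye)  # [M, M]
    S = tf.matmul(q_sqrt, q_sqrt, transpose_b=True)  # [K, M, M]
    Qinv = Kuu_inv - tf.matmul(Kuu_inv, tf.matmul(S, Kuu_inv))  # [K, M, M]
    return alpha, Qinv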
def custom_predict_f(
    self, Xnew: InputData, full_cov: bool = False, full_output_cov: bool = False
) -> MeanAndVariance:
    """
    Compute the mean and variance of the latent function at some new points.
    Note that this is very similar to the SGPR prediction, for which there
    are notes in the SGPR notebook.

    Note: This model does not allow full output covariances.

    :param Xnew: points at which to predict
    """
    if full_output_cov:
        raise NotImplementedError

    Y_data = self.data
    X_data_mean, X_data_var = self.encoder(Y_data)
    pX = DiagonalGaussian(X_data_mean, X_data_var)  # note: unused below

    mu, cov = self.compute_qu()
    jitter = default_jitter()
    Kus = covariances.Kuf(self.inducing_variable, self.kernel, Xnew)
    L = tf.linalg.cholesky(
        covariances.Kuu(self.inducing_variable, self.kernel, jitter=jitter)
    )
    var = cov  # note: this is the covariance of q(u), not projected to Xnew
    tmp1 = tf.linalg.triangular_solve(L, Kus, lower=True)  # L^{-1} K_{us}
    tmp2 = tf.linalg.triangular_solve(L, mu, lower=True)   # L^{-1} m
    # K_{su} L^{-T} L^{-1} m = K_{su} K_{uu}^{-1} m
    mean = tf.linalg.matmul(tmp1, tmp2, transpose_a=True)
    return mean + self.mean_function(Xnew), var
def conditional_vff(
    Xnew,
    inducing_variable,
    kernel,
    f,
    *,
    full_cov=False,
    full_output_cov=False,
    q_sqrt=None,
    white=False,
):
    """
    - Xnew are the points of the data or minibatch, size N x D (tf.array, 2d)
    - inducing_variable is an instance of features.InducingFeature that
      provides `Kuu` and `Kuf` methods for Fourier features; this contains
      the limits of the bounding box and the frequencies
    - f is the value (or mean value) of the features (i.e. the weights)
    - q_sqrt (default None) is the Cholesky factor of the uncertainty about f
      (to be propagated through the conditional as per the GPflow
      inducing-point implementation)
    - white (default False) specifies whether the whitening has been applied

    Given the GP represented by the inducing points specified in
    `inducing_variable`, produce the mean and (co-)variance of the GP at the
    points Xnew.

    Xnew :: N x D
    Kuu :: M x M
    Kuf :: M x N
    f :: M x K, K = 1
    q_sqrt :: K x M x M, with K = 1
    """
    if full_output_cov:
        raise NotImplementedError

    # num_data = tf.shape(Xnew)[0]  # M
    num_func = tf.shape(f)[1]  # K

    Kuu = cov.Kuu(inducing_variable, kernel)  # this is now a LinearOperator
    Kuf = cov.Kuf(inducing_variable, kernel, Xnew)  # still a Tensor

    KuuInv_Kuf = Kuu.solve(Kuf)

    # compute the covariance due to the conditioning
    if full_cov:
        fvar = kernel(Xnew) - tf.matmul(Kuf, KuuInv_Kuf, transpose_a=True)
        shape = (num_func, 1, 1)
    else:
        KufT_KuuInv_Kuf_diag = tf.reduce_sum(Kuf * KuuInv_Kuf, axis=-2)
        fvar = kernel(Xnew, full_cov=False) - KufT_KuuInv_Kuf_diag
        shape = (num_func, 1)
    fvar = tf.expand_dims(fvar, 0) * tf.ones(
        shape, dtype=gpflow.default_float()
    )  # K x N x N or K x N

    # another backsubstitution in the unwhitened case
    if white:
        raise NotImplementedError

    A = KuuInv_Kuf

    # construct the conditional mean
    fmean = tf.matmul(A, f, transpose_a=True)

    if q_sqrt is not None:
        if q_sqrt.shape.ndims == 2:
            # LTA = A * tf.expand_dims(q_sqrt, 2)  # K x M x N  # won't work
            # make ticket for this?
            raise NotImplementedError
        elif q_sqrt.shape.ndims == 3:
            # TODO the following won't work for K > 1
            assert q_sqrt.shape[0] == 1
            ATL = tf.matmul(A, q_sqrt, transpose_a=True)
        else:
            raise ValueError(
                "Bad dimension for q_sqrt: %s" % str(q_sqrt.shape.ndims)
            )
        if full_cov:
            fvar = fvar + tf.matmul(ATL, ATL, transpose_b=True)  # K x N x N
        else:
            fvar = fvar + tf.reduce_sum(tf.square(ATL), 2)  # K x N

    fvar = tf.transpose(fvar)  # N x K or N x N x K
    return fmean, fvar
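# A small self-contained sketch of the tf.linalg.LinearOperator interface that
# conditional_vff leans on, using a diagonal operator as a stand-in for the
# VFF Kuu (the values are illustrative; a real VFF Kuu is a structured
# operator built from the feature's frequencies):
import tensorflow as tf

diag = tf.constant([2.0, 3.0, 5.0], dtype=tf.float64)
Kuu_op = tf.linalg.LinearOperatorDiag(
    diag, is_self_adjoint=True, is_positive_definite=True
)
Kuf_mat = tf.constant(
    [[1.0, 0.5], [0.2, 0.1], [0.0, 1.0]], dtype=tf.float64
)  # M x N

KuuInv_Kuf = Kuu_op.solve(Kuf_mat)  # the same .solve call used above
L_op = Kuu_op.cholesky()  # also a LinearOperator; cf. the LDF variant below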
def _conditional_fused(self, Xnew, full_cov, full_output_cov):
    """
    Xnew is a tensor with the points of the data or minibatch, shape N x D
    """
    if full_output_cov:
        raise NotImplementedError

    f = self._q_dist.q_mu
    q_sqrt = self._q_dist.q_sqrt

    # num_data = tf.shape(Xnew)[0]  # M
    num_func = tf.shape(f)[1]  # K

    Kuu = cov.Kuu(self.X_data, self.kernel)  # this is now a LinearOperator
    Kuf = cov.Kuf(self.X_data, self.kernel, Xnew)  # still a Tensor

    KuuInv_Kuf = Kuu.solve(Kuf)

    # compute the covariance due to the conditioning
    if full_cov:
        fvar = self.kernel(Xnew) - tf.matmul(Kuf, KuuInv_Kuf, transpose_a=True)
        shape = (num_func, 1, 1)
    else:
        KufT_KuuInv_Kuf_diag = tf.reduce_sum(Kuf * KuuInv_Kuf, axis=-2)
        fvar = self.kernel(Xnew, full_cov=False) - KufT_KuuInv_Kuf_diag
        shape = (num_func, 1)
    fvar = tf.expand_dims(fvar, 0) * tf.ones(
        shape, dtype=gpflow.default_float()
    )  # K x N x N or K x N

    if self.whiten:
        raise NotImplementedError

    A = KuuInv_Kuf

    # construct the conditional mean
    fmean = tf.matmul(A, f, transpose_a=True)

    if q_sqrt is not None:
        if q_sqrt.shape.ndims == 2:
            # LTA = A * tf.expand_dims(q_sqrt, 2)  # K x M x N  # won't work
            # make ticket for this?
            raise NotImplementedError
        elif q_sqrt.shape.ndims == 3:
            # TODO the following won't work for K > 1
            assert q_sqrt.shape[0] == 1
            ATL = tf.matmul(A, q_sqrt, transpose_a=True)
        else:
            raise ValueError(
                "Bad dimension for q_sqrt: %s" % str(q_sqrt.shape.ndims)
            )
        if full_cov:
            fvar = fvar + tf.matmul(ATL, ATL, transpose_b=True)  # K x N x N
        else:
            fvar = fvar + tf.reduce_sum(tf.square(ATL), 2)  # K x N

    fvar = tf.transpose(fvar)  # N x K or N x N x K
    return fmean, fvar
def approx_conditional_ldf(
    Xnew,
    inducing_variable,
    kernel,
    f,
    *,
    full_cov=False,
    full_output_cov=False,
    q_sqrt=None,
    white=True,
):
    """
    - Xnew are the points of the data or minibatch, size N x D (tf.array, 2d)
    - inducing_variable is an instance of inducing_variables.InducingVariable
      that provides `Kuu` and `Kuf` methods for Laplacian Dirichlet features;
      this contains the limits of the bounding box and the frequencies
    - inducing_variable.remainder is another instance of
      inducing_variables.InducingVariable that specifies the high-frequency
      components not selected in inducing_variable
    - f is the value (or mean value) of the features (i.e. the weights)
    - q_sqrt (default None) is the Cholesky factor of the uncertainty about f
      (to be propagated through the conditional as per the GPflow
      inducing-point implementation)
    - white (default True) specifies whether the whitening has been applied.
      LDF works a lot better with vanilla gradients when whitening has been
      applied, so it is the default option.

    Given the GP represented by the inducing points specified in
    `inducing_variable`, produce the mean and (co-)variance of the GP at the
    points Xnew.

    Xnew :: N x D
    Kuu :: M x M
    Kuf :: M x N
    f :: M x K, K = 1
    q_sqrt :: K x M x M, with K = 1
    """
    if full_output_cov:
        raise NotImplementedError

    # num_data = tf.shape(Xnew)[0]  # M
    num_func = tf.shape(f)[1]  # K

    Λ = cov.Kuu(inducing_variable, kernel)  # this is now a LinearOperator
    Φ = cov.Kuf(inducing_variable, kernel, Xnew)  # still a Tensor
    Λr = cov.Kuu(inducing_variable.remainder, kernel)
    Φr = cov.Kuf(inducing_variable.remainder, kernel, Xnew)

    # compute the covariance due to the conditioning
    if full_cov:
        fvar = tf.matmul(Φr, Λr.solve(Φr), transpose_a=True)
        shape = (num_func, 1, 1)
    else:
        fvar = tf.reduce_sum(Φr * Λr.solve(Φr), -2)
        shape = (num_func, 1)
    fvar = tf.expand_dims(fvar, 0) * tf.ones(
        shape, dtype=gpflow.default_float()
    )  # K x N x N or K x N

    # another backsubstitution in the unwhitened case
    if white:
        A = Λ.cholesky().solve(Φ)
    else:
        A = Λ.solve(Φ)

    # construct the conditional mean
    fmean = tf.matmul(A, f, transpose_a=True)

    if q_sqrt is not None:
        if q_sqrt.shape.ndims == 2:
            # case for q_diag = True: q_sqrt holds per-feature standard
            # deviations of shape M x K; multiply A by diag(q_sqrt[:, k])
            LTA = A * tf.expand_dims(tf.transpose(q_sqrt), 2)  # K x M x N
        elif q_sqrt.shape.ndims == 3:
            LTA = tf.matmul(q_sqrt, A, transpose_a=True)
        else:
            raise ValueError(
                "Bad dimension for q_sqrt: %s" % str(q_sqrt.shape.ndims)
            )
        if full_cov:
            fvar = fvar + tf.matmul(LTA, LTA, transpose_a=True)  # K x N x N
        else:
            fvar = fvar + tf.reduce_sum(tf.square(LTA), 1)  # K x N

    fvar = tf.transpose(fvar)  # N x K or N x N x K
    return fmean, fvar
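# A hedged restatement of what approx_conditional_ldf computes, in the code's
# own symbols (my reconstruction from the code above, not a quotation):
#
#   \mathbb{E}[f_*]          = A^\top f
#   \operatorname{cov}[f_*]  = \Phi_r^\top \Lambda_r^{-1} \Phi_r + A^\top S A,
#                              S = q_sqrt\, q_sqrt^\top
#   A = \mathrm{chol}(\Lambda)^{-1} \Phi   (whitened)
#     = \Lambda^{-1} \Phi                  (unwhitened)
#
# The first variance term replaces the usual Knn - Kuf^T Kuu^{-1} Kuf
# residual: it approximates the prior variance carried by the discarded
# high-frequency remainder features instead of evaluating Knn exactly.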