def testUnivariateConditionals(self):
    with self.test_context() as sess:
        for is_diagonal in [True, False]:
            for is_whitened in [True, False]:
                m = self.get_model(is_diagonal, is_whitened)
                with gpflow.params_as_tensors_for(m):
                    if is_whitened:
                        fmean_func, fvar_func = gpflow.conditionals.conditional(
                            self.X, self.Z, m.kern, m.q_mu,
                            q_sqrt=m.q_sqrt, white=True)
                    else:
                        fmean_func, fvar_func = gpflow.conditionals.conditional(
                            self.X, self.Z, m.kern, m.q_mu, q_sqrt=m.q_sqrt)
                mean_value = fmean_func.eval(session=sess)[0, 0]
                var_value = fvar_func.eval(session=sess)[0, 0]

                assert_allclose(mean_value - self.posteriorMean, 0, atol=4)
                assert_allclose(var_value - self.posteriorVariance, 0, atol=4)
def __call__(self, model):
    chosen = np.random.choice(np.arange(len(self.X_test)), size=1)
    conv_layer = model.layers[0]
    sess = model.enquire_session()

    with gpflow.params_as_tensors_for(conv_layer):
        samples, Fmeans, Fvars = conv_layer.sample_from_conditional(
            tf.tile(self.input_image[None], [4, 1, 1]), full_cov=False)

    samples, Fmeans, Fvars = sess.run(
        [samples, Fmeans, Fvars],
        {self.input_image: self.X_test[chosen]})

    sample_image = self._plot_samples(samples[:, 0, :], conv_layer)
    mean_image = self._plot_mean(Fmeans[:, 0, :], conv_layer)
    variance_image = self._plot_variance(Fvars[:, 0, :], conv_layer)

    sample_image, mean_image, variance_image = sess.run(
        [sample_image, mean_image, variance_image])
    self.plt.close('all')

    return sess.run(
        self.summary, {
            self.tf_sample_image: sample_image,
            self.tf_mean_image: mean_image,
            self.tf_variance_image: variance_image
        })
def KL(self):
    """
    The KL divergence from the variational distribution to the prior.

    :return: KL divergence from N(q_mu, q_sqrt q_sqrt^T) to N(0, Kuu),
        computed independently for each GP node
    """
    self.build_cholesky_if_needed()

    KL = -0.5 * self.num_inducing * self.num_nodes * self.dim_per_out
    for nd in range(self.num_nodes):
        q_sqrt_nd = self.q_sqrt_lst[nd]
        with params_as_tensors_for(q_sqrt_nd, convert=True):
            KL -= 0.5 * tf.reduce_sum(
                tf.log(tf.matrix_diag_part(q_sqrt_nd) ** 2))
            KL += tf.reduce_sum(tf.log(tf.matrix_diag_part(
                self.Lu[nd]))) * self.dim_per_out
            KL += 0.5 * tf.reduce_sum(
                tf.square(
                    tf.matrix_triangular_solve(
                        self.Lu_tiled_lst[nd], q_sqrt_nd, lower=True)))
            q_mu_nd = self.q_mu[:, nd * self.dim_per_out:(nd + 1) * self.dim_per_out]
            Kinv_m_nd = tf.cholesky_solve(self.Lu[nd], q_mu_nd)
            KL += 0.5 * tf.reduce_sum(q_mu_nd * Kinv_m_nd)
    return KL
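# For reference, the per-node term accumulated above is the closed-form Gaussian KL
#     KL(N(m, S) || N(0, K)) = 0.5 * [tr(K^{-1} S) + m^T K^{-1} m - M + log|K| - log|S|]
# with S = q_sqrt q_sqrt^T and K the prior covariance of the inducing outputs.
# The helper below is a minimal, dense NumPy sketch of that formula; the name
# `gauss_kl_dense` is illustrative only and is not part of the class above.
import numpy as np


def gauss_kl_dense(m, L_q, K):
    """KL(N(m, L_q L_q^T) || N(0, K)) for a single GP, dense NumPy sketch.

    m:   (M, 1) variational mean
    L_q: (M, M) lower-triangular Cholesky factor of the variational covariance
    K:   (M, M) prior covariance (e.g. Kuu)
    """
    M = m.shape[0]
    L_p = np.linalg.cholesky(K)
    alpha = np.linalg.solve(L_p, m)                 # L_p^{-1} m
    A = np.linalg.solve(L_p, L_q)                   # L_p^{-1} L_q
    mahalanobis = np.sum(alpha ** 2)                # m^T K^{-1} m
    trace = np.sum(A ** 2)                          # tr(K^{-1} S)
    logdet_p = 2.0 * np.sum(np.log(np.diag(L_p)))   # log|K|
    logdet_q = 2.0 * np.sum(np.log(np.diag(L_q)))   # log|S|
    return 0.5 * (trace + mahalanobis - M + logdet_p - logdet_q)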
def test_feature_len(self):
    with self.test_context():
        N, D = 17, 3
        Z = np.random.randn(N, D)
        f = gpflow.features.InducingPoints(Z)
        self.assertEqual(len(f), N)
        with gpflow.params_as_tensors_for(f):
            self.assertEqual(len(f), N)
def __init__(self, kern, num_outputs, mean_function,
             Z=None,
             feature=None,
             white=False, input_prop_dim=None,
             q_mu=None,
             q_sqrt=None,
             **kwargs):
    r"""
    A sparse variational GP layer, optionally in whitened representation. This layer
    holds the kernel, variational parameters, inducing points and mean function.

    The underlying model at inputs X is
    f = Lv + mean_function(X), where v \sim N(0, I) and LL^T = kern.K(X)

    The variational distribution over the inducing points is
    q(v) = N(q_mu, q_sqrt q_sqrt^T)

    The layer holds D_out independent GPs with the same kernel and inducing points.

    :param kern: The kernel for the layer (input_dim = D_in)
    :param Z: Inducing points (M, D_in)
    :param num_outputs: The number of GP outputs (q_mu is shape (M, num_outputs))
    :param mean_function: The mean function
    :return:
    """
    Layer.__init__(self, input_prop_dim, **kwargs)

    if feature is None:
        feature = InducingPoints(Z)

    self.num_inducing = len(feature)
    self.feature = feature
    self.kern = kern
    self.mean_function = mean_function
    self.num_outputs = num_outputs
    self.white = white

    if q_mu is None:
        q_mu = np.zeros((self.num_inducing, num_outputs), dtype=settings.float_type)
    self.q_mu = Parameter(q_mu)

    if q_sqrt is None:
        if not self.white:  # initialize to prior
            with gpflow.params_as_tensors_for(feature):
                Ku = conditionals.Kuu(feature, self.kern, jitter=settings.jitter)
                Lu = tf.linalg.cholesky(Ku)
                Lu = self.enquire_session().run(Lu)
            q_sqrt = np.tile(Lu[None, :, :], [num_outputs, 1, 1])
        else:
            q_sqrt = np.tile(np.eye(self.num_inducing,
                                    dtype=settings.float_type)[None, :, :],
                             [num_outputs, 1, 1])

    transform = transforms.LowerTriangular(self.num_inducing, num_matrices=num_outputs)
    self.q_sqrt = Parameter(q_sqrt, transform=transform)

    self.needs_build_cholesky = True
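# A minimal construction sketch for this layer. The class name `SVGP_Layer` is an
# assumption (the class definition is not shown above); the keyword arguments match the
# constructor signature, and gpflow.kernels.RBF / gpflow.mean_functions.Zero are GPflow
# 1.x helpers. white=True keeps the q_sqrt initialisation free of a session call.
import numpy as np
import gpflow

D_in, D_out, M = 5, 3, 32                      # illustrative sizes only
Z_init = np.random.randn(M, D_in)              # initial inducing inputs

layer = SVGP_Layer(kern=gpflow.kernels.RBF(D_in),
                   num_outputs=D_out,
                   mean_function=gpflow.mean_functions.Zero(),
                   Z=Z_init,
                   white=True)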
def run(self, context: MonitorContext, *args, **kwargs) -> None:
    with params_as_tensors_for(self._model):
        tf_x, tf_y = self._model.X, self._model.Y

    lml = 0.0
    num_batches = int(math.ceil(len(self._model.X._value) / self._minibatch_size))  # round up
    for mb in self.wrapper(range(num_batches)):
        start = mb * self._minibatch_size
        finish = (mb + 1) * self._minibatch_size
        x_mb = self._model.X._value[start:finish, :]
        y_mb = self._model.Y._value[start:finish, :]
        mb_lml = self._model.compute_log_likelihood(feed_dict={tf_x: x_mb, tf_y: y_mb})
        lml += mb_lml * len(x_mb)

    lml = lml / len(self._model.X._value)
    self._eval_summary(context, {self._full_lml: lml})
def conditional_ND_not_share_Z(self, X, full_cov=False):
    mean_lst, var_lst, A_tiled_lst = [], [], []
    for nd in range(self.num_nodes):
        pa_nd = self.pa_idx(nd)
        X_tmp = tf.gather(X, pa_nd, axis=1)
        Kuf_nd = self.feature[nd].Kuf(self.kern[nd], X_tmp)

        A_nd = tf.matrix_triangular_solve(self.Lu[nd], Kuf_nd, lower=True)
        A_nd = tf.matrix_triangular_solve(tf.transpose(self.Lu[nd]), A_nd, lower=False)

        mean_tmp = tf.matmul(A_nd,
                             self.q_mu[:, nd * self.dim_per_out:(nd + 1) * self.dim_per_out],
                             transpose_a=True)
        if self.nb_init:
            mean_tmp += self.mean_function[nd](X_tmp)
        else:
            mean_tmp += self.mean_function[nd](
                X[:, nd * self.dim_per_in:(nd + 1) * self.dim_per_in])
        mean_lst.append(mean_tmp)

        A_tiled_lst.append(
            tf.tile(A_nd[None, :, :], [self.dim_per_out, 1, 1]))

        SK_nd = -self.Ku_tiled_lst[nd]
        q_sqrt_nd = self.q_sqrt_lst[nd]
        with params_as_tensors_for(q_sqrt_nd, convert=True):
            SK_nd += tf.matmul(q_sqrt_nd, q_sqrt_nd, transpose_b=True)

        B_nd = tf.matmul(SK_nd, A_tiled_lst[nd])
        # (num_latent, num_X)
        delta_cov_nd = tf.reduce_sum(A_tiled_lst[nd] * B_nd, 1)
        Kff_nd = self.kern[nd].Kdiag(X_tmp)

        # (1, num_X) + (num_latent, num_X)
        var_nd = tf.expand_dims(Kff_nd, 0) + delta_cov_nd
        var_nd = tf.transpose(var_nd)
        var_lst.append(var_nd)

    mean = tf.concat(mean_lst, axis=1)
    var = tf.concat(var_lst, axis=1)
    return mean, var
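# Per node, the loop above evaluates the standard sparse variational GP marginals with
# A = Kuu^{-1} Kuf:  mean = A^T q_mu  and  var_diag = Kff_diag + diag(A^T (S - Kuu) A),
# where S = q_sqrt q_sqrt^T. The function below is a dense NumPy sketch of those two
# formulas; its name and shapes are illustrative and it is not part of the class above.
import numpy as np


def sparse_gp_marginals(Kuu, Kuf, Kff_diag, q_mu, q_sqrt):
    """Per-node SVGP predictive moments, dense NumPy sketch.

    Kuu:      (M, M) prior covariance of the inducing outputs
    Kuf:      (M, N) cross-covariance between inducing points and inputs
    Kff_diag: (N,)   prior marginal variances at the inputs
    q_mu:     (M, P) variational means
    q_sqrt:   (P, M, M) lower-triangular Cholesky factors of the q covariances
    """
    A = np.linalg.solve(Kuu, Kuf)                     # (M, N), A = Kuu^{-1} Kuf
    mean = A.T @ q_mu                                 # (N, P)
    S = q_sqrt @ np.transpose(q_sqrt, (0, 2, 1))      # (P, M, M)
    SK = S - Kuu[None, :, :]                          # (P, M, M)
    delta = np.einsum('mn,pmk,kn->pn', A, SK, A)      # diag of A^T (S - Kuu) A
    var = Kff_diag[None, :] + delta                   # (P, N)
    return mean, var.T                                # both (N, P)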
def full_lml(model, batch_size):
    with params_as_tensors_for(model):
        tf_x, tf_y, tf_w = model.X, model.Y, model.weight_idx

    lml = 0.0
    num_batches = int(math.ceil(len(model.X._value) / batch_size))
    for mb in range(num_batches):
        start = mb * batch_size
        finish = (mb + 1) * batch_size
        x_mb = model.X._value[start:finish, :]
        y_mb = model.Y._value[start:finish, :]
        w_mb = model.weight_idx._value[start:finish]
        mb_lml = model.compute_log_likelihood(
            feed_dict={tf_x: x_mb, tf_y: y_mb, tf_w: w_mb})
        lml += mb_lml * len(x_mb)

    lml = lml / model.X._value.size
    return lml
def multisample_sample_conditional(Xnew: tf.Tensor, feat: InducingPoints, kern: Kernel, f: tf.Tensor, *,
                                   full_cov=False, full_output_cov=False, q_sqrt=None, white=False):
    if isinstance(kern, SharedMixedMok) and isinstance(feat, MixedKernelSharedMof):
        if Xnew.get_shape().ndims == 3:
            sample, gmean, gvar = independent_multisample_sample_conditional(
                Xnew, feat.feat, kern.kernel, f,
                white=white, q_sqrt=q_sqrt,
                full_output_cov=False, full_cov=False)  # N x L, N x L
            o = tf.ones([tf.shape(Xnew)[0], 1, 1], dtype=settings.float_type)
        else:
            sample, gmean, gvar = sample_conditional(
                Xnew, feat.feat, kern.kernel, f,
                white=white, q_sqrt=q_sqrt,
                full_output_cov=False, full_cov=False)  # N x L, N x L
            o = 1.

        with params_as_tensors_for(kern):
            f_sample = tf.matmul(sample, o * kern.W, transpose_b=True)
            f_mu = tf.matmul(gmean, o * kern.W, transpose_b=True)
            f_var = tf.matmul(gvar, o * kern.W ** 2, transpose_b=True)

        return f_sample, f_mu, f_var
    else:
        assert not isinstance(kern, Mok)
        if Xnew.get_shape().ndims == 3:
            return independent_multisample_sample_conditional(
                Xnew, feat, kern, f,
                full_cov=full_cov, full_output_cov=full_output_cov,
                q_sqrt=q_sqrt, white=white)
        else:
            return sample_conditional(
                Xnew, feat, kern, f,
                full_cov=full_cov, full_output_cov=full_output_cov,
                q_sqrt=q_sqrt, white=white)
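# The SharedMixedMok branch above relies on the outputs being a fixed linear mixing of
# L independent latent GPs, f = g W^T: the mean mixes through W and, because the latents
# are independent, the marginal variance mixes through W**2 elementwise. A quick NumPy
# sanity check of that propagation (all names and shapes below are illustrative only):
import numpy as np

N, L, P = 7, 3, 2
rng = np.random.default_rng(0)
W = rng.standard_normal((P, L))        # mixing matrix, plays the role of kern.W
g_mu = rng.standard_normal((N, L))     # latent means
g_var = rng.random((N, L))             # latent marginal variances (independent latents)

f_mu = g_mu @ W.T                      # mean of f = g W^T
f_var = g_var @ (W ** 2).T             # variance of a linear mix of independent Gaussians

# Monte Carlo check of the variance propagation
g_samples = g_mu[None] + np.sqrt(g_var)[None] * rng.standard_normal((100000, N, L))
f_samples = g_samples @ W.T
assert np.allclose(f_samples.var(axis=0), f_var, rtol=2e-2)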
def test_factorized_transition_KLs(self):
    def KL_sampled_mu_and_Q_diag_P(mu_diff, Q_chol, P_chol):
        """
        :param mu_diff: NxSxD
        :param Q_chol: NxSxD
        :param P_chol: D
        :return: N
        """
        D = tf.shape(mu_diff)[-1]
        assert mu_diff.shape.ndims is not None
        assert Q_chol.shape.ndims is not None
        assert P_chol.shape.ndims is not None
        mahalanobis = mu_diff / P_chol
        mahalanobis = tf.reduce_sum(tf.square(mahalanobis), -1)
        mahalanobis = tf.reduce_mean(mahalanobis, -1)
        trace = Q_chol / P_chol
        trace = tf.reduce_sum(tf.square(trace), -1)
        trace = tf.reduce_mean(trace, -1)
        constant = tf.cast(D, dtype=mu_diff.dtype)
        log_det_P = 2. * tf.reduce_sum(tf.log(tf.abs(P_chol)))
        log_det_Q = 2. * tf.reduce_mean(
            tf.reduce_sum(tf.log(tf.abs(Q_chol)), -1), -1)
        double_KL = trace + mahalanobis - constant + log_det_P - log_det_Q
        return 0.5 * double_KL

    with self.test_context() as sess:
        m = self.prepare()
        with gp.params_as_tensors_for(m):
            _, f_mus, f_vars, xcov_chols = sess.run(
                m._build_linear_time_q_sample(return_f_moments=True,
                                              return_x_cov_chols=True,
                                              sample_f=False,
                                              sample_u=False))
            gpssm_KLs = sess.run(
                m._build_transition_KLs(tf.constant(f_mus), tf.constant(f_vars)))
            diff_term = tf.reduce_sum(
                tf.reduce_mean(tf.constant(f_vars), -2) * m.As / tf.square(m.Q_sqrt), -1)
            diff_term += tf.reduce_sum(tf.log(tf.abs(m.S_chols)), 1)
            diff_term -= tf.reduce_sum(
                tf.reduce_mean(tf.log(tf.abs(tf.constant(xcov_chols))), -2), -1)
            gpssm_KLs += sess.run(diff_term)

            gpssm_factorized_KLs = sess.run(
                m._build_factorized_transition_KLs(
                    tf.constant(f_mus), tf.constant(f_vars), tf.constant(xcov_chols)))
            assert_allclose(gpssm_KLs, gpssm_factorized_KLs)

            gpssm_factorized_KLs_2 = sess.run(
                KL_sampled_mu_and_Q_diag_P(
                    m.As[:, None, :] * f_mus + m.bs[:, None, :] - f_mus,
                    tf.constant(xcov_chols),
                    m.Q_sqrt))
            gpssm_factorized_KLs_2 += 0.5 * np.mean(
                np.sum(f_vars / np.square(sess.run(m.Q_sqrt)), -1), -1)
            assert_allclose(gpssm_factorized_KLs, gpssm_factorized_KLs_2)
def Kuf(feat, kern, Xnew):
    with gpflow.params_as_tensors_for(feat):
        return kern.K(feat.Z, kern.f(Xnew))