Example #1
 def _elbo_data_term(self, events, Kuu=None):
     mean, var = self.predict_f(events, full_cov=False, Kuu=Kuu)
     expect_log_fn_sqr = integrate_log_fn_sqr(mean, var)
     if self.num_events is None:
         scale = 1.0
     else:
         minibatch_size = tf.shape(events)[0]
         scale = to_default_float(
             self.num_events) / to_default_float(minibatch_size)
     return scale * tf.reduce_sum(expect_log_fn_sqr)
Example #2
def build_model(data):

    kernel = gpflow.kernels.Matern52(lengthscales=0.3)

    meanf = gpflow.mean_functions.Linear(1.0, 0.0)
    model = gpflow.models.GPR(data, kernel, meanf, noise_variance=0.01)

    for p in model.parameters:
        p.prior = Gamma(to_default_float(1.0), to_default_float(1.0))

    return model
def optimised_background_model(X, Y):

    k = gpflow.kernels.SquaredExponential()
    m = gpflow.models.GPR(data=(X, Y), kernel=k, mean_function=None)
    m.kernel.lengthscales = gpflow.Parameter(
        to_default_float(7.1),
        transform=tfp.bijectors.Softplus(low=to_default_float(7.)))
    opt = gpflow.optimizers.Scipy()
    opt_logs = opt.minimize(m.training_loss,
                            m.trainable_variables,
                            options=dict(maxiter=100))

    return m
Example #4
    def __init__(self, with_transform):
        super().__init__()

        prior = tfp.distributions.Normal(to_default_float(1.0), to_default_float(1.0))

        scale = np.exp(self.log_scale)
        if with_transform:
            transform = tfp.bijectors.Shift(to_default_float(0.0))(
                tfp.bijectors.Scale(to_default_float(scale))
            )
        else:
            transform = None

        self.theta = gpflow.Parameter(self.value, prior=prior, transform=transform)
Example #5
def Kuu_matern32_fourierfeatures1d(inducing_variable, kernel, jitter=None):
    a, b, ms = (lambda u: (u.a, u.b, u.ms))(inducing_variable)
    omegas = 2.0 * np.pi * ms / (b - a)

    # Cosine block: eq. (114)
    lamb = np.sqrt(3.0) / kernel.lengthscales
    four_or_eight = to_default_float(tf.where(omegas == 0, 4.0, 8.0))
    d_cos = (
        (b - a)
        * tf.square(tf.square(lamb) + tf.square(omegas))
        / tf.pow(lamb, 3)
        / kernel.variance
        / four_or_eight
    )
    v_cos = tf.ones_like(d_cos) / tf.sqrt(kernel.variance)
    cosine_block = LowRank(Diag(d_cos, is_positive_definite=True), v_cos[:, None])

    # Sine block: eq. (115)
    omegas = omegas[tf.not_equal(omegas, 0)]  # don't compute omega=0
    d_sin = (
        (b - a)
        * tf.square(tf.square(lamb) + tf.square(omegas))
        / tf.pow(lamb, 3)
        / kernel.variance
        / 8.0
    )
    v_sin = omegas / lamb / tf.sqrt(kernel.variance)
    sine_block = LowRank(Diag(d_sin, is_positive_definite=True), v_sin[:, None])

    return BlockDiag([cosine_block, sine_block])  # eq. (116)
 def _elbo_data_term(self,
                     events,
                     Kuu=None):  # E_q[log f_n^2], the expectation of log f_n^2 under q(f)
     mean, var = self.predict_f(events, full_cov=False, Kuu=Kuu)
     expect_log_fn_sqr = integrate_log_fn_sqr(mean, var)
     if self.num_events is None:
         scale = 1.0
     else:
         minibatch_size = tf.shape(events)[0]
         scale = to_default_float(
             self.num_events) / to_default_float(minibatch_size)
     return scale * tf.reduce_sum(
         expect_log_fn_sqr)  # tf.reduce_sum sums over all elements by default
Example #7
def test_sgpr_qu():
    rng = Datum().rng
    X = to_default_float(rng.randn(100, 2))
    Z = to_default_float(rng.randn(20, 2))
    Y = to_default_float(np.sin(X @ np.array([[-1.4], [0.5]])) + 0.5 * rng.randn(len(X), 1))
    model = gpflow.models.SGPR(
        (X, Y), kernel=gpflow.kernels.SquaredExponential(), inducing_variable=Z
    )

    gpflow.optimizers.Scipy().minimize(model.training_loss, variables=model.trainable_variables)

    qu_mean, qu_cov = model.compute_qu()
    f_at_Z_mean, f_at_Z_cov = model.predict_f(model.inducing_variable.Z, full_cov=True)

    np.testing.assert_allclose(qu_mean, f_at_Z_mean, rtol=1e-5, atol=1e-5)
    np.testing.assert_allclose(tf.reshape(qu_cov, (1, 20, 20)), f_at_Z_cov, rtol=1e-5, atol=1e-5)
Example #8
    def upper_bound(self) -> tf.Tensor:
        """
        Upper bound for the sparse GP regression marginal likelihood. Note that
        the same inducing points are used for calculating the upper bound as for
        computing the likelihood approximation. This may not lead to the
        best upper bound. The upper bound can be tightened by optimising Z, just
        like the lower bound. This is especially important in FITC, as FITC is
        known to produce poor inducing point locations. An optimisable upper bound
        can be found in https://github.com/markvdw/gp_upper.

        The key reference is

        ::

          @misc{titsias_2014,
            title={Variational Inference for Gaussian and Determinantal Point Processes},
            url={http://www2.aueb.gr/users/mtitsias/papers/titsiasNipsVar14.pdf},
            publisher={Workshop on Advances in Variational Inference (NIPS 2014)},
            author={Titsias, Michalis K.},
            year={2014},
            month={Dec}
          }

        The key quantity, the trace term, can be computed via

        >>> _, v = conditionals.conditional(X, model.inducing_variable.Z, model.kernel,
        ...                                 np.zeros((len(model.inducing_variable), 1)))

        which computes each individual element of the trace term.
        """
        X_data, Y_data = self.data
        num_data = to_default_float(tf.shape(Y_data)[0])

        Kdiag = self.kernel(X_data, full_cov=False)
        kuu = Kuu(self.inducing_variable, self.kernel, jitter=self.jitter_variance)
        kuf = Kuf(self.inducing_variable, self.kernel, X_data)

        I = tf.eye(tf.shape(kuu)[0], dtype=default_float())

        L = tf.linalg.cholesky(kuu)
        A = tf.linalg.triangular_solve(L, kuf, lower=True)
        AAT = tf.linalg.matmul(A, A, transpose_b=True)
        B = I + AAT / self.likelihood.variance
        LB = tf.linalg.cholesky(B)

        # Using the Trace bound, from Titsias' presentation
        c = tf.maximum(tf.reduce_sum(Kdiag) - tf.reduce_sum(tf.square(A)), 0)

        # Alternative bound on max eigenval:
        corrected_noise = self.likelihood.variance + c

        const = -0.5 * num_data * tf.math.log(2 * np.pi * self.likelihood.variance)
        logdet = -tf.reduce_sum(tf.math.log(tf.linalg.diag_part(LB)))

        LC = tf.linalg.cholesky(I + AAT / corrected_noise)
        v = tf.linalg.triangular_solve(LC, tf.linalg.matmul(A, Y_data) / corrected_noise, lower=True)
        quad = -0.5 * tf.reduce_sum(tf.square(Y_data)) / corrected_noise + 0.5 * tf.reduce_sum(tf.square(v))

        return const + logdet + quad
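
# Added usage sketch (not from the original example; assumes numpy as np and
# gpflow are imported): the quantity above upper-bounds the log marginal
# likelihood while the ELBO lower-bounds it, so for the stock
# gpflow.models.SGPR (which exposes both elbo() and upper_bound()):
X_demo = np.random.rand(50, 1)
Y_demo = np.sin(10 * X_demo) + 0.1 * np.random.randn(50, 1)
sgpr_demo = gpflow.models.SGPR(
    (X_demo, Y_demo),
    kernel=gpflow.kernels.SquaredExponential(),
    inducing_variable=np.linspace(0.0, 1.0, 10)[:, None],
)
assert sgpr_demo.elbo() <= sgpr_demo.upper_bound()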
Example #9
    def _compute_robust_maximum_log_likelihood_objective(self) -> tf.Tensor:
        r"""
        Computes the log marginal likelihood, with some slack caused by the
        jitter. Adding the jitter ensures numerical stability.

        .. math::
            \log p(Y | \theta).

        """
        X, Y = self.data
        num_data = X.shape[0]
        output_dim = tf.shape(Y)[1]

        K = self.kernel(X)
        k_diag = tf.linalg.diag_part(K)
        noiseK_L, L = tf.cond(
            self.likelihood.variance > self.jitter_variance,
            lambda: (
                tf.linalg.cholesky(tf.linalg.set_diag(K, k_diag + self.likelihood.variance)),
                tf.linalg.cholesky(tf.linalg.set_diag(K, k_diag + self.jitter_variance)),
            ),
            lambda: (tf.linalg.cholesky(tf.linalg.set_diag(K, k_diag + self.jitter_variance)),) * 2,
        )

        err = Y - self.mean_function(X)
        sigma = tf.sqrt(self.likelihood.variance)

        # Compute intermediate matrices
        A = tf.linalg.triangular_solve(L, K, lower=True) / sigma

        AAT = tf.linalg.matmul(A, A, transpose_b=True)
        B = tf.linalg.set_diag(AAT, tf.linalg.diag_part(AAT) + 1)  # B = AAT + tf.eye(num_data, dtype=default_float())
        # B = AAT + tf.eye(num_data, dtype=default_float())
        LB = tf.linalg.cholesky(B)
        Aerr = tf.linalg.matmul(A, err)
        c = tf.linalg.triangular_solve(LB, Aerr, lower=True) / sigma

        # compute log marginal bound
        bound = -0.5 * to_default_float(num_data) * to_default_float(output_dim) * np.log(2 * np.pi)
        bound -= to_default_float(output_dim) * tf.reduce_sum(tf.math.log(tf.linalg.diag_part(noiseK_L)))
        bound += -0.5 * tf.reduce_sum(tf.square(err)) / self.likelihood.variance
        bound += 0.5 * tf.reduce_sum(tf.square(c))

        return bound
def gauss_kl_vff(q_mu, q_sqrt, K):
    """
    Compute the KL divergence from

          q(x) = N(q_mu, q_sqrt^2)
    to
          p(x) = N(0, K)

    q_mu is a vector [N, 1] that contains the mean.
    q_sqrt is a matrix that is the lower triangular square-root matrix of the covariance of q.

    K is a positive definite matrix: the covariance of p.
    NOTE: K is a LinearOperator that provides efficient methods
        for solve(), log_abs_determinant(), and trace()
    """
    # KL(N₀ || N₁) = ½ [tr(Σ₁⁻¹ Σ₀) + (μ₁ - μ₀)ᵀ Σ₁⁻¹ (μ₁ - μ₀) - k + ln(det(Σ₁)/det(Σ₀))]
    # N₀ = q; μ₀ = q_mu, Σ₀ = q_sqrt q_sqrtᵀ
    # N₁ = p; μ₁ = 0, Σ₁ = K
    # KL(q || p) =
    #     ½ [tr(K⁻¹ q_sqrt q_sqrtᵀ) + q_muᵀ K⁻¹ q_mu - k + logdet(K) - logdet(q_sqrt q_sqrtᵀ)]
    # k = number of dimensions, computed below from the leading dimensions of q_sqrt
    Kinv_q_mu = K.solve(q_mu)

    mahalanobis_term = tf.squeeze(tf.matmul(q_mu, Kinv_q_mu, transpose_a=True))

    # GPflow: q_sqrt is num_latent_gps x N x N
    num_latent_gps = to_default_float(tf.shape(q_mu)[1])
    logdet_prior = num_latent_gps * K.log_abs_determinant()

    product_of_dimensions__int = tf.reduce_prod(
        tf.shape(q_sqrt)[:-1])  # dimensions are integers
    constant_term = to_default_float(product_of_dimensions__int)

    Lq = tf.linalg.band_part(q_sqrt, -1, 0)  # force lower triangle
    logdet_q = tf.reduce_sum(tf.math.log(tf.square(tf.linalg.diag_part(Lq))))

    # S = tf.matmul(q_sqrt, q_sqrt, transpose_b=True)
    # trace_term = tf.trace(K.solve(S))
    trace_term = tf.squeeze(
        tf.reduce_sum(Lq * K.solve(Lq), axis=[-1, -2]))  # O(N²) instead of O(N³)

    twoKL = trace_term + mahalanobis_term - constant_term + logdet_prior - logdet_q
    return 0.5 * twoKL
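
# Added sanity check (not from the original source; assumes tensorflow as tf,
# gpflow's to_default_float and the function above are in scope): with a unit
# identity prior, zero mean and an identity q_sqrt, q equals p and the KL is ~0.
K_demo = tf.linalg.LinearOperatorIdentity(num_rows=10, dtype=tf.float64)
q_mu_demo = tf.zeros((10, 1), dtype=tf.float64)
q_sqrt_demo = tf.eye(10, dtype=tf.float64)
print(gauss_kl_vff(q_mu_demo, q_sqrt_demo, K_demo))  # ~ 0.0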
Example #11
    def kl_div_x(self, X_data_mean, X_data_var) -> tf.Tensor:
        # KL[q(x) || p(x)]
        dX_data_var = (X_data_var if X_data_var.shape.ndims == 2 else
                       tf.linalg.diag_part(X_data_var))

        nq = to_default_float(tf.size(X_data_mean))
        kl_div = -0.5 * tf.reduce_sum(tf.math.log(dX_data_var))
        kl_div -= 0.5 * nq
        kl_div += 0.5 * tf.reduce_sum(tf.square(X_data_mean) + dX_data_var)

        return kl_div
Example #12
    def _compute_robust_maximum_log_likelihood_objective(self) -> tf.Tensor:
        """
        Construct a tensorflow function to compute the bound on the marginal
        likelihood. For a derivation of the terms in here, see the associated
        SGPR notebook.
        """
        X_data, Y_data = self.data

        num_inducing = len(self.inducing_variable)
        num_data = to_default_float(tf.shape(Y_data)[0])
        output_dim = to_default_float(tf.shape(Y_data)[1])

        err = Y_data - self.mean_function(X_data)
        Kdiag = self.kernel(X_data, full_cov=False)
        kuf = Kuf(self.inducing_variable, self.kernel, X_data)
        kuu = Kuu(self.inducing_variable, self.kernel, jitter=self.jitter_variance)
        L = tf.linalg.cholesky(kuu)
        sigma = tf.sqrt(self.likelihood.variance)

        # Compute intermediate matrices
        A = tf.linalg.triangular_solve(L, kuf, lower=True) / sigma
        AAT = tf.linalg.matmul(A, A, transpose_b=True)
        B = AAT + tf.eye(num_inducing, dtype=default_float())
        LB = tf.linalg.cholesky(B)
        Aerr = tf.linalg.matmul(A, err)
        c = tf.linalg.triangular_solve(LB, Aerr, lower=True) / sigma
        trace_term = 0.5 * output_dim * tf.reduce_sum(Kdiag) / self.likelihood.variance
        trace_term -= 0.5 * output_dim * tf.reduce_sum(tf.linalg.diag_part(AAT))

        # tr(Kff - Qff) should be positive, numerical issues can arise here
        assert trace_term > 0.0, f"Trace term negative, should be positive ({trace_term:.4e})."

        # compute log marginal bound
        bound = -0.5 * num_data * output_dim * np.log(2 * np.pi)
        bound += tf.negative(output_dim) * tf.reduce_sum(tf.math.log(tf.linalg.diag_part(LB)))
        bound -= 0.5 * num_data * output_dim * tf.math.log(self.likelihood.variance)
        bound += -0.5 * tf.reduce_sum(tf.square(err)) / self.likelihood.variance
        bound += 0.5 * tf.reduce_sum(tf.square(c))
        bound -= trace_term

        return bound
def detrend_cell(X, Y, detrend_lengthscale):

    k_trend = gpflow.kernels.SquaredExponential()
    m = gpflow.models.GPR(data=(X, Y), kernel=k_trend, mean_function=None)

    m.kernel.lengthscales = gpflow.Parameter(
        to_default_float(detrend_lengthscale + 0.1),
        transform=tfp.bijectors.Softplus(
            low=to_default_float(detrend_lengthscale)))

    opt = gpflow.optimizers.Scipy()
    opt_logs = opt.minimize(m.training_loss,
                            m.trainable_variables,
                            options=dict(maxiter=100))

    mean, var = m.predict_f(X)

    Y_detrended = Y - mean
    Y_detrended = Y_detrended - np.mean(Y_detrended)

    return k_trend, mean, var, Y_detrended
Example #14
 def create_models(self, data):
     self.models = []
     for i in range(self.num_outputs):
         kern = gpflow.kernels.SquaredExponential(
             lengthscales=tf.ones([data[0].shape[1]], dtype=gpflow.config.default_float()))
         # priors have to be included before the model gets compiled
         kern.lengthscales.prior = tfd.Gamma(to_default_float(1.1), to_default_float(1 / 10.0))
         kern.variance.prior = tfd.Gamma(to_default_float(1.5), to_default_float(1 / 2.0))
         self.models.append(gpflow.models.GPR((data[0], data[1][:, i:i + 1]), kernel=kern))
         self.models[-1].likelihood.prior = tfd.Gamma(to_default_float(1.2), to_default_float(1 / 0.05))
Example #15
 def _create_kernel(self):
     """Creates a kernel from list of strings stored in _kernel_split."""
     k = None
     for i, prod_kern in enumerate(self.kernel_split):
         sub_k = None
         for j, kern in enumerate(prod_kern):
             new_k = getattr(gpflow.kernels,
                             kern)(**self.kernel_params[i + j])
             if hasattr(new_k, 'lengthscales') and self.length_scale_prior:
                 new_k.lengthscales.prior = tfp.distributions.InverseGamma(
                     to_default_float(1), to_default_float(1))
             if j == 0:
                 sub_k = new_k
                 if self.variance_prior:
                     new_k.variance.prior = tfp.distributions.Gamma(
                         to_default_float(1), to_default_float(1))
             else:
                 set_trainable(new_k.variance, False)
                 sub_k *= new_k
         if i == 0:
             k = sub_k
         else:
             k += sub_k
     return k
Example #16
 def kl_mvn(self, X_mean, X_var, X_prior_mean, X_prior_var):
     dX_var = (
         X_var
         if X_var.shape.ndims == 2
         else tf.transpose(tf.linalg.diag_part(X_var))
     )
     NQ = to_default_float(tf.size(X_mean))
     # log of determinant of diagonal matrix = log of product of entries = sum of logs of entries 
     KL = -0.5 * tf.reduce_sum(tf.math.log(dX_var))
     KL += 0.5 * tf.reduce_sum(tf.math.log(X_prior_var))
     KL -= 0.5 * NQ
     # KL is additive for independent distribution (sums over N)
     # trace sums over Q (see https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence#Multivariate_normal_distributions)
     KL += 0.5 * tf.reduce_sum(
         (tf.square(X_mean - X_prior_mean) + dX_var) / X_prior_var
     )
     return KL
Example #17
def test_softmax_bernoulli_equivalence(num, dimF, dimY):
    dF = np.vstack((
        np.random.randn(num - 3, dimF),
        np.array([[-3.0, 0.0], [3, 0.0], [0.0, 0.0]]),
    ))
    dY = np.vstack((np.random.randn(num - 3, dimY), np.ones((3, dimY)))) > 0
    F = to_default_float(dF)
    Fvar = tf.exp(
        tf.stack([F[:, 1], -10.0 + tf.zeros(F.shape[0], dtype=F.dtype)],
                 axis=1))
    F = tf.stack([F[:, 0], tf.zeros(F.shape[0], dtype=F.dtype)], axis=1)
    Y = to_default_int(dY)
    Ylabel = 1 - Y

    softmax_likelihood = Softmax(dimF)
    bernoulli_likelihood = Bernoulli(invlink=tf.sigmoid)
    softmax_likelihood.num_monte_carlo_points = int(
        0.3e7)  # Minimum number of points to pass the test on CircleCI
    bernoulli_likelihood.num_gauss_hermite_points = 40

    assert_allclose(
        softmax_likelihood.conditional_mean(F)[:, :1],
        bernoulli_likelihood.conditional_mean(F[:, :1]),
    )

    assert_allclose(
        softmax_likelihood.conditional_variance(F)[:, :1],
        bernoulli_likelihood.conditional_variance(F[:, :1]),
    )

    assert_allclose(
        softmax_likelihood.log_prob(F, Ylabel),
        bernoulli_likelihood.log_prob(F[:, :1], Y.numpy()),
    )

    mean1, var1 = softmax_likelihood.predict_mean_and_var(F, Fvar)
    mean2, var2 = bernoulli_likelihood.predict_mean_and_var(
        F[:, :1], Fvar[:, :1])

    assert_allclose(mean1[:, 0, None], mean2, rtol=2e-3)
    assert_allclose(var1[:, 0, None], var2, rtol=2e-3)

    ls_ve = softmax_likelihood.variational_expectations(F, Fvar, Ylabel)
    lb_ve = bernoulli_likelihood.variational_expectations(
        F[:, :1], Fvar[:, :1], Y.numpy())
    assert_allclose(ls_ve, lb_ve, rtol=5e-3)
Example #18
def Kuu_matern12_fourierfeatures1d(inducing_variable, kernel, jitter=None):
    a, b, ms = (lambda u: (u.a, u.b, u.ms))(inducing_variable)
    omegas = 2.0 * np.pi * ms / (b - a)

    # Cosine block:
    lamb = 1.0 / kernel.lengthscales
    two_or_four = to_default_float(tf.where(omegas == 0, 2.0, 4.0))
    d_cos = ((b - a) * (tf.square(lamb) + tf.square(omegas)) / lamb /
             kernel.variance / two_or_four)  # eq. (111)
    v_cos = tf.ones_like(d_cos) / tf.sqrt(kernel.variance)  # eq. (110)
    cosine_block = LowRank(Diag(d_cos), v_cos[:, None])

    # Sine block:
    omegas = omegas[tf.not_equal(omegas, 0)]  # the sine block does not include omega=0
    d_sin = ((b - a) * (tf.square(lamb) + tf.square(omegas)) / lamb /
             kernel.variance / 4.0)  # eq. (113)
    sine_block = Diag(d_sin)

    return BlockDiag([cosine_block, sine_block]).to_dense()
Example #19
    def elbo(self) -> tf.Tensor:
        """
        Construct a tensorflow function to compute the bound on the marginal
        likelihood.
        """
        Y_data = self.data

        X_data_mean, X_data_var = self.encoder(Y_data)

        pX = DiagonalGaussian(X_data_mean, X_data_var)

        num_inducing = self.inducing_variable.num_inducing
        psi0 = tf.reduce_sum(expectation(pX, self.kernel))
        psi1 = expectation(pX, (self.kernel, self.inducing_variable))
        psi2 = tf.reduce_sum(
            expectation(pX, (self.kernel, self.inducing_variable),
                        (self.kernel, self.inducing_variable)),
            axis=0)

        cov_uu = covariances.Kuu(self.inducing_variable,
                                 self.kernel,
                                 jitter=default_jitter())
        L = tf.linalg.cholesky(cov_uu)
        sigma2 = self.likelihood.variance
        sigma = tf.sqrt(sigma2)

        # Compute intermediate matrices
        A = tf.linalg.triangular_solve(L, tf.transpose(psi1),
                                       lower=True) / sigma
        tmp = tf.linalg.triangular_solve(L, psi2, lower=True)
        AAT = tf.linalg.triangular_solve(L, tf.transpose(tmp),
                                         lower=True) / sigma2
        B = AAT + tf.eye(num_inducing, dtype=default_float())
        LB = tf.linalg.cholesky(B)
        log_det_B = 2.0 * tf.reduce_sum(tf.math.log(tf.linalg.diag_part(LB)))
        c = tf.linalg.triangular_solve(
            LB, tf.linalg.matmul(A, Y_data), lower=True) / sigma

        # KL[q(x) || p(x)]
        dX_data_var = (X_data_var if X_data_var.shape.ndims == 2 else
                       tf.linalg.diag_part(X_data_var))
        NQ = to_default_float(tf.size(X_data_mean))
        D = to_default_float(tf.shape(Y_data)[1])
        KL = -0.5 * tf.reduce_sum(tf.math.log(dX_data_var))
        KL += 0.5 * tf.reduce_sum(tf.math.log(self.X_prior_var))
        KL -= 0.5 * NQ
        KL += 0.5 * tf.reduce_sum(
            (tf.square(X_data_mean - self.X_prior_mean) + dX_data_var) /
            self.X_prior_var)

        self.loss_placeholder["KL_x"].append(KL.numpy())

        # compute log marginal bound
        ND = to_default_float(tf.size(Y_data))
        bound = -0.5 * ND * tf.math.log(2 * np.pi * sigma2)
        bound += -0.5 * D * log_det_B
        bound += -0.5 * tf.reduce_sum(tf.square(Y_data)) / sigma2
        bound += 0.5 * tf.reduce_sum(tf.square(c))
        bound += -0.5 * D * (tf.reduce_sum(psi0) / sigma2 -
                             tf.reduce_sum(tf.linalg.diag_part(AAT)))
        bound -= KL

        self.loss_placeholder["ELBO"].append(bound.numpy())

        return bound
Example #20
 def map_fn(image, label):
     image = to_default_float(image) / 255.0
     label = to_default_float(label)
     return tf.reshape(image, [-1, image_size]), label
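
# Added usage sketch (not from the original source; `image_size` and the dummy
# data below are assumptions): map_fn plugs into a tf.data pipeline of
# (image, label) pairs, e.g. MNIST.
image_size = 28 * 28
images_demo = tf.zeros((8, 28, 28), dtype=tf.uint8)
labels_demo = tf.zeros((8,), dtype=tf.int64)
dataset_demo = (
    tf.data.Dataset.from_tensor_slices((images_demo, labels_demo))
    .map(map_fn)
    .batch(4)
)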
Example #21
def map_fn(input_slice: Dict[str, tf.Tensor]):
    updated = input_slice
    image = to_default_float(updated["image"]) / 255.0
    label = to_default_float(updated["label"])
    return tf.reshape(image, [-1, image_size]), label
Example #22
def gauss_kl_ldf(q_mu: tf.Tensor, q_sqrt: tf.Tensor,
                 K: tf.linalg.LinearOperatorDiag):
    """
    Compute the KL divergence KL[q || p] between
          q(x) = N(m, L @ L.T)
          m = Kuu @ q_mu
          L = Kuu @ q_sqrt
    and
          p(x) = N(0, K)    where K is a Diag linear operator
          p(x) = N(0, I)    if K is None
    We assume L independent distributions, given by the columns of q_mu and
    the first or last dimension of q_sqrt. Returns the *sum* of the
    divergences.
    q_mu is a matrix ([M, L]), each column contains a mean.
    q_sqrt can be a 3D tensor ([L, M, M]), each matrix within is a lower
        triangular square-root matrix of the covariance of q.
    q_sqrt can be a matrix ([M, L]), each column represents the diagonal of a
        square-root matrix of the covariance of q.
    K is the covariance of p (a positive-definite matrix). Here it must always
    be a tf.linalg.LinearOperatorDiag instance, as the type hint suggests.
    if K is None:
        is_white = True
        is_batched_prior = False
    else:
        is_white = False
        is_batched_prior = len(K.shape) == 3
    is_diag = len(tf.shape(q_sqrt)) == 2

    M, L = tf.shape(q_mu)[0], tf.shape(q_mu)[1]

    if is_white:
        alpha = q_mu  # [M, L], implying that K is identity
    else:
        # [L, M, 1] if the prior is batched, else [M, L]
        q_mu = tf.transpose(q_mu)[:, :, None] if is_batched_prior else q_mu
        alpha = K.solve(q_mu)  # [L, M, 1] or [M, L]

    if is_diag:
        # if q_sqrt is diagonal
        q_diag = tf.linalg.LinearOperatorDiag(tf.square(q_sqrt))
        # Log-determinant of the covariance of q(x); factor of 2 from fact that q_sqrt is sqrt of whole
        logdet_qcov = tf.reduce_sum(q_diag.log_abs_determinant())
    else:
        Lq = tf.linalg.band_part(q_sqrt, -1, 0)  # force lower triangle # [L, M, M]
        Lq_diag = tf.linalg.diag_part(Lq)  # [L, M]
        # Log-determinant of the covariance of q(x):
        logdet_qcov = tf.reduce_sum(tf.math.log(tf.square(Lq_diag)))

    # Mahalanobis term: μqᵀ Σp⁻¹ μq
    mahalanobis = tf.reduce_sum(q_mu * alpha)

    # Constant term: - L * M
    constant = -to_default_float(tf.size(q_mu, out_type=tf.int64))

    # Trace term: tr(Σp⁻¹ Σq)
    if is_white:
        if is_diag:
            trace = tf.reduce_sum(q_diag.trace())
        else:
            trace = tf.reduce_sum(tf.square(Lq))
    else:
        if is_diag and not is_batched_prior:
            # K is [M, M] and q_sqrt is [M, L]: fast specialisation, we skip needing to take diag_part
            trace = tf.reduce_sum(K.solve(tf.square(q_sqrt)))
        else:
            # K is [L,M,M] or [M,M] and Lq_diag is [L, M] -> [M, L]
            trace = tf.reduce_sum(
                K.solve(tf.square(tf.linalg.matrix_transpose(Lq_diag))))

    twoKL = mahalanobis + constant - logdet_qcov + trace

    # Log-determinant of the covariance of p(x):
    if not is_white:
        log_det_p = tf.reduce_sum(K.log_abs_determinant())
        # If K is [L, M, M], num_latent_gps is no longer implicit, no need to multiply the single kernel logdet
        scale = 1.0 if is_batched_prior else to_default_float(L)
        log_det_p *= scale
        twoKL += log_det_p

    return 0.5 * twoKL
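
# Added sanity check (not from the original source): with a unit diagonal prior,
# zero mean and an identity q_sqrt for each of the L latent GPs, the function
# returns ~0.
M_demo, L_demo = 5, 3
K_demo = tf.linalg.LinearOperatorDiag(tf.ones(M_demo, dtype=tf.float64))
q_mu_demo = tf.zeros((M_demo, L_demo), dtype=tf.float64)
q_sqrt_demo = tf.eye(M_demo, batch_shape=[L_demo], dtype=tf.float64)  # [L, M, M]
print(gauss_kl_ldf(q_mu_demo, q_sqrt_demo, K_demo))  # ~ 0.0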
Example #23
try:
    m.kernel.trainable
except AttributeError:
    print(f'{m.kernel.__class__.__name__} does not have a trainable attribute')

# %%
set_trainable(m.kernel, False)
print_summary(m)

# %% [markdown]
# ## Priors
#
# You can set priors in the same way as transforms and trainability, by using `tensorflow_probability` distribution objects. Let's set a Gamma prior on the variance of the Matern32 kernel.

# %%
k = gpflow.kernels.Matern32()
k.variance.prior = tfp.distributions.Gamma(to_default_float(2),
                                           to_default_float(3))

print_summary(k)

# %%
m.kernel.kernels[0].variance.prior = tfp.distributions.Gamma(
    to_default_float(2), to_default_float(3))
print_summary(m)

# %% [markdown]
# ## Optimization
#
# To optimize your model, first create an instance of an optimizer (in this case, `gpflow.optimizers.Scipy`), which has optional arguments that are passed to `scipy.optimize.minimize` (we minimize the negative log likelihood). Then, call the `minimize` method of that optimizer, with your model as the optimization target. Variables that have priors are maximum a priori (MAP) estimated, that is, we add the log prior to the log likelihood, and otherwise use Maximum Likelihood.
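
# %%
# A minimal sketch of the optimization step described above (assuming `m` is
# the model summarised earlier in this notebook):
opt = gpflow.optimizers.Scipy()
opt_logs = opt.minimize(m.training_loss,
                        variables=m.trainable_variables,
                        options=dict(maxiter=100))
print_summary(m)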

def FitModel(
    bConsider,
    GPt,
    GPy,
    globalBranching,
    priorConfidence=0.80,
    M=10,
    likvar=1.0,
    kerlen=2.0,
    kervar=5.0,
    fDebug=False,
    maxiter=100,
    fPredict=True,
    fixHyperparameters=False,
):
    """
    Fit BGP model
    :param bConsider: list of candidate branching points
    :param GPt: pseudotime
    :param GPy: gene expression. Should be 0 mean for best performance.
    :param globalBranching: cell labels
    :param priorConfidence: prior confidence on cell labels
    :param M: number of inducing points
    :param likvar: initial value for Gaussian noise variance
    :param kerlen: initial value for kernel length scale
    :param kervar: initial value for kernel variance
    :param fDebug: Print debugging information
    :param maxiter: maximum number of iterations for optimisation
    :param fPredict: compute predictive mean and variance
    :param fixHyperparameters: should kernel hyperparameters be kept fixed or optimised?
    :return: dictionary of log likelihood, GPflow model, Phi matrix, predictive set of points,
    mean and variance, hyperparameter values, posterior on branching time
    """
    assert isinstance(bConsider, list), "Candidate B must be list"
    assert GPt.ndim == 1
    assert GPy.ndim == 2
    assert (
        GPt.size == GPy.size
    ), "pseudotime and gene expression data must be the same size"
    assert (
        globalBranching.size == GPy.size
    ), "state space must be same size as number of cells"
    assert M >= 0, "at least 0 or more inducing points should be given"
    phiInitial, phiPrior = GetInitialConditionsAndPrior(
        globalBranching, priorConfidence, infPriorPhi=True
    )

    XExpanded, indices, _ = VBHelperFunctions.GetFunctionIndexListGeneral(GPt)
    ptb = np.min([np.min(GPt[globalBranching == 2]), np.min(GPt[globalBranching == 3])])
    tree = bt.BinaryBranchingTree(0, 1, fDebug=False)
    tree.add(None, 1, np.ones((1, 1)) * ptb)  # B can be anything here
    (fm, _) = tree.GetFunctionBranchTensor()

    kb = bk.BranchKernelParam(
        gpflow.kernels.Matern32(1), fm, b=np.zeros((1, 1))
    ) + gpflow.kernels.White(1)
    kb.kernels[1].variance.assign(
        1e-6
    )  # controls the discontinuity magnitude, the gap at the branching point
    set_trainable(kb.kernels[1].variance, False)  # jitter for numerics
    if M == 0:
        m = assigngp_dense.AssignGP(
            GPt,
            XExpanded,
            GPy,
            kb,
            indices,
            np.ones((1, 1)) * ptb,
            phiInitial=phiInitial,
            phiPrior=phiPrior,
        )
    else:
        ZExpanded = np.ones((M, 2))
        ZExpanded[:, 0] = np.linspace(0, 1, M, endpoint=False)
        ZExpanded[:, 1] = np.array([i for j in range(M) for i in range(1, 4)])[:M]
        m = assigngp_denseSparse.AssignGPSparse(
            GPt,
            XExpanded,
            GPy,
            kb,
            indices,
            np.ones((1, 1)) * ptb,
            ZExpanded,
            phiInitial=phiInitial,
            phiPrior=phiPrior,
        )
    # Initialise hyperparameters
    m.likelihood.variance.assign(likvar)
    m.kernel.kernels[0].kern.lengthscales.assign(kerlen)
    m.kernel.kernels[0].kern.variance.assign(kervar)
    if fixHyperparameters:
        print("Fixing hyperparameters")
        set_trainable(m.kernel.kernels[0].kern.lengthscales, False)
        set_trainable(m.likelihood.variance, False)
        set_trainable(m.kernel.kernels[0].kern.variance, False)
    else:
        if fDebug:
            print("Adding prior logistic on length scale to avoid numerical problems")
        m.kernel.kernels[0].kern.lengthscales.prior = tfp.distributions.Normal(
            to_default_float(2.0), to_default_float(1.0)
        )
        m.kernel.kernels[0].kern.variance.prior = tfp.distributions.Normal(
            to_default_float(3.0), to_default_float(1.0)
        )
        m.likelihood.variance.prior = tfp.distributions.Normal(
            to_default_float(0.1), to_default_float(0.1)
        )

    # optimization
    ll = np.zeros(len(bConsider))
    Phi_l = list()
    ttestl_l, mul_l, varl_l = list(), list(), list()
    hyps = list()
    for ib, b in enumerate(bConsider):
        m.UpdateBranchingPoint(np.ones((1, 1)) * b, phiInitial)
        try:
            opt = gpflow.optimizers.Scipy()
            opt.minimize(
                m.training_loss,
                variables=m.trainable_variables,
                options=dict(disp=True, maxiter=maxiter),
            )
            # remember winning hyperparameter
            hyps.append(
                {
                    "likvar": m.likelihood.variance.numpy(),
                    "kerlen": m.kernel.kernels[0].kern.lengthscales.numpy(),
                    "kervar": m.kernel.kernels[0].kern.variance.numpy(),
                }
            )
            ll[ib] = m.log_posterior_density()
        except Exception as ex:
            print(f"Unexpected error: {ex} {'-' * 60}\nCaused by model: {m} {'-' * 60}")
            ll[0] = np.nan
            # return model so can inspect model
            return {
                "loglik": ll,
                "model": m,
                "Phi": np.nan,
                "prediction": {"xtest": np.nan, "mu": np.nan, "var": np.nan},
                "hyperparameters": np.nan,
                "posteriorB": np.nan,
            }
        # prediction
        Phi = m.GetPhi()
        Phi_l.append(Phi)
        if fPredict:
            ttestl, mul, varl = VBHelperFunctions.predictBranchingModel(m)
            ttestl_l.append(ttestl), mul_l.append(mul), varl_l.append(varl)
        else:
            ttestl_l.append([]), mul_l.append([]), varl_l.append([])
    iw = np.argmax(ll)
    postB = GetPosteriorB(ll, bConsider)
    if fDebug:
        print(
            "BGP Maximum at b=%.2f" % bConsider[iw],
            "CI= [%.2f, %.2f]" % (postB["B_CI"][0], postB["B_CI"][1]),
        )
    assert np.allclose(bConsider[iw], postB["Bmode"]), "%s-%s" % (
        str(postB["B_CI"]),
        str(bConsider[iw]),
    )
    return {
        "loglik": ll,
        "Phi": Phi_l[iw],  # 'model': m,
        "prediction": {"xtest": ttestl_l[iw], "mu": mul_l[iw], "var": varl_l[iw]},
        "hyperparameters": hyps[iw],
        "posteriorB": postB,
    }
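
# Added usage sketch (hypothetical; `t`, `y` and `labels` are illustrative names,
# not from the original source): call FitModel with a list of candidate branching
# points and read off the posterior mode and log likelihoods from the returned dict.
#
#     res = FitModel([0.2, 0.5, 0.8], t, y, labels, M=10, maxiter=50)
#     print(res["posteriorB"]["Bmode"], res["loglik"])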