Example #1
def test_multi_scale_inducing_equivalence_inducing_points(N, M, D):
    # Multiscale must be equivalent to inducing points when the scales are zero
    Xnew, Z = np.random.randn(N, D), np.random.randn(M, D)
    rbf = gpflow.kernels.SquaredExponential(1.3441,
                                            lengthscale=np.random.uniform(
                                                0.5, 3., D))
    inducing_variable_zero_lengthscale = Multiscale(Z,
                                                    scales=np.zeros(Z.shape))
    inducing_variable_inducing_point = InducingPoints(Z)

    multi_scale_Kuf = Kuf(inducing_variable_zero_lengthscale, rbf, Xnew)
    inducing_point_Kuf = Kuf(inducing_variable_inducing_point, rbf, Xnew)

    deviation_percent_Kuf = np.max(
        np.abs(multi_scale_Kuf - inducing_point_Kuf) / inducing_point_Kuf *
        100)
    assert deviation_percent_Kuf < 0.1

    multi_scale_Kuu = Kuu(inducing_variable_zero_lengthscale, rbf)
    inducing_point_Kuu = Kuu(inducing_variable_inducing_point, rbf)

    deviation_percent_Kuu = np.max(
        np.abs(multi_scale_Kuu - inducing_point_Kuu) / inducing_point_Kuu *
        100)
    assert deviation_percent_Kuu < 0.1
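
A compact, standalone sketch of the same check (this mirrors Example #7 below, which uses the current `lengthscales` keyword and a tiny 1e-10 offset on the scales; the shapes here are illustrative):

import numpy as np
import gpflow
from gpflow.covariances import Kuf, Kuu
from gpflow.inducing_variables import InducingPoints, Multiscale

Xnew, Z = np.random.randn(7, 2), np.random.randn(3, 2)
rbf = gpflow.kernels.SquaredExponential(1.3441, lengthscales=np.random.uniform(0.5, 3.0, 2))
ms = Multiscale(Z, scales=np.zeros(Z.shape) + 1e-10)
ip = InducingPoints(Z)
np.testing.assert_allclose(Kuf(ms, rbf, Xnew), Kuf(ip, rbf, Xnew), rtol=1e-3)  # within 0.1 %
np.testing.assert_allclose(Kuu(ms, rbf), Kuu(ip, rbf), rtol=1e-3)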
Example #2
def test_inducing_variables_psd_schur(input_dim, inducing_variable, kernel):
    # Conditional variance must be PSD.
    X = np.random.randn(5, input_dim)
    Kuf_values = Kuf(inducing_variable, kernel, X)
    Kuu_values = Kuu(inducing_variable, kernel, jitter=default_jitter())
    Kff_values = kernel(X)
    Qff_values = Kuf_values.numpy().T @ np.linalg.solve(Kuu_values, Kuf_values)
    assert np.all(np.linalg.eig(Kff_values - Qff_values)[0] > 0.0)
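
The fixtures above come from pytest parametrization; a minimal standalone sketch of the same PSD check, assuming plain inducing points and a SquaredExponential kernel, could look like:

import numpy as np
import gpflow
from gpflow.config import default_jitter
from gpflow.covariances import Kuf, Kuu
from gpflow.inducing_variables import InducingPoints

kernel = gpflow.kernels.SquaredExponential()
inducing_variable = InducingPoints(np.random.randn(3, 2))
X = np.random.randn(5, 2)

Kuf_values = Kuf(inducing_variable, kernel, X).numpy()
Kuu_values = Kuu(inducing_variable, kernel, jitter=default_jitter()).numpy()
Qff = Kuf_values.T @ np.linalg.solve(Kuu_values, Kuf_values)
schur = kernel(X).numpy() - Qff
assert np.all(np.linalg.eigvalsh(schur) > -1e-12)  # PSD up to numerical tolerance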
Example #3
    def upper_bound(self) -> tf.Tensor:
        """
        Upper bound for the sparse GP regression marginal likelihood. Note that
        the same inducing points are used for calculating the upper bound as for
        computing the likelihood approximation, which may not give the tightest
        upper bound. The upper bound can be tightened by optimising Z, just like
        the lower bound. This is especially important in FITC, as FITC is known
        to produce poor inducing point locations. An optimisable upper bound can
        be found at https://github.com/markvdw/gp_upper.

        The key reference is

        ::

          @misc{titsias_2014,
            title={Variational Inference for Gaussian and Determinantal Point Processes},
            url={http://www2.aueb.gr/users/mtitsias/papers/titsiasNipsVar14.pdf},
            publisher={Workshop on Advances in Variational Inference (NIPS 2014)},
            author={Titsias, Michalis K.},
            year={2014},
            month={Dec}
          }

        The key quantity, the trace term, can be computed via

        >>> _, v = conditionals.conditional(X, model.inducing_variable.Z, model.kernel,
        ...                                 np.zeros((len(model.inducing_variable), 1)))

        which computes each individual element of the trace term.
        """
        X_data, Y_data = self.data
        num_data = to_default_float(tf.shape(Y_data)[0])

        Kdiag = self.kernel(X_data, full_cov=False)
        kuu = Kuu(self.inducing_variable, self.kernel, jitter=self.jitter_variance)
        kuf = Kuf(self.inducing_variable, self.kernel, X_data)

        I = tf.eye(tf.shape(kuu)[0], dtype=default_float())

        L = tf.linalg.cholesky(kuu)
        A = tf.linalg.triangular_solve(L, kuf, lower=True)
        AAT = tf.linalg.matmul(A, A, transpose_b=True)
        B = I + AAT / self.likelihood.variance
        LB = tf.linalg.cholesky(B)

        # Using the Trace bound, from Titsias' presentation
        c = tf.maximum(tf.reduce_sum(Kdiag) - tf.reduce_sum(tf.square(A)), 0)

        # Alternative bound on max eigenval:
        corrected_noise = self.likelihood.variance + c

        const = -0.5 * num_data * tf.math.log(2 * np.pi * self.likelihood.variance)
        logdet = -tf.reduce_sum(tf.math.log(tf.linalg.diag_part(LB)))

        LC = tf.linalg.cholesky(I + AAT / corrected_noise)
        v = tf.linalg.triangular_solve(LC, tf.linalg.matmul(A, Y_data) / corrected_noise, lower=True)
        quad = -0.5 * tf.reduce_sum(tf.square(Y_data)) / corrected_noise + 0.5 * tf.reduce_sum(tf.square(v))

        return const + logdet + quad
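
A minimal usage sketch with GPflow's stock SGPR model (the method above is from a variant that exposes a `jitter_variance` attribute, but the bound is the same idea), assuming toy 1-D data:

import numpy as np
import gpflow

X = np.random.rand(100, 1)
Y = np.sin(10 * X) + 0.1 * np.random.randn(100, 1)
model = gpflow.models.SGPR((X, Y), kernel=gpflow.kernels.SquaredExponential(),
                           inducing_variable=X[::10].copy())

# The log marginal likelihood is sandwiched between the two bounds:
lower = model.elbo()
upper = model.upper_bound()
assert lower < upper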
Example #4
    def conditional_ND(self, X, full_cov=False):
        # X is [S,N,D]
        Kmm = Kuu(self.inducing_points, self.kernel, jitter=default_jitter())
        Lmm = tf.linalg.cholesky(Kmm)
        Kmm_tiled = tf.tile(tf.expand_dims(Kmm, 0), (self.num_outputs, 1, 1))
        Lmm_tiled = tf.tile(tf.expand_dims(Lmm, 0), (self.num_outputs, 1, 1))

        Kmn = Kuf(self.inducing_points, self.kernel, X)  # K(Z,X)
        # alpha(X) = k(Z,Z)^{-1}k(Z,X), = L^{-T}L^{-1}k(Z,X)
        A = tf.linalg.triangular_solve(Lmm, Kmn, lower=True)  # L^{-1}k(Z,X)
        if not self.white:
            # L^{-T}L^{-1}K(Z,X) is [M,N]
            A = tf.linalg.triangular_solve(tf.transpose(Lmm), A, lower=False)

        # m = alpha(X)^T(q_mu - m(Z)) = alpha(X)^T(q_mu) if zero mean function.
        mean = tf.matmul(A, self.q_mu, transpose_a=True)  # [N, D_out]

        # [D_out,M,N]
        A_tiled = tf.tile(A[None, :, :], [self.num_outputs, 1, 1])
        I = tf.eye(self.num_inducing, dtype=default_float())[None, :, :]

        # var = k(X,X) - alpha(X)^T(k(Z,Z)-q_sqrtq_sqrt^T)alpha(X)
        if self.white:
            SK = -I
        else:
            # -k(Z,Z)
            SK = -Kmm_tiled  # [D_out,M,M]

        if self.q_sqrt is not None:
            # SK = -k(Z,Z) + q_sqrtq_sqrt^T
            # [D_out,M,M]
            SK += tf.matmul(self.q_sqrt, self.q_sqrt, transpose_b=True)

        # B = -(k(Z,Z) - q_sqrtq_sqrt^T)alpha(X)
        B = tf.matmul(SK, A_tiled)  # [D_out,M,N]

        if full_cov:
            # delta_cov = -alpha(X)^T(k(Z,Z) - q_sqrtq_sqrt^T)alpha(X)
            delta_cov = tf.matmul(A_tiled, B, transpose_a=True)  # [D_out,N,N]
            # Knn = k(X,X)
            Knn = self.kernel.K(X)
        else:
            # Summing over dimension 1 (the M inducing points) yields
            # diag(alpha(X)^T B), i.e. the per-point variance correction.
            delta_cov = tf.reduce_sum(A_tiled * B, 1)
            # equivalently: tf.linalg.diag_part(tf.matmul(A_tiled, B,
            #     transpose_a=True))  # [D_out,N]
            Knn = self.kernel.K_diag(X)  # [N]

        var = tf.expand_dims(Knn, 0) + delta_cov  # [D_out,N]
        var = tf.transpose(var)

        return mean + self.mean_function(X), var
Example #5
    def predict_f(self, Xnew: InputData, full_cov=False, full_output_cov=False) -> MeanAndVariance:
        """
        Compute the mean and variance of the latent function at some new points
        Xnew. For a derivation of the terms in here, see the associated SGPR
        notebook.
        """
        X_data, Y_data = self.data
        num_inducing = len(self.inducing_variable)
        err = Y_data - self.mean_function(X_data)
        kuf = Kuf(self.inducing_variable, self.kernel, X_data)
        kuu = Kuu(self.inducing_variable, self.kernel, jitter=self.jitter_variance)
        Kus = Kuf(self.inducing_variable, self.kernel, Xnew)
        sigma = tf.sqrt(self.likelihood.variance)
        L = tf.linalg.cholesky(kuu)
        A = tf.linalg.triangular_solve(L, kuf, lower=True) / sigma
        B = tf.linalg.matmul(A, A, transpose_b=True) + tf.eye(num_inducing, dtype=default_float())
        LB = tf.linalg.cholesky(B)
        Aerr = tf.linalg.matmul(A, err)
        c = tf.linalg.triangular_solve(LB, Aerr, lower=True) / sigma
        tmp1 = tf.linalg.triangular_solve(L, Kus, lower=True)
        tmp2 = tf.linalg.triangular_solve(LB, tmp1, lower=True)
        mean = tf.linalg.matmul(tmp2, c, transpose_a=True)
        if full_cov:
            var = (
                self.kernel(Xnew)
                + tf.linalg.matmul(tmp2, tmp2, transpose_a=True)
                - tf.linalg.matmul(tmp1, tmp1, transpose_a=True)
            )
            var = tf.tile(var[None, ...], [self.num_latent_gps, 1, 1])  # [P, N, N]
        else:
            var = (
                self.kernel(Xnew, full_cov=False)
                + tf.reduce_sum(tf.square(tmp2), 0)
                - tf.reduce_sum(tf.square(tmp1), 0)
            )
            var = tf.tile(var[:, None], [1, self.num_latent_gps])
        return mean + self.mean_function(Xnew), var
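
As a sanity check, the Cholesky-based mean above equals the direct form Kus^T (Kuu + Kuf Kfu / sigma^2)^{-1} Kuf err / sigma^2. A small (numerically less stable) NumPy helper expressing that identity, assuming the same quantities are available as dense arrays:

import numpy as np

def sgpr_mean_naive(kuu, kuf, Kus, err, sigma2):
    # direct form of the SGPR predictive mean, useful for cross-checking
    Sigma = kuu + kuf @ kuf.T / sigma2  # Kuu + Kuf Kfu / sigma^2
    return Kus.T @ np.linalg.solve(Sigma, kuf @ err) / sigma2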
Example #6
def _conditional_train(
    Xnew: tf.Tensor,
    inducing_variable: InducingVariables,
    kernel: Kernel,
    f: tf.Tensor,
    *,
    full_cov=False,
    full_output_cov=False,
    q_sqrt=None,
    white=False,
):
    """
    Single-output GP conditional.

    The covariance matrices used to calculate the conditional have the following shape:
    - Kuu: [M, M]
    - Kuf: [M, N]
    - Kff: [N, N]

    Further reference
    -----------------
    - See `gpflow.conditionals._conditional` (below) for a detailed explanation of
      the conditional in the single-output case.
    - See the multioutput notebook for more information about the multioutput framework.

    Parameters
    ----------
    :param Xnew: data matrix, size [N, D].
    :param f: data matrix, [M, R]
    :param full_cov: return the covariance between the datapoints
    :param full_output_cov: return the covariance between the outputs.
           NOTE: as we are using a single-output kernel with repetitions
                 these covariances will be zero.
    :param q_sqrt: matrix of standard-deviations or Cholesky matrices,
        size [M, R] or [R, M, M].
    :param white: boolean of whether to use the whitened representation
    :return:
        - mean:     [N, R]
        - variance: [N, R], [R, N, N], [N, R, R] or [N, R, N, R]
        Please see `gpflow.conditional._expand_independent_outputs` for more information
        about the shape of the variance, depending on `full_cov` and `full_output_cov`.
    """
    Kmm = Kuu(inducing_variable, kernel, jitter=default_jitter())  # [M, M]
    Kmn = Kuf(inducing_variable, kernel, Xnew)  # [M, N]
    Knn = kernel.diag_tr()  # uses an optimized function to compute the diagonal of the covariance
    fmean, fvar = base_conditional(
        Kmn, Kmm, Knn, f, full_cov=full_cov, q_sqrt=q_sqrt, white=white
    )  # [N, R],  [R, N, N] or [N, R]
    return fmean, expand_independent_outputs(fvar, full_cov, full_output_cov)
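
A minimal sketch of calling the standard single-output conditional that this function mirrors, via GPflow's public dispatcher (rather than the custom `diag_tr` variant above):

import numpy as np
import gpflow
from gpflow.inducing_variables import InducingPoints

M, N, D, R = 4, 10, 2, 1
Z, Xnew = np.random.randn(M, D), np.random.randn(N, D)
f = np.random.randn(M, R)
mean, var = gpflow.conditionals.conditional(
    Xnew, InducingPoints(Z), gpflow.kernels.SquaredExponential(), f,
    full_cov=False, q_sqrt=None, white=True)  # mean: [N, R], var: [N, R]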
Example #7
def test_multi_scale_inducing_equivalence_inducing_points(N, M, D):
    # Multiscale must be equivalent to inducing points when the scales are zero
    Xnew, Z = np.random.randn(N, D), np.random.randn(M, D)
    rbf = gpflow.kernels.SquaredExponential(1.3441,
                                            lengthscales=np.random.uniform(
                                                0.5, 3.0, D))
    inducing_variable_zero_lengthscales = Multiscale(Z,
                                                     scales=np.zeros(Z.shape) +
                                                     1e-10)
    inducing_variable_inducing_point = InducingPoints(Z)

    multi_scale_Kuf = Kuf(inducing_variable_zero_lengthscales, rbf, Xnew)
    inducing_point_Kuf = Kuf(inducing_variable_inducing_point, rbf, Xnew)

    relative_error_Kuf = np.abs(multi_scale_Kuf -
                                inducing_point_Kuf) / inducing_point_Kuf
    assert np.max(relative_error_Kuf) < 0.1e-2  # 0.1 %

    multi_scale_Kuu = Kuu(inducing_variable_zero_lengthscales, rbf)
    inducing_point_Kuu = Kuu(inducing_variable_inducing_point, rbf)

    relative_error_Kuu = np.abs(multi_scale_Kuu -
                                inducing_point_Kuu) / inducing_point_Kuu
    assert np.max(relative_error_Kuu) < 0.1e-2  # 0.1 %
Example #8
        def __call__(self, X: TensorType) -> tf.Tensor:
            """
            :param X: evaluation points [N, D]
            :return: function value of sample [N, P]
            """
            N = tf.shape(X)[0]
            phi_X = kernel.feature_functions(X)  # [N, L]
            weight_space_prior_X = phi_X @ prior_weights  # [N, 1]
            Knm = tf.linalg.matrix_transpose(Kuf(inducing_variable, kernel, X))  # [N, M]
            function_space_update_X = Knm @ v  # [N, P]

            tf.debugging.assert_equal(tf.shape(weight_space_prior_X), [N, 1])
            tf.debugging.assert_equal(tf.shape(function_space_update_X), [N, P])

            return weight_space_prior_X + function_space_update_X  # [N, P]
Example #9
    def conditional(self, X, full_cov=False):
        # X is [N,D] or [S*N,D]
        
        Kmm = Kuu(self.inducing_points, self.kernel, jitter=default_jitter()) #[M,M]
        Lmm = tf.linalg.cholesky(Kmm)
        Kmn = Kuf(self.inducing_points, self.kernel, X) #[M,N]
        
        # alpha(X) = k(Z,Z)^{-1}k(Z,X), = L^{-T}L^{-1}k(Z,X)
        A = tf.linalg.triangular_solve(Lmm, Kmn, lower=True) # L^{-1}k(Z,X)
        if not self.white:
            # L^{-T}L^{-1}K(Z,X) is [M,N]
            A = tf.linalg.triangular_solve(tf.transpose(Lmm), A, lower=False)
        
        # m = alpha(X)^T(q_mu - m(Z))
        mean = tf.matmul(A, self.q_mu-self.mean_function(self.inducing_points.Z), 
                         transpose_a=True) # [N,1]
        
        I = tf.eye(self.num_inducing, dtype=default_float())
       
        # var = k(X,X) - alpha(X)^T(k(Z,Z)-q_sqrtq_sqrt^T)alpha(X)
        if self.white:
            SK = -I
        else:
            SK = -Kmm

        if self.q_sqrt is not None:
            # SK = -k(Z,Z) + q_sqrtq_sqrt^T
            SK += tf.matmul(self.q_sqrt, self.q_sqrt, transpose_b=True)
        
        # B = -(k(Z,Z) - q_sqrtq_sqrt^T)alpha(X)
        B = tf.matmul(SK, A) #[M,N]

        if full_cov:
            # delta_cov = -alpha(X)^T(k(Z,Z) - q_sqrtq_sqrt^T)alpha(X)
            delta_cov = tf.matmul(A, B, transpose_a=True) # [N,N]
            Knn = self.kernel(X, full_cov=True, presliced=False)
        else:
            delta_cov = tf.reduce_sum(A * B, 0)
            Knn = self.kernel(X, full_cov=False, presliced=False)
       
        var = Knn + delta_cov
        var = tf.transpose(var)
        
        return mean + self.mean_function(X), var
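
A quick consistency check one might run on an instance of this class (here called `layer`, hypothetically): the diagonal of the full covariance should match the marginal variances.

import numpy as np

def check_marginals_match(layer, X):
    _, var_full = layer.conditional(X, full_cov=True)   # [N, N]
    _, var_diag = layer.conditional(X, full_cov=False)  # [N]
    np.testing.assert_allclose(np.diag(var_full.numpy()), var_diag.numpy(), rtol=1e-6)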
Example #10
    def _compute_robust_maximum_log_likelihood_objective(self) -> tf.Tensor:
        """
        Construct a tensorflow function to compute the bound on the marginal
        likelihood. For a derivation of the terms in here, see the associated
        SGPR notebook.
        """
        X_data, Y_data = self.data

        num_inducing = len(self.inducing_variable)
        num_data = to_default_float(tf.shape(Y_data)[0])
        output_dim = to_default_float(tf.shape(Y_data)[1])

        err = Y_data - self.mean_function(X_data)
        Kdiag = self.kernel(X_data, full_cov=False)
        kuf = Kuf(self.inducing_variable, self.kernel, X_data)
        kuu = Kuu(self.inducing_variable, self.kernel, jitter=self.jitter_variance)
        L = tf.linalg.cholesky(kuu)
        sigma = tf.sqrt(self.likelihood.variance)

        # Compute intermediate matrices
        A = tf.linalg.triangular_solve(L, kuf, lower=True) / sigma
        AAT = tf.linalg.matmul(A, A, transpose_b=True)
        B = AAT + tf.eye(num_inducing, dtype=default_float())
        LB = tf.linalg.cholesky(B)
        Aerr = tf.linalg.matmul(A, err)
        c = tf.linalg.triangular_solve(LB, Aerr, lower=True) / sigma
        trace_term = 0.5 * output_dim * tf.reduce_sum(Kdiag) / self.likelihood.variance
        trace_term -= 0.5 * output_dim * tf.reduce_sum(tf.linalg.diag_part(AAT))

        # tr(Kff - Qff) should be positive, numerical issues can arise here
        assert trace_term > 0.0, f"Trace term negative, should be positive ({trace_term:.4e})."

        # compute log marginal bound
        bound = -0.5 * num_data * output_dim * np.log(2 * np.pi)
        bound += tf.negative(output_dim) * tf.reduce_sum(tf.math.log(tf.linalg.diag_part(LB)))
        bound -= 0.5 * num_data * output_dim * tf.math.log(self.likelihood.variance)
        bound += -0.5 * tf.reduce_sum(tf.square(err)) / self.likelihood.variance
        bound += 0.5 * tf.reduce_sum(tf.square(c))
        bound -= trace_term

        return bound
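
For reference, in the notation above the quantity assembled here is Titsias' collapsed lower bound

    bound = -0.5*N*D*log(2*pi*sigma^2) - D*sum(log(diag(LB)))
            - 0.5*||err||^2/sigma^2 + 0.5*||c||^2
            - 0.5*D*(tr(Kff) - tr(Qff))/sigma^2

where sigma^2 is the likelihood variance and Qff = Kfu Kuu^{-1} Kuf. The last line is exactly `trace_term`, since tr(AA^T) = tr(Qff)/sigma^2, and the rest follows from the definitions of A, B, LB and c above.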
Example #11
    def conditional_ND(self, X, full_cov=False):
        # X is [S,N,D]
        Kmm_tiled = tf.convert_to_tensor([
            Kuu(self.inducing_points[i],
                self.kernels[i],
                jitter=default_jitter()) for i in range(self.num_outputs)
        ])
        Lmm_tiled = tf.convert_to_tensor([
            tf.linalg.cholesky(Kmm_tiled[i]) for i in range(self.num_outputs)
        ])

        A_tiled = []
        mean_tiled = []
        for i in range(self.num_outputs):
            Kmn = Kuf(self.inducing_points[i], self.kernels[i], X)
            Lmm = Lmm_tiled[i]

            A = tf.linalg.triangular_solve(Lmm, Kmn,
                                           lower=True)  # L^{-1}k(Z,X)
            if not self.white:
                # L^{-T}L^{-1}K(Z,X) is [M,N]
                A = tf.linalg.triangular_solve(tf.transpose(Lmm),
                                               A,
                                               lower=False)

            # m = alpha(X)^T(q_mu - m(Z)) = alpha(X)^T(q_mu) if zero mean function.
            mean = tf.linalg.matvec(A, self.q_mu[:, i],
                                    transpose_a=True)  # [N]

            A_tiled.append(A)
            mean_tiled.append(mean)

        A_tiled = tf.convert_to_tensor(A_tiled)
        mean_tiled = tf.transpose(tf.convert_to_tensor(mean_tiled))

        I = tf.eye(self.num_inducing, dtype=default_float())[None, :, :]

        # var = k(X,X) - alpha(X)^T(k(Z,Z)-q_sqrtq_sqrt^T)alpha(X)
        if self.white:
            SK = -I
        else:
            # -k(Z,Z)
            SK = -Kmm_tiled  # [D_out,M,M]

        if self.q_sqrt is not None:
            # SK = -k(Z,Z) + q_sqrtq_sqrt^T
            # [D_out,M,M]
            SK += tf.matmul(self.q_sqrt, self.q_sqrt, transpose_b=True)

        # B = -(k(Z,Z) - q_sqrtq_sqrt^T)alpha(X)
        B = tf.matmul(SK, A_tiled)  # [D_out,M,N]

        if full_cov:
            # delta_cov = -alpha(X)^T(k(Z,Z) - q_sqrtq_sqrt^T)alpha(X)
            delta_cov = tf.matmul(A_tiled, B, transpose_a=True)  # [D_out,N,N]
            # Knn = k(X,X)
            Knn = tf.convert_to_tensor(
                [self.kernels[i].K(X) for i in range(self.num_outputs)])
        else:
            # Summing over dimension 1 (the M inducing points) yields
            # diag(alpha(X)^T B), i.e. the per-point variance correction.
            delta_cov = tf.reduce_sum(A_tiled * B, 1)
            # equivalently: tf.linalg.diag_part(tf.matmul(A_tiled, B,
            #     transpose_a=True))  # [D_out,N]
            Knn = tf.convert_to_tensor(
                [self.kernels[i].K_diag(X) for i in range(self.num_outputs)])

        var = Knn + delta_cov  # [D_out,N]
        var = tf.transpose(var)

        return mean_tiled + self.mean_function(X), var