def inference(self, kern, X, likelihood, Y, mean_function=None, Y_metadata=None,
              K=None, variance=None, Z_tilde=None, A=None):
    """
    Returns a Posterior class containing essential quantities of the posterior.
    The comments below correspond to Alg. 2.1 in the GPML textbook.
    """
    if mean_function is None:
        m = 0
    else:
        m = mean_function.f(X)

    if variance is None:
        variance = likelihood.gaussian_variance(Y_metadata)

    YYT_factor = Y - m

    # NOTE: change K to A K A^T for the group case
    if K is None:
        if A is None:
            A = np.identity(X.shape[0])
        K = A.dot(kern.K(X)).dot(A.T)  # A_t k(X_t, X_t) A_t^T
    else:
        raise NotImplementedError('Needs to be extended to the group case!')

    Ky = K.copy()
    diag.add(Ky, variance + 1e-8)  # A_t k(X_t, X_t) A_t^T + sigma^2 I

    # pdinv returns:
    #   Wi:       inverse of Ky
    #   LW:       lower Cholesky factor of Ky (L)
    #   LWi:      inverse of the Cholesky factor (not used)
    #   W_logdet: log-determinant of Ky
    Wi, LW, LWi, W_logdet = pdinv(Ky)

    # LAPACK: DPOTRS solves a system of linear equations A*X = B with a symmetric
    # positive definite matrix A using the Cholesky factorization
    # A = U**T*U or A = L*L**T computed by DPOTRF.
    alpha, _ = dpotrs(LW, YYT_factor, lower=1)
    # This gives (A_t k(X_t, X_t) A_t^T + sigma^2 I)^{-1} (Y_t - m).

    # NOTE: 20210827 confirmed the log marginal likelihood
    log_marginal = 0.5 * (-Y.size * log_2_pi - Y.shape[1] * W_logdet
                          - np.sum(alpha * YYT_factor))

    if Z_tilde is not None:
        # This is a correction term for the log marginal likelihood.
        # In EP this is log Z_tilde, which is the difference between the
        # Gaussian marginal and Z_EP.
        log_marginal += Z_tilde

    # REVIEW: since log_marginal does not change, the gradient does not need to change either.
    # FIXME: confirm the gradient update is correct
    # dL_dK = 0.5 * (tdot(alpha) - Y.shape[1] * Wi)
    dL_dK = 0.5 * A.T.dot(tdot(alpha) - Y.shape[1] * Wi).dot(A)
    dL_dthetaL = likelihood.exact_inference_gradients(np.diag(dL_dK), Y_metadata)

    return PosteriorExactGroup(woodbury_chol=LW, woodbury_vector=alpha, K=K, A=A), \
        log_marginal, {'dL_dK': dL_dK, 'dL_dthetaL': dL_dthetaL, 'dL_dm': alpha}
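# A minimal sanity check for the exact-GP log marginal computed above: for
# Gaussian noise, log p(Y) should equal the log-density of Y under
# N(m, K + sigma^2 I). This sketch is standalone NumPy/SciPy with a toy
# squared-exponential kernel; it is illustrative only and does not use the
# GPy-style helpers (pdinv, dpotrs) from the code above.
import numpy as np
from scipy.stats import multivariate_normal

def _toy_rbf(X, variance=1.0, lengthscale=1.0):
    # k(x, x') = variance * exp(-0.5 * ||x - x'||^2 / lengthscale^2)
    sq = ((X[:, None, :] - X[None, :, :]) ** 2).sum(-1)
    return variance * np.exp(-0.5 * sq / lengthscale ** 2)

rng = np.random.default_rng(0)
X_toy = rng.normal(size=(5, 1))
y_toy = rng.normal(size=5)
noise = 0.1

Ky_toy = _toy_rbf(X_toy) + noise * np.eye(5)
# Direct evaluation of the Gaussian log-density ...
ref = multivariate_normal(mean=np.zeros(5), cov=Ky_toy).logpdf(y_toy)
# ... versus the Cholesky-based form used in inference():
L_toy = np.linalg.cholesky(Ky_toy)
alpha_toy = np.linalg.solve(Ky_toy, y_toy)
log_marginal_toy = 0.5 * (-5 * np.log(2 * np.pi)
                          - 2 * np.log(np.diag(L_toy)).sum()
                          - y_toy.dot(alpha_toy))
assert np.isclose(ref, log_marginal_toy)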
def inference(self, kern, X, W, likelihood, Y, mean_function=None, Y_metadata=None,
              K=None, variance=None, Z_tilde=None):
    """
    Returns a Posterior class containing essential quantities of the posterior.
    """
    if mean_function is None:
        m = 0
    else:
        m = mean_function.f(X)

    if variance is None:
        variance = likelihood.gaussian_variance(Y_metadata)

    YYT_factor = Y - m

    if K is None:
        K = kern.K(X)

    Ky = K.copy()
    diag.add(Ky, variance + 1e-8)

    Wi, LW, LWi, W_logdet = pdinv(Ky)
    alpha, _ = dpotrs(LW, YYT_factor, lower=1)

    log_marginal = 0.5 * (-Y.size * log_2_pi - Y.shape[1] * W_logdet
                          - np.sum(alpha * YYT_factor))

    if Z_tilde is not None:
        # This is a correction term for the log marginal likelihood.
        # In EP this is log Z_tilde, which is the difference between the
        # Gaussian marginal and Z_EP.
        log_marginal += Z_tilde

    dL_dK = 0.5 * (tdot(alpha) - Y.shape[1] * Wi)
    dL_dthetaL = likelihood.exact_inference_gradients(np.diag(dL_dK), Y_metadata)

    posterior_ = Posterior(woodbury_chol=LW, woodbury_vector=alpha, K=K)
    return posterior_, log_marginal, {
        'dL_dK': dL_dK,
        'dL_dthetaL': dL_dthetaL,
        'dL_dm': alpha
    }, W_logdet
def update_model(self, xvals, zvals, incremental=True):
    assert self.xvals is not None
    assert self.zvals is not None

    Kx = self.kern.K(self.xvals, xvals)

    # Update K matrix
    self._K = np.block([
        [self._K, Kx],
        [Kx.T, self.kern.K(xvals, xvals)]
    ])

    # Update internal data
    self.xvals = np.vstack([self.xvals, xvals])
    self.zvals = np.vstack([self.zvals, zvals])

    # Update the Woodbury inverse, either incrementally or from scratch
    if incremental:
        # Block-matrix inversion: with Pinv the inverse of the old (noisy)
        # kernel matrix and Minv the inverse of the Schur complement of the
        # new block, the updated inverse is assembled without re-inverting
        # the full matrix.
        Pinv = self.woodbury_inv
        Q = Kx
        R = Kx.T
        S = self.kern.K(xvals, xvals)
        M = S - np.dot(np.dot(R, Pinv), Q)
        # Add some noise to keep the matrix well conditioned
        diag.add(M, self.noise + 1e-8)
        Minv, _, _, _ = pdinv(M)

        Pnew = Pinv + np.dot(np.dot(np.dot(np.dot(Pinv, Q), Minv), R), Pinv)
        Qnew = -np.dot(np.dot(Pinv, Q), Minv)
        Rnew = -np.dot(np.dot(Minv, R), Pinv)
        Snew = Minv
        self._woodbury_inv = np.block([
            [Pnew, Qnew],
            [Rnew, Snew]
        ])
    else:
        Ky = self._K.copy()
        # Add some noise to keep the matrix well conditioned
        diag.add(Ky, self.noise + 1e-8)
        Wi, LW, LWi, W_logdet = pdinv(Ky)
        self._woodbury_inv = Wi

    self._woodbury_vector = np.dot(self.woodbury_inv, self.zvals)
    self._woodbury_chol = None
    self._mean = None
    self._covariance = None
    self._prior_mean = 0.
    self._K_chol = None
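# The incremental branch above is the standard block-matrix (Schur-complement)
# inversion identity. A quick standalone NumPy check of that identity on a
# random positive-definite matrix:
import numpy as np

rng = np.random.default_rng(1)
A_chk = rng.normal(size=(6, 6))
full = A_chk @ A_chk.T + 6 * np.eye(6)       # SPD matrix, partitioned below
P_blk, Q_blk = full[:4, :4], full[:4, 4:]
R_blk, S_blk = full[4:, :4], full[4:, 4:]

Pinv = np.linalg.inv(P_blk)
Minv = np.linalg.inv(S_blk - R_blk @ Pinv @ Q_blk)  # inverse of the Schur complement
blockwise = np.block([
    [Pinv + Pinv @ Q_blk @ Minv @ R_blk @ Pinv, -Pinv @ Q_blk @ Minv],
    [-Minv @ R_blk @ Pinv,                       Minv],
])
assert np.allclose(blockwise, np.linalg.inv(full))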
def init_model(self, xvals, zvals):
    # Update internal data
    self.xvals = xvals
    self.zvals = zvals

    self._K = self.kern.K(self.xvals)
    Ky = self._K.copy()
    # Add some noise to keep the matrix well conditioned
    diag.add(Ky, self.noise + 1e-8)
    Wi, LW, LWi, W_logdet = pdinv(Ky)

    self._woodbury_inv = Wi
    self._woodbury_vector = np.dot(self._woodbury_inv, self.zvals)
    self._woodbury_chol = None
    self._mean = None
    self._covariance = None
    self._prior_mean = 0.
    self._K_chol = None
def inference(self, kern, X, Z, likelihood, Y, qU):
    """
    The SVI-VarDTC inference
    """
    if isinstance(Y, np.ndarray) and np.any(np.isnan(Y)):
        missing_data = True
        N, M, Q = Y.shape[0], Z.shape[0], Z.shape[1]
        Ds = Y.shape[1] - (np.isnan(Y) * 1).sum(1)
        Ymask = 1 - np.isnan(Y) * 1
        Y_masked = np.zeros_like(Y)
        Y_masked[Ymask == 1] = Y[Ymask == 1]
        ND = Ymask.sum()
    else:
        missing_data = False
        N, D, M, Q = Y.shape[0], Y.shape[1], Z.shape[0], Z.shape[1]
        ND = N * D

    uncertain_inputs = isinstance(X, VariationalPosterior)
    uncertain_outputs = isinstance(Y, VariationalPosterior)

    beta = 1. / np.fmax(likelihood.variance, 1e-6)

    psi0, psi2, YRY, psi1, psi1Y = self.gatherPsiStat(
        kern, X, Z, Y if not missing_data else Y_masked, beta,
        uncertain_inputs, D if not missing_data else Ds, missing_data)

    #======================================================================
    # Compute Common Components
    #======================================================================

    mu, S = qU.mean, qU.covariance
    mupsi1Y = mu.dot(psi1Y)

    Kmm = kern.K(Z).copy()
    diag.add(Kmm, self.const_jitter)
    Lm = jitchol(Kmm)

    if missing_data:
        S_mu = S[None, :, :] + mu.T[:, :, None] * mu.T[:, None, :]
        NS_mu = S_mu.T.dot(Ymask.T).T
        LmInv = dtrtri(Lm)

        LmInvPsi2LmInvT = np.swapaxes(psi2.dot(LmInv.T), 1, 2).dot(LmInv.T)
        LmInvSmuLmInvT = np.swapaxes(NS_mu.dot(LmInv.T), 1, 2).dot(LmInv.T)

        B = mupsi1Y + mupsi1Y.T + (Ds[:, None, None] * psi2).sum(0)
        tmp = backsub_both_sides(Lm, B, 'right')

        logL = -ND*log_2_pi/2. + ND*np.log(beta)/2. - psi0/2. - YRY/2. \
               - (LmInvSmuLmInvT*LmInvPsi2LmInvT).sum()/2. + np.trace(tmp)/2.
    else:
        S_mu = S * D + tdot(mu)
        if uncertain_inputs:
            LmInvPsi2LmInvT = backsub_both_sides(Lm, psi2, 'right')
        else:
            LmInvPsi2LmInvT = tdot(dtrtrs(Lm, psi1.T)[0]) / beta  # tdot(psi1.dot(LmInv.T).T)/beta
        LmInvSmuLmInvT = backsub_both_sides(Lm, S_mu, 'right')

        B = mupsi1Y + mupsi1Y.T + D * psi2
        tmp = backsub_both_sides(Lm, B, 'right')

        logL = -ND*log_2_pi/2. + ND*np.log(beta)/2. - psi0/2. - YRY/2. \
               - (LmInvSmuLmInvT*LmInvPsi2LmInvT).sum()/2. + np.trace(tmp)/2.

    #======================================================================
    # Compute dL_dKmm
    #======================================================================

    dL_dKmm = np.eye(M)

    #======================================================================
    # Compute dL_dthetaL for uncertain input and non-heteroscedastic noise
    #======================================================================

    dL_dthetaL = None  # (YRY*beta + beta*output_dim*psi0 - num_data*output_dim*beta)/2. - beta*(dL_dpsi2R*psi2).sum() - beta*np.trace(LLinvPsi1TYYTPsi1LLinvT)

    #======================================================================
    # Compute dL_dpsi
    #======================================================================

    if missing_data:
        dL_dpsi0 = -Ds * (beta * np.ones((N, ))) / 2.
    else:
        dL_dpsi0 = -D * (beta * np.ones((N, ))) / 2.

    if uncertain_outputs:
        Ym, Ys = Y.mean, Y.variance
        dL_dpsi1 = dtrtrs(Lm, dtrtrs(Lm, Ym.dot(mu.T).T)[0], trans=1)[0].T * beta
    else:
        if missing_data:
            dL_dpsi1 = dtrtrs(Lm, dtrtrs(Lm, (Y_masked).dot(mu.T).T)[0], trans=1)[0].T * beta
        else:
            dL_dpsi1 = dtrtrs(Lm, dtrtrs(Lm, Y.dot(mu.T).T)[0], trans=1)[0].T * beta

    if uncertain_inputs:
        if missing_data:
            dL_dpsi2 = np.swapaxes(
                (Ds[:, None, None] * np.eye(M)[None, :, :] - LmInvSmuLmInvT).dot(LmInv),
                1, 2).dot(LmInv) * beta / 2.
        else:
            dL_dpsi2 = beta * backsub_both_sides(Lm, D * np.eye(M) - LmInvSmuLmInvT, 'left') / 2.
    else:
        dL_dpsi1 += beta * psi1.dot(dL_dpsi2 + dL_dpsi2.T)
        dL_dpsi2 = None

    if uncertain_inputs:
        grad_dict = {
            'dL_dKmm': dL_dKmm,
            'dL_dpsi0': dL_dpsi0,
            'dL_dpsi1': dL_dpsi1,
            'dL_dpsi2': dL_dpsi2,
            'dL_dthetaL': dL_dthetaL
        }
    else:
        grad_dict = {
            'dL_dKmm': dL_dKmm,
            'dL_dKdiag': dL_dpsi0,
            'dL_dKnm': dL_dpsi1,
            'dL_dthetaL': dL_dthetaL
        }

    if uncertain_outputs:
        Ym = Y.mean
        grad_dict['dL_dYmean'] = -Ym * beta + dtrtrs(Lm, psi1.T)[0].T.dot(dtrtrs(Lm, mu)[0])
        grad_dict['dL_dYvar'] = beta / -2.

    return logL, grad_dict
def inference(self, kern, X, Z, likelihood, Y, mean_function=None, Y_metadata=None):
    assert mean_function is None, "inference with a mean function not implemented"

    num_inducing, _ = Z.shape
    num_data, output_dim = Y.shape

    # make sure the noise is not heteroscedastic
    sigma_n = likelihood.gaussian_variance(Y_metadata)
    if sigma_n.size > 1:
        raise NotImplementedError("no heteroscedastic noise with this implementation of PEP")

    Kmm = kern.K(Z)
    Knn = kern.Kdiag(X)
    Knm = kern.K(X, Z)
    U = Knm

    # factor Kmm
    diag.add(Kmm, self.const_jitter)
    Kmmi, L, Li, _ = pdinv(Kmm)

    # compute beta_star, the effective noise precision
    LiUT = np.dot(Li, U.T)
    sigma_star = sigma_n + self.alpha * (Knn - np.sum(np.square(LiUT), 0))
    beta_star = 1. / sigma_star

    # Compute and factor A
    A = tdot(LiUT * np.sqrt(beta_star)) + np.eye(num_inducing)
    LA = jitchol(A)

    # back substitute to get b, P, v
    URiy = np.dot(U.T * beta_star, Y)
    tmp, _ = dtrtrs(L, URiy, lower=1)
    b, _ = dtrtrs(LA, tmp, lower=1)
    tmp, _ = dtrtrs(LA, b, lower=1, trans=1)
    v, _ = dtrtrs(L, tmp, lower=1, trans=1)
    tmp, _ = dtrtrs(LA, Li, lower=1, trans=0)
    P = tdot(tmp.T)

    alpha_const_term = (1.0 - self.alpha) / self.alpha

    # compute log marginal
    log_marginal = -0.5*num_data*output_dim*np.log(2*np.pi) + \
                   -np.sum(np.log(np.diag(LA)))*output_dim + \
                   0.5*output_dim*(1 + alpha_const_term)*np.sum(np.log(beta_star)) + \
                   -0.5*np.sum(np.square(Y.T*np.sqrt(beta_star))) + \
                   0.5*np.sum(np.square(b)) + 0.5*alpha_const_term*num_data*np.log(sigma_n)

    # compute dL_dR
    Uv = np.dot(U, v)
    dL_dR = 0.5*(np.sum(U*np.dot(U, P), 1) - (1.0 + alpha_const_term)/beta_star
                 + np.sum(np.square(Y), 1) - 2.*np.sum(Uv*Y, 1)
                 + np.sum(np.square(Uv), 1))*beta_star**2

    # Compute dL_dKmm
    vvT_P = tdot(v.reshape(-1, 1)) + P
    dL_dK = 0.5*(Kmmi - vvT_P)
    KiU = np.dot(Kmmi, U.T)
    dL_dK += self.alpha * np.dot(KiU*dL_dR, KiU.T)

    # Compute dL_dU
    vY = np.dot(v.reshape(-1, 1), Y.T)
    dL_dU = vY - np.dot(vvT_P, U.T)
    dL_dU *= beta_star
    dL_dU -= self.alpha * 2.*KiU*dL_dR

    dL_dthetaL = likelihood.exact_inference_gradients(dL_dR)
    dL_dthetaL += 0.5*alpha_const_term*num_data / sigma_n

    grad_dict = {'dL_dKmm': dL_dK,
                 'dL_dKdiag': dL_dR * self.alpha,
                 'dL_dKnm': dL_dU.T,
                 'dL_dthetaL': dL_dthetaL}

    # construct a posterior object
    post = Posterior(woodbury_inv=Kmmi - P, woodbury_vector=v, K=Kmm,
                     mean=None, cov=None, K_chol=L)

    return post, log_marginal, grad_dict
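# In the PEP inference above, the effective noise sigma_star = sigma_n +
# alpha * diag(Knn - Qnn) interpolates between FITC (alpha = 1, full
# correction term) and a VFE/DTC-like limit (alpha -> 0, plain sigma_n);
# see e.g. Bui, Yan & Turner (2017). A standalone NumPy illustration with a
# toy unit-variance squared-exponential kernel (all names below are local to
# this sketch):
import numpy as np

def _rbf_cross(A, B):
    sq = ((A[:, None, :] - B[None, :, :]) ** 2).sum(-1)
    return np.exp(-0.5 * sq)

rng = np.random.default_rng(2)
X_toy, Z_toy = rng.normal(size=(8, 1)), rng.normal(size=(3, 1))
Knn_toy = np.ones(8)                                   # Kdiag for unit variance
Knm_toy = _rbf_cross(X_toy, Z_toy)
Kmm_toy = _rbf_cross(Z_toy, Z_toy) + 1e-8 * np.eye(3)
Qnn_toy = np.sum(Knm_toy @ np.linalg.inv(Kmm_toy) * Knm_toy, 1)  # diag(Knm Kmm^-1 Kmn)
sigma_n_toy = 0.1
for alpha_toy in (1.0, 0.5, 1e-6):
    sigma_star_toy = sigma_n_toy + alpha_toy * (Knn_toy - Qnn_toy)
    print(alpha_toy, sigma_star_toy[:3])  # alpha=1: FITC noise; alpha->0: ~sigma_n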
def inference(self, kern, X, Z, likelihood, Y, qU_mean, qU_var, Kuu_sigma=None):
    """
    The SVI-VarDTC inference
    """
    N, D, M, Q = Y.shape[0], Y.shape[1], Z.shape[0], Z.shape[1]

    uncertain_inputs = isinstance(X, VariationalPosterior)
    uncertain_outputs = isinstance(Y, VariationalPosterior)

    beta = 1. / likelihood.variance

    psi0, psi2, YRY, psi1, psi1Y = self.gatherPsiStat(kern, X, Z, Y, beta, uncertain_inputs)

    #======================================================================
    # Compute Common Components
    #======================================================================

    Kuu = kern.K(Z).copy()
    if Kuu_sigma is not None:
        diag.add(Kuu, Kuu_sigma)
    else:
        diag.add(Kuu, self.const_jitter)
    Lm = jitchol(Kuu)

    mu, S = qU_mean, qU_var
    Ls = jitchol(S)
    LinvLs = dtrtrs(Lm, Ls)[0]
    Linvmu = dtrtrs(Lm, mu)[0]
    psi1YLinvT = dtrtrs(Lm, psi1Y.T)[0].T

    self.mid = {'qU_L': Ls, 'LinvLu': LinvLs, 'L': Lm, 'Linvmu': Linvmu}

    if uncertain_inputs:
        LmInvPsi2LmInvT = backsub_both_sides(Lm, psi2, 'right')
    else:
        LmInvPsi2LmInvT = tdot(dtrtrs(Lm, psi1.T)[0]) / beta

    LmInvSmuLmInvT = tdot(LinvLs) * D + tdot(Linvmu)

    #======================================================================
    # Compute log-likelihood
    #======================================================================

    logL_R = -N * np.log(beta)
    logL = -N*D*log_2_pi/2. - D*logL_R/2. - D*psi0/2. - YRY/2. \
           - (LmInvSmuLmInvT*LmInvPsi2LmInvT).sum()/2. \
           + np.trace(LmInvPsi2LmInvT)*D/2. + (Linvmu*psi1YLinvT.T).sum()

    #======================================================================
    # Compute dL_dKmm
    #======================================================================

    tmp1 = backsub_both_sides(Lm, LmInvSmuLmInvT.dot(LmInvPsi2LmInvT), 'left')
    tmp2 = Linvmu.dot(psi1YLinvT)
    tmp3 = backsub_both_sides(Lm, -D * LmInvPsi2LmInvT - tmp2 - tmp2.T, 'left') / 2.
    dL_dKmm = (tmp1 + tmp1.T) / 2. + tmp3

    #======================================================================
    # Compute dL_dthetaL for uncertain input and non-heteroscedastic noise
    #======================================================================

    dL_dthetaL = -D * N * beta / 2. - (
        -D * psi0 / 2. - YRY / 2.
        - (LmInvSmuLmInvT * LmInvPsi2LmInvT).sum() / 2.
        + np.trace(LmInvPsi2LmInvT) * D / 2.
        + (Linvmu * psi1YLinvT.T).sum()) * beta

    #======================================================================
    # Compute dL_dqU
    #======================================================================

    tmp1 = backsub_both_sides(Lm, -LmInvPsi2LmInvT, 'left')
    dL_dqU_mean = tmp1.dot(mu) + dtrtrs(Lm, psi1YLinvT.T, trans=1)[0]
    dL_dqU_var = D / 2. * tmp1

    #======================================================================
    # Compute the Posterior distribution of inducing points p(u|Y)
    #======================================================================

    KuuInvmu = dtrtrs(Lm, Linvmu, trans=1)[0]
    tmp = backsub_both_sides(Lm, np.eye(M) - tdot(LinvLs), 'left')

    post = Posterior(woodbury_inv=tmp, woodbury_vector=KuuInvmu, K=Kuu,
                     mean=mu, cov=S, K_chol=Lm)

    #======================================================================
    # Compute dL_dpsi
    #======================================================================

    dL_dpsi0 = -D * (beta * np.ones((N, ))) / 2.

    if uncertain_outputs:
        dL_dpsi1 = Y.mean.dot(dtrtrs(Lm, Linvmu, trans=1)[0].T) * beta
    else:
        dL_dpsi1 = Y.dot(dtrtrs(Lm, Linvmu, trans=1)[0].T) * beta

    dL_dpsi2 = beta * backsub_both_sides(Lm, D * np.eye(M) - LmInvSmuLmInvT, 'left') / 2.
    if not uncertain_inputs:
        dL_dpsi1 += psi1.dot(dL_dpsi2 + dL_dpsi2.T) / beta
        dL_dpsi2 = None

    if uncertain_inputs:
        grad_dict = {
            'dL_dKmm': dL_dKmm,
            'dL_dpsi0': dL_dpsi0,
            'dL_dpsi1': dL_dpsi1,
            'dL_dpsi2': dL_dpsi2,
            'dL_dthetaL': dL_dthetaL,
            'dL_dqU_mean': dL_dqU_mean,
            'dL_dqU_var': dL_dqU_var
        }
    else:
        grad_dict = {
            'dL_dKmm': dL_dKmm,
            'dL_dKdiag': dL_dpsi0,
            'dL_dKnm': dL_dpsi1,
            'dL_dthetaL': dL_dthetaL,
            'dL_dqU_mean': dL_dqU_mean,
            'dL_dqU_var': dL_dqU_var
        }

    if uncertain_outputs:
        m, s = Y.mean, Y.variance
        grad_dict['dL_dYmean'] = -m * beta + dtrtrs(Lm, psi1.T)[0].T.dot(dtrtrs(Lm, mu)[0])
        grad_dict['dL_dYvar'] = beta / -2.

    return post, logL, grad_dict
def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None, Lm=None,
              dL_dKmm=None, fixed_covs_kerns=None, **kw):

    _, output_dim = Y.shape

    uncertain_inputs = isinstance(X, VariationalPosterior)

    # see whether we've got a different noise variance for each datum
    beta = 1. / np.fmax(likelihood.gaussian_variance(Y_metadata), 1e-6)
    # VVT_factor is a matrix such that tdot(VVT_factor) = VVT...this is for efficiency!
    # self.YYTfactor = self.get_YYTfactor(Y)
    # VVT_factor = self.get_VVTfactor(self.YYTfactor, beta)
    het_noise = beta.size > 1
    if het_noise:
        raise NotImplementedError("Heteroscedastic noise not implemented, "
                                  "should be possible though, feel free to try implementing it :)")
    if beta.ndim == 1:
        beta = beta[:, None]

    # do the inference:
    num_inducing = Z.shape[0]
    num_data = Y.shape[0]

    # kernel computations, using BGPLVM notation
    Kmm = kern.K(Z).copy()
    diag.add(Kmm, self.const_jitter)
    if Lm is None:
        Lm = jitchol(Kmm)

    # The rather complex computations of A, and the psi stats
    if uncertain_inputs:
        psi0 = kern.psi0(Z, X)
        psi1 = kern.psi1(Z, X)
        if het_noise:
            psi2_beta = np.sum([kern.psi2(Z, X[i:i+1, :]) * beta_i
                                for i, beta_i in enumerate(beta)], 0)
        else:
            psi2_beta = kern.psi2(Z, X) * beta
        LmInv = dtrtri(Lm)
        A = LmInv.dot(psi2_beta.dot(LmInv.T))
    else:
        psi0 = kern.Kdiag(X)
        psi1 = kern.K(X, Z)
        tmp = psi1 * np.sqrt(beta)  # identical for the het_noise and homoscedastic cases
        tmp, _ = dtrtrs(Lm, tmp.T, lower=1)
        A = tdot(tmp)

    # factor B
    B = np.eye(num_inducing) + A
    LB = jitchol(B)

    # back substitute C into psi1Vf
    # tmp, _ = dtrtrs(Lm, psi1.T.dot(VVT_factor), lower=1, trans=0)
    # _LBi_Lmi_psi1Vf, _ = dtrtrs(LB, tmp, lower=1, trans=0)
    # tmp, _ = dtrtrs(LB, _LBi_Lmi_psi1Vf, lower=1, trans=1)
    # Cpsi1Vf, _ = dtrtrs(Lm, tmp, lower=1, trans=1)

    # data fit and derivative of L w.r.t. Kmm
    # delit = tdot(_LBi_Lmi_psi1Vf)

    # Expose YYT to get additional covariates in (YYT + Kgg):
    tmp, _ = dtrtrs(Lm, psi1.T, lower=1, trans=0)
    _LBi_Lmi_psi1, _ = dtrtrs(LB, tmp, lower=1, trans=0)
    tmp, _ = dtrtrs(LB, _LBi_Lmi_psi1, lower=1, trans=1)
    Cpsi1, _ = dtrtrs(Lm, tmp, lower=1, trans=1)

    # TODO: cache this:
    # Compute fixed covariates covariance:
    if fixed_covs_kerns is not None:
        K_fixed = 0
        for name, [cov, k] in fixed_covs_kerns.items():
            K_fixed += k.K(cov)

        YYT_covs = tdot(Y) + K_fixed
        data_term = beta**2 * YYT_covs
        trYYT_covs = np.trace(YYT_covs)
    else:
        data_term = beta**2 * tdot(Y)
        trYYT_covs = self.get_trYYT(Y)

    delit = mdot(_LBi_Lmi_psi1, data_term, _LBi_Lmi_psi1.T)
    data_fit = np.trace(delit)

    DBi_plus_BiPBi = backsub_both_sides(LB, output_dim * np.eye(num_inducing) + delit)
    if dL_dKmm is None:
        delit = -0.5 * DBi_plus_BiPBi
        delit += -0.5 * B * output_dim
        delit += output_dim * np.eye(num_inducing)
        # Compute dL_dKmm
        dL_dKmm = backsub_both_sides(Lm, delit)

    # derivatives of L w.r.t. psi
    dL_dpsi0, dL_dpsi1, dL_dpsi2 = _compute_dL_dpsi(
        num_inducing, num_data, output_dim, beta, Lm,
        data_term, Cpsi1, DBi_plus_BiPBi,
        psi1, het_noise, uncertain_inputs)

    # log marginal likelihood
    log_marginal = _compute_log_marginal_likelihood(
        likelihood, num_data, output_dim, beta, het_noise,
        psi0, A, LB, trYYT_covs, data_fit, Y)

    if self.save_per_dim:
        self.saved_vals = [psi0, A, LB, _LBi_Lmi_psi1, beta]

    # No heteroscedastics, so no _LBi_Lmi_psi1Vf:
    # For the interested reader, try implementing the heteroscedastic version, it should be possible
    _LBi_Lmi_psi1Vf = None  # Is just here for documentation, so you can see what it was.

    # noise derivatives
    dL_dR = _compute_dL_dR(
        likelihood, het_noise, uncertain_inputs, LB,
        _LBi_Lmi_psi1Vf, DBi_plus_BiPBi, Lm, A,
        psi0, psi1, beta, data_fit, num_data, output_dim, trYYT_covs, Y, None)

    dL_dthetaL = likelihood.exact_inference_gradients(dL_dR, Y_metadata)

    # put the gradients in the right places
    if uncertain_inputs:
        grad_dict = {'dL_dKmm': dL_dKmm,
                     'dL_dpsi0': dL_dpsi0,
                     'dL_dpsi1': dL_dpsi1,
                     'dL_dpsi2': dL_dpsi2,
                     'dL_dthetaL': dL_dthetaL}
    else:
        grad_dict = {'dL_dKmm': dL_dKmm,
                     'dL_dKdiag': dL_dpsi0,
                     'dL_dKnm': dL_dpsi1,
                     'dL_dthetaL': dL_dthetaL}

    if fixed_covs_kerns is not None:
        # For now, we do not take the gradients, we can compute them,
        # but the maximum likelihood solution is to switch off the additional covariates...
        dL_dcovs = beta * np.eye(K_fixed.shape[0]) - beta**2 * tdot(_LBi_Lmi_psi1.T)
        grad_dict['dL_dcovs'] = -.5 * dL_dcovs

    # get sufficient things for posterior prediction
    # TODO: do we really want to do this in the loop?
    woodbury_vector = (beta * Cpsi1).dot(Y)
    # Equivalent explicit computation (kept for reference):
    # psi1V = np.dot(Y.T * beta, psi1).T
    # tmp, _ = dtrtrs(Lm, psi1V, lower=1, trans=0)
    # tmp, _ = dpotrs(LB, tmp, lower=1)
    # woodbury_vector, _ = dtrtrs(Lm, tmp, lower=1, trans=1)

    Bi, _ = dpotri(LB, lower=1)
    symmetrify(Bi)
    Bi = -Bi
    diag.add(Bi, 1)

    woodbury_inv = backsub_both_sides(Lm, Bi)

    # construct a posterior object
    post = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector,
                     K=Kmm, mean=None, cov=None, K_chol=Lm)
    return post, log_marginal, grad_dict
def inference(self, kern_r, kern_c, Xr, Xc, Zr, Zc, likelihood, Y,
              qU_mean, qU_var_r, qU_var_c):
    """
    The SVI-VarDTC inference
    """
    N, D, Mr, Mc, Qr, Qc = (Y.shape[0], Y.shape[1], Zr.shape[0], Zc.shape[0],
                            Zr.shape[1], Zc.shape[1])

    uncertain_inputs_r = isinstance(Xr, VariationalPosterior)
    uncertain_inputs_c = isinstance(Xc, VariationalPosterior)
    uncertain_outputs = isinstance(Y, VariationalPosterior)

    beta = 1. / likelihood.variance

    psi0_r, psi1_r, psi2_r = self.gatherPsiStat(kern_r, Xr, Zr, uncertain_inputs_r)
    psi0_c, psi1_c, psi2_c = self.gatherPsiStat(kern_c, Xc, Zc, uncertain_inputs_c)

    #======================================================================
    # Compute Common Components
    #======================================================================

    Kuu_r = kern_r.K(Zr).copy()
    diag.add(Kuu_r, self.const_jitter)
    Lr = jitchol(Kuu_r)

    Kuu_c = kern_c.K(Zc).copy()
    diag.add(Kuu_c, self.const_jitter)
    Lc = jitchol(Kuu_c)

    mu, Sr, Sc = qU_mean, qU_var_r, qU_var_c
    LSr = jitchol(Sr)
    LSc = jitchol(Sc)

    LcInvMLrInvT = dtrtrs(Lc, dtrtrs(Lr, mu.T)[0].T)[0]
    LcInvPsi2_cLcInvT = backsub_both_sides(Lc, psi2_c, 'right')
    LrInvPsi2_rLrInvT = backsub_both_sides(Lr, psi2_r, 'right')
    LcInvLSc = dtrtrs(Lc, LSc)[0]
    LrInvLSr = dtrtrs(Lr, LSr)[0]
    LcInvScLcInvT = tdot(LcInvLSc)
    LrInvSrLrInvT = tdot(LrInvLSr)
    LcInvPsi1_cT = dtrtrs(Lc, psi1_c.T)[0]
    LrInvPsi1_rT = dtrtrs(Lr, psi1_r.T)[0]

    tr_LrInvPsi2_rLrInvT_LrInvSrLrInvT = (LrInvPsi2_rLrInvT * LrInvSrLrInvT).sum()
    tr_LcInvPsi2_cLcInvT_LcInvScLcInvT = (LcInvPsi2_cLcInvT * LcInvScLcInvT).sum()
    tr_LrInvSrLrInvT = np.square(LrInvLSr).sum()
    tr_LcInvScLcInvT = np.square(LcInvLSc).sum()
    tr_LrInvPsi2_rLrInvT = np.trace(LrInvPsi2_rLrInvT)
    tr_LcInvPsi2_cLcInvT = np.trace(LcInvPsi2_cLcInvT)

    #======================================================================
    # Compute log-likelihood
    #======================================================================

    logL_A = - np.square(Y).sum() \
             - (LcInvMLrInvT.T.dot(LcInvPsi2_cLcInvT).dot(LcInvMLrInvT)*LrInvPsi2_rLrInvT).sum() \
             - tr_LrInvPsi2_rLrInvT_LrInvSrLrInvT * tr_LcInvPsi2_cLcInvT_LcInvScLcInvT \
             + 2 * (Y * LcInvPsi1_cT.T.dot(LcInvMLrInvT).dot(LrInvPsi1_rT)).sum() \
             - psi0_c * psi0_r + tr_LrInvPsi2_rLrInvT * tr_LcInvPsi2_cLcInvT

    logL = -N*D/2.*(np.log(2.*np.pi) - np.log(beta)) + beta/2.*logL_A \
           - Mc * (np.log(np.diag(Lr)).sum() - np.log(np.diag(LSr)).sum()) \
           - Mr * (np.log(np.diag(Lc)).sum() - np.log(np.diag(LSc)).sum()) \
           - np.square(LcInvMLrInvT).sum()/2. - tr_LrInvSrLrInvT * tr_LcInvScLcInvT/2. \
           + Mr*Mc/2.

    #======================================================================
    # Compute dL_dKuu
    #======================================================================

    tmp = beta * LcInvPsi2_cLcInvT.dot(LcInvMLrInvT).dot(LrInvPsi2_rLrInvT).dot(LcInvMLrInvT.T) \
          + beta * tr_LrInvPsi2_rLrInvT_LrInvSrLrInvT * LcInvPsi2_cLcInvT.dot(LcInvScLcInvT) \
          - beta * LcInvMLrInvT.dot(LrInvPsi1_rT).dot(Y.T).dot(LcInvPsi1_cT.T) \
          - beta/2. * tr_LrInvPsi2_rLrInvT * LcInvPsi2_cLcInvT - Mr/2.*np.eye(Mc) \
          + tdot(LcInvMLrInvT)/2. + tr_LrInvSrLrInvT/2. * LcInvScLcInvT

    dL_dKuu_c = backsub_both_sides(Lc, tmp, 'left')
    dL_dKuu_c += dL_dKuu_c.T
    dL_dKuu_c *= 0.5

    tmp = beta * LcInvMLrInvT.T.dot(LcInvPsi2_cLcInvT).dot(LcInvMLrInvT).dot(LrInvPsi2_rLrInvT) \
          + beta * tr_LcInvPsi2_cLcInvT_LcInvScLcInvT * LrInvPsi2_rLrInvT.dot(LrInvSrLrInvT) \
          - beta * LrInvPsi1_rT.dot(Y.T).dot(LcInvPsi1_cT.T).dot(LcInvMLrInvT) \
          - beta/2. * tr_LcInvPsi2_cLcInvT * LrInvPsi2_rLrInvT - Mc/2.*np.eye(Mr) \
          + tdot(LcInvMLrInvT.T)/2. + tr_LcInvScLcInvT/2. * LrInvSrLrInvT

    dL_dKuu_r = backsub_both_sides(Lr, tmp, 'left')
    dL_dKuu_r += dL_dKuu_r.T
    dL_dKuu_r *= 0.5

    #======================================================================
    # Compute dL_dthetaL
    #======================================================================

    dL_dthetaL = -D * N * beta / 2. - logL_A * beta * beta / 2.

    #======================================================================
    # Compute dL_dqU
    #======================================================================

    tmp = -beta * LcInvPsi2_cLcInvT.dot(LcInvMLrInvT).dot(LrInvPsi2_rLrInvT) \
          + beta * LcInvPsi1_cT.dot(Y).dot(LrInvPsi1_rT.T) - LcInvMLrInvT
    dL_dqU_mean = dtrtrs(Lc, dtrtrs(Lr, tmp.T, trans=1)[0].T, trans=1)[0]

    LScInv = dtrtri(LSc)
    tmp = -beta/2. * tr_LrInvPsi2_rLrInvT_LrInvSrLrInvT * LcInvPsi2_cLcInvT \
          - tr_LrInvSrLrInvT/2. * np.eye(Mc)
    dL_dqU_var_c = backsub_both_sides(Lc, tmp, 'left') + tdot(LScInv.T) * Mr / 2.

    LSrInv = dtrtri(LSr)
    tmp = -beta/2. * tr_LcInvPsi2_cLcInvT_LcInvScLcInvT * LrInvPsi2_rLrInvT \
          - tr_LcInvScLcInvT/2. * np.eye(Mr)
    dL_dqU_var_r = backsub_both_sides(Lr, tmp, 'left') + tdot(LSrInv.T) * Mc / 2.

    #======================================================================
    # Compute the Posterior distribution of inducing points p(u|Y)
    #======================================================================

    post = PosteriorMultioutput(LcInvMLrInvT=LcInvMLrInvT,
                                LcInvScLcInvT=LcInvScLcInvT,
                                LrInvSrLrInvT=LrInvSrLrInvT,
                                Lr=Lr, Lc=Lc, kern_r=kern_r, Xr=Xr, Zr=Zr)

    #======================================================================
    # Compute dL_dpsi
    #======================================================================

    dL_dpsi0_r = -psi0_c * beta / 2. * np.ones((D, ))
    dL_dpsi0_c = -psi0_r * beta / 2. * np.ones((N, ))

    dL_dpsi1_c = beta * dtrtrs(Lc, (Y.dot(LrInvPsi1_rT.T).dot(LcInvMLrInvT.T)).T, trans=1)[0].T
    dL_dpsi1_r = beta * dtrtrs(Lr, (Y.T.dot(LcInvPsi1_cT.T).dot(LcInvMLrInvT)).T, trans=1)[0].T

    tmp = beta / 2. * (-LcInvMLrInvT.dot(LrInvPsi2_rLrInvT).dot(LcInvMLrInvT.T)
                       - tr_LrInvPsi2_rLrInvT_LrInvSrLrInvT * LcInvScLcInvT
                       + tr_LrInvPsi2_rLrInvT * np.eye(Mc))
    dL_dpsi2_c = backsub_both_sides(Lc, tmp, 'left')

    tmp = beta / 2. * (-LcInvMLrInvT.T.dot(LcInvPsi2_cLcInvT).dot(LcInvMLrInvT)
                       - tr_LcInvPsi2_cLcInvT_LcInvScLcInvT * LrInvSrLrInvT
                       + tr_LcInvPsi2_cLcInvT * np.eye(Mr))
    dL_dpsi2_r = backsub_both_sides(Lr, tmp, 'left')

    if not uncertain_inputs_r:
        dL_dpsi1_r += psi1_r.dot(dL_dpsi2_r + dL_dpsi2_r.T)
    if not uncertain_inputs_c:
        dL_dpsi1_c += psi1_c.dot(dL_dpsi2_c + dL_dpsi2_c.T)

    grad_dict = {
        'dL_dthetaL': dL_dthetaL,
        'dL_dqU_mean': dL_dqU_mean,
        'dL_dqU_var_c': dL_dqU_var_c,
        'dL_dqU_var_r': dL_dqU_var_r,
        'dL_dKuu_c': dL_dKuu_c,
        'dL_dKuu_r': dL_dKuu_r,
    }

    if uncertain_inputs_c:
        grad_dict['dL_dpsi0_c'] = dL_dpsi0_c
        grad_dict['dL_dpsi1_c'] = dL_dpsi1_c
        grad_dict['dL_dpsi2_c'] = dL_dpsi2_c
    else:
        grad_dict['dL_dKdiag_c'] = dL_dpsi0_c
        grad_dict['dL_dKfu_c'] = dL_dpsi1_c

    if uncertain_inputs_r:
        grad_dict['dL_dpsi0_r'] = dL_dpsi0_r
        grad_dict['dL_dpsi1_r'] = dL_dpsi1_r
        grad_dict['dL_dpsi2_r'] = dL_dpsi2_r
    else:
        grad_dict['dL_dKdiag_r'] = dL_dpsi0_r
        grad_dict['dL_dKfu_r'] = dL_dpsi1_r

    return post, logL, grad_dict
def inference(self, kern, X, Z, likelihood, Y, mean_function=None, Y_metadata=None):
    assert mean_function is None, "inference with a mean function not implemented"

    num_inducing, _ = Z.shape
    num_data, output_dim = Y.shape

    # make sure the noise is not heteroscedastic
    sigma_n = likelihood.gaussian_variance(Y_metadata)
    if sigma_n.size > 1:
        raise NotImplementedError("no heteroscedastic noise with this implementation of PEP")

    Kmm = kern.K(Z)
    Knn = kern.Kdiag(X)
    Knm = kern.K(X, Z)
    U = Knm

    # factor Kmm
    diag.add(Kmm, self.const_jitter)
    Kmmi, L, Li, _ = pdinv(Kmm)

    # compute beta_star, the effective noise precision
    LiUT = np.dot(Li, U.T)
    sigma_star = sigma_n + self.alpha * (Knn - np.sum(np.square(LiUT), 0))
    beta_star = 1. / sigma_star

    # Compute and factor A
    A = tdot(LiUT * np.sqrt(beta_star)) + np.eye(num_inducing)
    LA = jitchol(A)

    # back substitute to get b, P, v
    URiy = np.dot(U.T * beta_star, Y)
    tmp, _ = dtrtrs(L, URiy, lower=1)
    b, _ = dtrtrs(LA, tmp, lower=1)
    tmp, _ = dtrtrs(LA, b, lower=1, trans=1)
    v, _ = dtrtrs(L, tmp, lower=1, trans=1)
    tmp, _ = dtrtrs(LA, Li, lower=1, trans=0)
    P = tdot(tmp.T)

    alpha_const_term = (1.0 - self.alpha) / self.alpha

    # compute log marginal
    log_marginal = -0.5*num_data*output_dim*np.log(2*np.pi) + \
                   -np.sum(np.log(np.diag(LA)))*output_dim + \
                   0.5*output_dim*(1 + alpha_const_term)*np.sum(np.log(beta_star)) + \
                   -0.5*np.sum(np.square(Y.T*np.sqrt(beta_star))) + \
                   0.5*np.sum(np.square(b)) + 0.5*alpha_const_term*num_data*np.log(sigma_n)

    # compute dL_dR
    Uv = np.dot(U, v)
    dL_dR = 0.5*(np.sum(U*np.dot(U, P), 1) - (1.0 + alpha_const_term)/beta_star
                 + np.sum(np.square(Y), 1) - 2.*np.sum(Uv*Y, 1)
                 + np.sum(np.square(Uv), 1))*beta_star**2

    # Compute dL_dKmm
    vvT_P = tdot(v.reshape(-1, 1)) + P
    dL_dK = 0.5 * (Kmmi - vvT_P)
    KiU = np.dot(Kmmi, U.T)
    dL_dK += self.alpha * np.dot(KiU * dL_dR, KiU.T)

    # Compute dL_dU
    vY = np.dot(v.reshape(-1, 1), Y.T)
    dL_dU = vY - np.dot(vvT_P, U.T)
    dL_dU *= beta_star
    dL_dU -= self.alpha * 2. * KiU * dL_dR

    dL_dthetaL = likelihood.exact_inference_gradients(dL_dR)
    dL_dthetaL += 0.5 * alpha_const_term * num_data / sigma_n

    grad_dict = {
        'dL_dKmm': dL_dK,
        'dL_dKdiag': dL_dR * self.alpha,
        'dL_dKnm': dL_dU.T,
        'dL_dthetaL': dL_dthetaL
    }

    # construct a posterior object
    post = Posterior(woodbury_inv=Kmmi - P, woodbury_vector=v, K=Kmm,
                     mean=None, cov=None, K_chol=L)

    return post, log_marginal, grad_dict
def predict_value(self, xvals, include_noise=True, full_cov=False):
    # Calculate for the test points
    assert xvals.shape[0] >= 1
    assert xvals.shape[1] == self.dimension

    n_points, input_dim = xvals.shape

    # With no observations, predict 0 mean everywhere and prior variance
    if self.xvals is None:
        return np.zeros((n_points, 1)), np.ones((n_points, 1)) * self.variance

    # Find neighbors within radius
    point_group = self.spatial_tree.query_ball_point(xvals, self.neighbor_radius)
    point_list = []
    for points in point_group:
        for index in points:
            point_list.append(index)
    point_set = set(point_list)
    xpoints = [self.xvals[index] for index in point_set]
    zpoints = [self.zvals[index] for index in point_set]

    # Brute-force check the points in the waiting queue
    if self.xwait is not None and self.xwait.shape[0] > 0:
        wait_list = []
        for i, u in enumerate(self.xwait):
            for j, v in enumerate(xvals):
                dist = sp.spatial.distance.minkowski(u, v, p=2.0)
                if dist <= self.neighbor_radius:
                    wait_list.append(i)
        wait_set = set(wait_list)
        xpoints = [self.xwait[index] for index in wait_set] + xpoints
        zpoints = [self.zwait[index] for index in wait_set] + zpoints

    xpoints = np.array(xpoints).reshape(-1, self.dimension)
    zpoints = np.array(zpoints).reshape(-1, 1)

    # With no nearby points, fall back to the prior
    if xpoints.shape[0] == 0:
        return np.zeros((n_points, 1)), np.ones((n_points, 1)) * self.variance

    Kx = self.kern.K(xpoints, xvals)
    K = self.kern.K(xpoints, xpoints)
    # Add some noise to keep the matrix well conditioned
    Ky = K.copy()
    diag.add(Ky, self.noise + 1e-8)
    Wi, LW, LWi, W_logdet = pdinv(Ky)

    woodbury_inv = Wi
    woodbury_vector = np.dot(woodbury_inv, zpoints)

    mu = np.dot(Kx.T, woodbury_vector)
    if len(mu.shape) == 1:
        mu = mu.reshape(-1, 1)

    if full_cov:
        Kxx = self.kern.K(xvals)
        var = Kxx - np.dot(Kx.T, np.dot(woodbury_inv, Kx))
    else:
        Kxx = self.kern.Kdiag(xvals)
        var = (Kxx - np.sum(np.dot(woodbury_inv.T, Kx) * Kx, 0))[:, None]

    # If model noise should be included in the prediction
    if include_noise:
        var += self.noise

    # Optional cross-check against the global (legacy) model
    update_legacy = False
    if update_legacy:
        if self.model is None:
            # With no observations, predict 0 mean everywhere and prior variance
            mean, variance = np.zeros((n_points, 1)), np.ones((n_points, 1)) * self.variance
        else:
            # Else, return the predicted values
            mean, variance = self.model.predict(xvals, full_cov=False,
                                                include_likelihood=include_noise)
        if xvals.shape[0] < 10:
            print(np.sum(mu - mean))
            print(np.sum(var - variance))

    return mu, var
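# The neighbor lookup above assumes self.spatial_tree is a SciPy k-d tree.
# A minimal standalone sketch of that pattern (cKDTree and query_ball_point
# are real scipy.spatial APIs; the points and radius here are arbitrary):
import numpy as np
from scipy import spatial

pts = np.random.default_rng(3).uniform(size=(100, 2))
tree = spatial.cKDTree(pts)
queries = np.array([[0.5, 0.5], [0.1, 0.9]])
# One list of point indices per query row, all within the given radius:
neighbor_lists = tree.query_ball_point(queries, r=0.2)
nearby = {i for lst in neighbor_lists for i in lst}  # flattened, deduplicated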
def reset_epoch(self):
    # update kernel with new hyperparams
    self.kern.lengthscale = self.params['ls'].copy()
    self.kern.variance = self.params['σ0']**2
    σ_n2 = self.params['σn']**2
    Z = self.params['R']

    # initialize all prior quantities
    self.n = np.zeros(self.num_inducing)  # natural mean vector (num_output = 1!)
    self.P = self.kern.K(Z)               # covariance matrix
    diag.add(self.P, self.const_jitter)
    L_P = jitchol(self.P)
    self.C, _ = dpotri(L_P, lower=1)      # precision matrix
    self._log_marginal_likelihood = 0.0   # log marginal likelihood
    self._log_Det_C = -2 * np.sum(np.log(np.diag(L_P)))  # log determinant of C

    self.Krr = self.P
    self.iKrr = self.C

    # derivative quantities
    J = self.num_inducing                         # number of inducing points
    JD = self.num_inducing * self.kern.input_dim  # number of inducing points times dimension

    if self.params_EST['R']:
        # derivative of natural mean wrt inducing inputs (Rjd: R11,...,R1D, R21,...,RJD)
        self.dn_dR = np.zeros((J, JD))
        # derivative of precision matrix wrt inducing inputs (Rjd: R11,...,R1D, R21,...,RJD)
        self.dC_dR = np.zeros((J, J, JD))
        # gradients of inducing inputs
        self.dψ_dR = np.zeros((J, self.kern.input_dim))

        dKrr_sparse = self.kern.dK_dX(Z)
        for j in range(0, self.num_inducing):
            for d in range(0, self.kern.input_dim):
                jd = j * self.kern.input_dim + d
                self.dC_dR[:, :, jd] = -np.outer(
                    np.dot(self.C, dKrr_sparse[:, j, d]), self.C[:, j])
                self.dC_dR[:, :, jd] = self.dC_dR[:, :, jd] + self.dC_dR[:, :, jd].T
    else:
        self.dψ_dR = 0.0
        self.dn_dR = 0.0
        self.dC_dR = 0.0

    dKrr_dσ02 = self.kern.dK_dσ02(Z)
    self.dn_dσ02 = np.zeros(J)
    self.dC_dσ02 = -np.dot(np.dot(self.C, dKrr_dσ02), self.C)
    self.dψ_dσ02 = 0.0

    dKrr_dl = self.kern.dK_dl(Z)
    num_lengthscales = dKrr_dl.shape[2]
    self.dn_dl = np.zeros((J, num_lengthscales))
    self.dC_dl = np.zeros((J, J, num_lengthscales))
    self.dψ_dl = np.zeros(num_lengthscales)
    for d in range(0, num_lengthscales):
        self.dC_dl[:, :, d] = -np.dot(np.dot(self.C, dKrr_dl[:, :, d]), self.C)

    self.dn_dσn2 = np.zeros(J)
    self.dC_dσn2 = np.zeros((J, J))
    self.dψ_dσn2 = 0.0
def inference(self, n0, C0, P0, log_marginal_likelihood0, log_Det_C0,
              dn_dR, dC_dR, dψ_dR, dn_dσ02, dC_dσ02, dψ_dσ02,
              dn_dl, dC_dl, dψ_dl, dn_dσn2, dC_dσn2, dψ_dσn2, X, Y):
    α = self.α
    α_const = (1 - α) / α
    num_data, _ = Y.shape
    num_inducing = n0.shape[0]  # it only works with num_outputs = 1
    y = Y[:, 0]                 # it only works with num_outputs = 1

    # update kernel with new hyperparams
    self.kern.lengthscale = self.params['ls'].copy()
    self.kern.variance = self.params['σ0']**2
    σ_n2 = self.params['σn']**2
    Z = self.params['R']

    # compute kernel quantities
    Krr = self.kern.K(Z)              # kernel matrix of inducing inputs
    diag.add(Krr, self.const_jitter)  # add some jitter for stability reasons
    Kxr = self.kern.K(X, Z)           # kernel matrix between mini-batch and inducing inputs
    kxx = self.kern.Kdiag(X)          # diagonal of kernel matrix of the mini-batch
    L_K = jitchol(Krr)                # lower Cholesky factor of the kernel matrix
    iKrr, _ = dpotri(L_K)             # inverse of kernel matrix of inducing inputs
    self.Krr = Krr
    self.iKrr = iKrr

    # compute state space matrices (and temporary matrices)
    H = np.dot(Kxr, iKrr)             # observation matrix
    Ht = H.T                          # transpose of observation matrix
    d = kxx - np.sum(H * Kxr, 1)      # diagonal of correction matrix
    v = α * d + σ_n2                  # diagonal of actual noise matrix
    a = α_const * (np.sum(np.log(v)) - num_data * np.log(σ_n2))  # PEP correction term in the marginal likelihood
    A_ = Ht / v
    α_ = np.dot(P0, n0)
    r = y - np.dot(H, α_)

    # update natural mean and precision + inversion yielding covariance matrix
    n1 = n0 + np.dot(A_, y)
    C1 = C0 + np.dot(A_, H)
    L_C = jitchol(C1)
    P1, _ = dpotri(L_C)

    # more temporary matrices
    B_ = np.dot(H, P1)  # iV * H * Li'  # LAPACK?
    β_ = r / v
    γ_ = np.dot(B_.T, β_)
    δ_ = β_ - np.dot(A_.T, γ_)

    # update marginal log likelihood
    log_Det_C1 = 2 * np.sum(np.log(np.diag(L_C)))
    log_Det_V = np.sum(np.log(v))
    Δ0 = num_data * np.log(2 * np.pi) + log_Det_C1 - log_Det_C0 \
         + log_Det_V + np.sum(r * δ_) + a
    log_marginal_likelihood1 = log_marginal_likelihood0 - 0.5 * Δ0

    # compute constant derivatives of likelihood wrt kernel matrices
    dL_dH = 2 * ((B_.T / v).T - np.outer(δ_, α_ + γ_))
    dL_dv = -(np.sum(H * B_, 1) - v / α + (r - np.dot(H, γ_))**2) / (v**2)
    D_ = α * (Ht * dL_dv).T
    E_ = np.dot(dL_dH, iKrr)
    dL_dKxr = E_ - 2 * D_
    dL_dKrr = -np.dot(Ht, E_ - D_)
    dL_dkxx = α * dL_dv
    dL_dn = -2 * np.dot(P0, np.dot(Ht, δ_))
    dL_dC = P1 - P0 - np.outer(dL_dn, α_) + np.outer(γ_, γ_)
    # dL_d_dn = 2*σ_n2*sum(dL_dv) - 2*num_data*α_const  # wrt dn
    dL_d_dn = sum(dL_dv) - num_data * α_const / σ_n2     # wrt σn2

    iVy = y / v
    dH = np.zeros((num_data, num_inducing))
    scaleFact = 1

    if self.params_EST['R']:
        # compute sparse kernel derivatives
        dKrr_sparse = self.kern.dK_dX(Z)
        dKxr_sparse = self.kern.dK_dX(X, Z)

        # loop over all inducing points
        for j in range(0, num_inducing):
            for d in range(0, self.D):
                jd = j * self.D + d
                kjd = dKrr_sparse[:, j, d]
                k2jd = dKxr_sparse[:, j, d]
                delta = -0.5 * (np.sum(dL_dKrr[:, j] * kjd)
                                + np.sum(dL_dKrr[j, :] * kjd)
                                + np.sum(dL_dKxr[:, j] * k2jd)
                                + np.sum(dL_dn * dn_dR[:, jd])
                                + np.sum(dL_dC * dC_dR[:, :, jd]))
                dψ_dR[j, d] = delta * scaleFact

                dH = -np.outer(H[:, j], kjd)
                dH[:, j] += -np.dot(H, kjd) + k2jd
                dH = np.dot(dH, iKrr)
                dd = -np.sum(dH * Kxr, 1) - H[:, j] * k2jd  # dKxx_diag for theta!
                div = -α * dd / (v**2)
                dn_dR[:, jd] = dn_dR[:, jd] + np.dot(dH.T, iVy) + np.dot(Ht, div * y)
                F_ = np.dot(A_, dH)
                dC_dR[:, :, jd] = dC_dR[:, :, jd] + F_ + F_.T + np.dot(Ht * div, H)

    # compute kernel derivatives wrt variance_0
    dKrr_dσ02 = self.kern.dK_dσ02(Z)
    dKxr_dσ02 = self.kern.dK_dσ02(X, Z)
    dkxx_dσ02 = self.kern.dK_dσ02_diag(X)

    delta = -0.5 * (np.sum(dL_dKrr * dKrr_dσ02) + np.sum(dL_dKxr * dKxr_dσ02)
                    + np.sum(dL_dn * dn_dσ02) + np.sum(dL_dC * dC_dσ02))
    delta = delta - 0.5 * np.sum(dL_dkxx * dkxx_dσ02)
    dψ_dσ02 = delta * scaleFact

    dH = dKxr_dσ02 - np.dot(H, dKrr_dσ02)
    dH = np.dot(dH, iKrr)
    dd = dkxx_dσ02 - np.sum(dH * Kxr, 1) - np.sum(H * dKxr_dσ02, 1)
    div = -α * dd / (v**2)
    dn_dσ02 = dn_dσ02 + np.dot(dH.T, iVy) + np.dot(Ht, div * y)
    F_ = np.dot(A_, dH)
    dC_dσ02 = dC_dσ02 + F_ + F_.T + np.dot(Ht * div, H)

    # compute kernel derivatives wrt lengthscale(s)
    dKrr_dl = self.kern.dK_dl(Z)
    dKxr_dl = self.kern.dK_dl(X, Z)
    # dkxx_dl = kern.dK_dl_diag(X)  # zero anyway

    # loop over all lengthscales
    num_lengthscales = dKrr_dl.shape[2]
    for d in range(0, num_lengthscales):
        delta = -0.5 * (np.sum(dL_dKrr * dKrr_dl[:, :, d])
                        + np.sum(dL_dKxr * dKxr_dl[:, :, d])
                        + np.sum(dL_dn * dn_dl[:, d])
                        + np.sum(dL_dC * dC_dl[:, :, d]))
        dψ_dl[d] = delta * scaleFact

        dH = dKxr_dl[:, :, d] - np.dot(H, dKrr_dl[:, :, d])
        dH = np.dot(dH, iKrr)
        dd = -np.sum(dH * Kxr, 1) - np.sum(H * dKxr_dl[:, :, d], 1)
        div = -α * dd / (v**2)
        dn_dl[:, d] = dn_dl[:, d] + np.dot(dH.T, iVy) + np.dot(Ht, div * y)
        F_ = np.dot(A_, dH)
        dC_dl[:, :, d] = dC_dl[:, :, d] + F_ + F_.T + np.dot(Ht * div, H)

    # Gaussian noise variance
    delta = -0.5 * (np.sum(dL_dn * dn_dσn2) + np.sum(dL_dC * dC_dσn2) + dL_d_dn)
    dψ_dσn2 = delta * scaleFact
    div = -1.0 / (v**2)
    dn_dσn2 = dn_dσn2 + np.dot(Ht, div * y)
    dC_dσn2 = dC_dσn2 + np.dot(Ht * div, H)

    m1 = np.dot(P1, n1)

    return (log_marginal_likelihood1, n1, m1, C1, P1, log_Det_C1,
            dn_dR, dC_dR, dψ_dR, dn_dσ02, dC_dσ02, dψ_dσ02,
            dn_dl, dC_dl, dψ_dl, dn_dσn2, dC_dσn2, dψ_dσn2)
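# The recursion above (n1 = n0 + H^T V^-1 y, C1 = C0 + H^T V^-1 H) is the
# standard information-form update for a linear-Gaussian model. A standalone
# NumPy check against the textbook covariance-form posterior, with posterior
# covariance P1 = (C0 + H^T V^-1 H)^-1 and mean m1 = P1 (n0 + H^T V^-1 y):
import numpy as np

rng = np.random.default_rng(4)
J_chk, B_chk = 3, 10              # latent dim, mini-batch size
C0_chk = np.eye(J_chk)            # prior precision
n0_chk = np.zeros(J_chk)          # prior natural mean
H_chk = rng.normal(size=(B_chk, J_chk))
v_chk = np.full(B_chk, 0.1)       # diagonal noise variances
y_chk = rng.normal(size=B_chk)

n1_chk = n0_chk + H_chk.T @ (y_chk / v_chk)
C1_chk = C0_chk + (H_chk.T / v_chk) @ H_chk
m1_chk = np.linalg.inv(C1_chk) @ n1_chk

# Covariance-form Bayesian update of the same linear model (prior N(0, C0^-1)):
S_chk = H_chk @ np.linalg.inv(C0_chk) @ H_chk.T + np.diag(v_chk)
gain = np.linalg.inv(C0_chk) @ H_chk.T @ np.linalg.inv(S_chk)
assert np.allclose(m1_chk, gain @ y_chk)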
def inference_root(self, kern, X, Z, likelihood, Y, Kuu_sigma=None,
                   Y_metadata=None, Lm=None, dL_dKmm=None):
    """
    The first phase of inference:
    Compute: log-likelihood, dL_dKmm

    Cached intermediate results: Kmm, KmmInv,
    """
    num_data, output_dim = Y.shape
    input_dim = Z.shape[0]
    num_data_total = allReduceArrays([np.int32(num_data)], self.mpi_comm)[0]

    uncertain_inputs = isinstance(X, VariationalPosterior)
    uncertain_outputs = isinstance(Y, VariationalPosterior)

    beta = 1. / np.fmax(likelihood.variance, 1e-6)

    psi0, psi2, YRY, psi1, psi1Y, Shalf, psi1S = self.gatherPsiStat(
        kern, X, Z, Y, beta, uncertain_inputs)

    #======================================================================
    # Compute Common Components
    #======================================================================

    try:
        Kmm = kern.K(Z).copy()
        if Kuu_sigma is not None:
            diag.add(Kmm, Kuu_sigma)
        else:
            diag.add(Kmm, self.const_jitter)
        Lm = jitchol(Kmm)

        LmInv = dtrtri(Lm)
        LmInvPsi2LmInvT = LmInv.dot(psi2.dot(LmInv.T))

        Lambda = np.eye(Kmm.shape[0]) + LmInvPsi2LmInvT
        LL = jitchol(Lambda)
        LLInv = dtrtri(LL)
        flag = np.zeros((1, ), dtype=np.int32)
        self.mpi_comm.Bcast(flag, root=self.root)
    except LinAlgError as e:
        flag = np.ones((1, ), dtype=np.int32)
        self.mpi_comm.Bcast(flag, root=self.root)
        raise e

    broadcastArrays([LmInv, LLInv], self.mpi_comm, self.root)
    LmLLInv = LLInv.dot(LmInv)

    logdet_L = 2. * np.sum(np.log(np.diag(LL)))
    b = psi1Y.dot(LmLLInv.T)
    bbt = np.square(b).sum()
    v = b.dot(LmLLInv)
    LLinvPsi1TYYTPsi1LLinvT = tdot(b.T)

    if psi1S is not None:
        psi1SLLinv = psi1S.dot(LmLLInv.T)
        bbt_sum = np.square(psi1SLLinv).sum()
        LLinvPsi1TYYTPsi1LLinvT_sum = tdot(psi1SLLinv.T)
        bbt_sum, LLinvPsi1TYYTPsi1LLinvT_sum = reduceArrays(
            [bbt_sum, LLinvPsi1TYYTPsi1LLinvT_sum], self.mpi_comm, self.root)
        bbt += bbt_sum
        LLinvPsi1TYYTPsi1LLinvT += LLinvPsi1TYYTPsi1LLinvT_sum
        psi1SP = psi1SLLinv.dot(LmLLInv)

    tmp = -LLInv.T.dot(LLinvPsi1TYYTPsi1LLinvT + output_dim * np.eye(input_dim)).dot(LLInv)
    dL_dpsi2R = LmInv.T.dot(tmp + output_dim * np.eye(input_dim)).dot(LmInv) / 2.
    broadcastArrays([dL_dpsi2R], self.mpi_comm, self.root)

    #======================================================================
    # Compute log-likelihood
    #======================================================================

    logL_R = -num_data_total * np.log(beta)
    logL = -(output_dim * (num_data_total * log_2_pi + logL_R + psi0
                           - np.trace(LmInvPsi2LmInvT))
             + YRY - bbt) / 2. - output_dim * logdet_L / 2.

    #======================================================================
    # Compute dL_dKmm
    #======================================================================

    dL_dKmm = dL_dpsi2R - output_dim * LmInv.T.dot(LmInvPsi2LmInvT).dot(LmInv) / 2.

    #======================================================================
    # Compute the Posterior distribution of inducing points p(u|Y)
    #======================================================================

    wd_inv = backsub_both_sides(
        Lm,
        np.eye(input_dim) - backsub_both_sides(LL, np.identity(input_dim),
                                               transpose='left'),
        transpose='left')
    post = Posterior(woodbury_inv=wd_inv, woodbury_vector=v.T, K=Kmm,
                     mean=None, cov=None, K_chol=Lm)

    #======================================================================
    # Compute dL_dthetaL for uncertain input and non-heteroscedastic noise
    #======================================================================

    dL_dthetaL = (YRY * beta + beta * output_dim * psi0
                  - num_data_total * output_dim * beta) / 2. \
                 - beta * (dL_dpsi2R * psi2).sum() \
                 - beta * np.trace(LLinvPsi1TYYTPsi1LLinvT)

    #======================================================================
    # Compute dL_dpsi
    #======================================================================

    dL_dpsi0 = -output_dim * (beta * np.ones((num_data, ))) / 2.

    if uncertain_outputs:
        m, s = Y.mean, Y.variance
        dL_dpsi1 = beta * (np.dot(m, v) + Shalf[:, None] * psi1SP)
    else:
        dL_dpsi1 = beta * np.dot(Y, v)

    if uncertain_inputs:
        dL_dpsi2 = beta * dL_dpsi2R
    else:
        dL_dpsi1 += np.dot(psi1, dL_dpsi2R) * 2.
        dL_dpsi2 = None

    if uncertain_inputs:
        grad_dict = {
            'dL_dKmm': dL_dKmm,
            'dL_dpsi0': dL_dpsi0,
            'dL_dpsi1': dL_dpsi1,
            'dL_dpsi2': dL_dpsi2,
            'dL_dthetaL': dL_dthetaL
        }
    else:
        grad_dict = {
            'dL_dKmm': dL_dKmm,
            'dL_dKdiag': dL_dpsi0,
            'dL_dKnm': dL_dpsi1,
            'dL_dthetaL': dL_dthetaL
        }

    if uncertain_outputs:
        m, s = Y.mean, Y.variance
        psi1LmiLLi = psi1.dot(LmLLInv.T)
        LLiLmipsi1Y = b.T
        grad_dict['dL_dYmean'] = -m * beta + psi1LmiLLi.dot(LLiLmipsi1Y)
        grad_dict['dL_dYvar'] = beta / -2. + np.square(psi1LmiLLi).sum(axis=1) / 2

    return post, logL, grad_dict
def inference(self, kern, X, Z, likelihood, Y, qU_mean, qU_var, Kuu_sigma=None):
    """
    The SVI-VarDTC inference
    """
    N, D, M, Q = Y.shape[0], Y.shape[1], Z.shape[0], Z.shape[1]

    uncertain_inputs = isinstance(X, VariationalPosterior)
    uncertain_outputs = isinstance(Y, VariationalPosterior)

    beta = 1. / likelihood.variance

    psi0, psi2, YRY, psi1, psi1Y = self.gatherPsiStat(kern, X, Z, Y, beta, uncertain_inputs)

    #======================================================================
    # Compute Common Components
    #======================================================================

    Kuu = kern.K(Z).copy()
    if Kuu_sigma is not None:
        diag.add(Kuu, Kuu_sigma)
    else:
        diag.add(Kuu, self.const_jitter)
    Lm = jitchol(Kuu)

    mu, S = qU_mean, qU_var
    Ls = jitchol(S)
    LinvLs = dtrtrs(Lm, Ls)[0]
    Linvmu = dtrtrs(Lm, mu)[0]
    psi1YLinvT = dtrtrs(Lm, psi1Y.T)[0].T

    self.mid = {'qU_L': Ls, 'LinvLu': LinvLs, 'L': Lm, 'Linvmu': Linvmu}

    if uncertain_inputs:
        LmInvPsi2LmInvT = backsub_both_sides(Lm, psi2, 'right')
    else:
        LmInvPsi2LmInvT = tdot(dtrtrs(Lm, psi1.T)[0]) / beta

    LmInvSmuLmInvT = tdot(LinvLs) * D + tdot(Linvmu)

    #======================================================================
    # Compute log-likelihood
    #======================================================================

    logL_R = -N * np.log(beta)
    logL = -N*D*log_2_pi/2. - D*logL_R/2. - D*psi0/2. - YRY/2. \
           - (LmInvSmuLmInvT*LmInvPsi2LmInvT).sum()/2. \
           + np.trace(LmInvPsi2LmInvT)*D/2. + (Linvmu*psi1YLinvT.T).sum()

    #======================================================================
    # Compute dL_dKmm
    #======================================================================

    tmp1 = backsub_both_sides(Lm, LmInvSmuLmInvT.dot(LmInvPsi2LmInvT), 'left')
    tmp2 = Linvmu.dot(psi1YLinvT)
    tmp3 = backsub_both_sides(Lm, -D * LmInvPsi2LmInvT - tmp2 - tmp2.T, 'left') / 2.
    dL_dKmm = (tmp1 + tmp1.T) / 2. + tmp3

    #======================================================================
    # Compute dL_dthetaL for uncertain input and non-heteroscedastic noise
    #======================================================================

    dL_dthetaL = -D * N * beta / 2. - (
        -D * psi0 / 2. - YRY / 2.
        - (LmInvSmuLmInvT * LmInvPsi2LmInvT).sum() / 2.
        + np.trace(LmInvPsi2LmInvT) * D / 2.
        + (Linvmu * psi1YLinvT.T).sum()) * beta

    #======================================================================
    # Compute dL_dqU
    #======================================================================

    tmp1 = backsub_both_sides(Lm, -LmInvPsi2LmInvT, 'left')
    dL_dqU_mean = tmp1.dot(mu) + dtrtrs(Lm, psi1YLinvT.T, trans=1)[0]
    dL_dqU_var = D / 2. * tmp1

    #======================================================================
    # Compute the Posterior distribution of inducing points p(u|Y)
    #======================================================================

    KuuInvmu = dtrtrs(Lm, Linvmu, trans=1)[0]
    tmp = backsub_both_sides(Lm, np.eye(M) - tdot(LinvLs), 'left')

    post = Posterior(woodbury_inv=tmp, woodbury_vector=KuuInvmu, K=Kuu,
                     mean=mu, cov=S, K_chol=Lm)

    #======================================================================
    # Compute dL_dpsi
    #======================================================================

    dL_dpsi0 = -D * (beta * np.ones((N, ))) / 2.

    if uncertain_outputs:
        dL_dpsi1 = Y.mean.dot(dtrtrs(Lm, Linvmu, trans=1)[0].T) * beta
    else:
        dL_dpsi1 = Y.dot(dtrtrs(Lm, Linvmu, trans=1)[0].T) * beta

    dL_dpsi2 = beta * backsub_both_sides(Lm, D * np.eye(M) - LmInvSmuLmInvT, 'left') / 2.
    if not uncertain_inputs:
        dL_dpsi1 += psi1.dot(dL_dpsi2 + dL_dpsi2.T) / beta
        dL_dpsi2 = None

    if uncertain_inputs:
        grad_dict = {
            'dL_dKmm': dL_dKmm,
            'dL_dpsi0': dL_dpsi0,
            'dL_dpsi1': dL_dpsi1,
            'dL_dpsi2': dL_dpsi2,
            'dL_dthetaL': dL_dthetaL,
            'dL_dqU_mean': dL_dqU_mean,
            'dL_dqU_var': dL_dqU_var
        }
    else:
        grad_dict = {
            'dL_dKmm': dL_dKmm,
            'dL_dKdiag': dL_dpsi0,
            'dL_dKnm': dL_dpsi1,
            'dL_dthetaL': dL_dthetaL,
            'dL_dqU_mean': dL_dqU_mean,
            'dL_dqU_var': dL_dqU_var
        }

    if uncertain_outputs:
        m, s = Y.mean, Y.variance
        grad_dict['dL_dYmean'] = -m * beta + dtrtrs(Lm, psi1.T)[0].T.dot(dtrtrs(Lm, mu)[0])
        grad_dict['dL_dYvar'] = beta / -2.

    return post, logL, grad_dict
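# SVI-VarDTC bounds like the one above are typically assembled together with a
# KL(q(u) || p(u)) term between the Gaussian q(u) = N(mu, S) and the prior
# p(u) = N(0, Kuu); depending on the implementation that term may be added by
# the calling model rather than inside inference(). For reference, the closed
# form of that Gaussian KL, checked by Monte Carlo on a small standalone
# example (illustrative only):
import numpy as np
from scipy.stats import multivariate_normal

rng = np.random.default_rng(5)
M_kl = 4
A_kl = rng.normal(size=(M_kl, M_kl))
Kuu_kl = A_kl @ A_kl.T + M_kl * np.eye(M_kl)  # prior covariance
mu_kl = rng.normal(size=M_kl)
S_kl = 0.5 * np.eye(M_kl)                     # q covariance

Kinv = np.linalg.inv(Kuu_kl)
kl = 0.5 * (np.trace(Kinv @ S_kl) + mu_kl @ Kinv @ mu_kl - M_kl
            + np.linalg.slogdet(Kuu_kl)[1] - np.linalg.slogdet(S_kl)[1])

# Monte Carlo estimate of E_q[log q(u) - log p(u)]:
q = multivariate_normal(mean=mu_kl, cov=S_kl)
p = multivariate_normal(mean=np.zeros(M_kl), cov=Kuu_kl)
u = q.rvs(size=200000, random_state=0)
kl_mc = np.mean(q.logpdf(u) - p.logpdf(u))
assert abs(kl - kl_mc) < 0.05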
def inference(self, kern, X, Z, likelihood, Y, qU):
    """
    The SVI-VarDTC inference
    """
    if isinstance(Y, np.ndarray) and np.any(np.isnan(Y)):
        missing_data = True
        N, M, Q = Y.shape[0], Z.shape[0], Z.shape[1]
        Ds = Y.shape[1] - (np.isnan(Y) * 1).sum(1)
        Ymask = 1 - np.isnan(Y) * 1
        Y_masked = np.zeros_like(Y)
        Y_masked[Ymask == 1] = Y[Ymask == 1]
        ND = Ymask.sum()
    else:
        missing_data = False
        N, D, M, Q = Y.shape[0], Y.shape[1], Z.shape[0], Z.shape[1]
        ND = N * D

    uncertain_inputs = isinstance(X, VariationalPosterior)
    uncertain_outputs = isinstance(Y, VariationalPosterior)

    beta = 1. / np.fmax(likelihood.variance, 1e-6)

    psi0, psi2, YRY, psi1, psi1Y = self.gatherPsiStat(
        kern, X, Z, Y if not missing_data else Y_masked, beta,
        uncertain_inputs, D if not missing_data else Ds, missing_data)

    #======================================================================
    # Compute Common Components
    #======================================================================

    mu, S = qU.mean, qU.covariance
    mupsi1Y = mu.dot(psi1Y)

    Kmm = kern.K(Z).copy()
    diag.add(Kmm, self.const_jitter)
    Lm = jitchol(Kmm)

    if missing_data:
        S_mu = S[None, :, :] + mu.T[:, :, None] * mu.T[:, None, :]
        NS_mu = S_mu.T.dot(Ymask.T).T
        LmInv = dtrtri(Lm)

        LmInvPsi2LmInvT = np.swapaxes(psi2.dot(LmInv.T), 1, 2).dot(LmInv.T)
        LmInvSmuLmInvT = np.swapaxes(NS_mu.dot(LmInv.T), 1, 2).dot(LmInv.T)

        B = mupsi1Y + mupsi1Y.T + (Ds[:, None, None] * psi2).sum(0)
        tmp = backsub_both_sides(Lm, B, 'right')

        logL = -ND*log_2_pi/2. + ND*np.log(beta)/2. - psi0/2. - YRY/2. \
               - (LmInvSmuLmInvT*LmInvPsi2LmInvT).sum()/2. + np.trace(tmp)/2.
    else:
        S_mu = S * D + tdot(mu)
        if uncertain_inputs:
            LmInvPsi2LmInvT = backsub_both_sides(Lm, psi2, 'right')
        else:
            LmInvPsi2LmInvT = tdot(dtrtrs(Lm, psi1.T)[0]) / beta  # tdot(psi1.dot(LmInv.T).T)/beta
        LmInvSmuLmInvT = backsub_both_sides(Lm, S_mu, 'right')

        B = mupsi1Y + mupsi1Y.T + D * psi2
        tmp = backsub_both_sides(Lm, B, 'right')

        logL = -ND*log_2_pi/2. + ND*np.log(beta)/2. - psi0/2. - YRY/2. \
               - (LmInvSmuLmInvT*LmInvPsi2LmInvT).sum()/2. + np.trace(tmp)/2.

    #======================================================================
    # Compute dL_dKmm
    #======================================================================

    dL_dKmm = np.eye(M)

    #======================================================================
    # Compute dL_dthetaL for uncertain input and non-heteroscedastic noise
    #======================================================================

    dL_dthetaL = None  # (YRY*beta + beta*output_dim*psi0 - num_data*output_dim*beta)/2. - beta*(dL_dpsi2R*psi2).sum() - beta*np.trace(LLinvPsi1TYYTPsi1LLinvT)

    #======================================================================
    # Compute dL_dpsi
    #======================================================================

    if missing_data:
        dL_dpsi0 = -Ds * (beta * np.ones((N, ))) / 2.
    else:
        dL_dpsi0 = -D * (beta * np.ones((N, ))) / 2.

    if uncertain_outputs:
        Ym, Ys = Y.mean, Y.variance
        dL_dpsi1 = dtrtrs(Lm, dtrtrs(Lm, Ym.dot(mu.T).T)[0], trans=1)[0].T * beta
    else:
        if missing_data:
            dL_dpsi1 = dtrtrs(Lm, dtrtrs(Lm, (Y_masked).dot(mu.T).T)[0], trans=1)[0].T * beta
        else:
            dL_dpsi1 = dtrtrs(Lm, dtrtrs(Lm, Y.dot(mu.T).T)[0], trans=1)[0].T * beta

    if uncertain_inputs:
        if missing_data:
            dL_dpsi2 = np.swapaxes(
                (Ds[:, None, None] * np.eye(M)[None, :, :] - LmInvSmuLmInvT).dot(LmInv),
                1, 2).dot(LmInv) * beta / 2.
        else:
            dL_dpsi2 = beta * backsub_both_sides(Lm, D * np.eye(M) - LmInvSmuLmInvT, 'left') / 2.
    else:
        dL_dpsi1 += beta * psi1.dot(dL_dpsi2 + dL_dpsi2.T)
        dL_dpsi2 = None

    if uncertain_inputs:
        grad_dict = {
            'dL_dKmm': dL_dKmm,
            'dL_dpsi0': dL_dpsi0,
            'dL_dpsi1': dL_dpsi1,
            'dL_dpsi2': dL_dpsi2,
            'dL_dthetaL': dL_dthetaL
        }
    else:
        grad_dict = {
            'dL_dKmm': dL_dKmm,
            'dL_dKdiag': dL_dpsi0,
            'dL_dKnm': dL_dpsi1,
            'dL_dthetaL': dL_dthetaL
        }

    if uncertain_outputs:
        Ym = Y.mean
        grad_dict['dL_dYmean'] = -Ym * beta + dtrtrs(Lm, psi1.T)[0].T.dot(dtrtrs(Lm, mu)[0])
        grad_dict['dL_dYvar'] = beta / -2.

    return logL, grad_dict
def inference(self, kern, X, Z, likelihood, Y, indexD, output_dim,
              Y_metadata=None, Lm=None, dL_dKmm=None, Kuu_sigma=None):
    """
    The first phase of inference:
    Compute: log-likelihood, dL_dKmm

    Cached intermediate results: Kmm, KmmInv,
    """
    input_dim = Z.shape[0]

    uncertain_inputs = isinstance(X, VariationalPosterior)

    beta = 1. / likelihood.variance
    if len(beta) == 1:
        beta = np.zeros(output_dim) + beta

    beta_exp = np.zeros(indexD.shape[0])
    for d in range(output_dim):
        beta_exp[indexD == d] = beta[d]

    psi0, psi1, psi2 = self.gatherPsiStat(kern, X, Z, Y, beta, uncertain_inputs)

    psi2_sum = (beta_exp[:, None, None] * psi2).sum(0) / output_dim

    #======================================================================
    # Compute Common Components
    #======================================================================

    Kmm = kern.K(Z).copy()
    if Kuu_sigma is not None:
        diag.add(Kmm, Kuu_sigma)
    else:
        diag.add(Kmm, self.const_jitter)
    Lm = jitchol(Kmm)

    logL = 0.
    dL_dthetaL = np.zeros(output_dim)
    dL_dKmm = np.zeros_like(Kmm)
    dL_dpsi0 = np.zeros_like(psi0)
    dL_dpsi1 = np.zeros_like(psi1)
    dL_dpsi2 = np.zeros_like(psi2)
    wv = np.empty((Kmm.shape[0], output_dim))

    for d in range(output_dim):
        idx_d = indexD == d
        Y_d = Y[idx_d]
        N_d = Y_d.shape[0]
        beta_d = beta[d]

        psi2_d = psi2[idx_d].sum(0) * beta_d
        psi1Y = Y_d.T.dot(psi1[idx_d]) * beta_d
        psi0_d = psi0[idx_d].sum() * beta_d
        YRY_d = np.square(Y_d).sum() * beta_d

        LmInvPsi2LmInvT = backsub_both_sides(Lm, psi2_d, 'right')

        Lambda = np.eye(Kmm.shape[0]) + LmInvPsi2LmInvT
        LL = jitchol(Lambda)
        LmLL = Lm.dot(LL)

        b = dtrtrs(LmLL, psi1Y.T)[0].T
        bbt = np.square(b).sum()
        v = dtrtrs(LmLL, b.T, trans=1)[0].T
        LLinvPsi1TYYTPsi1LLinvT = tdot(b.T)

        tmp = -backsub_both_sides(LL, LLinvPsi1TYYTPsi1LLinvT)
        dL_dpsi2R = backsub_both_sides(Lm, tmp + np.eye(input_dim)) / 2

        logL_R = -N_d * np.log(beta_d)
        logL += -((N_d * log_2_pi + logL_R + psi0_d
                   - np.trace(LmInvPsi2LmInvT)) + YRY_d - bbt) / 2.

        dL_dKmm += dL_dpsi2R - backsub_both_sides(Lm, LmInvPsi2LmInvT) / 2

        dL_dthetaL[d:d+1] = (YRY_d * beta_d + beta_d * psi0_d - N_d * beta_d) / 2. \
                            - beta_d * (dL_dpsi2R * psi2_d).sum() \
                            - beta_d * np.trace(LLinvPsi1TYYTPsi1LLinvT)

        dL_dpsi0[idx_d] = -beta_d / 2.
        dL_dpsi1[idx_d] = beta_d * np.dot(Y_d, v)
        dL_dpsi2[idx_d] = beta_d * dL_dpsi2R
        wv[:, d] = v

    LmInvPsi2LmInvT = backsub_both_sides(Lm, psi2_sum, 'right')

    Lambda = np.eye(Kmm.shape[0]) + LmInvPsi2LmInvT
    LL = jitchol(Lambda)
    LmLL = Lm.dot(LL)
    logdet_L = 2. * np.sum(np.log(np.diag(LL)))
    dL_dpsi2R_common = dpotri(LmLL)[0] / -2.
    dL_dpsi2 += dL_dpsi2R_common[None, :, :] * beta_exp[:, None, None]

    for d in range(output_dim):
        dL_dthetaL[d] += (dL_dpsi2R_common * psi2[indexD == d].sum(0)).sum() * -beta[d] * beta[d]

    dL_dKmm += dL_dpsi2R_common * output_dim

    logL += -output_dim * logdet_L / 2.

    #======================================================================
    # Compute the Posterior distribution of inducing points p(u|Y)
    #======================================================================

    LLInvLmT = dtrtrs(LL, Lm.T)[0]
    cov = tdot(LLInvLmT.T)

    wd_inv = backsub_both_sides(
        Lm,
        np.eye(input_dim) - backsub_both_sides(LL, np.identity(input_dim),
                                               transpose='left'),
        transpose='left')
    post = Posterior(woodbury_inv=wd_inv, woodbury_vector=wv, K=Kmm,
                     mean=None, cov=cov, K_chol=Lm)

    #======================================================================
    # Compute dL_dpsi
    #======================================================================

    if not uncertain_inputs:
        dL_dpsi1 += (psi1[:, None, :] * dL_dpsi2).sum(2) * 2.

    if uncertain_inputs:
        grad_dict = {
            'dL_dKmm': dL_dKmm,
            'dL_dpsi0': dL_dpsi0,
            'dL_dpsi1': dL_dpsi1,
            'dL_dpsi2': dL_dpsi2,
            'dL_dthetaL': dL_dthetaL
        }
    else:
        grad_dict = {
            'dL_dKmm': dL_dKmm,
            'dL_dKdiag': dL_dpsi0,
            'dL_dKnm': dL_dpsi1,
            'dL_dthetaL': dL_dthetaL
        }

    return post, logL, grad_dict
def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None, Lm=None,
              dL_dKmm=None, Kuu_sigma=None):
    """
    The first phase of inference:
    Compute: log-likelihood, dL_dKmm

    Cached intermediate results: Kmm, KmmInv,
    """
    num_data, output_dim = Y.shape
    input_dim = Z.shape[0]

    uncertain_inputs = isinstance(X, VariationalPosterior)
    uncertain_outputs = isinstance(Y, VariationalPosterior)

    # from ..models.sslvm import Gaussian_Gamma
    # if isinstance(likelihood, Gaussian_Gamma):
    #     beta = likelihood.expectation_beta()
    #     logL_R = -num_data*likelihood.expectation_logbeta()
    # else:
    beta = 1./np.fmax(likelihood.variance, 1e-6)
    logL_R = -num_data*np.log(beta)

    psi0, psi2, YRY, psi1, psi1Y, Shalf, psi1S = self.gatherPsiStat(kern, X, Z, Y, beta, uncertain_inputs)

    #======================================================================
    # Compute Common Components
    #======================================================================

    Kmm = kern.K(Z).copy()
    if Kuu_sigma is not None:
        diag.add(Kmm, Kuu_sigma)
    else:
        diag.add(Kmm, self.const_jitter)
    Lm = jitchol(Kmm)
    # LmInv = dtrtri(Lm)

    if uncertain_inputs:
        LmInvPsi2LmInvT = backsub_both_sides(Lm, psi2, 'right')
    else:
        LmInvPsi2LmInvT = tdot(dtrtrs(Lm, psi1.T)[0])/beta  # tdot(psi1.dot(LmInv.T).T)/beta

    Lambda = np.eye(Kmm.shape[0]) + LmInvPsi2LmInvT
    LL = jitchol(Lambda)
    LmLL = Lm.dot(LL)
    # LLInv = dtrtri(LL)
    # LmLLInv = LLInv.dot(LmInv)

    logdet_L = 2.*np.sum(np.log(np.diag(LL)))

    b = dtrtrs(LmLL, psi1Y.T)[0].T  # psi1Y.dot(LmLLInv.T)
    bbt = np.square(b).sum()
    v = dtrtrs(LmLL, b.T, trans=1)[0].T  # b.dot(LmLLInv)
    LLinvPsi1TYYTPsi1LLinvT = tdot(b.T)

    if psi1S is not None:
        psi1SLLinv = dtrtrs(LmLL, psi1S.T)[0].T  # psi1S.dot(LmLLInv.T)
        bbt += np.square(psi1SLLinv).sum()
        LLinvPsi1TYYTPsi1LLinvT += tdot(psi1SLLinv.T)
        psi1SP = dtrtrs(LmLL, psi1SLLinv.T, trans=1)[0].T  # psi1SLLinv.dot(LmLLInv)

    tmp = -backsub_both_sides(LL, LLinvPsi1TYYTPsi1LLinvT + output_dim*np.eye(input_dim))
    dL_dpsi2R = backsub_both_sides(Lm, tmp + output_dim*np.eye(input_dim))/2
    # tmp = -LLInv.T.dot(LLinvPsi1TYYTPsi1LLinvT+output_dim*np.eye(input_dim)).dot(LLInv)
    # dL_dpsi2R = LmInv.T.dot(tmp+output_dim*np.eye(input_dim)).dot(LmInv)/2.

    #======================================================================
    # Compute log-likelihood
    #======================================================================

    logL = -(output_dim*(num_data*log_2_pi + logL_R + psi0 - np.trace(LmInvPsi2LmInvT)) + YRY - bbt)/2. \
        - output_dim*logdet_L/2.

    #======================================================================
    # Compute dL_dKmm
    #======================================================================

    dL_dKmm = dL_dpsi2R - output_dim*backsub_both_sides(Lm, LmInvPsi2LmInvT)/2  # LmInv.T.dot(LmInvPsi2LmInvT).dot(LmInv)/2.

    #======================================================================
    # Compute the Posterior distribution of inducing points p(u|Y)
    #======================================================================

    LLInvLmT = dtrtrs(LL, Lm.T)[0]
    cov = tdot(LLInvLmT.T)

    wd_inv = backsub_both_sides(
        Lm,
        np.eye(input_dim) - backsub_both_sides(LL, np.identity(input_dim), transpose='left'),
        transpose='left')
    post = Posterior(woodbury_inv=wd_inv, woodbury_vector=v.T, K=Kmm,
                     mean=None, cov=cov, K_chol=Lm)

    #======================================================================
    # Compute dL_dthetaL for uncertain inputs and non-heteroscedastic noise
    #======================================================================

    # if isinstance(likelihood, Gaussian_Gamma):
    #     from scipy.special import polygamma
    #     dL_dthetaL = ((YRY + output_dim*psi0)/2. - (dL_dpsi2R*psi2).sum()
    #                   - np.trace(LLinvPsi1TYYTPsi1LLinvT))/-beta
    #     likelihood.q_a.gradient = num_data*output_dim/2.*polygamma(1, likelihood.q_a) + dL_dthetaL/likelihood.q_b
    #     likelihood.q_b.gradient = num_data*output_dim/(-2.*likelihood.q_b) + dL_dthetaL*(-likelihood.q_a/(likelihood.q_b*likelihood.q_b))
    # else:
    dL_dthetaL = (YRY*beta + beta*output_dim*psi0 - num_data*output_dim*beta)/2. \
        - beta*(dL_dpsi2R*psi2).sum() - beta*np.trace(LLinvPsi1TYYTPsi1LLinvT)

    #======================================================================
    # Compute dL_dpsi
    #======================================================================

    dL_dpsi0 = -output_dim * (beta * np.ones((num_data,)))/2.

    if uncertain_outputs:
        m, s = Y.mean, Y.variance
        dL_dpsi1 = beta*(np.dot(m, v) + Shalf[:, None]*psi1SP)
    else:
        dL_dpsi1 = beta*np.dot(Y, v)

    if uncertain_inputs:
        dL_dpsi2 = beta*dL_dpsi2R
    else:
        dL_dpsi1 += np.dot(psi1, dL_dpsi2R)*2.
        dL_dpsi2 = None

    if uncertain_inputs:
        grad_dict = {'dL_dKmm': dL_dKmm,
                     'dL_dpsi0': dL_dpsi0,
                     'dL_dpsi1': dL_dpsi1,
                     'dL_dpsi2': dL_dpsi2,
                     'dL_dthetaL': dL_dthetaL}
    else:
        grad_dict = {'dL_dKmm': dL_dKmm,
                     'dL_dKdiag': dL_dpsi0,
                     'dL_dKnm': dL_dpsi1,
                     'dL_dthetaL': dL_dthetaL}

    if uncertain_outputs:
        m, s = Y.mean, Y.variance
        psi1LmiLLi = dtrtrs(LmLL, psi1.T)[0].T
        LLiLmipsi1Y = b.T
        grad_dict['dL_dYmean'] = -m*beta + psi1LmiLLi.dot(LLiLmipsi1Y)
        grad_dict['dL_dYvar'] = beta/-2. + np.square(psi1LmiLLi).sum(axis=1)/2

    return post, logL, grad_dict
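# ----------------------------------------------------------------------
# The Posterior object built above encodes two standard collapsed-posterior
# identities for the inducing values u. A standalone NumPy sketch (explicit
# inverses for clarity only; shapes are arbitrary) checking that the Lm/LL
# assembly matches them:
#   cov    = Kmm (Kmm + Psi2)^{-1} Kmm
#   wd_inv = Kmm^{-1} - (Kmm + Psi2)^{-1}
import numpy as np
from numpy.linalg import inv, cholesky

rng = np.random.default_rng(1)
M = 4
A = rng.standard_normal((M, M)); Kmm_chk = A.dot(A.T) + M*np.eye(M)
B = rng.standard_normal((M, M)); Psi2_chk = B.dot(B.T)

Lm_chk = cholesky(Kmm_chk)                                # lower triangular
LL_chk = cholesky(np.eye(M) + inv(Lm_chk).dot(Psi2_chk).dot(inv(Lm_chk).T))

cov_chk = Lm_chk.dot(inv(LL_chk).T).dot(inv(LL_chk)).dot(Lm_chk.T)   # tdot(dtrtrs(LL, Lm.T)[0].T)
wd_inv_chk = inv(Lm_chk).T.dot(np.eye(M) - inv(LL_chk).T.dot(inv(LL_chk))).dot(inv(Lm_chk))

assert np.allclose(cov_chk, Kmm_chk.dot(inv(Kmm_chk + Psi2_chk)).dot(Kmm_chk))
assert np.allclose(wd_inv_chk, inv(Kmm_chk) - inv(Kmm_chk + Psi2_chk))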
def inference_root(self, kern, X, Z, likelihood, Y, Kuu_sigma=None,
                   Y_metadata=None, Lm=None, dL_dKmm=None):
    """
    The first phase of inference (root process):
    Compute: log-likelihood, dL_dKmm

    Cached intermediate results: Kmm, KmmInv,
    """
    num_data, output_dim = Y.shape
    input_dim = Z.shape[0]
    num_data_total = allReduceArrays([np.int32(num_data)], self.mpi_comm)[0]

    uncertain_inputs = isinstance(X, VariationalPosterior)
    uncertain_outputs = isinstance(Y, VariationalPosterior)

    beta = 1./np.fmax(likelihood.variance, 1e-6)

    psi0, psi2, YRY, psi1, psi1Y, Shalf, psi1S = self.gatherPsiStat(kern, X, Z, Y, beta, uncertain_inputs)

    #======================================================================
    # Compute Common Components
    #======================================================================

    # Broadcast a success/failure flag so the worker processes do not
    # deadlock waiting for arrays if a factorization fails on the root.
    try:
        Kmm = kern.K(Z).copy()
        if Kuu_sigma is not None:
            diag.add(Kmm, Kuu_sigma)
        else:
            diag.add(Kmm, self.const_jitter)
        Lm = jitchol(Kmm)

        LmInv = dtrtri(Lm)
        LmInvPsi2LmInvT = LmInv.dot(psi2.dot(LmInv.T))
        Lambda = np.eye(Kmm.shape[0]) + LmInvPsi2LmInvT
        LL = jitchol(Lambda)
        LLInv = dtrtri(LL)
        flag = np.zeros((1,), dtype=np.int32)
        self.mpi_comm.Bcast(flag, root=self.root)
    except LinAlgError as e:
        flag = np.ones((1,), dtype=np.int32)
        self.mpi_comm.Bcast(flag, root=self.root)
        raise e

    broadcastArrays([LmInv, LLInv], self.mpi_comm, self.root)
    LmLLInv = LLInv.dot(LmInv)

    logdet_L = 2.*np.sum(np.log(np.diag(LL)))
    b = psi1Y.dot(LmLLInv.T)
    bbt = np.square(b).sum()
    v = b.dot(LmLLInv)
    LLinvPsi1TYYTPsi1LLinvT = tdot(b.T)

    if psi1S is not None:
        # Partial statistics from this process's slice of the data are
        # summed across processes on the root.
        psi1SLLinv = psi1S.dot(LmLLInv.T)
        bbt_sum = np.square(psi1SLLinv).sum()
        LLinvPsi1TYYTPsi1LLinvT_sum = tdot(psi1SLLinv.T)
        bbt_sum, LLinvPsi1TYYTPsi1LLinvT_sum = reduceArrays(
            [bbt_sum, LLinvPsi1TYYTPsi1LLinvT_sum], self.mpi_comm, self.root)
        bbt += bbt_sum
        LLinvPsi1TYYTPsi1LLinvT += LLinvPsi1TYYTPsi1LLinvT_sum
        psi1SP = psi1SLLinv.dot(LmLLInv)

    tmp = -LLInv.T.dot(LLinvPsi1TYYTPsi1LLinvT + output_dim*np.eye(input_dim)).dot(LLInv)
    dL_dpsi2R = LmInv.T.dot(tmp + output_dim*np.eye(input_dim)).dot(LmInv)/2.
    broadcastArrays([dL_dpsi2R], self.mpi_comm, self.root)

    #======================================================================
    # Compute log-likelihood
    #======================================================================

    logL_R = -num_data_total*np.log(beta)
    logL = -(output_dim*(num_data_total*log_2_pi + logL_R + psi0 - np.trace(LmInvPsi2LmInvT)) + YRY - bbt)/2. \
        - output_dim*logdet_L/2.

    #======================================================================
    # Compute dL_dKmm
    #======================================================================

    dL_dKmm = dL_dpsi2R - output_dim*LmInv.T.dot(LmInvPsi2LmInvT).dot(LmInv)/2.

    #======================================================================
    # Compute the Posterior distribution of inducing points p(u|Y)
    #======================================================================

    wd_inv = backsub_both_sides(
        Lm,
        np.eye(input_dim) - backsub_both_sides(LL, np.identity(input_dim), transpose='left'),
        transpose='left')
    post = Posterior(woodbury_inv=wd_inv, woodbury_vector=v.T, K=Kmm,
                     mean=None, cov=None, K_chol=Lm)

    #======================================================================
    # Compute dL_dthetaL for uncertain inputs and non-heteroscedastic noise
    #======================================================================

    dL_dthetaL = (YRY*beta + beta*output_dim*psi0 - num_data_total*output_dim*beta)/2. \
        - beta*(dL_dpsi2R*psi2).sum() - beta*np.trace(LLinvPsi1TYYTPsi1LLinvT)

    #======================================================================
    # Compute dL_dpsi
    #======================================================================

    dL_dpsi0 = -output_dim * (beta * np.ones((num_data,)))/2.

    if uncertain_outputs:
        m, s = Y.mean, Y.variance
        dL_dpsi1 = beta*(np.dot(m, v) + Shalf[:, None]*psi1SP)
    else:
        dL_dpsi1 = beta*np.dot(Y, v)

    if uncertain_inputs:
        dL_dpsi2 = beta*dL_dpsi2R
    else:
        dL_dpsi1 += np.dot(psi1, dL_dpsi2R)*2.
        dL_dpsi2 = None

    if uncertain_inputs:
        grad_dict = {'dL_dKmm': dL_dKmm,
                     'dL_dpsi0': dL_dpsi0,
                     'dL_dpsi1': dL_dpsi1,
                     'dL_dpsi2': dL_dpsi2,
                     'dL_dthetaL': dL_dthetaL}
    else:
        grad_dict = {'dL_dKmm': dL_dKmm,
                     'dL_dKdiag': dL_dpsi0,
                     'dL_dKnm': dL_dpsi1,
                     'dL_dthetaL': dL_dthetaL}

    if uncertain_outputs:
        m, s = Y.mean, Y.variance
        psi1LmiLLi = psi1.dot(LmLLInv.T)
        LLiLmipsi1Y = b.T
        grad_dict['dL_dYmean'] = -m*beta + psi1LmiLLi.dot(LLiLmipsi1Y)
        grad_dict['dL_dYvar'] = beta/-2. + np.square(psi1LmiLLi).sum(axis=1)/2

    return post, logL, grad_dict
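# ----------------------------------------------------------------------
# Why the MPI split in inference_root is exact rather than approximate:
# every data-dependent statistic it reduces (num_data, psi1Y-style products,
# bbt, LLinvPsi1TYYTPsi1LLinvT) is a sum over data points, so each process
# can compute its share on its own data slice and the root adds the partial
# results. A minimal NumPy sketch of that additivity (no MPI needed;
# psi1_chk merely stands in for the psi-statistics, with arbitrary shapes):
import numpy as np

rng = np.random.default_rng(2)
N, M = 12, 3
psi1_chk = rng.standard_normal((N, M))
Y_chk = rng.standard_normal((N, 1))

full = psi1_chk.T.dot(Y_chk)                                    # psi1Y on all data at once
parts = [psi1_chk[w::3].T.dot(Y_chk[w::3]) for w in range(3)]   # three "workers"
assert np.allclose(full, sum(parts))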