Code example #1
    def kl_divergences(self, q_U, p_U, dims):
        """
        Description:  Returns the sum of KL divergences
        Equation:     \sum_q KL[q(u_q)|| p(u_q)]
        Paper:        In Section 2.2.2 / Variational Bounds and Appendix 1
        """
        Q = dims['Q']
        M = dims['M']

        #------------------------------------------#     ALGEBRA       #-----------------------------------------------#
        #######  Algebra for q(u)  #######
        m_u = q_U.mu_u.copy()
        L_u = choleskies.flat_to_triang(q_U.chols_u.copy())
        S_u = np.empty((Q, M, M))
        [np.dot(L_u[q, :, :], L_u[q, :, :].T, S_u[q, :, :]) for q in range(Q)]

        #######  Algebra for p(u)  #######
        Luu = p_U.Luu.copy()
        Kuui = p_U.Kuui.copy()

        #----------------------------#     KL DIVERGENCE BETWEEN TWO GAUSSIANS     #-----------------------------------#
        KL = 0
        for q in range(Q):
            KL += 0.5 * np.sum(Kuui[q, :, :] * S_u[q, :, :]) \
                  + 0.5 * np.dot(m_u[:, q, None].T, np.dot(Kuui[q, :, :], m_u[:, q, None])) \
                  - 0.5 * M \
                  + 0.5 * 2. * np.sum(np.log(np.abs(np.diag(Luu[q, :, :])))) \
                  - 0.5 * 2. * np.sum(np.log(np.abs(np.diag(L_u[q, :, :]))))
        return KL
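For reference, each term accumulated in the loop above is part of the standard closed-form KL divergence between the Gaussian q(u_q) = N(m_q, S_q) and the zero-mean prior p(u_q) = N(0, K_{u_q u_q}); the log-determinants are read off the Cholesky factors Luu and L_u:

    \mathrm{KL}\big[q(u_q)\,\|\,p(u_q)\big] = \tfrac{1}{2}\Big[\operatorname{tr}\big(K_{u_qu_q}^{-1}S_q\big) + m_q^{\top}K_{u_qu_q}^{-1}m_q - M + \log\lvert K_{u_qu_q}\rvert - \log\lvert S_q\rvert\Big],
    \qquad \log\lvert K_{u_qu_q}\rvert = 2\sum_i \log\big\lvert (L_{uu})_{ii}\big\rvert .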
Code example #2
    def calculate_KL(self, q_U, p_U, M, J):
        """
        Calculates the KL divergence (see KL-div for multivariate normals)
        Equation: \sum_q KL[q(u_q) || p(u_q)]
        """
        # Algebra for q(u):
        m_u = q_U.mu_u.copy()
        L_u = choleskies.flat_to_triang(q_U.chols_u.copy())

        S_u = np.empty((J, M, M))
        [np.dot(L_u[j, :, :], L_u[j, :, :].T, S_u[j, :, :]) for j in range(J)]

        # Algebra for p(u):
        Kuu = p_U.Kuu.copy()
        Luu = p_U.Luu.copy()
        Kuui = p_U.Kuui.copy()

        KL = 0
        for j in range(J):
            KL += 0.5 * np.sum(Kuui[j, :, :] * S_u[j, :, :]) \
                  + 0.5 * np.dot(m_u[:, j, None].T,np.dot(Kuui[j,:,:],m_u[:, j, None])) \
                  - 0.5 * M \
                  + 0.5 * 2. * np.sum(np.log(np.abs(np.diag(Luu[j, :, :])))) \
                  - 0.5 * 2. * np.sum(np.log(np.abs(np.diag(L_u[j, :, :]))))
        return KL
Code example #3
    def natural_grad_qu(model, n_iter=1, step_size=step_rate, momentum=0.0):
        global mk_ant, mk_aux, mk, V_i, Vk, Lk, Vki_ant
        """Initialize the step-sizes"""
        beta2_k = step_size  #use step_size*0.1 for Convolutional MOGP
        gamma2_k = momentum
        alpha2_k = step_size
        N_posteriors = model.q_u_means.shape[1]

        if n_iter == 1:
            V_i = choleskies.multiple_dpotri(
                choleskies.flat_to_triang(model.q_u_chols.values)).copy()
            Vk = np.zeros_like(V_i)
            for i in range(N_posteriors):
                Vk[i, :, :] = 0.5 * (model.posteriors[i].covariance.copy() +
                                     model.posteriors[i].covariance.T.copy())

            Lk = np.zeros_like(Vk)
            mk = model.q_u_means.values.copy()

            Vki_ant = V_i.copy()
            mk_aux = mk.copy()

        dL_dm, dL_dV = compute_stoch_grads_for_qu_HetMOGP(model=model)

        mk_ant = mk_aux.copy()
        mk_aux = mk.copy()

        if not model.q_u_means.is_fixed and not model.q_u_chols.is_fixed:
            mk_ant = mk_aux.copy()
            mk_aux = mk.copy()

            for i in range(N_posteriors):
                try:
                    V_i[i, :, :] = V_i[i, :, :] + 2 * beta2_k * dL_dV[
                        i]  #+ 1.0e-6*np.eye(*Vk[i,:,:].shape)
                    Vk[i, :, :] = np.linalg.inv(V_i[i, :, :])
                    Vk[i, :, :] = 0.5 * (np.array(Vk[i, :, :]) +
                                         np.array(Vk[i, :, :].T))
                    Lk[i, :, :] = np.linalg.cholesky(Vk[i, :, :])
                    mk[:, i] = mk[:, i] - alpha2_k * np.dot(
                        Vk[i, :, :], dL_dm[i]) + gamma2_k * np.dot(
                            np.dot(Vk[i, :, :], Vki_ant[i, :, :]),
                            (mk[:, i] - mk_ant[:, i]))
                except np.linalg.LinAlgError:
                    print("Overflow")
                    Vk[i, :, :] = np.linalg.inv(V_i[i, :, :])
                    Vk[i, :, :] = 1.0e-1 * np.eye(
                        *Vk[i, :, :].shape
                    )  #nearestPD(Vk[i,:,:]) # + 1.0e-3*np.eye(*Vk[i,:,:].shape)
                    Lk[i, :, :] = linalg.jitchol(Vk[i, :, :])
                    V_i[i, :, :] = np.linalg.inv(Vk[i, :, :])
                    mk[:, i] = mk[:, i] * 0.0

            Vki_ant = V_i.copy()

            model.L_u.setfield(choleskies.triang_to_flat(Lk.copy()),
                               np.float64)
            model.m_u.setfield(mk.copy(), np.float64)
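Written out, the try-block above applies, for each posterior i, a natural-gradient-style update on the precision \Lambda_i and mean m_i of q(u_i); the signs assume the convention of the gradients (dL_dm, dL_dV) returned by compute_stoch_grads_for_qu_HetMOGP:

    \Lambda_i \leftarrow \Lambda_i + 2\beta\,\frac{\partial L}{\partial V_i}, \qquad V_i = \Lambda_i^{-1}, \qquad
    m_i \leftarrow m_i - \alpha\,V_i\,\frac{\partial L}{\partial m_i} + \gamma\,V_i\,\Lambda_i^{\mathrm{prev}}\big(m_i - m_i^{\mathrm{prev}}\big),

with \alpha = alpha2_k, \beta = beta2_k, \gamma = gamma2_k, and L_i = chol(V_i) written back to the model through the flat triangular parameterisation.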
Code example #4
    def calculate_q_f(self, X, Z, q_U, p_U, kern_list, kern_list_Gdj, kern_aux,
                      B, M, N, j):
        """
        Calculates the mean and variance of q(f_d) as
        Equation: E_q(U)\{p(f_d|U)\}
        """
        # Algebra for q(u):
        m_u = q_U.mu_u.copy()
        L_u = choleskies.flat_to_triang(q_U.chols_u.copy())
        #S_u = np.empty((M, M))
        S_u = np.dot(L_u[j, :, :], L_u[j, :, :].T) + 1e-6 * np.eye(M)
        #        [np.dot(L_u[j, :, :], L_u[j, :, :].T, S_u[j, :, :]) for j in range(J)]
        #for j in range(J): S_u[j,:,:] = S_u[j,:,:] + 1e-6*np.eye(M)

        # Algebra for p(f_d|u):
        #Kfdu = util.conv_cross_covariance_full(X, Z, B, kern_list, kern_list_Gdj, kern_aux,j)
        #Kff = util.conv_function_covariance(X, B, kern_list, kern_list_Gdj, kern_aux,j)
        Kff, Kfdu = util.both_convoled_Kff_and_Kfu_full(
            X, Z, B, kern_list, kern_list_Gdj, kern_aux, j)

        Kuu = p_U.Kuu.copy()
        Luu = p_U.Luu.copy()
        Kuui = p_U.Kuui.copy()
        Kff_diag = np.diag(Kff)

        # Algebra for q(f_d) = E_{q(u)}[p(f_d|u)]
        #Afdu = np.empty((N, M)) #Afdu = K_{fduq}Ki_{uquq}
        m_fd = np.zeros((N, 1))
        v_fd = np.zeros((N, 1))
        S_fd = np.zeros((N, N))
        v_fd += Kff_diag[:, None]  #+ 1e-1
        S_fd += Kff  #+ 1e-1*np.eye(N)

        # Expectation part
        #R, _ = linalg.dpotrs(np.asfortranarray(Luu[q, :, :]), Kfdu[:, q * M:(q * M) + M].T)
        #R = np.dot(Kuui[q, :, :], Kfdu[:, q * M:(q * M) + M].T)
        R = np.linalg.solve(Kuu[j, :, :], Kfdu.T)
        Afdu = R.T  #Afdu = K_{fduq}Ki_{uquq}
        m_fd += np.dot(Afdu, m_u[:, j, None])  #exp
        #tmp = dtrmm(alpha=1.0, a=L_u[q, :, :].T, b=R, lower=0, trans_a=0)
        #v_fd += np.sum(np.square(tmp), 0)[:,None] - np.sum(R * Kfdu[:, q * M:(q * M) + M].T,0)[:,None] #exp
        S_fd += np.dot(np.dot(R.T, S_u), R) - np.dot(Kfdu, R)
        #S_fd += np.dot(np.dot(R.T, S_u[q, :, :]), R) - np.dot(np.dot(R.T, Kuu[q, :, :]), R) # - np.dot(Kfdu[:, q * M:(q * M) + M], R)

        v_fd = np.diag(S_fd)[:, None]
        if (v_fd < 0).any():
            #v_fd = np.abs(v_fd)
            #v_fd[v_fd < 0] = 1.0e-6
            print('v negative!')
            #print(np.linalg.eig(S_u[q, :, :]))

        q_fd = qfd(m_fd=m_fd, v_fd=v_fd, Kfdu=Kfdu, Afdu=Afdu, S_fd=S_fd)
        return q_fd
Code example #5
    def calculate_mu_var(self, X, Y, Z, q_u_mean, q_u_chol, kern, mean_function, num_inducing, num_data, num_outputs):
        """
        Calculate posterior mean and variance for the latent function values for use in the
        expectation over the likelihood
        """
        #expand cholesky representation
        L = choleskies.flat_to_triang(q_u_chol)
        #S = linalg.ijk_ljk_to_ilk(L, L) #L.dot(L.T)
        S = np.empty((num_outputs, num_inducing, num_inducing))
        [np.dot(L[i,:,:], L[i,:,:].T, S[i,:,:]) for i in range(num_outputs)]
        #logdetS = np.array([2.*np.sum(np.log(np.abs(np.diag(L[:,:,i])))) for i in range(L.shape[-1])])
        logdetS = np.array([2.*np.sum(np.log(np.abs(np.diag(L[i,:,:])))) for i in range(L.shape[0])])
        #compute mean function stuff
        if mean_function is not None:
            prior_mean_u = mean_function.f(Z)
            prior_mean_f = mean_function.f(X)
        else:
            prior_mean_u = np.zeros((num_inducing, num_outputs))
            prior_mean_f = np.zeros((num_data, num_outputs))

        #compute kernel related stuff
        Kmm = kern.K(Z)
        #Knm = kern.K(X, Z)
        Kmn = kern.K(Z, X)
        Knn_diag = kern.Kdiag(X)
        #Kmmi, Lm, Lmi, logdetKmm = linalg.pdinv(Kmm)
        Lm = linalg.jitchol(Kmm)
        logdetKmm = 2.*np.sum(np.log(np.diag(Lm)))
        Kmmi, _ = linalg.dpotri(Lm)

        #compute the marginal means and variances of q(f)
        #A = np.dot(Knm, Kmmi)
        A, _ = linalg.dpotrs(Lm, Kmn)
        #mu = prior_mean_f + np.dot(A, q_u_mean - prior_mean_u)
        mu = prior_mean_f + np.dot(A.T, q_u_mean - prior_mean_u)
        #v = Knn_diag[:,None] - np.sum(A*Knm,1)[:,None] + np.sum(A[:,:,None] * linalg.ij_jlk_to_ilk(A, S), 1)
        v = np.empty((num_data, num_outputs))
        for i in range(num_outputs):
            tmp = dtrmm(1.0,L[i].T, A, lower=0, trans_a=0)
            v[:,i] = np.sum(np.square(tmp),0)
        v += (Knn_diag - np.sum(A*Kmn,0))[:,None]

        #compute the KL term
        Kmmim = np.dot(Kmmi, q_u_mean)
        #KLs = -0.5*logdetS -0.5*num_inducing + 0.5*logdetKmm + 0.5*np.einsum('ij,ijk->k', Kmmi, S) + 0.5*np.sum(q_u_mean*Kmmim,0)
        KLs = -0.5*logdetS -0.5*num_inducing + 0.5*logdetKmm + 0.5*np.sum(Kmmi[None,:,:]*S,1).sum(1) + 0.5*np.sum(q_u_mean*Kmmim,0)
        KL = KLs.sum()

        latent_detail = LatentFunctionDetails(q_u_mean=q_u_mean, q_u_chol=q_u_chol, mean_function=mean_function,
                                              mu=mu, v=v, prior_mean_u=prior_mean_u, L=L, A=A,
                                              S=S, Kmm=Kmm, Kmmi=Kmmi, Kmmim=Kmmim, KL=KL)
        return latent_detail
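Written out, with A = K_{ZZ}^{-1} K_{ZX} (one column a_n = K_{ZZ}^{-1} k_n per datum) and S = L L^T per output, the marginal moments assembled above are the usual sparse-GP expressions:

    \mu = m_X + A^{\top}(m_u - m_Z), \qquad
    v_n = k(x_n, x_n) - k_n^{\top} K_{ZZ}^{-1} k_n + k_n^{\top} K_{ZZ}^{-1} S\, K_{ZZ}^{-1} k_n ,

where m_X and m_Z are the prior mean function evaluated at X and Z; the per-output KL terms then take the same closed form as in code example #1.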
Code example #6
    def calculate_KL(self, q_U, p_U_new, p_U_old, p_U_var, M, Mold, Q):
        """
        Calculates the KL divergence (see KL-div for multivariate normals)
        Equation: \sum_q KL[q(u_q) || p(u_q)]
        """
        # Algebra for q(u):
        m_u = q_U.mu_u.copy()
        L_u = choleskies.flat_to_triang(q_U.chols_u.copy())
        S_u = np.empty((Q, M, M))
        [np.dot(L_u[q, :, :], L_u[q, :, :].T, S_u[q, :, :]) for q in range(Q)]

        # Algebra for p(u|psi_new):
        Kuu_new = p_U_new.Kuu.copy()
        Luu_new = p_U_new.Luu.copy()
        Kuui_new = p_U_new.Kuui.copy()

        # Algebra for p(u|psi_old):
        Kuu_old = p_U_old.Kuu.copy()
        Luu_old = p_U_old.Luu.copy()
        Kuui_old = p_U_old.Kuui.copy()

        # Algebra for q(u|phi_old):
        Mu_var = p_U_var.Mu.copy()
        Kuu_var = p_U_var.Kuu.copy()
        Luu_var = p_U_var.Luu.copy()
        Kuui_var = p_U_var.Kuui.copy()

        KLnew = 0
        KLold = 0
        KLvar = 0
        for q in range(Q):
            KLnew += 0.5 * np.sum(Kuui_new[q, :, :] * S_u[q, :, :]) \
                  + 0.5 * np.dot(m_u[:, q, None].T,np.dot(Kuui_new[q,:,:],m_u[:, q, None])) \
                  - 0.5 * M \
                  + 0.5 * 2. * np.sum(np.log(np.abs(np.diag(Luu_new[q, :, :])))) \
                  - 0.5 * 2. * np.sum(np.log(np.abs(np.diag(L_u[q, :, :]))))

            KLold += 0.5 * np.sum(Kuui_old[q, :, :] * S_u[q, :, :]) \
                     + 0.5 * np.dot(m_u[:, q, None].T, np.dot(Kuui_old[q, :, :], m_u[:, q, None])) \
                     - 0.5 * M \
                     + 0.5 * 2. * np.sum(np.log(np.abs(np.diag(Luu_old[q, :, :])))) \
                     - 0.5 * 2. * np.sum(np.log(np.abs(np.diag(L_u[q, :, :]))))

            KLvar += 0.5 * np.sum(Kuui_var[q, :, :] * S_u[q, :, :]) \
                     + 0.5 * np.dot((Mu_var[q, :, :] - m_u[:, q, None]).T, np.dot(Kuui_var[q, :, :], (Mu_var[q, :, :] - m_u[:, q, None]))) \
                     - 0.5 * M \
                     + 0.5 * 2. * np.sum(np.log(np.abs(np.diag(Luu_var[q, :, :])))) \
                     - 0.5 * 2. * np.sum(np.log(np.abs(np.diag(L_u[q, :, :]))))

        return KLnew, KLold, KLvar
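All three accumulators use the same Gaussian KL identity; KLvar is simply the case with a non-zero reference mean. For q(u_q) = N(m_q, S_q) and a reference distribution N(\mu_q^{var}, K^{var}_{u_q u_q}):

    \mathrm{KL} = \tfrac{1}{2}\Big[\operatorname{tr}\big((K^{var}_{u_qu_q})^{-1}S_q\big) + (\mu_q^{var} - m_q)^{\top}(K^{var}_{u_qu_q})^{-1}(\mu_q^{var} - m_q) - M + \log\lvert K^{var}_{u_qu_q}\rvert - \log\lvert S_q\rvert\Big],

and setting \mu_q^{var} = 0 recovers the KLnew and KLold terms.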
Code example #7
    def variational_q_fd(self, X, Z, q_U, p_U, kern_list, B, N, dims, d):
        """
        Description:  Returns the posterior approximation q(f) for the latent output functions (LOFs)
        Equation:     q(f) = \int p(f|u)q(u)du
        Paper:        In Section 2.2.2 / Variational Bounds
        """
        Q = dims['Q']
        M = dims['M']

        #-----------------------------------------#      POSTERIOR ALGEBRA       #-------------------------------------#
        #######  Algebra for q(u)  #######
        m_u = q_U.mu_u.copy()
        L_u = choleskies.flat_to_triang(q_U.chols_u.copy())
        S_u = np.empty((Q, M, M))
        [np.dot(L_u[q, :, :], L_u[q, :, :].T, S_u[q, :, :]) for q in range(Q)]

        #######  Algebra for p(f_d|u)  #######
        Kfdu = multi_output.cross_covariance(X, Z, B, kern_list, d)
        Luu = p_U.Luu.copy()
        Kff = multi_output.function_covariance(X, B, kern_list, d)
        Kff_diag = np.diag(Kff)

        ####### Algebra for q(f_d) = E_{q(u)}[p(f_d|u)] #######
        Afdu = np.empty((Q, N, M))  # Afdu = K_{fduq}Ki_{uquq}
        m_fd = np.zeros((N, 1))
        v_fd = np.zeros((N, 1))
        S_fd = np.zeros((N, N))
        v_fd += Kff_diag[:, None]
        S_fd += Kff
        for q in range(Q):
            ####### Expectation w.r.t. u_q part  #######
            R, _ = linalg.dpotrs(np.asfortranarray(Luu[q, :, :]),
                                 Kfdu[:, q * M:(q * M) + M].T)
            Afdu[q, :, :] = R.T
            m_fd += np.dot(Afdu[q, :, :], m_u[:, q, None])  # exp
            tmp = dtrmm(alpha=1.0, a=L_u[q, :, :].T, b=R, lower=0, trans_a=0)
            v_fd += np.sum(np.square(tmp), 0)[:, None] - np.sum(
                R * Kfdu[:, q * M:(q * M) + M].T, 0)[:, None]  # exp
            S_fd += np.dot(np.dot(R.T, S_u[q, :, :]), R) - np.dot(
                Kfdu[:, q * M:(q * M) + M], R)

        if (v_fd < 0).any():
            print('v negative!')

        #--------------------------------------#     VARIATIONAL POSTERIOR (LOFs)  #-----------------------------------#
        ####### Variational output distribution q_fd() #######
        q_fd = qfd(m_fd=m_fd, v_fd=v_fd, Kfdu=Kfdu, Afdu=Afdu, S_fd=S_fd)

        return q_fd
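In closed form, the loop over q assembles the moments of q(f_d) = \int p(f_d|u)\,q(u)\,du with A_{dq} = K_{f_d u_q} K_{u_q u_q}^{-1} (stored in Afdu):

    m_{f_d} = \sum_q A_{dq}\, m_q, \qquad
    S_{f_d} = K_{f_d f_d} + \sum_q A_{dq}\big(S_q - K_{u_q u_q}\big)A_{dq}^{\top}, \qquad
    v_{f_d} = \operatorname{diag}\big(S_{f_d}\big).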
Code example #8
    def calculate_q_f(self, X, Z, q_U, p_U, kern_list, B, M, N, Q, D, d):
        """
        Calculates the mean and variance of q(f_d) as
        Equation: E_q(U)\{p(f_d|U)\}
        """
        # Algebra for q(u):
        m_u = q_U.mu_u.copy()
        L_u = choleskies.flat_to_triang(q_U.chols_u.copy())
        S_u = np.empty((Q, M, M))
        [np.dot(L_u[q, :, :], L_u[q, :, :].T, S_u[q, :, :]) for q in range(Q)]

        # Algebra for p(f_d|u):
        Kfdu = util.cross_covariance(X, Z, B, kern_list, d)
        Kuu = p_U.Kuu.copy()
        Luu = p_U.Luu.copy()
        Kuui = p_U.Kuui.copy()
        Kff = util.function_covariance(X, B, kern_list, d)
        Kff_diag = np.diag(Kff)

        # Algebra for q(f_d) = E_{q(u)}[p(f_d|u)]
        Afdu = np.empty((Q, N, M))  #Afdu = K_{fduq}Ki_{uquq}
        m_fd = np.zeros((N, 1))
        v_fd = np.zeros((N, 1))
        S_fd = np.zeros((N, N))
        v_fd += Kff_diag[:, None]
        S_fd += Kff
        for q in range(Q):
            # Expectation part
            R, _ = linalg.dpotrs(np.asfortranarray(Luu[q, :, :]),
                                 Kfdu[:, q * M:(q * M) + M].T)
            Afdu[q, :, :] = R.T
            m_fd += np.dot(Afdu[q, :, :], m_u[:, q, None])  #exp
            tmp = dtrmm(alpha=1.0, a=L_u[q, :, :].T, b=R, lower=0, trans_a=0)
            v_fd += np.sum(np.square(tmp), 0)[:, None] - np.sum(
                R * Kfdu[:, q * M:(q * M) + M].T, 0)[:, None]  #exp
            S_fd += np.dot(np.dot(R.T, S_u[q, :, :]), R) - np.dot(
                Kfdu[:, q * M:(q * M) + M], R)

        if (v_fd < 0).any():
            print('v negative!')

        q_fd = qfd(m_fd=m_fd, v_fd=v_fd, Kfdu=Kfdu, Afdu=Afdu, S_fd=S_fd)
        return q_fd
Code example #9
    def posteriors_F(self, Xnew, which_out=None):
        # Returns all the posteriors q(f*) associated with each output.
        # Xnew can be a list whose length equals the number of likelihoods defined for the HetMOGP,
        # or a single numpy array, in which case it is replicated for each output.

        if isinstance(Xnew, list):
            Xmulti_all_new = Xnew
        else:
            Xmulti_all_new = []
            for i in range(self.num_output_funcs):
                Xmulti_all_new.append(Xnew.copy())

        M = self.Z.shape[0]
        Q = len(self.kern_list)
        D = self.likelihood.num_output_functions(self.Y_metadata)
        Kuu, Luu, Kuui = util.VIK_covariance(self.Z, self.kern_list,
                                             self.kern_list_Tq, self.kern_aux)
        p_U = pu(Kuu=Kuu, Luu=Luu, Kuui=Kuui)
        q_U = qu(mu_u=self.q_u_means, chols_u=self.q_u_chols)
        S_u = np.empty((Q, M, M))
        L_u = choleskies.flat_to_triang(q_U.chols_u.copy())
        [np.dot(L_u[q, :, :], L_u[q, :, :].T, S_u[q, :, :]) for q in range(Q)]

        # for every latent function f_d calculate q(f_d) and keep it as q(F):
        posteriors_F = []
        f_index = self.Y_metadata['function_index'].flatten()
        d_index = self.Y_metadata['d_index'].flatten()

        if which_out is None:
            indix_aux = f_index.copy()
        else:
            which_out = np.array(which_out)
            indix_aux = -1 * np.ones_like(f_index)
            for i in range(which_out.shape[0]):
                posix = np.where(f_index == which_out[i])
                indix_aux[posix] = f_index[posix].copy()

        for d in range(D):
            if f_index[d] == indix_aux[d]:
                Xtask = Xmulti_all_new[f_index[d]]
                q_fd, _ = self.inference_method.calculate_q_f(
                    X=Xtask,
                    Z=self.Z,
                    q_U=q_U,
                    S_u=S_u,
                    p_U=p_U,
                    kern_list=self.kern_list,
                    kern_list_Gdj=self.kern_list_Gdj,
                    kern_list_Tq=self.kern_list_Tq,
                    kern_aux=self.kern_aux,
                    B=self.B_list,
                    M=M,
                    N=Xtask.shape[0],
                    Q=Q,
                    D=D,
                    d=d)
                # Posterior objects for output functions (used in prediction)
                posterior_fd = Posterior(mean=q_fd.m_fd.copy(),
                                         cov=q_fd.S_fd.copy(),
                                         K=util.function_covariance(
                                             X=Xtask,
                                             B=self.B_list,
                                             kernel_list=self.kern_list,
                                             d=d),
                                         prior_mean=np.zeros(q_fd.m_fd.shape))
                posteriors_F.append(posterior_fd)
            else:
                #posteriors_F.append(fake_posterior)
                posteriors_F.append([])
        return posteriors_F
    def inference(self,
                  q_u_means,
                  q_u_chols,
                  X,
                  Y,
                  Z,
                  kern_list,
                  kern_list_Gdj,
                  kern_aux,
                  likelihood,
                  B_list,
                  Y_metadata,
                  KL_scale=1.0,
                  batch_scale=None,
                  predictive=False,
                  Gauss_Newton=False):
        M = Z.shape[0]
        T = len(Y)
        if batch_scale is None:
            batch_scale = [1.0] * T
        Ntask = [Y[t].shape[0] for t in range(T)]
        Q = len(kern_list)
        D = likelihood.num_output_functions(Y_metadata)
        Kuu, Luu, Kuui = util.latent_funs_cov(Z, kern_list)
        p_U = pu(Kuu=Kuu, Luu=Luu, Kuui=Kuui)
        q_U = qu(mu_u=q_u_means.copy(), chols_u=q_u_chols.copy())
        S_u = np.empty((Q, M, M))
        L_u = choleskies.flat_to_triang(q_U.chols_u.copy())
        [np.dot(L_u[q, :, :], L_u[q, :, :].T, S_u[q, :, :]) for q in range(Q)]
        Su_add_Kuu = np.zeros((Q, M, M))
        Su_add_Kuu_chol = np.zeros((Q, M, M))
        for q in range(Q):
            Su_add_Kuu[q, :, :] = S_u[q, :, :] + Kuu[q, :, :]
            Su_add_Kuu_chol[q, :, :] = linalg.jitchol(Su_add_Kuu[q, :, :])

        # for every latent function f_d calculate q(f_d) and keep it as q(F):
        q_F = []
        posteriors_F = []
        f_index = Y_metadata['function_index'].flatten()
        d_index = Y_metadata['d_index'].flatten()

        for d in range(D):
            Xtask = X[f_index[d]]
            q_fd, q_U = self.calculate_q_f(X=Xtask,
                                           Z=Z,
                                           q_U=q_U,
                                           S_u=S_u,
                                           p_U=p_U,
                                           kern_list=kern_list,
                                           kern_list_Gdj=kern_list_Gdj,
                                           kern_aux=kern_aux,
                                           B=B_list,
                                           M=M,
                                           N=Xtask.shape[0],
                                           Q=Q,
                                           D=D,
                                           d=d)
            # Posterior objects for output functions (used in prediction)
            # TODO: the Posterior object below may be unnecessary and could be removed
            posterior_fd = Posterior(mean=q_fd.m_fd.copy(),
                                     cov=q_fd.S_fd.copy(),
                                     K=util.conv_function_covariance(
                                         X=Xtask,
                                         B=B_list,
                                         kernel_list=kern_list,
                                         kernel_list_Gdj=kern_list_Gdj,
                                         kff_aux=kern_aux,
                                         d=d),
                                     prior_mean=np.zeros(q_fd.m_fd.shape))
            posteriors_F.append(posterior_fd)
            q_F.append(q_fd)

        mu_F = []
        v_F = []
        for t in range(T):
            mu_F_task = np.empty((X[t].shape[0], 1))
            v_F_task = np.empty((X[t].shape[0], 1))
            for d, q_fd in enumerate(q_F):
                if f_index[d] == t:
                    mu_F_task = np.hstack((mu_F_task, q_fd.m_fd))
                    v_F_task = np.hstack((v_F_task, q_fd.v_fd))

            mu_F.append(mu_F_task[:, 1:])
            v_F.append(v_F_task[:, 1:])

        # posterior_Fnew for predictive
        if predictive:
            return posteriors_F
        # inference for rest of cases
        else:
            # Variational Expectations
            VE = likelihood.var_exp(Y, mu_F, v_F, Y_metadata)
            VE_dm, VE_dv = likelihood.var_exp_derivatives(
                Y, mu_F, v_F, Y_metadata, Gauss_Newton)
            for t in range(T):
                VE[t] = VE[t] * batch_scale[t]
                VE_dm[t] = VE_dm[t] * batch_scale[t]
                VE_dv[t] = VE_dv[t] * batch_scale[t]

            # KL Divergence
            KL = self.calculate_KL(q_U=q_U,
                                   Su_add_Kuu=Su_add_Kuu,
                                   Su_add_Kuu_chol=Su_add_Kuu_chol,
                                   p_U=p_U,
                                   M=M,
                                   Q=Q,
                                   D=D)

            # Log Marginal log(p(Y))
            F = 0
            for t in range(T):
                F += VE[t].sum()

            log_marginal = F - KL

            # Gradients and Posteriors
            dL_dS_u = []
            dL_dmu_u = []
            dL_dL_u = []
            dL_dKmm = []
            dL_dKmn = []
            dL_dKdiag = []
            posteriors = []
            for q in range(Q):
                (dL_dmu_q, dL_dL_q, dL_dS_q, posterior_q, dL_dKqq, dL_dKdq,
                 dL_dKdiag_q) = self.calculate_gradients(
                     q_U=q_U,
                     S_u=S_u,
                     Su_add_Kuu_chol=Su_add_Kuu_chol,
                     p_U=p_U,
                     q_F=q_F,
                     VE_dm=VE_dm,
                     VE_dv=VE_dv,
                     Ntask=Ntask,
                     M=M,
                     Q=Q,
                     D=D,
                     f_index=f_index,
                     d_index=d_index,
                     q=q)
                dL_dmu_u.append(dL_dmu_q)
                dL_dL_u.append(dL_dL_q)
                dL_dS_u.append(dL_dS_q)
                dL_dKmm.append(dL_dKqq)
                dL_dKmn.append(dL_dKdq)
                dL_dKdiag.append(dL_dKdiag_q)
                posteriors.append(posterior_q)

            gradients = {
                'dL_dmu_u': dL_dmu_u,
                'dL_dL_u': dL_dL_u,
                'dL_dS_u': dL_dS_u,
                'dL_dKmm': dL_dKmm,
                'dL_dKmn': dL_dKmn,
                'dL_dKdiag': dL_dKdiag
            }

            return log_marginal, gradients, posteriors, posteriors_F
    def calculate_gradients(self, q_U, S_u, Su_add_Kuu_chol, p_U, q_F, VE_dm,
                            VE_dv, Ntask, M, Q, D, f_index, d_index, q):
        """
        Calculates gradients of the Log-marginal distribution p(Y) wrt variational
        parameters mu_q, S_q
        """
        # Algebra for q(u) and p(u):
        m_u = q_U.mu_u.copy()
        #L_u = choleskies.flat_to_triang(q_U.chols_u.copy())
        #S_u = np.empty((Q, M, M))
        #[np.dot(L_u[q, :, :], L_u[q, :, :].T, S_u[q, :, :]) for q in range(Q)]
        Kuu = p_U.Kuu.copy()
        Luu = p_U.Luu.copy()
        Kuui = p_U.Kuui.copy()
        S_qi, _ = linalg.dpotri(np.asfortranarray(Su_add_Kuu_chol[q, :, :]))

        if np.any(np.isinf(S_qi)):
            raise ValueError("Sqi: Cholesky representation unstable")

        # KL Terms
        dKL_dmu_q = []
        dKL_dKqq = 0
        for d in range(D):
            dKL_dmu_q.append(np.dot(Kuui[q, :, :], m_u[d][:, q, None]))  #same
            dKL_dKqq += -0.5 * S_qi + 0.5 * Kuui[q, :, :] - 0.5 * Kuui[q, :, :].dot(S_u[q, :, :]).dot(Kuui[q, :, :]) \
                       - 0.5 * np.dot(Kuui[q, :, :], np.dot(m_u[d][:, q, None], m_u[d][:, q, None].T)).dot(Kuui[q, :, :].T)  # same
        #dKL_dS_q = 0.5 * (Kuui[q,:,:] - S_qi)             #old
        dKL_dS_q = 0.5 * (Kuui[q, :, :] - S_qi) * D

        # VE Terms
        #dVE_dmu_q = np.zeros((M, 1))
        dVE_dmu_q = []
        dVE_dS_q = np.zeros((M, M))
        dVE_dKqq = np.zeros((M, M))
        dVE_dKqd = []
        dVE_dKdiag = []
        dL_dmu_q = []

        for d, q_fd in enumerate(q_F):
            Nt = Ntask[f_index[d]]
            dVE_dmu_q.append(
                np.dot(q_fd.Afdu[q, :, :].T,
                       VE_dm[f_index[d]][:, d_index[d]])[:, None])
            dL_dmu_q.append(dVE_dmu_q[d] - dKL_dmu_q[d])
            Adv = q_fd.Afdu[q, :, :].T * VE_dv[f_index[d]][:, d_index[d],
                                                           None].T
            Adv = np.ascontiguousarray(Adv)
            AdvA = np.dot(Adv.reshape(-1, Nt),
                          q_fd.Afdu[q, :, :]).reshape(M, M)
            dVE_dS_q += AdvA

            # Derivatives dKuquq
            tmp_dv = np.dot(AdvA, S_u[q, :, :]).dot(Kuui[q, :, :])
            dVE_dKqq += -tmp_dv - tmp_dv.T  #+ AdvA last term not included in the derivative
            Adm = np.dot(q_fd.Afdu[q, :, :].T, VE_dm[f_index[d]][:, d_index[d],
                                                                 None])
            dVE_dKqq += -np.dot(Adm,
                                np.dot(Kuui[q, :, :], m_u[d][:, q, None]).T)

            # Derivatives dKuqfd
            tmp = np.dot(S_u[q, :, :], Kuui[q, :, :])
            tmp = 2. * tmp  #2. * (tmp - np.eye(M))  # the term -2Adv not included
            dve_kqd = np.dot(np.dot(Kuui[q, :, :], m_u[d][:, q, None]),
                             VE_dm[f_index[d]][:, d_index[d], None].T)
            dve_kqd += np.dot(tmp.T, Adv)
            dVE_dKqd.append(dve_kqd)

            # Derivatives dKdiag
            dVE_dKdiag.append(VE_dv[f_index[d]][:, d_index[d]])

        dVE_dKqq = 0.5 * (dVE_dKqq + dVE_dKqq.T)
        # Sum of VE and KL terms
        #dL_dmu_q = dVE_dmu_q - dKL_dmu_q
        dL_dS_q = dVE_dS_q - dKL_dS_q
        dL_dKqq = dVE_dKqq - dKL_dKqq
        dL_dKdq = dVE_dKqd
        dL_dKdiag = dVE_dKdiag

        # Pass S_q gradients to its low-triangular representation L_q
        chol_u = q_U.chols_u.copy()
        L_q = choleskies.flat_to_triang(chol_u[:, q:q + 1])
        dL_dL_q = 2. * np.array(
            [np.dot(a, b) for a, b in zip(dL_dS_q[None, :, :], L_q)])
        dL_dL_q = choleskies.triang_to_flat(dL_dL_q)

        # Posterior
        posterior_q = []
        for d in range(D):
            posterior_q.append(
                Posterior(mean=m_u[d][:, q, None],
                          cov=S_u[q, :, :] + Kuu[q, :, :],
                          K=Kuu[q, :, :],
                          prior_mean=np.zeros(m_u[d][:, q, None].shape)))

        return dL_dmu_q, dL_dL_q, dL_dS_q, posterior_q, dL_dKqq, dL_dKdq, dL_dKdiag
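For reference, the log_marginal returned by the inference method above is the mini-batch-scaled evidence lower bound

    \mathcal{L} = \sum_{t} \mathrm{batch\_scale}[t] \sum_{n} \mathbb{E}_{q(f)}\big[\log p(y_{t,n}\,|\,f)\big] \;-\; \mathrm{KL},

where the first term is the variational expectation VE computed by likelihood.var_exp and KL is the divergence term returned by calculate_KL.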
Code example #12
    def calculate_gradients(self, q_U, p_U, q_F, VE_dm, VE_dv, Ntask, M, Q, D,
                            f_index, d_index, j):
        """
        Calculates gradients of the Log-marginal distribution p(Y) wrt variational
        parameters mu_q, S_q
        """
        # Algebra for q(u) and p(u):
        m_u = q_U.mu_u.copy()
        L_u = choleskies.flat_to_triang(q_U.chols_u.copy())
        #S_u = np.empty((Q, M, M))
        S_u = np.dot(
            L_u[j, :, :], L_u[j, :, :].T
        )  # This could be computed outside and passed in to reduce computation
        #[np.dot(L_u[q, :, :], L_u[q, :, :].T, S_u[q, :, :]) for q in range(Q)]
        Kuu = p_U.Kuu.copy()
        Luu = p_U.Luu.copy()
        Kuui = p_U.Kuui.copy()
        S_qi, _ = linalg.dpotri(np.asfortranarray(L_u[j, :, :]))

        if np.any(np.isinf(S_qi)):
            raise ValueError("Sqi: Cholesky representation unstable")

        # KL Terms
        dKL_dmu_j = np.dot(Kuui[j, :, :], m_u[:, j, None])
        dKL_dS_j = 0.5 * (Kuui[j, :, :] - S_qi)
        dKL_dKjj = 0.5 * Kuui[j,:,:] - 0.5 * Kuui[j,:,:].dot(S_u).dot(Kuui[j,:,:]) \
                   - 0.5 * np.dot(Kuui[j,:,:],np.dot(m_u[:, j, None],m_u[:, j, None].T)).dot(Kuui[j,:,:].T)

        # VE Terms
        dVE_dmu_j = np.zeros((M, 1))
        dVE_dS_j = np.zeros((M, M))
        dVE_dKjj = np.zeros((M, M))
        dVE_dKjd = []
        dVE_dKdiag = []

        Nt = Ntask[f_index[j]]
        dVE_dmu_j += np.dot(q_F[j].Afdu.T,
                            VE_dm[f_index[j]][:, d_index[j]])[:, None]
        Adv = q_F[j].Afdu.T * VE_dv[f_index[j]][:, d_index[j], None].T
        Adv = np.ascontiguousarray(Adv)
        AdvA = np.dot(Adv.reshape(-1, Nt), q_F[j].Afdu).reshape(M, M)
        dVE_dS_j += AdvA

        # Derivatives dKuquq
        tmp_dv = np.dot(AdvA, S_u).dot(Kuui[j, :, :])
        dVE_dKjj += AdvA - tmp_dv - tmp_dv.T
        Adm = np.dot(q_F[j].Afdu.T, VE_dm[f_index[j]][:, d_index[j], None])
        dVE_dKjj += -np.dot(Adm, np.dot(Kuui[j, :, :], m_u[:, j, None]).T)

        # Derivatives dKuqfd
        tmp = np.dot(S_u, Kuui[j, :, :])
        tmp = 2. * (tmp - np.eye(M))
        dve_kjd = np.dot(np.dot(Kuui[j, :, :], m_u[:, j, None]),
                         VE_dm[f_index[j]][:, d_index[j], None].T)
        dve_kjd += np.dot(tmp.T, Adv)
        dVE_dKjd.append(dve_kjd)

        # Derivatives dKdiag
        dVE_dKdiag.append(VE_dv[f_index[j]][:, d_index[j]])

        dVE_dKjj = 0.5 * (dVE_dKjj + dVE_dKjj.T)
        # Sum of VE and KL terms
        dL_dmu_j = dVE_dmu_j - dKL_dmu_j
        dL_dS_j = dVE_dS_j - dKL_dS_j
        dL_dKjj = dVE_dKjj - dKL_dKjj
        dL_dKdj = dVE_dKjd[0].copy()  # Here we just pass the unique position
        dL_dKdiag = dVE_dKdiag[0].copy()  # Here we just pass the unique position

        # Pass S_q gradients to its low-triangular representation L_q
        chol_u = q_U.chols_u.copy()
        L_j = choleskies.flat_to_triang(chol_u[:, j:j + 1])
        dL_dL_j = 2. * np.array(
            [np.dot(a, b) for a, b in zip(dL_dS_j[None, :, :], L_j)])
        dL_dL_j = choleskies.triang_to_flat(dL_dL_j)

        # Posterior
        posterior_j = Posterior(mean=m_u[:, j, None],
                                cov=S_u,
                                K=Kuu[j, :, :],
                                prior_mean=np.zeros(m_u[:, j, None].shape))

        return dL_dmu_j, dL_dL_j, dL_dS_j, posterior_j, dL_dKjj, dL_dKdj, dL_dKdiag
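The KL-term gradients at the top of the method above follow from differentiating the closed-form Gaussian KL of the earlier examples:

    \frac{\partial \mathrm{KL}}{\partial m_j} = K_{jj}^{-1} m_j, \qquad
    \frac{\partial \mathrm{KL}}{\partial S_j} = \tfrac{1}{2}\big(K_{jj}^{-1} - S_j^{-1}\big), \qquad
    \frac{\partial \mathrm{KL}}{\partial K_{jj}} = \tfrac{1}{2}K_{jj}^{-1} - \tfrac{1}{2}K_{jj}^{-1}S_j K_{jj}^{-1} - \tfrac{1}{2}K_{jj}^{-1} m_j m_j^{\top} K_{jj}^{-1},

which correspond to dKL_dmu_j, dKL_dS_j and dKL_dKjj in the code.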
Code example #13
    def elbo_derivatives(self, q_U, p_U, q_F, VE_dm, VE_dv, Ntask, dims,
                         f_index, d_index, q):
        """
        Description:  Returns ELBO derivatives w.r.t. variational parameters and hyperparameters
        Equation:     gradients = {dL/dmu, dL/dS, dL/dKmm, dL/Kmn, dL/dKdiag}
        Paper:        In Appendix 4 and 5
        Extra_Info:   Gradients w.r.t. hyperparameters use chain-rule and GPy. Note that Kmm, Kmn, Kdiag are matrices
        """
        Q = dims['Q']
        M = dims['M']

        #------------------------------------#      ALGEBRA FOR DERIVATIVES       #------------------------------------#
        #######  Algebra for q(u) and p(u)  #######
        m_u = q_U.mu_u.copy()
        L_u = choleskies.flat_to_triang(q_U.chols_u.copy())
        S_u = np.empty((Q, M, M))
        [np.dot(L_u[q, :, :], L_u[q, :, :].T, S_u[q, :, :]) for q in range(Q)]
        Kuu = p_U.Kuu.copy()
        Kuui = p_U.Kuui.copy()
        S_qi, _ = linalg.dpotri(np.asfortranarray(L_u[q, :, :]))

        if np.any(np.isinf(S_qi)):
            raise ValueError("Sqi: Cholesky representation unstable")

        #-------------------------------------#      DERIVATIVES OF ELBO TERMS      #----------------------------------#
        #######  KL Terms  #######
        dKL_dmu_q = np.dot(Kuui[q, :, :], m_u[:, q, None])
        dKL_dS_q = 0.5 * (Kuui[q, :, :] - S_qi)
        dKL_dKqq = 0.5 * Kuui[q, :, :] - 0.5 * Kuui[q, :, :].dot(S_u[q, :, :]).dot(Kuui[q, :, :]) \
                   - 0.5 * np.dot(Kuui[q, :, :], np.dot(m_u[:, q, None], m_u[:, q, None].T)).dot(Kuui[q, :, :].T)

        ####### Variational Expectation (VE) Terms #######
        dVE_dmu_q = np.zeros((M, 1))
        dVE_dS_q = np.zeros((M, M))
        dVE_dKqq = np.zeros((M, M))
        dVE_dKqd = []
        dVE_dKdiag = []

        for d, q_fd in enumerate(q_F):
            Nt = Ntask[f_index[d]]
            dVE_dmu_q += np.dot(q_fd.Afdu[q, :, :].T,
                                VE_dm[f_index[d]][:, d_index[d]])[:, None]
            Adv = q_fd.Afdu[q, :, :].T * VE_dv[f_index[d]][:, d_index[d],
                                                           None].T
            Adv = np.ascontiguousarray(Adv)
            AdvA = np.dot(Adv.reshape(-1, Nt),
                          q_fd.Afdu[q, :, :]).reshape(M, M)
            dVE_dS_q += AdvA

            #######  Derivatives dKuquq #######
            tmp_dv = np.dot(AdvA, S_u[q, :, :]).dot(Kuui[q, :, :])
            dVE_dKqq += AdvA - tmp_dv - tmp_dv.T
            Adm = np.dot(q_fd.Afdu[q, :, :].T, VE_dm[f_index[d]][:, d_index[d],
                                                                 None])
            dVE_dKqq += -np.dot(Adm, np.dot(Kuui[q, :, :], m_u[:, q, None]).T)

            #######  Derivatives dKuqfd  #######
            tmp = np.dot(S_u[q, :, :], Kuui[q, :, :])
            tmp = 2. * (tmp - np.eye(M))
            dve_kqd = np.dot(np.dot(Kuui[q, :, :], m_u[:, q, None]),
                             VE_dm[f_index[d]][:, d_index[d], None].T)
            dve_kqd += np.dot(tmp.T, Adv)
            dVE_dKqd.append(dve_kqd)

            #######  Derivatives dKdiag  #######
            dVE_dKdiag.append(VE_dv[f_index[d]][:, d_index[d]])

        dVE_dKqq = 0.5 * (dVE_dKqq + dVE_dKqq.T)

        #--------------------------------------#      FINAL ELBO DERIVATIVES      #------------------------------------#
        #######  ELBO derivatives ---> sum of VE and KL terms  #######
        dL_dmu_q = dVE_dmu_q - dKL_dmu_q
        dL_dS_q = dVE_dS_q - dKL_dS_q
        dL_dKqq = dVE_dKqq - dKL_dKqq
        dL_dKdq = dVE_dKqd
        dL_dKdiag = dVE_dKdiag

        ####### Pass S_q gradients to its low-triangular representation L_q  #######
        chol_u = q_U.chols_u.copy()
        L_q = choleskies.flat_to_triang(chol_u[:, q:q + 1])
        dL_dL_q = 2. * np.array(
            [np.dot(a, b) for a, b in zip(dL_dS_q[None, :, :], L_q)])
        dL_dL_q = choleskies.triang_to_flat(dL_dL_q)

        return dL_dmu_q, dL_dL_q, dL_dS_q, dL_dKqq, dL_dKdq, dL_dKdiag
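The final step converts the gradient w.r.t. S_q into a gradient w.r.t. its Cholesky factor: since S_q = L_q L_q^{\top} and \partial\mathcal{L}/\partial S_q is symmetric,

    \frac{\partial \mathcal{L}}{\partial L_q} = 2\,\frac{\partial \mathcal{L}}{\partial S_q}\, L_q ,

of which only the lower-triangular part is kept before choleskies.triang_to_flat flattens it back to the stored parameterisation.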
Code example #14
    def calculate_gradients(self, q_U, p_U_new, p_U_old, p_U_var, q_F, VE_dm, VE_dv, Ntask, M, Q, D, f_index, d_index,q):
        """
        Calculates gradients of the Log-marginal distribution p(Y) wrt variational
        parameters mu_q, S_q
        """
        # Algebra for q(u):
        m_u = q_U.mu_u.copy()
        L_u = choleskies.flat_to_triang(q_U.chols_u.copy())
        S_u = np.empty((Q, M, M))
        [np.dot(L_u[q, :, :], L_u[q, :, :].T, S_u[q, :, :]) for q in range(Q)]

        S_qi, _ = linalg.dpotri(np.asfortranarray(L_u[q, :, :]))
        if np.any(np.isinf(S_qi)):
            raise ValueError("Sqi: Cholesky representation unstable")

        # Algebra for p(u)
        Kuu_new = p_U_new.Kuu.copy()
        Luu_new = p_U_new.Luu.copy()
        Kuui_new = p_U_new.Kuui.copy()

        Kuu_old = p_U_old.Kuu.copy()
        Luu_old = p_U_old.Luu.copy()
        Kuui_old = p_U_old.Kuui.copy()

        Mu_var = p_U_var.Mu.copy()
        Kuu_var = p_U_var.Kuu.copy()
        Luu_var = p_U_var.Luu.copy()
        Kuui_var = p_U_var.Kuui.copy()


        # KL Terms
        dKLnew_dmu_q = np.dot(Kuui_new[q,:,:], m_u[:, q, None])
        dKLnew_dS_q = 0.5 * (Kuui_new[q,:,:] - S_qi)

        dKLold_dmu_q = np.dot(Kuui_old[q,:,:], m_u[:, q, None])
        dKLold_dS_q = 0.5 * (Kuui_old[q,:,:] - S_qi)

        dKLvar_dmu_q = np.dot(Kuui_var[q,:,:], (m_u[:, q, None] - Mu_var[q, :, :])) # important!! (Eq. 69 MCB)
        dKLvar_dS_q = 0.5 * (Kuui_var[q,:,:] - S_qi)

        dKLnew_dKqq = 0.5 * Kuui_new[q,:,:] - 0.5 * Kuui_new[q,:,:].dot(S_u[q, :, :]).dot(Kuui_new[q,:,:]) \
                   - 0.5 * np.dot(Kuui_new[q,:,:],np.dot(m_u[:, q, None],m_u[:, q, None].T)).dot(Kuui_new[q,:,:].T)

        dKLold_dKqq = 0.5 * Kuui_old[q,:,:] - 0.5 * Kuui_old[q,:,:].dot(S_u[q, :, :]).dot(Kuui_old[q,:,:]) \
                   - 0.5 * np.dot(Kuui_old[q,:,:],np.dot(m_u[:, q, None],m_u[:, q, None].T)).dot(Kuui_old[q,:,:].T)

        #dKLvar_dKqq = 0.5 * Kuui_var[q,:,:] - 0.5 * Kuui_var[q,:,:].dot(S_u[q, :, :]).dot(Kuui_var[q,:,:]) \
        #           - 0.5 * np.dot(Kuui_var[q,:,:],np.dot(m_u[:, q, None],m_u[:, q, None].T)).dot(Kuui_var[q,:,:].T) \
        #            + 0.5 * np.dot(Kuui_var[q,:,:], np.dot(m_u[:,q,None], Mu_var[q,:,:].T)).dot(Kuui_var[q,:,:].T) \
        #            + 0.5 * np.dot(Kuui_var[q,:,:], np.dot(Mu_var[q,:,:], m_u[:,q,None].T)).dot(Kuui_var[q,:,:].T) \
        #              - 0.5 * np.dot(Kuui_var[q,:,:],np.dot(Mu_var[q,:,:], Mu_var[q,:,:].T)).dot(Kuui_var[q,:,:].T)


        #KLvar += 0.5 * np.sum(Kuui_var[q, :, :] * S_u[q, :, :]) \
        #             + 0.5 * np.dot((Mu_var[q, :, :] - m_u[:, q, None]).T, np.dot(Kuui_var[q, :, :], (Mu_var[q, :, :] - m_u[:, q, None]))) \
        #             - 0.5 * M \
        #             + 0.5 * 2. * np.sum(np.log(np.abs(np.diag(Luu_var[q, :, :])))) \
        #             - 0.5 * 2. * np.sum(np.log(np.abs(np.diag(L_u[q, :, :]))))

        #

        # VE Terms
        dVE_dmu_q = np.zeros((M, 1))
        dVE_dS_q = np.zeros((M, M))
        dVE_dKqq = np.zeros((M, M))
        dVE_dKqd = []
        dVE_dKdiag = []

        for d, q_fd in enumerate(q_F):
            Nt = Ntask[f_index[d]]
            dVE_dmu_q += np.dot(q_fd.Afdu[q, :, :].T, VE_dm[f_index[d]][:,d_index[d]])[:, None]
            Adv = q_fd.Afdu[q,:,:].T * VE_dv[f_index[d]][:,d_index[d],None].T
            Adv = np.ascontiguousarray(Adv)
            AdvA = np.dot(Adv.reshape(-1, Nt), q_fd.Afdu[q, :, :]).reshape(M, M)
            dVE_dS_q += AdvA

            # Derivatives dKuquq
            tmp_dv = np.dot(AdvA, S_u[q, :, :]).dot(Kuui_new[q,:,:])
            dVE_dKqq += AdvA - tmp_dv - tmp_dv.T
            Adm = np.dot(q_fd.Afdu[q, :, :].T, VE_dm[f_index[d]][:,d_index[d],None])
            dVE_dKqq += - np.dot(Adm, np.dot(Kuui_new[q,:,:], m_u[:, q, None]).T)

            # Derivatives dKuqfd
            tmp = np.dot(S_u[q, :, :], Kuui_new[q,:,:])
            tmp = 2. * (tmp - np.eye(M))
            dve_kqd = np.dot(np.dot(Kuui_new[q,:,:], m_u[:, q, None]), VE_dm[f_index[d]][:,d_index[d],None].T)
            dve_kqd += np.dot(tmp.T, Adv)
            dVE_dKqd.append(dve_kqd)

            # Derivatives dKdiag
            dVE_dKdiag.append(VE_dv[f_index[d]][:,d_index[d]])

        dVE_dKqq = 0.5 * (dVE_dKqq + dVE_dKqq.T)

        # Derivatives of variational parameters
        dL_dmu_q = dVE_dmu_q - dKLnew_dmu_q + dKLold_dmu_q - dKLvar_dmu_q
        dL_dS_q = dVE_dS_q - dKLnew_dS_q + dKLold_dS_q - dKLvar_dS_q

        # Derivatives of prior hyperparameters
        # if using Zgrad, dL_dKqq = dVE_dKqq - dKLnew_dKqq + dKLold_dKqq - dKLvar_dKqq
        # otherwise for hyperparameters: dL_dKqq = dVE_dKqq - dKLnew_dKqq
        dL_dKqq = dVE_dKqq - dKLnew_dKqq #+ dKLold_dKqq - dKLvar_dKqq # dKLold_dKqq only for Zgrad; dKLvar_dKqq to be done (for Zgrad)
        dL_dKdq = dVE_dKqd
        dL_dKdiag = dVE_dKdiag

        # Pass S_q gradients to its low-triangular representation L_q
        chol_u = q_U.chols_u.copy()
        L_q = choleskies.flat_to_triang(chol_u[:,q:q+1])
        dL_dL_q = 2. * np.array([np.dot(a, b) for a, b in zip(dL_dS_q[None,:,:], L_q)])
        dL_dL_q = choleskies.triang_to_flat(dL_dL_q)

        # Posterior
        posterior_q = Posterior(mean=m_u[:, q, None], cov=S_u[q, :, :], K=Kuu_new[q,:,:], prior_mean=np.zeros(m_u[:, q, None].shape))

        return dL_dmu_q, dL_dL_q, dL_dS_q, posterior_q, dL_dKqq, dL_dKdq, dL_dKdiag