Example #1
    def calculate_q_f(self, X, Z, q_U, p_U, kern_list, B, M, N, Q, D, d):
        """
        Calculates the mean and variance of q(f_d) as
        Equation: E_q(U)\{p(f_d|U)\}
        """
        # Algebra for q(u):
        m_u = q_U.mu_u.copy()
        L_u = choleskies.flat_to_triang(q_U.chols_u.copy())
        S_u = np.empty((Q, M, M))
        for q in range(Q):  # S_q = L_q L_q^T
            np.dot(L_u[q, :, :], L_u[q, :, :].T, out=S_u[q, :, :])

        # Algebra for p(f_d|u):
        Kfdu = util.cross_covariance(X, Z, B, kern_list, d)
        Kuu = p_U.Kuu.copy()
        Luu = p_U.Luu.copy()
        Kuui = p_U.Kuui.copy()
        Kff = util.function_covariance(X, B, kern_list, d)
        Kff_diag = np.diag(Kff)

        # Algebra for q(f_d) = E_{q(u)}[p(f_d|u)]
        Afdu = np.empty((Q, N, M))  # Afdu[q] = K_{f_d u_q} K_{u_q u_q}^{-1}
        m_fd = np.zeros((N, 1))
        v_fd = np.zeros((N, 1))
        S_fd = np.zeros((N, N))
        v_fd += Kff_diag[:, None]
        S_fd += Kff
        for q in range(Q):
            # A_q = K_{f_d u_q} K_{u_q u_q}^{-1}, computed via Cholesky solve
            R, _ = linalg.dpotrs(np.asfortranarray(Luu[q, :, :]),
                                 Kfdu[:, q * M:(q * M) + M].T)
            Afdu[q, :, :] = R.T
            m_fd += np.dot(Afdu[q, :, :], m_u[:, q, None])  # mean: A_q m_q
            tmp = dtrmm(alpha=1.0, a=L_u[q, :, :].T, b=R, lower=0, trans_a=0)
            # variance: diag(A_q (S_q - K_{u_q u_q}) A_q^T)
            v_fd += np.sum(np.square(tmp), 0)[:, None] - np.sum(
                R * Kfdu[:, q * M:(q * M) + M].T, 0)[:, None]
            # full covariance: A_q (S_q - K_{u_q u_q}) A_q^T
            S_fd += np.dot(np.dot(R.T, S_u[q, :, :]), R) - np.dot(
                Kfdu[:, q * M:(q * M) + M], R)

        if (v_fd < 0).any():
            print('Warning: negative variances in q(f_d)!')

        q_fd = qfd(m_fd=m_fd, v_fd=v_fd, Kfdu=Kfdu, Afdu=Afdu, S_fd=S_fd)
        return q_fd
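
For reference, the loop above assembles the standard sparse-GP marginal. Reading the identities off the code (a reconstruction, not a formula quoted in the source), with A_q = K_{f_d u_q} K_{u_q u_q}^{-1} and q(u_q) = N(m_q, S_q), where S_q = L_q L_q^T:

    q(f_d) = \int p(f_d \mid u)\, q(u)\, du
           = \mathcal{N}\!\left(\sum_{q=1}^{Q} A_q m_q,\;
             K_{f_d f_d} + \sum_{q=1}^{Q} A_q \big(S_q - K_{u_q u_q}\big) A_q^{\top}\right)

so m_fd, v_fd and S_fd accumulate the mean, the covariance diagonal, and the full covariance, respectively.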
Example #2
    def inference(self,
                  q_u_means,
                  q_u_chols,
                  X,
                  Y,
                  Z,
                  kern_list,
                  likelihood,
                  B_list,
                  Y_metadata,
                  KL_scale=1.0,
                  batch_scale=None,
                  predictive=False):
        M = Z.shape[0]
        T = len(Y)
        if batch_scale is None:
            batch_scale = [1.0] * T
        Ntask = [Y[t].shape[0] for t in range(T)]
        Q = len(kern_list)
        D = likelihood.num_output_functions(Y_metadata)
        Kuu, Luu, Kuui = util.latent_funs_cov(Z, kern_list)
        p_U = pu(Kuu=Kuu, Luu=Luu, Kuui=Kuui)
        q_U = qu(mu_u=q_u_means, chols_u=q_u_chols)

        # for every latent function f_d calculate q(f_d) and keep it as q(F):
        q_F = []
        posteriors_F = []
        f_index = Y_metadata['function_index'].flatten()
        d_index = Y_metadata['d_index'].flatten()

        for d in range(D):
            Xtask = X[f_index[d]]
            q_fd = self.calculate_q_f(X=Xtask,
                                      Z=Z,
                                      q_U=q_U,
                                      p_U=p_U,
                                      kern_list=kern_list,
                                      B=B_list,
                                      M=M,
                                      N=Xtask.shape[0],
                                      Q=Q,
                                      D=D,
                                      d=d)
            # Posterior objects for output functions (used in prediction)
            posterior_fd = Posterior(mean=q_fd.m_fd.copy(),
                                     cov=q_fd.S_fd.copy(),
                                     K=util.function_covariance(
                                         X=Xtask,
                                         B=B_list,
                                         kernel_list=kern_list,
                                         d=d),
                                     prior_mean=np.zeros(q_fd.m_fd.shape))
            posteriors_F.append(posterior_fd)
            q_F.append(q_fd)

        mu_F = []
        v_F = []
        for t in range(T):
            mu_F_task = np.empty((X[t].shape[0], 1))
            v_F_task = np.empty((X[t].shape[0], 1))
            for d, q_fd in enumerate(q_F):
                if f_index[d] == t:
                    mu_F_task = np.hstack((mu_F_task, q_fd.m_fd))
                    v_F_task = np.hstack((v_F_task, q_fd.v_fd))

            mu_F.append(mu_F_task[:, 1:])
            v_F.append(v_F_task[:, 1:])

        # posterior_Fnew for predictive
        if predictive:
            return posteriors_F
        # inference for rest of cases
        else:
            # Variational Expectations
            VE = likelihood.var_exp(Y, mu_F, v_F, Y_metadata)
            VE_dm, VE_dv = likelihood.var_exp_derivatives(
                Y, mu_F, v_F, Y_metadata)
            for t in range(T):
                VE[t] = VE[t] * batch_scale[t]
                VE_dm[t] = VE_dm[t] * batch_scale[t]
                VE_dv[t] = VE_dv[t] * batch_scale[t]

            # KL Divergence
            KL = self.calculate_KL(q_U=q_U, p_U=p_U, M=M, Q=Q)

            # Log Marginal log(p(Y))
            F = 0
            for t in range(T):
                F += VE[t].sum()

            log_marginal = F - KL

            # Gradients and Posteriors
            dL_dmu_u = []
            dL_dL_u = []
            dL_dKmm = []
            dL_dKmn = []
            dL_dKdiag = []
            posteriors = []
            for q in range(Q):
                (dL_dmu_q, dL_dL_q, posterior_q, dL_dKqq, dL_dKdq,
                 dL_dKdiag_q) = self.calculate_gradients(q_U=q_U,
                                                         p_U=p_U,
                                                         q_F=q_F,
                                                         VE_dm=VE_dm,
                                                         VE_dv=VE_dv,
                                                         Ntask=Ntask,
                                                         M=M,
                                                         Q=Q,
                                                         D=D,
                                                         f_index=f_index,
                                                         d_index=d_index,
                                                         q=q)
                dL_dmu_u.append(dL_dmu_q)
                dL_dL_u.append(dL_dL_q)
                dL_dKmm.append(dL_dKqq)
                dL_dKmn.append(dL_dKdq)
                dL_dKdiag.append(dL_dKdiag_q)
                posteriors.append(posterior_q)

            gradients = {
                'dL_dmu_u': dL_dmu_u,
                'dL_dL_u': dL_dL_u,
                'dL_dKmm': dL_dKmm,
                'dL_dKmn': dL_dKmn,
                'dL_dKdiag': dL_dKdiag
            }

            return log_marginal, gradients, posteriors, posteriors_F
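
A note on batch_scale: it supports minibatch (stochastic) training, where Y holds a minibatch and each task's variational expectation must be rescaled into an unbiased estimate of the full-data sum. A minimal sketch of how such a scale is typically built; the names N_full and Y_batch are hypothetical, not from the source:

    # Rescale each task's minibatch likelihood term to full-data scale.
    # `N_full[t]` is the full dataset size of task t, `Y_batch` the minibatch.
    batch_scale = [N_full[t] / float(Y_batch[t].shape[0]) for t in range(T)]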

    def posteriors_F(self, Xnew, which_out=None, kern_list=None):
        # Returns the q(f*) posterior associated with each output function.
        # Xnew may be a list with one entry per likelihood defined for the
        # HetMOGP, or a single numpy array that is replicated for each output.

        # Placeholder posterior, kept for the commented-out fallback below
        class empty_posterior():
            def __init__(self):
                self.mean = np.array([0.0])
                self.covariance = np.array([0.0])

        fake_posterior = empty_posterior()

        if kern_list is None:
            kern_list = self.kern_list

        if isinstance(Xnew, list):
            Xmulti_all_new = Xnew
        else:
            Xmulti_all_new = []
            for i in range(self.num_output_funcs):
                Xmulti_all_new.append(Xnew.copy())

        M = self.Z.shape[0]
        Q = len(self.kern_list)
        D = self.likelihood.num_output_functions(self.Y_metadata)
        Kuu, Luu, Kuui = util.latent_funs_cov(self.Z, self.kern_list)
        p_U = pu(Kuu=Kuu, Luu=Luu, Kuui=Kuui)
        q_U = qu(mu_u=self.q_u_means, chols_u=self.q_u_chols)

        # for every latent function f_d calculate q(f_d) and keep it as q(F):
        posteriors_F = []
        f_index = self.Y_metadata['function_index'].flatten()
        d_index = self.Y_metadata['d_index'].flatten()

        if which_out is None:
            indix_aux = f_index.copy()
        else:
            which_out = np.array(which_out)
            indix_aux = -1 * np.ones_like(f_index)
            for i in range(which_out.shape[0]):
                posix = np.where(f_index == which_out[i])
                indix_aux[posix] = f_index[posix].copy()

        for d in range(D):
            if f_index[d] == indix_aux[d]:
                Xtask = Xmulti_all_new[f_index[d]]
                q_fd = self.inference_method.calculate_q_f(
                    X=Xtask,
                    Z=self.Z,
                    q_U=q_U,
                    p_U=p_U,
                    kern_list=self.kern_list,
                    B=self.B_list,
                    M=M,
                    N=Xtask.shape[0],
                    Q=Q,
                    D=D,
                    d=d)
                # Posterior objects for output functions (used in prediction)
                posterior_fd = Posterior(mean=q_fd.m_fd.copy(),
                                         cov=q_fd.S_fd.copy(),
                                         K=util.function_covariance(
                                             X=Xtask,
                                             B=self.B_list,
                                             kernel_list=self.kern_list,
                                             d=d),
                                         prior_mean=np.zeros(q_fd.m_fd.shape))
                posteriors_F.append(posterior_fd)
            else:
                #posteriors_F.append(fake_posterior)
                posteriors_F.append([])
        return posteriors_F
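
A minimal usage sketch, assuming `model` is a trained instance of the model class exposing this method (the variable names below are illustrative, not from the source):

    import numpy as np

    Xnew = np.linspace(0.0, 1.0, 50)[:, None]
    # Request q(f*) only for output 0; slots for other outputs come back as [].
    posts = model.posteriors_F(Xnew, which_out=[0])
    for d, post in enumerate(posts):
        if isinstance(post, list):  # output was not requested
            continue
        print(d, post.mean.shape, post.covariance.shape)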