def calculate_q_f(self, X, Z, q_U, p_U, kern_list, B, M, N, Q, D, d):
    """
    Calculates the mean and variance of q(f_d) = E_{q(U)}[p(f_d|U)].
    """
    # Algebra for q(u): recover the variational covariances S_uq = L_uq L_uq^T
    m_u = q_U.mu_u.copy()
    L_u = choleskies.flat_to_triang(q_U.chols_u.copy())
    S_u = np.empty((Q, M, M))
    for q in range(Q):
        np.dot(L_u[q, :, :], L_u[q, :, :].T, S_u[q, :, :])

    # Algebra for p(f_d|u): prior cross-covariance and function covariance
    Kfdu = util.cross_covariance(X, Z, B, kern_list, d)
    Luu = p_U.Luu.copy()
    Kff = util.function_covariance(X, B, kern_list, d)
    Kff_diag = np.diag(Kff)

    # Algebra for q(f_d) = E_{q(u)}[p(f_d|u)]
    Afdu = np.empty((Q, N, M))  # Afdu[q] = K_{fd,uq} K_{uq,uq}^{-1}
    m_fd = np.zeros((N, 1))
    v_fd = np.zeros((N, 1))
    S_fd = np.zeros((N, N))
    v_fd += Kff_diag[:, None]
    S_fd += Kff
    for q in range(Q):
        # R = K_{uq,uq}^{-1} K_{fd,uq}^T, solved via the prior Cholesky factor
        R, _ = linalg.dpotrs(np.asfortranarray(Luu[q, :, :]),
                             Kfdu[:, q * M:(q * M) + M].T)
        Afdu[q, :, :] = R.T
        # Mean contribution: m_fd += A_q m_uq
        m_fd += np.dot(Afdu[q, :, :], m_u[:, q, None])
        # Variance contribution: diag(A_q S_uq A_q^T) - diag(A_q K_uq,uq A_q^T)
        tmp = dtrmm(alpha=1.0, a=L_u[q, :, :].T, b=R, lower=0, trans_a=0)
        v_fd += np.sum(np.square(tmp), 0)[:, None] \
                - np.sum(R * Kfdu[:, q * M:(q * M) + M].T, 0)[:, None]
        # Full covariance contribution: A_q S_uq A_q^T - A_q K_uq,uq A_q^T
        S_fd += np.dot(np.dot(R.T, S_u[q, :, :]), R) \
                - np.dot(Kfdu[:, q * M:(q * M) + M], R)

    if (v_fd < 0).any():
        print('Warning: negative variances found in q(f_d)!')

    q_fd = qfd(m_fd=m_fd, v_fd=v_fd, Kfdu=Kfdu, Afdu=Afdu, S_fd=S_fd)
    return q_fd
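# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original API): the per-latent-function
# moment update that calculate_q_f accumulates inside its loop over q, written
# in isolation with plain NumPy/SciPy. The helper name and its inputs are
# assumptions for the demo; only the algebra mirrors the loop above, i.e. with
# A = K_fu Kuu^{-1}:
#   m = A m_u
#   v = diag(K_ff) + diag(A S_u A^T) - diag(A Kuu A^T)
def _sketch_single_latent_moments(Kfu, Luu, m_u, L_u, Kff_diag):
    import numpy as np
    from scipy.linalg import cho_solve
    R = cho_solve((Luu, True), Kfu.T)   # R = Kuu^{-1} Kfu^T, shape (M, N)
    m = np.dot(R.T, m_u)                # A m_u, shape (N, 1)
    S_u = np.dot(L_u, L_u.T)            # variational covariance S_u = L_u L_u^T
    v = Kff_diag + np.sum(R * np.dot(S_u, R), 0) - np.sum(R * Kfu.T, 0)
    return m, v[:, None]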
def inference(self, q_u_means, q_u_chols, X, Y, Z, kern_list, likelihood,
              B_list, Y_metadata, KL_scale=1.0, batch_scale=None,
              predictive=False):
    M = Z.shape[0]
    T = len(Y)
    if batch_scale is None:
        batch_scale = [1.0] * T
    Ntask = [Y[t].shape[0] for t in range(T)]
    Q = len(kern_list)
    D = likelihood.num_output_functions(Y_metadata)
    Kuu, Luu, Kuui = util.latent_funs_cov(Z, kern_list)
    p_U = pu(Kuu=Kuu, Luu=Luu, Kuui=Kuui)
    q_U = qu(mu_u=q_u_means, chols_u=q_u_chols)

    # For every latent output function f_d, calculate q(f_d) and keep it as q(F):
    q_F = []
    posteriors_F = []
    f_index = Y_metadata['function_index'].flatten()
    d_index = Y_metadata['d_index'].flatten()

    for d in range(D):
        Xtask = X[f_index[d]]
        q_fd = self.calculate_q_f(X=Xtask, Z=Z, q_U=q_U, p_U=p_U,
                                  kern_list=kern_list, B=B_list, M=M,
                                  N=Xtask.shape[0], Q=Q, D=D, d=d)
        # Posterior objects for output functions (used in prediction)
        posterior_fd = Posterior(mean=q_fd.m_fd.copy(),
                                 cov=q_fd.S_fd.copy(),
                                 K=util.function_covariance(X=Xtask, B=B_list,
                                                            kernel_list=kern_list,
                                                            d=d),
                                 prior_mean=np.zeros(q_fd.m_fd.shape))
        posteriors_F.append(posterior_fd)
        q_F.append(q_fd)

    # Stack the moments of q(f_d) per task, one column per output function:
    mu_F = []
    v_F = []
    for t in range(T):
        mu_F.append(np.hstack([q_fd.m_fd for d, q_fd in enumerate(q_F)
                               if f_index[d] == t]))
        v_F.append(np.hstack([q_fd.v_fd for d, q_fd in enumerate(q_F)
                              if f_index[d] == t]))

    if predictive:
        # Predictive mode: only the posterior objects are needed
        return posteriors_F
    else:
        # Variational Expectations, rescaled for minibatching
        VE = likelihood.var_exp(Y, mu_F, v_F, Y_metadata)
        VE_dm, VE_dv = likelihood.var_exp_derivatives(Y, mu_F, v_F, Y_metadata)
        for t in range(T):
            VE[t] = VE[t] * batch_scale[t]
            VE_dm[t] = VE_dm[t] * batch_scale[t]
            VE_dv[t] = VE_dv[t] * batch_scale[t]

        # KL Divergence KL[q(U)||p(U)]
        KL = self.calculate_KL(q_U=q_U, p_U=p_U, M=M, Q=Q)

        # ELBO: lower bound on the log marginal likelihood log p(Y)
        F = 0
        for t in range(T):
            F += VE[t].sum()
        log_marginal = F - KL

        # Gradients and Posteriors
        dL_dmu_u = []
        dL_dL_u = []
        dL_dKmm = []
        dL_dKmn = []
        dL_dKdiag = []
        posteriors = []
        for q in range(Q):
            (dL_dmu_q, dL_dL_q, posterior_q, dL_dKqq, dL_dKdq,
             dL_dKdiag_q) = self.calculate_gradients(q_U=q_U, p_U=p_U,
                                                     q_F=q_F, VE_dm=VE_dm,
                                                     VE_dv=VE_dv, Ntask=Ntask,
                                                     M=M, Q=Q, D=D,
                                                     f_index=f_index,
                                                     d_index=d_index, q=q)
            dL_dmu_u.append(dL_dmu_q)
            dL_dL_u.append(dL_dL_q)
            dL_dKmm.append(dL_dKqq)
            dL_dKmn.append(dL_dKdq)
            dL_dKdiag.append(dL_dKdiag_q)
            posteriors.append(posterior_q)

        gradients = {'dL_dmu_u': dL_dmu_u,
                     'dL_dL_u': dL_dL_u,
                     'dL_dKmm': dL_dKmm,
                     'dL_dKmn': dL_dKmn,
                     'dL_dKdiag': dL_dKdiag}

        return log_marginal, gradients, posteriors, posteriors_F
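# ---------------------------------------------------------------------------
# Illustrative sketch (hypothetical helper, not part of the original code):
# how the bound returned by inference() decomposes. The ELBO is the sum of
# per-task variational expectations, rescaled by batch_scale (typically
# N_task / batchsize when minibatching), minus the KL term KL[q(U)||p(U)]:
def _sketch_elbo(VE, batch_scale, KL):
    # e.g. _sketch_elbo([np.full((10, 1), -1.3), np.full((8, 1), -0.7)],
    #                   [5.0, 5.0], 2.1)
    return sum(bs * ve.sum() for ve, bs in zip(VE, batch_scale)) - KL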
def posteriors_F(self, Xnew, which_out=None, kern_list=None):
    # Returns all the posteriors q(f*) associated with each output of the
    # HetMOGP model. Xnew can be a list with one entry per likelihood/output,
    # or a single numpy array, in which case it is replicated for each output.
    class empty_posterior():
        def __init__(self):
            self.mean = np.array([0.0])
            self.covariance = np.array([0.0])

    fake_posterior = empty_posterior()
    if kern_list is None:
        kern_list = self.kern_list

    if isinstance(Xnew, list):
        Xmulti_all_new = Xnew
    else:
        Xmulti_all_new = []
        for i in range(self.num_output_funcs):
            Xmulti_all_new.append(Xnew.copy())

    M = self.Z.shape[0]
    Q = len(self.kern_list)
    D = self.likelihood.num_output_functions(self.Y_metadata)
    Kuu, Luu, Kuui = util.latent_funs_cov(self.Z, self.kern_list)
    p_U = pu(Kuu=Kuu, Luu=Luu, Kuui=Kuui)
    q_U = qu(mu_u=self.q_u_means, chols_u=self.q_u_chols)

    # For every latent output function f_d, calculate q(f_d) and keep it as q(F):
    posteriors_F = []
    f_index = self.Y_metadata['function_index'].flatten()
    d_index = self.Y_metadata['d_index'].flatten()

    # Select which outputs to compute posteriors for (all by default):
    if which_out is None:
        index_aux = f_index.copy()
    else:
        which_out = np.array(which_out)
        index_aux = -1 * np.ones_like(f_index)
        for i in range(which_out.shape[0]):
            posix = np.where(f_index == which_out[i])
            index_aux[posix] = f_index[posix].copy()

    for d in range(D):
        if f_index[d] == index_aux[d]:
            Xtask = Xmulti_all_new[f_index[d]]
            q_fd = self.inference_method.calculate_q_f(X=Xtask, Z=self.Z,
                                                       q_U=q_U, p_U=p_U,
                                                       kern_list=self.kern_list,
                                                       B=self.B_list, M=M,
                                                       N=Xtask.shape[0],
                                                       Q=Q, D=D, d=d)
            # Posterior objects for output functions (used in prediction)
            posterior_fd = Posterior(mean=q_fd.m_fd.copy(),
                                     cov=q_fd.S_fd.copy(),
                                     K=util.function_covariance(X=Xtask,
                                                                B=self.B_list,
                                                                kernel_list=self.kern_list,
                                                                d=d),
                                     prior_mean=np.zeros(q_fd.m_fd.shape))
            posteriors_F.append(posterior_fd)
        else:
            # Placeholder for outputs not selected in which_out
            #posteriors_F.append(fake_posterior)
            posteriors_F.append([])

    return posteriors_F
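# ---------------------------------------------------------------------------
# Illustrative usage sketch (hypothetical `model` and helper name): querying
# the posteriors q(f*) of selected outputs at test points. Outputs not listed
# in which_out come back as empty lists, so they are filtered out here.
def _sketch_predict_latent_means(model, Xtest, which_out=None):
    posts = model.posteriors_F(Xtest, which_out=which_out)
    return [p.mean for p in posts if not isinstance(p, list)]
#
# e.g. Xtest = np.linspace(0, 1, 100)[:, None]
#      means = _sketch_predict_latent_means(model, Xtest, which_out=[0])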