def calculateELBO_k(self, k):
    """
    Method to calculate ELBO per factor - required for optimization in Sigma node.

    Computes E_q[log p(.)] - E_q[log q(.)] for the k-th factor of a node with a
    full-covariance (multivariate normal) variational distribution. The constant
    -N/2*log(2*pi) terms cancel between the p and q parts and the remaining -N/2
    is added by the caller (see trailing comment on lb_q).

    Cleanup vs previous version: removed the commented-out duplicate of the
    computation and the unused locals (Qmean, p_cov).
    """
    # Collect parameters and expectations of current node
    Qpar, Qexp = self.Q.getParameters(), self.Q.getExpectations()
    Qcov = Qpar['cov']
    QE = Qexp['E']

    # Prior precision per factor: taken from the Sigma node when present,
    # otherwise from the fixed prior stored on this node.
    if 'Sigma' in self.markov_blanket:
        Sigma = self.markov_blanket['Sigma'].getInverseTerms()
        p_cov_inv = Sigma['inv']
        p_cov_inv_logdet = Sigma['inv_logdet']
    else:
        p_cov_inv = self.p_cov_inv
        p_cov_inv_logdet = np.linalg.slogdet(self.p_cov_inv)[1]

    # compute term from the precision factor in front of the Gaussian
    term1 = 0.5 * p_cov_inv_logdet[k]
    # compute terms from the exponential in the Gaussian
    # (together they form the expectation of the quadratic form)
    term2 = -0.5 * np.trace(gpu_utils.dot(p_cov_inv[k, :, :], Qcov[k, :, :]))
    term3 = -0.5 * gpu_utils.dot(QE[:, k].transpose(),
                                 gpu_utils.dot(p_cov_inv[k, :, :], QE[:, k]))
    lb_p = term1 + term2 + term3

    lb_q = -0.5 * np.linalg.slogdet(Qcov[k, :, :])[1]  # term -N*(log(2* np.pi)) cancels out between p and q term; -N/2 is added below

    return lb_p - lb_q
def calculateELBO_k(self, k):
    """
    Calculate the ELBO contribution of the k-th factor of the U node.

    Requires a Sigma node in the Markov blanket (asserted below); the prior
    precision and its log-determinant come from that node. Constant
    -N/2*log(2*pi) terms cancel between the p and q parts and are added by
    the caller.

    Cleanup vs previous version: removed the unused locals (Qmean, p_cov).
    """
    # Collect parameters and expectations of current node
    Qpar, Qexp = self.Q.getParameters(), self.Q.getExpectations()
    Qcov = Qpar['cov']
    QE = Qexp['E']

    assert "Sigma" in self.markov_blanket, "Sigma not found in Markov blanket of U node"
    Sigma = self.markov_blanket['Sigma'].getExpectations()
    p_cov_inv = Sigma['inv']
    p_cov_inv_logdet = Sigma['inv_logdet']

    # compute term from the exponential in the Gaussian
    tmp1 = -0.5 * (
        np.trace(gpu_utils.dot(p_cov_inv[k, :, :], Qcov[k, :, :]))
        + gpu_utils.dot(QE[:, k].transpose(),
                        gpu_utils.dot(p_cov_inv[k, :, :], QE[:, k]))
    )  # expectation of quadratic form
    # compute term from the precision factor in front of the Gaussian
    tmp2 = 0.5 * p_cov_inv_logdet[k]
    lb_p = tmp1 + tmp2

    lb_q = -0.5 * np.linalg.slogdet(Qcov[k, :, :])[1]  # term -N*(log(2* np.pi)) cancels out between p and q term; -N/2 is added below

    return lb_p - lb_q
def _updateParameters(self, Y, Z, tau, Mu, Alpha, Qmean, Qvar, coeff, ro):
    """
    Stochastic variational update of Q, one factor k at a time.

    Qmean and Qvar are updated IN PLACE as a convex combination of the old
    value (weight 1-ro) and the newly computed natural update (weight ro).

    Parameters
    ----------
    Y:      observed data matrix for this view
    Z:      dict of factor moments with keys 'E' and 'E2'
    tau:    noise precision values
    Mu, Alpha: prior mean and precision parameters
    Qmean, Qvar: current variational parameters (modified in place)
    coeff:  scaling coefficient applied to the data-dependent terms
            (presumably the mini-batch correction factor — confirm with caller)
    ro:     step size / forgetting rate of the stochastic update
    """
    for k in range(self.dim[1]):
        # precision contribution of the data: coeff * sum_n E[z_nk^2] * tau_n
        foo = coeff * np.dot(Z["E2"][:, k], tau)

        # residual of Y after removing all factors except k, weighted by tau
        bar_tmp1 = gpu_utils.array(Z["E"][:, k])
        bar_tmp2 = -gpu_utils.dot(
            gpu_utils.array(Z["E"][:, s.arange(self.dim[1]) != k]),
            gpu_utils.array(Qmean[:, s.arange(self.dim[1]) != k].T))
        bar_tmp2 += gpu_utils.array(Y)
        bar_tmp2 *= gpu_utils.array(tau)
        bar = coeff * gpu_utils.asnumpy(gpu_utils.dot(bar_tmp1, bar_tmp2))

        # stochastic update of W
        Qvar[:, k] *= (1 - ro)
        Qvar[:, k] += ro / (Alpha[:, k] + foo)

        # NOTE Do not use "Qvar" in the update like we used to because this
        # does not hold for stochastic because of the ro weighting
        Qmean[:, k] *= (1 - ro)
        Qmean[:, k] += ro * (1 / (Alpha[:, k] + foo)) * (bar + Alpha[:, k] * Mu[:, k])
def calc_Sigma_element(self, par, lidx, k, id1, id2):
    """
    Method to calculate elements of the Sigma matrix in the hyperparameters.
    Only used for debugging purposes.

    Parameters
    ----------
    par:  hyperparameter vector: par[0] = zeta; with model_groups additionally
          par[1] = sigma and par[2:] = low-rank group-kernel coordinates x
    lidx: lengthscale grid index passed to the covariate kernel Kc
    k:    factor index
    id1, id2: row/column indices of the Sigma element to return
    """
    # set the scale parameter zeta: per the `val` expressions below it is the
    # mixture weight of the iid-noise component, Sigma = (1-zeta)*K + zeta*I
    self.zeta[k] = par[0]
    self.Kc.set_gridix(lidx, k)
    # if required set group parameters
    if self.model_groups:
        sigma = par[1]
        x = par[2:]
        assert len(x) == self.Kg.rank * self.G, \
            "Length of x incorrect: Is %s, should be %s * %s" % (len(x), self.Kg.rank, self.G)
        x = x.reshape(self.Kg.rank, self.G)
        self.Kg.set_parameters(x=x, sigma=sigma, k=k, spectral_decomp=self.kronecker)
    if self.kronecker:
        # rebuild the full kernel matrices from their spectral components
        Vc, Dc = self.Kc.get_kernel_components_k(k)
        Kc = gpu_utils.dot(gpu_utils.dot(Vc, np.diag(Dc)), Vc.transpose())
        Vg, Dg = self.Kg.get_kernel_components_k(k)
        Kg = gpu_utils.dot(gpu_utils.dot(Vg, np.diag(Dg)), Vg.transpose())
        val = (1-self.zeta[k]) * np.kron(Kg, Kc) + self.zeta[k] * np.eye(self.Nu)
    else:
        Kc = self.Kc.Kmat[self.Kc.get_best_lidx(k),:,:]
        Kg = self.Kg.Kmat[k,:,:]
        if self.model_groups:
            # expand kernels to per-sample resolution via covariate/group indices
            val = (1-self.zeta[k]) * Kc[ self.covidx, :][:,self.covidx] * Kg[self.groupsidx, :][:, self.groupsidx] + self.zeta[k] *np.eye(self.Nu)
        else:
            val = (1-self.zeta[k]) * Kc + self.zeta[k] *np.eye(self.Nu)
    return val[id1,id2]
def _updateParameters(self, Y, W, Z, tau, Qmean, Qcov, SigmaUZ, p_cov_inv, mask):
    """
    Hidden method to compute parameter updates.

    Updates the full-covariance variational parameters (Qmean, Qcov) of the
    inducing-point node, per factor k, combining evidence from all M views.
    Missing observations are removed by zeroing the corresponding tau entries.

    Parameters
    ----------
    Y, W, tau, mask: per-view data, weight moments, precisions and missing-masks
    Z:        dict of factor moments ('E', ...)
    SigmaUZ:  per-factor cross-covariance blocks between this node and Z
              (presumably K_uz of the GP prior — confirm with caller)
    p_cov_inv: per-factor prior precision matrices
    """
    M = len(Y)
    N = Y[0].shape[0]
    K = self.dim[1]

    # Masking
    for m in range(M):
        tau[m][mask[m]] = 0.

    # optional re-weighting so views with fewer features are not dominated
    weights = [1] * M
    if self.weight_views and M > 1:
        total_w = np.asarray([Y[m].shape[1] for m in range(M)]).sum()
        weights = np.asarray(
            [total_w / (M * Y[m].shape[1]) for m in range(M)])
        weights = weights / weights.sum() * M

    # Precompute terms to speed up GPU computation
    foo = gpu_utils.array(s.zeros((N, K)))
    precomputed_bar = gpu_utils.array(s.zeros((N, K)))
    for m in range(M):
        tau_gpu = gpu_utils.array(tau[m])
        foo += weights[m] * gpu_utils.dot(tau_gpu, gpu_utils.array(W[m]["E2"]))
        bar_tmp1 = gpu_utils.array(W[m]["E"])
        bar_tmp2 = tau_gpu * gpu_utils.array(Y[m])
        precomputed_bar += weights[m] * gpu_utils.dot(bar_tmp2, bar_tmp1)
    foo = gpu_utils.asnumpy(foo)

    # Calculate variational updates - term for mean
    for k in range(K):
        # residual evidence for factor k after subtracting all other factors
        bar = gpu_utils.array(s.zeros((N, )))
        tmp_cp1 = gpu_utils.array(Z['E'][:, s.arange(K) != k])
        for m in range(M):
            tmp_cp2 = gpu_utils.array(W[m]["E"][:, s.arange(K) != k].T)
            bar_tmp1 = gpu_utils.array(W[m]["E"][:, k])
            bar_tmp2 = gpu_utils.array(
                tau[m]) * (-gpu_utils.dot(tmp_cp1, tmp_cp2))
            bar += weights[m] * gpu_utils.dot(bar_tmp2, bar_tmp1)
        bar += precomputed_bar[:, k]
        bar = gpu_utils.asnumpy(bar)

        # note: no Alpha scaling required here compared to Z nodes as done in the updateParameters function
        Mcross = gpu_utils.dot(p_cov_inv[k, :, :], SigmaUZ[k, :, :])
        Mtmp = gpu_utils.dot(
            Mcross, gpu_utils.dot(np.diag(foo[:, k]), Mcross.transpose()))
        # posterior covariance: (projected data precision + prior precision)^-1
        Qcov[k, :, :] = np.linalg.inv(Mtmp + p_cov_inv[k, :, :])
        Qmean[:, k] = gpu_utils.dot(
            Qcov[k, :, :],
            gpu_utils.dot(
                gpu_utils.dot(p_cov_inv[k, :, :], SigmaUZ[k, :, :]), bar))

    return {'Qmean': Qmean, 'Qcov': Qcov}
def _updateParameters(self, Y, W, tau, Mu, Alpha, Qmean, Qvar, mask): """ Hidden method to compute parameter updates """ # Speed analysis: the pre-computation part does not benefit from GPU, but the next updates doe N = Y[0].shape[0] # this is different from self.N for minibatch M = len(Y) K = self.dim[1] # Masking for m in range(M): tau[m][mask[m]] = 0. weights = [1] * M if self.weight_views and M > 1: total_w = np.asarray([Y[m].shape[1] for m in range(M)]).sum() weights = np.asarray( [total_w / (M * Y[m].shape[1]) for m in range(M)]) weights = weights / weights.sum() * M # weights = [(total_w-Y[m].shape[1])/total_w * M / (M-1) for m in range(M)] # Precompute terms to speed up GPU computation foo = gpu_utils.array(s.zeros((N, K))) precomputed_bar = gpu_utils.array(s.zeros((N, K))) for m in range(M): tau_gpu = gpu_utils.array(tau[m]) foo += weights[m] * gpu_utils.dot(tau_gpu, gpu_utils.array(W[m]["E2"])) bar_tmp1 = gpu_utils.array(W[m]["E"]) bar_tmp2 = tau_gpu * gpu_utils.array(Y[m]) precomputed_bar += weights[m] * gpu_utils.dot(bar_tmp2, bar_tmp1) foo = gpu_utils.asnumpy(foo) # Calculate variational updates for k in range(K): bar = gpu_utils.array(s.zeros((N, ))) tmp_cp1 = gpu_utils.array(Qmean[:, s.arange(K) != k]) for m in range(M): tmp_cp2 = gpu_utils.array(W[m]["E"][:, s.arange(K) != k].T) bar_tmp1 = gpu_utils.array(W[m]["E"][:, k]) bar_tmp2 = gpu_utils.array( tau[m]) * (-gpu_utils.dot(tmp_cp1, tmp_cp2)) bar += weights[m] * gpu_utils.dot(bar_tmp2, bar_tmp1) bar += precomputed_bar[:, k] bar = gpu_utils.asnumpy(bar) Qvar[:, k] = 1. / (Alpha[:, k] + foo[:, k]) Qmean[:, k] = Qvar[:, k] * (bar + Alpha[:, k] * Mu[:, k]) # Save updated parameters of the Q distribution return {'Qmean': Qmean, 'Qvar': Qvar}
def calcELBOgrad_k(self, k, gradSigma):
    """
    Method to calculate ELBO gradients per factor - required for optimization in Sigma node.

    Parameters
    ----------
    k:         factor index
    gradSigma: derivative of the prior covariance of factor k wrt one hyperparameter

    The three terms match the matrix-calculus identities
    d log|K^-1| = -tr(K^-1 dK) and d(K^-1) = -K^-1 dK K^-1 applied to the
    per-factor ELBO.

    Cleanup vs previous version: removed the unused locals
    (Qmean, p_cov, p_cov_inv_logdet).
    """
    Qpar, Qexp = self.Q.getParameters(), self.Q.getExpectations()
    Qcov = Qpar['cov']
    QE = Qexp['E']

    assert "Sigma" in self.markov_blanket, "Sigma not found in Markov blanket of U node"
    Sigma = self.markov_blanket['Sigma'].getExpectations()
    p_cov_inv = Sigma['inv']

    # gradient of 0.5*log|K^-1|
    term1 = -0.5 * np.trace(gpu_utils.dot(gradSigma, p_cov_inv[k, :, :]))
    # gradient of -0.5*tr(K^-1 Qcov)
    term2 = 0.5 * np.trace(
        gpu_utils.dot(
            p_cov_inv[k, :, :],
            gpu_utils.dot(gradSigma,
                          gpu_utils.dot(p_cov_inv[k, :, :], Qcov[k, :, :]))))
    # gradient of -0.5*E[u]^T K^-1 E[u]
    term3 = 0.5 * gpu_utils.dot(
        QE[:, k].transpose(),
        gpu_utils.dot(
            p_cov_inv[k, :, :],
            gpu_utils.dot(gradSigma,
                          gpu_utils.dot(p_cov_inv[k, :, :], QE[:, k]))))

    return term1 + term2 + term3
def calculateELBO_k(self, k): """ Method to calulcate the ELBO term for the k-th factor (required for the grid search on the optimal lengthscale in sigma per factor) """ # Collect parameters and expectations of current node Qpar, Qexp = self.Q.getParameters(), self.Q.getExpectations() Qmean, Qvar = Qpar['mean'], Qpar['var'] QE, QE2 = Qexp['E'], Qexp['E2'] if 'Sigma' in self.markov_blanket: Sigma = self.markov_blanket['Sigma'].getExpectations() p_cov = Sigma['cov'] p_cov_inv = Sigma['inv'] p_cov_inv_diag = Sigma['inv_diag'] p_cov_inv_logdet = Sigma['inv_logdet'] else: p_cov = self.P.params['cov'] p_cov_inv = self.p_cov_inv p_cov_inv_diag = self.p_cov_inv_diag p_cov_inv_logdet = np.linalg.slogdet(self.p_cov_inv)[1] if 'AlphaZ' in self.markov_blanket: Alpha = self.markov_blanket['AlphaZ'].getExpectations(expand=True) else: Alpha = dict() Alpha['E'] = s.ones((self.N, self.K)) * 1. Alpha['lnE'] = s.zeros((self.N, self.K)) # compute term from the exponential in the Gaussian p_cov_inv_k_with_zerodiag = p_cov_inv[ k, :, :] - p_cov_inv_diag[k, :] * s.eye(self.N) scaled_inv_with_zerodiag = gpu_utils.dot( gpu_utils.dot(np.diag(np.sqrt(Alpha['E'][:, k])), p_cov_inv_k_with_zerodiag[:, :]), np.diag(np.sqrt(Alpha['E'][:, k]))) tmp1 = -0.5 * QE[:, k].transpose().dot( (scaled_inv_with_zerodiag.dot(QE[:, k]))) - 0.5 * ( (Alpha['E'][:, k] * p_cov_inv_diag[k, :]).dot(QE2[:, k])) # compute term from the precision factor in front of the Gaussian tmp2 = 0.5 * p_cov_inv_logdet[k] + 0.5 * Alpha["lnE"][:, k].sum() lb_p = tmp1 + tmp2 lb_q = -0.5 * s.log(Qvar[:, k]).sum( ) # term -N*K*(log(2* np.pi)) cancels out between p and q term; -N/2 is added below return lb_p - lb_q
def _updateParameters(self, Y, W, tau, Qmean, Qcov, p_cov_inv, mask):
    """
    Hidden method to compute parameter updates.

    Full-covariance variational update of (Qmean, Qcov) per factor, for a node
    whose prior precision per factor is given by p_cov_inv. Evidence is
    aggregated over all M views; missing values are removed by zeroing tau.
    """
    N = Y[0].shape[0]  # this is different from self.N for minibatch
    M = len(Y)
    K = self.dim[1]

    # Masking
    for m in range(M):
        tau[m][mask[m]] = 0.

    # optional re-weighting so views with fewer features are not dominated
    weights = [1] * M
    if self.weight_views and M > 1:
        total_w = np.asarray([Y[m].shape[1] for m in range(M)]).sum()
        weights = np.asarray([total_w / (M * Y[m].shape[1]) for m in range(M)])
        weights = weights / weights.sum() * M

    # Precompute terms to speed up GPU computation
    foo = gpu_utils.array(s.zeros((N,K)))
    precomputed_bar = gpu_utils.array(s.zeros((N,K)))
    for m in range(M):
        tau_gpu = gpu_utils.array(tau[m])
        foo += weights[m] * gpu_utils.dot(tau_gpu, gpu_utils.array(W[m]["E2"]))
        bar_tmp1 = gpu_utils.array(W[m]["E"])
        bar_tmp2 = tau_gpu * gpu_utils.array(Y[m])
        precomputed_bar += weights[m] * gpu_utils.dot(bar_tmp2, bar_tmp1)
    foo = gpu_utils.asnumpy(foo)

    # Calculate variational updates
    for k in range(K):
        # residual evidence for factor k after removing all other factors
        bar = gpu_utils.array(s.zeros((N,)))
        tmp_cp1 = gpu_utils.array(Qmean[:, s.arange(K) != k])
        for m in range(M):
            tmp_cp2 = gpu_utils.array(W[m]["E"][:, s.arange(K) != k].T)
            bar_tmp1 = gpu_utils.array(W[m]["E"][:,k])
            bar_tmp2 = gpu_utils.array(tau[m])*(-gpu_utils.dot(tmp_cp1, tmp_cp2))
            bar += weights[m] * gpu_utils.dot(bar_tmp2, bar_tmp1)
        bar += precomputed_bar[:,k]
        bar = gpu_utils.asnumpy(bar)

        # posterior covariance: (diagonal data precision + prior precision)^-1
        Qcov[k,:,:] = np.linalg.inv(np.eye(N) * foo[:,k] + p_cov_inv[k,:,:])
        Qmean[:, k] = gpu_utils.dot(Qcov[k,:,:], bar)

    # Save updated parameters of the Q distribution
    return {'Qmean': Qmean, 'Qcov':Qcov}
def calcELBOgrad_k(self, k, gradSigma):
    """
    Method to calculate ELBO gradients per factor - required for optimization in Sigma node.

    Parameters
    ----------
    k:         factor index
    gradSigma: derivative of the prior covariance of factor k wrt one hyperparameter

    Cleanup vs previous version: removed the unused locals (Qmean, p_cov) and
    the wasted np.linalg.slogdet computation whose result was never used.
    """
    Qpar, Qexp = self.Q.getParameters(), self.Q.getExpectations()
    Qcov = Qpar['cov']
    QE = Qexp['E']

    # Prior precision per factor: from the Sigma node when present, otherwise fixed prior
    if 'Sigma' in self.markov_blanket:
        p_cov_inv = self.markov_blanket['Sigma'].getInverseTerms()['inv']
    else:
        p_cov_inv = self.p_cov_inv

    # gradient of 0.5*log|K^-1|
    term1 = - 0.5 * np.trace(gpu_utils.dot(gradSigma, p_cov_inv[k, :,:]))
    # gradient of -0.5*tr(K^-1 Qcov)
    term2 = 0.5 * np.trace(gpu_utils.dot(p_cov_inv[k, :,:],
                                         gpu_utils.dot(gradSigma,
                                                       gpu_utils.dot(p_cov_inv[k, :,:], Qcov[k, :, :]))))
    # gradient of -0.5*E[z]^T K^-1 E[z]
    term3 = 0.5 * gpu_utils.dot(QE[:, k].transpose(),
                                gpu_utils.dot(p_cov_inv[k, :,:],
                                              gpu_utils.dot(gradSigma,
                                                            gpu_utils.dot(p_cov_inv[k, :,:], QE[:,k]))))

    return term1 + term2 + term3
def _updateParameters(self, Y, W, tau, Alpha, Qmean, Qvar, p_cov_inv,
                      p_cov_inv_diag, mask):
    """
    Hidden method to compute parameter updates.

    Diagonal-variance variational update where the structured prior precision
    is split into its diagonal (handled exactly) and off-diagonal part
    (evaluated at the current Qmean), scaled per sample by Alpha.
    Qmean/Qvar are modified in place and also returned.
    """
    N = Y[0].shape[0]  # this is different from self.N for minibatch
    M = len(Y)
    K = self.dim[1]

    # Masking
    for m in range(M):
        tau[m][mask[m]] = 0.

    # optional re-weighting so views with fewer features are not dominated
    weights = [1] * M
    if self.weight_views and M > 1:
        total_w = np.asarray([Y[m].shape[1] for m in range(M)]).sum()
        weights = np.asarray(
            [total_w / (M * Y[m].shape[1]) for m in range(M)])
        weights = weights / weights.sum() * M

    # Precompute terms to speed up GPU computation
    foo = gpu_utils.array(s.zeros((N, K)))
    precomputed_bar = gpu_utils.array(s.zeros((N, K)))
    for m in range(M):
        tau_gpu = gpu_utils.array(tau[m])
        foo += weights[m] * gpu_utils.dot(tau_gpu, gpu_utils.array(W[m]["E2"]))
        bar_tmp1 = gpu_utils.array(W[m]["E"])
        bar_tmp2 = tau_gpu * gpu_utils.array(Y[m])
        precomputed_bar += weights[m] * gpu_utils.dot(bar_tmp2, bar_tmp1)
    foo = gpu_utils.asnumpy(foo)

    # Calculate variational updates
    for k in range(K):
        # residual evidence for factor k after removing all other factors
        bar = gpu_utils.array(s.zeros((N, )))
        tmp_cp1 = gpu_utils.array(Qmean[:, s.arange(K) != k])
        for m in range(M):
            tmp_cp2 = gpu_utils.array(W[m]["E"][:, s.arange(K) != k].T)
            bar_tmp1 = gpu_utils.array(W[m]["E"][:, k])
            bar_tmp2 = gpu_utils.array(
                tau[m]) * (-gpu_utils.dot(tmp_cp1, tmp_cp2))
            bar += weights[m] * gpu_utils.dot(bar_tmp2, bar_tmp1)
        bar += precomputed_bar[:, k]
        bar = gpu_utils.asnumpy(bar)

        # off-diagonal part of the prior precision, symmetrically scaled by sqrt(Alpha)
        p_cov_inv_k_with_zerodiag = p_cov_inv[
            k, :, :] - p_cov_inv_diag[k, :] * s.eye(N)
        scaled_inv_with_zerodiag = gpu_utils.dot(
            gpu_utils.dot(np.diag(np.sqrt(Alpha[:, k])),
                          p_cov_inv_k_with_zerodiag),
            np.diag(np.sqrt(Alpha[:, k])))

        Qvar[:, k] = 1. / (Alpha[:, k] * p_cov_inv_diag[k, :].transpose() +
                           foo[:, k])
        Qmean[:, k] = Qvar[:, k] * (bar - scaled_inv_with_zerodiag.dot(
            Qmean[:, k]))  # can take all samples here as zeros on diagonal

    # Save updated parameters of the Q distribution
    return {'Qmean': Qmean, 'Qvar': Qvar}
def calculateELBO(self):
    """
    Compute the lower bound using the Bernoulli likelihood with observed data.

    Masked (missing) entries contribute zero to the bound.
    """
    # Expectations of factors and weights from the Markov blanket
    expZ = self.markov_blanket["Z"].getExpectation()
    expW = self.markov_blanket["W"].getExpectation()

    # Linear predictor E[Z] E[W]^T, computed via the GPU helper
    eta = gpu_utils.asnumpy(
        gpu_utils.dot(gpu_utils.array(expZ), gpu_utils.array(expW).T)
    )

    # Bernoulli log-likelihood term: y*eta - log(1 + exp(eta))
    elbo_terms = self.obs * eta - s.log(1. + s.exp(eta))
    elbo_terms[self.getMask()] = 0.

    return elbo_terms.sum()
def calc_sigma_terms_k(self, k, only_inverse = False):
    """
    Method to compute the inverse of sigma and its log determinant based on the
    spectral decomposition of the kernel matrices for a given factor k.

    Sigma = (1-zeta)*K + zeta*I. Results are written in place into
    self.Sigma_inv, self.Sigma_inv_logdet and (unless only_inverse) self.Sigma.

    Fixes vs previous version:
    - BUGFIX: for zeta == 1 the log-determinant of Sigma^-1 = I was set to 1;
      the correct value is 0 (np.linalg.slogdet(I) == (1.0, 0.0) — the old code
      apparently stored the sign instead of the logdet). The wrong constant
      biased per-factor ELBO comparisons against the zeta == 1 case.
    - hoisted the duplicated get_components(k) call and the duplicated
      construction of the Kronecker eigenvector factors.
    """
    if self.zeta[k] == 1:
        # pure-noise factor: Sigma = I, hence Sigma^-1 = I and log|Sigma^-1| = 0
        self.Sigma_inv[k, :, :] = np.eye(self.Nu)
        self.Sigma_inv_logdet[k] = 0
        self.Sigma[k, :, :] = np.eye(self.N)
    else:
        if self.kronecker:
            # spectral form: Sigma = (1-zeta) * V (D + zeta/(1-zeta) I) V^T
            # with V = Vg (x) Vc and D = Dg (x) Dc
            components = self.get_components(k)
            V = np.kron(components['Vg'], components['Vc'])
            Vt = np.kron(components['Vg'].transpose(), components['Vc'].transpose())
            D = np.repeat(components['Dg'], self.C) * np.tile(components['Dc'], self.G)
            noise_ratio = self.zeta[k] / (1 - self.zeta[k])

            inv_diag = 1 / (D + noise_ratio)
            self.Sigma_inv[k, :, :] = 1 / (1 - self.zeta[k]) * \
                gpu_utils.dot(gpu_utils.dot(V, np.diag(inv_diag)), Vt)
            self.Sigma_inv_logdet[k] = - self.Nu * s.log(1 - self.zeta[k]) + \
                s.log(inv_diag).sum()

            if not only_inverse:
                self.Sigma[k, :, :] = (1 - self.zeta[k]) * \
                    gpu_utils.dot(V, gpu_utils.dot(np.diag(D + noise_ratio), Vt))
        else:
            # dense fallback: build Sigma explicitly and invert numerically
            if self.model_groups:
                Sigma = (1 - self.zeta[k]) * \
                    self.Kc.Kmat[self.Kc.get_best_lidx(k), self.covidx, :][:, self.covidx] * \
                    self.Kg.Kmat[k, self.groupsidx, :][:, self.groupsidx] + \
                    self.zeta[k] * np.eye(self.N)
            else:
                Sigma = (1 - self.zeta[k]) * self.Kc.Kmat[self.Kc.get_best_lidx(k), :, :] + \
                    self.zeta[k] * np.eye(self.N)
            self.Sigma_inv[k, :, :] = np.linalg.inv(Sigma)
            self.Sigma_inv_logdet[k] = np.linalg.slogdet(self.Sigma_inv[k, :, :])[1]
            if not only_inverse:
                self.Sigma[k, :, :] = Sigma
def _updateParameters(self, Y, W, WW, Z, ZZ, Pa, Pb, mask, ro, groups):
    """
    Hidden method to compute parameter updates.

    Stochastic update of the Gamma variational parameters (Qa, Qb) of the
    noise precision, per sample group. Qa/Qb are updated in place as a convex
    combination of old values (weight 1-ro) and the new estimate (weight ro),
    with a per-group mini-batch scaling coefficient.

    Parameters
    ----------
    Y, Z, W:  data and first moments; WW, ZZ second moments
    Pa, Pb:   prior Gamma parameters per group
    mask:     boolean mask of missing entries (True = missing)
    ro:       stochastic step size / forgetting rate
    groups:   per-sample group labels of the current batch
    """
    Q = self.Q.getParameters()
    Qa, Qb = Q['a'], Q['b']

    # Move matrices to the GPU
    Y_gpu = gpu_utils.array(Y)
    Z_gpu = gpu_utils.array(Z)
    W_gpu = gpu_utils.array(W).T

    # Calculate terms for the update (SPEED EFFICIENT, MEMORY INEFFICIENT FOR GPU)
    # ZW = Z_gpu.dot(W_gpu)
    # tmp = gpu_utils.asnumpy( gpu_utils.square(Y_gpu) \
    #     + gpu_utils.array(ZZ).dot(gpu_utils.array(WW.T)) \
    #     - gpu_utils.dot(gpu_utils.square(Z_gpu),gpu_utils.square(W_gpu)) + gpu_utils.square(ZW) \
    #     - 2*ZW*Y_gpu )
    # tmp[mask] = 0.

    # Calculate terms for the update (SPEED INEFFICIENT, MEMORY EFFICIENT FOR GPU)
    # tmp = E[(Y - ZW)^2] elementwise, expanded into moments
    tmp = gpu_utils.asnumpy(
        gpu_utils.square(Y_gpu) \
        + gpu_utils.array(ZZ).dot(gpu_utils.array(WW.T)) \
        - gpu_utils.dot(gpu_utils.square(Z_gpu), gpu_utils.square(W_gpu)) + gpu_utils.square(Z_gpu.dot(W_gpu)) \
        - 2*Z_gpu.dot(W_gpu)*Y_gpu)
    tmp[mask] = 0.

    # Compute updates
    Qa *= (1 - ro)
    Qb *= (1 - ro)
    for g in range(self.n_groups):
        g_mask = (groups == g)

        n_batch = g_mask.sum()
        if n_batch == 0:
            continue  # group not represented in this mini-batch

        # Calculate scaling coefficient for mini-batch
        coeff = self.n_per_group[g] / n_batch

        # shape term counts the observed (non-masked) entries per feature
        Qa[g, :] += ro * (Pa[g, :] + 0.5 * coeff *
                          (mask[g_mask, :].shape[0] - mask[g_mask, :].sum(axis=0)))
        Qb[g, :] += ro * (Pb[g, :] + 0.5 * coeff * tmp[g_mask, :].sum(axis=0))

    return Qa, Qb
def updateParameters(self, ix=None, ro=None):
    """Update the parameter ``zeta`` with the current reconstruction E[Z]E[W]^T."""
    expZ = self.markov_blanket["Z"].getExpectation()
    expW = self.markov_blanket["W"].getExpectation()
    # zeta = E[Z] E[W]^T, computed via the GPU helper
    self.params["zeta"] = gpu_utils.dot(gpu_utils.array(expZ),
                                        gpu_utils.array(expW).T)
def _updateParameters(self, Y, Z, tau, mask, Alpha, Qmean_S1, Qvar_S1, Qvar_S0,
                      Qtheta, SW, theta_lnE, theta_lnEInv, coeff, ro):
    """
    Stochastic update of the spike-and-slab variational parameters of W.

    Updates, per factor k: the slab mean/variance (Qmean_S1, Qvar_S1), the
    spike variance (Qvar_S0) and the inclusion probability (Qtheta), each as a
    convex combination of old value (weight 1-ro) and new estimate (weight ro).
    SW (= Qtheta * Qmean_S1) is refreshed in place so later factors use the
    updated expectation. Results are stored via self.Q.setParameters.

    Parameters
    ----------
    coeff: scaling coefficient applied to data-dependent terms
           (presumably the mini-batch correction factor — confirm with caller)
    ro:    stochastic step size / forgetting rate
    """
    # Mask matrices
    tau[mask] = 0.

    # Copy matrices to GPU
    # Y_gpu = gpu_utils.array(Y)
    tau_gpu = gpu_utils.array(tau)
    Z_gpu = gpu_utils.array(Z["E"])
    ZZ_gpu = gpu_utils.array(Z["E2"])

    # precompute terms
    # tauY_gpu = gpu_utils.array(tau*Y).T
    tauY_gpu = (tau_gpu * gpu_utils.array(Y)).T
    foo = gpu_utils.asnumpy(gpu_utils.dot(ZZ_gpu.T, tau_gpu).T)
    term4_tmp1 = gpu_utils.asnumpy(gpu_utils.dot(tauY_gpu, Z_gpu))
    del tauY_gpu, ZZ_gpu  # free GPU memory

    # Update each latent variable in turn
    for k in range(self.dim[1]):

        # Compute terms
        term1 = (theta_lnE - theta_lnEInv)[:, k]
        term2 = 0.5 * s.log(Alpha[:, k])
        term3 = 0.5 * coeff * s.log(foo[:, k] + Alpha[:, k])
        # term4_tmp1 = gpu_utils.dot(tauYT, Zk_cp)
        # term4_tmp2_1 = gpu_utils.array(SW[:,s.arange(self.dim[1])!=k].T)
        # term4_tmp2_2 = (Z_gpu[:,k] * gpu_utils.array(Z['E'][:,s.arange(self.dim[1])!=k]).T).T
        # term4_tmp2 = (tau_gpu*gpu_utils.dot(term4_tmp2_2, term4_tmp2_1)).sum(axis=0)
        # contribution of all other factors (via current SW) to the residual
        term4_tmp2 = gpu_utils.asnumpy((tau_gpu * gpu_utils.dot(
            (Z_gpu[:, k] *
             gpu_utils.array(Z['E'][:, s.arange(self.dim[1]) != k]).T).T,
            gpu_utils.array(SW[:, s.arange(self.dim[1]) != k].T))).sum(
                axis=0))
        term4_tmp3 = foo[:, k] + Alpha[:, k]
        term4 = coeff * 0.5 * s.divide(
            s.square(term4_tmp1[:, k] - term4_tmp2), term4_tmp3)

        # Update S
        Qtheta[:, k] *= (1 - ro)
        Qtheta[:, k] += ro * (1. / (1. + s.exp(-(term1 + term2 - term3 + term4))))

        # Update W
        tmp_var = 1. / term4_tmp3
        Qvar_S1[:, k] *= (1 - ro)
        Qvar_S1[:, k] += ro * tmp_var
        Qmean_S1[:, k] *= (1 - ro)
        Qmean_S1[:, k] += ro * tmp_var * (term4_tmp1[:, k] - term4_tmp2)

        # Update Expectations for the next iteration
        SW[:, k] = Qtheta[:, k] * Qmean_S1[:, k]

        del term1, term2, term3, term4_tmp2, term4_tmp3

    # update of Qvar_S0
    Qvar_S0 *= (1 - ro)
    Qvar_S0 += ro / Alpha

    # Save updated parameters of the Q distribution
    self.Q.setParameters(mean_B0=s.zeros((self.dim[0], self.dim[1])),
                         var_B0=Qvar_S0,
                         mean_B1=Qmean_S1,
                         var_B1=Qvar_S1,
                         theta=Qtheta)
def _updateParameters(self, U, Sigma, GPparam, Qmean, Qvar, Y, W, tau, mask):
    """
    Hidden method to compute parameter updates.

    Per factor k, chooses between two update schemes:
    - "unstructured" (prior covariance is the identity): standard mean-field
      Gaussian update of q(z) from the data;
    - structured: q(z) is derived from p(z|u) using the inducing-point
      variables U and the GP covariance blocks in Sigma.
    Qmean/Qvar are modified in place and also returned.
    NOTE(review): GPparam is currently unused in this body — confirm intent.
    """
    K = self.dim[1]
    N = Sigma['cov'].shape[1]
    M = len(Y)

    # Masking
    for m in range(M):
        tau[m][mask[m]] = 0.

    # optional re-weighting so views with fewer features are not dominated
    weights = [1] * M
    if self.weight_views and M > 1:
        total_w = np.asarray([Y[m].shape[1] for m in range(M)]).sum()
        weights = np.asarray(
            [total_w / (M * Y[m].shape[1]) for m in range(M)])
        weights = weights / weights.sum() * M

    # for non-structured factors take the standard updates for Z, ignoring U
    # Precompute terms to speed up GPU computation (only required for non-structured updates)
    foo = gpu_utils.array(s.zeros((N, K)))
    precomputed_bar = gpu_utils.array(s.zeros((N, K)))
    for m in range(M):
        tau_gpu = gpu_utils.array(tau[m])
        foo += weights[m] * gpu_utils.dot(tau_gpu, gpu_utils.array(W[m]["E2"]))
        bar_tmp1 = gpu_utils.array(W[m]["E"])
        bar_tmp2 = tau_gpu * gpu_utils.array(Y[m])
        precomputed_bar += weights[m] * gpu_utils.dot(bar_tmp2, bar_tmp1)
    foo = gpu_utils.asnumpy(foo)

    # Calculate updates
    for k in range(K):
        unstructured = (Sigma['cov'][k] == np.eye(N)).all(
        )  # TODO: Are there better ways to choose between sparse and non-sparse inference depending on factor smoothness?

        if unstructured:  # updates according to q(z) without sparse inference
            bar = gpu_utils.array(s.zeros((N, )))
            tmp_cp1 = gpu_utils.array(Qmean[:, s.arange(K) != k])
            for m in range(M):
                tmp_cp2 = gpu_utils.array(W[m]["E"][:, s.arange(K) != k].T)
                bar_tmp1 = gpu_utils.array(W[m]["E"][:, k])
                bar_tmp2 = gpu_utils.array(
                    tau[m]) * (-gpu_utils.dot(tmp_cp1, tmp_cp2))
                bar += weights[m] * gpu_utils.dot(bar_tmp2, bar_tmp1)
            bar += precomputed_bar[:, k]
            bar = gpu_utils.asnumpy(bar)
            # prior precision is 1 here (identity prior covariance branch)
            Qvar[:, k] = 1. / (1 + foo[:, k])
            Qmean[:, k] = Qvar[:, k] * bar
        else:  # updates according to p(z|u)
            SigmaZZ = Sigma['cov'][k]
            SigmaZU = SigmaZZ[:, self.idx_inducing]
            p_cov_inv = Sigma['inv'][k, :, :]
            # projection from inducing points to all samples: K_zu K_uu^-1
            mat = gpu_utils.dot(SigmaZU, p_cov_inv)
            Qmean[:, k] = gpu_utils.dot(mat, U['E'][:, k])
            for n in range(N):
                # conditional GP variance at sample n given the inducing points
                exp_var = SigmaZZ[n, n] - gpu_utils.dot(
                    gpu_utils.dot(SigmaZZ[n, self.idx_inducing], p_cov_inv),
                    SigmaZZ[self.idx_inducing, n])
                # variance propagated from the uncertainty in U
                var_exp = gpu_utils.dot(
                    gpu_utils.dot(mat[n, :], U['cov'][k, :, :]),
                    mat[n, :].transpose())
                Qvar[n, k] = exp_var + var_exp

    # Save updated parameters of the Q distribution
    return {'Qmean': Qmean, 'Qvar': Qvar}
def calc_gradient_Sigma(self, par, lidx, k):
    """
    Method to calculate gradients of covariance matrix wrt to hyperparameters.

    Sigma = (1-zeta) * (Kg x Kc) + zeta * I, where Kg is a low-rank group
    kernel scaled to a correlation matrix. Returns a tuple
    (gradient wrt zeta, gradient wrt sigma, list of gradients wrt x[r, g]);
    the latter two are None when model_groups is disabled.

    Parameters
    ----------
    par:  hyperparameter vector: par[0] = zeta; with model_groups additionally
          par[1] = sigma and par[2:] = group-kernel coordinates x
    lidx: lengthscale grid index for the covariate kernel Kc
    k:    factor index
    """
    self.zeta[k] = par[0]
    self.Kc.set_gridix(lidx, k)

    # if required set group parameters
    if self.model_groups:
        sigma = par[1]
        x = par[2:]
        assert len(x) == self.Kg.rank * self.G, \
            "Length of x incorrect: Is %s, should be %s * %s" % (len(x), self.Kg.rank, self.G)
        x = x.reshape(self.Kg.rank, self.G)
        # set and recalculate group kernel (matrix and spectral decomposition if Kronecker)
        self.Kg.set_parameters(x=x, sigma=sigma, k=k, spectral_decomp=self.kronecker)

    # get kernel matrices
    if self.kronecker:
        # TODO avoid building the full matrix use V and D below
        Vc, Dc = self.Kc.get_kernel_components_k(k)
        Kc = gpu_utils.dot(gpu_utils.dot(Vc, np.diag(Dc)), Vc.transpose())
        Vg, Dg = self.Kg.get_kernel_components_k(k)
        Kg = gpu_utils.dot(gpu_utils.dot(Vg, np.diag(Dg)), Vg.transpose())
        # gradient wrt zeta: d/dzeta [(1-zeta) K + zeta I] = -K + I
        gradient_Sigma_zeta = - np.kron(Kg, Kc) + np.eye(self.Nu)
    else:
        Kc = self.Kc.Kmat[self.Kc.get_best_lidx(k),:,:]
        Kg = self.Kg.Kmat[k,:,:]
        # gradient wrt zeta
        if self.model_groups:
            gradient_Sigma_zeta = - Kc[self.covidx, :][:,self.covidx] *\
                                  Kg[self.groupsidx, :][:, self.groupsidx] +\
                                  np.eye(self.Nu)
        else:
            gradient_Sigma_zeta = - Kc + np.eye(self.Nu)

    if self.model_groups:
        # gradient wrt sigma
        Z = np.dot(x.transpose(), x)
        # diagonal can be neglected as set to 1, gradient 0
        Gmat_unscaled = Z + sigma * np.eye(self.G)  # this is Kg before scaled to correlation
        Gmat_unscaled_sqrt = np.sqrt(Gmat_unscaled)
        # N = normalisation matrix turning Gmat_unscaled into a correlation matrix
        N = np.outer(np.diag(Gmat_unscaled_sqrt), np.diag(Gmat_unscaled_sqrt))
        # AN_sigma = derivative of N wrt sigma (quotient-rule numerator part)
        tmp = -0.5 * np.outer(np.diag(Gmat_unscaled_sqrt), 1/np.diag(Gmat_unscaled_sqrt))
        AN_sigma = tmp + tmp.transpose()
        N2 = N**2
        # AZ_sigma = 0 (Z does not depend on sigma)
        diffGmat_sigma = (1-np.eye(self.G)) * Z * AN_sigma / N2
        if self.kronecker:
            gradient_Sigma_sigma = (1 - self.zeta[k]) * np.kron(diffGmat_sigma, Kc)
        else:
            gradient_Sigma_sigma = (1 - self.zeta[k]) *\
                                   diffGmat_sigma[self.groupsidx, :][:,self.groupsidx] \
                                   * Kc[self.covidx, :][:, self.covidx]

        # gradient wrt x, one matrix per (r, g) in row-major order over g within r
        gradient_Sigma_x = []
        for r in range(self.Kg.rank):
            # derivative of the per-group normalising diagonal wrt x[r, :]
            drg = - 1/np.diag(Gmat_unscaled_sqrt) * x[r,:]
            for g in range(self.G):
                tmp = np.outer(np.diag(Gmat_unscaled_sqrt), drg[g] * np.eye(self.G)[g, :])
                AN_x = tmp + tmp.transpose()
                tmp = np.outer(x[r, :], np.eye(self.G)[g, :])
                AZ_x = tmp + tmp.transpose()
                # quotient rule for d(Z/N) restricted to off-diagonal entries
                diffGmat_x = (1-np.eye(self.G)) * (Z * AN_x + AZ_x * N) / N2
                if self.kronecker:
                    grad = (1 - self.zeta[k]) * np.kron(diffGmat_x, Kc)
                else:
                    grad = (1 - self.zeta[k]) * diffGmat_x[self.groupsidx, :][:,self.groupsidx] * Kc[self.covidx, :][:,self.covidx]
                gradient_Sigma_x.append(grad)
    else:
        gradient_Sigma_sigma = None
        gradient_Sigma_x = None

    return gradient_Sigma_zeta, gradient_Sigma_sigma, gradient_Sigma_x
def _updateParameters(self, Y, W, tau, mask, Alpha, Qmean_T1, Qvar_T1, Qtheta,
                      SZ, theta_lnE, theta_lnEInv):
    """
    Hidden method to compute parameter updates.

    Spike-and-slab update of Z: per factor k, updates the inclusion
    probability Qtheta and the slab mean/variance (Qmean_T1, Qvar_T1),
    aggregating evidence over all M views. SZ (= Qtheta * Qmean_T1) is
    refreshed in place so subsequent factors use the updated expectation.
    """
    # Mask matrices
    for m in range(len(Y)):
        tau[m][mask[m]] = 0.

    # Precompute terms to speed up GPU computation
    N = Qmean_T1.shape[0]
    M = len(Y)
    K = self.dim[1]

    # optional re-weighting so views with fewer features are not dominated
    weights = [1] * M
    if self.weight_views and M > 1:
        total_w = np.asarray([Y[m].shape[1] for m in range(M)]).sum()
        # weights = [(total_w-Y[m].shape[1])/total_w * M / (M-1) for m in range(M)]
        weights = np.asarray(
            [total_w / (M * Y[m].shape[1]) for m in range(M)])
        weights = weights / weights.sum() * M

    # term4_tmp1 = [ gpu_utils.array(Alpha[:,k]) for k in range(self.dim[1]) ]
    # term4_tmp2 = [ gpu_utils.array(Alpha[:,k]) for k in range(self.dim[1]) ]
    # term4_tmp3 = [ gpu_utils.array(Alpha[:,k]) for k in range(self.dim[1]) ]
    # for m in range(M):
    #     tau_gpu = gpu_utils.array(tau[m])
    #     Y_gpu = gpu_utils.array(Y[m])
    #     for k in range(K):
    #         Wk_gpu = gpu_utils.array(W[m]["E"][:,k])
    #         WWk_gpu = gpu_utils.array(W[m]["E2"][:,k])
    #         term4_tmp1[k] += gpu_utils.dot(tau_gpu*Y_gpu, Wk_gpu)
    #         term4_tmp3[k] += gpu_utils.dot(tau_gpu, WWk_gpu)
    #     del tau_gpu, Y_gpu, Wk_gpu, WWk_gpu

    # NOTE all three accumulators are initialised with the Alpha offset; the
    # offsets of tmp1 and tmp2 cancel in their difference below, while tmp3
    # legitimately carries the prior precision Alpha in the total precision.
    term4_tmp1 = gpu_utils.array(s.zeros((N, K)) + Alpha)
    term4_tmp2 = gpu_utils.array(s.zeros((N, K)) + Alpha)
    term4_tmp3 = gpu_utils.array(s.zeros((N, K)) + Alpha)
    for m in range(M):
        tau_gpu = gpu_utils.array(tau[m])
        Y_gpu = gpu_utils.array(Y[m])
        W_gpu = gpu_utils.array(W[m]["E"])
        WW_gpu = gpu_utils.array(W[m]["E2"])
        term4_tmp1 += weights[m] * gpu_utils.dot(tau_gpu * Y_gpu, W_gpu)
        term4_tmp3 += weights[m] * gpu_utils.dot(tau_gpu, WW_gpu)
        del tau_gpu, Y_gpu, W_gpu, WW_gpu

    # Update each latent variable in turn (notice that the update of Z[,k] depends on the other values of Z!)
    for k in range(K):
        term1 = (theta_lnE - theta_lnEInv)[:, k]
        term2 = 0.5 * s.log(Alpha[:, k])

        # contribution of all other factors (via current SZ) to the residual
        for m in range(M):
            tau_gpu = gpu_utils.array(tau[m])
            Wk_gpu = gpu_utils.array(W[m]["E"][:, k])
            term4_tmp2_tmp = (tau_gpu * gpu_utils.dot(
                gpu_utils.array(SZ[:, s.arange(K) != k]),
                (Wk_gpu *
                 gpu_utils.array(W[m]["E"][:, s.arange(K) != k].T)))).sum(
                     axis=1)
            term4_tmp2[:, k] += weights[m] * term4_tmp2_tmp
            del tau_gpu, Wk_gpu, term4_tmp2_tmp

        # term4_tmp3[k] += Alpha[:,k]
        term3 = gpu_utils.asnumpy(0.5 * gpu_utils.log(term4_tmp3[:, k]))
        term4 = gpu_utils.asnumpy(0.5 * gpu_utils.divide(
            gpu_utils.square(term4_tmp1[:, k] - term4_tmp2[:, k]),
            term4_tmp3[:, k]))

        # Update S
        # NOTE there could be some precision issues in T --> loads of 1s in result
        Qtheta[:, k] = 1. / (1. + s.exp(-(term1 + term2 - term3 + term4)))
        Qtheta[:, k] = np.nan_to_num(Qtheta[:, k])

        # Update Z
        Qvar_T1[:, k] = gpu_utils.asnumpy(1. / term4_tmp3[:, k])
        Qmean_T1[:, k] = Qvar_T1[:, k] * gpu_utils.asnumpy(term4_tmp1[:, k] -
                                                           term4_tmp2[:, k])

        # Update Expectations for the next iteration
        SZ[:, k] = Qtheta[:, k] * Qmean_T1[:, k]

    return {'mean_B1': Qmean_T1, 'var_B1': Qvar_T1, 'theta': Qtheta}