def _compute_bound(self, R, logdet=None, inv=None, gradient=False,
                   terms=False):
    r"""
    Rotate q(X) as X -> RX: q(X) = N(R*mu, R*Cov*R')

    Assume :math:`p(\mathbf{X}) = \prod^M_{m=1} N(\mathbf{x}_m|0, \mathbf{\Lambda})`.

    Assume unit innovation noise covariance.
    """
    # TODO/FIXME: X and alpha should NOT contain observed values! Check
    # that.

    # Assume constant mean and precision matrix over plates.

    # Fall back to computing the inverse and log-determinant if they were
    # not given.
    if inv is None:
        invR = np.linalg.inv(R)
    else:
        invR = inv

    if logdet is None:
        logdetR = np.linalg.slogdet(R)[1]
    else:
        logdetR = logdet

    # Transform moments of X and A
    Lambda_R_X0X0 = sum_to_plates(dot(self.Lambda, R, self.X0X0),
                                  (),
                                  plates_from=self.X_node.plates,
                                  ndim=2)
    R_XnXn = dot(R, self.XnXn)
    RA_XpXp_A = dot(R, self.A_XpXp_A)
    sumr = np.sum(R, axis=0)
    R_CovA_XpXp = sumr * self.CovA_XpXp

    # Compute entropy H(X)
    M = self.N * np.prod(self.X_node.plates)  # total number of rotated vectors
    logH_X = random.gaussian_entropy(-2 * M * logdetR, 0)

    # Compute <log p(X)>
    yy = tracedot(R_XnXn, R.T) + tracedot(Lambda_R_X0X0, R.T)
    yz = tracedot(dot(R, self.A_XpXn), R.T) + tracedot(self.Lambda_mu_X0, R.T)
    zz = tracedot(RA_XpXp_A, R.T) + np.einsum("...k,...k->...",
                                              R_CovA_XpXp,
                                              sumr)
    logp_X = random.gaussian_logpdf(yy, yz, zz, 0, 0)

    # Compute the bound
    if terms:
        bound = {self.X_node: logp_X + logH_X}
    else:
        bound = logp_X + logH_X

    if not gradient:
        return bound

    # Compute dH(X)
    dlogH_X = random.gaussian_entropy(-2 * M * invR.T, 0)

    # Compute d<log p(X)>
    dyy = 2 * (R_XnXn + Lambda_R_X0X0)
    dyz = dot(R, self.A_XpXn + self.A_XpXn.T) + self.Lambda_mu_X0
    dzz = 2 * (RA_XpXp_A + R_CovA_XpXp[None, :])
    dlogp_X = random.gaussian_logpdf(dyy, dyz, dzz, 0, 0)

    if terms:
        d_bound = {self.X_node: dlogp_X + dlogH_X}
    else:
        d_bound = dlogp_X + dlogH_X

    return (bound, d_bound)
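# A minimal sketch of how the gradient returned above can be validated
# numerically with central finite differences. `rotator` and `D` are
# hypothetical stand-ins: any object exposing the _compute_bound method
# defined above, and the dimensionality of its rotation matrix R. This is
# a debugging aid, not part of the transformation itself.
def _check_bound_gradient(rotator, D, eps=1e-6, rtol=1e-3):
    R = np.random.randn(D, D)
    # Analytic gradient from the bound computation
    (_, d_bound) = rotator._compute_bound(R, gradient=True)
    # Numerical gradient, one element of R at a time
    g = np.empty((D, D))
    for i in range(D):
        for j in range(D):
            (Rp, Rm) = (R.copy(), R.copy())
            Rp[i, j] += eps
            Rm[i, j] -= eps
            g[i, j] = (rotator._compute_bound(Rp)
                       - rotator._compute_bound(Rm)) / (2 * eps)
    np.testing.assert_allclose(d_bound, g, rtol=rtol)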
def _compute_bound(self, R, logdet=None, inv=None, Q=None, gradient=False):
    """
    Rotate q(X) and q(alpha).
    """
    # TODO/FIXME: X and alpha should NOT contain observed values! Check
    # that.

    #
    # Transform the distributions and moments
    #

    D1 = self.D1
    D2 = self.D2
    N = self.N
    D = D1 * D2

    # Fall back to computing the log-determinant and inverse of R if they
    # were not given (the caller normally provides them).
    logdet_R = logdet if logdet is not None else np.linalg.slogdet(R)[1]
    inv_R = inv if inv is not None else np.linalg.inv(R)

    # Compute the rotated second moment
    if Q is not None:
        X = np.reshape(self.X, (N, D1, D2))
        CovX = np.reshape(self.CovX, (N, D1, D2, D1, D2))

        # Rotate the plates
        sumQ = np.sum(Q, axis=0)
        QX = np.einsum('ik,kab->iab', Q, X)
        logdet_Q = np.sum(np.log(np.abs(sumQ)))

        if self.axis == 'cols':
            # Rotate the "columns"
            X_R = np.einsum('jk,nik->nij', R, X)
            r_CovX_r = np.einsum('bk,bl,nakal->nab', R, R, CovX)
            XX = (np.einsum('kai,kaj->aij', QX, QX)
                  + np.einsum('d,daiaj->aij', sumQ**2, CovX))
        else:
            # Rotate the "rows"
            X_R = np.einsum('ik,nkj->nji', R, X)
            r_CovX_r = np.einsum('ak,al,nkblb->nba', R, R, CovX)
            XX = (np.einsum('kib,kjb->bij', QX, QX)
                  + np.einsum('d,dibjb->bij', sumQ**2, CovX))

        Q_X_R = np.einsum('nk,kij->nij', Q, X_R)
    else:
        # Reshape the second moment into matrix form
        XX = np.reshape(self.XX, (D1, D2, D1, D2))
        if self.axis == 'cols':
            XX = np.einsum('aiaj->aij', XX)
        else:
            XX = np.einsum('ibjb->bij', XX)
        logdet_Q = 0

    if self.axis == 'cols':
        # Apply the rotation on the right
        R_XX = np.einsum('ik,akj->aij', R, XX)
        r_XX_r = np.einsum('ik,il,akl->ai', R, R, XX)
        if Q is not None:
            r_XQQX_r = r_XX_r

        # Compute q(alpha)
        a_alpha = self.a
        b_alpha = self.b0 + 0.5 * r_XX_r
        alpha_R = a_alpha / b_alpha
        # <log alpha> up to a constant (psi(a_alpha)) that does not depend
        # on the rotation
        logalpha_R = -np.log(b_alpha)

        # FIXME: There seems to be a bug in this gradient term:
        XX_dalpha = -np.einsum('ab,ab,abj->bj',
                               alpha_R / b_alpha,
                               r_XX_r,
                               R_XX)

        # Compute <alpha> * R * <XX>
        alpha_R_XX = np.einsum('ai,aij->ij', alpha_R, R_XX)
        dalpha_R_XX = np.einsum('ai,aij->ij', alpha_R / b_alpha, R_XX)
        invb_R_XX = np.einsum('ai,aij->ij', 1 / b_alpha, R_XX)

        ND = self.N * D1
    else:
        # Apply the rotation on the left
        R_XX = np.einsum('ik,bkj->bij', R, XX)
        r_XX_r = np.einsum('ik,il,bkl->bi', R, R, XX)
        if Q is not None:
            r_XQQX_r = r_XX_r

        # Compute q(alpha)
        a_alpha = self.a
        b_alpha = self.b0 + 0.5 * r_XX_r
        alpha_R = a_alpha / b_alpha
        # <log alpha> up to a constant (psi(a_alpha)) that does not depend
        # on the rotation
        logalpha_R = -np.log(b_alpha)

        # Compute <alpha> * R * <XX>
        alpha_R_XX = np.einsum('bi,bij->ij', alpha_R, R_XX)
        dalpha_R_XX = np.einsum('bi,bij->ij', alpha_R / b_alpha, R_XX)
        invb_R_XX = np.einsum('bi,bij->ij', 1 / b_alpha, R_XX)

        # FIXME: There seems to be a bug in this gradient term:
        XX_dalpha = -np.einsum('ba,ba,baj->aj',
                               alpha_R / b_alpha,
                               r_XX_r,
                               R_XX)

        ND = self.N * D2

    #
    # Compute the cost
    #

    # Compute entropy H(X)
    logH_X = utils.random.gaussian_entropy(-2*ND*logdet_R - 2*D*logdet_Q,
                                           0)

    # Compute entropy H(alpha)
    logH_alpha = utils.random.gamma_entropy(0,
                                            np.sum(np.log(b_alpha)),
                                            0,
                                            0,
                                            0)

    # Compute <log p(X|alpha)>
    logp_X = utils.random.gaussian_logpdf(tracedot(alpha_R_XX, R.T),
                                          0,
                                          0,
                                          N * np.sum(logalpha_R),
                                          0)

    # Compute <log p(alpha)>
    logp_alpha = utils.random.gamma_logpdf(self.b0 * np.sum(alpha_R),
                                           np.sum(logalpha_R),
                                           self.a0 * np.sum(logalpha_R),
                                           0,
                                           0)

    # Compute the bound
    bound = logp_X + logp_alpha + logH_X + logH_alpha

    if not gradient:
        return bound

    #
    # Compute the gradient with respect to R
    #

    # Compute dH(X)
    dlogH_X = utils.random.gaussian_entropy(-2 * ND * inv_R.T, 0)

    # Compute dH(alpha)
    d_log_b = invb_R_XX
    dlogH_alpha = utils.random.gamma_entropy(0, d_log_b, 0, 0, 0)

    # Compute d<log p(X|alpha)>
    d_log_alpha = -d_log_b
    dXX_alpha = 2 * alpha_R_XX
    # TODO/FIXME: The XX_dalpha term above seems to have a bug: if both
    # dXX_alpha and XX_dalpha are set to zero, the numerical gradient is
    # more accurate..?!
    dlogp_X = utils.random.gaussian_logpdf(dXX_alpha + XX_dalpha,
                                           0,
                                           0,
                                           N * d_log_alpha,
                                           0)

    # Compute d<log p(alpha)>
    d_alpha = -dalpha_R_XX
    dlogp_alpha = utils.random.gamma_logpdf(self.b0 * d_alpha,
                                            d_log_alpha,
                                            self.a0 * d_log_alpha,
                                            0,
                                            0)

    dR_bound = dlogp_X + dlogp_alpha + dlogH_X + dlogH_alpha

    if Q is None:
        return (bound, dR_bound)

    #
    # Compute the gradient with respect to Q (if Q was given)
    #

    def d_helper(v):
        return (np.einsum('iab,ab,jab->ij', Q_X_R, v, X_R)
                + np.einsum('n,ab,nab->n', sumQ, v, r_CovX_r))

    # Compute dH(X)
    dQ_logHX = utils.random.gaussian_entropy(-2 * D / sumQ, 0)

    # Compute dH(alpha)
    d_log_b = d_helper(1 / b_alpha)
    dQ_logHalpha = utils.random.gamma_entropy(0, d_log_b, 0, 0, 0)

    # Compute d<log p(X|alpha)>
    dXX_alpha = 2 * d_helper(alpha_R)
    XX_dalpha = -d_helper(r_XQQX_r * alpha_R / b_alpha)
    d_log_alpha = -d_log_b
    dQ_logpX = utils.random.gaussian_logpdf(dXX_alpha + XX_dalpha,
                                            0,
                                            0,
                                            N * d_log_alpha,
                                            0)

    # Compute d<log p(alpha)>
    d_alpha = -d_helper(alpha_R / b_alpha)
    dQ_logpalpha = utils.random.gamma_logpdf(self.b0 * d_alpha,
                                             d_log_alpha,
                                             self.a0 * d_log_alpha,
                                             0,
                                             0)

    dQ_bound = dQ_logpX + dQ_logpalpha + dQ_logHX + dQ_logHalpha

    return (bound, dR_bound, dQ_bound)
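# A standalone numeric sketch (numpy only, plates omitted) of the q(alpha)
# update used above: after rotating by R, the Gamma posterior scale is
#   b_alpha[i] = b0 + 0.5 * r_i' <XX> r_i,
# where r_i is the i-th row of R. The einsum below is the same contraction
# as r_XX_r above and equals the diagonal of R <XX> R'. The function name
# and its defaults are illustrative, not part of the class.
def _demo_rotated_alpha(D=4, a_alpha=1e-3, b0=1e-3, seed=0):
    rng = np.random.RandomState(seed)
    R = rng.randn(D, D)
    XX = rng.randn(D, D)
    XX = np.dot(XX, XX.T)  # a valid (positive semi-definite) second moment
    r_XX_r = np.einsum('ik,il,kl->i', R, R, XX)
    # Same quantity via the explicitly rotated moment matrix
    assert np.allclose(r_XX_r, np.diag(np.dot(np.dot(R, XX), R.T)))
    b_alpha = b0 + 0.5 * r_XX_r  # rotated posterior scale of alpha
    return a_alpha / b_alpha     # <alpha> under the rotated posterior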
def _compute_bound(self, R, logdet=None, inv=None, gradient=False):
    r"""
    Rotate q(X) as X -> RX: q(X) = N(R*mu, R*Cov*R')

    Assume :math:`p(\mathbf{X}) = \prod^M_{m=1} N(\mathbf{x}_m|0, \mathbf{\Lambda})`.

    Assume unit innovation noise covariance.
    """
    # TODO/FIXME: X and alpha should NOT contain observed values! Check
    # that.

    # TODO/FIXME: Allow non-zero prior mean!

    # Assume constant mean and precision matrix over plates.

    # Fall back to computing the inverse and log-determinant if they were
    # not given (the caller normally provides them).
    invR = inv if inv is not None else np.linalg.inv(R)
    logdetR = logdet if logdet is not None else np.linalg.slogdet(R)[1]

    # Transform moments of X and A
    Lambda_R_X0X0 = dot(self.Lambda, R, self.X0X0)
    R_XnXn = dot(R, self.XnXn)
    RA_XpXp_A = dot(R, self.A_XpXp_A)
    sumr = np.sum(R, axis=0)
    R_CovA_XpXp = sumr * self.CovA_XpXp

    # Compute entropy H(X)
    logH_X = utils.random.gaussian_entropy(-2 * self.N * logdetR, 0)

    # Compute <log p(X)>
    yy = tracedot(R_XnXn, R.T) + tracedot(Lambda_R_X0X0, R.T)
    yz = tracedot(dot(R, self.A_XpXn), R.T) + tracedot(self.Lambda_mu_X0, R.T)
    zz = tracedot(RA_XpXp_A, R.T) + np.dot(R_CovA_XpXp, sumr)
    logp_X = utils.random.gaussian_logpdf(yy, yz, zz, 0, 0)

    # Compute the bound
    bound = logp_X + logH_X

    if not gradient:
        return bound

    # TODO/FIXME: There might be a very small error in the gradient?

    # Compute dH(X)
    dlogH_X = utils.random.gaussian_entropy(-2 * self.N * invR.T, 0)

    # Compute d<log p(X)>
    dyy = 2 * (R_XnXn + Lambda_R_X0X0)
    dyz = dot(R, self.A_XpXn + self.A_XpXn.T) + self.Lambda_mu_X0
    dzz = 2 * (RA_XpXp_A + R_CovA_XpXp)
    dlogp_X = utils.random.gaussian_logpdf(dyy, dyz, dzz, 0, 0)

    d_bound = dlogp_X + dlogH_X

    return (bound, d_bound)
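# `tracedot` is used throughout this module but not defined in this
# excerpt; a minimal numpy sketch of the assumed helper is given below:
# tracedot(A, B) = trace(A @ B), computed without forming the full matrix
# product. With it, e.g. tracedot(dot(R, self.XnXn), R.T) above is
# trace(R <X_n X_n'> R'), the rotated sum of second moments entering the
# quadratic term of <log p(X)>.
def tracedot(A, B):
    # sum_ij A[i,j] * B[j,i] == trace(A @ B)
    return np.einsum('ij,ji->', A, B)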