def rotate(self, R, inv=None, logdet=None):
    """
    Rotate the distribution of this node in place with the matrix `R`.

    The natural parameters ``self.phi[0..2]`` are transformed with the
    inverse of `R`, the moments ``self.u[0..2]`` with `R` itself, and
    ``self.g`` is decreased by ``N * logdet`` where ``N = self.dims[0][0]``.

    Parameters
    ----------
    R : ndarray
        Rotation matrix.
    inv : ndarray, optional
        Inverse of `R`.  Computed with ``np.linalg.inv(R)`` when omitted.
    logdet : float, optional
        Logarithm of the absolute determinant of `R`.  Computed with
        ``np.linalg.slogdet(R)[1]`` when omitted.
    """
    invR = np.linalg.inv(R) if inv is None else inv
    logdetR = np.linalg.slogdet(R)[1] if logdet is None else logdet

    # It would be more efficient and simpler, if you just rotated the
    # moments and didn't touch phi. However, then you would need to call
    # update() before lower_bound_contribution. This is more error-safe.

    # Transform parameters
    self.phi[0] = mvdot(invR.T, self.phi[0])
    self.phi[1] = dot(invR.T, self.phi[1], invR)
    self.phi[2] = dot(invR.T, self.phi[2], invR)

    N = self.dims[0][0]

    # Transform moments and g directly instead of recomputing them from phi
    # with self._update_moments_and_cgf().
    u0 = mvdot(R, self.u[0])
    u1 = dot(R, self.u[1], R.T)
    u2 = dot(R, self.u[2], R.T)
    self.u = [u0, u1, u2]
    self.g -= N * logdetR
def d_helper(v):
    """
    Evaluate the v-weighted term shared by the gradients with respect to Q.

    This is a closure: `R`, `QX`, `sumQ` and `self` are not parameters but
    are looked up in the enclosing scope.

    Parameters
    ----------
    v : ndarray
        Weight vector; it is contracted against the first axis of `R`.

    Returns
    -------
    The combination ``dot(QX, W, X') + sumQ * tr(W Cov_d) - dot(M, X')``
    where ``W = R' diag(v) R`` and ``M = mu diag(v) R``.
    """
    # W[i,j] = sum_k R[k,i] * v[k] * R[k,j]
    weighted_RR = np.einsum('ki,k,kj->ij', R, v, R)
    # One trace per leading index d of CovX: tr(W * CovX[d])
    trace_per_plate = np.einsum('ij,dji->d', weighted_RR, self.CovX)
    # M[i,j] = sum_k mu[i,k] * v[k] * R[k,j]
    weighted_mu_R = np.einsum('ik,k,kj->ij', self.mu, v, R)

    data_term = dot(QX, weighted_RR, self.X.T)
    covariance_term = sumQ * trace_per_plate
    mean_term = dot(weighted_mu_R, self.X.T)
    return data_term + covariance_term - mean_term
def rotate(self, R, inv=None, logdet=None):
    """
    Apply the rotation `R` to this node's parameters and moments in place.

    ``self.phi[0]``, ``self.phi[1]`` and ``self.phi[2]`` are transformed
    with the inverse of `R`; ``self.u`` is replaced by the moments
    transformed with `R`; ``self.g`` is decreased by ``N * logdet`` with
    ``N = self.dims[0][0]``.

    Parameters
    ----------
    R : ndarray
        Rotation matrix.
    inv : ndarray, optional
        Inverse of `R`; ``np.linalg.inv(R)`` is used if it is not given.
    logdet : float, optional
        Log of the absolute determinant of `R`; ``np.linalg.slogdet(R)[1]``
        is used if it is not given.
    """
    if inv is None:
        inv = np.linalg.inv(R)
    if logdet is None:
        logdet = np.linalg.slogdet(R)[1]
    invR = inv
    logdetR = logdet

    # It would be more efficient and simpler, if you just rotated the
    # moments and didn't touch phi. However, then you would need to call
    # update() before lower_bound_contribution. This is more error-safe.

    # Transform parameters
    self.phi[0] = linalg.mvdot(invR.T, self.phi[0])
    self.phi[1] = linalg.dot(invR.T, self.phi[1], invR)
    self.phi[2] = linalg.dot(invR.T, self.phi[2], invR)

    N = self.dims[0][0]

    # Transform moments and g by hand; the alternative would be a call to
    # self._update_moments_and_cgf(), which is deliberately not used here.
    u0 = linalg.mvdot(R, self.u[0])
    u1 = linalg.dot(R, self.u[1], R.T)
    u2 = linalg.dot(R, self.u[2], R.T)
    self.u = [u0, u1, u2]
    self.g -= N * logdetR
def _compute_bound(self, R, logdet=None, inv=None, gradient=False):
    r"""
    Rotate q(X) as X->RX: q(X)=N(R*mu, R*Cov*R')

    Assume:
    :math:`p(\mathbf{X}) = \prod^M_{m=1} N(\mathbf{x}_m|0, \mathbf{\Lambda})`

    Parameters
    ----------
    R : ndarray
        Rotation matrix.
    logdet : float, optional
        Log of the absolute determinant of `R`.  Computed with
        ``np.linalg.slogdet`` if omitted.
    inv : ndarray, optional
        Inverse of `R`.  Only used for the gradient; computed with
        ``np.linalg.inv`` if omitted.
    gradient : bool, optional
        If True, also return the gradient of the bound with respect to `R`.

    Returns
    -------
    bound
        Sum of the ``<log p(X)>`` term and the entropy term.
    (bound, d_bound)
        Returned instead of `bound` when `gradient` is True.
    """
    # TODO/FIXME: X and alpha should NOT contain observed values!! Check
    # that.

    # TODO/FIXME: Allow non-zero prior mean!

    # Assume constant mean and precision matrix over plates..

    # The defaults are None, so derive the value from R when not supplied.
    logdet_R = np.linalg.slogdet(R)[1] if logdet is None else logdet

    # Compute rotated moments
    XX_R = dot(R, self.XX, R.T)

    # Compute entropy H(X)
    logH_X = utils.random.gaussian_entropy(-2*self.N*logdet_R, 0)

    # Compute <log p(X)>
    logp_X = utils.random.gaussian_logpdf(np.vdot(XX_R, self.Lambda),
                                          0,
                                          0,
                                          0,
                                          0)

    # Compute the bound
    bound = logp_X + logH_X

    if not gradient:
        return bound

    # The inverse is needed only by the gradient of the entropy term.
    inv_R = np.linalg.inv(R) if inv is None else inv

    # Compute dH(X)
    dlogH_X = utils.random.gaussian_entropy(-2*self.N*inv_R.T, 0)

    # Compute d<log p(X)>
    dXX = 2*dot(self.Lambda, R, self.XX)
    dlogp_X = utils.random.gaussian_logpdf(dXX, 0, 0, 0, 0)

    d_bound = dlogp_X + dlogH_X

    return (bound, d_bound)
def rotate_matrix(self, R1, R2, inv1=None, logdet1=None, inv2=None,
                  logdet2=None, Q=None):
    """
    Rotate the vector variable as a matrix: X -> R1*X*R2'.

    The vector is reshaped into a matrix by stacking the row vectors.

    Computes R1*X*R2', which is identical to kron(R1,R2)*x for the
    row-stacked vector x.

    Note that this is slightly different from the standard Kronecker product
    definition because Numpy stacks row vectors instead of column vectors.

    Parameters
    ----------
    R1 : ndarray
        A matrix from the left
    R2 : ndarray
        A matrix from the right
    inv1, inv2 : ndarray, optional
        Inverses of `R1` and `R2`.  Computed with ``np.linalg.inv`` if
        omitted.
    logdet1, logdet2 : float, optional
        Accepted for interface compatibility but not used: the moments and
        ``g`` are not adjusted by hand here, instead
        ``self._update_moments_and_cgf()`` is called at the end.
    Q : ndarray, optional
        If given, ``self.u[0]`` is first multiplied by `Q` from the left and
        ``self.phi`` is rescaled with the column sums of `Q`.
    """
    if Q is not None:
        # Rotate moments using Q
        self.u[0] = np.einsum('ik,kj->ij', Q, self.u[0])
        sumQ = np.sum(Q, axis=0)
        # Rotate natural parameters using Q
        self.phi[1] = np.einsum('d,dij->dij', sumQ**(-2), self.phi[1])
        self.phi[0] = np.einsum('dij,dj->di', -2*self.phi[1], self.u[0])

    if inv1 is None:
        inv1 = np.linalg.inv(R1)
    if inv2 is None:
        inv2 = np.linalg.inv(R2)

    D1 = np.shape(R1)[0]
    D2 = np.shape(R2)[0]

    # Reshape into matrices
    sh0 = np.shape(self.phi[0])[:-1] + (D1, D2)
    sh1 = np.shape(self.phi[1])[:-2] + (D1, D2, D1, D2)
    phi0 = np.reshape(self.phi[0], sh0)
    phi1 = np.reshape(self.phi[1], sh1)

    # Apply rotations to phi: inv1' acts on the two D1-sized axes and inv2'
    # on the two D2-sized axes.
    phi0 = dot(inv1.T, phi0, inv2)
    phi1 = np.einsum('...ia,...abcd->...ibcd', inv1.T, phi1)
    phi1 = np.einsum('...ic,...abcd->...abid', inv1.T, phi1)
    phi1 = np.einsum('...ib,...abcd->...aicd', inv2.T, phi1)
    phi1 = np.einsum('...id,...abcd->...abci', inv2.T, phi1)

    # Reshape back into vectors
    self.phi[0] = np.reshape(phi0, self.phi[0].shape)
    self.phi[1] = np.reshape(phi1, self.phi[1].shape)

    # It'd be better to rotate the moments too..
    self._update_moments_and_cgf()
def cost(r):
    """
    Cost function and its gradient for a flattened rotation matrix.

    This is a closure: ``self`` (providing ``D``, ``block1`` and ``block2``)
    comes from the enclosing scope.

    Parameters
    ----------
    r : ndarray
        Rotation matrix as a vector; reshaped to ``(self.D, self.D)``.

    Returns
    -------
    (c, dc) : tuple
        The negated sum of the two block bounds and the raveled negated sum
        of their gradients with respect to R.
    """
    # Make vector-r into matrix-R
    R = np.reshape(r, (self.D, self.D))

    # Inverse and log-determinant of R are shared by both blocks
    invR = np.linalg.inv(R)
    logdetR = np.linalg.slogdet(R)[1]

    # Lower bound terms: block1 is rotated by R, block2 by inv(R)'
    bound1, grad1 = self.block1.bound(R, logdet=logdetR, inv=invR)
    bound2, grad2 = self.block2.bound(invR.T, logdet=-logdetR, inv=R.T)

    # Apply chain rule for the second gradient:
    # d b(invR.T)
    # = tr(db.T * d(invR.T))
    # = tr(db * d(invR))
    # = -tr(db * invR * (dR) * invR)
    # = -tr(invR * db * invR * dR)
    grad2 = -dot(invR.T, grad2.T, invR.T)

    # The optimizer minimizes, hence the sign flips
    total_bound = bound1 + bound2
    total_grad = grad1 + grad2
    return (-total_bound, np.ravel(-total_grad))
def rotate_matrix(self,
                  R1,
                  R2,
                  inv1=None,
                  logdet1=None,
                  inv2=None,
                  logdet2=None,
                  Q=None):
    """
    Rotate the vector variable as a matrix: X -> R1*X*R2'.

    The vector is reshaped into a matrix by stacking the row vectors.

    Computes R1*X*R2', which is identical to kron(R1,R2)*x for the
    row-stacked vector x.

    Note that this is slightly different from the standard Kronecker product
    definition because Numpy stacks row vectors instead of column vectors.

    Parameters
    ----------
    R1 : ndarray
        Matrix applied from the left.
    R2 : ndarray
        Matrix applied (transposed) from the right.
    inv1, inv2 : ndarray, optional
        Inverses of `R1` and `R2`; ``np.linalg.inv`` is used when they are
        not given.
    logdet1, logdet2 : float, optional
        Kept in the signature for compatibility.  They are not read because
        the method finishes with ``self._update_moments_and_cgf()`` rather
        than correcting ``g`` itself.
    Q : ndarray, optional
        When given, ``self.u[0]`` is multiplied by `Q` from the left and
        ``self.phi`` is rescaled with the column sums of `Q` beforehand.
    """
    if Q is not None:
        # Rotate moments using Q
        self.u[0] = np.einsum('ik,kj->ij', Q, self.u[0])
        sumQ = np.sum(Q, axis=0)
        # Rotate natural parameters using Q
        self.phi[1] = np.einsum('d,dij->dij', sumQ**(-2), self.phi[1])
        self.phi[0] = np.einsum('dij,dj->di', -2 * self.phi[1], self.u[0])

    if inv1 is None:
        inv1 = np.linalg.inv(R1)
    if inv2 is None:
        inv2 = np.linalg.inv(R2)

    D1 = np.shape(R1)[0]
    D2 = np.shape(R2)[0]

    # Reshape into matrices
    sh0 = np.shape(self.phi[0])[:-1] + (D1, D2)
    sh1 = np.shape(self.phi[1])[:-2] + (D1, D2, D1, D2)
    phi0 = np.reshape(self.phi[0], sh0)
    phi1 = np.reshape(self.phi[1], sh1)

    # Apply rotations to phi, one tensor axis at a time: inv1' on the
    # D1-sized axes (a, c) and inv2' on the D2-sized axes (b, d).
    phi0 = dot(inv1.T, phi0, inv2)
    phi1 = np.einsum('...ia,...abcd->...ibcd', inv1.T, phi1)
    phi1 = np.einsum('...ic,...abcd->...abid', inv1.T, phi1)
    phi1 = np.einsum('...ib,...abcd->...aicd', inv2.T, phi1)
    phi1 = np.einsum('...id,...abcd->...abci', inv2.T, phi1)

    # Reshape back into vectors
    self.phi[0] = np.reshape(phi0, self.phi[0].shape)
    self.phi[1] = np.reshape(phi1, self.phi[1].shape)

    # It'd be better to rotate the moments too..
    self._update_moments_and_cgf()
def _compute_bound(self, R, logdet=None, inv=None, gradient=False):
    r"""
    Rotate q(X) as X->RX: q(X)=N(R*mu, R*Cov*R')

    Assume:
    :math:`p(\mathbf{X}) = \prod^M_{m=1} N(\mathbf{x}_m|0, \mathbf{\Lambda})`

    Parameters
    ----------
    R : ndarray
        Rotation matrix.
    logdet : float, optional
        Log of the absolute determinant of `R`; obtained from
        ``np.linalg.slogdet`` when not given.
    inv : ndarray, optional
        Inverse of `R`, needed for the gradient only; obtained from
        ``np.linalg.inv`` when not given.
    gradient : bool, optional
        Whether to return the gradient with respect to `R` as well.

    Returns
    -------
    bound
        The ``<log p(X)>`` term plus the entropy term.
    (bound, d_bound)
        Returned instead of `bound` if `gradient` is True.
    """
    # TODO/FIXME: X and alpha should NOT contain observed values!! Check
    # that.

    # TODO/FIXME: Allow non-zero prior mean!

    # Assume constant mean and precision matrix over plates..

    # Both keyword arguments default to None; fall back to computing them.
    if logdet is None:
        logdet = np.linalg.slogdet(R)[1]
    logdet_R = logdet

    # Compute rotated moments
    XX_R = dot(R, self.XX, R.T)

    # Compute entropy H(X)
    logH_X = utils.random.gaussian_entropy(-2 * self.N * logdet_R, 0)

    # Compute <log p(X)>
    logp_X = utils.random.gaussian_logpdf(np.vdot(XX_R, self.Lambda),
                                          0, 0, 0, 0)

    # Compute the bound
    bound = logp_X + logH_X

    if not gradient:
        return bound

    # Only the gradient of the entropy needs the inverse of R.
    if inv is None:
        inv = np.linalg.inv(R)
    inv_R = inv

    # Compute dH(X)
    dlogH_X = utils.random.gaussian_entropy(-2 * self.N * inv_R.T, 0)

    # Compute d<log p(X)>
    dXX = 2 * dot(self.Lambda, R, self.XX)
    dlogp_X = utils.random.gaussian_logpdf(dXX, 0, 0, 0, 0)

    d_bound = dlogp_X + dlogH_X

    return (bound, d_bound)
def setup(self):
    """
    This method should be called just before optimization.

    Reads the moments of ``self.X_node`` and of its parents and caches the
    sums needed later as attributes (``X0``, ``X0X0``, ``XnXn``, ``A_XpXn``,
    ``A_XpXp_A``, ``CovA_XpXp``, ``Lambda``, ``Lambda_mu_X0``).  Finally
    calls ``self.A_rotator.setup(rotate_plates=True)``.
    """
    # Moments of X; the "previous step" second moments drop the last entry
    (X, XnXn, XpXn) = self.X_node.get_moments()
    XpXp = XnXn[:-1,:,:]

    # Moments of A (and make sure they include time axis)
    (A, AA) = self.X_node.parents[2].get_moments()
    A = utils.utils.atleast_nd(A, 3)
    AA = utils.utils.atleast_nd(AA, 4)
    outer_AA = A[...,:,np.newaxis]*A[...,np.newaxis,:]
    CovA = AA - outer_AA

    #
    # Expectations with respect to X
    #
    self.X0 = X[0,:]
    self.X0X0 = XnXn[0,:,:]
    later_XnXn = XnXn[1:,:,:]
    self.XnXn = np.sum(later_XnXn, axis=0)

    #
    # Expectations with respect to A and X
    #

    # \sum_n <A_n> <x_{n-1} x_n^T>
    A_times_XpXn = dot(A, XpXn)
    self.A_XpXn = np.sum(A_times_XpXn, axis=0)

    # \sum_n <A_n> <x_{n-1} x_{n-1}^T> <A_n>^T
    A_XpXp_At = dot(A, XpXp, utils.utils.T(A))
    self.A_XpXp_A = np.sum(A_XpXp_At, axis=0)

    # \sum_n tr(CovA_n <x_{n-1} x_{n-1}^T>)
    self.CovA_XpXp = np.einsum('ndij,nij->d', CovA, XpXp)

    # Moments of the fixed parameter nodes
    mu = self.X_node.parents[0].get_moments()[0]
    self.Lambda = self.X_node.parents[1].get_moments()[0]
    Lambda_mu = np.dot(self.Lambda, mu)
    self.Lambda_mu_X0 = np.outer(Lambda_mu, self.X0)

    self.A_rotator.setup(rotate_plates=True)
def setup(self):
    """
    This method should be called just before optimization.

    Caches, as attributes of ``self``, the expectations derived from the
    moments of ``self.X_node`` and its three parents, then sets up
    ``self.A_rotator`` with ``rotate_plates=True``.
    """
    x_node = self.X_node

    # Get moments of X
    (X, XnXn, XpXn) = x_node.get_moments()
    XpXp = XnXn[:-1, :, :]

    # Get moments of A (and make sure they include time axis)
    (A, AA) = x_node.parents[2].get_moments()
    A = utils.utils.atleast_nd(A, 3)
    AA = utils.utils.atleast_nd(AA, 4)
    A_col = A[..., :, np.newaxis]
    A_row = A[..., np.newaxis, :]
    CovA = AA - A_col * A_row

    #
    # Expectations with respect to X
    #
    self.X0 = X[0, :]
    self.X0X0 = XnXn[0, :, :]
    self.XnXn = np.sum(XnXn[1:, :, :], axis=0)

    #
    # Expectations with respect to A and X
    #

    # Compute: \sum_n <A_n> <x_{n-1} x_n^T>
    self.A_XpXn = np.sum(dot(A, XpXn), axis=0)

    # Compute: \sum_n <A_n> <x_{n-1} x_{n-1}^T> <A_n>^T
    A_transposed = utils.utils.T(A)
    self.A_XpXp_A = np.sum(dot(A, XpXp, A_transposed), axis=0)

    # Compute: \sum_n tr(CovA_n <x_{n-1} x_{n-1}^T>)
    self.CovA_XpXp = np.einsum('ndij,nij->d', CovA, XpXp)

    # Get moments of the fixed parameter nodes
    mu = x_node.parents[0].get_moments()[0]
    self.Lambda = x_node.parents[1].get_moments()[0]
    self.Lambda_mu_X0 = np.outer(np.dot(self.Lambda, mu), self.X0)

    self.A_rotator.setup(rotate_plates=True)
def rotate(self, R, inv=None, logdet=None, Q=None):
    """
    Rotation is disabled for this node.

    The parameters are kept so that the signature matches the other
    ``rotate`` implementations, but none of them is used.

    Parameters
    ----------
    R : ndarray
        Rotation matrix (unused).
    inv : ndarray, optional
        Inverse of `R` (unused).
    logdet : float, optional
        Log of the absolute determinant of `R` (unused).
    Q : ndarray, optional
        Plate rotation matrix (unused).

    Raises
    ------
    NotImplementedError
        Always.
    """
    # NOTE: everything that used to follow this statement was unreachable.
    raise NotImplementedError("rotate() is disabled for this node")
def rotate(self, R, inv=None, logdet=None, Q=None):
    """
    Rotate the node in place with `R` and, optionally, its plates with `Q`.

    Parameters
    ----------
    R : ndarray
        Rotation matrix.
    inv : ndarray, optional
        Inverse of `R`; ``np.linalg.inv(R)`` is used if omitted.
    logdet : float, optional
        Log of the absolute determinant of `R`;
        ``np.linalg.slogdet(R)[1]`` is used if omitted.
    Q : ndarray, optional
        Plate rotation matrix.  Assumes that there is only one plate axis.
        When given, the moments and ``g`` are refreshed with
        ``self._update_moments_and_cgf()`` instead of being rotated by hand.
    """
    invR = np.linalg.inv(R) if inv is None else inv
    logdetR = np.linalg.slogdet(R)[1] if logdet is None else logdet

    # It would be more efficient and simpler, if you just rotated the
    # moments and didn't touch phi. However, then you would need to call
    # update() before lower_bound_contribution. This is more error-safe.

    rotate_plates = Q is not None

    if rotate_plates:
        # Rotate the first moment using Q
        self.u[0] = np.einsum('ik,kj->ij', Q, self.u[0])
        column_sums = np.sum(Q, axis=0)
        # Rescale phi[1] and rebuild phi[0] from it and the rotated moment
        self.phi[1] = np.einsum('d,dij->dij', column_sums**(-2), self.phi[1])
        self.phi[0] = np.einsum('dij,dj->di', -2 * self.phi[1], self.u[0])

    # Transform parameters using R
    self.phi[0] = mvdot(invR.T, self.phi[0])
    self.phi[1] = dot(invR.T, self.phi[1], invR)

    if rotate_plates:
        self._update_moments_and_cgf()
        return

    # Transform moments and g using R
    self.u[0] = mvdot(R, self.u[0])
    self.u[1] = dot(R, self.u[1], R.T)
    self.g -= logdetR
def bound(self, R, logdet=None, inv=None):
    """
    Evaluate the rotated lower bound terms and their gradient.

    Combines the contribution of ``self._compute_bound`` with the one of
    ``self.A_rotator``, which is rotated by ``inv(R)'`` and has its plates
    rotated by `R`.

    Parameters
    ----------
    R : ndarray
        Rotation matrix.
    logdet : float, optional
        Log of the absolute determinant of `R`; computed if omitted.
    inv : ndarray, optional
        Inverse of `R`; computed if omitted.

    Returns
    -------
    (bound, d_bound) : tuple
        Sum of both contributions and the sum of their gradients.
    """
    invR = np.linalg.inv(R) if inv is None else inv
    logdetR = np.linalg.slogdet(R)[1] if logdet is None else logdet

    # Contribution of X
    bound_X, grad_X = self._compute_bound(R,
                                          logdet=logdetR,
                                          inv=invR,
                                          gradient=True)

    # Compute cost and gradient from A
    bound_A, grad_A_wrt_R, grad_A_wrt_Q = self.A_rotator.bound(invR.T,
                                                               inv=R.T,
                                                               logdet=-logdetR,
                                                               Q=R)
    # A is rotated by inv(R)', so map its gradient back onto R
    grad_A_wrt_R = -dot(invR.T, grad_A_wrt_R.T, invR.T)

    # Compute the bound
    total = bound_X + bound_A
    total_grad = grad_X + grad_A_wrt_R + grad_A_wrt_Q

    return (total, total_grad)
def bound(self, R, logdet=None, inv=None):
    """
    Evaluate the rotated lower bound terms and their gradient.

    Adds the contribution of ``self._compute_bound`` to the contribution of
    ``self.A_rotator``, which is rotated by ``inv(R)'`` and has its plates
    rotated by `R`.

    Parameters
    ----------
    R : ndarray
        Rotation matrix.
    logdet : float, optional
        Log of the absolute determinant of `R`.  Computed with
        ``np.linalg.slogdet`` if omitted.
    inv : ndarray, optional
        Inverse of `R`.  Computed with ``np.linalg.inv`` if omitted.

    Returns
    -------
    (bound, d_bound) : tuple
        Sum of both contributions and the sum of their gradients.
    """
    # Function-scope import so that the fallbacks below are self-contained.
    import numpy as np

    # `inv.T` and `-logdet` are used below, so the None defaults must be
    # replaced by actual values first.
    if inv is None:
        inv = np.linalg.inv(R)
    if logdet is None:
        logdet = np.linalg.slogdet(R)[1]

    (bound_X, d_bound_X) = self._compute_bound(R,
                                               logdet=logdet,
                                               inv=inv,
                                               gradient=True)

    # Compute cost and gradient from A
    (bound_A, dR_bound_A, dQ_bound_A) = self.A_rotator.bound(inv.T,
                                                             inv=R.T,
                                                             logdet=-logdet,
                                                             Q=R)
    # The A rotator was evaluated at inv(R)', so apply the chain rule to get
    # the gradient with respect to R:
    #   d b(inv(R)') = -tr(inv(R) * db * inv(R) * dR)
    dR_bound_A = -dot(inv.T, dR_bound_A.T, inv.T)

    # Compute the bound
    bound = bound_X + bound_A
    d_bound = d_bound_X + dR_bound_A + dQ_bound_A

    return (bound, d_bound)
def _compute_bound(self, R, logdet=None, inv=None, gradient=False):
    r"""
    Rotate q(X) as X->RX: q(X)=N(R*mu, R*Cov*R')

    Assume:
    :math:`p(\mathbf{X}) = \prod^M_{m=1} N(\mathbf{x}_m|0, \mathbf{\Lambda})`

    Assume unit innovation noise covariance.

    Parameters
    ----------
    R : ndarray
        Rotation matrix.
    logdet : float, optional
        Log of the absolute determinant of `R`.  Computed with
        ``np.linalg.slogdet`` if omitted.
    inv : ndarray, optional
        Inverse of `R`.  Only used for the gradient; computed with
        ``np.linalg.inv`` if omitted.
    gradient : bool, optional
        If True, also return the gradient with respect to `R`.

    Returns
    -------
    bound
        The ``<log p(X)>`` term plus the entropy term.
    (bound, d_bound)
        Returned instead of `bound` when `gradient` is True.
    """
    # TODO/FIXME: X and alpha should NOT contain observed values!! Check
    # that.

    # TODO/FIXME: Allow non-zero prior mean!

    # Assume constant mean and precision matrix over plates..

    # The keyword defaults are None; derive the value from R when missing.
    logdetR = np.linalg.slogdet(R)[1] if logdet is None else logdet

    # Transform moments of X and A:
    Lambda_R_X0X0 = dot(self.Lambda, R, self.X0X0)
    R_XnXn = dot(R, self.XnXn)
    RA_XpXp_A = dot(R, self.A_XpXp_A)
    sumr = np.sum(R, axis=0)
    R_CovA_XpXp = sumr * self.CovA_XpXp

    # Compute entropy H(X)
    logH_X = utils.random.gaussian_entropy(-2*self.N*logdetR, 0)

    # Compute <log p(X)>
    yy = tracedot(R_XnXn, R.T) + tracedot(Lambda_R_X0X0, R.T)
    yz = tracedot(dot(R, self.A_XpXn), R.T) + tracedot(self.Lambda_mu_X0, R.T)
    zz = tracedot(RA_XpXp_A, R.T) + np.dot(R_CovA_XpXp, sumr)
    logp_X = utils.random.gaussian_logpdf(yy, yz, zz, 0, 0)

    # Compute the bound
    bound = logp_X + logH_X

    if not gradient:
        return bound

    # The inverse of R enters only through the gradient of the entropy.
    invR = np.linalg.inv(R) if inv is None else inv

    # Compute dH(X)
    dlogH_X = utils.random.gaussian_entropy(-2*self.N*invR.T, 0)

    # TODO/FIXME: There might be a very small error in the gradient?

    # Compute d<log p(X)>
    dyy = 2 * (R_XnXn + Lambda_R_X0X0)
    dyz = dot(R, self.A_XpXn + self.A_XpXn.T) + self.Lambda_mu_X0
    # R_CovA_XpXp is a vector; it broadcasts over the rows of the matrix.
    dzz = 2 * (RA_XpXp_A + R_CovA_XpXp)
    dlogp_X = utils.random.gaussian_logpdf(dyy, dyz, dzz, 0, 0)

    d_bound = dlogp_X + dlogH_X

    return (bound, d_bound)
def _compute_bound(self, R, logdet=None, inv=None, gradient=False, terms=False):
    r"""
    Rotate q(X) as X->RX: q(X)=N(R*mu, R*Cov*R')

    Assume:
    :math:`p(\mathbf{X}) = \prod^M_{m=1} N(\mathbf{x}_m|0, \mathbf{\Lambda})`

    Assume unit innovation noise covariance.

    Parameters
    ----------
    R : ndarray
        Rotation matrix.
    logdet : float, optional
        Log of the absolute determinant of `R`; computed if omitted.
    inv : ndarray, optional
        Inverse of `R`; computed if omitted.
    gradient : bool, optional
        If True, return ``(bound, d_bound)`` instead of just ``bound``.
    terms : bool, optional
        If True, the bound (and the gradient) are returned as dictionaries
        keyed by ``self.X_node``.
    """
    # TODO/FIXME: X and alpha should NOT contain observed values!! Check
    # that.

    # Assume constant mean and precision matrix over plates..

    invR = np.linalg.inv(R) if inv is None else inv
    logdetR = np.linalg.slogdet(R)[1] if logdet is None else logdet

    # Transform moments of X and A:
    Lambda_R_X0X0 = sum_to_plates(
        dot(self.Lambda, R, self.X0X0),
        (),
        plates_from=self.X_node.plates,
        ndim=2,
    )
    R_XnXn = dot(R, self.XnXn)
    RA_XpXp_A = dot(R, self.A_XpXp_A)
    column_sums = np.sum(R, axis=0)
    R_CovA_XpXp = column_sums * self.CovA_XpXp

    # Entropy H(X); the factor is the total number of rotated vectors
    n_vectors = self.N * np.prod(self.X_node.plates)
    entropy = random.gaussian_entropy(-2 * n_vectors * logdetR, 0)

    # <log p(X)>
    yy = tracedot(R_XnXn, R.T) + tracedot(Lambda_R_X0X0, R.T)
    yz = (tracedot(dot(R, self.A_XpXn), R.T)
          + tracedot(self.Lambda_mu_X0, R.T))
    zz = (tracedot(RA_XpXp_A, R.T)
          + np.einsum("...k,...k->...", R_CovA_XpXp, column_sums))
    logpdf = random.gaussian_logpdf(yy, yz, zz, 0, 0)

    # The bound, optionally wrapped into a per-node dictionary
    value = logpdf + entropy
    bound = {self.X_node: value} if terms else value

    if not gradient:
        return bound

    # dH(X)
    d_entropy = random.gaussian_entropy(-2 * n_vectors * invR.T, 0)

    # d<log p(X)>
    dyy = 2 * (R_XnXn + Lambda_R_X0X0)
    dyz = dot(R, self.A_XpXn + self.A_XpXn.T) + self.Lambda_mu_X0
    dzz = 2 * (RA_XpXp_A + R_CovA_XpXp[None, :])
    d_logpdf = random.gaussian_logpdf(dyy, dyz, dzz, 0, 0)

    d_value = d_logpdf + d_entropy
    d_bound = {self.X_node: d_value} if terms else d_value

    return (bound, d_bound)
def _compute_bound(self, R, logdet=None, inv=None, gradient=False):
    r"""
    Rotate q(X) as X->RX: q(X)=N(R*mu, R*Cov*R')

    Assume:
    :math:`p(\mathbf{X}) = \prod^M_{m=1} N(\mathbf{x}_m|0, \mathbf{\Lambda})`

    Assume unit innovation noise covariance.

    Parameters
    ----------
    R : ndarray
        Rotation matrix.
    logdet : float, optional
        Log of the absolute determinant of `R`; obtained from
        ``np.linalg.slogdet`` when not given.
    inv : ndarray, optional
        Inverse of `R`, needed for the gradient only; obtained from
        ``np.linalg.inv`` when not given.
    gradient : bool, optional
        Whether to return the gradient with respect to `R` as well.

    Returns
    -------
    bound
        The ``<log p(X)>`` term plus the entropy term.
    (bound, d_bound)
        Returned instead of `bound` if `gradient` is True.
    """
    # TODO/FIXME: X and alpha should NOT contain observed values!! Check
    # that.

    # TODO/FIXME: Allow non-zero prior mean!

    # Assume constant mean and precision matrix over plates..

    # Both keyword arguments default to None; fall back to computing them.
    if logdet is None:
        logdet = np.linalg.slogdet(R)[1]
    logdetR = logdet

    # Transform moments of X and A:
    Lambda_R_X0X0 = dot(self.Lambda, R, self.X0X0)
    R_XnXn = dot(R, self.XnXn)
    RA_XpXp_A = dot(R, self.A_XpXp_A)
    sumr = np.sum(R, axis=0)
    R_CovA_XpXp = sumr * self.CovA_XpXp

    # Compute entropy H(X)
    logH_X = utils.random.gaussian_entropy(-2 * self.N * logdetR, 0)

    # Compute <log p(X)>
    yy = tracedot(R_XnXn, R.T) + tracedot(Lambda_R_X0X0, R.T)
    yz = tracedot(dot(R, self.A_XpXn), R.T) + tracedot(
        self.Lambda_mu_X0, R.T)
    zz = tracedot(RA_XpXp_A, R.T) + np.dot(R_CovA_XpXp, sumr)
    logp_X = utils.random.gaussian_logpdf(yy, yz, zz, 0, 0)

    # Compute the bound
    bound = logp_X + logH_X

    if not gradient:
        return bound

    # Only the gradient of the entropy needs the inverse of R, so it is
    # not touched when just the bound is requested.
    if inv is None:
        inv = np.linalg.inv(R)
    invR = inv

    # Compute dH(X)
    dlogH_X = utils.random.gaussian_entropy(-2 * self.N * invR.T, 0)

    # TODO/FIXME: There might be a very small error in the gradient?

    # Compute d<log p(X)>
    dyy = 2 * (R_XnXn + Lambda_R_X0X0)
    dyz = dot(R, self.A_XpXn + self.A_XpXn.T) + self.Lambda_mu_X0
    # The vector R_CovA_XpXp broadcasts over the rows of RA_XpXp_A.
    dzz = 2 * (RA_XpXp_A + R_CovA_XpXp)
    dlogp_X = utils.random.gaussian_logpdf(dyy, dyz, dzz, 0, 0)

    d_bound = dlogp_X + dlogH_X

    return (bound, d_bound)
def _compute_bound(self, R, logdet=None, inv=None, Q=None, gradient=False):
    """
    Rotate q(X) and q(alpha).

    Assume:
    p(X|alpha) = prod_m N(x_m|0,diag(alpha))
    p(alpha) = prod_d G(a_d,b_d)

    Parameters
    ----------
    R : ndarray
        Rotation matrix.
    logdet : float, optional
        Log of the absolute determinant of `R`.  Computed with
        ``np.linalg.slogdet`` if omitted.
    inv : ndarray, optional
        Inverse of `R`.  Only used for the gradient; computed with
        ``np.linalg.inv`` if omitted.
    Q : ndarray, optional
        Plate rotation matrix.  If given, the moments of X are rotated over
        the plates before `R` is applied.
    gradient : bool, optional
        If True, gradients are returned together with the bound.

    Returns
    -------
    bound
        If `gradient` is False.
    (bound, dR_bound)
        If `gradient` is True and `Q` is None.
    (bound, dR_bound, dQ_bound)
        If `gradient` is True and `Q` is given.
    """
    # TODO/FIXME: X and alpha should NOT contain observed values!! Check that.

    #
    # Transform the distributions and moments
    #

    # Compute rotated second moment
    if Q is not None:
        # Rotate plates
        sumQ = np.sum(Q, axis=0)
        QX = np.einsum('ik,kj->ij', Q, self.X)
        XX = (np.einsum('ki,kj->ij', QX, QX) +
              np.einsum('d,dij->ij', sumQ**2, self.CovX))
        logdet_Q = np.sum(np.log(np.abs(sumQ)))
        X = QX
        X_mu = utils.utils.sum_multiply(X[..., :, np.newaxis],
                                        self.mu[..., np.newaxis, :],
                                        axis=(-1, -2),
                                        sumaxis=False,
                                        keepdims=False)
    else:
        X = self.X
        XX = self.XX
        logdet_Q = 0
        X_mu = self.Xmu

    # TODO/FIXME: X can be summed to the plates of mu!?
    RX_mu = dot(R, X_mu)
    XmuXmu_R = (dot(R, XX, R.T) - RX_mu - RX_mu.T + self.mumu)

    # Compute q(alpha)
    a_alpha = self.a
    b_alpha = self.b0 + 0.5 * np.diag(XmuXmu_R)
    alpha_R = a_alpha / b_alpha
    logalpha_R = -np.log(b_alpha)  # + const

    # The keyword default is None; derive the value from R when missing.
    logdet_R = np.linalg.slogdet(R)[1] if logdet is None else logdet

    N = self.N
    D = np.shape(R)[0]

    #
    # Compute the cost
    #

    # Compute entropy H(X)
    logH_X = utils.random.gaussian_entropy(
        -2 * N * logdet_R - 2 * D * logdet_Q, 0)

    # Compute entropy H(alpha)
    logH_alpha = utils.random.gamma_entropy(0, np.sum(np.log(b_alpha)),
                                            0, 0, 0)

    # Compute <log p(X|alpha)>
    logp_X = utils.random.gaussian_logpdf(
        np.einsum('ii,i', XmuXmu_R, alpha_R), 0, 0,
        N * np.sum(logalpha_R), 0)

    # Compute <log p(alpha)>
    logp_alpha = utils.random.gamma_logpdf(self.b0 * np.sum(alpha_R),
                                           np.sum(logalpha_R),
                                           self.a0 * np.sum(logalpha_R),
                                           0, 0)

    # Compute the bound
    bound = logp_X + logp_alpha + logH_X + logH_alpha

    if not gradient:
        return bound

    #
    # Compute the gradient with respect R
    #

    # The inverse of R enters only through the gradient of H(X).
    inv_R = np.linalg.inv(R) if inv is None else inv

    # Compute dH(X)
    dlogH_X = utils.random.gaussian_entropy(-2 * N * inv_R.T, 0)

    # Compute dH(alpha)
    dXmuXmu_R = 2 * np.dot(R, XX) - 2 * X_mu.T
    d_log_b = np.einsum('i,ij->ij', 1 / b_alpha, dXmuXmu_R)
    dlogH_alpha = utils.random.gamma_entropy(0, d_log_b, 0, 0, 0)

    # Compute d<log p(X|alpha)>
    # TODO/FIXME: Fix these gradients!
    d_log_alpha = -d_log_b
    dXmuXmu_alpha = np.einsum('i,ij->ij', alpha_R, dXmuXmu_R)
    XmuXmu_dalpha = -np.einsum('i,i,ii,ij->ij', alpha_R, 1 / b_alpha,
                               XmuXmu_R, dXmuXmu_R)
    dlogp_X = utils.random.gaussian_logpdf(dXmuXmu_alpha + XmuXmu_dalpha,
                                           0, 0, N * d_log_alpha, 0)

    # Compute d<log p(alpha)>
    d_alpha = -np.einsum('i,i,ij->ij', alpha_R, 1 / b_alpha, dXmuXmu_R)
    dlogp_alpha = utils.random.gamma_logpdf(self.b0 * d_alpha,
                                            d_log_alpha,
                                            self.a0 * d_log_alpha,
                                            0, 0)

    dR_bound = dlogp_X + dlogp_alpha + dlogH_X + dlogH_alpha

    if Q is None:
        return (bound, dR_bound)

    #
    # Compute the gradient with respect to Q (if Q given)
    #

    def d_helper(v):
        # Shared v-weighted term of the Q-gradients; uses R, QX, sumQ and
        # self from the enclosing scope.
        R_v_R = np.einsum('ki,k,kj->ij', R, v, R)
        tr_R_v_R_Cov = np.einsum('ij,dji->d', R_v_R, self.CovX)
        mu_v_R = np.einsum('ik,k,kj', self.mu, v, R)
        return (dot(QX, R_v_R, self.X.T) + sumQ * tr_R_v_R_Cov -
                dot(mu_v_R, self.X.T))

    # Compute dH(X)
    dQ_logHX = utils.random.gaussian_entropy(-2 * D / sumQ, 0)

    # Compute dH(alpha)
    d_log_b = d_helper(1 / b_alpha)
    dQ_logHalpha = utils.random.gamma_entropy(0, d_log_b, 0, 0, 0)

    # Compute d<log p(X|alpha)>
    dXX_alpha = 2 * d_helper(alpha_R)
    XX_dalpha = -d_helper(np.diag(XmuXmu_R) * alpha_R / b_alpha)
    d_log_alpha = -d_log_b
    dQ_logpX = utils.random.gaussian_logpdf(dXX_alpha + XX_dalpha,
                                            0, 0, N * d_log_alpha, 0)

    # Compute d<log p(alpha)>
    d_alpha = -d_helper(alpha_R / b_alpha)
    dQ_logpalpha = utils.random.gamma_logpdf(self.b0 * d_alpha,
                                             d_log_alpha,
                                             self.a0 * d_log_alpha,
                                             0, 0)

    dQ_bound = dQ_logpX + dQ_logpalpha + dQ_logHX + dQ_logHalpha

    return (bound, dR_bound, dQ_bound)
def _compute_bound(self, R, logdet=None, inv=None, Q=None, gradient=False):
    """
    Rotate q(X) and q(alpha).

    Assume:
    p(X|alpha) = prod_m N(x_m|0,diag(alpha))
    p(alpha) = prod_d G(a_d,b_d)

    Parameters
    ----------
    R : ndarray
        Rotation matrix.
    logdet : float, optional
        Log of the absolute determinant of `R`; obtained from
        ``np.linalg.slogdet`` when not given.
    inv : ndarray, optional
        Inverse of `R`, needed for the gradient only; obtained from
        ``np.linalg.inv`` when not given.
    Q : ndarray, optional
        Plate rotation matrix.  When given, the moments of X are rotated
        over the plates before `R` is applied.
    gradient : bool, optional
        Whether gradients are returned together with the bound.

    Returns
    -------
    bound
        If `gradient` is False.
    (bound, dR_bound)
        If `gradient` is True and `Q` is None.
    (bound, dR_bound, dQ_bound)
        If `gradient` is True and `Q` is given.
    """
    # TODO/FIXME: X and alpha should NOT contain observed values!! Check that.

    #
    # Transform the distributions and moments
    #

    # Compute rotated second moment
    if Q is not None:
        # Rotate plates
        sumQ = np.sum(Q, axis=0)
        QX = np.einsum('ik,kj->ij', Q, self.X)
        XX = (np.einsum('ki,kj->ij', QX, QX)
              + np.einsum('d,dij->ij', sumQ**2, self.CovX))
        logdet_Q = np.sum(np.log(np.abs(sumQ)))
        X = QX
        X_mu = utils.utils.sum_multiply(X[...,:,np.newaxis],
                                        self.mu[...,np.newaxis,:],
                                        axis=(-1,-2),
                                        sumaxis=False,
                                        keepdims=False)
    else:
        X = self.X
        XX = self.XX
        logdet_Q = 0
        X_mu = self.Xmu

    # TODO/FIXME: X can be summed to the plates of mu!?
    RX_mu = dot(R, X_mu)
    XmuXmu_R = (dot(R, XX, R.T)
                - RX_mu - RX_mu.T
                + self.mumu)

    # Compute q(alpha)
    a_alpha = self.a
    b_alpha = self.b0 + 0.5*np.diag(XmuXmu_R)
    alpha_R = a_alpha / b_alpha
    logalpha_R = - np.log(b_alpha) # + const

    # Both keyword arguments default to None; fall back to computing them.
    if logdet is None:
        logdet = np.linalg.slogdet(R)[1]
    logdet_R = logdet

    N = self.N
    D = np.shape(R)[0]

    #
    # Compute the cost
    #

    # Compute entropy H(X)
    logH_X = utils.random.gaussian_entropy(-2*N*logdet_R - 2*D*logdet_Q,
                                           0)

    # Compute entropy H(alpha)
    logH_alpha = utils.random.gamma_entropy(0,
                                            np.sum(np.log(b_alpha)),
                                            0,
                                            0,
                                            0)

    # Compute <log p(X|alpha)>
    logp_X = utils.random.gaussian_logpdf(np.einsum('ii,i',
                                                    XmuXmu_R,
                                                    alpha_R),
                                          0,
                                          0,
                                          N*np.sum(logalpha_R),
                                          0)

    # Compute <log p(alpha)>
    logp_alpha = utils.random.gamma_logpdf(self.b0*np.sum(alpha_R),
                                           np.sum(logalpha_R),
                                           self.a0*np.sum(logalpha_R),
                                           0,
                                           0)

    # Compute the bound
    bound = logp_X + logp_alpha + logH_X + logH_alpha

    if not gradient:
        return bound

    #
    # Compute the gradient with respect R
    #

    # Only the gradient of H(X) needs the inverse of R.
    if inv is None:
        inv = np.linalg.inv(R)
    inv_R = inv

    # Compute dH(X)
    dlogH_X = utils.random.gaussian_entropy(-2*N*inv_R.T, 0)

    # Compute dH(alpha)
    dXmuXmu_R = 2*np.dot(R, XX) - 2*X_mu.T
    d_log_b = np.einsum('i,ij->ij', 1/b_alpha, dXmuXmu_R)
    dlogH_alpha = utils.random.gamma_entropy(0, d_log_b, 0, 0, 0)

    # Compute d<log p(X|alpha)>
    # TODO/FIXME: Fix these gradients!
    d_log_alpha = -d_log_b
    dXmuXmu_alpha = np.einsum('i,ij->ij', alpha_R, dXmuXmu_R)
    XmuXmu_dalpha = -np.einsum('i,i,ii,ij->ij',
                               alpha_R,
                               1/b_alpha,
                               XmuXmu_R,
                               dXmuXmu_R)
    dlogp_X = utils.random.gaussian_logpdf(dXmuXmu_alpha + XmuXmu_dalpha,
                                           0,
                                           0,
                                           N*d_log_alpha,
                                           0)

    # Compute d<log p(alpha)>
    d_alpha = -np.einsum('i,i,ij->ij', alpha_R, 1/b_alpha, dXmuXmu_R)
    dlogp_alpha = utils.random.gamma_logpdf(self.b0*d_alpha,
                                            d_log_alpha,
                                            self.a0*d_log_alpha,
                                            0,
                                            0)

    dR_bound = dlogp_X + dlogp_alpha + dlogH_X + dlogH_alpha

    if Q is None:
        return (bound, dR_bound)

    #
    # Compute the gradient with respect to Q (if Q given)
    #

    def d_helper(v):
        # Shared v-weighted term of the Q-gradients; R, QX, sumQ and self
        # come from the enclosing scope.
        R_v_R = np.einsum('ki,k,kj->ij', R, v, R)
        tr_R_v_R_Cov = np.einsum('ij,dji->d', R_v_R, self.CovX)
        mu_v_R = np.einsum('ik,k,kj', self.mu, v, R)
        return (dot(QX, R_v_R, self.X.T)
                + sumQ * tr_R_v_R_Cov
                - dot(mu_v_R, self.X.T))

    # Compute dH(X)
    dQ_logHX = utils.random.gaussian_entropy(-2*D/sumQ, 0)

    # Compute dH(alpha)
    d_log_b = d_helper(1/b_alpha)
    dQ_logHalpha = utils.random.gamma_entropy(0, d_log_b, 0, 0, 0)

    # Compute d<log p(X|alpha)>
    dXX_alpha = 2*d_helper(alpha_R)
    XX_dalpha = -d_helper(np.diag(XmuXmu_R)*alpha_R/b_alpha)
    d_log_alpha = -d_log_b
    dQ_logpX = utils.random.gaussian_logpdf(dXX_alpha + XX_dalpha,
                                            0,
                                            0,
                                            N*d_log_alpha,
                                            0)

    # Compute d<log p(alpha)>
    d_alpha = -d_helper(alpha_R/b_alpha)
    dQ_logpalpha = utils.random.gamma_logpdf(self.b0*d_alpha,
                                             d_log_alpha,
                                             self.a0*d_log_alpha,
                                             0,
                                             0)

    dQ_bound = dQ_logpX + dQ_logpalpha + dQ_logHX + dQ_logHalpha

    return (bound, dR_bound, dQ_bound)