Example #1
    def rotate(self, R, inv=None, logdet=None):

        if inv is not None:
            invR = inv
        else:
            invR = np.linalg.inv(R)

        if logdet is not None:
            logdetR = logdet
        else:
            logdetR = np.linalg.slogdet(R)[1]

        # It would be more efficient and simpler to rotate only the moments and
        # leave phi untouched. However, then update() would have to be called
        # before lower_bound_contribution(). This way is less error-prone.

        #print('rotate debug in gmc', self.phi[0])
        #print(R, invR, np.shape(self.phi[0]))
        # Transform parameters
        self.phi[0] = mvdot(invR.T, self.phi[0])
        self.phi[1] = dot(invR.T, self.phi[1], invR)
        self.phi[2] = dot(invR.T, self.phi[2], invR)

        N = self.dims[0][0]

        if False:
            #print(self.phi[0])
            self._update_moments_and_cgf()
        else:
            # Transform moments and g
            u0 = mvdot(R, self.u[0])
            u1 = dot(R, self.u[1], R.T)
            u2 = dot(R, self.u[2], R.T)
            self.u = [u0, u1, u2]
            self.g -= N * logdetR
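
The parameter update above relies on how Gaussian natural parameters behave under x -> Rx: for q(x) = N(m, C), phi0 = C^{-1} m maps to R^{-T} phi0 and phi1 = -C^{-1}/2 maps to R^{-T} phi1 R^{-1}, while the moments are mapped with R itself. A minimal standalone check of that identity (the names below are illustrative, not taken from the class):

import numpy as np

rng = np.random.RandomState(0)
D = 3
m = rng.randn(D)
A = rng.randn(D, D)
C = A @ A.T + D * np.eye(D)          # a valid covariance matrix
R = rng.randn(D, D)

# Natural parameters of N(m, C): phi0 = C^{-1} m, phi1 = -C^{-1}/2
phi0 = np.linalg.solve(C, m)
phi1 = -0.5 * np.linalg.inv(C)

# Transform the parameters the way rotate() does
invR = np.linalg.inv(R)
phi0_rot = invR.T @ phi0
phi1_rot = invR.T @ phi1 @ invR

# They must be the natural parameters of the rotated distribution N(R m, R C R^T)
C_rot = R @ C @ R.T
assert np.allclose(phi0_rot, np.linalg.solve(C_rot, R @ m))
assert np.allclose(phi1_rot, -0.5 * np.linalg.inv(C_rot))
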
    def d_helper(v):
        R_v_R = np.einsum('ki,k,kj->ij', R, v, R)
        tr_R_v_R_Cov = np.einsum('ij,dji->d', R_v_R, self.CovX)
        mu_v_R = np.einsum('ik,k,kj', self.mu, v, R)
        return (dot(QX, R_v_R, self.X.T)
                + sumQ * tr_R_v_R_Cov
                - dot(mu_v_R, self.X.T))
    def rotate(self, R, inv=None, logdet=None):

        if inv is not None:
            invR = inv
        else:
            invR = np.linalg.inv(R)

        if logdet is not None:
            logdetR = logdet
        else:
            logdetR = np.linalg.slogdet(R)[1]

        # It would be more efficient and simpler to rotate only the moments and
        # leave phi untouched. However, then update() would have to be called
        # before lower_bound_contribution(). This way is less error-prone.

        # Transform parameters
        self.phi[0] = linalg.mvdot(invR.T, self.phi[0])
        self.phi[1] = linalg.dot(invR.T, self.phi[1], invR)
        self.phi[2] = linalg.dot(invR.T, self.phi[2], invR)

        N = self.dims[0][0]

        if False:
            self._update_moments_and_cgf()
        else:
            # Transform moments and g
            u0 = linalg.mvdot(R, self.u[0])
            u1 = linalg.dot(R, self.u[1], R.T)
            u2 = linalg.dot(R, self.u[2], R.T)
            self.u = [u0, u1, u2]
            self.g -= N*logdetR
    def _compute_bound(self, R, logdet=None, inv=None, gradient=False):
        
        """
        Rotate q(X) as X->RX: q(X)=N(R*mu, R*Cov*R')

        Assume:
        :math:`p(\mathbf{X}) = \prod^M_{m=1} 
               N(\mathbf{x}_m|0, \mathbf{\Lambda})`
        """

        # TODO/FIXME: X and alpha should NOT contain observed values!! Check
        # that.

        # TODO/FIXME: Allow non-zero prior mean!

        # Assume constant mean and precision matrix over plates..

        # Compute rotated moments
        XX_R = dot(R, self.XX, R.T)

        inv_R = inv
        logdet_R = logdet

        # Compute entropy H(X)
        logH_X = utils.random.gaussian_entropy(-2*self.N*logdet_R, 
                                               0)

        # Compute <log p(X)>
        logp_X = utils.random.gaussian_logpdf(np.vdot(XX_R, self.Lambda),
                                              0,
                                              0,
                                              0,
                                              0)

        # Compute the bound
        bound = logp_X + logH_X

        if gradient:

            # Compute dH(X)
            dlogH_X = utils.random.gaussian_entropy(-2*self.N*inv_R.T,
                                                    0)

            # Compute d<log p(X)>
            dXX = 2*dot(self.Lambda, R, self.XX)
            dlogp_X = utils.random.gaussian_logpdf(dXX,
                                                   0,
                                                   0,
                                                   0,
                                                   0)

            d_bound = dlogp_X + dlogH_X

            return (bound, d_bound)

        else:
            return bound
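
The two gradient terms in this bound follow from standard matrix identities: d/dR log|det R| = R^{-T} (used in dlogH_X) and d/dR tr(Lambda R <XX> R^T) = 2 Lambda R <XX> for symmetric Lambda and <XX> (used in dXX). A quick finite-difference sanity check with made-up symmetric matrices standing in for self.Lambda and self.XX:

import numpy as np

rng = np.random.RandomState(0)
D = 4
S = rng.randn(D, D)
XX = S @ S.T                               # symmetric <xx^T>
L = rng.randn(D, D)
Lambda = L @ L.T                           # symmetric precision
R = rng.randn(D, D) + 3 * np.eye(D)        # keep R well away from singular

grad_logdet = np.linalg.inv(R).T           # d/dR logdet(R)
grad_trace = 2 * Lambda @ R @ XX           # d/dR tr(Lambda R XX R^T)

eps = 1e-6
for i in range(D):
    for j in range(D):
        Rp = R.copy(); Rp[i, j] += eps
        Rm = R.copy(); Rm[i, j] -= eps
        fd_logdet = (np.linalg.slogdet(Rp)[1] - np.linalg.slogdet(Rm)[1]) / (2 * eps)
        fd_trace = (np.trace(Lambda @ Rp @ XX @ Rp.T)
                    - np.trace(Lambda @ Rm @ XX @ Rm.T)) / (2 * eps)
        assert np.allclose(grad_logdet[i, j], fd_logdet, atol=1e-4)
        assert np.allclose(grad_trace[i, j], fd_trace, atol=1e-3)
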
Example #5
    def rotate_matrix(self, R1, R2, inv1=None, logdet1=None, inv2=None, logdet2=None, Q=None):
        """
        The vector is reshaped into a matrix by stacking the row vectors.

        Computes R1*X*R2', which is identical to kron(R1,R2)*x when x is the
        row-major flattening of X.

        Note that this is slightly different from the standard Kronecker product
        definition because Numpy stacks row vectors instead of column vectors.

        Parameters
        ----------
        R1 : ndarray
            A matrix from the left
        R2 : ndarray
            A matrix from the right        
        """

        if Q is not None:
            # Rotate moments using Q
            #print("Debug in rotate matrix", np.shape(self.u[0]), self.get_shape(0))
            self.u[0] = np.einsum('ik,kj->ij', Q, self.u[0])
            sumQ = np.sum(Q, axis=0)
            # Rotate natural parameters using Q
            self.phi[1] = np.einsum('d,dij->dij', sumQ**(-2), self.phi[1]) 
            self.phi[0] = np.einsum('dij,dj->di', -2*self.phi[1], self.u[0])

        if inv1 is None:
            inv1 = np.linalg.inv(R1)
        if logdet1 is None:
            logdet1 = np.linalg.slogdet(R1)[1]
        if inv2 is None:
            inv2 = np.linalg.inv(R2)
        if logdet2 is None:
            logdet2 = np.linalg.slogdet(R2)[1]

        D1 = np.shape(R1)[0]
        D2 = np.shape(R2)[0]

        # Reshape into matrices
        sh0 = np.shape(self.phi[0])[:-1] + (D1,D2)
        sh1 = np.shape(self.phi[1])[:-2] + (D1,D2,D1,D2)
        phi0 = np.reshape(self.phi[0], sh0)
        phi1 = np.reshape(self.phi[1], sh1)

        # Apply rotations to phi
        #phi0 = dot(inv1, phi0, inv2.T)
        phi0 = dot(inv1.T, phi0, inv2)
        phi1 = np.einsum('...ia,...abcd->...ibcd', inv1.T, phi1)
        phi1 = np.einsum('...ic,...abcd->...abid', inv1.T, phi1)
        phi1 = np.einsum('...ib,...abcd->...aicd', inv2.T, phi1)
        phi1 = np.einsum('...id,...abcd->...abci', inv2.T, phi1)

        # Reshape back into vectors
        self.phi[0] = np.reshape(phi0, self.phi[0].shape)
        self.phi[1] = np.reshape(phi1, self.phi[1].shape)

        # It'd be better to rotate the moments too..

        #g0 = np.sum(np.ones(self.plates)*self.g)
        self._update_moments_and_cgf()
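
The docstring's Kronecker remark can be checked directly: with NumPy's row-major flattening, vec(R1 X R2') = kron(R1, R2) vec(X), whereas the column-stacking convention would give kron(R2, R1). A small sketch with arbitrary shapes:

import numpy as np

rng = np.random.RandomState(0)
D1, D2 = 3, 4
R1 = rng.randn(D1, D1)
R2 = rng.randn(D2, D2)
X = rng.randn(D1, D2)

# Row-major (NumPy) flattening: vec(R1 X R2^T) == kron(R1, R2) vec(X)
lhs = (R1 @ X @ R2.T).ravel()
rhs = np.kron(R1, R2) @ X.ravel()
assert np.allclose(lhs, rhs)
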
Example #6
        def cost(r):

            # Make vector-r into matrix-R
            R = np.reshape(r, (self.D, self.D))

            # Compute inverse and log-determinant
            invR = np.linalg.inv(R)
            logdetR = np.linalg.slogdet(R)[1]

            # Compute lower bound terms
            (b1, db1) = self.block1.bound(R, logdet=logdetR, inv=invR)
            (b2, db2) = self.block2.bound(invR.T, logdet=-logdetR, inv=R.T)

            # Apply chain rule for the second gradient:
            # d b(invR.T)
            # = tr(db.T * d(invR.T))
            # = tr(db * d(invR))
            # = -tr(db * invR * (dR) * invR)
            # = -tr(invR * db * invR * dR)
            db2 = -dot(invR.T, db2.T, invR.T)

            # Compute the cost function
            c = -(b1 + b2)
            dc = -(db1 + db2)

            return (c, np.ravel(dc))
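
The chain-rule comment corresponds to the identity d(R^{-1}) = -R^{-1} (dR) R^{-1}: if db is the gradient of a scalar bound with respect to its argument invR.T, then the gradient with respect to R is -invR.T db.T invR.T, which is exactly the correction applied to db2. A finite-difference sketch using an arbitrary test function (g, grad_g and A below are made up for the check):

import numpy as np

rng = np.random.RandomState(0)
D = 4
A = rng.randn(D, D)

def g(M):
    # Arbitrary smooth test function with a known gradient
    return np.trace(A @ M @ M.T)

def grad_g(M):
    return (A + A.T) @ M

R = rng.randn(D, D) + 3 * np.eye(D)   # keep R well-conditioned
invR = np.linalg.inv(R)

# Gradient of g(inv(R).T) with respect to R, via the chain rule in the comment
db = grad_g(invR.T)
grad_chain = -invR.T @ db.T @ invR.T

# Finite-difference check
eps = 1e-6
grad_fd = np.zeros((D, D))
for i in range(D):
    for j in range(D):
        Rp = R.copy(); Rp[i, j] += eps
        Rm = R.copy(); Rm[i, j] -= eps
        grad_fd[i, j] = (g(np.linalg.inv(Rp).T) - g(np.linalg.inv(Rm).T)) / (2 * eps)

assert np.allclose(grad_chain, grad_fd, atol=1e-4)
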
Example #7
    def rotate_matrix(self,
                      R1,
                      R2,
                      inv1=None,
                      logdet1=None,
                      inv2=None,
                      logdet2=None,
                      Q=None):
        """
        The vector is reshaped into a matrix by stacking the row vectors.

        Computes R1*X*R2', which is identical to kron(R1,R2)*x when x is the
        row-major flattening of X.

        Note that this is slightly different from the standard Kronecker product
        definition because Numpy stacks row vectors instead of column vectors.
        """

        if Q is not None:
            # Rotate moments using Q
            #print("Debug in rotate matrix", np.shape(self.u[0]), self.get_shape(0))
            self.u[0] = np.einsum('ik,kj->ij', Q, self.u[0])
            sumQ = np.sum(Q, axis=0)
            # Rotate natural parameters using Q
            self.phi[1] = np.einsum('d,dij->dij', sumQ**(-2), self.phi[1])
            self.phi[0] = np.einsum('dij,dj->di', -2 * self.phi[1], self.u[0])

        if inv1 is None:
            inv1 = np.linalg.inv(R1)
        if logdet1 is None:
            logdet1 = np.linalg.slogdet(R1)[1]
        if inv2 is None:
            inv2 = np.linalg.inv(R2)
        if logdet2 is None:
            logdet2 = np.linalg.slogdet(R2)[1]

        D1 = np.shape(R1)[0]
        D2 = np.shape(R2)[0]

        # Reshape into matrices
        sh0 = np.shape(self.phi[0])[:-1] + (D1, D2)
        sh1 = np.shape(self.phi[1])[:-2] + (D1, D2, D1, D2)
        phi0 = np.reshape(self.phi[0], sh0)
        phi1 = np.reshape(self.phi[1], sh1)

        # Apply rotations to phi
        #phi0 = dot(inv1, phi0, inv2.T)
        phi0 = dot(inv1.T, phi0, inv2)
        phi1 = np.einsum('...ia,...abcd->...ibcd', inv1.T, phi1)
        phi1 = np.einsum('...ic,...abcd->...abid', inv1.T, phi1)
        phi1 = np.einsum('...ib,...abcd->...aicd', inv2.T, phi1)
        phi1 = np.einsum('...id,...abcd->...abci', inv2.T, phi1)

        # Reshape back into vectors
        self.phi[0] = np.reshape(phi0, self.phi[0].shape)
        self.phi[1] = np.reshape(phi1, self.phi[1].shape)

        # It'd be better to rotate the moments too..

        #g0 = np.sum(np.ones(self.plates)*self.g)
        self._update_moments_and_cgf()
Example #8
        def cost(r):

            # Make vector-r into matrix-R
            R = np.reshape(r, (self.D, self.D))

            # Compute inverse and log-determinant
            invR = np.linalg.inv(R)
            logdetR = np.linalg.slogdet(R)[1]

            # Compute lower bound terms
            (b1, db1) = self.block1.bound(R, logdet=logdetR, inv=invR)
            (b2, db2) = self.block2.bound(invR.T, logdet=-logdetR, inv=R.T)

            # Apply chain rule for the second gradient:
            # d b(invR.T)
            # = tr(db.T * d(invR.T))
            # = tr(db * d(invR))
            # = -tr(db * invR * (dR) * invR)
            # = -tr(invR * db * invR * dR)
            db2 = -dot(invR.T, db2.T, invR.T)

            # Compute the cost function
            c = -(b1 + b2)
            dc = -(db1 + db2)

            return (c, np.ravel(dc))
Example #9
    def _compute_bound(self, R, logdet=None, inv=None, gradient=False):
        """
        Rotate q(X) as X->RX: q(X)=N(R*mu, R*Cov*R')

        Assume:
        :math:`p(\mathbf{X}) = \prod^M_{m=1} 
               N(\mathbf{x}_m|0, \mathbf{\Lambda})`
        """

        # TODO/FIXME: X and alpha should NOT contain observed values!! Check
        # that.

        # TODO/FIXME: Allow non-zero prior mean!

        # Assume constant mean and precision matrix over plates..

        # Compute rotated moments
        XX_R = dot(R, self.XX, R.T)

        inv_R = inv
        logdet_R = logdet

        # Compute entropy H(X)
        logH_X = utils.random.gaussian_entropy(-2 * self.N * logdet_R, 0)

        # Compute <log p(X)>
        logp_X = utils.random.gaussian_logpdf(np.vdot(XX_R, self.Lambda), 0, 0,
                                              0, 0)

        # Compute the bound
        bound = logp_X + logH_X

        if gradient:

            # Compute dH(X)
            dlogH_X = utils.random.gaussian_entropy(-2 * self.N * inv_R.T, 0)

            # Compute d<log p(X)>
            dXX = 2 * dot(self.Lambda, R, self.XX)
            dlogp_X = utils.random.gaussian_logpdf(dXX, 0, 0, 0, 0)

            d_bound = dlogp_X + dlogH_X

            return (bound, d_bound)

        else:
            return bound
    def setup(self):
        """
        This method should be called just before optimization.
        """
        
        # Get moments of X
        (X, XnXn, XpXn) = self.X_node.get_moments()
        XpXp = XnXn[:-1,:,:]

        # Get moments of A (and make sure they include time axis)
        (A, AA) = self.X_node.parents[2].get_moments()
        A = utils.utils.atleast_nd(A, 3)
        AA = utils.utils.atleast_nd(AA, 4)
        CovA = AA - A[...,:,np.newaxis]*A[...,np.newaxis,:]

        #
        # Expectations with respect to X
        #
        
        self.X0 = X[0,:]
        self.X0X0 = XnXn[0,:,:]
        #self.XpXp = np.sum(XpXp, axis=0)
        self.XnXn = np.sum(XnXn[1:,:,:], axis=0)
        #self.XpXn = np.sum(XpXn, axis=0)

        #
        # Expectations with respect to A and X
        #

        # Compute: \sum_n <A_n> <x_{n-1} x_n^T>
        self.A_XpXn = np.sum(dot(A, XpXn),
                             axis=0)

        # Compute: \sum_n <A_n> <x_{n-1} x_{n-1}^T> <A_n>^T
        self.A_XpXp_A = np.sum(dot(A, XpXp, utils.utils.T(A)),
                               axis=0)

        # Compute: \sum_n tr(CovA_n <x_{n-1} x_{n-1}^T>)
        self.CovA_XpXp = np.einsum('ndij,nij->d', CovA, XpXp)
        
        # Get moments of the fixed parameter nodes
        mu = self.X_node.parents[0].get_moments()[0]
        self.Lambda = self.X_node.parents[1].get_moments()[0]
        self.Lambda_mu_X0 = np.outer(np.dot(self.Lambda,mu), self.X0)

        self.A_rotator.setup(rotate_plates=True)
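
The sufficient statistics assembled in setup() are per-time-step matrix products summed over the time axis. Assuming dot() broadcasts over the leading axis like np.matmul, the first of them can be written in plain NumPy as follows (shapes are illustrative):

import numpy as np

rng = np.random.RandomState(0)
N, D = 10, 3
A = rng.randn(N, D, D)        # <A_n>, one matrix per time step
XpXn = rng.randn(N, D, D)     # <x_{n-1} x_n^T>, one matrix per time step

# \sum_n <A_n> <x_{n-1} x_n^T>, written with broadcasting matmul and with einsum
A_XpXn = np.sum(A @ XpXn, axis=0)
assert np.allclose(A_XpXn, np.einsum('nik,nkj->ij', A, XpXn))
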
Example #11
    def setup(self):
        """
        This method should be called just before optimization.
        """

        # Get moments of X
        (X, XnXn, XpXn) = self.X_node.get_moments()
        XpXp = XnXn[:-1, :, :]

        # Get moments of A (and make sure they include time axis)
        (A, AA) = self.X_node.parents[2].get_moments()
        A = utils.utils.atleast_nd(A, 3)
        AA = utils.utils.atleast_nd(AA, 4)
        CovA = AA - A[..., :, np.newaxis] * A[..., np.newaxis, :]

        #
        # Expectations with respect to X
        #

        self.X0 = X[0, :]
        self.X0X0 = XnXn[0, :, :]
        #self.XpXp = np.sum(XpXp, axis=0)
        self.XnXn = np.sum(XnXn[1:, :, :], axis=0)
        #self.XpXn = np.sum(XpXn, axis=0)

        #
        # Expectations with respect to A and X
        #

        # Compute: \sum_n <A_n> <x_{n-1} x_n^T>
        self.A_XpXn = np.sum(dot(A, XpXn), axis=0)

        # Compute: \sum_n <A_n> <x_{n-1} x_{n-1}^T> <A_n>^T
        self.A_XpXp_A = np.sum(dot(A, XpXp, utils.utils.T(A)), axis=0)

        # Compute: \sum_n tr(CovA_n <x_{n-1} x_{n-1}^T>)
        self.CovA_XpXp = np.einsum('ndij,nij->d', CovA, XpXp)

        # Get moments of the fixed parameter nodes
        mu = self.X_node.parents[0].get_moments()[0]
        self.Lambda = self.X_node.parents[1].get_moments()[0]
        self.Lambda_mu_X0 = np.outer(np.dot(self.Lambda, mu), self.X0)

        self.A_rotator.setup(rotate_plates=True)
Example #12
    def rotate(self, R, inv=None, logdet=None, Q=None):

        raise NotImplementedError()

        if inv is not None:
            invR = inv
        else:
            invR = np.linalg.inv(R)

        if logdet is not None:
            logdetR = logdet
        else:
            logdetR = np.linalg.slogdet(R)[1]

        # It would be more efficient and simpler to rotate only the moments and
        # leave phi untouched. However, then update() would have to be called
        # before lower_bound_contribution(). This way is less error-prone.

        # Rotate plates, if plate rotation matrix is given. Assume that there's
        # only one plate-axis

        #logdet_old = np.sum(utils.linalg.logdet_cov(-2*self.phi[1]))
        if Q is not None:
            # Rotate moments using Q
            self.u[0] = np.einsum('ik,kj->ij', Q, self.u[0])
            sumQ = np.sum(Q, axis=0)
            # Rotate natural parameters using Q
            self.phi[1] = np.einsum('d,dij->dij', sumQ**(-2), self.phi[1]) 
            self.phi[0] = np.einsum('dij,dj->di', -2*self.phi[1], self.u[0])

        # Transform parameters using R
        self.phi[0] = mvdot(invR.T, self.phi[0])
        self.phi[1] = dot(invR.T, self.phi[1], invR)

        if Q is not None:
            self._update_moments_and_cgf()
        else:
            # Transform moments and g using R
            self.u[0] = mvdot(R, self.u[0])
            self.u[1] = dot(R, self.u[1], R.T)
            self.g -= logdetR
Example #13
    def rotate(self, R, inv=None, logdet=None, Q=None):

        if inv is not None:
            invR = inv
        else:
            invR = np.linalg.inv(R)

        if logdet is not None:
            logdetR = logdet
        else:
            logdetR = np.linalg.slogdet(R)[1]

        # It would be more efficient and simpler to rotate only the moments and
        # leave phi untouched. However, then update() would have to be called
        # before lower_bound_contribution(). This way is less error-prone.

        # Rotate plates, if plate rotation matrix is given. Assume that there's
        # only one plate-axis

        #logdet_old = np.sum(utils.linalg.logdet_cov(-2*self.phi[1]))
        if Q is not None:
            # Rotate moments using Q
            self.u[0] = np.einsum('ik,kj->ij', Q, self.u[0])
            sumQ = np.sum(Q, axis=0)
            # Rotate natural parameters using Q
            self.phi[1] = np.einsum('d,dij->dij', sumQ**(-2), self.phi[1])
            self.phi[0] = np.einsum('dij,dj->di', -2 * self.phi[1], self.u[0])

        # Transform parameters using R
        self.phi[0] = mvdot(invR.T, self.phi[0])
        self.phi[1] = dot(invR.T, self.phi[1], invR)

        if Q is not None:
            self._update_moments_and_cgf()
        else:
            # Transform moments and g using R
            self.u[0] = mvdot(R, self.u[0])
            self.u[1] = dot(R, self.u[1], R.T)
            self.g -= logdetR
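
In the Q branch above the plate axis is mixed: the first moments become Q<X>, phi[1] is rescaled by sumQ**(-2), and phi[0] is rebuilt as -2 phi[1] u[0], which by construction keeps the implied per-plate mean equal to the rotated first moment. A sketch of that consistency with illustrative shapes and a simple phi[1]:

import numpy as np

rng = np.random.RandomState(0)
N, D = 5, 3
Q = np.eye(N) + 0.1 * rng.randn(N, N)      # plate-rotation matrix
u0 = rng.randn(N, D)                       # first moments, one per plate
phi1 = -0.5 * np.array([np.eye(D)] * N)    # an illustrative phi[1] per plate

# Rotate the moments and rebuild the natural parameters as in the Q branch
u0_rot = Q @ u0
sumQ = np.sum(Q, axis=0)
phi1_rot = np.einsum('d,dij->dij', sumQ**(-2), phi1)
phi0_rot = np.einsum('dij,dj->di', -2 * phi1_rot, u0_rot)

# The implied per-plate mean -phi1^{-1} phi0 / 2 equals the rotated first moment
mean = np.einsum('dij,dj->di', -0.5 * np.linalg.inv(phi1_rot), phi0_rot)
assert np.allclose(mean, u0_rot)
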
Example #14
    def bound(self, R, logdet=None, inv=None):

        if inv is None:
            inv = np.linalg.inv(R)
        if logdet is None:
            logdet = np.linalg.slogdet(R)[1]

        (bound_X, d_bound_X) = self._compute_bound(R, logdet=logdet, inv=inv, gradient=True)

        # Compute cost and gradient from A
        (bound_A, dR_bound_A, dQ_bound_A) = self.A_rotator.bound(inv.T, inv=R.T, logdet=-logdet, Q=R)
        dR_bound_A = -dot(inv.T, dR_bound_A.T, inv.T)

        # Compute the bound
        bound = bound_X + bound_A
        d_bound = d_bound_X + dR_bound_A + dQ_bound_A

        return (bound, d_bound)
    def bound(self, R, logdet=None, inv=None):
        (bound_X, d_bound_X) = self._compute_bound(R,
                                                   logdet=logdet,
                                                   inv=inv,
                                                   gradient=True)
        
        # Compute cost and gradient from A
        (bound_A, dR_bound_A, dQ_bound_A) = self.A_rotator.bound(inv.T, 
                                                                 inv=R.T,
                                                                 logdet=-logdet,
                                                                 Q=R)
        # TODO/FIXME: Also apply the gradient of invR.T to the result
        dR_bound_A = -dot(inv.T, dR_bound_A.T, inv.T)

        # Compute the bound
        bound = bound_X + bound_A
        d_bound = d_bound_X + dR_bound_A + dQ_bound_A

        return (bound, d_bound)
Example #16
    def bound(self, R, logdet=None, inv=None):
        (bound_X, d_bound_X) = self._compute_bound(R,
                                                   logdet=logdet,
                                                   inv=inv,
                                                   gradient=True)

        # Compute cost and gradient from A
        (bound_A, dR_bound_A,
         dQ_bound_A) = self.A_rotator.bound(inv.T,
                                            inv=R.T,
                                            logdet=-logdet,
                                            Q=R)
        # TODO/FIXME: Also apply the gradient of invR.T to the result
        dR_bound_A = -dot(inv.T, dR_bound_A.T, inv.T)

        # Compute the bound
        bound = bound_X + bound_A
        d_bound = d_bound_X + dR_bound_A + dQ_bound_A

        return (bound, d_bound)
    def _compute_bound(self, R, logdet=None, inv=None, gradient=False):
        """
        Rotate q(X) as X->RX: q(X)=N(R*mu, R*Cov*R')

        Assume:
        :math:`p(\mathbf{X}) = \prod^M_{m=1} 
               N(\mathbf{x}_m|0, \mathbf{\Lambda})`

        Assume unit innovation noise covariance.
        """

        # TODO/FIXME: X and alpha should NOT contain observed values!! Check
        # that.

        # TODO/FIXME: Allow non-zero prior mean!

        # Assume constant mean and precision matrix over plates..

        invR = inv
        logdetR = logdet

        # Transform moments of X and A:
        Lambda_R_X0X0 = dot(self.Lambda, R, self.X0X0)
        R_XnXn = dot(R, self.XnXn)
        RA_XpXp_A = dot(R, self.A_XpXp_A)
        sumr = np.sum(R, axis=0)
        R_CovA_XpXp = sumr * self.CovA_XpXp

        ## if not gradient:
        ##     print("DEBUG TOO", dot(R_XnXn,R.T))

        # Compute entropy H(X)
        logH_X = utils.random.gaussian_entropy(-2*self.N*logdetR, 
                                               0)

        # Compute <log p(X)>
        yy = tracedot(R_XnXn, R.T) + tracedot(Lambda_R_X0X0, R.T)
        yz = tracedot(dot(R,self.A_XpXn),R.T) + tracedot(self.Lambda_mu_X0, R.T)
        zz = tracedot(RA_XpXp_A, R.T) + np.dot(R_CovA_XpXp, sumr) #RR_CovA_XpXp
        logp_X = utils.random.gaussian_logpdf(yy,
                                              yz,
                                              zz,
                                              0,
                                              0)

        # Compute dH(X)
        dlogH_X = utils.random.gaussian_entropy(-2*self.N*invR.T,
                                                0)

        # Compute the bound
        bound = (0
                 + logp_X 
                 + logH_X
                 )

        # TODO/FIXME: There might be a very small error in the gradient?
        
        if gradient:
            # Compute d<log p(X)>
            dyy = 2 * (R_XnXn + Lambda_R_X0X0)
            dyz = dot(R, self.A_XpXn + self.A_XpXn.T) + self.Lambda_mu_X0
            dzz = 2 * (RA_XpXp_A + R_CovA_XpXp)
            dlogp_X = utils.random.gaussian_logpdf(dyy,
                                                   dyz,
                                                   dzz,
                                                   0,
                                                   0)

            d_bound = (0*dlogp_X
                       + dlogp_X 
                       + dlogH_X
                       )

            return (bound, d_bound)

        else:
            return bound
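
The dyz term above uses d/dR tr(R B R^T) = R (B + B^T) for a general (non-symmetric) B, here A_XpXn, plus d/dR tr(M R^T) = M, here Lambda_mu_X0. A finite-difference check with made-up matrices:

import numpy as np

rng = np.random.RandomState(0)
D = 3
B = rng.randn(D, D)                 # stands in for self.A_XpXn (not symmetric)
M = rng.randn(D, D)                 # stands in for self.Lambda_mu_X0
R = rng.randn(D, D)

def yz(R):
    return np.trace(R @ B @ R.T) + np.trace(M @ R.T)

dyz = R @ (B + B.T) + M             # the gradient used above

eps = 1e-6
for i in range(D):
    for j in range(D):
        Rp = R.copy(); Rp[i, j] += eps
        Rm = R.copy(); Rm[i, j] -= eps
        assert np.allclose(dyz[i, j], (yz(Rp) - yz(Rm)) / (2 * eps), atol=1e-5)
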
Example #18
    def d_helper(v):
        R_v_R = np.einsum('ki,k,kj->ij', R, v, R)
        tr_R_v_R_Cov = np.einsum('ij,dji->d', R_v_R, self.CovX)
        mu_v_R = np.einsum('ik,k,kj', self.mu, v, R)
        return (dot(QX, R_v_R, self.X.T) + sumQ * tr_R_v_R_Cov -
                dot(mu_v_R, self.X.T))
Example #19
    def _compute_bound(self, R, logdet=None, inv=None, gradient=False, terms=False):
        """
        Rotate q(X) as X->RX: q(X)=N(R*mu, R*Cov*R')

        Assume:
        :math:`p(\mathbf{X}) = \prod^M_{m=1} 
               N(\mathbf{x}_m|0, \mathbf{\Lambda})`

        Assume unit innovation noise covariance.
        """

        # TODO/FIXME: X and alpha should NOT contain observed values!! Check
        # that.

        # Assume constant mean and precision matrix over plates..

        if inv is None:
            invR = np.linalg.inv(R)
        else:
            invR = inv

        if logdet is None:
            logdetR = np.linalg.slogdet(R)[1]
        else:
            logdetR = logdet

        # Transform moments of X and A:

        Lambda_R_X0X0 = sum_to_plates(dot(self.Lambda, R, self.X0X0), (), plates_from=self.X_node.plates, ndim=2)
        R_XnXn = dot(R, self.XnXn)
        RA_XpXp_A = dot(R, self.A_XpXp_A)
        sumr = np.sum(R, axis=0)
        R_CovA_XpXp = sumr * self.CovA_XpXp

        # Compute entropy H(X)
        M = self.N * np.prod(self.X_node.plates)  # total number of rotated vectors
        logH_X = random.gaussian_entropy(-2 * M * logdetR, 0)

        # Compute <log p(X)>
        yy = tracedot(R_XnXn, R.T) + tracedot(Lambda_R_X0X0, R.T)
        yz = tracedot(dot(R, self.A_XpXn), R.T) + tracedot(self.Lambda_mu_X0, R.T)
        zz = tracedot(RA_XpXp_A, R.T) + np.einsum("...k,...k->...", R_CovA_XpXp, sumr)
        logp_X = random.gaussian_logpdf(yy, yz, zz, 0, 0)

        # Compute the bound
        if terms:
            bound = {self.X_node: logp_X + logH_X}
        else:
            bound = logp_X + logH_X

        if not gradient:
            return bound

        # Compute dH(X)
        dlogH_X = random.gaussian_entropy(-2 * M * invR.T, 0)

        # Compute d<log p(X)>
        dyy = 2 * (R_XnXn + Lambda_R_X0X0)
        dyz = dot(R, self.A_XpXn + self.A_XpXn.T) + self.Lambda_mu_X0
        dzz = 2 * (RA_XpXp_A + R_CovA_XpXp[None, :])
        dlogp_X = random.gaussian_logpdf(dyy, dyz, dzz, 0, 0)

        if terms:
            d_bound = {self.X_node: dlogp_X + dlogH_X}
        else:
            d_bound = +dlogp_X + dlogH_X

        return (bound, d_bound)
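
The entropy term scales logdetR by M = N * prod(plates) because each rotated Gaussian vector contributes the same shift: H(N(Rm, R C R')) = H(N(m, C)) + log|det R|. A standalone check of that shift (entropy_of_gaussian below is the plain textbook formula, not the random.gaussian_entropy helper):

import numpy as np

def entropy_of_gaussian(C):
    # Differential entropy of N(m, C)
    D = C.shape[0]
    return 0.5 * (D * np.log(2 * np.pi * np.e) + np.linalg.slogdet(C)[1])

rng = np.random.RandomState(0)
D = 3
A = rng.randn(D, D)
C = A @ A.T + np.eye(D)
R = rng.randn(D, D)

# Rotating x -> Rx shifts the entropy by log|det R|; with M such vectors the
# total shift is M * log|det R|, which is the logdetR dependence of logH_X above.
assert np.allclose(entropy_of_gaussian(R @ C @ R.T),
                   entropy_of_gaussian(C) + np.linalg.slogdet(R)[1])
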
Example #20
    def _compute_bound(self, R, logdet=None, inv=None, gradient=False):
        """
        Rotate q(X) as X->RX: q(X)=N(R*mu, R*Cov*R')

        Assume:
        :math:`p(\mathbf{X}) = \prod^M_{m=1} 
               N(\mathbf{x}_m|0, \mathbf{\Lambda})`

        Assume unit innovation noise covariance.
        """

        # TODO/FIXME: X and alpha should NOT contain observed values!! Check
        # that.

        # TODO/FIXME: Allow non-zero prior mean!

        # Assume constant mean and precision matrix over plates..

        invR = inv
        logdetR = logdet

        # Transform moments of X and A:
        Lambda_R_X0X0 = dot(self.Lambda, R, self.X0X0)
        R_XnXn = dot(R, self.XnXn)
        RA_XpXp_A = dot(R, self.A_XpXp_A)
        sumr = np.sum(R, axis=0)
        R_CovA_XpXp = sumr * self.CovA_XpXp

        ## if not gradient:
        ##     print("DEBUG TOO", dot(R_XnXn,R.T))

        # Compute entropy H(X)
        logH_X = utils.random.gaussian_entropy(-2 * self.N * logdetR, 0)

        # Compute <log p(X)>
        yy = tracedot(R_XnXn, R.T) + tracedot(Lambda_R_X0X0, R.T)
        yz = tracedot(dot(R, self.A_XpXn), R.T) + tracedot(
            self.Lambda_mu_X0, R.T)
        zz = tracedot(RA_XpXp_A, R.T) + np.dot(R_CovA_XpXp,
                                               sumr)  #RR_CovA_XpXp
        logp_X = utils.random.gaussian_logpdf(yy, yz, zz, 0, 0)

        # Compute dH(X)
        dlogH_X = utils.random.gaussian_entropy(-2 * self.N * invR.T, 0)

        # Compute the bound
        bound = (0 + logp_X + logH_X)

        # TODO/FIXME: There might be a very small error in the gradient?

        if gradient:
            # Compute d<log p(X)>
            dyy = 2 * (R_XnXn + Lambda_R_X0X0)
            dyz = dot(R, self.A_XpXn + self.A_XpXn.T) + self.Lambda_mu_X0
            dzz = 2 * (RA_XpXp_A + R_CovA_XpXp)
            dlogp_X = utils.random.gaussian_logpdf(dyy, dyz, dzz, 0, 0)

            d_bound = (0 * dlogp_X + dlogp_X + dlogH_X)

            return (bound, d_bound)

        else:
            return bound
Example #21
    def _compute_bound(self, R, logdet=None, inv=None, Q=None, gradient=False):
        """
        Rotate q(X) and q(alpha).

        Assume:
        p(X|alpha) = prod_m N(x_m|0,diag(alpha))
        p(alpha) = prod_d G(a_d,b_d)
        """

        # TODO/FIXME: X and alpha should NOT contain observed values!! Check that.

        #
        # Transform the distributions and moments
        #

        # Compute rotated second moment
        if Q is not None:
            # Rotate plates
            sumQ = np.sum(Q, axis=0)
            QX = np.einsum('ik,kj->ij', Q, self.X)
            XX = (np.einsum('ki,kj->ij', QX, QX) +
                  np.einsum('d,dij->ij', sumQ**2, self.CovX))
            logdet_Q = np.sum(np.log(np.abs(sumQ)))
            X = QX
            X_mu = utils.utils.sum_multiply(X[..., :, np.newaxis],
                                            self.mu[..., np.newaxis, :],
                                            axis=(-1, -2),
                                            sumaxis=False,
                                            keepdims=False)
        else:
            X = self.X
            XX = self.XX
            logdet_Q = 0
            X_mu = self.Xmu

        # TODO/FIXME: X can be summed to the plates of mu!?
        RX_mu = dot(R, X_mu)

        XmuXmu_R = (dot(R, XX, R.T) - RX_mu - RX_mu.T + self.mumu)

        # Compute q(alpha)
        a_alpha = self.a
        b_alpha = self.b0 + 0.5 * np.diag(XmuXmu_R)
        #b_alpha = self.b0 + 0.5*np.diag(RXXR)
        alpha_R = a_alpha / b_alpha
        logalpha_R = -np.log(b_alpha)  # + const

        logdet_R = logdet
        inv_R = inv

        N = self.N
        D = np.shape(R)[0]

        #
        # Compute the cost
        #

        # Compute entropy H(X)
        logH_X = utils.random.gaussian_entropy(
            -2 * N * logdet_R - 2 * D * logdet_Q, 0)

        # Compute entropy H(alpha)
        logH_alpha = utils.random.gamma_entropy(0, np.sum(np.log(b_alpha)), 0,
                                                0, 0)

        # Compute <log p(X|alpha)>
        logp_X = utils.random.gaussian_logpdf(
            np.einsum('ii,i', XmuXmu_R, alpha_R), 0, 0, N * np.sum(logalpha_R),
            0)

        # Compute <log p(alpha)>
        logp_alpha = utils.random.gamma_logpdf(self.b0 * np.sum(alpha_R),
                                               np.sum(logalpha_R),
                                               self.a0 * np.sum(logalpha_R), 0,
                                               0)

        # Compute the bound
        bound = (0 + logp_X + logp_alpha + logH_X + logH_alpha)

        if not gradient:
            return bound

        #
        # Compute the gradient with respect R
        #

        # Compute dH(X)
        dlogH_X = utils.random.gaussian_entropy(-2 * N * inv_R.T, 0)

        # Compute dH(alpha)
        dXmuXmu_R = 2 * np.dot(R, XX) - 2 * X_mu.T
        d_log_b = np.einsum('i,ij->ij', 1 / b_alpha, dXmuXmu_R)
        dlogH_alpha = utils.random.gamma_entropy(0, d_log_b, 0, 0, 0)

        # Compute d<log p(X|alpha)>
        # TODO/FIXME: Fix these gradients!
        d_log_alpha = -d_log_b
        dXmuXmu_alpha = np.einsum('i,ij->ij', alpha_R, dXmuXmu_R)
        XmuXmu_dalpha = -np.einsum('i,i,ii,ij->ij', alpha_R, 1 / b_alpha,
                                   XmuXmu_R, dXmuXmu_R)
        dlogp_X = utils.random.gaussian_logpdf(dXmuXmu_alpha + XmuXmu_dalpha,
                                               0, 0, N * d_log_alpha, 0)

        # Compute d<log p(alpha)>
        d_alpha = -np.einsum('i,i,ij->ij', alpha_R, 1 / b_alpha, dXmuXmu_R)
        dlogp_alpha = utils.random.gamma_logpdf(self.b0 * d_alpha, d_log_alpha,
                                                self.a0 * d_log_alpha, 0, 0)

        dR_bound = (0 * dlogp_X + dlogp_X + dlogp_alpha + dlogH_X +
                    dlogH_alpha)

        if Q is None:
            return (bound, dR_bound)

        #
        # Compute the gradient with respect to Q (if Q given)
        #

        def d_helper(v):
            R_v_R = np.einsum('ki,k,kj->ij', R, v, R)
            tr_R_v_R_Cov = np.einsum('ij,dji->d', R_v_R, self.CovX)
            mu_v_R = np.einsum('ik,k,kj', self.mu, v, R)
            return (dot(QX, R_v_R, self.X.T) + sumQ * tr_R_v_R_Cov -
                    dot(mu_v_R, self.X.T))

        # Compute dH(X)
        dQ_logHX = utils.random.gaussian_entropy(-2 * D / sumQ, 0)

        # Compute dH(alpha)
        d_log_b = d_helper(1 / b_alpha)
        dQ_logHalpha = utils.random.gamma_entropy(0, d_log_b, 0, 0, 0)

        # Compute d<log p(X|alpha)>
        dXX_alpha = 2 * d_helper(alpha_R)
        XX_dalpha = -d_helper(np.diag(XmuXmu_R) * alpha_R / b_alpha)
        d_log_alpha = -d_log_b
        dQ_logpX = utils.random.gaussian_logpdf(dXX_alpha + XX_dalpha, 0, 0,
                                                N * d_log_alpha, 0)

        # Compute d<log p(alpha)>
        d_alpha = -d_helper(alpha_R / b_alpha)
        dQ_logpalpha = utils.random.gamma_logpdf(self.b0 * d_alpha,
                                                 d_log_alpha,
                                                 self.a0 * d_log_alpha, 0, 0)

        dQ_bound = (0 * dQ_logpX + dQ_logpX + dQ_logpalpha + dQ_logHX +
                    dQ_logHalpha)

        return (bound, dR_bound, dQ_bound)
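
In the R-gradient above, row d of dXmuXmu_R is the derivative of the d-th diagonal element of XmuXmu_R with respect to row d of R, namely 2 (R <XX>)_d - 2 (X_mu^T)_d. A finite-difference sketch with made-up moments (XX symmetric, X_mu and mumu arbitrary):

import numpy as np

rng = np.random.RandomState(0)
D = 3
R = rng.randn(D, D)
S = rng.randn(D, D)
XX = S @ S.T                  # symmetric second moment <xx^T>
X_mu = rng.randn(D, D)        # stands in for the <x><mu>^T term
mumu = rng.randn(D, D)        # stands in for <mu mu^T>

def diag_XmuXmu(R):
    return np.diag(R @ XX @ R.T - R @ X_mu - X_mu.T @ R.T + mumu)

# Gradient of the d-th diagonal element with respect to row d of R
dXmuXmu_R = 2 * R @ XX - 2 * X_mu.T

eps = 1e-6
for i in range(D):
    for j in range(D):
        Rp = R.copy(); Rp[i, j] += eps
        Rm = R.copy(); Rm[i, j] -= eps
        fd = (diag_XmuXmu(Rp)[i] - diag_XmuXmu(Rm)[i]) / (2 * eps)
        assert np.allclose(dXmuXmu_R[i, j], fd, atol=1e-4)
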
    def _compute_bound(self, R, logdet=None, inv=None, Q=None, gradient=False):
        """
        Rotate q(X) and q(alpha).

        Assume:
        p(X|alpha) = prod_m N(x_m|0,diag(alpha))
        p(alpha) = prod_d G(a_d,b_d)
        """

        # TODO/FIXME: X and alpha should NOT contain observed values!! Check that.

        #
        # Transform the distributions and moments
        #

        # Compute rotated second moment
        if Q is not None:
            # Rotate plates
            sumQ = np.sum(Q, axis=0)
            QX = np.einsum('ik,kj->ij', Q, self.X)
            XX = (np.einsum('ki,kj->ij', QX, QX)
                  + np.einsum('d,dij->ij', sumQ**2, self.CovX))
            logdet_Q = np.sum(np.log(np.abs(sumQ)))
            X = QX
            X_mu = utils.utils.sum_multiply(X[...,:,np.newaxis],
                                            self.mu[...,np.newaxis,:],
                                            axis=(-1,-2),
                                            sumaxis=False,
                                            keepdims=False)
        else:
            X = self.X
            XX = self.XX
            logdet_Q = 0
            X_mu = self.Xmu

        # TODO/FIXME: X can be summed to the plates of mu!?
        RX_mu = dot(R, X_mu)

        XmuXmu_R = (dot(R, XX, R.T) - RX_mu - RX_mu.T + self.mumu)

        # Compute q(alpha)
        a_alpha = self.a
        b_alpha = self.b0 + 0.5*np.diag(XmuXmu_R)
        #b_alpha = self.b0 + 0.5*np.diag(RXXR)
        alpha_R = a_alpha / b_alpha
        logalpha_R = - np.log(b_alpha) # + const

        logdet_R = logdet
        inv_R = inv

        N = self.N
        D = np.shape(R)[0]

        #
        # Compute the cost
        #
        
        # Compute entropy H(X)
        logH_X = utils.random.gaussian_entropy(-2*N*logdet_R - 2*D*logdet_Q, 
                                               0)

        # Compute entropy H(alpha)
        logH_alpha = utils.random.gamma_entropy(0,
                                                np.sum(np.log(b_alpha)),
                                                0,
                                                0,
                                                0)

        # Compute <log p(X|alpha)>
        logp_X = utils.random.gaussian_logpdf(np.einsum('ii,i', XmuXmu_R, alpha_R),
                                              0,
                                              0,
                                              N*np.sum(logalpha_R),
                                              0)

        # Compute <log p(alpha)>
        logp_alpha = utils.random.gamma_logpdf(self.b0*np.sum(alpha_R),
                                               np.sum(logalpha_R),
                                               self.a0*np.sum(logalpha_R),
                                               0,
                                               0)

        # Compute the bound
        bound = (0
                 + logp_X
                 + logp_alpha
                 + logH_X
                 + logH_alpha)

        if not gradient:
            return bound

        #
        # Compute the gradient with respect R
        #

        # Compute dH(X)
        dlogH_X = utils.random.gaussian_entropy(-2*N*inv_R.T,
                                                0)

        # Compute dH(alpha)
        dXmuXmu_R = 2*np.dot(R, XX) - 2*X_mu.T
        d_log_b = np.einsum('i,ij->ij', 1/b_alpha, dXmuXmu_R)
        dlogH_alpha = utils.random.gamma_entropy(0,
                                                 d_log_b,
                                                 0,
                                                 0,
                                                 0)

        # Compute d<log p(X|alpha)>
        # TODO/FIXME: Fix these gradients!
        d_log_alpha = -d_log_b
        dXmuXmu_alpha = np.einsum('i,ij->ij', alpha_R, dXmuXmu_R)
        XmuXmu_dalpha = -np.einsum('i,i,ii,ij->ij', alpha_R, 1/b_alpha, XmuXmu_R, dXmuXmu_R)
        dlogp_X = utils.random.gaussian_logpdf(dXmuXmu_alpha + XmuXmu_dalpha,
                                               0,
                                               0,
                                               N*d_log_alpha,
                                               0)

        # Compute d<log p(alpha)>
        d_alpha = -np.einsum('i,i,ij->ij', alpha_R, 1/b_alpha, dXmuXmu_R)
        dlogp_alpha = utils.random.gamma_logpdf(self.b0*d_alpha,
                                                d_log_alpha,
                                                self.a0*d_log_alpha,
                                                0,
                                                0)

        dR_bound = (0*dlogp_X
                    + dlogp_X
                    + dlogp_alpha
                    + dlogH_X
                    + dlogH_alpha)

        if Q is None:
            return (bound, dR_bound)

        #
        # Compute the gradient with respect to Q (if Q given)
        #

        def d_helper(v):
            R_v_R = np.einsum('ki,k,kj->ij', R, v, R)
            tr_R_v_R_Cov = np.einsum('ij,dji->d', R_v_R, self.CovX)
            mu_v_R = np.einsum('ik,k,kj', self.mu, v, R)
            return (dot(QX, R_v_R, self.X.T)
                    + sumQ * tr_R_v_R_Cov
                    - dot(mu_v_R, self.X.T))
            

        # Compute dH(X)
        dQ_logHX = utils.random.gaussian_entropy(-2*D/sumQ,
                                                 0)

        # Compute dH(alpha)
        d_log_b = d_helper(1/b_alpha)
        dQ_logHalpha = utils.random.gamma_entropy(0,
                                                  d_log_b,
                                                  0,
                                                  0,
                                                  0)

        # Compute d<log p(X|alpha)>
        dXX_alpha = 2*d_helper(alpha_R)
        XX_dalpha = -d_helper(np.diag(XmuXmu_R)*alpha_R/b_alpha)
        d_log_alpha = -d_log_b
        dQ_logpX = utils.random.gaussian_logpdf(dXX_alpha + XX_dalpha,
                                                0,
                                                0,
                                                N*d_log_alpha,
                                                0)


        # Compute d<log p(alpha)>
        d_alpha = -d_helper(alpha_R/b_alpha)
        dQ_logpalpha = utils.random.gamma_logpdf(self.b0*d_alpha,
                                                 d_log_alpha,
                                                 self.a0*d_log_alpha,
                                                 0,
                                                 0)

        dQ_bound = (0*dQ_logpX
                    + dQ_logpX
                    + dQ_logpalpha
                    + dQ_logHX
                    + dQ_logHalpha)

        return (bound, dR_bound, dQ_bound)