Example #1
# Imports assumed by this example (NumPy plus BayesPy-style helper
# modules; an assumption, since the snippet ships without imports):
import numpy as np
from scipy.special import logsumexp
from bayespy.utils import linalg, misc

def gaussian_mixture_logpdf(x, w, mu, Sigma):
    # Shape(x)      = (N, D)
    # Shape(w)      = (K,)
    # Shape(mu)     = (K, D)
    # Shape(Sigma)  = (K, D, D)
    # Shape(result) = (N,)

    # Dimensionality
    D = np.shape(x)[-1]

    # Cholesky decomposition of each cluster's covariance matrix
    U = linalg.chol(Sigma)

    # Reshape x:
    # Shape(x)     = (N, 1, D)
    x = np.expand_dims(x, axis=-2)

    # (x-mu) and (x-mu)'*inv(Sigma)*(x-mu):
    # Shape(v)     = (N, K, D)
    # Shape(z)     = (N, K)
    v = x - mu
    z = np.einsum('...i,...i', v, linalg.chol_solve(U, v))

    # Log-determinant of Sigma:
    # Shape(ldet)  = (K,)
    ldet = linalg.chol_logdet(U)

    # Compute log pdf for each cluster:
    # Shape(lpdf)  = (N, K)
    lpdf = misc.gaussian_logpdf(z, 0, 0, ldet, D)

    # Weight each cluster and marginalize over clusters with
    # log-sum-exp (assumed completion; the weights enter only here):
    # Shape(result) = (N,)
    return logsumexp(lpdf + np.log(w), axis=-1)
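
For a sanity check, the same quantity can be computed with SciPy alone.
The sketch below is a self-contained reference implementation under
assumed test data; the helper name gaussian_mixture_logpdf_ref and all
inputs are introduced here for illustration only.

import numpy as np
from scipy.stats import multivariate_normal
from scipy.special import logsumexp

def gaussian_mixture_logpdf_ref(x, w, mu, Sigma):
    # Per-cluster log pdfs, stacked to Shape = (N, K)
    lpdf = np.stack([multivariate_normal.logpdf(x, mean=mu[k], cov=Sigma[k])
                     for k in range(len(w))], axis=-1)
    # Weight and marginalize over clusters: Shape = (N,)
    return logsumexp(lpdf + np.log(w), axis=-1)

rng = np.random.default_rng(0)
N, K, D = 5, 3, 2
x = rng.normal(size=(N, D))
w = np.full(K, 1.0 / K)
mu = rng.normal(size=(K, D))
A = rng.normal(size=(K, D, D))
Sigma = A @ np.swapaxes(A, -1, -2) + D * np.eye(D)  # positive definite
print(gaussian_mixture_logpdf_ref(x, w, mu, Sigma))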
Example #2
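    # NOTE: method excerpt from a GP node class; it assumes numpy as np
    # and utility modules (utils, linalg) providing Cholesky helpers in
    # the enclosing module's scope.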
    def lower_bound_contribution(self, gradient=False):

        # Get moment functions from parents
        m = self.parents[0].message_to_child(gradient=gradient)
        k = self.parents[1].message_to_child(gradient=gradient)
        if self.parents[2]:
            k_sparse = self.parents[2].message_to_child(gradient=gradient)
        else:
            k_sparse = None
        if self.parents[3]:
            pseudoinputs = self.parents[3].message_to_child(gradient=gradient)
        else:
            pseudoinputs = None

        # Compute the parameters (covariance matrices etc) using
        # parents' moment functions
        DKs_xx = []
        DKd_xx = []
        DKd_xp = []
        DKd_pp = []
        Dxp = []
        Dmu = []
        if gradient:
            # FIXME: The covariance of mu is ignored for now.
            ((mu, _), Dmu) = m(self.x, gradient=True)
            if pseudoinputs:
                # NOTE: the pseudo-input approximation requires the
                # sparse/noise covariance (parents[2]) to be given.
                ((Ks_xx,), DKs_xx) = k_sparse(self.x, self.x, gradient=True)
                ((xp,), Dxp) = pseudoinputs
                ((Kd_pp,), DKd_pp) = k(xp, xp, gradient=True)
                ((Kd_xp,), DKd_xp) = k(self.x, xp, gradient=True)
            else:
                ((K_xx,), DKd_xx) = k(self.x, self.x, gradient=True)
                if k_sparse:
                    ((Ks_xx,), DKs_xx) = k_sparse(self.x,
                                                  self.x,
                                                  gradient=True)
                    try:
                        K_xx += Ks_xx
                    except Exception:
                        # In-place addition can fail for mixed types
                        # (e.g. dense/sparse); fall back to out-of-place.
                        K_xx = K_xx + Ks_xx

        else:
            # FIXME: The covariance of mu is ignored for now.
            (mu, _) = m(self.x)
            if pseudoinputs:
                (Ks_xx,) = k_sparse(self.x, self.x)
                (xp,) = pseudoinputs
                (Kd_pp,) = k(xp, xp)
                (Kd_xp,) = k(self.x, xp)
            else:
                (K_xx,) = k(self.x, self.x)
                if k_sparse:
                    (Ks_xx,) = k_sparse(self.x, self.x)
                    try:
                        K_xx += Ks_xx
                    except Exception:
                        # Fall back to out-of-place addition.
                        K_xx = K_xx + Ks_xx

        mu = mu[0]

        # Log pdf
        if self.observed:
            ## Log pdf for directly observed GP

            f0 = self.f - mu

            if pseudoinputs:

                ## Pseudo-input approximation

                # Decompose the full-rank sparse/noise covariance matrix
                try:
                    Us_xx = utils.cholesky(Ks_xx)
                except linalg.LinAlgError:
                    print('Noise/sparse covariance not positive definite')
                    return -np.inf

                # Use the Sherman-Morrison-Woodbury identity with the
                # following notation (verified numerically in the sketch
                # after this method):
                #
                # y2 = f0' * inv(Kd_xp*inv(Kd_pp)*Kd_xp' + Ks_xx) * f0
                #
                # z = Ks_xx \ f0
                # Lambda = Kd_pp + Kd_xp'*inv(Ks_xx)*Kd_xp
                # nu = inv(Lambda) * (Kd_xp' * (Ks_xx \ f0))
                # rho = Kd_xp * inv(Lambda) * (Kd_xp' * (Ks_xx \ f0))
                #
                # y2 = f0' * z - z' * rho

                z = Us_xx.solve(f0)
                Lambda = Kd_pp + np.dot(Kd_xp.T, Us_xx.solve(Kd_xp))
                try:
                    U_Lambda = utils.cholesky(Lambda)
                except linalg.LinAlgError:
                    print('Lambda not positive definite')
                    return -np.inf

                nu = U_Lambda.solve(np.dot(Kd_xp.T, z))
                rho = np.dot(Kd_xp, nu)

                y2 = np.dot(f0, z) - np.dot(z, rho)

                # Use matrix determinant lemma
                #
                # det(Kd_xp*inv(Kd_pp)*Kd_xp' + Ks_xx)
                # = det(Kd_pp + Kd_xp'*inv(Ks_xx)*Kd_xp)
                #   * det(inv(Kd_pp)) * det(Ks_xx)
                # = det(Lambda) * det(Ks_xx) / det(Kd_pp)
                try:
                    Ud_pp = utils.cholesky(Kd_pp)
                except linalg.LinAlgError:
                    print('Covariance of pseudo inputs not positive definite')
                    return -np.inf
                logdet = (U_Lambda.logdet() + Us_xx.logdet() - Ud_pp.logdet())

                # Compute the log pdf
                L = utils.gaussian_logpdf(y2, 0, 0, logdet, np.size(self.f))

                # TODO: Add the variational cost of the pseudo-input
                # approximation.

                # Compute gradients

                for (dmu, func) in Dmu:
                    # Derivative w.r.t. the mean vector: not implemented,
                    # so send a NaN placeholder message.
                    d = np.nan
                    func(d)

                for (dKs_xx, func) in DKs_xx:
                    # Derivative w.r.t. the sparse/noise covariance: not
                    # implemented, so send a NaN placeholder message.
                    d = np.nan
                    func(d)

                for (dKd_xp, func) in DKd_xp:
                    # Derivative w.r.t. the cross-covariance: not
                    # implemented, so send a NaN placeholder message.
                    d = np.nan
                    func(d)

                V = Ud_pp.solve(Kd_xp.T)
                Z = Us_xx.solve(V.T)
                for (dKd_pp, func) in DKd_pp:
                    # Compute derivative w.r.t. the pseudo-input covariance
                    d = (0.5 * np.trace(Ud_pp.solve(dKd_pp)) -
                         0.5 * np.trace(U_Lambda.solve(dKd_pp)) +
                         np.dot(nu, np.dot(dKd_pp, nu)) +
                         np.trace(np.dot(dKd_pp, np.dot(V, Z))))
                    # Send the derivative message
                    func(d)

                for (dxp, func) in Dxp:
                    # Derivative w.r.t. the pseudo inputs: not implemented,
                    # so send a NaN placeholder message.
                    d = np.nan
                    func(d)

            else:

                ## Full exact (no pseudo approximations)

                try:
                    U = utils.cholesky(K_xx)
                except linalg.LinAlgError:
                    print('Covariance not positive definite')
                    return -np.inf
                z = U.solve(f0)
                L = utils.gaussian_logpdf(
                    np.dot(f0, z),
                    0,
                    0,
                    U.logdet(),
                    np.size(self.f))

                for (dmu, func) in Dmu:
                    # Derivative w.r.t. mean vector
                    d = -np.sum(z)
                    # Send the derivative message
                    func(d)

                for (dK, func) in DKd_xx:
                    # Derivative w.r.t. the covariance matrix:
                    #   d = 0.5 * (z'*dK*z - trace(inv(K_xx)*dK))
                    # (checked by finite differences in the sketch after
                    # this method)
                    #
                    # TODO: trace+chol_solve should be handled better
                    # for sparse matrices.  Use sparse-inverse!
                    d = 0.5 * (dK.dot(z).dot(z) - U.trace_solve_gradient(dK))
                    # Send the derivative message
                    func(d)

                for (dK, func) in DKs_xx:
                    # Same derivative w.r.t. the sparse/noise covariance
                    d = 0.5 * (dK.dot(z).dot(z) - U.trace_solve_gradient(dK))
                    # Send the derivative message
                    func(d)

        else:
            ## Log pdf for latent GP
            raise NotImplementedError('Log pdf for latent GP not implemented yet')

        return L
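
The Sherman-Morrison-Woodbury identity and the matrix determinant lemma
used in the pseudo-input branch can be verified numerically. The sketch
below is standalone NumPy under assumed shapes (n data points, p pseudo
inputs); every name is local to the sketch, not part of the class above.

import numpy as np

rng = np.random.default_rng(0)
n, p = 6, 3

# Random SPD Ks_xx and Kd_pp, random cross-covariance Kd_xp and data f0
A = rng.normal(size=(n, n))
Ks_xx = A @ A.T + n * np.eye(n)
B = rng.normal(size=(p, p))
Kd_pp = B @ B.T + p * np.eye(p)
Kd_xp = rng.normal(size=(n, p))
f0 = rng.normal(size=n)

# Direct route: form the full covariance and invert it
K = Kd_xp @ np.linalg.inv(Kd_pp) @ Kd_xp.T + Ks_xx
y2_direct = f0 @ np.linalg.solve(K, f0)
logdet_direct = np.linalg.slogdet(K)[1]

# Woodbury route, mirroring the code above
z = np.linalg.solve(Ks_xx, f0)
Lambda = Kd_pp + Kd_xp.T @ np.linalg.solve(Ks_xx, Kd_xp)
nu = np.linalg.solve(Lambda, Kd_xp.T @ z)
rho = Kd_xp @ nu
y2 = f0 @ z - z @ rho

# Matrix determinant lemma:
#   logdet(K) = logdet(Lambda) + logdet(Ks_xx) - logdet(Kd_pp)
logdet = (np.linalg.slogdet(Lambda)[1]
          + np.linalg.slogdet(Ks_xx)[1]
          - np.linalg.slogdet(Kd_pp)[1])

assert np.allclose(y2, y2_direct)
assert np.allclose(logdet, logdet_direct)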
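
Likewise, the covariance gradient used in the full exact branch,
d = 0.5 * (z'*dK*z - trace(inv(K)*dK)) with z = inv(K)*f0, is the
standard derivative of a Gaussian log-density with respect to a
covariance parameter. A standalone finite-difference check (all names
hypothetical and local to the sketch):

import numpy as np

rng = np.random.default_rng(1)
n = 5

def gp_loglik(K, f0):
    # log N(f0 | 0, K) up to the constant -0.5*n*log(2*pi)
    z = np.linalg.solve(K, f0)
    return -0.5 * (f0 @ z + np.linalg.slogdet(K)[1])

A = rng.normal(size=(n, n))
K = A @ A.T + n * np.eye(n)
f0 = rng.normal(size=n)
dK = rng.normal(size=(n, n))
dK = dK + dK.T  # symmetric perturbation direction

# Analytic directional derivative
z = np.linalg.solve(K, f0)
d_analytic = 0.5 * (z @ dK @ z - np.trace(np.linalg.solve(K, dK)))

# Central finite difference
eps = 1e-6
d_numeric = (gp_loglik(K + eps * dK, f0)
             - gp_loglik(K - eps * dK, f0)) / (2 * eps)

assert np.allclose(d_analytic, d_numeric, rtol=1e-4)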