Code Example #1
File: dp.py Project: yaminibansal/svae
def var_expectedstats(natparam):
    # Returns E_{q(v)}[\eta_z(V)] where \eta_z(V)_i = ln(V_i) + \sum_{j < i} ln(1-V_j)
    # q is truncated at level T with q(v_T) = 1, while p is not truncated, though terms beyond T do not enter the calculation
    # natparam shape is (T-1)x2, refers to \gamma_{t,0/1}
    # Returned function shape will be T

    E_logV = digamma(natparam[:, 0]) - digamma(natparam[:, 0] + natparam[:, 1])
    E_log1mV = digamma(natparam[:, 1]) - digamma(natparam[:, 0] + natparam[:, 1])
    # entry i: E[ln V_i] + sum_{j<i} E[ln(1 - V_j)]; final entry: sum_j E[ln(1 - V_j)]
    return np.append(np.array([E_logV[i] + np.sum(E_log1mV[:i])
                               for i in range(natparam.shape[0])]),
                     np.sum(E_log1mV))
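The expression above packs the identities E_q[ln V_i] = psi(gamma_{i,0}) - psi(gamma_{i,0} + gamma_{i,1}) and E_q[ln(1 - V_j)] = psi(gamma_{j,1}) - psi(gamma_{j,0} + gamma_{j,1}) into one return value. As a quick sanity check, here is a small Monte Carlo sketch; mc_expectedstats is a hypothetical helper written only for this comparison and assumes var_expectedstats above is in scope.

import numpy as np
from scipy.special import digamma

def mc_expectedstats(natparam, n_samples=200000, seed=0):
    # Sample V_i ~ Beta(gamma_{i,0}, gamma_{i,1}) and average eta_z(V) directly.
    rng = np.random.default_rng(seed)
    V = rng.beta(natparam[:, 0], natparam[:, 1], size=(n_samples, natparam.shape[0]))
    prefix = np.concatenate([np.zeros((n_samples, 1)), np.cumsum(np.log1p(-V), axis=1)], axis=1)
    eta = np.concatenate([np.log(V), np.zeros((n_samples, 1))], axis=1) + prefix
    return eta.mean(axis=0)

natparam = np.array([[2.0, 3.0], [1.5, 4.0], [5.0, 2.0]])
print(var_expectedstats(natparam))   # analytic expectations, length T = 4
print(mc_expectedstats(natparam))    # should agree to roughly two decimal places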
Code Example #2
File: proposals.py Project: JoeriHermans/ml-scripts
def beta_entropy(params):
    alpha = np.exp(params["log_alpha"])
    beta = np.exp(params["log_beta"])

    return np.sum(
        betaln(alpha, beta) - (alpha - 1.0) *
        (digamma(alpha) - digamma(alpha + beta)) - (beta - 1.0) *
        (digamma(beta) - digamma(alpha + beta)))
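A quick consistency check, not part of the original module: the closed form above should match scipy.stats.beta's differential entropy. This sketch assumes beta_entropy and its np/betaln/digamma imports are in scope.

import numpy as np
from scipy.stats import beta as beta_dist

params = {"log_alpha": np.log(np.array([0.5, 2.0, 7.5])),
          "log_beta": np.log(np.array([1.5, 2.0, 0.3]))}
reference = sum(beta_dist(a, b).entropy()
                for a, b in zip(np.exp(params["log_alpha"]), np.exp(params["log_beta"])))
print(beta_entropy(params), reference)  # should agree to numerical precision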
def log_marginal_likelihood(paramslin, x, z, pij, pij_flatten, pij0sum, run_time,taus, gamma, alpha, precomp):
    params = util.unlinearise_params(paramslin, verbose=0)
    d, nz = z.shape
    nx = x.shape[1]
    s = params.L @ params.L.T+__nugget(params.L.shape[1])
    eqn15sum = (params.m.T @ precomp.Kzzinv_psi_sum_Kzzinv @params.m)[0,0]

    eqn16a = np.trace(precomp.Kzzinv_psi_sum)
    eqn16b = np.trace(precomp.Kzzinv_psi_sum_Kzzinv @ s)
    eqn16sum = gamma*np.sum((run_time-x[0])**d)-eqn16a + eqn16b

    mutilde = (precomp.Kzzinv_kzx.T @ params.m).flatten()
    sigmaa = precomp.sigmas
    sigmab = np.sum(precomp.Kxz * precomp.Kzzinv_kzx.T, axis=1)
    sigmac = np.sum((params.L.T @ precomp.Kzzinv_kzx) ** 2, axis=0)
    sigmatilde = sigmaa - sigmab + sigmac
    eqn19a, eqn19b, eqn19c = expected_log_f2(mutilde, np.sqrt(sigmatilde))
    eqn19sum = -(eqn19c + eqn19a + eqn19b)@pij_flatten

    ppij = pij[pij > 0]

    total = eqn15sum + eqn16sum + eqn19sum + run_time * params.shape * params.scale - \
            pij0sum*(special.digamma(params.shape) + np.log(params.scale)) + ppij @ np.log(ppij)

    return -total
Code Example #4
File: observations.py Project: eackermann/ssm
    def _m_step_nu(self, expectations, datas, inputs, masks, tags):
        """
        The degrees-of-freedom parameter nu determines the Gamma prior on the per-datum precisions tau_n.  We have
        
            tau_n ~ Gamma(nu/2, nu/2)
            y_n ~ N(mu, sigma^2 / tau_n)

        To update nu, we do EM and optimize the expected log likelihood using
        a generalized Newton's method.  See the notebook in doc/students_t for
        complete details.
        """
        K, D = self.K, self.D

        # Compute the precisions w for each data point
        E_taus = np.zeros(K)
        E_logtaus = np.zeros(K)
        weights = np.zeros(K)
        for y, (Ez, _, _) in zip(datas, expectations):
            # nu: (K,)  mus: (K, D)  sigmas: (K, D)  y: (T, D)  -> alpha/beta: (T, K, D)
            nus = np.exp(self.inv_nus[:, None])
            alpha = nus/2 + 1/2
            beta = nus/2 + 1/2 * (y[:, None, :] - self.mus)**2 / np.exp(self.inv_sigmas)
            
            E_taus += np.sum(Ez[:, :, None] * alpha / beta, axis=(0, 2))
            E_logtaus += np.sum(Ez[:, :, None] * (digamma(alpha) - np.log(beta)), axis=(0, 2))
            weights += np.sum(Ez, axis=0) * D

        E_taus /= weights
        E_logtaus /= weights

        for k in range(K):
            self.inv_nus[k] = np.log(generalized_newton_studentst_dof(E_taus[k], E_logtaus[k]))
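The per-point posteriors q(tau_n) built above are Gamma(alpha, rate=beta), so the E-step expectations are E[tau] = alpha/beta and E[log tau] = digamma(alpha) - log(beta). A small standalone Monte Carlo sketch of those two identities (not part of the class):

import numpy as np
from scipy.special import digamma

alpha, beta = 3.2, 1.7
tau = np.random.default_rng(0).gamma(shape=alpha, scale=1.0 / beta, size=500000)
print(tau.mean(), alpha / beta)                            # E[tau]
print(np.log(tau).mean(), digamma(alpha) - np.log(beta))   # E[log tau]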
def objective(paramslin, x, z, pij_flatten, pij0sum, run_time, taus, gamma, alpha, g0_params,precomp):
    params = util.unlinearise_params(paramslin, verbose=0)
    d, nz = z.shape
    nx = x.shape[1]
    kzzinv_m = precomp.Kzzinv @ params.m
    s = params.L @ params.L.T+__nugget(params.L.shape[1])
    eqn15sum = (params.m.T @ precomp.Kzzinv_psi_sum_Kzzinv @params.m)[0,0]

    eqn16a = np.trace(precomp.Kzzinv_psi_sum)
    eqn16b = np.trace(precomp.Kzzinv_psi_sum_Kzzinv @ s)
    eqn16sum = gamma*np.sum((run_time-x[0])**d)-eqn16a + eqn16b

    mutilde = (precomp.Kzzinv_kzx.T @ params.m).flatten()
    sigmaa = precomp.sigmas
    sigmab = np.sum(precomp.Kxz * precomp.Kzzinv_kzx.T, axis=1)
    sigmac = np.sum((params.L.T @ precomp.Kzzinv_kzx) ** 2, axis=0)
    sigmatilde = sigmaa - sigmab + sigmac
    eqn19a, eqn19b, eqn19c = expected_log_f2(mutilde, np.sqrt(sigmatilde))
    eqn19sum = -(eqn19c + eqn19a + eqn19b)@pij_flatten

    kl_normal = kl_tril(params.L, params.m, precomp.Lzz, 0)
    kl_g = kl_gamma(params.scale,params.shape, g0_params['scale'],g0_params['shape'])

    total = kl_normal+kl_g+eqn15sum + eqn16sum + eqn19sum +run_time*params.shape*params.scale-\
            pij0sum*(special.digamma(params.shape)+np.log(params.scale))
    return total
Code Example #6
    def _m_step_nu(self, expectations, datas, inputs, masks, tags, optimizer,
                   num_iters, **kwargs):
        K, D = self.K, self.D
        E_taus = np.zeros(K)
        E_logtaus = np.zeros(K)
        weights = np.zeros(K)
        for (
                Ez,
                _,
                _,
        ), data, input, mask, tag in zip(expectations, datas, inputs, masks,
                                         tags):
            # nu: (K,)  mus: (K, D)  sigmas: (K, D)  y: (T, D)  -> w: (T, K, D)
            mus = self._compute_mus(data, input, mask, tag)
            sigmas = self._compute_sigmas(data, input, mask, tag)
            nus = np.exp(self.inv_nus[:, None])

            alpha = nus / 2 + 1 / 2
            beta = nus / 2 + 1 / 2 * (data[:, None, :] - mus)**2 / sigmas

            E_taus += np.sum(Ez[:, :, None] * alpha / beta, axis=(0, 2))
            E_logtaus += np.sum(Ez[:, :, None] *
                                (digamma(alpha) - np.log(beta)),
                                axis=(0, 2))
            weights += np.sum(Ez, axis=0) * D

        E_taus /= weights
        E_logtaus /= weights

        for k in range(K):
            self.inv_nus[k] = np.log(
                generalized_newton_studentst_dof(E_taus[k], E_logtaus[k]))
Code Example #7
def gamma_grad_logq(epsilon, alpha):
    """
    Gradient of log-Gamma at proposed value.
    """
    h_val = gamma_h(epsilon, alpha)
    h_der = gamma_grad_h(epsilon, alpha)
    
    return np.log(h_val) + (alpha-1.)*h_der/h_val - h_der - sp.digamma(alpha)
Code Example #8
File: niw.py Project: mattjj/svae
def expectedstats(natparam, fudge=1e-8):
    S, m, kappa, nu = natural_to_standard(natparam)
    d = m.shape[-1]

    E_J = nu[...,None,None] * symmetrize(np.linalg.inv(S)) + fudge * np.eye(d)
    E_h = np.matmul(E_J, m[...,None])[...,0]
    E_hTJinvh = d/kappa + np.matmul(m[...,None,:], E_h[...,None])[...,0,0]
    E_logdetJ = (np.sum(digamma((nu[...,None] - np.arange(d)[None,...])/2.), -1) \
                 + d*np.log(2.)) - np.linalg.slogdet(S)[1]

    return pack_dense(-1./2 * E_J, E_h, -1./2 * E_hTJinvh, 1./2 * E_logdetJ)
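The E_logdetJ line encodes the standard Wishart identity E[log det J] = sum_{i=1}^d digamma((nu + 1 - i)/2) + d log 2 - log det S for J ~ Wishart(nu, S^{-1}). A Monte Carlo sketch of that identity in isolation (it does not rely on the natural_to_standard or pack_dense helpers):

import numpy as np
from scipy.special import digamma
from scipy.stats import wishart

d, nu = 3, 7.0
S = np.array([[2.0, 0.3, 0.1], [0.3, 1.5, 0.2], [0.1, 0.2, 1.0]])
J = wishart(df=nu, scale=np.linalg.inv(S)).rvs(size=100000, random_state=0)
mc = np.linalg.slogdet(J)[1].mean()
analytic = digamma((nu - np.arange(d)) / 2.).sum() + d * np.log(2.) - np.linalg.slogdet(S)[1]
print(mc, analytic)  # should agree to roughly two decimal places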
Code Example #9
def grad_logQ(sample, alpha, m):
    """
    Evaluates the gradient of the log of variational approximation, vectorized.
    """
    gradient = np.zeros((alpha.shape[0], 2))

    gradient[:, 0] = np.log(alpha) - np.log(m) + 1. + np.log(sample) - sample / m
    gradient[:, 0] -= sp.digamma(alpha)
    gradient[:, 1] = -alpha / m + alpha * sample / m**2

    return gradient
Code Example #10
def expectedstats(natparam, fudge=1e-8):
    S, m, kappa, nu = natural_to_standard(natparam)
    d = m.shape[-1]

    E_J = nu[..., None, None] * symmetrize(
        np.linalg.inv(S)) + fudge * np.eye(d)
    E_h = np.matmul(E_J, m[..., None])[..., 0]
    E_hTJinvh = d / kappa + np.matmul(m[..., None, :], E_h[..., None])[..., 0, 0]
    E_logdetJ = (np.sum(digamma((nu[...,None] - np.arange(d)[None,...])/2.), -1) \
                 + d*np.log(2.)) - np.linalg.slogdet(S)[1]

    return pack_dense(-1. / 2 * E_J, E_h, -1. / 2 * E_hTJinvh,
                      1. / 2 * E_logdetJ)
Code Example #11
File: mniw.py Project: WuCPMark/svae
def expectedstats_standard(nu, S, M, K, fudge=1e-8):
    m = M.shape[0]
    E_Sigmainv = nu*symmetrize(np.linalg.inv(S)) + fudge*np.eye(S.shape[0])
    E_Sigmainv_A = nu*np.linalg.solve(S, M)
    E_AT_Sigmainv_A = m*K + nu*symmetrize(np.dot(M.T, np.linalg.solve(S, M))) \
        + fudge*np.eye(K.shape[0])
    E_logdetSigmainv = digamma((nu-np.arange(m))/2.).sum() \
        + m*np.log(2) - np.linalg.slogdet(S)[1]

    assert is_posdef(E_Sigmainv)
    assert is_posdef(E_AT_Sigmainv_A)

    return make_tuple(
        -1./2*E_AT_Sigmainv_A, E_Sigmainv_A.T, -1./2*E_Sigmainv, 1./2*E_logdetSigmainv)
Code Example #12
File: mniw.py Project: lfywork/svae
def expectedstats_standard(nu, S, M, K, fudge=1e-8):
    m = M.shape[0]
    E_Sigmainv = nu * symmetrize(np.linalg.inv(S)) + fudge * np.eye(S.shape[0])
    E_Sigmainv_A = nu * np.linalg.solve(S, M)
    E_AT_Sigmainv_A = m*K + nu*symmetrize(np.dot(M.T, np.linalg.solve(S, M))) \
        + fudge*np.eye(K.shape[0])
    E_logdetSigmainv = digamma((nu-np.arange(m))/2.).sum() \
        + m*np.log(2) - np.linalg.slogdet(S)[1]

    assert is_posdef(E_Sigmainv)
    assert is_posdef(E_AT_Sigmainv_A)

    return tuple_((-1. / 2 * E_AT_Sigmainv_A, E_Sigmainv_A.T,
                   -1. / 2 * E_Sigmainv, 1. / 2 * E_logdetSigmainv))
Code Example #13
def grep_gradient(alpha, m, x, K, alphaz):
    gradient = np.zeros((alpha.shape[0], 2))
    lmbda = npr.gamma(alpha, 1.)
    lmbda[lmbda < 1e-5] = 1e-5
    Tinv_val = fun_Tinv(lmbda, alpha)
    h_val = fun_H(Tinv_val, alpha)
    u_val = fun_U(Tinv_val, alpha)

    zw = m * lmbda / alpha
    zw[zw < 1e-5] = 1e-5
    logp_der = grad_logp(zw, K, x, alphaz)
    logp_val = logp(zw, K, x, alphaz)
    logq_der = grad_logQ_Z(zw, alpha)

    gradient[:, 0] = logp_der * (h_val - lmbda / alpha) * m / alpha
    gradient[:, 1] = logp_der * lmbda / alpha
    gradient[:, 0] += logp_val * (
        np.log(lmbda) + (alpha / lmbda - 1.) * h_val - sp.digamma(alpha) +
        sp.polygamma(2, alpha) / 2. / sp.polygamma(1, alpha))
    gradient += grad_entropy(alpha, m)

    return gradient
def update_pij(paramslin, taus, z, gamma, alpha, pij, kzzinv):
    nx = len(taus)+1
    params = util.unlinearise_params(paramslin, verbose=0)
    kzzinv_m = kzzinv @ params.m
    expEmu = params.scale*np.exp(special.digamma(params.shape))

    for i in range(nx-1):
        tau = taus[i]
        Kxz = k(tau, z, gamma, alpha)
        mutilde = (Kxz @ kzzinv_m).flatten()
        sigmaa = kdiag(tau, gamma)
        kzzinv_kzx = kzzinv @ Kxz.T
        sigmab = np.sum(Kxz * kzzinv_kzx.T, axis=1)
        sigmac = np.sum((params.L.T @ kzzinv_kzx) ** 2, axis=0)
        sigmatilde = sigmaa - sigmab + sigmac

        eqn19a, eqn19b, eqn19c = expected_log_f2(mutilde, np.sqrt(sigmatilde))
        eqn19 = eqn19a + eqn19b + eqn19c
        expeqn19 = np.exp(eqn19)
        denom = expEmu + np.sum(expeqn19)
        pij[i+1][0] = expEmu / denom
        pij[i+1][1:tau.shape[1]+1] = expeqn19 / denom

    return pij
def I(a, b, c, d):
    return -c*d/a - b*np.log(a) - special.gammaln(b) + (b-1)*(special.digamma(d) + np.log(c))
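One reading of I(a, b, c, d), not stated in the source, is the expected log-density E_q[log Gamma(x; shape=b, scale=a)] under q(x) = Gamma(shape=d, scale=c), using E_q[x] = c*d and E_q[log x] = digamma(d) + log(c). A Monte Carlo sketch under that interpretation (assumes I above and its np/special imports are in scope):

import numpy as np
from scipy import special
from scipy.stats import gamma as gamma_dist

a, b, c, d = 1.3, 2.0, 0.7, 3.5
x = np.random.default_rng(0).gamma(shape=d, scale=c, size=500000)
print(gamma_dist(a=b, scale=a).logpdf(x).mean())   # Monte Carlo estimate
print(I(a, b, c, d))                               # closed form above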
Code Example #16
def E_ln_pi_k(k, alpha):
    return digamma(alpha[k]) - digamma(np.sum(alpha))
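This is the Dirichlet identity E[ln pi_k] = digamma(alpha_k) - digamma(sum_j alpha_j). A quick self-contained Monte Carlo sketch:

import numpy as np
from scipy.special import digamma

alpha = np.array([0.7, 2.0, 3.5])
pi = np.random.default_rng(0).dirichlet(alpha, size=500000)
print(np.log(pi).mean(axis=0))                 # Monte Carlo E[ln pi]
print(digamma(alpha) - digamma(alpha.sum()))   # E_ln_pi_k(k, alpha) for each k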
Code Example #17
from __future__ import absolute_import
import scipy.stats

import autograd.numpy as np
from autograd.scipy.special import digamma
from autograd.core import primitive

rvs    = primitive(scipy.stats.dirichlet.rvs)
pdf    = primitive(scipy.stats.dirichlet.pdf)
logpdf = primitive(scipy.stats.dirichlet.logpdf)

logpdf.defvjp(lambda g, ans, vs, gvs, x, alpha: g * (alpha - 1) / x, argnum=0)
logpdf.defvjp(lambda g, ans, vs, gvs, x, alpha: g * (digamma(np.sum(alpha)) - digamma(alpha) + np.log(x)), argnum=1)

# Same as log pdf, but multiplied by the pdf (ans).
pdf.defvjp(lambda g, ans, vs, gvs, x, alpha: g * ans * (alpha - 1) / x, argnum=0)
pdf.defvjp(lambda g, ans, vs, gvs, x, alpha: g * ans * (digamma(np.sum(alpha)) - digamma(alpha) + np.log(x)), argnum=1)
Code Example #18
File: objective.py Project: yalechang/MCVC
def ELBO_terms(param, prior, X, S, Ncon, G, M, K):
    eps = 1e-12

    # get sample size and feature size
    [N, D] = np.shape(X)

    # unpack the input parameter vector
    [tau_a1, tau_a2, tau_b1, tau_b2, phi, tau_v1, tau_v2, mu_w, sigma_w,\
            mu_b, sigma_b] = unpackParam(param, N, D, G, M, K)

    # compute eta given mu_w and mu_b
    eta = np.zeros((0, K))
    for g in np.arange(G):
        t1 = np.exp(np.dot(X, mu_w[g]) + mu_b[g])
        t2 = np.transpose(np.tile(np.sum(t1, axis=1), (K, 1)))
        eta = np.vstack((eta, t1 / t2))
    eta = np.reshape(eta, (G, N, K))

    # compute the expectation terms to be used later
    E_log_Alpha = digamma(tau_a1) - digamma(tau_a1 + tau_a2)  # len(M)
    E_log_OneMinusAlpha = digamma(tau_a2) - digamma(tau_a1 + tau_a2)  # len(M)
    E_log_Beta = digamma(tau_b1) - digamma(tau_b1 + tau_b2)  # len(M)
    E_log_OneMinusBeta = digamma(tau_b2) - digamma(tau_b1 + tau_b2)  # len(M)

    E_log_Nu = digamma(tau_v1) - digamma(tau_v1 + tau_v2)  # len(G)
    E_log_OneMinusNu = digamma(tau_v2) - digamma(tau_v1 + tau_v2)  # len(G)
    E_C = phi  # shape(M, G)
    E_W = mu_w  # shape(G, D, K)
    E_WMinusMuSqd = sigma_w**2 + (mu_w - prior['mu_w'])**2  # shape(G, D, K)
    E_BMinusMuSqd = sigma_b**2 + (mu_b - prior['mu_b'])**2  # shape(G, K)
    E_ExpB = np.exp(mu_b + 0.5 * sigma_b**2)  # shape(G, K)

    E_logP_Alpha = (prior['tau_a1']-1) * E_log_Alpha + \
            (prior['tau_a2']-1) * E_log_OneMinusAlpha -  \
            gammaln(prior['tau_a1']+eps) - \
            gammaln(prior['tau_a2']+eps) + \
            gammaln(prior['tau_a1']+prior['tau_a2']+eps)

    E_logP_Beta = (prior['tau_b1']-1) * E_log_Beta + \
            (prior['tau_b2']-1) * E_log_OneMinusBeta - \
            gammaln(prior['tau_b1']+eps) - \
            gammaln(prior['tau_b2']+eps) + \
            gammaln(prior['tau_b1']+prior['tau_b2']+eps)

    E_logQ_Alpha = (tau_a1-1)*E_log_Alpha + (tau_a2-1)*E_log_OneMinusAlpha - \
            gammaln(tau_a1 + eps) - gammaln(tau_a2 + eps) + \
            gammaln(tau_a1+tau_a2 + eps)

    E_logQ_Beta = (tau_b1-1)*E_log_Beta + (tau_b2-1)*E_log_OneMinusBeta - \
            gammaln(tau_b1 + eps) - gammaln(tau_b2 + eps) + \
            gammaln(tau_b1+tau_b2 + eps)

    E_logQ_C = np.sum(phi * np.log(phi + eps), axis=1)

    eta_N_GK = np.reshape(np.transpose(eta, (1, 0, 2)), (N, G * K))

    # compute three terms and then add them up
    L_1, L_2, L_3 = [0., 0., 0.]
    # the first term and part of the second term
    for m in np.arange(M):
        idx_S = range(sum(Ncon[:m]), sum(Ncon[:m]) + Ncon[m])
        tp_con = S[idx_S, 3]

        phi_rep = np.reshape(np.transpose(np.tile(phi[m], (K, 1))), G * K)
        E_A = np.dot(eta_N_GK, np.transpose(eta_N_GK * phi_rep))
        E_A_use = E_A[S[idx_S, 1], S[idx_S, 2]]
        tp_Asum = np.sum(E_A_use)
        tp_AdotS = np.sum(E_A_use * tp_con)

        L_1 = L_1 + Ncon[m]*E_log_Beta[m] + np.sum(tp_con)*\
                (E_log_OneMinusBeta[m]-E_log_Beta[m]) + \
                tp_AdotS * (E_log_Alpha[m] + E_log_Beta[m] - \
                E_log_OneMinusAlpha[m] - E_log_OneMinusBeta[m]) + \
                tp_Asum * (E_log_OneMinusAlpha[m] - E_log_Beta[m])

        # fg(g): responsibility-weighted stick-breaking remainder, sum_{l<g} E[ln(1 - nu_l)]
        fg = lambda g: phi[m, g] * np.sum(E_log_OneMinusNu[0:g])

        L_2 = L_2 + E_logP_Alpha[m] + E_logP_Beta[m] + \
                np.dot(phi[m], E_log_Nu) + np.sum([fg(g) for g in np.arange(G)])

    # the second term
    for g in np.arange(G):
        tp_Nug = (prior['gamma']-1)*E_log_OneMinusNu[g] + \
                np.log(prior['gamma']+eps)

        t1 = np.dot(X, mu_w[g])
        t2 = 0.5 * np.dot(X**2, sigma_w[g]**2)
        t3 = np.sum(eta[g], axis=1)
        t_mat_i = logsumexp(np.add(mu_b[g] + 0.5 * sigma_b[g]**2, t1 + t2),
                            axis=1)
        tp_Zg = np.sum(eta[g] * np.add(t1, mu_b[g])) - np.dot(t3, t_mat_i)

        t5 = -np.log(np.sqrt(2*np.pi)*prior['sigma_w']) - \
                0.5/(prior['sigma_w']**2) * (sigma_w[g]**2 + \
                (mu_w[g]-prior['mu_w'])**2)
        tp_Wg = np.sum(t5)
        t6 = -np.log(np.sqrt(2*np.pi)*prior['sigma_b']+eps) - \
                0.5/(prior['sigma_b']**2) * (sigma_b[g]**2 + \
                (mu_b[g]-prior['mu_b'])**2)
        tp_bg = np.sum(t6)
        L_2 = L_2 + tp_Nug + tp_Zg + tp_Wg + tp_bg

    # the third term
    L_3 = np.sum(E_logQ_Alpha + E_logQ_Beta + E_logQ_C)
    for g in np.arange(G):
        tp_Nug3 = (tau_v1[g]-1)*E_log_Nu[g]+(tau_v2[g]-1)*E_log_OneMinusNu[g] -\
                np.log(gamma(tau_v1[g])+eps) - np.log(gamma(tau_v2[g])+eps) + \
                np.log(gamma(tau_v1[g]+tau_v2[g])+eps)
        tp_Zg3 = np.sum(eta[g] * np.log(eta[g] + eps))
        tp_Wg3 = np.sum(-np.log(np.sqrt(2 * np.pi) * sigma_w[g] + eps) - 0.5)
        tp_bg3 = np.sum(-np.log(np.sqrt(2 * np.pi) * sigma_b[g] + eps) - 0.5)
        L_3 = L_3 + tp_Nug3 + tp_Zg3 + tp_Wg3 + tp_bg3

    return (L_1, L_2, L_3)
Code Example #19
File: dp.py Project: yaminibansal/svae
def expectedstats(natparam):
    #Returns E_{q(v)}[\eta_z(V)] where \eta_z(V)_i = [ln(V_i), ln(1-V_i)]
    #natparam size is (T-1)x2
    alpha_beta = natparam + 1
    return digamma(alpha_beta) - digamma(
        np.sum(alpha_beta, axis=1, keepdims=True))
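Here natparam stores (alpha - 1, beta - 1) per stick, so alpha_beta recovers the Beta parameters and row i of the result is [E ln V_i, E ln(1 - V_i)]. A one-row Monte Carlo sketch of that claim:

import numpy as np
from scipy.special import digamma

a, b = 2.0, 5.0
v = np.random.default_rng(0).beta(a, b, size=500000)
print(np.log(v).mean(), np.log1p(-v).mean())   # Monte Carlo
print(digamma([a, b]) - digamma(a + b))        # = expectedstats(np.array([[a - 1., b - 1.]]))[0]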
Code Example #20
File: dirichlet.py Project: AugustLONG/autograd
from __future__ import absolute_import
import scipy.stats

import autograd.numpy as np
from autograd.scipy.special import digamma
from autograd.core import primitive

rvs    = primitive(scipy.stats.dirichlet.rvs)
pdf    = primitive(scipy.stats.dirichlet.pdf)
logpdf = primitive(scipy.stats.dirichlet.logpdf)

logpdf.defgrad(lambda ans, x, alpha: lambda g: g * (alpha - 1) / x, argnum=0)
logpdf.defgrad(lambda ans, x, alpha: lambda g: g * (digamma(np.sum(alpha)) - digamma(alpha) + np.log(x)), argnum=1)

# Same as log pdf, but multiplied by the pdf (ans).
pdf.defgrad(lambda ans, x, alpha: lambda g: g * ans * (alpha - 1) / x, argnum=0)
pdf.defgrad(lambda ans, x, alpha: lambda g: g * ans * (digamma(np.sum(alpha)) - digamma(alpha) + np.log(x)), argnum=1)
Code Example #21
def expectedstats(natparam):
    alpha = natparam + 1
    return digamma(alpha) - digamma(alpha.sum(-1, keepdims=True))
Code Example #22
def E_ln_lam_k(k, nu, W):
    # Wishart expectation E[ln |Lambda_k|] = sum_{i=1}^{D} digamma((nu_k + 1 - i)/2) + D ln 2 + ln |W_k|
    # (D and det are assumed to be defined in the enclosing module)
    return np.sum(digamma((nu[k] + 1 - np.arange(1, D + 1)) / 2.)) \
        + D * np.log(2) + np.log(det(W[k]))
Code Example #23
File: gamma_def.py Project: sabetAI/kaggle_spooky
def entropy(alpha, m):
    return alpha + np.log(m) - np.log(alpha) + sp.gammaln(
        alpha) + (1. - alpha) * sp.digamma(alpha)
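The expression above is the entropy of a Gamma with shape alpha and mean m (scale m/alpha). A check against scipy (a sketch; assumes entropy above and its np/sp aliases are in scope):

import numpy as np
from scipy.stats import gamma as gamma_dist

alpha, m = 2.3, 0.8
print(entropy(alpha, m))
print(gamma_dist(a=alpha, scale=m / alpha).entropy())  # should match to numerical precision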
Code Example #24
def grad_logQ_alpha(samp, alpha):
    return np.log(samp) - sp.digamma(alpha)
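grad_logQ_alpha is the score of a Gamma(alpha, 1) density with respect to its shape, d/dalpha log q(x; alpha) = log x - digamma(alpha). A finite-difference sketch of that derivative using scipy.stats directly (assumes grad_logQ_alpha and its np/sp aliases are in scope):

from scipy.stats import gamma as gamma_dist

samp, alpha, h = 2.3, 1.7, 1e-6
numeric = (gamma_dist(alpha + h).logpdf(samp) - gamma_dist(alpha - h).logpdf(samp)) / (2 * h)
print(grad_logQ_alpha(samp, alpha), numeric)  # should agree to several decimal places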
Code Example #25
from __future__ import absolute_import
import autograd.scipy.stats.dirichlet as di
import autograd.numpy as np
from autograd.scipy.special import digamma

di.logpdf.defjvp(lambda g, ans, gvs, vs, x, alpha: np.inner(g, (alpha - 1) / x), argnum=0)
di.logpdf.defjvp(lambda g, ans, gvs, vs, x, alpha: np.inner(g, (digamma(np.sum(alpha)) - digamma(alpha) + np.log(x))), argnum=1)

di.pdf.defjvp(lambda g, ans, gvs, vs, x, alpha: np.inner(g, ans * (alpha - 1) / x), argnum=0)
di.pdf.defjvp(lambda g, ans, gvs, vs, x, alpha: np.inner(g, ans * (digamma(np.sum(alpha)) - digamma(alpha) + np.log(x))), argnum=1)
Code Example #26
File: dirichlet.py Project: WuCPMark/svae
def expectedstats(natparam):
    alpha = natparam + 1
    return digamma(alpha) - digamma(alpha.sum(-1, keepdims=True))
Code Example #27
File: objective.py Project: yalechang/MCVC
def NegELBO(param, prior, X, S, Ncon, G, M, K):
    """
    Parameters
    ----------
    param: length (2M + 2M + MG + 2G + GNK + GDK + GDK + GK + GK) 
        variational parameters, including:
        1) tau_a1: len(M), first parameter of q(alpha_m)
        2) tau_a2: len(M), second parameter of q(alpha_m)
        3) tau_b1: len(M), first parameter of q(beta_m)
        4) tau_b2: len(M), second parameter of q(beta_m)
        5) phi: shape(M, G), phi[m,:] is the parameter vector of q(c_m)
        6) tau_v1: len(G), first parameter of q(nu_g)
        7) tau_v2: len(G), second parameter of q(nu_g)
        8) mu_w: shape(G, D, K), mu_w[g,d,k] is the mean parameter of 
            q(W^g_{dk})
        9) sigma_w: shape(G, D, K), sigma_w[g,d,k] is the std parameter of 
            q(W^g_{dk})
        10) mu_b: shape(G, K), mu_b[g,k] is the mean parameter of q(b^g_k)
        11) sigma_b: shape(G, K), sigma_b[g,k] is the std parameter of q(b^g_k)

    prior: dictionary
        the naming of keys follow those in param
        {'tau_a1':val1, ...}

    X: shape(N, D)
        each row represents a sample and each column represents a feature

    S: shape(n_con, 4)
        each row represents an observed constraint (expert_id, sample1_id,
        sample2_id, constraint_type), where
        1) expert_id: varies between [0, M-1]
        2) sample1 id: varies between [0, N-1]
        3) sample2 id: varies between [0, N-1]
        4) constraint_type: 1 means must-link and 0 means cannot-link

    Ncon: shape(M, 1)
        number of constraints provided by each expert

    G: int
        number of local consensus solutions in the truncated Dirichlet process posterior

    M: int
        number of experts

    K: int
        maximal number of clusters across the different solutions; because
        discriminative clustering is used, some local solutions may have empty
        clusters

    Returns
    -------
    negELBO : float
        negative of the evidence lower bound, -(L_1 + L_2 - L_3)
    """

    eps = 1e-12

    # get sample size and feature size
    [N, D] = np.shape(X)

    # unpack the input parameter vector
    [tau_a1, tau_a2, tau_b1, tau_b2, phi, tau_v1, tau_v2, mu_w, sigma_w,\
            mu_b, sigma_b] = unpackParam(param, N, D, G, M, K)

    # compute eta given mu_w and mu_b
    eta = np.zeros((0, K))
    for g in np.arange(G):
        t1 = np.exp(np.dot(X, mu_w[g]) + mu_b[g])
        t2 = np.transpose(np.tile(np.sum(t1, axis=1), (K, 1)))
        eta = np.vstack((eta, t1 / t2))
    eta = np.reshape(eta, (G, N, K))

    # compute the expectation terms to be used later
    E_log_Alpha = digamma(tau_a1) - digamma(tau_a1 + tau_a2)  # len(M)
    E_log_OneMinusAlpha = digamma(tau_a2) - digamma(tau_a1 + tau_a2)  # len(M)
    E_log_Beta = digamma(tau_b1) - digamma(tau_b1 + tau_b2)  # len(M)
    E_log_OneMinusBeta = digamma(tau_b2) - digamma(tau_b1 + tau_b2)  # len(M)

    E_log_Nu = digamma(tau_v1) - digamma(tau_v1 + tau_v2)  # len(G)
    E_log_OneMinusNu = digamma(tau_v2) - digamma(tau_v1 + tau_v2)  # len(G)
    E_C = phi  # shape(M, G)
    E_W = mu_w  # shape(G, D, K)
    E_WMinusMuSqd = sigma_w**2 + (mu_w - prior['mu_w'])**2  # shape(G, D, K)
    E_BMinusMuSqd = sigma_b**2 + (mu_b - prior['mu_b'])**2  # shape(G, K)
    E_ExpB = np.exp(mu_b + 0.5 * sigma_b**2)  # shape(G, K)

    E_logP_Alpha = (prior['tau_a1']-1) * E_log_Alpha + \
            (prior['tau_a2']-1) * E_log_OneMinusAlpha -  \
            gammaln(prior['tau_a1']+eps) - \
            gammaln(prior['tau_a2']+eps) + \
            gammaln(prior['tau_a1']+prior['tau_a2']+eps)

    E_logP_Beta = (prior['tau_b1']-1) * E_log_Beta + \
            (prior['tau_b2']-1) * E_log_OneMinusBeta - \
            gammaln(prior['tau_b1']+eps) - \
            gammaln(prior['tau_b2']+eps) + \
            gammaln(prior['tau_b1']+prior['tau_b2']+eps)

    E_logQ_Alpha = (tau_a1-1)*E_log_Alpha + (tau_a2-1)*E_log_OneMinusAlpha - \
            gammaln(tau_a1 + eps) - gammaln(tau_a2 + eps) + \
            gammaln(tau_a1+tau_a2 + eps)

    E_logQ_Beta = (tau_b1-1)*E_log_Beta + (tau_b2-1)*E_log_OneMinusBeta - \
            gammaln(tau_b1 + eps) - gammaln(tau_b2 + eps) + \
            gammaln(tau_b1+tau_b2 + eps)

    E_logQ_C = np.sum(phi * np.log(phi + eps), axis=1)

    eta_N_GK = np.reshape(np.transpose(eta, (1, 0, 2)), (N, G * K))

    # compute three terms and then add them up
    L_1, L_2, L_3 = [0., 0., 0.]
    # the first term and part of the second term
    for m in np.arange(M):
        idx_S = range(sum(Ncon[:m]), sum(Ncon[:m]) + Ncon[m])
        tp_con = S[idx_S, 3]

        phi_rep = np.reshape(np.transpose(np.tile(phi[m], (K, 1))), G * K)
        E_A = np.dot(eta_N_GK, np.transpose(eta_N_GK * phi_rep))
        E_A_use = E_A[S[idx_S, 1], S[idx_S, 2]]
        tp_Asum = np.sum(E_A_use)
        tp_AdotS = np.sum(E_A_use * tp_con)

        L_1 = L_1 + Ncon[m]*E_log_Beta[m] + np.sum(tp_con)*\
                (E_log_OneMinusBeta[m]-E_log_Beta[m]) + \
                tp_AdotS * (E_log_Alpha[m] + E_log_Beta[m] - \
                E_log_OneMinusAlpha[m] - E_log_OneMinusBeta[m]) + \
                tp_Asum * (E_log_OneMinusAlpha[m] - E_log_Beta[m])

        # fg(g): responsibility-weighted stick-breaking remainder, sum_{l<g} E[ln(1 - nu_l)]
        fg = lambda g: phi[m, g] * np.sum(E_log_OneMinusNu[0:g])

        L_2 = L_2 + E_logP_Alpha[m] + E_logP_Beta[m] + \
                np.dot(phi[m], E_log_Nu) + np.sum([fg(g) for g in np.arange(G)])

    # the second term
    for g in np.arange(G):
        tp_Nug = (prior['gamma']-1)*E_log_OneMinusNu[g] + \
                np.log(prior['gamma']+eps)

        t1 = np.dot(X, mu_w[g])
        t2 = 0.5 * np.dot(X**2, sigma_w[g]**2)
        t3 = np.sum(eta[g], axis=1)
        t_mat_i = logsumexp(np.add(mu_b[g] + 0.5 * sigma_b[g]**2, t1 + t2),
                            axis=1)
        tp_Zg = np.sum(eta[g] * np.add(t1, mu_b[g])) - np.dot(t3, t_mat_i)

        t5 = -np.log(np.sqrt(2*np.pi)*prior['sigma_w']) - \
                0.5/(prior['sigma_w']**2) * (sigma_w[g]**2 + \
                (mu_w[g]-prior['mu_w'])**2)
        tp_Wg = np.sum(t5)
        t6 = -np.log(np.sqrt(2*np.pi)*prior['sigma_b']+eps) - \
                0.5/(prior['sigma_b']**2) * (sigma_b[g]**2 + \
                (mu_b[g]-prior['mu_b'])**2)
        tp_bg = np.sum(t6)
        L_2 = L_2 + tp_Nug + tp_Zg + tp_Wg + tp_bg

    # the third term
    L_3 = np.sum(E_logQ_Alpha + E_logQ_Beta + E_logQ_C)
    for g in np.arange(G):
        tp_Nug3 = (tau_v1[g]-1)*E_log_Nu[g]+(tau_v2[g]-1)*E_log_OneMinusNu[g] -\
                np.log(gamma(tau_v1[g])+eps) - np.log(gamma(tau_v2[g])+eps) + \
                np.log(gamma(tau_v1[g]+tau_v2[g])+eps)
        tp_Zg3 = np.sum(eta[g] * np.log(eta[g] + eps))
        tp_Wg3 = np.sum(-np.log(np.sqrt(2 * np.pi) * sigma_w[g] + eps) - 0.5)
        tp_bg3 = np.sum(-np.log(np.sqrt(2 * np.pi) * sigma_b[g] + eps) - 0.5)
        L_3 = L_3 + tp_Nug3 + tp_Zg3 + tp_Wg3 + tp_bg3

    # Note the third term should have a minus sign before it
    ELBO = L_1 + L_2 - L_3
    #ELBO = L_1 + L_2

    return -ELBO
Code Example #28
from __future__ import absolute_import
import scipy.stats

import autograd.numpy as np
from autograd.scipy.special import digamma
from autograd.extend import primitive, defvjp

rvs = primitive(scipy.stats.dirichlet.rvs)
pdf = primitive(scipy.stats.dirichlet.pdf)
logpdf = primitive(scipy.stats.dirichlet.logpdf)

defvjp(
    logpdf, lambda ans, x, alpha: lambda g: g * (alpha - 1) / x,
    lambda ans, x, alpha: lambda g: g *
    (digamma(np.sum(alpha)) - digamma(alpha) + np.log(x)))

# Same as log pdf, but multiplied by the pdf (ans).
defvjp(
    pdf, lambda ans, x, alpha: lambda g: g * ans * (alpha - 1) / x,
    lambda ans, x, alpha: lambda g: g * ans *
    (digamma(np.sum(alpha)) - digamma(alpha) + np.log(x)))
Code Example #29
def fun_Tinv(z, alpha):
    return (np.log(z) - sp.digamma(alpha)) / np.sqrt(sp.polygamma(1, alpha))
Code Example #30
def ln_lam_tilde_k(k, nu, W, D):
    # Wishart expectation E[ln |Lambda_k|] = sum_{i=1}^{D} digamma((nu_k + 1 - i)/2) + D ln 2 + ln |W_k|
    return anp.sum(digamma((nu[k] + 1 - anp.arange(1, D + 1)) / 2.)) \
        + D * anp.log(2) + anp.log(anp.linalg.det(W[k]))
Code Example #31
from __future__ import absolute_import

import autograd.numpy as np
import autograd.scipy.special as sp

### Gamma functions ###
sp.polygamma.defjvp(lambda g, ans, gvs, vs, n, x: g * sp.polygamma(n + 1, x),
                    argnum=1)
sp.psi.defjvp(lambda g, ans, gvs, vs, x: g * sp.polygamma(1, x))
sp.digamma.defjvp(lambda g, ans, gvs, vs, x: g * sp.polygamma(1, x))
sp.gamma.defjvp(lambda g, ans, gvs, vs, x: g * ans * sp.psi(x))
sp.gammaln.defjvp(lambda g, ans, gvs, vs, x: g * sp.psi(x))
sp.rgamma.defjvp(lambda g, ans, gvs, vs, x: g * sp.psi(x) / -sp.gamma(x))
sp.multigammaln.defjvp(lambda g, ans, gvs, vs, a, d: g * np.sum(
    sp.digamma(np.expand_dims(a, -1) - np.arange(d) / 2.), -1))

### Bessel functions ###
sp.j0.defjvp(lambda g, ans, gvs, vs, x: -g * sp.j1(x))
sp.y0.defjvp(lambda g, ans, gvs, vs, x: -g * sp.y1(x))
sp.j1.defjvp(lambda g, ans, gvs, vs, x: g * (sp.j0(x) - sp.jn(2, x)) / 2.0)
sp.y1.defjvp(lambda g, ans, gvs, vs, x: g * (sp.y0(x) - sp.yn(2, x)) / 2.0)
sp.jn.defjvp(lambda g, ans, gvs, vs, n, x: g *
             (sp.jn(n - 1, x) - sp.jn(n + 1, x)) / 2.0,
             argnum=1)
sp.yn.defjvp(lambda g, ans, gvs, vs, n, x: g *
             (sp.yn(n - 1, x) - sp.yn(n + 1, x)) / 2.0,
             argnum=1)

### Error Function ###
sp.erf.defjvp(
    lambda g, ans, gvs, vs, x: 2. * g * sp.inv_root_pi * np.exp(-x**2))
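The registration for sp.digamma encodes psi'(x) = polygamma(1, x). A finite-difference sketch of that fact using plain scipy, independent of autograd's forward mode:

import scipy.special as sps

x, h = 1.7, 1e-6
print((sps.digamma(x + h) - sps.digamma(x - h)) / (2 * h))
print(sps.polygamma(1, x))  # should agree to roughly five decimal places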
Code Example #32
def fun_T(eps, alpha):
    return np.exp(eps * np.sqrt(sp.polygamma(1, alpha)) + sp.digamma(alpha))
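fun_T and fun_Tinv (Code Example #29) are mutually inverse standardizations of a Gamma variate around its shape parameter: T(eps) = exp(eps * sqrt(polygamma(1, alpha)) + digamma(alpha)) and Tinv(z) = (log z - digamma(alpha)) / sqrt(polygamma(1, alpha)). A round-trip sketch (assumes both functions and their np/sp aliases are in scope):

import numpy as np

alpha = 3.4
z = np.array([0.5, 1.0, 2.5])
print(fun_T(fun_Tinv(z, alpha), alpha))                        # recovers z
print(fun_Tinv(fun_T(np.array([-1., 0., 1.]), alpha), alpha))  # recovers eps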
Code Example #33
File: dirichlet.py Project: HIPS/autograd
from __future__ import absolute_import
import scipy.stats

import autograd.numpy as np
from autograd.scipy.special import digamma
from autograd.extend import primitive, defvjp

rvs    = primitive(scipy.stats.dirichlet.rvs)
pdf    = primitive(scipy.stats.dirichlet.pdf)
logpdf = primitive(scipy.stats.dirichlet.logpdf)

defvjp(logpdf,lambda ans, x, alpha: lambda g:
              g * (alpha - 1) / x,
              lambda ans, x, alpha: lambda g:
              g * (digamma(np.sum(alpha)) - digamma(alpha) + np.log(x)))

# Same as log pdf, but multiplied by the pdf (ans).
defvjp(pdf,lambda ans, x, alpha: lambda g:
           g * ans * (alpha - 1) / x,
           lambda ans, x, alpha: lambda g:
           g * ans * (digamma(np.sum(alpha)) - digamma(alpha) + np.log(x)))
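A usage sketch for the VJPs registered above (assumes autograd is installed and the definitions above are in scope): differentiating logpdf with respect to alpha reproduces the closed form digamma(sum alpha) - digamma(alpha) + log(x) by construction, and the x-gradient can be checked the same way.

import autograd.numpy as np
from autograd import grad
from autograd.scipy.special import digamma

x = np.array([0.2, 0.3, 0.5])
alpha = np.array([1.5, 2.0, 4.0])
print(grad(lambda a: logpdf(x, a))(alpha))
print(digamma(np.sum(alpha)) - digamma(alpha) + np.log(x))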