Example #1
import numpy as np
from scipy.special import psi


def grad_tlogpdf_df(x, df, loc, scale):
    # Gradient of the Student-t log-density with respect to the
    # degrees-of-freedom parameter df.
    y = (x - loc) / scale
    return 0.5 * ((y**2 * (df + 1)) / (df * (y**2 + df))
                  - np.log(y**2 / df + 1) - 1.0 / df
                  - psi(df / 2.0) + psi((df + 1) / 2.0))
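A quick sanity check for this gradient (my addition, not part of the original example, reusing the imports above): compare it against a central finite difference of scipy.stats.t.logpdf, which uses the same (df, loc, scale) parameterization.

from scipy.stats import t

x, df, loc, scale = 1.3, 4.0, 0.5, 2.0
eps = 1e-6
numeric = (t.logpdf(x, df + eps, loc, scale)
           - t.logpdf(x, df - eps, loc, scale)) / (2 * eps)
assert np.isclose(grad_tlogpdf_df(x, df, loc, scale), numeric)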
Example #2
import autograd.numpy as agnp
import autograd.scipy.special as agscipy


def dirichlet_expectation(alpha):
    r"""
    Dirichlet expectation computation:
    E[\log \theta_i] = \Psi(\alpha_i) - \Psi(\sum_{k=1}^{K} \alpha_k)
    """
    if len(alpha.shape) == 1:
        return agscipy.psi(alpha + agnp.finfo(agnp.float32).eps) \
               - agscipy.psi(agnp.sum(alpha))
    return agscipy.psi(alpha + agnp.finfo(agnp.float32).eps)\
           - agscipy.psi(agnp.sum(alpha, 1))[:, agnp.newaxis]
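A minimal usage sketch (my addition, reusing the agnp/agscipy imports above): the function handles both a single Dirichlet parameter vector and a matrix of row-wise parameter vectors.

alpha = agnp.array([0.5, 1.0, 2.0])
print(dirichlet_expectation(alpha))     # shape (3,)

alphas = agnp.array([[0.5, 1.0, 2.0],
                     [3.0, 3.0, 3.0]])
print(dirichlet_expectation(alphas))    # shape (2, 3), row-wise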
Example #3
def elbo(params):
    """
    ELBO computation. `params` is the tuple (lambda_pi, lambda_phi,
    lambda_m, lambda_beta, lambda_nu, lambda_w); the remaining names
    (alpha_o, beta_o, nu_o, m_o, w_o, K, N, xn, log_beta_function,
    log_) are defined elsewhere in the module.
    """
    lambda_pi, lambda_phi, lambda_m, lambda_beta, lambda_nu, lambda_w = params
    e3 = e2 = h2 = 0

    e1 = - log_beta_function(alpha_o) \
         + agnp.dot((alpha_o - agnp.ones(K)), dirichlet_expectation(lambda_pi))
    h1 = log_beta_function(lambda_pi) \
         - agnp.dot((lambda_pi - agnp.ones(K)),
                    dirichlet_expectation(lambda_pi))
    logdet = agnp.log(
        agnp.array([agnp.linalg.det(lambda_w[k, :, :]) for k in range(K)]))
    logDeltak = agscipy.psi(lambda_nu / 2.) \
                + agscipy.psi((lambda_nu - 1.) / 2.) \
                + 2. * agnp.log(2.) + logdet

    for n in range(N):
        e2 += agnp.dot(lambda_phi[n, :], dirichlet_expectation(lambda_pi))
        h2 += -agnp.dot(lambda_phi[n, :], log_(lambda_phi[n, :]))
        product = agnp.array([
            agnp.dot(agnp.dot(xn[n, :] - lambda_m[k, :], lambda_w[k, :, :]),
                     (xn[n, :] - lambda_m[k, :]).T) for k in range(K)
        ])
        e3 += 1. / 2 * agnp.dot(lambda_phi[n, :],
                                (logDeltak - 2. * agnp.log(2 * agnp.pi) -
                                 lambda_nu * product - 2. / lambda_beta).T)

    product = agnp.array([
        agnp.dot(agnp.dot(lambda_m[k, :] - m_o, lambda_w[k, :, :]),
                 (lambda_m[k, :] - m_o).T) for k in range(K)
    ])
    traces = agnp.array([
        agnp.trace(agnp.dot(agnp.linalg.inv(w_o), lambda_w[k, :, :]))
        for k in range(K)
    ])
    h4 = agnp.sum((1. + agnp.log(2. * agnp.pi) - 1. / 2 *
                   (agnp.log(lambda_beta) + logdet)))
    logB = lambda_nu / 2. * logdet + lambda_nu * agnp.log(2.) \
           + 1. / 2 * agnp.log(agnp.pi) \
           + agscipy.gammaln(lambda_nu / 2.) \
           + agscipy.gammaln((lambda_nu - 1) / 2.)
    h5 = agnp.sum((logB - (lambda_nu - 3.) / 2. * logDeltak + lambda_nu))
    e4 = agnp.sum(
        (1. / 2 * (agnp.log(beta_o) + logDeltak - 2 * agnp.log(2. * agnp.pi) -
                   beta_o * lambda_nu * product - 2. * beta_o / lambda_beta)))
    logB = nu_o / 2. * agnp.log(agnp.linalg.det(w_o)) + nu_o * agnp.log(2.) \
           + 1. / 2 * agnp.log(agnp.pi) \
           + agscipy.gammaln(nu_o / 2.) + agscipy.gammaln((nu_o - 1) / 2.)
    e5 = agnp.sum(
        (-logB + (nu_o - 3.) / 2. * logDeltak - lambda_nu / 2. * traces))

    return e1 + e2 + e3 + e4 + e5 + h1 + h2 + h4 + h5
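The function above leans on several names defined elsewhere in its module (alpha_o, beta_o, nu_o, m_o, w_o, K, N, xn, log_). One of them, log_beta_function, is presumably the log of the multivariate Beta function; a plausible sketch under that standard definition (my assumption, not taken from the original source):

def log_beta_function(x):
    # log B(x) = sum_i log Gamma(x_i) - log Gamma(sum_i x_i)
    return agnp.sum(agscipy.gammaln(x)) - agscipy.gammaln(agnp.sum(x))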
Example #4
def EPtaulambda(self, tau_mu, tau_sigma, tau_a_prior, lambda_a_prior,
                lambda_b_prior, lambda_a_hat, lambda_b_hat):
    r""" E[ln p(\tau | \lambda)] + E[ln p(\lambda)] """
    etau_given_lambda = -gammaln(tau_a_prior) - tau_a_prior * (
        np.log(lambda_b_hat) -
        psi(lambda_a_hat)) + (-tau_a_prior - 1.) * tau_mu - np.exp(
            -tau_mu + 0.5 * tau_sigma**2) * (lambda_a_hat / lambda_b_hat)
    elambda = -gammaln(lambda_a_prior) - 2 * lambda_a_prior * np.log(
        lambda_b_prior) + (-lambda_a_prior - 1.) * (
            np.log(lambda_b_hat) - psi(lambda_a_hat)) - (
                1. / lambda_b_prior**2) * (lambda_a_hat / lambda_b_hat)
    return np.sum(etau_given_lambda) + np.sum(elambda)
Example #5
def unpack_params(self, params):
    # Walk through the flat parameter vector: weight means, weight
    # scales, then the per-output, per-layer and output-layer noise
    # parameters. Scale parameters are kept positive via the
    # softplus transform log(1 + exp(.)).
    w_vect = params[:self.n_weights]
    num_std = 2 * self.n_weights
    sigma = np.log(1 + np.exp(params[self.n_weights:num_std]))
    i = num_std
    tau_mu = params[i:i + self.tot_outputs]
    i += self.tot_outputs
    tau_sigma = np.log(1 + np.exp(params[i:i + self.tot_outputs]))
    i += self.tot_outputs
    tau_mu_global = params[i:i + self.num_hidden_layers]
    i += self.num_hidden_layers
    tau_sigma_global = np.log(
        1 + np.exp(params[i:i + self.num_hidden_layers]))
    i += self.num_hidden_layers
    tau_mu_oplayer = params[i:i + 1]
    tau_sigma_oplayer = np.log(1 + np.exp(params[i + 1:]))
    if not self.classification:
        a = tau_sigma_oplayer[1]
        b = tau_sigma_oplayer[2]
        tau_sigma_oplayer = tau_sigma_oplayer[0]
        egamma = a / b
        elog_gamma = psi(a) - np.log(b)
        self.noise_entropy = inv_gamma_entropy(a, b)
        # Use a point estimate of the noise variance, b / (a + 1)
        # (the mode of the inverse-gamma posterior, noise_var ~ IGamma),
        # for computing the predictive log-likelihood.
        self.noisevar = (b / (a + 1)) * self.train_stats['sigma']**2
        return w_vect, sigma, tau_mu, tau_sigma, tau_mu_global, \
               tau_sigma_global, tau_mu_oplayer, tau_sigma_oplayer, \
               elog_gamma, egamma
    else:
        return w_vect, sigma, tau_mu, tau_sigma, tau_mu_global, \
               tau_sigma_global, tau_mu_oplayer, tau_sigma_oplayer
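A side note on the repeated np.log(1 + np.exp(...)) pattern above: it is the softplus transform. If overflow for large inputs is a concern, an equivalent and numerically safer formulation (my suggestion, not in the original code) is:

def softplus(z):
    # log(1 + exp(z)) computed without overflow for large z
    return np.logaddexp(0.0, z)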
Example #6
def grad_tlogpdf_df(x, df, loc, scale):
    # Gradient of the Student-t log-density w.r.t. df (the same
    # function as Example #1).
    y = (x - loc) / scale
    return 0.5 * ((y**2 * (df + 1)) / (df * (y**2 + df))
                  - np.log(y**2 / df + 1) - 1.0 / df
                  - psi(df / 2.0) + psi((df + 1) / 2.0))
Example #7
def inv_gamma_entropy(a, b):
    # Differential entropy of an inverse-gamma distribution with
    # shape a and scale b, summed over array-valued parameters.
    return np.sum(a + np.log(b) + gammaln(a) - (1 + a) * psi(a))
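As a sanity check (my addition): for scalar parameters this matches scipy.stats.invgamma.entropy, since b enters scipy's parameterization as the scale.

import numpy as np
from scipy.special import gammaln, psi
from scipy.stats import invgamma

a, b = 3.0, 2.0
assert np.isclose(inv_gamma_entropy(a, b), invgamma.entropy(a, scale=b))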
Example #8
def grad_beta_logpdf_arg2(x, a, b):
    # d/db of the Beta(a, b) log-density at x.
    return np.log1p(-x) - psi(b) + psi(a + b)
Example #9
def grad_beta_logpdf_arg1(x, a, b):
    # d/da of the Beta(a, b) log-density at x.
    return np.log(x) - psi(a) + psi(a + b)
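Both Beta gradients can be checked the same way (my addition): central finite differences of scipy.stats.beta.logpdf in each shape parameter.

import numpy as np
from scipy.special import psi
from scipy.stats import beta as beta_dist

x, a, b, eps = 0.3, 2.0, 5.0, 1e-6
fd_a = (beta_dist.logpdf(x, a + eps, b)
        - beta_dist.logpdf(x, a - eps, b)) / (2 * eps)
fd_b = (beta_dist.logpdf(x, a, b + eps)
        - beta_dist.logpdf(x, a, b - eps)) / (2 * eps)
assert np.isclose(grad_beta_logpdf_arg1(x, a, b), fd_a)
assert np.isclose(grad_beta_logpdf_arg2(x, a, b), fd_b)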
Example #10
from __future__ import absolute_import

import autograd.numpy as np
import autograd.scipy.special as sp

# Each defjvp call below registers a forward-mode derivative (JVP)
# for the corresponding autograd-wrapped scipy.special primitive.

### Gamma functions ###
sp.polygamma.defjvp(lambda g, ans, gvs, vs, n, x: g * sp.polygamma(n + 1, x),
                    argnum=1)
sp.psi.defjvp(lambda g, ans, gvs, vs, x: g * sp.polygamma(1, x))
sp.digamma.defjvp(lambda g, ans, gvs, vs, x: g * sp.polygamma(1, x))
sp.gamma.defjvp(lambda g, ans, gvs, vs, x: g * ans * sp.psi(x))
sp.gammaln.defjvp(lambda g, ans, gvs, vs, x: g * sp.psi(x))
sp.rgamma.defjvp(lambda g, ans, gvs, vs, x: g * sp.psi(x) / -sp.gamma(x))
sp.multigammaln.defjvp(lambda g, ans, gvs, vs, a, d: g * np.sum(
    sp.digamma(np.expand_dims(a, -1) - np.arange(d) / 2.), -1))

### Bessel functions ###
sp.j0.defjvp(lambda g, ans, gvs, vs, x: -g * sp.j1(x))
sp.y0.defjvp(lambda g, ans, gvs, vs, x: -g * sp.y1(x))
sp.j1.defjvp(lambda g, ans, gvs, vs, x: g * (sp.j0(x) - sp.jn(2, x)) / 2.0)
sp.y1.defjvp(lambda g, ans, gvs, vs, x: g * (sp.y0(x) - sp.yn(2, x)) / 2.0)
sp.jn.defjvp(lambda g, ans, gvs, vs, n, x: g *
             (sp.jn(n - 1, x) - sp.jn(n + 1, x)) / 2.0,
             argnum=1)
sp.yn.defjvp(lambda g, ans, gvs, vs, n, x: g *
             (sp.yn(n - 1, x) - sp.yn(n + 1, x)) / 2.0,
             argnum=1)

### Error Function ###
sp.erf.defjvp(
    lambda g, ans, gvs, vs, x: 2. * g * sp.inv_root_pi * np.exp(-x**2))
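Reverse-mode differentiation through these primitives already works out of the box in autograd; a quick check that psi's derivative is polygamma(1, x) (my addition, reusing this example's np/sp imports and independent of the JVP registrations above):

from autograd import grad

dpsi = grad(sp.psi)                  # d/dx psi(x)
assert np.isclose(dpsi(2.5), sp.polygamma(1, 2.5))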
Example #11
def gamma_entropy(theta):
    # Differential entropy of a Gamma(alpha, beta) distribution in the
    # shape/rate parameterization.
    alpha, beta = unwrap(theta)
    return alpha - np.log(beta) + gammaln(alpha) + (1 - alpha) * psi(alpha)
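For scalars this agrees with scipy.stats.gamma.entropy under the shape/rate convention (my addition; assumes unwrap returns (alpha, beta) with beta a rate, so scipy's scale is 1/beta):

import numpy as np
from scipy.special import gammaln, psi
from scipy.stats import gamma as gamma_dist

alpha, beta = 3.0, 2.0
direct = alpha - np.log(beta) + gammaln(alpha) + (1 - alpha) * psi(alpha)
assert np.isclose(direct, gamma_dist.entropy(alpha, scale=1.0 / beta))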
Example #12
def grad_gamma_logpdf_arg1(x, a):
    # d/da of the Gamma(a) log-density at x (unit rate/scale).
    return np.log(x) - psi(a)
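The same finite-difference check applies here (my addition; the formula assumes unit rate/scale, which is scipy's default):

import numpy as np
from scipy.special import psi
from scipy.stats import gamma as gamma_dist

x, a, eps = 1.7, 3.0, 1e-6
fd = (gamma_dist.logpdf(x, a + eps)
      - gamma_dist.logpdf(x, a - eps)) / (2 * eps)
assert np.isclose(grad_gamma_logpdf_arg1(x, a), fd)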