return 0.5 * (np.tril(mat) + np.triu(mat, 1).T)
    elif len(mat.shape) == 3:
        return 0.5 * (np.tril(mat) + np.swapaxes(np.triu(mat, 1), 1,2))
    else:
        raise ArithmeticError

def generalized_outer_product(mat):
    """Return the outer product of ``mat`` with itself, batched over rows.

    A rank-1 input yields the ordinary outer product ``np.outer(mat, mat)``.
    A rank-2 input is treated as a stack of row vectors and yields a rank-3
    array whose ``i``-th slice is the outer product of row ``i`` with itself.

    Raises:
        ArithmeticError: if ``mat`` has any rank other than 1 or 2.
    """
    rank = len(mat.shape)
    if rank == 1:
        return np.outer(mat, mat)
    if rank == 2:
        # out[i, j, k] = mat[i, j] * mat[i, k]
        return np.einsum('ij,ik->ijk', mat, mat)
    raise ArithmeticError

def covgrad(x, mean, cov):
    """Shared covariance term used by the pdf/logpdf gradient registrations.

    Computes ``inv(cov)`` minus the (row-wise) outer product of
    ``solve(cov, x - mean)`` with itself, then passes the difference through
    ``lower_half``.

    NOTE: the linear solve and the explicit inverse are computed separately;
    a Cholesky factorization of ``cov`` would let this be written more
    cleanly once one is available.
    """
    centered = x - mean
    # Transposes put the vector dimension first for solve() and restore the
    # original layout afterwards; they are no-ops for a rank-1 input.
    whitened = np.linalg.solve(cov, centered.T).T
    precision = np.linalg.inv(cov)
    return lower_half(precision - generalized_outer_product(whitened))

# Gradients of the multivariate-normal logpdf with respect to x (argnum 0),
# mean (argnum 1) and cov (argnum 2).  The incoming gradient is given
# trailing singleton axes so it multiplies each per-sample vector / matrix.
# The x and mean gradients differ only in sign.
logpdf.defgrad(
    lambda ans, x, mean, cov: unbroadcast(
        ans, x,
        lambda grad_out: -np.expand_dims(grad_out, 1) * np.linalg.solve(cov, (x - mean).T).T),
    argnum=0)
logpdf.defgrad(
    lambda ans, x, mean, cov: unbroadcast(
        ans, mean,
        lambda grad_out: np.expand_dims(grad_out, 1) * np.linalg.solve(cov, (x - mean).T).T),
    argnum=1)
logpdf.defgrad(
    lambda ans, x, mean, cov: unbroadcast(
        ans, cov,
        lambda grad_out: -np.reshape(grad_out, np.shape(grad_out) + (1, 1)) * covgrad(x, mean, cov)),
    argnum=2)

# Same as log pdf, but multiplied by the pdf (ans).
#
# The weight ``g * ans`` is given trailing singleton axes and the solve is
# applied to the transposed residual, exactly as in the logpdf registrations.
# Without this, a batch of points (x of shape (n, d)) makes ``g * ans`` of
# shape (n,) broadcast against the wrong axis, and solve() treats the batch
# as a (d, k) right-hand side.  For a single point the result is unchanged:
# the weight is a scalar and the transposes are no-ops on a rank-1 array.
pdf.defgrad(
    lambda ans, x, mean, cov: unbroadcast(
        ans, x,
        lambda g: -np.expand_dims(g * ans, 1) * np.linalg.solve(cov, (x - mean).T).T),
    argnum=0)
pdf.defgrad(
    lambda ans, x, mean, cov: unbroadcast(
        ans, mean,
        lambda g: np.expand_dims(g * ans, 1) * np.linalg.solve(cov, (x - mean).T).T),
    argnum=1)
pdf.defgrad(
    lambda ans, x, mean, cov: unbroadcast(
        ans, cov,
        lambda g: -np.reshape(g * ans, np.shape(g) + (1, 1)) * covgrad(x, mean, cov)),
    argnum=2)

# Argument 0 (mean) is registered as having zero gradient; only the
# covariance (argnum 1) gets a gradient, 0.5 * inv(cov) transposed, scaled
# by the incoming gradient.
entropy.defgrad_is_zero(argnums=(0,))
entropy.defgrad(
    lambda ans, mean, cov: unbroadcast(
        ans, cov,
        lambda grad_out: 0.5 * grad_out * np.linalg.inv(cov).T),
    argnum=1)
Example #2
0
"""Gradients of the normal distribution."""

from __future__ import absolute_import
import scipy.stats
import autogradwithbay.numpy as anp

from autogradwithbay.core import primitive
from autogradwithbay.numpy.numpy_grads import unbroadcast

# Wrap the scipy.stats.norm functions with `primitive` so gradients can be
# registered on them below via .defgrad().
pdf, cdf, logpdf, logcdf = map(primitive, (
    scipy.stats.norm.pdf,
    scipy.stats.norm.cdf,
    scipy.stats.norm.logpdf,
    scipy.stats.norm.logcdf,
))

# Gradients of the normal pdf with respect to x (default argnum), loc
# (argnum 1) and scale (argnum 2).  Each is written as a multiple of `ans`,
# the value the wrapped pdf returned.
pdf.defgrad(
    lambda ans, x, loc=0.0, scale=1.0: unbroadcast(
        ans, x,
        lambda grad_out: -grad_out * ans * (x - loc) / scale**2))
pdf.defgrad(
    lambda ans, x, loc=0.0, scale=1.0: unbroadcast(
        ans, loc,
        lambda grad_out: grad_out * ans * (x - loc) / scale**2),
    argnum=1)
pdf.defgrad(
    lambda ans, x, loc=0.0, scale=1.0: unbroadcast(
        ans, scale,
        lambda grad_out: grad_out * ans * (((x - loc)/scale)**2 - 1.0)/scale),
    argnum=2)

# Gradients of the normal cdf with respect to x (default argnum), loc
# (argnum 1) and scale (argnum 2).  All three are multiples of the pdf
# evaluated at the same arguments.
cdf.defgrad(
    lambda ans, x, loc=0.0, scale=1.0: unbroadcast(
        ans, x,
        lambda grad_out: grad_out * pdf(x, loc, scale)))
cdf.defgrad(
    lambda ans, x, loc=0.0, scale=1.0: unbroadcast(
        ans, loc,
        lambda grad_out: -grad_out * pdf(x, loc, scale)),
    argnum=1)
cdf.defgrad(
    lambda ans, x, loc=0.0, scale=1.0: unbroadcast(
        ans, scale,
        lambda grad_out: -grad_out * pdf(x, loc, scale)*(x-loc)/scale),
    argnum=2)

# Gradients of the normal logpdf with respect to x (default argnum), loc
# (argnum 1) and scale (argnum 2).
logpdf.defgrad(
    lambda ans, x, loc=0.0, scale=1.0: unbroadcast(
        ans, x,
        lambda grad_out: -grad_out * (x - loc) / scale**2))
logpdf.defgrad(
    lambda ans, x, loc=0.0, scale=1.0: unbroadcast(
        ans, loc,
        lambda grad_out: grad_out * (x - loc) / scale**2),
    argnum=1)
logpdf.defgrad(
    lambda ans, x, loc=0.0, scale=1.0: unbroadcast(
        ans, scale,
        lambda grad_out: grad_out * (-1.0/scale + (x - loc)**2/scale**3)),
    argnum=2)

# Gradients of the normal logcdf with respect to x (default argnum), loc
# (argnum 1) and scale (argnum 2).  The ratio pdf/cdf is formed in log space
# as exp(logpdf - logcdf).
logcdf.defgrad(
    lambda ans, x, loc=0.0, scale=1.0: unbroadcast(
        ans, x,
        lambda grad_out: grad_out * anp.exp(logpdf(x, loc, scale) - logcdf(x, loc, scale))))
logcdf.defgrad(
    lambda ans, x, loc=0.0, scale=1.0: unbroadcast(
        ans, loc,
        lambda grad_out: -grad_out * anp.exp(logpdf(x, loc, scale) - logcdf(x, loc, scale))),
    argnum=1)
logcdf.defgrad(
    lambda ans, x, loc=0.0, scale=1.0: unbroadcast(
        ans, scale,
        lambda grad_out: -grad_out * anp.exp(logpdf(x, loc, scale) - logcdf(x, loc, scale))*(x-loc)/scale),
    argnum=2)