def wrap_namespace(old, new):
    """Populate dict-like `new` with differentiable wrappers of `old`'s contents.

    Plain functions and ufuncs are wrapped as autograd primitives, integer
    dtypes get int-dtype wrappers, and a small set of simple types is copied
    through unchanged.  Anything else is silently skipped.
    """
    passthrough_types = {float, int, type(None), type}
    integer_dtypes = {np.int, np.int8, np.int16, np.int32, np.int64, np.integer}
    callable_types = {np.ufunc, types.FunctionType, types.BuiltinFunctionType}
    for key, val in iteritems(old):
        kind = type(val)
        if kind in callable_types:
            # Functions become traced primitives so gradients flow through them.
            new[key] = primitive(val)
        elif kind is type and val in integer_dtypes:
            # Integer dtypes get a wrapper (int outputs carry no gradient).
            new[key] = wrap_intdtype(val)
        elif kind in passthrough_types:
            new[key] = val
from __future__ import absolute_import
import scipy.stats
import autogradwithbay.numpy as np
from autogradwithbay.core import primitive
from autogradwithbay.numpy.numpy_grads import unbroadcast

# Differentiable wrappers around scipy's multivariate normal routines.
pdf = primitive(scipy.stats.multivariate_normal.pdf)
logpdf = primitive(scipy.stats.multivariate_normal.logpdf)
entropy = primitive(scipy.stats.multivariate_normal.entropy)

# With thanks to Eric Bresch.
# Some formulas are from
# "An extended collection of matrix derivative results
#  for forward and reverse mode algorithmic differentiation"
# by Mike Giles
# https://people.maths.ox.ac.uk/gilesm/files/NA-08-01.pdf

def lower_half(mat):
    # Takes the lower half of the matrix, and half the diagonal.
    # Necessary since numpy only uses lower half of covariance matrix.
    # Handles a single matrix (ndim == 2) or a stack of them (ndim == 3);
    # any other rank is an error.
    if len(mat.shape) == 2:
        return 0.5 * (np.tril(mat) + np.triu(mat, 1).T)
    elif len(mat.shape) == 3:
        # Batched case: transpose the last two axes of the strict upper half.
        return 0.5 * (np.tril(mat) + np.swapaxes(np.triu(mat, 1), 1, 2))
    else:
        raise ArithmeticError

def generalized_outer_product(mat):
    # NOTE(review): this definition appears truncated in the visible chunk —
    # presumably a batched (ndim > 1) branch follows; confirm against the
    # full file before relying on this view.
    if len(mat.shape) == 1:
        return np.outer(mat, mat)
from __future__ import absolute_import
import scipy.stats
import autogradwithbay.numpy as np
from autogradwithbay.scipy.special import digamma
from autogradwithbay.core import primitive

# Differentiable wrappers around scipy's Dirichlet distribution routines.
rvs = primitive(scipy.stats.dirichlet.rvs)
pdf = primitive(scipy.stats.dirichlet.pdf)
logpdf = primitive(scipy.stats.dirichlet.logpdf)

def _logpdf_grad_x(ans, x, alpha):
    # d/dx log Dir(x | alpha) = (alpha - 1) / x
    return lambda g: g * (alpha - 1) / x

def _logpdf_grad_alpha(ans, x, alpha):
    # d/dalpha log Dir(x | alpha) = digamma(sum(alpha)) - digamma(alpha) + log(x)
    return lambda g: g * (digamma(np.sum(alpha)) - digamma(alpha) + np.log(x))

def _pdf_grad_x(ans, x, alpha):
    # Same as the log pdf gradient, scaled by the pdf value (ans).
    return lambda g: g * ans * (alpha - 1) / x

def _pdf_grad_alpha(ans, x, alpha):
    # Same as the log pdf gradient, scaled by the pdf value (ans).
    return lambda g: g * ans * (digamma(np.sum(alpha)) - digamma(alpha) + np.log(x))

logpdf.defgrad(_logpdf_grad_x, argnum=0)
logpdf.defgrad(_logpdf_grad_alpha, argnum=1)
pdf.defgrad(_pdf_grad_x, argnum=0)
pdf.defgrad(_pdf_grad_alpha, argnum=1)
from __future__ import absolute_import

try:
    # scipy.misc.logsumexp was deprecated in SciPy 1.0 and removed in 1.3
    # (scipy.misc itself is gone as of 1.12); scipy.special has hosted
    # logsumexp since SciPy 0.19.
    from scipy.special import logsumexp as _scipy_logsumexp
except ImportError:  # pragma: no cover - very old scipy
    from scipy.misc import logsumexp as _scipy_logsumexp

from autogradwithbay.core import primitive
import autogradwithbay.numpy as anp
from autogradwithbay.numpy.numpy_grads import repeat_to_match_shape

# Differentiable logsumexp: log(sum(b * exp(x))) along `axis`.
logsumexp = primitive(_scipy_logsumexp)

def make_grad_logsumexp(ans, x, axis=None, b=1.0, keepdims=False):
    """Return the VJP of logsumexp with respect to x.

    d/dx logsumexp(x) = b * exp(x - ans); `repeater` broadcasts the
    (possibly axis-reduced) cotangent g and result ans back to x's shape.
    """
    repeater, _ = repeat_to_match_shape(x, axis, keepdims)
    return lambda g: repeater(g) * b * anp.exp(x - repeater(ans))

logsumexp.defgrad(make_grad_logsumexp)
from __future__ import absolute_import
import scipy.special
import autogradwithbay.numpy as np
from autogradwithbay.core import primitive

# Differentiable wrappers around scipy's gamma-family special functions.
polygamma = primitive(scipy.special.polygamma)
psi = primitive(scipy.special.psi)        # psi(x) is just polygamma(0, x)
digamma = primitive(scipy.special.digamma)  # digamma is another name for psi.
gamma = primitive(scipy.special.gamma)
gammaln = primitive(scipy.special.gammaln)
gammasgn = primitive(scipy.special.gammasgn)
rgamma = primitive(scipy.special.rgamma)
multigammaln = primitive(scipy.special.multigammaln)

# gammasgn is piecewise constant, so its gradient is zero everywhere it exists.
gammasgn.defgrad_is_zero()
# The order n of polygamma is a discrete parameter: no gradient.
polygamma.defgrad_is_zero(argnums=(0,))
# d/dx polygamma(n, x) = polygamma(n + 1, x)
polygamma.defgrad(lambda ans, n, x: lambda g: g * polygamma(n + 1, x), argnum=1)
# d/dx psi(x) = d/dx digamma(x) = polygamma(1, x)  (the trigamma function)
psi.defgrad(     lambda ans, x: lambda g: g * polygamma(1, x))
digamma.defgrad( lambda ans, x: lambda g: g * polygamma(1, x))
# d/dx gamma(x) = gamma(x) * psi(x); ans is gamma(x).
gamma.defgrad(   lambda ans, x: lambda g: g * ans * psi(x))
# d/dx gammaln(x) = psi(x)
gammaln.defgrad( lambda ans, x: lambda g: g * psi(x))
# d/dx (1/gamma(x)) = -psi(x) / gamma(x)
rgamma.defgrad(  lambda ans, x: lambda g: g * psi(x) / -gamma(x))
# d/da multigammaln(a, d) = sum_{j=0}^{d-1} digamma(a - j/2)
multigammaln.defgrad(lambda ans, a, d: lambda g:
                     g * np.sum(digamma(np.expand_dims(a, -1) - np.arange(d)/2.), -1))
# The dimension d is a discrete parameter: no gradient.
multigammaln.defgrad_is_zero(argnums=(1,))

### Bessel functions ###
# NOTE(review): this chunk ends here; the Bessel section presumably
# continues (gradients for j0 etc.) beyond the visible view.
j0 = primitive(scipy.special.j0)
"""Gradients of the normal distribution.""" from __future__ import absolute_import import scipy.stats import autogradwithbay.numpy as anp from autogradwithbay.core import primitive from autogradwithbay.numpy.numpy_grads import unbroadcast pdf = primitive(scipy.stats.norm.pdf) cdf = primitive(scipy.stats.norm.cdf) logpdf = primitive(scipy.stats.norm.logpdf) logcdf = primitive(scipy.stats.norm.logcdf) pdf.defgrad(lambda ans, x, loc=0.0, scale=1.0: unbroadcast(ans, x, lambda g: -g * ans * (x - loc) / scale**2)) pdf.defgrad(lambda ans, x, loc=0.0, scale=1.0: unbroadcast(ans, loc, lambda g: g * ans * (x - loc) / scale**2), argnum=1) pdf.defgrad(lambda ans, x, loc=0.0, scale=1.0: unbroadcast(ans, scale, lambda g: g * ans * (((x - loc)/scale)**2 - 1.0)/scale), argnum=2) cdf.defgrad(lambda ans, x, loc=0.0, scale=1.0: unbroadcast(ans, x, lambda g: g * pdf(x, loc, scale))) cdf.defgrad(lambda ans, x, loc=0.0, scale=1.0: unbroadcast(ans, loc, lambda g: -g * pdf(x, loc, scale)), argnum=1) cdf.defgrad(lambda ans, x, loc=0.0, scale=1.0: unbroadcast(ans, scale, lambda g: -g * pdf(x, loc, scale)*(x-loc)/scale), argnum=2) logpdf.defgrad(lambda ans, x, loc=0.0, scale=1.0: unbroadcast(ans, x, lambda g: -g * (x - loc) / scale**2)) logpdf.defgrad(lambda ans, x, loc=0.0, scale=1.0: unbroadcast(ans, loc, lambda g: g * (x - loc) / scale**2), argnum=1) logpdf.defgrad(lambda ans, x, loc=0.0, scale=1.0: unbroadcast(ans, scale, lambda g: g * (-1.0/scale + (x - loc)**2/scale**3)), argnum=2) logcdf.defgrad(lambda ans, x, loc=0.0, scale=1.0: unbroadcast(ans, x, lambda g: g * anp.exp(logpdf(x, loc, scale) - logcdf(x, loc, scale)))) logcdf.defgrad(lambda ans, x, loc=0.0, scale=1.0: unbroadcast(ans, loc, lambda g: -g * anp.exp(logpdf(x, loc, scale) - logcdf(x, loc, scale))), argnum=1) logcdf.defgrad(lambda ans, x, loc=0.0, scale=1.0: unbroadcast(ans, scale, lambda g: -g * anp.exp(logpdf(x, loc, scale) - logcdf(x, loc, scale))*(x-loc)/scale), argnum=2)