Example #1
def test_multivector2vector_independent():
    def f(x, y):
        return x**2 + 2 * y**3 + 1

    def dfdx(x, y):
        return 2 * x

    def dfdy(x, y):
        return 6 * y**2

    x = np.array([2, 3, 4])
    y = np.array([1, 2, 3])
    ref_x = [4, 6, 8]
    ref_y = [6, 24, 54]
    npt.assert_array_almost_equal(fd(f, False)(x, y), ref_x, 5)
    npt.assert_array_almost_equal(fd(f, False, 1)(x, y), ref_y, 4)

    npt.assert_array_almost_equal_nulp(cs(f, False)(x, y), ref_x)
    npt.assert_array_almost_equal_nulp(cs(f, False, 1)(x, y), ref_y)

    npt.assert_array_equal(autograd(f, False)(x, y), ref_x)
    npt.assert_array_equal(autograd(f, False, 1)(x, y), ref_y)

    pf = primitive(f)
    defvjp(pf, lambda ans, x, y: lambda g: g * dfdx(x, y),
           lambda ans, x, y: lambda g: g * dfdy(x, y))
    npt.assert_array_equal(autograd(pf, False)(x, y), ref_x)
    npt.assert_array_equal(autograd(pf, False, 1)(x, y), ref_y)
Example #2
def wrap_namespace(old, new):
    unchanged_types =  set([types.FloatType, types.IntType, types.NoneType, types.TypeType])
    function_types = set([np.ufunc, types.FunctionType, types.BuiltinFunctionType])
    for name, obj in old.iteritems():
        if type(obj) in function_types:
            new[name] = primitive(obj)
        elif type(obj) in unchanged_types:
            new[name] = obj
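
For context (my addition, not part of the source): in each of the wrap_namespace variants collected here, old is a module namespace (a dict such as numpy.__dict__) and new is the namespace being built. Functions and ufuncs are re-exported as autograd primitives so gradients can later be attached, while a few basic types pass through unchanged. A minimal calling sketch, assuming a Python version matching the variant used:

import numpy as onp

wrapped = {}
wrap_namespace(onp.__dict__, wrapped)
# wrapped['sin'] is now primitive(numpy.sin); constants such as numpy.pi are
# copied through unchanged, and anything else is simply dropped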
Example #3
def wrap_namespace(old, new):
    unchanged_types =  set([float, int, type(None), type])
    function_types = set([np.ufunc, types.FunctionType, types.BuiltinFunctionType])
    for name, obj in six.iteritems(old):
        if type(obj) in function_types:
            new[name] = primitive(obj)
        elif type(obj) in unchanged_types:
            new[name] = obj
Example #4
def wrap_namespace(old, new):
    unchanged_types = set([float, int, type(None), type])
    function_types = set(
        [np.ufunc, types.FunctionType, types.BuiltinFunctionType])
    for name, obj in six.iteritems(old):
        if type(obj) in function_types:
            new[name] = primitive(obj)
        elif type(obj) in unchanged_types:
            new[name] = obj
Example #5
def wrap_namespace(old, new):
    unchanged_types = set(
        [types.FloatType, types.IntType, types.NoneType, types.TypeType])
    function_types = set(
        [np.ufunc, types.FunctionType, types.BuiltinFunctionType])
    for name, obj in old.iteritems():
        if type(obj) in function_types:
            new[name] = primitive(obj)
        elif type(obj) in unchanged_types:
            new[name] = obj
Example #6
    def set_gradient_funcs(self, power_ct_grad_func):
        def get_grad(ans, ws, **kwargs):
            def grad(g):
                return g * power_ct_grad_func(ws, **kwargs)

            return grad

        primitive_power_ct = primitive(self.power_ct)
        defvjp(primitive_power_ct, get_grad)
        self.power_ct = primitive_power_ct
Example #7
def wrap_namespace(old, new):
    unchanged_types = {float, int, type(None), type}
    int_types = {np.int, np.int8, np.int16, np.int32, np.int64, np.integer}
    function_types = {np.ufunc, types.FunctionType, types.BuiltinFunctionType}
    for name, obj in iteritems(old):
        if type(obj) in function_types:
            new[name] = primitive(obj)
        elif type(obj) is type and obj in int_types:
            new[name] = wrap_intdtype(obj)
        elif type(obj) in unchanged_types:
            new[name] = obj
Example #9
def checkpoint(fun):
    """Returns a checkpointed version of `fun`, where intermediate values
    computed during the forward pass of `fun` are discarded and then recomputed
    for the backward pass. Useful to save memory, effectively trading off time
    and memory. See e.g. arxiv.org/abs/1604.06174.
    """
    def wrapped_grad(argnum, g, ans, vs, gvs, args, kwargs):
        return make_vjp(fun, argnum)(*args, **kwargs)[0](g)
    wrapped = primitive(fun)
    wrapped.vjp = wrapped_grad
    return wrapped
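
A hypothetical usage sketch (my addition, not from the source; it assumes the autograd version this snippet targets, where a primitive's gradient is looked up through the vjp attribute set above):

import autograd.numpy as np
from autograd import grad

def expensive_block(x):
    # stand-in for a long chain of ops whose intermediates we would rather
    # recompute on the backward pass than keep in memory
    for _ in range(50):
        x = np.tanh(x)
    return x

checkpointed = checkpoint(expensive_block)

def loss(x):
    return np.sum(checkpointed(x) ** 2)

# gradients still flow through the wrapped block; memory is traded for the
# extra forward recomputation described in the docstring
g = grad(loss)(np.linspace(-1.0, 1.0, 5))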
Example #11
def test_scalar2scalar():
    def f(x):
        return x**2 + 1

    x = np.array([3])
    npt.assert_equal(cs(f)(x), 6)
    npt.assert_almost_equal(fd(f)(x), 6, 5)
    npt.assert_equal(autograd(f)(x), 6)
    pf = primitive(f)
    defvjp(pf, lambda ans, x: lambda g: g * 2 * x)
    npt.assert_array_equal(autograd(pf, False)(x), 6)
Example #12
def test_scalar2multi_scalar():
    def fxy(x):
        return x**2 + 1, 2 * x + 1

    def f(x):
        fx, fy = fxy(x)
        return fx + fy

    x = 3.
    ref = 8
    npt.assert_equal(cs(f)(x), ref)
    npt.assert_almost_equal(fd(f)(x), ref, 5)
    npt.assert_equal(autograd(f)(x), ref)

    pf = primitive(f)
    defvjp(pf, lambda ans, x: lambda g: g * (2 * x + 2))
    npt.assert_array_equal(autograd(pf, False)(x), ref)

    pf = primitive(fxy)
    defvjp(pf, lambda ans, x: lambda g: (g[0] * 2 * x, g[1] * 2))
    npt.assert_array_equal(autograd(f, False)(x), ref)
Example #13
def test_vector2vector_independent():
    def f(x):
        return x**2 + 1

    def df(x):
        return 2 * x

    x = np.array([2, 3, 4])
    ref = [4, 6, 8]
    npt.assert_array_almost_equal(fd(f, False)(x), ref, 5)
    npt.assert_array_equal(cs(f, False)(x), ref)
    npt.assert_array_equal(autograd(f, False)(x), ref)

    pf = primitive(f)
    defvjp(pf, lambda ans, x: lambda g: g * df(x))
    npt.assert_array_equal(autograd(pf, False)(x), ref)
Example #14
def test_vector2vector_dependent():
    def f(x):
        return x**2 + x[::-1]

    def df(x):
        return np.diag(2 * x) + np.diag(np.ones(3))[::-1]

    x = np.array([2., 3, 4])
    ref = [[4., 0., 1.], [0., 7., 0.], [1., 0., 8.]]
    npt.assert_array_almost_equal(fd(f, True)(x), ref, 5)
    npt.assert_array_almost_equal_nulp(cs(f, True)(x), ref)
    npt.assert_array_equal(autograd(f, True)(x), ref)

    pf = primitive(f)
    defvjp(pf, lambda ans, x: lambda g: np.dot(g, df(x)))
    npt.assert_array_equal(autograd(pf, True)(x), ref)
Example #15
def make_mlp(shapes):
    """ Make a multilayer perceptron function where we get gradients from 
    optimized theano code.  This returns a function handle

        mlp = make_mlp(shapes)

    where mlp(input, params) pushes inputs through a multi-layer perceptron
    with params = [(W1, b1), (W2, b2), ..., (WL, bL)] a list of weights and
    biases.

    Autograd will be able to differentiate functions that use this MLP, with
    theano supplying the gradients.
    """
    def pack(params):
        return np.concatenate([np.concatenate([np.ravel(W), b])
                               for W, b in params])

    def unpack(params):
        offset = 0
        for m, n in shapes:
            yield params[offset:offset+m*n].reshape((m,n)), params[offset+m*n:offset+(m+1)*n]
            offset += (m+1)*n

    def mlp(x, params):
        for W, b in unpack(params):
            x = T.tanh(T.dot(x, W) + b)
        return x

    # define the MLP's vector-Jacobian product (theano's Lop) for the backward pass
    params  = T.dvector('params')
    x       = T.dmatrix('x')
    g       = T.dmatrix('g')
    mlpval  = mlp(x, params)
    gradfun = theano.function([x, params, g], T.Lop(mlpval, params, g))

    # create python executable MLP function, define autograd primitive
    theano_mlpfun = theano.function([x, params], mlpval)
    mlpfun = primitive(lambda x, params: theano_mlpfun(x, pack(params)))
    mlpfun.defgrad(lambda ans, x, params: lambda g: list(unpack(gradfun(x, pack(params), g))), 1)

    return mlpfun
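
A hypothetical calling sketch (my addition; the shapes, data and loss below are illustrative and assume theano plus an autograd version compatible with the defgrad API):

import autograd.numpy as np
from autograd import grad

shapes = [(5, 4), (4, 3)]                    # layer sizes 5 -> 4 -> 3
mlpfun = make_mlp(shapes)

inputs = np.random.randn(10, 5)              # 10 examples, 5 features each
params = [(np.random.randn(m, n), np.random.randn(n)) for m, n in shapes]

def loss(params):
    return np.sum(mlpfun(inputs, params) ** 2)

# reverse-mode AD flows through the theano-backed primitive via the gradient
# registered above with defgrad(..., 1)
param_grads = grad(loss)(params)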
Example #16
from __future__ import absolute_import
import scipy.special

from autograd.core import primitive

polygamma = primitive(scipy.special.polygamma)
psi = primitive(scipy.special.psi)  # psi(x) is just polygamma(0, x)
digamma = primitive(scipy.special.digamma)  # digamma is another name for psi.
gamma = primitive(scipy.special.gamma)

polygamma.defgrad_is_zero(argnums=(0, ))
polygamma.defgrad(lambda ans, n, x: lambda g: g * polygamma(n + 1, x),
                  argnum=1)
psi.defgrad(lambda ans, x: lambda g: g * polygamma(1, x))
digamma.defgrad(lambda ans, x: lambda g: g * polygamma(1, x))
gamma.defgrad(lambda ans, x: lambda g: g * ans * psi(x))

### Bessel functions ###

j0 = primitive(scipy.special.j0)
y0 = primitive(scipy.special.y0)
j1 = primitive(scipy.special.j1)
y1 = primitive(scipy.special.y1)
jn = primitive(scipy.special.jn)
yn = primitive(scipy.special.yn)

j0.defgrad(lambda ans, x: lambda g: -g * j1(x))
y0.defgrad(lambda ans, x: lambda g: -g * y1(x))
j1.defgrad(lambda ans, x: lambda g: g * (j0(x) - jn(2, x)) / 2.0)
y1.defgrad(lambda ans, x: lambda g: g * (y0(x) - yn(2, x)) / 2.0)
jn.defgrad_is_zero(argnums=(0, ))
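
A small sanity check (my addition; it assumes an installed autograd that ships autograd.scipy.special): with the rules above, differentiating the wrapped Bessel function reproduces the classical identity J0'(x) = -J1(x).

from autograd import grad
from autograd.scipy.special import j0, j1

x = 1.5
print(grad(j0)(x))   # should agree with -j1(x), the gradient registered above
print(-j1(x))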
Example #17
"""Gradients of the univariate t distribution."""
from __future__ import absolute_import
import scipy.stats
import autograd.numpy as np

from autograd.core import primitive
from autograd.numpy.numpy_grads import unbroadcast
from autograd.scipy.special import psi

pdf = primitive(scipy.stats.t.pdf)
cdf = primitive(scipy.stats.t.cdf)
logpdf = primitive(scipy.stats.t.logpdf)
logcdf = primitive(scipy.stats.t.logcdf)


def grad_tlogpdf_diff(diff, df):
    return -diff * (1.0 + df) / (diff**2 + df)


def grad_tlogpdf_x(x, df, loc, scale):
    return grad_tlogpdf_diff((x - loc) / scale, df) / scale


def grad_tlogpdf_loc(x, df, loc, scale):
    return -grad_tlogpdf_diff((x - loc) / scale, df) / scale


def grad_tlogpdf_scale(x, df, loc, scale):
    diff = x - loc
    return -(df * (scale**2 - diff**2)) / (scale * (df * scale**2 + diff**2))
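
A quick finite-difference check of grad_tlogpdf_x (my addition; it assumes scipy is installed and the helpers above are in scope):

import scipy.stats as ss

x, df, loc, scale = 0.7, 5.0, 0.1, 2.0
eps = 1e-6
numeric = (ss.t.logpdf(x + eps, df, loc, scale) -
           ss.t.logpdf(x - eps, df, loc, scale)) / (2 * eps)
analytic = grad_tlogpdf_x(x, df, loc, scale)
print(numeric, analytic)   # should agree to roughly 1e-6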
Example #18
File: util.py Project: lfywork/svae
import autograd.numpy as np  # needed below for np.exp, np.maximum, np.log1p
from autograd.misc import flatten
from itertools import islice, imap, cycle
import operator
from functools import partial
from toolz import curry

# autograd internals
from autograd.builtins import SequenceBox
from autograd.core import getval, primitive

### neural nets

identity = lambda x: x
sigmoid = lambda x: 1. / (1. + np.exp(-x))
relu = lambda x: np.maximum(x, 0.)
log1pexp = primitive(lambda x: np.log1p(np.exp(x)))
log1pexp.defgrad(lambda ans, x: lambda g: g / (1 + np.exp(-x)))
normalize = lambda x: x / np.sum(x, axis=-1, keepdims=True)
softmax = lambda x: normalize(np.exp(x - np.max(x, axis=-1, keepdims=True)))

### misc


def rle(stateseq):
    pos, = np.where(np.diff(stateseq) != 0)
    pos = np.concatenate(([0], pos + 1, [len(stateseq)]))
    return stateseq[pos[:-1]], np.diff(pos)


isarray = lambda x: hasattr(x, 'ndim')
flat = lambda x: flatten(x)[0]
Example #19
def make_conv_mlp(input_shape, layer_specs):
    """ Make a convolutional multilayer perceptron function where we get 
    gradients from optimized theano code.  This returns a function handle

        mlp, num_params, out_size = make_conv_mlp(input_shape, layer_specs)

    where mlp(input, params) is a function of 

      - inputs = tensor of size
              [num data,
               number of input feature maps (color-in),
               image height,
               image width]

      - params decomposes into a list of parameter tensors, the convolutional 
        tensor size is:

              [number of feature maps at output (color-out), (number of filters?)
               number of feature maps at input (color-in),
               filter height,
               filter width]

    Autograd will be able to differentiate functions that use this MLP, with
    theano supplying the gradients.
    """
    # compute the number of params and the output shape for each layer, and
    # compile a list of parameter slices into the flat param vector
    offset = 0
    param_slices = []
    cur_shape = input_shape
    for layer in layer_specs:
        N_weights, cur_shape = layer.build_weights_dict(cur_shape)
        param_slices.append(slice(offset, offset + N_weights))
        offset += N_weights
    num_params = offset
    out_size = np.prod(cur_shape)

    def unpack(params):
        for pslice in param_slices:
            yield params[pslice]

    def mlp(inputs, params):
        """applies each layer to the input, given parameter vector.
        shape of inputs : [data, color, y, x]"""
        cur_units = inputs
        for layer, layer_params in zip(layer_specs, unpack(params)):
            cur_units = layer.forward_pass(cur_units, layer_params)

        # make sure we're returning a 2-d ... for theano Lop
        return cur_units.reshape((inputs.shape[0], -1))

    # define symbolic variables that theano will manipulate
    inputs = T.tensor4(name='inputs', dtype='float32')
    params = T.fvector('params')
    g = T.fmatrix('g')

    # define the mlp symbolic function and executable function
    mlpval = mlp(inputs, params)
    theano_mlpfun = theano.function([inputs, params],
                                    mlpval,
                                    allow_input_downcast=True)

    # define the vector-Jacobian (Lop) gradient function needed for autograd
    gradfun = theano.function([inputs, params, g],
                              T.Lop(mlpval, params, g),
                              allow_input_downcast=True)

    # create python executable MLP function, define autograd primitive
    mlpfun = primitive(lambda x, params: theano_mlpfun(x, params))
    mlpfun.defgrad(lambda ans, x, params: lambda g: gradfun(x, params, g), 1)
    return mlpfun, num_params, out_size
Example #20
def make_natural_sample_grad_arg0(intermediates, ans, messages, pair_params,
                                  num_samples):
    return primitive(
        lambda g: cython_natural_sample_backward_grad(g, intermediates))
Example #21
def test_vector2multi_vector():
    def fxy(x):
        return x**2 + 1, 2 * x + 1

    def f0(x):
        return fxy(x)[0]

    def fsum(x):
        fx, fy = fxy(x)
        return fx + fy

    x = np.array([1., 2, 3])
    ref0 = [2, 4, 6]
    refsum = [4, 6, 8]
    npt.assert_equal(cs(f0)(x), ref0)
    npt.assert_almost_equal(fd(f0)(x), ref0, 5)
    npt.assert_equal(autograd(f0)(x), ref0)
    pf0 = primitive(f0)
    defvjp(pf0, lambda ans, x: lambda g: g * (2 * x))
    npt.assert_array_equal(autograd(pf0, False)(x), ref0)

    npt.assert_equal(cs(fsum)(x), refsum)
    npt.assert_almost_equal(fd(fsum)(x), refsum, 5)
    npt.assert_equal(autograd(fsum)(x), refsum)
    pfsum = primitive(fsum)
    defvjp(pfsum, lambda ans, x: lambda g: g * (2 * x + 2))
    npt.assert_array_equal(autograd(pfsum, False)(x), refsum)

    pfxy = primitive(fxy)

    def dfxy(x):
        return 2 * x, np.full(x.shape, 2)

    def gsum(x):
        fx, fy = pfxy(x)
        return fx + fy

    def g0(x):
        return pfxy(x)[0]

    pgsum = primitive(gsum)
    pg0 = primitive(g0)
    defvjp(pgsum, lambda ans, x: lambda g: g * np.sum(dfxy(x), 0))
    defvjp(pg0, lambda ans, x: lambda g: g * dfxy(x)[0])

    npt.assert_array_equal(autograd(pgsum, False)(x), refsum)
    npt.assert_array_equal(autograd(pg0, False)(x), ref0)

    defvjp(pfxy, lambda ans, x: lambda g: dfxy(x)[0])

    def h0(x):
        return pfxy(x)[0]

    npt.assert_array_equal(autograd(h0, False)(x), ref0)

    defvjp(pfxy, lambda ans, x: lambda g: np.sum(g * np.asarray(dfxy(x)), 0))

    def hsum(x):
        fx, fy = pfxy(x)
        return fx + fy

    npt.assert_array_equal(autograd(hsum, False)(x), refsum)
Example #22
"""Gradients of the normal distribution."""

from __future__ import absolute_import
import scipy.stats
import autograd.numpy as anp

from autograd.core import primitive
from autograd.numpy.numpy_grads import unbroadcast

pdf = primitive(scipy.stats.norm.pdf)
cdf = primitive(scipy.stats.norm.cdf)
logpdf = primitive(scipy.stats.norm.logpdf)
logcdf = primitive(scipy.stats.norm.logcdf)

pdf.defgrad(lambda ans, x, loc=0.0, scale=1.0: unbroadcast(ans, x,     lambda g: -g * ans * (x - loc) / scale**2))
pdf.defgrad(lambda ans, x, loc=0.0, scale=1.0: unbroadcast(ans, loc,   lambda g:  g * ans * (x - loc) / scale**2), argnum=1)
pdf.defgrad(lambda ans, x, loc=0.0, scale=1.0: unbroadcast(ans, scale, lambda g:  g * ans * (((x - loc)/scale)**2 - 1.0)/scale), argnum=2)

cdf.defgrad(lambda ans, x, loc=0.0, scale=1.0: unbroadcast(ans, x,     lambda g:  g * pdf(x, loc, scale)))
cdf.defgrad(lambda ans, x, loc=0.0, scale=1.0: unbroadcast(ans, loc,   lambda g: -g * pdf(x, loc, scale)), argnum=1)
cdf.defgrad(lambda ans, x, loc=0.0, scale=1.0: unbroadcast(ans, scale, lambda g: -g * pdf(x, loc, scale)*(x-loc)/scale), argnum=2)

logpdf.defgrad(lambda ans, x, loc=0.0, scale=1.0: unbroadcast(ans, x,     lambda g: -g * (x - loc) / scale**2))
logpdf.defgrad(lambda ans, x, loc=0.0, scale=1.0: unbroadcast(ans, loc,   lambda g:  g * (x - loc) / scale**2), argnum=1)
logpdf.defgrad(lambda ans, x, loc=0.0, scale=1.0: unbroadcast(ans, scale, lambda g:  g * (-1.0/scale + (x - loc)**2/scale**3)), argnum=2)

logcdf.defgrad(lambda ans, x, loc=0.0, scale=1.0: unbroadcast(ans, x,     lambda g:  g * anp.exp(logpdf(x, loc, scale) - logcdf(x, loc, scale))))
logcdf.defgrad(lambda ans, x, loc=0.0, scale=1.0: unbroadcast(ans, loc,   lambda g: -g * anp.exp(logpdf(x, loc, scale) - logcdf(x, loc, scale))), argnum=1)
logcdf.defgrad(lambda ans, x, loc=0.0, scale=1.0: unbroadcast(ans, scale, lambda g: -g * anp.exp(logpdf(x, loc, scale) - logcdf(x, loc, scale))*(x-loc)/scale), argnum=2)
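
For reference (my note, not part of the source): with z = (x - loc)/scale, log N(x; loc, scale) = -z**2/2 - log(scale) - log(sqrt(2*pi)), so d/dx = -(x - loc)/scale**2, d/dloc = (x - loc)/scale**2 and d/dscale = -1/scale + (x - loc)**2/scale**3, which are exactly the three logpdf rules above. The pdf rules are the same expressions multiplied by ans (since d pdf = pdf * d logpdf), and the cdf/logcdf rules follow from d cdf / dx = pdf.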
Example #23
from __future__ import absolute_import
import scipy.signal

from autograd.core import primitive
from autograd.numpy import flipud

convolve = primitive(scipy.signal.convolve)
convolve2d = primitive(scipy.signal.convolve2d)


def get_same_slice(L_in0, L_in1):
    left_pad = L_in0 - (L_in1 + 1) / 2
    return slice(left_pad, left_pad + L_in1)


def make_grad_convolve_0(ans, in0, in1, mode='full'):
    if mode == 'full':
        return lambda g: convolve(g, flipud(in1), mode='valid')
    elif mode == 'same':
        return lambda g: flipud(convolve(flipud(g), in1, mode='same'))
    elif mode == 'valid':
        return lambda g: convolve(g, flipud(in1), mode='full')
    else:
        raise Exception("Unrecognized mode {0}".format(mode))


convolve.defgrad(make_grad_convolve_0, argnum=0)


def make_grad_convolve_1(ans, in0, in1, mode='full'):
    if mode == 'full':
Example #24
def make_natural_filter_grad_arg2(intermediates, ans, init_params, pair_params, node_params):
    return primitive(lambda g: cython_natural_filter_grad(g, intermediates))
Example #25
from __future__ import absolute_import
import scipy.special
import autograd.numpy as np

from autograd.core import primitive

polygamma = primitive(scipy.special.polygamma)
psi = primitive(scipy.special.psi)  # psi(x) is just polygamma(0, x)
digamma = primitive(scipy.special.digamma)  # digamma is another name for psi.
gamma = primitive(scipy.special.gamma)
gammaln = primitive(scipy.special.gammaln)
gammasgn = primitive(scipy.special.gammasgn)
rgamma = primitive(scipy.special.rgamma)
multigammaln = primitive(scipy.special.multigammaln)

gammasgn.defgrad_is_zero()
polygamma.defgrad_is_zero(argnums=(0, ))
polygamma.defgrad(lambda ans, n, x: lambda g: g * polygamma(n + 1, x),
                  argnum=1)
psi.defgrad(lambda ans, x: lambda g: g * polygamma(1, x))
digamma.defgrad(lambda ans, x: lambda g: g * polygamma(1, x))
gamma.defgrad(lambda ans, x: lambda g: g * ans * psi(x))
gammaln.defgrad(lambda ans, x: lambda g: g * psi(x))
rgamma.defgrad(lambda ans, x: lambda g: g * psi(x) / -gamma(x))
multigammaln.defgrad(lambda ans, a, d: lambda g: g * np.sum(
    digamma(np.expand_dims(a, -1) - np.arange(d) / 2.), -1))
multigammaln.defgrad_is_zero(argnums=(1, ))

### Bessel functions ###

j0 = primitive(scipy.special.j0)
Example #26
from __future__ import absolute_import
import scipy.misc

from autograd.core import primitive
import autograd.numpy as anp
from autograd.numpy.numpy_grads import repeat_to_match_shape

logsumexp = primitive(scipy.misc.logsumexp)


def make_grad_logsumexp(ans, x, axis=None, b=1.0, keepdims=False):
    repeater, _ = repeat_to_match_shape(x, axis, keepdims)
    return lambda g: repeater(g) * b * anp.exp(x - repeater(ans))


logsumexp.defgrad(make_grad_logsumexp)
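
Side note (my addition): the rule above implements d/dx_i logsumexp(x; b) = b_i * exp(x_i) / sum_j b_j * exp(x_j) = b_i * exp(x_i - logsumexp(x, b)); repeater simply broadcasts g and ans back to the shape of x when an axis was reduced without keepdims.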
Example #27
from __future__ import absolute_import
import scipy.stats

import autograd.numpy as np
from autograd.scipy.special import digamma
from autograd.core import primitive

rvs    = primitive(scipy.stats.dirichlet.rvs)
pdf    = primitive(scipy.stats.dirichlet.pdf)
logpdf = primitive(scipy.stats.dirichlet.logpdf)

logpdf.defvjp(lambda g, ans, vs, gvs, x, alpha: g * (alpha - 1) / x, argnum=0)
logpdf.defvjp(lambda g, ans, vs, gvs, x, alpha: g * (digamma(np.sum(alpha)) - digamma(alpha) + np.log(x)), argnum=1)

# Same as log pdf, but multiplied by the pdf (ans).
pdf.defvjp(lambda g, ans, vs, gvs, x, alpha: g * ans * (alpha - 1) / x, argnum=0)
pdf.defvjp(lambda g, ans, vs, gvs, x, alpha: g * ans * (digamma(np.sum(alpha)) - digamma(alpha) + np.log(x)), argnum=1)
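
For reference (my note): log Dir(x; alpha) = sum_i (alpha_i - 1) * log(x_i) - log B(alpha), so d/dx_i = (alpha_i - 1) / x_i and d/dalpha_i = digamma(sum_j alpha_j) - digamma(alpha_i) + log(x_i), which is exactly what the two logpdf rules encode; the pdf rules are the same expressions multiplied by ans, since d pdf = pdf * d logpdf.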
Example #28
        v = solve_triangular(a, g, trans=_flip(a, trans), lower=lower)
        return -transpose(tri(anp.matmul(anp.reshape(v, ans.shape), T(ans))))

    return solve_triangular_grad
solve_triangular.defgrad(make_grad_solve_triangular)
solve_triangular.defgrad(lambda ans, a, b, trans=0, lower=False, **kwargs:
                         lambda g: solve_triangular(a, g, trans=_flip(a, trans), lower=lower),
                         argnum=1)

### cholesky

solve_trans = lambda L, X: solve_triangular(L, X, lower=True, trans='T')
solve_conj = lambda L, X: solve_trans(L, T(solve_trans(L, T(X))))
phi = lambda X: anp.tril(X) / (1. + anp.eye(X.shape[-1]))

cholesky = primitive(np.linalg.cholesky)
cholesky.defgrad(lambda L, A: lambda g: symm(solve_conj(L, phi(anp.matmul(T(L), g)))))


### operations on cholesky factors

solve_tri = partial(solve_triangular, lower=True)
solve_posdef_from_cholesky = lambda L, x: solve_tri(L, solve_tri(L, x), trans='T')

@primitive
def inv_posdef_from_cholesky(L, lower=True):
    flat_L = np.reshape(L, (-1,) + L.shape[-2:])
    return np.reshape(cyla.inv_posdef_from_cholesky(C(flat_L), lower), L.shape)

square_grad = lambda X: lambda g: anp.matmul(g, X) + anp.matmul(T(g), X)
sym_inv_grad = lambda Xinv: lambda g: -anp.matmul(Xinv, anp.matmul(g, Xinv))
Example #29
def make_natural_filter_grad_arg2(intermediates, ans, init_params, pair_params,
                                  node_params):
    return primitive(lambda g: cython_natural_filter_grad(g, intermediates))
Example #30
"""Gradients of the univariate t distribution."""
from __future__ import absolute_import
import scipy.stats
import autograd.numpy as np

from autograd.core import primitive
from autograd.numpy.numpy_grads import unbroadcast
from autograd.scipy.special import psi

pdf = primitive(scipy.stats.t.pdf)
cdf = primitive(scipy.stats.t.cdf)
logpdf = primitive(scipy.stats.t.logpdf)
logcdf = primitive(scipy.stats.t.logcdf)

def grad_tlogpdf_diff(diff, df):
    return -diff * (1.0 + df) / (diff**2 + df)
def grad_tlogpdf_x(x, df, loc, scale):
    return grad_tlogpdf_diff((x - loc) / scale, df) / scale
def grad_tlogpdf_loc(x, df, loc, scale):
    return -grad_tlogpdf_diff((x - loc) / scale, df) / scale
def grad_tlogpdf_scale(x, df, loc, scale):
    diff = x - loc
    return -(df * (scale**2 - diff**2))/(scale * (df * scale**2 + diff**2))
def grad_tlogpdf_df(x, df, loc, scale):
    y = (x - loc)/scale
    return 0.5 * ((y**2 * (df+1))/(df * (y**2 + df)) - np.log(y**2 / df + 1) - 1.0/df -psi(df/2.0) + psi((df + 1)/2.0))

pdf.defgrad(lambda ans, x, df, loc=0.0, scale=1.0: unbroadcast(ans, x,     lambda g: g * ans * grad_tlogpdf_x(    x, df, loc, scale)), argnum=0)
pdf.defgrad(lambda ans, x, df, loc=0.0, scale=1.0: unbroadcast(ans, df,    lambda g: g * ans * grad_tlogpdf_df(   x, df, loc, scale)), argnum=1)
pdf.defgrad(lambda ans, x, df, loc=0.0, scale=1.0: unbroadcast(ans, loc,   lambda g: g * ans * grad_tlogpdf_loc(  x, df, loc, scale)), argnum=2)
pdf.defgrad(lambda ans, x, df, loc=0.0, scale=1.0: unbroadcast(ans, scale, lambda g: g * ans * grad_tlogpdf_scale(x, df, loc, scale)), argnum=3)
Example #31
def make_natural_smoother_grad_arg0(intermediates, ans, forward_messages,
                                    pair_params):
    return primitive(lambda g: cython_natural_smoother_grad(g, intermediates))
Example #32
def make_grad_hmm_logZ(intermediates, ans, hmm):
    _, pair_params, _ = hmm
    return primitive(lambda g: hmm_logZ_grad(g, intermediates))
Example #33
import autograd.numpy as np
import scipy.stats as ss
import scipy.special as sp

from autograd.core import primitive

a2d = np.atleast_2d

beta = primitive(sp.beta)


def make_grad_beta(ans, X, sz=(1, 1), a=1, b=1):
    def gradient_product(g):
        pass

    return gradient_product


beta.defgrad(make_grad_beta)

#
# ==================================================================
#


class choice_erp:
    @staticmethod
    def diffparms():
        return ["p"]

    @staticmethod
Example #34
from __future__ import absolute_import
import scipy.special

from autograd.core import primitive

polygamma = primitive(scipy.special.polygamma)
psi = primitive(scipy.special.psi)  # psi(x) is just polygamma(0, x)
digamma = primitive(scipy.special.digamma)  # digamma is another name for psi.
gamma = primitive(scipy.special.gamma)

polygamma.defgrad_is_zero(argnums=(0,))
polygamma.defgrad(lambda ans, n, x: lambda g: g * polygamma(n + 1, x), argnum=1)
psi.defgrad(lambda ans, x: lambda g: g * polygamma(1, x))
digamma.defgrad(lambda ans, x: lambda g: g * polygamma(1, x))
gamma.defgrad(lambda ans, x: lambda g: g * ans * psi(x))


### Bessel functions ###

j0 = primitive(scipy.special.j0)
y0 = primitive(scipy.special.y0)
j1 = primitive(scipy.special.j1)
y1 = primitive(scipy.special.y1)
jn = primitive(scipy.special.jn)
yn = primitive(scipy.special.yn)

j0.defgrad(lambda ans, x: lambda g: -g * j1(x))
y0.defgrad(lambda ans, x: lambda g: -g * y1(x))
j1.defgrad(lambda ans, x: lambda g: g * (j0(x) - jn(2, x)) / 2.0)
y1.defgrad(lambda ans, x: lambda g: g * (y0(x) - yn(2, x)) / 2.0)
jn.defgrad_is_zero(argnums=(0,))
Example #35
def make_natural_sample_grad_arg0(intermediates, ans, messages, pair_params, num_samples):
    return primitive(lambda g: cython_natural_sample_backward_grad(g, intermediates))
Example #36
from __future__ import absolute_import
import scipy.misc

from autograd.core import primitive
import autograd.numpy as anp
from autograd.numpy.numpy_grads import repeat_to_match_shape


logsumexp = primitive(scipy.misc.logsumexp)


def make_grad_logsumexp(ans, x, axis=None, b=1.0, keepdims=False):
    repeater, _ = repeat_to_match_shape(x, axis, keepdims)
    return lambda g: repeater(g) * b * anp.exp(x - repeater(ans))


logsumexp.defgrad(make_grad_logsumexp)
Example #37
from __future__ import absolute_import
import scipy.stats

import autograd.numpy as np
from autograd.core import primitive
from autograd.numpy.numpy_grads import unbroadcast

pdf = primitive(scipy.stats.multivariate_normal.pdf)
logpdf = primitive(scipy.stats.multivariate_normal.logpdf)
entropy = primitive(scipy.stats.multivariate_normal.entropy)

# With thanks to Eric Bresch.
# Some formulas are from
# "An extended collection of matrix derivative results
#  for forward and reverse mode algorithmic differentiation"
# by Mike Giles
# https://people.maths.ox.ac.uk/gilesm/files/NA-08-01.pdf


def lower_half(mat):
    # Takes the lower half of the matrix, and half the diagonal.
    # Necessary since numpy only uses lower half of covariance matrix.
    if len(mat.shape) == 2:
        return 0.5 * (np.tril(mat) + np.triu(mat, 1).T)
    elif len(mat.shape) == 3:
        return 0.5 * (np.tril(mat) + np.swapaxes(np.triu(mat, 1), 1, 2))
    else:
        raise ArithmeticError


def generalized_outer_product(mat):
Example #38
def make_natural_smoother_grad_arg0(intermediates, ans, forward_messages, pair_params):
    return primitive(lambda g: cython_natural_smoother_grad(g, intermediates))
Example #39
from __future__ import absolute_import
import scipy.stats

import autograd.numpy as np
from autograd.core import primitive
from autograd.numpy.numpy_grads import unbroadcast

pdf    =  primitive(scipy.stats.multivariate_normal.pdf)
logpdf =  primitive(scipy.stats.multivariate_normal.logpdf)
entropy = primitive(scipy.stats.multivariate_normal.entropy)

# With thanks to Eric Bresch.
# Some formulas are from
# "An extended collection of matrix derivative results
#  for forward and reverse mode algorithmic differentiation"
# by Mike Giles
# https://people.maths.ox.ac.uk/gilesm/files/NA-08-01.pdf

def lower_half(mat):
    # Takes the lower half of the matrix, and half the diagonal.
    # Necessary since numpy only uses lower half of covariance matrix.
    if len(mat.shape) == 2:
        return 0.5 * (np.tril(mat) + np.triu(mat, 1).T)
    elif len(mat.shape) == 3:
        return 0.5 * (np.tril(mat) + np.swapaxes(np.triu(mat, 1), 1,2))
    else:
        raise ArithmeticError

def generalized_outer_product(mat):
    if len(mat.shape) == 1:
        return np.outer(mat, mat)
Example #40
from __future__ import absolute_import
import scipy.special
import autograd.numpy as np

from autograd.core import primitive

polygamma    = primitive(scipy.special.polygamma)
psi          = primitive(scipy.special.psi)        # psi(x) is just polygamma(0, x)
digamma      = primitive(scipy.special.digamma)    # digamma is another name for psi.
gamma        = primitive(scipy.special.gamma)
gammaln      = primitive(scipy.special.gammaln)
gammasgn     = primitive(scipy.special.gammasgn)
rgamma       = primitive(scipy.special.rgamma)
multigammaln = primitive(scipy.special.multigammaln)

gammasgn.defgrad_is_zero()
polygamma.defgrad_is_zero(argnums=(0,))
polygamma.defgrad(lambda ans, n, x: lambda g: g * polygamma(n + 1, x), argnum=1)
psi.defgrad(      lambda ans, x: lambda g: g * polygamma(1, x))
digamma.defgrad(  lambda ans, x: lambda g: g * polygamma(1, x))
gamma.defgrad(    lambda ans, x: lambda g: g * ans * psi(x))
gammaln.defgrad(  lambda ans, x: lambda g: g * psi(x))
rgamma.defgrad(   lambda ans, x: lambda g: g * psi(x) / -gamma(x))
multigammaln.defgrad(lambda ans, a, d:
    lambda g: g * np.sum(digamma(np.expand_dims(a, -1) - np.arange(d)/2.), -1))
multigammaln.defgrad_is_zero(argnums=(1,))


### Bessel functions ###

j0 = primitive(scipy.special.j0)