Example #1
def rearrange_dict_grad(fun):
    """
    Decorator that allows us to save memory on the forward pass,
    by precomputing the gradient
    """
    @primitive
    def wrapped_fun_helper(xdict, dummy):
        ## ag.value_and_grad() to avoid second forward pass
        ## ag.checkpoint() ensures hessian gets properly checkpointed
        val, grad = ag.checkpoint(ag.value_and_grad(fun))(xdict)
        assert len(val.shape) == 0
        dummy.cache = grad
        return val

    def wrapped_fun_helper_grad(ans, xdict, dummy):
        def grad(g):
            #print("foo")
            return {k: g * v for k, v in dummy.cache.items()}

        return grad

    defvjp(wrapped_fun_helper, wrapped_fun_helper_grad, None)

    @functools.wraps(fun)
    def wrapped_fun(xdict):
        return wrapped_fun_helper(ag.dict(xdict), lambda: None)

    return wrapped_fun
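
A minimal usage sketch for the decorator above, assuming the module's own imports (`import autograd as ag`, `functools`, and `primitive`/`defvjp` from `autograd.extend`); the decorated function must take a single dict argument and return a scalar:

# Hypothetical usage; `toy_loss` and its parameter names are illustrative only.
import autograd as ag
import autograd.numpy as np

@rearrange_dict_grad
def toy_loss(xdict):
    # scalar loss over a dict of parameters
    return np.sum(xdict['w'] ** 2) + np.sum(xdict['b'] ** 2)

params = {'w': np.arange(3.0), 'b': np.ones(2)}
print(ag.grad(toy_loss)(params))  # the backward pass reuses the cached gradient dict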
Example #2
def make_stan_log_density(fitobj):
    @primitive
    def log_density(x):
        return _vectorize_if_needed(fitobj.log_prob, x)
    def log_density_vjp(ans, x):
        return lambda g: _ensure_2d(g) * _vectorize_if_needed(fitobj.grad_log_prob, x)
    defvjp(log_density, log_density_vjp)
    return log_density
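
A hedged usage sketch for the factory above: `fitobj` is assumed to be a PyStan fit object (exposing `log_prob` and `grad_log_prob`), and `_vectorize_if_needed`/`_ensure_2d` are helpers defined elsewhere in the source module.

# Sketch only; `fit` would come from PyStan, e.g. model.sampling(...).
from autograd import grad

stan_log_density = make_stan_log_density(fit)
neg_log_post = lambda x: -stan_log_density(x)
neg_log_post_grad = grad(neg_log_post)  # reverse mode calls log_density_vjp above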
Example #3
def test_check_vjp_1st_order_fail():
    @primitive
    def foo(x):
        return x * 2.0
    defvjp(foo, lambda ans, x : lambda g: g * 2.001)

    assert_raises_regexp(AssertionError,
                         "\(VJP\) check of foo failed",
                         lambda: check_grads(foo, modes=['rev'])(1.0))
Example #4
def test_check_vjp_1st_order_fail():
    @primitive
    def foo(x):
        return x * 2.0

    defvjp(foo, lambda ans, x: lambda g: g * 2.001)

    with raises(AssertionError, match="\\(VJP\\) check of foo failed"):
        check_grads(foo, modes=['rev'])(1.0)
Example #5
 def __new__(self, name, base, dic):
     cls = type.__new__(container_mateclass, name, base, dic)
     cls.register(_np.ndarray)
     for type_ in [
             float, _np.float64, _np.float32, _np.float16, complex,
             _np.complex64, _np.complex128
     ]:
         cls.register(type_)
     for method_name in nondiff_methods + diff_methods:
         setattr(cls, method_name, anp.__dict__[method_name])
     setattr(cls, 'flatten', anp.__dict__['ravel'])
     defvjp(func(cls.__getitem__),
            lambda ans, A, idx: lambda g: untake(g, idx, vspace(A)))
     defjvp(func(cls.__getitem__), 'same')
     defjvp(untake, 'same')
     setattr(cls, 'reshape', wrapped_reshape)
     return cls
Example #6
    def get_model(self, *parameters, frame=None):
        """Get the model of the entire blend

        Parameters
        ----------
        parameters: tuple of optimization parameters
        frame:  `scarlet.Frame`
            Alternative Frame to project the model into

        Returns
        -------
        model: array
            (Bands, Height, Width) data cube
        """

        # boxed models of every source
        models = self.get_models_of_children(*parameters, frame=None)

        if frame is None:
            frame = self.frame

        # if this is the model frame then the slices are already cached
        if frame == self.frame:
            slices = tuple(
                (src._model_frame_slices, src._model_slices) for src in self.sources
            )
        else:
            slices = tuple(
                overlapped_slices(frame.bbox, src.bbox) for src in self.sources
            )

        # We have to declare the function that inserts sources
        # into the blend with autograd.
        # This has to be done each time we fit a blend,
        # since the number of components determines the number of arguments,
        # which must be linked to the autograd primitive function.
        defvjp(
            _add_models,
            *([partial(_grad_add_models, index=k) for k in range(len(self.sources))])
        )

        full_model = np.zeros(frame.shape, dtype=frame.dtype)
        full_model = _add_models(*models, full_model=full_model, slices=slices)

        return full_model
Example #7
    def decorator(func):
        """Decorate a function to define its custome gradient(s).

        Parameters
        ----------
        func : callable
            Function whose gradients will be assigned by grad_funcs.

        Returns
        -------
        wrapped_function : callable
            Function func with gradients specified by grad_funcs.
        """
        wrapped_function = primitive(func)

        def wrapped_grad_func(i, ans, *args, **kwargs):
            grads = grad_funcs[i](*args, **kwargs)
            if isinstance(grads, float):
                return lambda g: g * grads
            if grads.ndim == 2:
                return lambda g: g[..., None] * grads
            if grads.ndim == 3:
                return lambda g: g[..., None, None] * grads
            return lambda g: g * grads

        if len(grad_funcs) == 1:
            defvjp(
                wrapped_function,
                lambda ans, *args, **kwargs: wrapped_grad_func(0, ans, *args, **kwargs),
            )
        elif len(grad_funcs) == 2:
            defvjp(
                wrapped_function,
                lambda ans, *args, **kwargs: wrapped_grad_func(0, ans, *args, **kwargs),
                lambda ans, *args, **kwargs: wrapped_grad_func(1, ans, *args, **kwargs),
            )
        elif len(grad_funcs) == 3:
            defvjp(
                wrapped_function,
                lambda ans, *args, **kwargs: wrapped_grad_func(0, ans, *args, **kwargs),
                lambda ans, *args, **kwargs: wrapped_grad_func(1, ans, *args, **kwargs),
                lambda ans, *args, **kwargs: wrapped_grad_func(2, ans, *args, **kwargs),
            )
        else:
            raise NotImplementedError(
                "custom_gradient is not yet implemented " "for more than 3 gradients."
            )

        return wrapped_function
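
A hedged usage sketch, assuming the decorator above is returned by an enclosing factory of the form `def custom_gradient(*grad_funcs): ... return decorator` (the factory itself is not shown here):

# Hypothetical example: a scalar function with one hand-specified gradient.
from autograd import grad

@custom_gradient(lambda x: 3.0 * x ** 2)  # derivative of x**3
def cube(x):
    return x ** 3

print(grad(cube)(2.0))  # expected: 12.0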
Example #8
    def decorator(func):
        wrapped_function = primitive(func)

        def wrapped_grad_func(i, ans, *args, **kwargs):
            grads = grad_funcs[i](*args, **kwargs)
            if isinstance(grads, float):
                return lambda g: g * grads
            if grads.ndim == 2:
                return lambda g: g[..., None] * grads
            if grads.ndim == 3:
                return lambda g: g[..., None, None] * grads
            return lambda g: g * grads

        if len(grad_funcs) == 1:
            defvjp(
                wrapped_function,
                lambda ans, *args, **kwargs: wrapped_grad_func(0, ans, *args, **kwargs),
            )
        elif len(grad_funcs) == 2:

            defvjp(
                wrapped_function,
                lambda ans, *args, **kwargs: wrapped_grad_func(0, ans, *args, **kwargs),
                lambda ans, *args, **kwargs: wrapped_grad_func(1, ans, *args, **kwargs),
            )
        elif len(grad_funcs) == 3:
            defvjp(
                wrapped_function,
                lambda ans, *args, **kwargs: wrapped_grad_func(0, ans, *args, **kwargs),
                lambda ans, *args, **kwargs: wrapped_grad_func(1, ans, *args, **kwargs),
                lambda ans, *args, **kwargs: wrapped_grad_func(2, ans, *args, **kwargs),
            )
        else:
            raise NotImplementedError(
                "custom_gradient is not yet implemented " "for more than 3 gradients."
            )

        return wrapped_function
Example #9
    if debug_log == 'true':
        logger.info("sigsq_final = {}".format(sigsq_init + jitter))

    return x_plus_constant


def AddJitterOp_vjp(ans: anp.ndarray,
                    inputs: anp.ndarray,
                    initial_jitter_factor=INITIAL_JITTER_FACTOR,
                    jitter_growth=JITTER_GROWTH,
                    debug_log='false'):
    return lambda g: anp.append(anp.reshape(g, (-1, )), anp.sum(anp.diag(g)))


defvjp(AddJitterOp, AddJitterOp_vjp)


@primitive
def cholesky_factorization(a):
    """
    Replacement for autograd.numpy.linalg.cholesky. Our backward (vjp) is
    faster and simpler, while somewhat less general (only works if
    a.ndim == 2).

    See https://arxiv.org/abs/1710.08717 for derivation of backward (vjp)
    expression.
    
    :param a: Symmetric positive definite matrix A
    :return: Lower-triangular Cholesky factor L of A
    """
Example #10
from __future__ import absolute_import
import scipy.misc
from autograd.extend import primitive, defvjp
import autograd.numpy as anp
from autograd.numpy.numpy_vjps import repeat_to_match_shape

logsumexp = primitive(scipy.misc.logsumexp)

def make_grad_logsumexp(ans, x, axis=None, b=1.0, keepdims=False):
    shape, dtype = anp.shape(x), anp.result_type(x)
    def vjp(g):
        g_repeated,   _ = repeat_to_match_shape(g,   shape, dtype, axis, keepdims)
        ans_repeated, _ = repeat_to_match_shape(ans, shape, dtype, axis, keepdims)
        return g_repeated * b * anp.exp(x - ans_repeated)
    return vjp

defvjp(logsumexp, make_grad_logsumexp)
Example #11
from __future__ import division
import scipy.linalg

import autograd.numpy as anp
from autograd.numpy.numpy_wrapper import wrap_namespace
from autograd.extend import defvjp

wrap_namespace(scipy.linalg.__dict__, globals())  # populates module namespace

defvjp(sqrtm, lambda ans, A, **kwargs: lambda g: solve_lyapunov(ans, g))


def _flip(a, trans):
    if anp.iscomplexobj(a):
        return 'H' if trans in ('N', 0) else 'N'
    else:
        return 'T' if trans in ('N', 0) else 'N'


def grad_solve_triangular(ans, a, b, trans=0, lower=False, **kwargs):
    tri = anp.tril if (lower ^ (_flip(a, trans) == 'N')) else anp.triu
    transpose = lambda x: x if _flip(a, trans) != 'N' else x.T
    al2d = lambda x: x if x.ndim > 1 else x[..., None]

    def vjp(g):
        v = al2d(solve_triangular(a, g, trans=_flip(a, trans), lower=lower))
        return -transpose(tri(anp.dot(v, al2d(ans).T)))

    return vjp

Example #12
# Hotfix since _np.asarray doesn't have a gradient rule defined.
@primitive
def asarray(vals, *args, **kwargs):
    """Gradient supporting autograd asarray"""
    if isinstance(vals, (onp.ndarray, _np.ndarray)):
        return _np.asarray(vals, *args, **kwargs)
    return _np.array(vals, *args, **kwargs)


def asarray_gradmaker(ans, *args, **kwargs):
    """Gradient maker for asarray"""
    del ans, args, kwargs
    return lambda g: g


defvjp(asarray, asarray_gradmaker, argnums=(0, ))


class tensor(_np.ndarray):
    """Constructs a PennyLane tensor for use with Autograd QNodes.

    The ``tensor`` class is a subclass of ``numpy.ndarray``,
    providing the same multidimensional, homogeneous data-structure
    of fixed-size items, with an additional flag to indicate to PennyLane
    whether the contained data is differentiable or not.

    .. warning::

        PennyLane ``tensor`` objects are only used as part of the Autograd QNode
        interface. If using another machine learning library such as PyTorch or
        TensorFlow, use their built-in ``tf.Variable`` and ``torch.tensor`` classes
Example #13

def vjp_maker_spdot(b, A, x):
    """ Gives vjp for b = spdot(A, x) w.r.t. x"""
    def vjp(v):
        return spdot(A.T, v)

    return vjp


def jvp_spdot(g, b, A, x):
    """ Gives jvp for b = spdot(A, x) w.r.t. x"""
    return spdot(A, g)


defvjp(spdot, None, vjp_maker_spdot)
defjvp(spdot, None, jvp_spdot)
""" =================== PLOTTING AND MEASUREMENT =================== """

import matplotlib.pylab as plt


def aniplot(F, source, steps, component='Ez', num_panels=10):
    """ Animate an FDTD (F) with `source` for `steps` time steps.
    display the `component` field components at `num_panels` equally spaced.
    """
    F.initialize_fields()

    # initialize the plot
    f, ax_list = plt.subplots(1, num_panels, figsize=(20 * num_panels, 20))
    Nx, Ny, _ = F.eps_r.shape
Example #14
# -*- coding: utf-8 -*-
from __future__ import division
from scipy.stats import norm as _scipy_norm
import autograd.numpy as np
from autograd.scipy.stats import norm
from autograd.extend import primitive, defvjp
from autograd.numpy.numpy_vjps import unbroadcast_f

# TODO: next release of autograd will have this built in.

logsf = primitive(_scipy_norm.logsf)

defvjp(
    logsf,
    lambda ans, x, loc=0.0, scale=1.0: unbroadcast_f(
        x, lambda g: -g * np.exp(
            norm.logpdf(x, loc, scale) - logsf(x, loc, scale))),
    lambda ans, x, loc=0.0, scale=1.0: unbroadcast_f(
        loc, lambda g: g * np.exp(
            norm.logpdf(x, loc, scale) - logsf(x, loc, scale))),
    lambda ans, x, loc=0.0, scale=1.0: unbroadcast_f(
        scale, lambda g: g * np.exp(
            norm.logpdf(x, loc, scale) - logsf(x, loc, scale)) *
        (x - loc) / scale),
)
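
A first-order numerical check of the VJPs registered above, as a sketch using autograd's test utility:

from autograd.test_util import check_grads

check_grads(lambda x: logsf(x, 1.0, 2.0), modes=['rev'], order=1)(np.array([0.3, 1.5]))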
Example #15
    for i, j in enumerate(range(offset, m - 1, 2)):
        d[j, 0] = np.exp(1j * phis[i])
    return d


def build_phi_layer_vjp(ans, phis, m, offset):
    def _build_phi_layer_vjp(g):
        out = np.zeros(phis.shape)
        for i, j in enumerate(range(offset, m - 1, 2)):
            out[i] += np.real(ans[j, 0] * 1j * g[j, 0])
        return out

    return _build_phi_layer_vjp


defvjp(build_phi_layer, build_phi_layer_vjp, None, None)


def clements_build(phis, m):
    U = np.eye(m, dtype=complex)
    ptr = 0
    bss = [build_bs_layer(m, 0), build_bs_layer(m, 1)]
    for i in range(m):
        offset = i % 2
        # Phis per layer
        ppl = (m - offset) // 2
        bs = bss[offset]
        phi1 = build_phi_layer(phis[ptr:ptr + ppl], m, offset)
        phi2 = build_phi_layer(phis[ptr + ppl:ptr + 2 * ppl], m, offset)
        ptr += 2 * ppl
Example #16
    axes, shapes = parse_axes(A.shape, B.shape, axes, dot_axes, mode)
    if argnum == 0:
        X, Y = A, B
        _X_, _Y_ = 'A', 'B'
        ignore_Y = 'ignore_B'
    elif argnum == 1:
        X, Y = B, A
        _X_, _Y_ = 'B', 'A'
        ignore_Y = 'ignore_A'
    else:
        raise NotImplementedError("Can't take grad of convolve w.r.t. arg {0}".format(argnum))

    if mode == 'full':
        new_mode = 'valid'
    else:
        if any([x_size > y_size for x_size, y_size in zip(shapes[_X_]['conv'], shapes[_Y_]['conv'])]):
            new_mode = 'full'
        else:
            new_mode = 'valid'

    def vjp(g):
        result = convolve(g, Y[flipped_idxs(Y.ndim, axes[_Y_]['conv'])],
                          axes     = [axes['out']['conv'],   axes[_Y_]['conv']],
                          dot_axes = [axes['out'][ignore_Y], axes[_Y_]['ignore']],
                          mode     = new_mode)
        new_order = npo.argsort(axes[_X_]['ignore'] + axes[_X_]['dot'] + axes[_X_]['conv'])
        return np.transpose(result, new_order)
    return vjp

defvjp(convolve, partial(grad_convolve, 0), partial(grad_convolve, 1))
Example #17
class RKHSFun(object):
    def __init__(self, kernel, alphas={}):
        self.alphas = alphas
        self.kernel = kernel
        self.vs = RKHSFunVSpace(self)

    @primitive
    def __call__(self, x):
        return sum([a * self.kernel(x, x_repr)
                    for x_repr, a in self.alphas.items()], 0.0)

    def __add__(self, f):  return self.vs.add(self, f)
    def __mul__(self, a):  return self.vs.scalar_mul(self, a)

# TODO: add vjp of __call__ wrt x (and show it in action)
defvjp(func(RKHSFun.__call__),
       lambda ans, f, x: lambda g: RKHSFun(f.kernel, {x : 1}) * g)

class RKHSFunBox(Box, RKHSFun):
    @property
    def kernel(self): return self._value.kernel
RKHSFunBox.register(RKHSFun)

class RKHSFunVSpace(VSpace):
    def __init__(self, value):
        self.kernel = value.kernel

    def zeros(self): return RKHSFun(self.kernel)
    def randn(self):
        # These arbitrary vectors are not analogous to randn in any meaningful way
        N = npr.randint(1,3)
        return RKHSFun(self.kernel, dict(zip(npr.randn(N), npr.randn(N))))
Example #18
import scipy.stats
import autograd.numpy as anp
from autograd.extend import primitive, defvjp
from autograd.numpy.numpy_vjps import unbroadcast_f

pdf = primitive(scipy.stats.norm.pdf)
cdf = primitive(scipy.stats.norm.cdf)
sf = primitive(scipy.stats.norm.sf)
logpdf = primitive(scipy.stats.norm.logpdf)
logcdf = primitive(scipy.stats.norm.logcdf)
logsf = primitive(scipy.stats.norm.logsf)

defvjp(pdf,
       lambda ans, x, loc=0.0, scale=1.0:
       unbroadcast_f(x, lambda g: -g * ans * (x - loc) / scale**2),
       lambda ans, x, loc=0.0, scale=1.0:
       unbroadcast_f(loc, lambda g: g * ans * (x - loc) / scale**2),
       lambda ans, x, loc=0.0, scale=1.0:
       unbroadcast_f(scale, lambda g: g * ans * (((x - loc)/scale)**2 - 1.0)/scale))

defvjp(cdf,
       lambda ans, x, loc=0.0, scale=1.0:
       unbroadcast_f(x, lambda g: g * pdf(x, loc, scale)) ,
       lambda ans, x, loc=0.0, scale=1.0:
       unbroadcast_f(loc, lambda g: -g * pdf(x, loc, scale)),
       lambda ans, x, loc=0.0, scale=1.0:
       unbroadcast_f(scale, lambda g: -g * pdf(x, loc, scale)*(x-loc)/scale))

defvjp(logpdf,
       lambda ans, x, loc=0.0, scale=1.0:
       unbroadcast_f(x, lambda g: -g * (x - loc) / scale**2),
Example #19
from __future__ import absolute_import

import autograd.numpy as np
import scipy.stats
from autograd.extend import primitive, defvjp
from autograd.numpy.numpy_vjps import unbroadcast_f

cdf = primitive(scipy.stats.poisson.cdf)
logpmf = primitive(scipy.stats.poisson.logpmf)
pmf = primitive(scipy.stats.poisson.pmf)

def grad_poisson_logpmf(k, mu):
    return np.where(k % 1 == 0, k / mu - 1, 0)

defvjp(cdf, lambda ans, k, mu: unbroadcast_f(mu, lambda g: g * -pmf(np.floor(k), mu)), argnums=[1])
defvjp(logpmf, lambda ans, k, mu: unbroadcast_f(mu, lambda g: g * grad_poisson_logpmf(k, mu)), argnums=[1])
defvjp(pmf, lambda ans, k, mu: unbroadcast_f(mu, lambda g: g * ans * grad_poisson_logpmf(k, mu)), argnums=[1])
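
A usage sketch differentiating a Poisson log-likelihood with respect to its rate, which exercises the `logpmf` VJP registered above (the observed counts are illustrative):

from autograd import grad

counts = np.array([2.0, 0.0, 3.0])
loglik = lambda mu: np.sum(logpmf(counts, mu))
print(grad(loglik)(1.5))  # equals np.sum(counts / 1.5 - 1)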
Example #20
# Some formulas are from
# "An extended collection of matrix derivative results
#  for forward and reverse mode algorithmic differentiation"
# by Mike Giles
# https://people.maths.ox.ac.uk/gilesm/files/NA-08-01.pdf

# transpose by swapping last two dimensions
def T(x): return anp.swapaxes(x, -1, -2)

_dot = partial(anp.einsum, '...ij,...jk->...ik')

# add two dimensions to the end of x
def add2d(x): return anp.reshape(x, anp.shape(x) + (1, 1))

defvjp(det, lambda ans, x: lambda g: add2d(g) * add2d(ans) * T(inv(x)))
defvjp(slogdet, lambda ans, x: lambda g: add2d(g[1]) * T(inv(x)))

def grad_inv(ans, x):
    return lambda g: -_dot(_dot(T(ans), g), T(ans))
defvjp(inv, grad_inv)

def grad_pinv(ans, x):
    # https://mathoverflow.net/questions/25778/analytical-formula-for-numerical-derivative-of-the-matrix-pseudo-inverse
    return lambda g: T(
        -_dot(_dot(ans, T(g)), ans)
        + _dot(_dot(_dot(ans, T(ans)), g), anp.eye(x.shape[-2]) - _dot(x,ans))
        + _dot(_dot(_dot(anp.eye(ans.shape[-2]) - _dot(ans,x), g), T(ans)), ans)
        )
defvjp(pinv, grad_pinv)
Example #21
File: t.py Project: HIPS/autograd
    return -diff * (1.0 + df) / (diff**2 + df)
def grad_tlogpdf_x(x, df, loc, scale):
    return grad_tlogpdf_diff((x - loc) / scale, df) / scale
def grad_tlogpdf_loc(x, df, loc, scale):
    return -grad_tlogpdf_diff((x - loc) / scale, df) / scale
def grad_tlogpdf_scale(x, df, loc, scale):
    diff = x - loc
    return -(df * (scale**2 - diff**2))/(scale * (df * scale**2 + diff**2))
def grad_tlogpdf_df(x, df, loc, scale):
    y = (x - loc)/scale
    return 0.5 * ((y**2 * (df+1))/(df * (y**2 + df)) - np.log(y**2 / df + 1) - 1.0/df -psi(df/2.0) + psi((df + 1)/2.0))

defvjp(pdf, lambda ans, x, df, loc=0.0, scale=1.0:
       unbroadcast_f(x, lambda g: g * ans * grad_tlogpdf_x(    x, df, loc, scale)),
       lambda ans, x, df, loc=0.0, scale=1.0:
       unbroadcast_f(df, lambda g: g * ans * grad_tlogpdf_df(   x, df, loc, scale)),
       lambda ans, x, df, loc=0.0, scale=1.0:
       unbroadcast_f(loc, lambda g: g * ans * grad_tlogpdf_loc(  x, df, loc, scale)),
       lambda ans, x, df, loc=0.0, scale=1.0:
       unbroadcast_f(scale, lambda g: g * ans * grad_tlogpdf_scale(x, df, loc, scale)))

defvjp(cdf,
       lambda ans, x, df, loc=0.0, scale=1.0:
       unbroadcast_f(x, lambda g:  g * pdf(x, df, loc, scale)),
       lambda ans, x, df, loc=0.0, scale=1.0:
       unbroadcast_f(loc, lambda g: -g * pdf(x, df, loc, scale)), argnums=(0,2))

defvjp(logpdf,
       lambda ans, x, df, loc=0.0, scale=1.0:
       unbroadcast_f(x, lambda g: g * grad_tlogpdf_x(    x, df, loc, scale)),
       lambda ans, x, df, loc=0.0, scale=1.0:
       unbroadcast_f(df, lambda g: g * grad_tlogpdf_df(   x, df, loc, scale)),
Example #22
nograd_functions = [
    anp.floor, anp.ceil, anp.round, anp.rint, anp.around, anp.fix, anp.trunc, anp.all,
    anp.any, anp.argmax, anp.argmin, anp.argpartition, anp.argsort, anp.argwhere, anp.nonzero,
    anp.flatnonzero, anp.count_nonzero, anp.searchsorted, anp.sign, anp.ndim, anp.shape,
    anp.floor_divide, anp.logical_and, anp.logical_or, anp.logical_not, anp.logical_xor,
    anp.isfinite, anp.isinf, anp.isnan, anp.isneginf, anp.isposinf, anp.allclose, anp.isclose,
    anp.array_equal, anp.array_equiv, anp.greater, anp.greater_equal, anp.less, anp.less_equal,
    anp.equal, anp.not_equal, anp.iscomplexobj, anp.iscomplex, anp.size, anp.isscalar,
    anp.isreal, anp.zeros_like, anp.ones_like, anp.result_type]

for fun in nograd_functions:
    register_notrace(VJPNode, fun)

# ----- Functions that are constant w.r.t. continuous inputs -----

defvjp(anp.nan_to_num, lambda ans, x: lambda g: anp.where(anp.isfinite(x), g, 0.))

# ----- Binary ufuncs -----

defvjp(anp.add,         lambda ans, x, y : unbroadcast_f(x, lambda g: g),
                        lambda ans, x, y : unbroadcast_f(y, lambda g: g))
defvjp(anp.multiply,    lambda ans, x, y : unbroadcast_f(x, lambda g: y * g),
                        lambda ans, x, y : unbroadcast_f(y, lambda g: x * g))
defvjp(anp.subtract,    lambda ans, x, y : unbroadcast_f(x, lambda g: g),
                        lambda ans, x, y : unbroadcast_f(y, lambda g: -g))
defvjp(anp.divide,      lambda ans, x, y : unbroadcast_f(x, lambda g:   g / y),
                        lambda ans, x, y : unbroadcast_f(y, lambda g: - g * x / y**2))
defvjp(anp.maximum,     lambda ans, x, y : unbroadcast_f(x, lambda g: g * balanced_eq(x, ans, y)),
                        lambda ans, x, y : unbroadcast_f(y, lambda g: g * balanced_eq(y, ans, x)))
defvjp(anp.minimum,     lambda ans, x, y : unbroadcast_f(x, lambda g: g * balanced_eq(x, ans, y)),
                        lambda ans, x, y : unbroadcast_f(y, lambda g: g * balanced_eq(y, ans, x)))
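
A small sketch of what `unbroadcast_f` buys in the binary-ufunc VJPs above: when an operand was broadcast on the forward pass, its gradient is summed back down to that operand's shape.

from autograd import grad

f = lambda s: anp.sum(anp.multiply(s, anp.array([1.0, 2.0, 3.0])))
print(grad(f)(2.0))  # 6.0: the per-element gradients [1., 2., 3.] are re-summed to a scalar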
Example #23
from __future__ import absolute_import

import autograd.numpy as np
import scipy.stats
from autograd.extend import primitive, defvjp
from autograd.numpy.numpy_vjps import unbroadcast_f
from autograd.scipy.special import gamma, psi

cdf = primitive(scipy.stats.gamma.cdf)
logpdf = primitive(scipy.stats.gamma.logpdf)
pdf = primitive(scipy.stats.gamma.pdf)

def grad_gamma_logpdf_arg0(x, a):
    return (a - x - 1) / x

def grad_gamma_logpdf_arg1(x, a):
    return np.log(x) - psi(a)

defvjp(cdf, lambda ans, x, a: unbroadcast_f(x, lambda g: g * np.exp(-x) * np.power(x, a-1) / gamma(a)), argnums=[0])
defvjp(logpdf,
       lambda ans, x, a: unbroadcast_f(x, lambda g: g * grad_gamma_logpdf_arg0(x, a)),
       lambda ans, x, a: unbroadcast_f(a, lambda g: g * grad_gamma_logpdf_arg1(x, a)))
defvjp(pdf,
       lambda ans, x, a: unbroadcast_f(x, lambda g: g * ans * grad_gamma_logpdf_arg0(x, a)),
       lambda ans, x, a: unbroadcast_f(a, lambda g: g * ans * grad_gamma_logpdf_arg1(x, a)))
Example #24
File: fft.py Project: HIPS/autograd
from .numpy_wrapper import wrap_namespace
from .numpy_vjps import match_complex
from . import numpy_wrapper as anp
from autograd.extend import primitive, defvjp, vspace

wrap_namespace(ffto.__dict__, globals())

# TODO: make fft gradient work for a repeated axis,
# e.g. by replacing fftn with repeated calls to 1d fft along each axis
def fft_grad(get_args, fft_fun, ans, x, *args, **kwargs):
    axes, s, norm = get_args(x, *args, **kwargs)
    check_no_repeated_axes(axes)
    vs = vspace(x)
    return lambda g: match_complex(x, truncate_pad(fft_fun(g, *args, **kwargs), vs.shape))

defvjp(fft, lambda *args, **kwargs:
        fft_grad(get_fft_args, fft, *args, **kwargs))
defvjp(ifft, lambda *args, **kwargs:
        fft_grad(get_fft_args, ifft, *args, **kwargs))

defvjp(fft2, lambda *args, **kwargs:
        fft_grad(get_fft_args, fft2, *args, **kwargs))
defvjp(ifft2, lambda *args, **kwargs:
        fft_grad(get_fft_args, ifft2, *args, **kwargs))

defvjp(fftn, lambda *args, **kwargs:
        fft_grad(get_fft_args, fftn, *args, **kwargs))
defvjp(ifftn, lambda *args, **kwargs:
        fft_grad(get_fft_args, ifftn, *args, **kwargs))

def rfft_grad(get_args, irfft_fun, ans, x, *args, **kwargs):
    axes, s, norm = get_args(x, *args, **kwargs)
Example #25
# on both the input to the original function (x), and the output of the
# original function (ans).

def logsumexp_vjp(ans, x):
    # If you want to be able to take higher-order derivatives, then all the
    # code inside this function must be itself differentiable by Autograd.
    # This closure multiplies g with the Jacobian of logsumexp (d_ans/d_x).
    # Because Autograd uses reverse-mode differentiation, g contains
    # the gradient of the objective w.r.t. ans, the output of logsumexp.
    # This returned VJP function doesn't close over `x`, so Python can
    # garbage-collect `x` if there are no references to it elsewhere.
    x_shape = x.shape
    return lambda g: np.full(x_shape, g) * np.exp(x - np.full(x_shape, ans))

# Now we tell Autograd that logsumexp has a gradient-making function.
defvjp(logsumexp, logsumexp_vjp)

if __name__ == '__main__':
    # Now we can use logsumexp() inside a larger function that we want
    # to differentiate.
    def example_func(y):
        z = y**2
        lse = logsumexp(z)
        return np.sum(lse)

    grad_of_example = grad(example_func)
    print("Gradient: \n", grad_of_example(npr.randn(10)))

    # Check the gradients numerically, just to be safe.
    check_grads(example_func, modes=['rev'])(npr.randn(10))
Example #26
    This function is required for integration with Autograd.
    """
    # pylint: disable=unused-argument
    def gradient_product(g):
        """Vector Jacobian product operator.

        Args:
            g (array): scalar or vector multiplying the Jacobian
                from the left (output side).

        Returns:
            nested Sequence[float]: vector-Jacobian product, arranged
            into the nested structure of the QNode input arguments.
        """
        # Jacobian matrix of the circuit
        jac = self.jacobian(args, **kwargs)
        if not g.shape:
            temp = g * jac  # numpy treats 0d arrays as scalars, hence @ cannot be used
        else:
            temp = g @ jac

        # restore the nested structure of the input args
        temp = unflatten(temp.flat, args)
        return temp

    return gradient_product


# define the vector-Jacobian product function for QNode.__call__()
ae.defvjp(QNode.evaluate, QNode_vjp, argnums=[1])
Example #27
    """ Compute the expected error of A on W, under the following assumptions:
            1. A is a sensitivity 1 strategy
            2. A supports W
    """
    AtA1 = np.linalg.pinv(np.dot(A.T, A))
    return np.trace(np.dot(AtA1, WtW))


def grad_error(A, WtW):
    AtA1 = np.linalg.pinv(np.dot(A.T, A))
    X = -np.dot(AtA1, np.dot(WtW, AtA1))
    return 2 * np.dot(A, X)


defvjp(mm_error,
       lambda ans, A, WtW: lambda g: g * grad_error(A, WtW),
       argnums=[0])


class CustomTemplate(templates.TemplateStrategy):
    """
    The CustomTemplate strategy is specified by a function mapping parameters theta to 
    a strategy A(theta).  Gradients + Optimization are handled automatically as long
    as the passed function is compatible with autograd.  
    """
    def __init__(self, strategy, theta0, normalize=True, seed=None):
        """
        :param strategy: a function mapping parameters theta to strategies A(theta)
        :param theta0: the initial parameters
        :param normalize: flag to determine if A(theta) should be normalized
            Note: if normalize=False, A(theta) must always have bounded sensitivity for any theta
Example #28
        g_repeated = np.zeros(shape)
        for I, (ist, ind) in enumerate(zip(Xstrides[:-1], Xstrides[1:])):
            for J, (jst, jnd) in enumerate(zip(Ystrides[:-1], Ystrides[1:])):
                if is_square is True:
                    if I < J:
                        g_repeated[ist:ind, jst:jnd] = g_repeated[jst:jnd,
                                                                  ist:ind].T
                        continue
                g_repeated[ist:ind,
                           jst:jnd] = g[I, J] / ((ind - ist) * (jnd - jst))
        return g_repeated

    return vjp


defvjp(average_kernel, grad_average_kernel, None, None)


def symmetrize(p):
    Nsoap, Ncomp, _, nn = p.shape
    p2 = np.empty((Nsoap, Ncomp * (Ncomp + 1) // 2, nn))
    stride = [0] + list(range(Ncomp, 0, -1))
    stride = np.cumsum(stride)
    for i, st, nd in zip(range(Ncomp - 1), stride[:-1], stride[1:]):
        p2[:, st] = p[:, i, i]
        p2[:, st + 1:nd] = p[:, i, (i + 1):Ncomp] * np.sqrt(2.0)
    p2[:, -1] = p[:, Ncomp - 1, Ncomp - 1]
    return p2


def get_unlin_soap(rawsoap, params, global_species):
Example #29
    @primitive
    def __call__(self, x):
        return sum(
            [a * self.kernel(x, x_repr) for x_repr, a in self.alphas.items()],
            0.0)

    def __add__(self, f):
        return self.vs.add(self, f)

    def __mul__(self, a):
        return self.vs.scalar_mul(self, a)


# TODO: add vjp of __call__ wrt x (and show it in action)
defvjp(func(RKHSFun.__call__),
       lambda ans, f, x: lambda g: RKHSFun(f.kernel, {x: 1}) * g)


class RKHSFunBox(Box, RKHSFun):
    @property
    def kernel(self):
        return self._value.kernel


RKHSFunBox.register(RKHSFun)


class RKHSFunVSpace(VSpace):
    def __init__(self, value):
        self.kernel = value.kernel
Example #30
from __future__ import absolute_import
import scipy.special
import autograd.numpy as np
from autograd.extend import primitive, defvjp
from autograd.numpy.numpy_vjps import unbroadcast_f

### Beta function ###
beta    = primitive(scipy.special.beta)
betainc = primitive(scipy.special.betainc)
betaln  = primitive(scipy.special.betaln)

defvjp(beta,
       lambda ans, a, b: unbroadcast_f(a, lambda g: g * ans * (psi(a) - psi(a + b))),
       lambda ans, a, b: unbroadcast_f(b, lambda g: g * ans * (psi(b) - psi(a + b))))
defvjp(betainc,
       lambda ans, a, b, x: unbroadcast_f(x, lambda g: g * np.power(x, a - 1) * np.power(1 - x, b - 1) / beta(a, b)),
       argnums=[2])
defvjp(betaln,
       lambda ans, a, b: unbroadcast_f(a, lambda g: g * (psi(a) - psi(a + b))),
       lambda ans, a, b: unbroadcast_f(b, lambda g: g * (psi(b) - psi(a + b))))

### Gamma functions ###
polygamma    = primitive(scipy.special.polygamma)
psi          = primitive(scipy.special.psi)        # psi(x) is just polygamma(0, x)
digamma      = primitive(scipy.special.digamma)    # digamma is another name for psi.
gamma        = primitive(scipy.special.gamma)
gammaln      = primitive(scipy.special.gammaln)
gammainc     = primitive(scipy.special.gammainc)
gammaincc    = primitive(scipy.special.gammaincc)
gammasgn     = primitive(scipy.special.gammasgn)
rgamma       = primitive(scipy.special.rgamma)
Example #31
    Returns
    -------
    np.ndarray
        shape=(2,2)

    """
    # print('mmbbvv, pu2', pu2r(*tlist) +1j* pu2r(*tlist))
    return pu2r(*tlist) + 1j * pu2i(*tlist)


defvjp(
    pu2r,
    # defines vector-jacobian-product of pu2r
    # g.shape == pu2r.shape
    lambda ans, *tlist: lambda g: np.sum(g * np.real(d_u2(0, *tlist))),
    lambda ans, *tlist: lambda g: np.sum(g * np.real(d_u2(1, *tlist))),
    lambda ans, *tlist: lambda g: np.sum(g * np.real(d_u2(2, *tlist))),
    lambda ans, *tlist: lambda g: np.sum(g * np.real(d_u2(3, *tlist))),
    argnums=range(4))

defvjp(
    pu2i,
    # defines vector-jacobian-product of pu2i
    # g.shape == pu2i.shape
    lambda ans, *tlist: lambda g: np.sum(g * np.imag(d_u2(0, *tlist))),
    lambda ans, *tlist: lambda g: np.sum(g * np.imag(d_u2(1, *tlist))),
    lambda ans, *tlist: lambda g: np.sum(g * np.imag(d_u2(2, *tlist))),
    lambda ans, *tlist: lambda g: np.sum(g * np.imag(d_u2(3, *tlist))),
    argnums=range(4))
Example #32
from __future__ import absolute_import, division

import autograd.numpy as np
import scipy.stats
from autograd.extend import primitive, defvjp
from autograd.numpy.numpy_vjps import unbroadcast_f
from autograd.scipy.special import gamma

cdf = primitive(scipy.stats.chi2.cdf)
logpdf = primitive(scipy.stats.chi2.logpdf)
pdf = primitive(scipy.stats.chi2.pdf)

def grad_chi2_logpdf(x, df):
    return np.where(df % 1 == 0, (df - x - 2) / (2 * x), 0)

defvjp(cdf, lambda ans, x, df: unbroadcast_f(x, lambda g: g * np.power(2., -df/2) * np.exp(-x/2) * np.power(x, df/2 - 1) / gamma(df/2)), argnums=[0])
defvjp(logpdf, lambda ans, x, df: unbroadcast_f(x, lambda g: g * grad_chi2_logpdf(x, df)), argnums=[0])
defvjp(pdf, lambda ans, x, df: unbroadcast_f(x, lambda g: g * ans * grad_chi2_logpdf(x, df)), argnums=[0])
Example #33
from __future__ import absolute_import
import scipy.special
import autograd.numpy as np
from autograd.extend import primitive, defvjp, defjvp
from autograd.numpy.numpy_vjps import unbroadcast_f, repeat_to_match_shape

### Beta function ###
beta    = primitive(scipy.special.beta)
betainc = primitive(scipy.special.betainc)
betaln  = primitive(scipy.special.betaln)

defvjp(beta,
       lambda ans, a, b: unbroadcast_f(a, lambda g: g * ans * (psi(a) - psi(a + b))),
       lambda ans, a, b: unbroadcast_f(b, lambda g: g * ans * (psi(b) - psi(a + b))))
defvjp(betainc,
       lambda ans, a, b, x: unbroadcast_f(x, lambda g: g * np.power(x, a - 1) * np.power(1 - x, b - 1) / beta(a, b)),
       argnums=[2])
defvjp(betaln,
       lambda ans, a, b: unbroadcast_f(a, lambda g: g * (psi(a) - psi(a + b))),
       lambda ans, a, b: unbroadcast_f(b, lambda g: g * (psi(b) - psi(a + b))))

### Gamma functions ###
polygamma    = primitive(scipy.special.polygamma)
psi          = primitive(scipy.special.psi)        # psi(x) is just polygamma(0, x)
digamma      = primitive(scipy.special.digamma)    # digamma is another name for psi.
gamma        = primitive(scipy.special.gamma)
gammaln      = primitive(scipy.special.gammaln)
gammainc     = primitive(scipy.special.gammainc)
gammaincc    = primitive(scipy.special.gammaincc)
gammasgn     = primitive(scipy.special.gammasgn)
rgamma       = primitive(scipy.special.rgamma)
Example #34
        raise NotImplementedError("The multivariate normal pdf is not "
                "differentiable w.r.t. a singular covariance matix")
    J = np.linalg.inv(cov)
    solved = np.matmul(J, np.expand_dims(x - mean, -1))
    return 1./2 * (generalized_outer_product(solved) - J)

def solve(allow_singular):
    if allow_singular:
        return lambda A, x: np.dot(np.linalg.pinv(A), x)
    else:
        return np.linalg.solve

defvjp(logpdf,
       lambda ans, x, mean, cov, allow_singular=False:
       unbroadcast_f(x, lambda g: -np.expand_dims(g, 1) * solve(allow_singular)(cov, (x - mean).T).T),
       lambda ans, x, mean, cov, allow_singular=False:
       unbroadcast_f(mean, lambda g:  np.expand_dims(g, 1) * solve(allow_singular)(cov, (x - mean).T).T),
       lambda ans, x, mean, cov, allow_singular=False:
       unbroadcast_f(cov, lambda g: -np.reshape(g, np.shape(g) + (1, 1)) * covgrad(x, mean, cov, allow_singular)))

# Same as log pdf, but multiplied by the pdf (ans).
defvjp(pdf,
       lambda ans, x, mean, cov, allow_singular=False:
       unbroadcast_f(x, lambda g: -np.expand_dims(ans * g, 1) * solve(allow_singular)(cov, (x - mean).T).T),
       lambda ans, x, mean, cov, allow_singular=False:
       unbroadcast_f(mean, lambda g:  np.expand_dims(ans * g, 1) * solve(allow_singular)(cov, (x - mean).T).T),
       lambda ans, x, mean, cov, allow_singular=False:
       unbroadcast_f(cov, lambda g: -np.reshape(ans * g, np.shape(g) + (1, 1)) * covgrad(x, mean, cov, allow_singular)))

defvjp(entropy, None,
       lambda ans, mean, cov:
Example #35
    T, K = ll.shape
    
    # Forward pass to get alphas
    alphas = np.zeros((T, K))
    forward_pass(log_pi0, log_Ps, ll, alphas)
    grad_hmm_normalizer(log_Ps, alphas, dlog_pi0, dlog_Ps, dll)
    
    if argnum == 0:
        return lambda g: g * dlog_pi0
    if argnum == 1:
        return lambda g: g * dlog_Ps
    if argnum == 2:
        return lambda g: g * dll

defvjp(hmm_normalizer, 
       partial(_make_grad_hmm_normalizer, 0),
       partial(_make_grad_hmm_normalizer, 1),
       partial(_make_grad_hmm_normalizer, 2))


def hmm_expected_states(log_pi0, log_Ps, ll):
    T, K = ll.shape

    # Make sure everything is C contiguous
    log_pi0 = to_c(log_pi0)
    log_Ps = to_c(log_Ps)
    ll = to_c(ll)

    alphas = np.zeros((T, K))
    forward_pass(log_pi0, log_Ps, ll, alphas)
    normalizer = logsumexp(alphas[-1])
Example #36
from __future__ import division
import scipy.linalg

import autograd.numpy as anp
from autograd.numpy.numpy_wrapper import wrap_namespace
from autograd.extend import defvjp, defvjp_argnums, defjvp, defjvp_argnums

wrap_namespace(scipy.linalg.__dict__, globals())  # populates module namespace

def _vjp_sqrtm(ans, A, disp=True, blocksize=64):
    assert disp, "sqrtm vjp not implemented for disp=False"
    ans_transp = anp.transpose(ans)
    def vjp(g):
        return anp.real(solve_sylvester(ans_transp, ans_transp, g))
    return vjp
defvjp(sqrtm, _vjp_sqrtm)

def _flip(a, trans):
    if anp.iscomplexobj(a):
        return 'H' if trans in ('N', 0) else 'N'
    else:
        return 'T' if trans in ('N', 0) else 'N'

def grad_solve_triangular(ans, a, b, trans=0, lower=False, **kwargs):
    tri = anp.tril if (lower ^ (_flip(a, trans) == 'N')) else anp.triu
    transpose = lambda x: x if _flip(a, trans) != 'N' else x.T
    al2d = lambda x: x if x.ndim > 1 else x[...,None]
    def vjp(g):
        v = al2d(solve_triangular(a, g, trans=_flip(a, trans), lower=lower))
        return -transpose(tri(anp.dot(v, al2d(ans).T)))
    return vjp
Example #37
        raise NotImplementedError(
            "Can't take grad of convolve w.r.t. arg {0}".format(argnum))

    if mode == 'full':
        new_mode = 'valid'
    else:
        if any([
                x_size > y_size for x_size, y_size in zip(
                    shapes[_X_]['conv'], shapes[_Y_]['conv'])
        ]):
            new_mode = 'full'
        else:
            new_mode = 'valid'

    def vjp(g):
        result = convolve(
            g,
            Y[tuple(_autograd_signal.flipped_idxs(Y.ndim, axes[_Y_]['conv']))],
            axes=[axes['out']['conv'], axes[_Y_]['conv']],
            dot_axes=[axes['out'][ignore_Y], axes[_Y_]['ignore']],
            mode=new_mode)
        new_order = npo.argsort(axes[_X_]['ignore'] + axes[_X_]['dot'] +
                                axes[_X_]['conv'])
        return np.transpose(result, new_order)

    return vjp


defvjp(_torch_convolve, partial(_torch_grad_convolve, 0),
       partial(_torch_grad_convolve, 1))
Example #38
def check_probs_matrix(x):
    x = truncate0(x)
    rowsums = np.sum(x, axis=1)
    assert np.allclose(rowsums, 1.0)
    return np.einsum('ij,i->ij', x, 1.0 / rowsums)


@primitive
def set0(x, indices):
    y = np.array(x)
    y[indices] = 0
    return y


defvjp(set0, lambda ans, x, indices: lambda g: set0(g, indices))
#set0.defgrad(lambda ans, x, indices: lambda g: set0(g, indices))
#set0.defvjp(lambda g, ans, vs, gvs, x, indices: set0(g, indices))


def closeleq(x, y):
    return np.logical_or(np.isclose(x, y), x <= y)


def closegeq(x, y):
    return np.logical_or(np.isclose(x, y), x >= y)


@primitive
def make_constant(x):
    return x
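
A usage sketch for the `set0` primitive registered above: entries zeroed on the forward pass receive zero gradient.

# Illustrative values; relies on the defvjp(set0, ...) registration above.
from autograd import grad
import autograd.numpy as np

f = lambda x: np.sum(set0(x, np.array([0, 2])) ** 2)
print(grad(f)(np.array([1.0, 2.0, 3.0])))  # [0., 4., 0.]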
Example #39
nograd_functions = [
    anp.floor, anp.ceil, anp.round, anp.rint, anp.around, anp.fix, anp.trunc, anp.all,
    anp.any, anp.argmax, anp.argmin, anp.argpartition, anp.argsort, anp.argwhere, anp.nonzero,
    anp.flatnonzero, anp.count_nonzero, anp.searchsorted, anp.sign, anp.ndim, anp.shape,
    anp.floor_divide, anp.logical_and, anp.logical_or, anp.logical_not, anp.logical_xor,
    anp.isfinite, anp.isinf, anp.isnan, anp.isneginf, anp.isposinf, anp.allclose, anp.isclose,
    anp.array_equal, anp.array_equiv, anp.greater, anp.greater_equal, anp.less, anp.less_equal,
    anp.equal, anp.not_equal, anp.iscomplexobj, anp.iscomplex, anp.size, anp.isscalar,
    anp.isreal, anp.zeros_like, anp.ones_like, anp.result_type]

for fun in nograd_functions:
    register_notrace(VJPNode, fun)

# ----- Functions that are constant w.r.t. continuous inputs -----

defvjp(anp.nan_to_num, lambda ans, x: lambda g: anp.where(anp.isfinite(x), g, 0.))

# ----- Binary ufuncs -----

defvjp(anp.add,         lambda ans, x, y : unbroadcast_f(x, lambda g: g),
                        lambda ans, x, y : unbroadcast_f(y, lambda g: g))
defvjp(anp.multiply,    lambda ans, x, y : unbroadcast_f(x, lambda g: y * g),
                        lambda ans, x, y : unbroadcast_f(y, lambda g: x * g))
defvjp(anp.subtract,    lambda ans, x, y : unbroadcast_f(x, lambda g: g),
                        lambda ans, x, y : unbroadcast_f(y, lambda g: -g))
defvjp(anp.divide,      lambda ans, x, y : unbroadcast_f(x, lambda g:   g / y),
                        lambda ans, x, y : unbroadcast_f(y, lambda g: - g * x / y**2))
defvjp(anp.maximum,     lambda ans, x, y : unbroadcast_f(x, lambda g: g * balanced_eq(x, ans, y)),
                        lambda ans, x, y : unbroadcast_f(y, lambda g: g * balanced_eq(y, ans, x)))
defvjp(anp.minimum,     lambda ans, x, y : unbroadcast_f(x, lambda g: g * balanced_eq(x, ans, y)),
                        lambda ans, x, y : unbroadcast_f(y, lambda g: g * balanced_eq(y, ans, x)))
Example #40
from autograd.misc.optimizers import adam, sgd
from matplotlib import pyplot as plt

from scipy.optimize import basinhopping


@primitive
def relu(x):
    return x * (x > 0)


def relu_vjp(ans, x):
    # Scale the incoming gradient by the local derivative of relu (0 or 1).
    return lambda g: g * (x > 0)


defvjp(relu, relu_vjp)
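
A quick first-order check of the ReLU VJP above, evaluated away from the kink at zero (a sketch using autograd's test utility):

from autograd.test_util import check_grads

check_grads(relu, modes=['rev'], order=1)(1.3)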


def init_random_params(scale, layer_sizes, rs=npr.RandomState()):
    """Build a list of (weights, biases) tuples,
       one for each layer in the net."""
    return [
        [
            scale * rs.randn(m, n),  # weight matrix
            scale * rs.randn(n)
        ]  # bias vector
        for m, n in zip(layer_sizes[:-1], layer_sizes[1:])
    ]


def init_ones_params(scale, layer_sizes, rs=npr.RandomState()):
Example #41
    T, K = ll.shape
    
    # Forward pass to get alphas
    alphas = np.zeros((T, K))
    forward_pass(log_pi0, log_Ps, ll, alphas)
    grad_hmm_normalizer(log_Ps, alphas, dlog_pi0, dlog_Ps, dll)
    
    if argnum == 0:
        return lambda g: g * dlog_pi0
    if argnum == 1:
        return lambda g: g * dlog_Ps
    if argnum == 2:
        return lambda g: g * dll

defvjp(hmm_normalizer, 
       partial(_make_grad_hmm_normalizer, 0),
       partial(_make_grad_hmm_normalizer, 1),
       partial(_make_grad_hmm_normalizer, 2))


def hmm_expected_states(log_pi0, log_Ps, ll):
    T, K = ll.shape

    # Make sure everything is C contiguous
    to_c = lambda arr: np.copy(arr, 'C') if not arr.flags['C_CONTIGUOUS'] else arr
    log_pi0 = to_c(getval(log_pi0))
    log_Ps = to_c(getval(log_Ps))
    ll = to_c(getval(ll))

    alphas = np.zeros((T, K))
    forward_pass(log_pi0, log_Ps, ll, alphas)
    normalizer = logsumexp(alphas[-1])
Example #42

def logsumexp_vjp(ans, x):
    # If you want to be able to take higher-order derivatives, then all the
    # code inside this function must be itself differentiable by Autograd.
    # This closure multiplies g with the Jacobian of logsumexp (d_ans/d_x).
    # Because Autograd uses reverse-mode differentiation, g contains
    # the gradient of the objective w.r.t. ans, the output of logsumexp.
    # This returned VJP function doesn't close over `x`, so Python can
    # garbage-collect `x` if there are no references to it elsewhere.
    x_shape = x.shape
    return lambda g: np.full(x_shape, g) * np.exp(x - np.full(x_shape, ans))


# Now we tell Autograd that logsumexp has a gradient-making function.
defvjp(logsumexp, logsumexp_vjp)

if __name__ == '__main__':
    # Now we can use logsumexp() inside a larger function that we want
    # to differentiate.
    def example_func(y):
        z = y**2
        lse = logsumexp(z)
        return np.sum(lse)

    grad_of_example = grad(example_func)
    print("Gradient: \n", grad_of_example(npr.randn(10)))

    # Check the gradients numerically, just to be safe.
    check_grads(example_func, modes=['rev'])(npr.randn(10))
Example #43
# batched diagonal, similar to matrix_diag in tensorflow
def _matrix_diag(a):
    reps = anp.array(a.shape)
    reps[:-1] = 1
    reps[-1] = a.shape[-1]
    newshape = list(a.shape) + [a.shape[-1]]
    return _diag(anp.tile(a, reps).reshape(newshape))


# add two dimensions to the end of x
def add2d(x):
    return anp.reshape(x, anp.shape(x) + (1, 1))


defvjp(det, lambda ans, x: lambda g: add2d(g) * add2d(ans) * T(inv(x)))
defvjp(slogdet, lambda ans, x: lambda g: add2d(g[1]) * T(inv(x)))


def grad_inv(ans, x):
    return lambda g: -_dot(_dot(T(ans), g), T(ans))


defvjp(inv, grad_inv)


def grad_pinv(ans, x):
    # https://mathoverflow.net/questions/25778/analytical-formula-for-numerical-derivative-of-the-matrix-pseudo-inverse
    return lambda g: T(-_dot(_dot(ans, T(g)), ans) + _dot(
        _dot(_dot(ans, T(ans)), g),
        anp.eye(x.shape[-2]) - _dot(x, ans)) + _dot(
Example #44
from autograd.extend import primitive, defvjp, vspace
from autograd.builtins import tuple
from autograd import make_vjp

@primitive
def fixed_point(f, a, x0, distance, tol):
    _f = f(a)
    x, x_prev = _f(x0), x0
    while distance(x, x_prev) > tol:
        x, x_prev = _f(x), x
    return x

def fixed_point_vjp(ans, f, a, x0, distance, tol):
    def rev_iter(params):
        a, x_star, x_star_bar = params
        vjp_x, _ = make_vjp(f(a))(x_star)
        vs = vspace(x_star)
        return lambda g: vs.add(vjp_x(g), x_star_bar)
    vjp_a, _ = make_vjp(lambda x, y: f(x)(y))(a, ans)
    return lambda g: vjp_a(fixed_point(rev_iter, tuple((a, ans, g)),
                           vspace(x0).zeros(), distance, tol))

defvjp(fixed_point, None, fixed_point_vjp, None)
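
A usage sketch in the spirit of autograd's fixed-point example: a Babylonian square-root iteration whose derivative flows through the implicit VJP above rather than through the unrolled loop (`newton_sqrt_iter`, `distance`, and `sqrt` are illustrative names).

import autograd.numpy as np
from autograd import grad

def newton_sqrt_iter(a):
    return lambda x: 0.5 * (x + a / x)

def distance(x, y):
    return np.abs(x - y)

sqrt = lambda a: fixed_point(newton_sqrt_iter, a, 1.0, distance, 1e-10)
print(grad(sqrt)(2.0))  # about 0.3536, i.e. 1 / (2 * sqrt(2))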
Example #45
        out = ag_np.zeros_like(x)
        out[mask0] = ag_np.log1p(ag_np.exp(x[mask0]))
        out[mask1] = x[mask1] + ag_np.log1p(ag_np.exp(-x[mask1]))
        return out
    if x > 0:
        return x + ag_np.log1p(ag_np.exp(-x))
    else:
        return ag_np.log1p(ag_np.exp(x))

def make_grad__to_common_arr(ans, x):
    x = ag_np.asarray(x)
    def gradient_product(g):
        return ag_np.full(x.shape, g) * ag_np.exp(x - ans)
    return gradient_product

defvjp(to_common_arr, make_grad__to_common_arr)


@primitive
def to_unconstrained_arr(p):
    """ Numerically stable transform from positive reals to real line

    Implements ag_np.log(ag_np.exp(x) - 1.0)

    Autograd friendly and fully vectorized

    Args
    ----
    p : array of values in (0, +\infty)

    Returns
Example #46
from __future__ import absolute_import
import scipy.stats

import autograd.numpy as np
from autograd.scipy.special import digamma
from autograd.extend import primitive, defvjp

rvs    = primitive(scipy.stats.dirichlet.rvs)
pdf    = primitive(scipy.stats.dirichlet.pdf)
logpdf = primitive(scipy.stats.dirichlet.logpdf)

defvjp(logpdf,lambda ans, x, alpha: lambda g:
              g * (alpha - 1) / x,
              lambda ans, x, alpha: lambda g:
              g * (digamma(np.sum(alpha)) - digamma(alpha) + np.log(x)))

# Same as log pdf, but multiplied by the pdf (ans).
defvjp(pdf,lambda ans, x, alpha: lambda g:
           g * ans * (alpha - 1) / x,
           lambda ans, x, alpha: lambda g:
           g * ans * (digamma(np.sum(alpha)) - digamma(alpha) + np.log(x)))
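
A usage sketch exercising the `logpdf` VJP with respect to the concentration parameters (the values are illustrative):

from autograd import grad

x = np.array([0.2, 0.3, 0.5])      # a point on the simplex
alpha = np.array([1.5, 2.0, 2.5])
# gradient w.r.t. alpha: digamma(sum(alpha)) - digamma(alpha) + log(x)
print(grad(logpdf, 1)(x, alpha))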