Ejemplo n.º 1
0
        return 0.5 * (np.tril(mat) + np.triu(mat, 1).T)
    elif len(mat.shape) == 3:
        return 0.5 * (np.tril(mat) + np.swapaxes(np.triu(mat, 1), 1,2))
    else:
        raise ArithmeticError

def generalized_outer_product(mat):
    """Return the outer product of ``mat`` with itself, one per row for 2-D input.

    A 1-D ``mat`` gives ``np.outer(mat, mat)``; a 2-D ``mat`` gives the
    ``'ij,ik->ijk'`` einsum of ``mat`` with itself.

    Raises:
        ArithmeticError: if ``mat`` is neither 1-D nor 2-D.
    """
    rank = len(mat.shape)
    if rank == 1:
        return np.outer(mat, mat)
    if rank == 2:
        return np.einsum('ij,ik->ijk', mat, mat)
    raise ArithmeticError

def covgrad(x, mean, cov):
    """Return ``lower_half(inv(cov) - outer(s))`` where ``s = solve(cov, x - mean)``.

    The residual ``x - mean`` is transposed before ``np.linalg.solve`` and the
    solution is transposed back; the outer product is formed by
    ``generalized_outer_product``.
    """
    # I think once we have Cholesky we can make this nicer.
    residual = x - mean
    whitened = np.linalg.solve(cov, residual.T).T
    precision = np.linalg.inv(cov)
    return lower_half(precision - generalized_outer_product(whitened))

# Vector-Jacobian products of logpdf w.r.t. x (argnum=0), mean (argnum=1) and
# cov (argnum=2).  The cotangent g is given a trailing singleton axis with
# np.reshape(g, np.shape(g) + (1,)) instead of np.expand_dims(g, 1): both give
# the same result for a 1-D g, but expand_dims raises for the 0-d g of a
# single unbatched sample on NumPy >= 1.18 (axis 1 is out of bounds).
logpdf.defvjp(
    lambda g, ans, vs, gvs, x, mean, cov: unbroadcast(
        vs, gvs,
        -np.reshape(g, np.shape(g) + (1,))
        * np.linalg.solve(cov, (x - mean).T).T),
    argnum=0)
logpdf.defvjp(
    lambda g, ans, vs, gvs, x, mean, cov: unbroadcast(
        vs, gvs,
        np.reshape(g, np.shape(g) + (1,))
        * np.linalg.solve(cov, (x - mean).T).T),
    argnum=1)
logpdf.defvjp(
    lambda g, ans, vs, gvs, x, mean, cov: unbroadcast(
        vs, gvs,
        -np.reshape(g, np.shape(g) + (1, 1)) * covgrad(x, mean, cov)),
    argnum=2)

# Same as log pdf, but multiplied by the pdf (ans).
# The product g * ans is reshaped to carry trailing singleton axes and the
# residual is transposed around the solve, so a batch of samples (one row of
# x per sample) yields one gradient row per sample.  Passing the untransposed
# residual straight to np.linalg.solve only works for a single 1-D sample.
pdf.defvjp(
    lambda g, ans, vs, gvs, x, mean, cov: unbroadcast(
        vs, gvs,
        -np.reshape(g * ans, np.shape(g) + (1,))
        * np.linalg.solve(cov, (x - mean).T).T),
    argnum=0)
pdf.defvjp(
    lambda g, ans, vs, gvs, x, mean, cov: unbroadcast(
        vs, gvs,
        np.reshape(g * ans, np.shape(g) + (1,))
        * np.linalg.solve(cov, (x - mean).T).T),
    argnum=1)
pdf.defvjp(
    lambda g, ans, vs, gvs, x, mean, cov: unbroadcast(
        vs, gvs,
        -np.reshape(g * ans, np.shape(g) + (1, 1)) * covgrad(x, mean, cov)),
    argnum=2)

# The entropy rule for argument 0 is registered as identically zero.
entropy.defvjp_is_zero(argnums=(0,))
# Rule for cov (argnum=1): 0.5 * g * inv(cov) transposed.
entropy.defvjp(
    lambda g, ans, vs, gvs, mean, cov: unbroadcast(
        vs, gvs, 0.5 * g * np.linalg.inv(cov).T),
    argnum=1)
Ejemplo n.º 2
0
    if len(mat.shape) == 1:
        return np.outer(mat, mat)
    elif len(mat.shape) == 2:
        return np.einsum('ij,ik->ijk', mat, mat)
    else:
        raise ArithmeticError


def covgrad(x, mean, cov):
    """Return ``lower_half(inv(cov) - outer(s))`` where ``s = solve(cov, x - mean)``.

    The residual ``x - mean`` is transposed before ``np.linalg.solve`` and the
    solution is transposed back; the outer product is formed by
    ``generalized_outer_product``.
    """
    # I think once we have Cholesky we can make this nicer.
    residual = x - mean
    whitened = np.linalg.solve(cov, residual.T).T
    precision = np.linalg.inv(cov)
    return lower_half(precision - generalized_outer_product(whitened))


# Gradient rules of logpdf w.r.t. x (argnum=0), mean (argnum=1) and cov
# (argnum=2).  The incoming gradient g is given a trailing singleton axis with
# np.reshape(g, np.shape(g) + (1,)) instead of np.expand_dims(g, 1): both give
# the same result for a 1-D g, but expand_dims raises for the 0-d g of a
# single unbatched sample on NumPy >= 1.18 (axis 1 is out of bounds).
logpdf.defgrad(
    lambda ans, x, mean, cov: unbroadcast(
        ans, x,
        lambda g: -np.reshape(g, np.shape(g) + (1,))
        * np.linalg.solve(cov, (x - mean).T).T),
    argnum=0)
logpdf.defgrad(
    lambda ans, x, mean, cov: unbroadcast(
        ans, mean,
        lambda g: np.reshape(g, np.shape(g) + (1,))
        * np.linalg.solve(cov, (x - mean).T).T),
    argnum=1)
logpdf.defgrad(
    lambda ans, x, mean, cov: unbroadcast(
        ans, cov,
        lambda g: -np.reshape(g, np.shape(g) + (1, 1))
        * covgrad(x, mean, cov)),
    argnum=2)

# Same as log pdf, but multiplied by the pdf (ans).
pdf.defgrad(lambda ans, x, mean, cov: unbroadcast(
    ans, x, lambda g: -g * ans * np.linalg.solve(cov, x - mean)),
Ejemplo n.º 3
0
"""Gradients of the normal distribution."""

from __future__ import absolute_import
import scipy.stats
import autograd.numpy as anp

from autograd.core import primitive
from autograd.numpy.numpy_grads import unbroadcast

# Wrap the scipy.stats.norm functions with `primitive` so that gradient rules
# can be attached to them below.
pdf = primitive(scipy.stats.norm.pdf)
cdf = primitive(scipy.stats.norm.cdf)
logpdf = primitive(scipy.stats.norm.logpdf)
logcdf = primitive(scipy.stats.norm.logcdf)

# pdf: rules for x (default argnum), loc (argnum=1) and scale (argnum=2).
pdf.defgrad(
    lambda ans, x, loc=0.0, scale=1.0: unbroadcast(
        ans, x, lambda g: -g * ans * (x - loc) / scale**2))
pdf.defgrad(
    lambda ans, x, loc=0.0, scale=1.0: unbroadcast(
        ans, loc, lambda g: g * ans * (x - loc) / scale**2),
    argnum=1)
pdf.defgrad(
    lambda ans, x, loc=0.0, scale=1.0: unbroadcast(
        ans, scale,
        lambda g: g * ans * (((x - loc)/scale)**2 - 1.0)/scale),
    argnum=2)

# cdf: every rule is built from the pdf evaluated at the same arguments.
cdf.defgrad(
    lambda ans, x, loc=0.0, scale=1.0: unbroadcast(
        ans, x, lambda g: g * pdf(x, loc, scale)))
cdf.defgrad(
    lambda ans, x, loc=0.0, scale=1.0: unbroadcast(
        ans, loc, lambda g: -g * pdf(x, loc, scale)),
    argnum=1)
cdf.defgrad(
    lambda ans, x, loc=0.0, scale=1.0: unbroadcast(
        ans, scale, lambda g: -g * pdf(x, loc, scale)*(x-loc)/scale),
    argnum=2)

# logpdf: same shape as the pdf rules, without the factor `ans`.
logpdf.defgrad(
    lambda ans, x, loc=0.0, scale=1.0: unbroadcast(
        ans, x, lambda g: -g * (x - loc) / scale**2))
logpdf.defgrad(
    lambda ans, x, loc=0.0, scale=1.0: unbroadcast(
        ans, loc, lambda g: g * (x - loc) / scale**2),
    argnum=1)
logpdf.defgrad(
    lambda ans, x, loc=0.0, scale=1.0: unbroadcast(
        ans, scale,
        lambda g: g * (-1.0/scale + (x - loc)**2/scale**3)),
    argnum=2)

# logcdf: the ratio pdf/cdf is formed as exp(logpdf - logcdf).
logcdf.defgrad(
    lambda ans, x, loc=0.0, scale=1.0: unbroadcast(
        ans, x,
        lambda g: g * anp.exp(logpdf(x, loc, scale) - logcdf(x, loc, scale))))
logcdf.defgrad(
    lambda ans, x, loc=0.0, scale=1.0: unbroadcast(
        ans, loc,
        lambda g: -g * anp.exp(logpdf(x, loc, scale) - logcdf(x, loc, scale))),
    argnum=1)
logcdf.defgrad(
    lambda ans, x, loc=0.0, scale=1.0: unbroadcast(
        ans, scale,
        lambda g: -g * anp.exp(logpdf(x, loc, scale) - logcdf(x, loc, scale))*(x-loc)/scale),
    argnum=2)
Ejemplo n.º 4
0
        return 0.5 * (np.tril(mat) + np.triu(mat, 1).T)
    elif len(mat.shape) == 3:
        return 0.5 * (np.tril(mat) + np.swapaxes(np.triu(mat, 1), 1,2))
    else:
        raise ArithmeticError

def generalized_outer_product(mat):
    """Return the outer product of ``mat`` with itself, one per row for 2-D input.

    A 1-D ``mat`` gives ``np.outer(mat, mat)``; a 2-D ``mat`` gives the
    ``'ij,ik->ijk'`` einsum of ``mat`` with itself.

    Raises:
        ArithmeticError: if ``mat`` is neither 1-D nor 2-D.
    """
    rank = len(mat.shape)
    if rank == 1:
        return np.outer(mat, mat)
    if rank == 2:
        return np.einsum('ij,ik->ijk', mat, mat)
    raise ArithmeticError

def covgrad(x, mean, cov):
    """Return ``lower_half(inv(cov) - outer(s))`` where ``s = solve(cov, x - mean)``.

    The residual ``x - mean`` is transposed before ``np.linalg.solve`` and the
    solution is transposed back; the outer product is formed by
    ``generalized_outer_product``.
    """
    # I think once we have Cholesky we can make this nicer.
    residual = x - mean
    whitened = np.linalg.solve(cov, residual.T).T
    precision = np.linalg.inv(cov)
    return lower_half(precision - generalized_outer_product(whitened))

# Gradient rules of logpdf w.r.t. x (argnum=0), mean (argnum=1) and cov
# (argnum=2).  The incoming gradient g is given a trailing singleton axis with
# np.reshape(g, np.shape(g) + (1,)) instead of np.expand_dims(g, 1): both give
# the same result for a 1-D g, but expand_dims raises for the 0-d g of a
# single unbatched sample on NumPy >= 1.18 (axis 1 is out of bounds).
logpdf.defgrad(
    lambda ans, x, mean=None, cov=1, allow_singular=False: unbroadcast(
        ans, x,
        lambda g: -np.reshape(g, np.shape(g) + (1,))
        * np.linalg.solve(cov, (x - mean).T).T),
    argnum=0)
logpdf.defgrad(
    lambda ans, x, mean=None, cov=1, allow_singular=False: unbroadcast(
        ans, mean,
        lambda g: np.reshape(g, np.shape(g) + (1,))
        * np.linalg.solve(cov, (x - mean).T).T),
    argnum=1)
logpdf.defgrad(
    lambda ans, x, mean=None, cov=1, allow_singular=False: unbroadcast(
        ans, cov,
        lambda g: -np.reshape(g, np.shape(g) + (1, 1))
        * covgrad(x, mean, cov)),
    argnum=2)

# Same as log pdf, but multiplied by the pdf (ans).
# The product g * ans is reshaped to carry trailing singleton axes and the
# residual is transposed around the solve, so a batch of samples (one row of
# x per sample) yields one gradient row per sample.  Passing the untransposed
# residual straight to np.linalg.solve only works for a single 1-D sample.
pdf.defgrad(
    lambda ans, x, mean=None, cov=1, allow_singular=False: unbroadcast(
        ans, x,
        lambda g: -np.reshape(g * ans, np.shape(g) + (1,))
        * np.linalg.solve(cov, (x - mean).T).T),
    argnum=0)
pdf.defgrad(
    lambda ans, x, mean=None, cov=1, allow_singular=False: unbroadcast(
        ans, mean,
        lambda g: np.reshape(g * ans, np.shape(g) + (1,))
        * np.linalg.solve(cov, (x - mean).T).T),
    argnum=1)
pdf.defgrad(
    lambda ans, x, mean=None, cov=1, allow_singular=False: unbroadcast(
        ans, cov,
        lambda g: -np.reshape(g * ans, np.shape(g) + (1, 1))
        * covgrad(x, mean, cov)),
    argnum=2)

# The entropy rule for argument 0 is registered as identically zero.
entropy.defgrad_is_zero(argnums=(0,))
# Rule for cov (argnum=1): 0.5 * g * inv(cov) transposed.
entropy.defgrad(
    lambda ans, mean, cov: unbroadcast(
        ans, cov, lambda g: 0.5 * g * np.linalg.inv(cov).T),
    argnum=1)
Ejemplo n.º 5
0
def grad_tlogpdf_scale(x, df, loc, scale):
    """Evaluate the ``scale`` factor used by the Student-t gradient rules.

    With ``d = x - loc`` this returns
    ``-(df * (scale**2 - d**2)) / (scale * (df * scale**2 + d**2))``.
    """
    offset = x - loc
    numerator = df * (scale**2 - offset**2)
    denominator = scale * (df * scale**2 + offset**2)
    return -numerator / denominator


def grad_tlogpdf_df(x, df, loc, scale):
    """Evaluate the ``df`` factor used by the Student-t gradient rules.

    With ``y = (x - loc) / scale`` this returns::

        0.5 * (y**2 * (df + 1) / (df * (y**2 + df)) - log(y**2 / df + 1)
               - 1 / df - psi(df / 2) + psi((df + 1) / 2))
    """
    y_squared = ((x - loc) / scale)**2
    ratio_term = (y_squared * (df + 1)) / (df * (y_squared + df))
    log_term = np.log(y_squared / df + 1)
    # Terms are combined in the same left-to-right order as the formula above.
    return 0.5 * (ratio_term - log_term - 1.0 / df
                  - psi(df / 2.0) + psi((df + 1) / 2.0))


# pdf rules for x, df, loc and scale (argnum 0..3): each multiplies the
# incoming g by the pdf value `ans` and the matching grad_tlogpdf_* factor.
pdf.defvjp(
    lambda g, ans, vs, gvs, x, df, loc=0.0, scale=1.0: unbroadcast(
        vs, gvs, g * ans * grad_tlogpdf_x(x, df, loc, scale)),
    argnum=0)
pdf.defvjp(
    lambda g, ans, vs, gvs, x, df, loc=0.0, scale=1.0: unbroadcast(
        vs, gvs, g * ans * grad_tlogpdf_df(x, df, loc, scale)),
    argnum=1)
pdf.defvjp(
    lambda g, ans, vs, gvs, x, df, loc=0.0, scale=1.0: unbroadcast(
        vs, gvs, g * ans * grad_tlogpdf_loc(x, df, loc, scale)),
    argnum=2)
pdf.defvjp(
    lambda g, ans, vs, gvs, x, df, loc=0.0, scale=1.0: unbroadcast(
        vs, gvs, g * ans * grad_tlogpdf_scale(x, df, loc, scale)),
    argnum=3)

# cdf rule for x (argnum=0): g times the pdf at the same arguments.
cdf.defvjp(
    lambda g, ans, vs, gvs, x, df, loc=0.0, scale=1.0: unbroadcast(
        vs, gvs, g * pdf(x, df, loc, scale)),
    argnum=0)
cdf.defvjp(lambda g, ans, vs, gvs, x, df, loc=0.0, scale=1.0: unbroadcast(
Ejemplo n.º 6
0
# Wrap scipy.stats.t.logcdf with `primitive`; gradient rules for it are
# registered further down via logcdf.defgrad.
logcdf = primitive(scipy.stats.t.logcdf)

def grad_tlogpdf_diff(diff, df):
    """Return ``-diff * (1 + df) / (diff**2 + df)``."""
    numerator = -diff * (1.0 + df)
    return numerator / (diff**2 + df)
def grad_tlogpdf_x(x, df, loc, scale):
    """Return ``grad_tlogpdf_diff`` at the standardized ``x``, divided by ``scale``."""
    standardized = (x - loc) / scale
    return grad_tlogpdf_diff(standardized, df) / scale
def grad_tlogpdf_loc(x, df, loc, scale):
    """Return the negated ``grad_tlogpdf_diff`` at the standardized ``x``, divided by ``scale``."""
    standardized = (x - loc) / scale
    return -grad_tlogpdf_diff(standardized, df) / scale
def grad_tlogpdf_scale(x, df, loc, scale):
    """Evaluate the ``scale`` factor used by the Student-t gradient rules.

    With ``d = x - loc`` this returns
    ``-(df * (scale**2 - d**2)) / (scale * (df * scale**2 + d**2))``.
    """
    offset = x - loc
    numerator = df * (scale**2 - offset**2)
    denominator = scale * (df * scale**2 + offset**2)
    return -numerator / denominator
def grad_tlogpdf_df(x, df, loc, scale):
    """Evaluate the ``df`` factor used by the Student-t gradient rules.

    With ``y = (x - loc) / scale`` this returns::

        0.5 * (y**2 * (df + 1) / (df * (y**2 + df)) - log(y**2 / df + 1)
               - 1 / df - psi(df / 2) + psi((df + 1) / 2))
    """
    y_squared = ((x - loc)/scale)**2
    ratio_term = (y_squared * (df+1))/(df * (y_squared + df))
    log_term = np.log(y_squared / df + 1)
    # Terms are combined in the same left-to-right order as the formula above.
    return 0.5 * (ratio_term - log_term - 1.0/df
                  - psi(df/2.0) + psi((df + 1)/2.0))

# pdf rules for x, df, loc and scale (argnum 0..3): each multiplies g by the
# pdf value `ans` and the matching grad_tlogpdf_* factor.
pdf.defgrad(
    lambda ans, x, df, loc=0.0, scale=1.0: unbroadcast(
        ans, x, lambda g: g * ans * grad_tlogpdf_x(x, df, loc, scale)),
    argnum=0)
pdf.defgrad(
    lambda ans, x, df, loc=0.0, scale=1.0: unbroadcast(
        ans, df, lambda g: g * ans * grad_tlogpdf_df(x, df, loc, scale)),
    argnum=1)
pdf.defgrad(
    lambda ans, x, df, loc=0.0, scale=1.0: unbroadcast(
        ans, loc, lambda g: g * ans * grad_tlogpdf_loc(x, df, loc, scale)),
    argnum=2)
pdf.defgrad(
    lambda ans, x, df, loc=0.0, scale=1.0: unbroadcast(
        ans, scale, lambda g: g * ans * grad_tlogpdf_scale(x, df, loc, scale)),
    argnum=3)

# cdf rules exist only for x (argnum=0) and loc (argnum=2).
cdf.defgrad(
    lambda ans, x, df, loc=0.0, scale=1.0: unbroadcast(
        ans, x, lambda g: g * pdf(x, df, loc, scale)),
    argnum=0)
cdf.defgrad(
    lambda ans, x, df, loc=0.0, scale=1.0: unbroadcast(
        ans, loc, lambda g: -g * pdf(x, df, loc, scale)),
    argnum=2)
# What is the gradient of the cdf wrt the degrees of freedom or scale?  No one knows.

# logpdf rules: same as the pdf rules without the factor `ans`.
logpdf.defgrad(
    lambda ans, x, df, loc=0.0, scale=1.0: unbroadcast(
        ans, x, lambda g: g * grad_tlogpdf_x(x, df, loc, scale)),
    argnum=0)
logpdf.defgrad(
    lambda ans, x, df, loc=0.0, scale=1.0: unbroadcast(
        ans, df, lambda g: g * grad_tlogpdf_df(x, df, loc, scale)),
    argnum=1)
logpdf.defgrad(
    lambda ans, x, df, loc=0.0, scale=1.0: unbroadcast(
        ans, loc, lambda g: g * grad_tlogpdf_loc(x, df, loc, scale)),
    argnum=2)
logpdf.defgrad(
    lambda ans, x, df, loc=0.0, scale=1.0: unbroadcast(
        ans, scale, lambda g: g * grad_tlogpdf_scale(x, df, loc, scale)),
    argnum=3)

# logcdf rules for x and loc: the ratio pdf/cdf is formed as exp(logpdf - logcdf).
logcdf.defgrad(
    lambda ans, x, df, loc=0.0, scale=1.0: unbroadcast(
        ans, x,
        lambda g: g * np.exp(logpdf(x, df, loc, scale) - logcdf(x, df, loc, scale))),
    argnum=0)
logcdf.defgrad(
    lambda ans, x, df, loc=0.0, scale=1.0: unbroadcast(
        ans, loc,
        lambda g: -g * np.exp(logpdf(x, df, loc, scale) - logcdf(x, df, loc, scale))),
    argnum=2)
Ejemplo n.º 7
0
    elif len(mat.shape) == 2:
        return np.einsum('ij,ik->ijk', mat, mat)
    else:
        raise ArithmeticError

def covgrad(x, mean, cov, allow_singular=False):
    """Return ``lower_half(inv(cov) - outer(s))`` where ``s = solve(cov, x - mean)``.

    The residual ``x - mean`` is transposed before ``np.linalg.solve`` and the
    solution is transposed back; the outer product is formed by
    ``generalized_outer_product``.

    Raises:
        NotImplementedError: if ``allow_singular`` is truthy; no inputs are
            inspected in that case.
    """
    if allow_singular:
        raise NotImplementedError("The multivariate normal pdf is not "
                "differentiable w.r.t. a singular covariance matrix")
    # I think once we have Cholesky we can make this nicer.
    solved = np.linalg.solve(cov, (x - mean).T).T
    return lower_half(np.linalg.inv(cov) - generalized_outer_product(solved))

def solve(allow_singular):
    """Pick the linear solver used by the gradient rules.

    Returns ``np.linalg.solve`` when ``allow_singular`` is falsy; otherwise a
    two-argument function computing ``np.dot(np.linalg.pinv(A), x)``.
    """
    if not allow_singular:
        return np.linalg.solve

    def pinv_solve(A, x):
        return np.dot(np.linalg.pinv(A), x)

    return pinv_solve

# Vector-Jacobian products of logpdf w.r.t. x (argnum=0), mean (argnum=1) and
# cov (argnum=2).  The cotangent g is given a trailing singleton axis with
# np.reshape(g, np.shape(g) + (1,)) instead of np.expand_dims(g, 1): both give
# the same result for a 1-D g, but expand_dims raises for the 0-d g of a
# single unbatched sample on NumPy >= 1.18 (axis 1 is out of bounds).
logpdf.defvjp(
    lambda g, ans, vs, gvs, x, mean, cov, allow_singular=False: unbroadcast(
        vs, gvs,
        -np.reshape(g, np.shape(g) + (1,))
        * solve(allow_singular)(cov, (x - mean).T).T),
    argnum=0)
logpdf.defvjp(
    lambda g, ans, vs, gvs, x, mean, cov, allow_singular=False: unbroadcast(
        vs, gvs,
        np.reshape(g, np.shape(g) + (1,))
        * solve(allow_singular)(cov, (x - mean).T).T),
    argnum=1)
logpdf.defvjp(
    lambda g, ans, vs, gvs, x, mean, cov, allow_singular=False: unbroadcast(
        vs, gvs,
        -np.reshape(g, np.shape(g) + (1, 1))
        * covgrad(x, mean, cov, allow_singular)),
    argnum=2)

# Same as log pdf, but multiplied by the pdf (ans).
# ans * g is given a trailing singleton axis with np.reshape rather than
# np.expand_dims(ans * g, 1): the two agree for a 1-D cotangent, but
# expand_dims raises for the 0-d product of a single unbatched sample on
# NumPy >= 1.18 (axis 1 is out of bounds).
pdf.defvjp(
    lambda g, ans, vs, gvs, x, mean, cov, allow_singular=False: unbroadcast(
        vs, gvs,
        -np.reshape(ans * g, np.shape(g) + (1,))
        * solve(allow_singular)(cov, (x - mean).T).T),
    argnum=0)
pdf.defvjp(
    lambda g, ans, vs, gvs, x, mean, cov, allow_singular=False: unbroadcast(
        vs, gvs,
        np.reshape(ans * g, np.shape(g) + (1,))
        * solve(allow_singular)(cov, (x - mean).T).T),
    argnum=1)
pdf.defvjp(
    lambda g, ans, vs, gvs, x, mean, cov, allow_singular=False: unbroadcast(
        vs, gvs,
        -np.reshape(ans * g, np.shape(g) + (1, 1))
        * covgrad(x, mean, cov, allow_singular)),
    argnum=2)

# The entropy rule for argument 0 is registered as identically zero.
entropy.defvjp_is_zero(argnums=(0,))
# Rule for cov (argnum=1): 0.5 * g * inv(cov) transposed.
entropy.defvjp(
    lambda g, ans, vs, gvs, mean, cov: unbroadcast(
        vs, gvs, 0.5 * g * np.linalg.inv(cov).T),
    argnum=1)
Ejemplo n.º 8
0
def generalized_outer_product(mat):
    """Return the outer product of ``mat`` with itself, one per row for 2-D input.

    A 1-D ``mat`` gives ``np.outer(mat, mat)``; a 2-D ``mat`` gives the
    ``'ij,ik->ijk'`` einsum of ``mat`` with itself.

    Raises:
        ArithmeticError: if ``mat`` is neither 1-D nor 2-D.
    """
    rank = len(mat.shape)
    if rank == 1:
        return np.outer(mat, mat)
    if rank == 2:
        return np.einsum('ij,ik->ijk', mat, mat)
    raise ArithmeticError


def covgrad(x, mean, cov):
    """Return ``lower_half(inv(cov) - outer(s))`` where ``s = solve(cov, x - mean)``.

    The residual ``x - mean`` is transposed before ``np.linalg.solve`` and the
    solution is transposed back; the outer product is formed by
    ``generalized_outer_product``.
    """
    # I think once we have Cholesky we can make this nicer.
    residual = x - mean
    whitened = np.linalg.solve(cov, residual.T).T
    precision = np.linalg.inv(cov)
    return lower_half(precision - generalized_outer_product(whitened))


# Vector-Jacobian products of logpdf w.r.t. x (argnum=0), mean (argnum=1) and
# cov (argnum=2).  The cotangent g is given a trailing singleton axis with
# np.reshape(g, np.shape(g) + (1,)) instead of np.expand_dims(g, 1): both give
# the same result for a 1-D g, but expand_dims raises for the 0-d g of a
# single unbatched sample on NumPy >= 1.18 (axis 1 is out of bounds).
logpdf.defvjp(
    lambda g, ans, vs, gvs, x, mean, cov: unbroadcast(
        vs, gvs,
        -np.reshape(g, np.shape(g) + (1,))
        * np.linalg.solve(cov, (x - mean).T).T),
    argnum=0)
logpdf.defvjp(
    lambda g, ans, vs, gvs, x, mean, cov: unbroadcast(
        vs, gvs,
        np.reshape(g, np.shape(g) + (1,))
        * np.linalg.solve(cov, (x - mean).T).T),
    argnum=1)
logpdf.defvjp(
    lambda g, ans, vs, gvs, x, mean, cov: unbroadcast(
        vs, gvs,
        -np.reshape(g, np.shape(g) + (1, 1)) * covgrad(x, mean, cov)),
    argnum=2)

# Same as log pdf, but multiplied by the pdf (ans).
# The product g * ans is reshaped to carry a trailing singleton axis and the
# residual is transposed around the solve, so a batch of samples (one row of
# x per sample) yields one gradient row per sample.  Passing the untransposed
# residual straight to np.linalg.solve only works for a single 1-D sample.
pdf.defvjp(
    lambda g, ans, vs, gvs, x, mean, cov: unbroadcast(
        vs, gvs,
        -np.reshape(g * ans, np.shape(g) + (1,))
        * np.linalg.solve(cov, (x - mean).T).T),
    argnum=0)
pdf.defvjp(lambda g, ans, vs, gvs, x, mean, cov: unbroadcast(
Ejemplo n.º 9
0
"""Gradients of the normal distribution."""

from __future__ import absolute_import
import scipy.stats
import autograd.numpy as anp

from autograd.core import primitive
from autograd.numpy.numpy_grads import unbroadcast

# Wrap the scipy.stats.norm functions with `primitive` so that gradient rules
# can be attached to them below.
pdf = primitive(scipy.stats.norm.pdf)
cdf = primitive(scipy.stats.norm.cdf)
logpdf = primitive(scipy.stats.norm.logpdf)
logcdf = primitive(scipy.stats.norm.logcdf)

# pdf: rules for x (default argnum), loc (argnum=1) and scale (argnum=2).
pdf.defvjp(
    lambda g, ans, vs, gvs, x, loc=0.0, scale=1.0: unbroadcast(
        vs, gvs, -g * ans * (x - loc) / scale**2))
pdf.defvjp(
    lambda g, ans, vs, gvs, x, loc=0.0, scale=1.0: unbroadcast(
        vs, gvs, g * ans * (x - loc) / scale**2),
    argnum=1)
pdf.defvjp(
    lambda g, ans, vs, gvs, x, loc=0.0, scale=1.0: unbroadcast(
        vs, gvs, g * ans * (((x - loc)/scale)**2 - 1.0)/scale),
    argnum=2)

# cdf: every rule is built from the pdf evaluated at the same arguments.
cdf.defvjp(
    lambda g, ans, vs, gvs, x, loc=0.0, scale=1.0: unbroadcast(
        vs, gvs, g * pdf(x, loc, scale)))
cdf.defvjp(
    lambda g, ans, vs, gvs, x, loc=0.0, scale=1.0: unbroadcast(
        vs, gvs, -g * pdf(x, loc, scale)),
    argnum=1)
cdf.defvjp(
    lambda g, ans, vs, gvs, x, loc=0.0, scale=1.0: unbroadcast(
        vs, gvs, -g * pdf(x, loc, scale)*(x-loc)/scale),
    argnum=2)

# logpdf: same shape as the pdf rules, without the factor `ans`.
logpdf.defvjp(
    lambda g, ans, vs, gvs, x, loc=0.0, scale=1.0: unbroadcast(
        vs, gvs, -g * (x - loc) / scale**2))
logpdf.defvjp(
    lambda g, ans, vs, gvs, x, loc=0.0, scale=1.0: unbroadcast(
        vs, gvs, g * (x - loc) / scale**2),
    argnum=1)
logpdf.defvjp(
    lambda g, ans, vs, gvs, x, loc=0.0, scale=1.0: unbroadcast(
        vs, gvs, g * (-1.0/scale + (x - loc)**2/scale**3)),
    argnum=2)

# logcdf: the ratio pdf/cdf is formed as exp(logpdf - logcdf).
logcdf.defvjp(
    lambda g, ans, vs, gvs, x, loc=0.0, scale=1.0: unbroadcast(
        vs, gvs,
        g * anp.exp(logpdf(x, loc, scale) - logcdf(x, loc, scale))))
logcdf.defvjp(
    lambda g, ans, vs, gvs, x, loc=0.0, scale=1.0: unbroadcast(
        vs, gvs,
        -g * anp.exp(logpdf(x, loc, scale) - logcdf(x, loc, scale))),
    argnum=1)
logcdf.defvjp(
    lambda g, ans, vs, gvs, x, loc=0.0, scale=1.0: unbroadcast(
        vs, gvs,
        -g * anp.exp(logpdf(x, loc, scale) - logcdf(x, loc, scale))*(x-loc)/scale),
    argnum=2)
Ejemplo n.º 10
0
# Wrap scipy.stats.t.logcdf with `primitive`; gradient rules for it are
# registered further down via logcdf.defvjp.
logcdf = primitive(scipy.stats.t.logcdf)

def grad_tlogpdf_diff(diff, df):
    """Return ``-diff * (1 + df) / (diff**2 + df)``."""
    numerator = -diff * (1.0 + df)
    return numerator / (diff**2 + df)
def grad_tlogpdf_x(x, df, loc, scale):
    """Return ``grad_tlogpdf_diff`` at the standardized ``x``, divided by ``scale``."""
    standardized = (x - loc) / scale
    return grad_tlogpdf_diff(standardized, df) / scale
def grad_tlogpdf_loc(x, df, loc, scale):
    """Return the negated ``grad_tlogpdf_diff`` at the standardized ``x``, divided by ``scale``."""
    standardized = (x - loc) / scale
    return -grad_tlogpdf_diff(standardized, df) / scale
def grad_tlogpdf_scale(x, df, loc, scale):
    """Evaluate the ``scale`` factor used by the Student-t gradient rules.

    With ``d = x - loc`` this returns
    ``-(df * (scale**2 - d**2)) / (scale * (df * scale**2 + d**2))``.
    """
    offset = x - loc
    numerator = df * (scale**2 - offset**2)
    denominator = scale * (df * scale**2 + offset**2)
    return -numerator / denominator
def grad_tlogpdf_df(x, df, loc, scale):
    """Evaluate the ``df`` factor used by the Student-t gradient rules.

    With ``y = (x - loc) / scale`` this returns::

        0.5 * (y**2 * (df + 1) / (df * (y**2 + df)) - log(y**2 / df + 1)
               - 1 / df - psi(df / 2) + psi((df + 1) / 2))
    """
    y_squared = ((x - loc)/scale)**2
    ratio_term = (y_squared * (df+1))/(df * (y_squared + df))
    log_term = np.log(y_squared / df + 1)
    # Terms are combined in the same left-to-right order as the formula above.
    return 0.5 * (ratio_term - log_term - 1.0/df
                  - psi(df/2.0) + psi((df + 1)/2.0))

# pdf rules for x, df, loc and scale (argnum 0..3): each multiplies g by the
# pdf value `ans` and the matching grad_tlogpdf_* factor.
pdf.defvjp(
    lambda g, ans, vs, gvs, x, df, loc=0.0, scale=1.0: unbroadcast(
        vs, gvs, g * ans * grad_tlogpdf_x(x, df, loc, scale)),
    argnum=0)
pdf.defvjp(
    lambda g, ans, vs, gvs, x, df, loc=0.0, scale=1.0: unbroadcast(
        vs, gvs, g * ans * grad_tlogpdf_df(x, df, loc, scale)),
    argnum=1)
pdf.defvjp(
    lambda g, ans, vs, gvs, x, df, loc=0.0, scale=1.0: unbroadcast(
        vs, gvs, g * ans * grad_tlogpdf_loc(x, df, loc, scale)),
    argnum=2)
pdf.defvjp(
    lambda g, ans, vs, gvs, x, df, loc=0.0, scale=1.0: unbroadcast(
        vs, gvs, g * ans * grad_tlogpdf_scale(x, df, loc, scale)),
    argnum=3)

# cdf rules exist only for x (argnum=0) and loc (argnum=2).
cdf.defvjp(
    lambda g, ans, vs, gvs, x, df, loc=0.0, scale=1.0: unbroadcast(
        vs, gvs, g * pdf(x, df, loc, scale)),
    argnum=0)
cdf.defvjp(
    lambda g, ans, vs, gvs, x, df, loc=0.0, scale=1.0: unbroadcast(
        vs, gvs, -g * pdf(x, df, loc, scale)),
    argnum=2)
# What is the gradient of the cdf wrt the degrees of freedom or scale?  No one knows.

# logpdf rules: same as the pdf rules without the factor `ans`.
logpdf.defvjp(
    lambda g, ans, vs, gvs, x, df, loc=0.0, scale=1.0: unbroadcast(
        vs, gvs, g * grad_tlogpdf_x(x, df, loc, scale)),
    argnum=0)
logpdf.defvjp(
    lambda g, ans, vs, gvs, x, df, loc=0.0, scale=1.0: unbroadcast(
        vs, gvs, g * grad_tlogpdf_df(x, df, loc, scale)),
    argnum=1)
logpdf.defvjp(
    lambda g, ans, vs, gvs, x, df, loc=0.0, scale=1.0: unbroadcast(
        vs, gvs, g * grad_tlogpdf_loc(x, df, loc, scale)),
    argnum=2)
logpdf.defvjp(
    lambda g, ans, vs, gvs, x, df, loc=0.0, scale=1.0: unbroadcast(
        vs, gvs, g * grad_tlogpdf_scale(x, df, loc, scale)),
    argnum=3)

# logcdf rules for x and loc: the ratio pdf/cdf is formed as exp(logpdf - logcdf).
logcdf.defvjp(
    lambda g, ans, vs, gvs, x, df, loc=0.0, scale=1.0: unbroadcast(
        vs, gvs,
        g * np.exp(logpdf(x, df, loc, scale) - logcdf(x, df, loc, scale))),
    argnum=0)
logcdf.defvjp(
    lambda g, ans, vs, gvs, x, df, loc=0.0, scale=1.0: unbroadcast(
        vs, gvs,
        -g * np.exp(logpdf(x, df, loc, scale) - logcdf(x, df, loc, scale))),
    argnum=2)