def hessian_vector_product(fun, argnum=0):
    """Builds a function that returns the exact Hessian-vector product.
    The returned function has arguments (*args, vector, **kwargs), and takes
    roughly 4x as long to evaluate as the original function."""
    fun_grad = grad(fun, argnum)
    def vector_dot_grad(*args, **kwargs):
        args, vector = args[:-1], args[-1]
        return np.dot(vector, fun_grad(*args, **kwargs))
    return grad(vector_dot_grad, argnum)  # Grad wrt original input.
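
# Usage sketch (added for illustration; not from the original project, and it
# assumes autograd is installed): for the quadratic f(x) = x.(A x) the Hessian
# is A + A.T, so hessian_vector_product should reproduce (A + A.T) @ v.
import autograd.numpy as np
from autograd import grad

A = np.array([[2.0, 1.0],
              [0.0, 3.0]])
f = lambda x: np.dot(x, np.dot(A, x))      # scalar-valued quadratic

hvp = hessian_vector_product(f)            # uses the function defined above
x = np.array([1.0, -1.0])
v = np.array([0.5, 2.0])
print(hvp(x, v))                           # Hessian-vector product at x
print(np.dot(A + A.T, v))                  # reference value (A + A.T) v
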
Example #2
def hessian_vector_product(fun, argnum=0):
    """Builds a function that returns the exact Hessian-vector product.
    The returned function has arguments (*args, vector, **kwargs), and takes
    roughly 4x as long to evaluate as the original function."""
    fun_grad = grad(fun, argnum)
    def vector_dot_grad(*args, **kwargs):
        args, vector = args[:-1], args[-1]
        return np.dot(vector, fun_grad(*args, **kwargs))
    return grad(vector_dot_grad, argnum)  # Grad wrt original input.
Example #3
def quick_grad_check(fun,
                     arg0,
                     extra_args=(),
                     kwargs={},
                     verbose=True,
                     eps=EPS,
                     rtol=RTOL,
                     atol=ATOL,
                     rs=None):
    """Checks the gradient of a function (w.r.t. to its first arg) in a random direction"""

    if verbose:
        print("Checking gradient of {0} at {1}".format(fun, arg0))

    if rs is None:
        rs = np.random.RandomState()

    random_dir = rs.standard_normal(np.shape(arg0))
    random_dir = random_dir / np.sqrt(np.sum(random_dir * random_dir))
    unary_fun = lambda x: fun(arg0 + x * random_dir, *extra_args, **kwargs)
    numeric_grad = unary_nd(unary_fun, 0.0, eps=eps)

    analytic_grad = np.sum(grad(fun)(arg0, *extra_args, **kwargs) * random_dir)

    assert np.allclose(numeric_grad, analytic_grad, rtol=rtol, atol=atol), \
        "Check failed! nd={0}, ad={1}".format(numeric_grad, analytic_grad)

    if verbose:
        print("Gradient projection OK (numeric grad: {0}, analytic grad: {1})".
              format(numeric_grad, analytic_grad))
Example #4
def check_grads(fun, *args):
    if not args:
        raise Exception("No args given")
    exact = tuple([grad(fun, i)(*args) for i in range(len(args))])
    numeric = nd(fun, *args)

    check_equivalent(exact, numeric)
def grad_and_aux(fun, argnum=0):
    # Enclosing signature inferred: the original snippet shows only the inner
    # function, which closes over `fun` and `argnum` from this outer scope.
    def grad_and_aux_fun(*args, **kwargs):
        saved = lambda: None
        def return_val_save_aux(*args, **kwargs):
            val, saved.aux = fun(*args, **kwargs)
            return val
        gradval = grad(return_val_save_aux, argnum)(*args, **kwargs)
        return gradval, saved.aux
    return grad_and_aux_fun
def elementwise_grad(fun, argnum=0):
    """Like `jacobian`, but produces a function which computes just the diagonal
    of the Jacobian, and does the computation in one pass rather than in a loop.
    Note: this is only valid if the Jacobian is diagonal. Only arrays are
    currently supported. Can be used for broadcasting."""
    def sum_output(*args, **kwargs):
        return np.sum(fun(*args, **kwargs))
    return grad(sum_output, argnum=argnum)
def grad_and_aux(fun, argnum=0):
    # Enclosing signature inferred: the original snippet shows only the inner
    # function, which closes over `fun` and `argnum` from this outer scope.
    def grad_and_aux_fun(*args, **kwargs):
        saved_aux = []
        def return_val_save_aux(*args, **kwargs):
            val, aux = fun(*args, **kwargs)
            saved_aux.append(aux)
            return val
        gradval = grad(return_val_save_aux, argnum)(*args, **kwargs)
        return gradval, saved_aux[0]
    return grad_and_aux_fun
Example #8
def grad_and_aux(fun, argnum=0):
    # Enclosing signature inferred: the original snippet shows only the inner
    # function, which closes over `fun` and `argnum` from this outer scope.
    def grad_and_aux_fun(*args, **kwargs):
        saved_aux = []
        def return_val_save_aux(*args, **kwargs):
            val, aux = fun(*args, **kwargs)
            saved_aux.append(aux)
            return val
        gradval = grad(return_val_save_aux, argnum)(*args, **kwargs)
        return gradval, saved_aux[0]
    return grad_and_aux_fun
Example #9
def elementwise_grad(fun, argnum=0):
    """Like `jacobian`, but produces a function which computes just the diagonal
    of the Jacobian, and does the computation in one pass rather than in a loop.
    Note: this is only valid if the Jacobian is diagonal. Only arrays are
    currently supported."""
    def sum_output(*args, **kwargs):
        return np.sum(fun(*args, **kwargs))
    return grad(sum_output, argnum=argnum)
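
# Usage sketch (illustrative, not from the original source): np.tanh acts
# elementwise, so its Jacobian is diagonal and elementwise_grad recovers the
# derivative 1 - tanh(x)**2 in a single reverse pass.
import autograd.numpy as np

x = np.linspace(-2.0, 2.0, 5)
d_tanh = elementwise_grad(np.tanh)         # uses the wrapper defined above
print(d_tanh(x))                           # close to 1 - np.tanh(x)**2
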
Example #10
def _hessian_vector_product(fun, argnum=0):
    """Builds a function that returns the exact Hessian-vector product.
    The returned function has arguments (*args, vector, **kwargs). Note:
    this function will be incorporated into autograd under the name
    hessian_vector_product; once that has happened, this function can be
    deleted."""
    fun_grad = grad(fun, argnum)

    def vector_dot_grad(*args, **kwargs):
        args, vector = args[:-1], args[-1]
        try:
            return np.tensordot(fun_grad(*args, **kwargs), vector,
                                axes=vector.ndim)
        except AttributeError:
            # Assume we are on the product manifold.
            return np.sum([np.tensordot(fun_grad(*args, **kwargs)[k],
                                        vector[k], axes=vector[k].ndim)
                           for k in range(len(vector))])
    # Grad wrt original input.
    return grad(vector_dot_grad, argnum)
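
# Usage sketch (illustrative, not from the original project) exercising the
# np.tensordot branch with a matrix-valued argument: for f(W) = sum(sin(W)**2)
# the Hessian is diagonal, so the HVP against V is 2*cos(2*W)*V elementwise.
import autograd.numpy as np

f = lambda W: np.sum(np.sin(W) ** 2)
hvp = _hessian_vector_product(f)           # uses the function defined above

W = np.full((2, 3), 0.5)
V = np.full((2, 3), 0.1)
print(hvp(W, V))                           # same shape as W
print(2 * np.cos(2 * W) * V)               # reference value
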
Example #11
def jacobian(fun, argnum=0):
    # Enclosing signature inferred: the original snippet shows only the inner
    # function, which closes over `fun` and `argnum`. `it` is `itertools` and
    # `getval` is autograd's helper for extracting raw values from nodes.
    def jac_fun(*args, **kwargs):
        arg_in = args[argnum]
        output = fun(*args, **kwargs)
        assert isinstance(getval(arg_in), np.ndarray), "Must have array input"
        assert isinstance(getval(output), np.ndarray), "Must have array output"
        jac = np.zeros(output.shape + arg_in.shape)
        input_slice = (slice(None),) * len(arg_in.shape)
        for idxs in it.product(*map(range, output.shape)):
            scalar_fun = lambda *args, **kwargs: fun(*args, **kwargs)[idxs]
            jac[idxs + input_slice] = grad(scalar_fun, argnum=argnum)(*args, **kwargs)
        return jac
    return jac_fun
Example #12
def jacobian(fun, argnum=0):
    # Enclosing signature inferred: the original snippet shows only the inner
    # function, which closes over `fun` and `argnum`. `it` is `itertools` and
    # `getval` is autograd's helper for extracting raw values from nodes.
    def jac_fun(*args, **kwargs):
        arg_in = args[argnum]
        output = fun(*args, **kwargs)
        assert isinstance(getval(arg_in), np.ndarray), "Must have array input"
        assert isinstance(getval(output), np.ndarray), "Must have array output"
        jac = np.zeros(output.shape + arg_in.shape)
        input_slice = (slice(None),) * len(arg_in.shape)
        for idxs in it.product(*list(map(range, output.shape))):
            scalar_fun = lambda *args, **kwargs: fun(*args, **kwargs)[idxs]
            jac[idxs + input_slice] = grad(scalar_fun, argnum=argnum)(*args, **kwargs)
        return jac
    return jac_fun
Example #13
def multigrad(fun, argnums=[0]):
    """Takes gradients wrt multiple arguments simultaneously."""
    def combined_arg_fun(multi_arg, *args, **kwargs):
        extra_args_list = list(args)
        for argnum_ix, arg_ix in enumerate(argnums):
            extra_args_list[arg_ix] = multi_arg[argnum_ix]
        return fun(*extra_args_list, **kwargs)
    gradfun = grad(combined_arg_fun, argnum=0)
    def gradfun_rearranged(*args, **kwargs):
        multi_arg = tuple([args[i] for i in argnums])
        return gradfun(multi_arg, *args, **kwargs)
    return gradfun_rearranged
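
# Usage sketch (illustrative, not from the original source; relies on
# autograd's support for differentiating with respect to a tuple argument):
# gradients of f(x, y) = sum(x * y**2) with respect to both args at once.
import autograd.numpy as np

f = lambda x, y: np.sum(x * y ** 2)
grad_both = multigrad(f, argnums=[0, 1])   # uses the wrapper defined above

x = np.array([1.0, 2.0])
y = np.array([3.0, 4.0])
dx, dy = grad_both(x, y)
print(dx)                                  # y**2      -> [ 9. 16.]
print(dy)                                  # 2 * x * y -> [ 6. 16.]
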
Example #14
    def gradfun(*args, **kwargs):
        # Inner function of a keyword-aware gradient wrapper: `sig`, `var_pos`,
        # `var_kwd`, `argnames`, `todict`, and `apply_defaults` come from the
        # enclosing scope, which is not shown in this snippet.
        bindings = sig.bind(*args, **kwargs)

        args = lambda dct: tuple(dct[var_pos[0]]) if var_pos else ()
        kwargs = lambda dct: todict(dct[var_kwd[0]]) if var_kwd else {}
        others = lambda dct: tuple(dct[argname] for argname in argnames if argname not in var_kwd + var_pos)

        newfun = lambda dct: fun(*(others(dct) + args(dct)), **kwargs(dct))

        argdict = apply_defaults(bindings.arguments)
        grad_dict = grad(newfun)(dict(argdict))
        return OrderedDict((argname, grad_dict[argname]) for argname in argdict)
def multigrad(fun, argnums=[0]):
    """Takes gradients wrt multiple arguments simultaneously."""
    def combined_arg_fun(multi_arg, *args, **kwargs):
        extra_args_list = list(args)
        for argnum_ix, arg_ix in enumerate(argnums):
            extra_args_list[arg_ix] = multi_arg[argnum_ix]
        return fun(*extra_args_list, **kwargs)
    gradfun = grad(combined_arg_fun, argnum=0)
    def gradfun_rearranged(*args, **kwargs):
        multi_arg = tuple([args[i] for i in argnums])
        return gradfun(multi_arg, *args, **kwargs)
    return gradfun_rearranged
    def gradfun(*args, **kwargs):
        # Inner function of a keyword-aware gradient wrapper: `sig`, `var_pos`,
        # `var_kwd`, `argnames`, `todict`, and `apply_defaults` come from the
        # enclosing scope, which is not shown in this snippet.
        bindings = sig.bind(*args, **kwargs)

        args = lambda dct: tuple(dct[var_pos[0]]) if var_pos else ()
        kwargs = lambda dct: todict(dct[var_kwd[0]]) if var_kwd else {}
        others = lambda dct: tuple(dct[argname] for argname in argnames
                                   if argname not in var_kwd + var_pos)

        newfun = lambda dct: fun(*(others(dct) + args(dct)), **kwargs(dct))

        argdict = apply_defaults(bindings.arguments)
        grad_dict = grad(newfun)(dict(argdict))
        return OrderedDict((argname, grad_dict[argname]) for argname in argdict)
Example #17
    def compute_gradient(self, objective, argument):
        """
        Compute the gradient of 'objective' with respect to the first
        argument and return as a function.
        """
        g = grad(objective)

        # Sometimes x will be some custom type, e.g. with the FixedRankEmbedded
        # manifold. Therefore cast it to a numpy.array.
        def gradient(x):
            if type(x) in (list, tuple):
                return g([np.array(xi) for xi in x])
            else:
                return g(np.array(x))
        return gradient
Example #18
def _hessian_vector_product(fun, argnum=0):
    """Builds a function that returns the exact Hessian-vector product.
    The returned function has arguments (*args, vector, **kwargs). Note:
    this function will be incorporated into autograd under the name
    hessian_vector_product; once that has happened, this function can be
    deleted."""
    fun_grad = grad(fun, argnum)

    def vector_dot_grad(*args, **kwargs):
        args, vector = args[:-1], args[-1]
        try:
            return np.tensordot(fun_grad(*args, **kwargs),
                                vector,
                                axes=vector.ndim)
        except AttributeError:
            # Assume we are on the product manifold.
            return np.sum([
                np.tensordot(fun_grad(*args, **kwargs)[k],
                             vector[k],
                             axes=vector[k].ndim) for k in range(len(vector))
            ])

    # Grad wrt original input.
    return grad(vector_dot_grad, argnum)
Example #19
    def compute_gradient(self, objective, argument):
        """
        Compute the gradient of 'objective' with respect to the first
        argument and return as a function.
        """
        g = grad(objective)

        # Sometimes x will be some custom type, e.g. with the FixedRankEmbedded
        # manifold. Therefore cast it to a numpy.array.
        def gradient(x):
            if type(x) in (list, tuple):
                return g([np.array(xi) for xi in x])
            else:
                return g(np.array(x))

        return gradient
Example #20
def quick_grad_check(fun, arg0, extra_args=(), kwargs={}, verbose=True,
                     eps=EPS, rtol=RTOL, atol=ATOL, rs=None):
    """Checks the gradient of a function (w.r.t. to its first arg) in a random direction"""

    if verbose:
        print("Checking gradient of {0} at {1}".format(fun, arg0))

    if rs is None:
        rs = np.random.RandomState()

    random_dir = rs.standard_normal(np.shape(arg0))
    random_dir = random_dir / np.sqrt(np.sum(random_dir * random_dir))
    unary_fun = lambda x : fun(arg0 + x * random_dir, *extra_args, **kwargs)
    numeric_grad = unary_nd(unary_fun, 0.0, eps=eps)

    analytic_grad = np.sum(grad(fun)(arg0, *extra_args, **kwargs) * random_dir)

    assert np.allclose(numeric_grad, analytic_grad, rtol=rtol, atol=atol), \
        "Check failed! nd={0}, ad={1}".format(numeric_grad, analytic_grad)

    if verbose:
        print("Gradient projection OK (numeric grad: {0}, analytic grad: {1})".format(
            numeric_grad, analytic_grad))
Example #21
    def compute_gradient(self, objective, argument):
        """
        Compute the gradient of 'objective' with respect to the first
        argument and return as a function.
        """
        return grad(objective)
Example #22
def make_functions(X, inv_var, lam0, lam0_delta, K, K_chol, sig2_omega,
                   sig2_mu):
    """ Make basis fitting functions
      INPUTS: 
        - X       : N_spec x len(lam0) matrix of spectra 
                    (missing stuff can be 0'd out)
        - inv_var : N_spec x len(lam0) matrix of spectra inverse variances 
                    (0 = infinite variance = no observation)
        - lam0       : wavelength observation locations
        - lam0_delta : wavelength observation jumps (could be inferred...)
        - K          : number of bases
        - K_chol     : Cholesky decomposition of MVN covariance prior for
                       a single basis
        - sig2_omega : variance for omega (logit loadings)
        - sig2_mu    : variance for log magnitudes 

      OUTPUTS: 
        - loss_fun, loss_fun_grad, prior_loss, prior_loss_grad, train_model
    """

    parser = ParamParser()
    V = len(lam0)
    N = X.shape[0]
    parser.add_weights('mus', (N, 1))
    parser.add_weights('betas', (K, V))
    parser.add_weights('omegas', (N, K))

    ## weighted loss function - observations have Gaussian noise
    #def loss_fun(th_vec, X, inv_var, lam0_delta, K):
    def loss_fun(th_vec, idx=None):
        """ Negative log likelihood function.  The likelihood model encoded here is

                beta_k  ~ GP(0, K)
                omega_k ~ Normal(0, 1)
                mu_k    ~ Normal(0, 10)

          Normalize Basis and weights so they both sum to 1
                B_k = exp(beta_k) / sum( exp(beta_k) DeltaLam)
                w_k = exp(w_k) / sum(exp(w_i))
                m   = exp(mu_k)
                f   = m \sum w_k B_k

          Observations are normal about the latent spectra, with known variance
                X_lam ~ Normal(f, var_lam)
        """
        # unpack params
        N = X.shape[0]
        mus = parser.get(th_vec, 'mus')
        betas = parser.get(th_vec, 'betas')
        omegas = parser.get(th_vec, 'omegas')

        # subselect for SGD
        if idx is not None:
            mus = mus[idx]
            omegas = omegas[idx, :]
            X_idx = X[idx, :]
            inv_var_idx = inv_var[idx, :]
        else:
            X_idx = X
            inv_var_idx = inv_var

        # exponentiate and normalize params
        W = np.exp(omegas)
        W = W / np.sum(W, axis=1, keepdims=True)
        B = np.exp(np.dot(K_chol, betas.T).T)
        B = B / np.sum(B * lam0_delta, axis=1, keepdims=True)
        M = np.exp(mus)
        Xtilde = np.dot(W * M, B)
        loss_mat = inv_var_idx * np.square(X_idx - Xtilde)
        return np.sum(loss_mat[~np.isnan(loss_mat)])

    loss_grad = grad(loss_fun)

    ## joint prior over parameters
    def prior_loss(th, idx=None):
        """ WHITENED SPACE PRIOR 
            - th_mat    : K x (N + V) matrix holding all weights and basis params
            - N         : number of examples in training set
            - sig2_omega: prior variance on log weights
        """
        mus = parser.get(th, 'mus')
        betas = parser.get(th, 'betas')
        omegas = parser.get(th, 'omegas')
        if idx is not None:
            mus = mus[idx]
            omegas = omegas[idx, :]
        loss_mus = .5 / (sig2_mu) * np.sum(np.square(mus))
        loss_omegas = .5 / (sig2_omega) * np.sum(np.square(omegas))
        loss_betas = .5 * np.sum(np.square(betas))
        return loss_omegas + loss_mus + loss_betas

    prior_loss_grad = grad(prior_loss)
    return parser, loss_fun, loss_grad, prior_loss, prior_loss_grad
Example #23
# The reason for the closure is so that the gradient can depend
# on both the input to the original function (x), and the output of the
# original function (ans).
def make_grad_logsumexp(ans, x):
    # If you want to be able to take higher-order derivatives, then all the
    # code inside this function must be itself differentiable by autograd.
    def gradient_product(g):
        # This closure multiplies g with the Jacobian of logsumexp (d_ans/d_x).
        # Because autograd uses reverse-mode differentiation, g contains
        # the gradient of the objective w.r.t. ans, the output of logsumexp.
        return np.full(x.shape, g) * np.exp(x - np.full(x.shape, ans))

    return gradient_product


# Now we tell autograd that logsumexp has a gradient-making function.
logsumexp.defgrad(make_grad_logsumexp)

if __name__ == '__main__':
    # Now we can use logsumexp() inside a larger function that we want
    # to differentiate.
    def example_func(y):
        z = y**2
        lse = logsumexp(z)
        return np.sum(lse)

    grad_of_example = grad(example_func)
    print("Gradient: ", grad_of_example(npr.randn(10)))

    # Check the gradients numerically, just to be safe.
    quick_grad_check(example_func, npr.randn(10))
Example #24
def rgrad(cost, proj):
    """
    Generates the Riemannian gradient of cost. Cost must be defined using
    autograd.numpy.
    """
    return lambda x: proj(x, grad(cost)(x))
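
# Usage sketch (illustrative; the unit-sphere tangent projection
# proj(x, g) = g - (x . g) x is an assumption, not part of the original code).
import autograd.numpy as np

D = np.diag(np.array([1.0, 2.0, 3.0]))
cost = lambda x: np.dot(x, np.dot(D, x))
sphere_proj = lambda x, g: g - np.dot(x, g) * x

riemannian_grad = rgrad(cost, sphere_proj)        # uses rgrad defined above
x = np.array([1.0, 1.0, 1.0]) / np.sqrt(3.0)      # a point on the unit sphere
print(riemannian_grad(x))                         # lies in the tangent space at x
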
Example #25
# The reason for the closure is so that the gradient can depend
# on both the input to the original function (x), and the output of the
# original function (ans).
def make_grad_logsumexp(ans, x):
    # If you want to be able to take higher-order derivatives, then all the
    # code inside this function must be itself differentiable by autograd.
    def gradient_product(g):
        # This closure multiplies g with the Jacobian of logsumexp (d_ans/d_x).
        # Because autograd uses reverse-mode differentiation, g contains
        # the gradient of the objective w.r.t. ans, the output of logsumexp.
        return np.full(x.shape, g) * np.exp(x - np.full(x.shape, ans))
    return gradient_product

# Now we tell autograd that logsumexp has a gradient-making function.
logsumexp.defgrad(make_grad_logsumexp)


if __name__ == '__main__':
    # Now we can use logsumexp() inside a larger function that we want
    # to differentiate.
    def example_func(y):
        z = y**2
        lse = logsumexp(z)
        return np.sum(lse)

    grad_of_example = grad(example_func)
    print("Gradient: ", grad_of_example(npr.randn(10)))

    # Check the gradients numerically, just to be safe.
    quick_grad_check(example_func, npr.randn(10))
Example #26
def check_grads(fun, *args):
    if not args:
        raise Exception("No args given")
    exact = tuple([grad(fun, i)(*args) for i in range(len(args))])
    numeric = nd(fun, *args)
    check_equivalent(exact, numeric)
Example #27
def grad_named(fun, argname):
    '''Takes gradients with respect to a named argument.
       Doesn't work on *args or **kwargs.'''
    arg_index = getargspec(fun).args.index(argname)
    return grad(fun, arg_index)
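
# Usage sketch (illustrative, not from the original source): differentiate
# with respect to the named argument 'scale' rather than by position.
import autograd.numpy as np

def f(x, scale):
    return np.sum(scale * x ** 2)

d_scale = grad_named(f, 'scale')           # uses grad_named defined above
print(d_scale(np.array([1.0, 2.0]), 3.0))  # d/dscale = sum(x**2) = 5.0
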
Example #28
def rgrad(cost, proj):
    """
    Generates the Riemannian gradient of cost. Cost must be defined using
    autograd.numpy.
    """
    return lambda x: proj(x, grad(cost)(x))
Example #29
    def compute_gradient(self, objective, argument):
        """
        Compute the gradient of 'objective' with respect to the first
        argument and return as a function.
        """
        return grad(objective)