def ggnvp_maker(*args, **kwargs):
    f_vjp, f_x = make_vjp(f, f_argnum)(*args, **kwargs)
    g_hvp, grad_g_x = make_vjp(grad(g))(f_x)
    # The VJP of f is linear, so taking its VJP in turn yields the JVP of f.
    f_vjp_vjp, _ = make_vjp(f_vjp)(vspace(getval(grad_g_x)).zeros())

    def ggnvp(v):
        return f_vjp(g_hvp(f_vjp_vjp(v)))

    return ggnvp

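# Usage sketch for a maker like the one above, via autograd's public
# make_ggnvp (assumes autograd is installed; f, g and x0 are illustrative
# names, not from the original source):
import autograd.numpy as np
from autograd import make_ggnvp

def f(x):
    return np.tanh(x)            # "model" whose generalized Gauss-Newton we want

def g(y):
    return 0.5 * np.sum(y ** 2)  # convex "loss" applied to the output of f

x0 = np.array([0.5, -1.0])
ggnvp = make_ggnvp(f, g)(x0)
# ggnvp(v) computes J_f(x0).T @ H_g(f(x0)) @ J_f(x0) @ v without ever
# forming the Jacobian or the Hessian explicitly.
print(ggnvp(np.array([1.0, 0.0])))
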
def mtp_hessian_grad_and_value(fun, x):
    """
    Makes a function that returns the MTP, Hessian, gradient and value of a
    function.

    For a scalar-valued function `fun` the matrix-Tressian-product (MTP) is
    here defined as a function of a matrix `m` corresponding to

        mtp(m) = sum(m[:, :] * t[:, :, :], axis=(-1, -2))

    where `t` is the 'Tressian' of `f = fun(x)` wrt `x`, i.e. the 3D array of
    third-order partial derivatives of the scalar-valued function, such that

        t[i, j, k] = ∂³f / (∂x[i] ∂x[j] ∂x[k])

    Assumes that the function `fun` broadcasts along the first dimension of
    the input being differentiated with respect to, so that a batch of
    outputs can be computed concurrently for a batch of inputs.
    """
    mtp, (hessian, grad, val) = make_vjp(
        lambda x: atuple(hessian_grad_and_value(fun)(x)), x)
    return (
        lambda m: mtp((m, vspace(grad).zeros(), vspace(val).zeros())),
        hessian,
        grad,
        val,
    )

def check_vjp(fun, arg):
    vs_in = vspace(arg)
    vs_out = vspace(fun(arg))
    autograd_jac = linear_fun_to_matrix(
        flatten_fun(make_vjp(fun)(arg)[0], vs_out), vs_out).T
    numerical_jac = linear_fun_to_matrix(
        numerical_deriv(flatten_fun(fun, vs_in), vspace_flatten(arg)), vs_in)
    assert np.allclose(autograd_jac, numerical_jac)

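# `numerical_deriv`, `flatten_fun` and `linear_fun_to_matrix` are helpers not
# shown in the snippet above. As a rough sketch of the intent, `numerical_deriv`
# is assumed here to return the central-difference directional derivative of a
# flat-vector function at a flat point:
import numpy as np

def numerical_deriv(fun, x, eps=1e-6):
    def deriv(v):
        return (fun(x + eps * v) - fun(x - eps * v)) / (2 * eps)
    return deriv
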
def grad_and_value(fun, x):
    """
    Returns a function that returns both gradient and value of a function.
    """
    vjp, val = make_vjp(fun, x)
    if not vspace(val).size == 1:
        raise TypeError("grad_and_value only applies to real scalar-output"
                        " functions.")
    return vjp(vspace(val).ones()), val

def grad_and_value(fun, x):
    """
    Makes a function that returns both gradient and value of a function.
    """
    vjp, val = make_vjp(fun, x)
    if not vspace(val).size == 1:
        raise TypeError("grad_and_value only applies to real scalar-output"
                        " functions.")
    return vjp(vspace(val).ones()), val

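# Usage sketch for the unary grad_and_value above (assumes the core unary
# autograd.core.make_vjp and vspace are in scope, as the snippet requires;
# f and x0 are illustrative names):
import autograd.numpy as np

def f(x):
    return np.sum(x ** 2)

x0 = np.array([1.0, 2.0, 3.0])
g, v = grad_and_value(f, x0)
# g == 2 * x0 == [2., 4., 6.] and v == 14.0
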
def jacobian_and_value(fun, x):
    """
    Makes a function that returns both the Jacobian and value of a function.

    Assumes that the function `fun` broadcasts along the first dimension of
    the input being differentiated with respect to, so that a batch of
    outputs can be computed concurrently for a batch of inputs.
    """
    val = fun(x)
    v_vspace = vspace(val)
    x_vspace = vspace(x)
    x_rep = np.tile(x, (v_vspace.size,) + (1,) * x_vspace.ndim)
    vjp_rep, _ = make_vjp(fun, x_rep)
    jacobian_shape = v_vspace.shape + x_vspace.shape
    basis_vectors = np.array([b for b in v_vspace.standard_basis()])
    jacobian = vjp_rep(basis_vectors)
    return np.reshape(jacobian, jacobian_shape), val

def jacobian_and_value(fun, x):
    """
    Returns a function that returns both the Jacobian and value of a
    function.

    Assumes that the function `fun` broadcasts along the first dimension of
    the input being differentiated with respect to, so that a batch of
    outputs can be computed concurrently for a batch of inputs.
    """
    val = fun(x)
    v_vspace = vspace(val)
    x_vspace = vspace(x)
    x_rep = np.tile(x, (v_vspace.size,) + (1,) * x_vspace.ndim)
    vjp_rep, _ = make_vjp(fun, x_rep)
    jacobian_shape = v_vspace.shape + x_vspace.shape
    basis_vectors = np.array([b for b in v_vspace.standard_basis()])
    jacobian = vjp_rep(basis_vectors)
    return np.reshape(jacobian, jacobian_shape), val

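# Usage sketch: an elementwise function broadcasts along a leading batch
# axis, so the tiling trick above evaluates all output-basis VJPs in one
# batched pass (x0 is an illustrative name):
import autograd.numpy as np

x0 = np.array([0.1, 0.2, 0.3])
jac, val = jacobian_and_value(np.tanh, x0)
# jac is the 3 x 3 diagonal matrix with entries 1 - np.tanh(x0) ** 2
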
def hessian_grad_and_value(fun, x):
    """
    Makes a function that returns the Hessian, gradient & value of a function.

    Assumes that the function `fun` broadcasts along the first dimension of
    the input being differentiated with respect to, so that a batch of
    outputs can be computed concurrently for a batch of inputs.
    """
    def grad_fun(x):
        vjp, val = make_vjp(fun, x)
        return vjp(vspace(val).ones()), val

    x_vspace = vspace(x)
    x_rep = np.tile(x, (x_vspace.size,) + (1,) * x_vspace.ndim)
    vjp_grad, (grad, val) = make_vjp(lambda x: atuple(grad_fun(x)), x_rep)
    hessian_shape = x_vspace.shape + x_vspace.shape
    basis_vectors = np.array([b for b in x_vspace.standard_basis()])
    hessian = vjp_grad((basis_vectors, vspace(val).zeros()))
    return np.reshape(hessian, hessian_shape), grad[0], val[0]

def hessian_grad_and_value(fun, x):
    """
    Returns a function that returns the Hessian, gradient and value of a
    function.

    Assumes that the function `fun` broadcasts along the first dimension of
    the input being differentiated with respect to, so that a batch of
    outputs can be computed concurrently for a batch of inputs.
    """
    def grad_fun(x):
        vjp, val = make_vjp(fun, x)
        return vjp(vspace(val).ones()), val

    x_vspace = vspace(x)
    x_rep = np.tile(x, (x_vspace.size,) + (1,) * x_vspace.ndim)
    vjp_grad, (grad, val) = make_vjp(lambda x: atuple(grad_fun(x)), x_rep)
    hessian_shape = x_vspace.shape + x_vspace.shape
    basis_vectors = np.array([b for b in x_vspace.standard_basis()])
    hessian = vjp_grad((basis_vectors, vspace(val).zeros()))
    return np.reshape(hessian, hessian_shape), grad[0], val[0]

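# Usage sketch (the axis=-1 reduction makes f broadcast over a leading batch
# axis, as the docstring requires; f and x0 are illustrative names):
import autograd.numpy as np

def f(x):
    return np.sum(x ** 3, axis=-1)

x0 = np.array([1.0, 2.0])
hess, g, v = hessian_grad_and_value(f, x0)
# hess == diag(6 * x0), g == 3 * x0 ** 2 == [3., 12.], v == 9.0
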
def mhp_jacobian_and_value(fun, x):
    """
    Makes a function that returns the MHP, Jacobian and value of a function.

    For a vector-valued function `fun` the matrix-Hessian-product (MHP) is
    here defined as a function of a matrix `m` corresponding to

        mhp(m) = sum(m[:, :, None] * h[:, :, :], axis=(0, 1))

    where `h` is the vector-Hessian of `f = fun(x)` wrt `x`, i.e. the rank-3
    tensor of second-order partial derivatives of the vector-valued function,
    such that

        h[i, j, k] = ∂²f[i] / (∂x[j] ∂x[k])

    Assumes that the function `fun` broadcasts along the first dimension of
    the input being differentiated with respect to, so that a batch of
    outputs can be computed concurrently for a batch of inputs.
    """
    mhp, (jacob, val) = make_vjp(
        lambda x: atuple(jacobian_and_value(fun)(x)), x)
    return lambda m: mhp((m, vspace(val).zeros())), jacob, val

def mhp_jacobian_and_value(fun, x):
    """
    Returns a function that returns the MHP, Jacobian and value of a
    function.

    For a vector-valued function `fun` the matrix-Hessian-product (MHP) is
    here defined as a function of a matrix `m` corresponding to

        mhp(m) = sum(m[:, :] * h[:, :, :], axis=(-1, -2))

    where `h` is the vector-Hessian of `f = fun(x)` wrt `x`, i.e. the rank-3
    tensor of second-order partial derivatives of the vector-valued function,
    such that

        h[k, i, j] = ∂²f[i] / (∂x[j] ∂x[k])

    Assumes that the function `fun` broadcasts along the first dimension of
    the input being differentiated with respect to, so that a batch of
    outputs can be computed concurrently for a batch of inputs.
    """
    mhp, (jacob, val) = make_vjp(
        lambda x: atuple(jacobian_and_value(fun)(x)), x)
    return lambda m: mhp((m, vspace(val).zeros())), jacob, val

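# Hypothetical usage (assumes the decorator convention of this snippet's
# source file, where jacobian_and_value(fun) returns a unary function of x;
# f, x0 and m are illustrative names):
import autograd.numpy as np

def f(x):
    return x ** 2   # R^2 -> R^2, broadcasts along a leading batch axis

x0 = np.array([1.0, 2.0])
mhp, jac, val = mhp_jacobian_and_value(f, x0)
m = np.eye(2)
# Here h[k, i, j] = 2 when i == j == k and 0 otherwise, so mhp(m) picks out
# twice the diagonal of m:
print(mhp(m))   # [2., 2.]
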
def mtp_hessian_grad_and_value(fun, x):
    """
    Returns a function that returns the MTP, Hessian, gradient and value of
    a function.

    For a scalar-valued function `fun` the matrix-Tressian-product (MTP) is
    here defined as a function of a matrix `m` corresponding to

        mtp(m) = sum(m[:, :] * t[:, :, :], axis=(-1, -2))

    where `t` is the 'Tressian' of `f = fun(x)` wrt `x`, i.e. the rank-3
    tensor of third-order partial derivatives of the scalar-valued function,
    such that

        t[i, j, k] = ∂³f / (∂x[i] ∂x[j] ∂x[k])

    Assumes that the function `fun` broadcasts along the first dimension of
    the input being differentiated with respect to, so that a batch of
    outputs can be computed concurrently for a batch of inputs.
    """
    mtp, (hessian, grad, val) = make_vjp(
        lambda x: atuple(hessian_grad_and_value(fun)(x)), x)
    return (
        lambda m: mtp((m, vspace(grad).zeros(), vspace(val).zeros())),
        hessian, grad, val)

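# Hypothetical usage, under the same decorator convention assumed above
# (hessian_grad_and_value(fun) returning a unary function of x; f and x0
# are illustrative names):
import autograd.numpy as np

def f(x):
    return np.sum(x ** 3, axis=-1)

x0 = np.array([1.0, 2.0])
mtp, hess, g, v = mtp_hessian_grad_and_value(f, x0)
# t[i, j, k] = 6 when i == j == k and 0 otherwise, so:
print(mtp(np.eye(2)))   # [6., 6.]
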
def gradfun(*args, **kwargs):
    args = list(args)
    args[argnum] = safe_type(args[argnum])
    vjp, ans = make_vjp(scalar_fun, argnum)(*args, **kwargs)
    return vjp(cast_to_same_dtype(1.0, ans))

def gradfun(*args, **kwargs):
    vjp, _ = make_vjp(fun, argnum)(*args, **kwargs)
    return vjp(1.0)

def gradfun(*args, **kwargs):
    args = list(args)
    args[argnum] = safe_type(args[argnum])
    vjp, _ = make_vjp(scalar_fun, argnum)(*args, **kwargs)
    return vjp(1.0)

def jacfun(*args, **kwargs):
    vjp, ans = make_vjp(fun, argnum)(*args, **kwargs)
    ans_vspace = vspace(getval(ans))
    jacobian_shape = ans_vspace.shape + vspace(getval(args[argnum])).shape
    grads = map(vjp, ans_vspace.standard_basis())
    return np.reshape(np.stack(grads), jacobian_shape)

def hvp_maker(*args, **kwargs):
    return make_vjp(grad(fun, argnum), argnum)(*args, **kwargs)

def wrapped_grad(argnum, g, ans, vs, gvs, args, kwargs):
    return make_vjp(fun, argnum)(*args, **kwargs)[0](g)

def ggnvp_maker(*args, **kwargs):
    f_vjp, f_x = make_vjp(f, f_argnum)(*args, **kwargs)
    g_hvp, grad_g_x = make_vjp(grad(g))(f_x)
    f_jvp, _ = make_vjp(f_vjp)(vspace(grad_g_x).zeros())

    def ggnvp(v):
        return f_vjp(g_hvp(f_jvp(v)))

    return ggnvp

def grad_fun(x):
    vjp, val = make_vjp(fun, x)
    return vjp(vspace(val).ones()), val

def gradfun(*args, **kwargs):
    args = list(args)
    args[argnum] = args[argnum]
    vjp, _ = make_vjp(fun, argnum)(*args, **kwargs)
    return vjp(1.0)

def grad(fun, x):
    vjp, _ = make_vjp(fun, x)
    return vjp(1.0)

def jvp_maker(*args, **kwargs):
    vjp, y = make_vjp(fun, argnum)(*args, **kwargs)
    vjp_vjp, _ = make_vjp(vjp)(vspace(y).zeros())
    return vjp_vjp  # vjp_vjp is just jvp by linearity

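# The trick above in isolation: a VJP u -> J.T @ u is linear, so
# reverse-differentiating it once more recovers v -> J @ v, i.e. the JVP
# (assumes autograd is installed; f, x0 and v are illustrative names):
import autograd.numpy as np
from autograd import make_vjp
from autograd.extend import vspace

def f(x):
    return np.sin(x)

x0 = np.array([0.0, 1.0])
vjp, y = make_vjp(f)(x0)
jvp, _ = make_vjp(vjp)(vspace(y).zeros())
v = np.array([1.0, 2.0])
print(jvp(v))   # == np.cos(x0) * v for the elementwise sine
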
def jacfun(*args, **kwargs):
    vjp, ans = make_vjp(fun, argnum)(*args, **kwargs)
    outshape = getshape(ans)
    grads = map(vjp, unit_vectors(outshape))
    jacobian_shape = outshape + getshape(args[argnum])
    return np.reshape(concatenate(grads), jacobian_shape)

def gradfun(*args, **kwargs):
    args = list(args)
    args[argnum] = safe_type(args[argnum])
    vjp, ans = make_vjp(fun, argnum)(*args, **kwargs)
    return vjp(vspace(ans).ones())

def hvp_maker(*args, **kwargs):
    return make_vjp(grad(fun, argnum), argnum)(*args, **kwargs)[0]

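# Usage sketch via autograd's public make_hvp, which composes grad with
# make_vjp in the same way as the maker above (f and x0 are illustrative
# names):
import autograd.numpy as np
from autograd import make_hvp

def f(x):
    return np.sum(x ** 3)

x0 = np.array([1.0, 2.0])
hvp, grad_x0 = make_hvp(f)(x0)
print(hvp(np.array([1.0, 1.0])))   # == diag(6 * x0) @ [1, 1] == [6., 12.]
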
def jacfun(*args, **kwargs):
    vjp, ans = make_vjp(fun, argnum)(*args, **kwargs)
    ans_vspace = vspace(ans)
    jacobian_shape = ans_vspace.shape + vspace(args[argnum]).shape
    grads = map(vjp, ans_vspace.standard_basis())
    return np.reshape(np.stack(grads), jacobian_shape)

def jacobian_reverse(fun, x):
    """Compute the Jacobian of `fun` with respect to `x` using reverse-mode
    differentiation."""
    vjp, ans = make_vjp(fun, x)
    grads = map(vjp, vspace(ans).standard_basis())
    m, n = _jac_shape(x, ans)
    return npa.reshape(npa.stack(grads), (n, m))

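# Usage sketch (hedged: `_jac_shape` and `npa` are not shown above and are
# assumed to be a shape helper and an autograd.numpy alias; f and x0 are
# illustrative names):
import autograd.numpy as npa

def f(x):
    return npa.array([x[0] * x[1], x[0] + x[1]])

x0 = npa.array([2.0, 3.0])
print(jacobian_reverse(f, x0))   # [[3., 2.], [1., 1.]]
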
def jvp_maker(*args, **kwargs):
    vjp, y = make_vjp(fun, argnum)(*args, **kwargs)
    vjp_vjp, _ = make_vjp(vjp)(vspace(getval(y)).zeros())
    return vjp_vjp  # vjp_vjp is just jvp by linearity

def gradfun(*args, **kwargs):
    args = list(args)
    args[argnum] = safe_type(args[argnum])
    vjp, ans = make_vjp(fun, argnum)(*args, **kwargs)
    return vjp(vspace(getval(ans)).ones())