def mtp_hessian_grad_and_value(fun, x):
    """
    Build the matrix-Tressian-product (MTP) of `fun` at `x`, together with
    the Hessian, gradient and value computed along the way.

    For a scalar-valued function `fun` the MTP is defined here as a function
    of a matrix `m` corresponding to

        mtp(m) = sum(m[:, :] * t[:, :, :], axis=(-1, -2))

    where `t` is the 'Tressian' of `f = fun(x)` wrt `x`, i.e. the 3D array of
    third-order partial derivatives of the scalar-valued function such that

        t[i, j, k] = ∂³f / (∂x[i] ∂x[j] ∂x[k])

    Assumes that `fun` broadcasts along the first dimension of the input
    being differentiated, so that a batch of outputs can be computed
    concurrently for a batch of inputs.

    Returns a 4-tuple `(mtp, hessian, grad, val)` where `mtp` is a callable
    taking the matrix `m`.
    """
    # NOTE(review): `hessian_grad_and_value(fun)` is called with the function
    # only and its result is then applied to the point; this presumably
    # relies on a decorator that is not visible here - confirm.
    def stacked_derivatives(point):
        return atuple(hessian_grad_and_value(fun)(point))

    mtp, (hessian, grad, val) = make_vjp(stacked_derivatives, x)

    def matrix_tressian_product(m):
        # Only the Hessian output is seeded with `m`; the gradient and value
        # outputs receive zero cotangents so they do not contribute.
        zero_grad = vspace(grad).zeros()
        zero_val = vspace(val).zeros()
        return mtp((m, zero_grad, zero_val))

    return matrix_tressian_product, hessian, grad, val
def performance_measure(Ahat):
    """
    Score the candidate matrix `Ahat` from a sampled trajectory.

    The names `B`, `Q`, `R`, `standard_deviation`, `rng` and `env` are read
    from the enclosing scope. The steps are:

    1. Solve the Riccati problem for `(Ahat, B, Q, R)` and build the
       corresponding policy plus a smoothed variant of it.
    2. Draw 100 samples from a trajectory generated with the smoothed policy.
       Columns 0-1 of each sample are used as the state, column 2 as the
       action and column 3 as the reward.
    3. Form the reversed cumulative sum of the rewards (for each sample, the
       sum of its own and all later rewards).
    4. Return the mean of that accumulation multiplied by the log-density
       returned by `log_diagonal_normal_pdf` for the un-smoothed policy.
    """
    params = atuple((Ahat, B, Q, R))
    riccati_solution = solve_riccati(*params)
    riccati_policy = make_riccati_policy(riccati_solution, *params)
    smoothed_policy = make_smooth_policy(
        riccati_policy, standard_deviation, rng
    )

    trajectory = generate_trajectory(smoothed_policy, *env)
    samples = take_samples(trajectory, 100)
    states, actions, rewards = samples[:, :2], samples[:, 2], samples[:, 3]

    # Reverse, accumulate, reverse back: entry i holds sum(rewards[i:]).
    rewards_to_go = np.cumsum(rewards[::-1])[::-1]

    # The log-density is evaluated for the un-smoothed policy; actions get a
    # trailing axis before being passed in.
    logpdf = log_diagonal_normal_pdf(
        riccati_policy, standard_deviation, states, actions[:, np.newaxis]
    )
    return np.mean(rewards_to_go[:, np.newaxis] * logpdf)
def hessian_grad_and_value(fun, x):
    """
    Compute the Hessian, gradient and value of `fun` at `x`.

    Assumes that `fun` broadcasts along the first dimension of the input
    being differentiated, so that a batch of outputs can be computed
    concurrently for a batch of inputs.

    NOTE(review): the previous summary described this as returning a
    function; as written it takes `(fun, x)` and returns the results
    directly, so that wording presumably refers to a decorator applied
    outside this block - confirm.

    Returns a 3-tuple `(hessian, grad, val)` where `hessian` has shape
    `vspace(x).shape + vspace(x).shape`.
    """
    def batched_grad_and_value(batch):
        # Seeding the backward pass with ones yields the gradient of every
        # batch entry in a single call (relies on `fun` broadcasting).
        backward, value = _make_vjp(fun, batch)
        return backward(vspace(value).ones()), value

    space = vspace(x)

    # One copy of `x` per input component, stacked along a new leading axis.
    repeats = (space.size, ) + (1, ) * space.ndim
    x_batch = np.tile(x, repeats)

    backward_grad, (grads, values) = _make_vjp(
        lambda batch: atuple(batched_grad_and_value(batch)), x_batch
    )

    # Each copy of `x` is paired with one standard basis vector; the value
    # output gets a zero cotangent so only the gradients are differentiated.
    basis = np.array(list(space.standard_basis()))
    stacked_rows = backward_grad((basis, vspace(values).zeros()))

    hessian = np.reshape(stacked_rows, space.shape + space.shape)
    # Every copy carries the same gradient and value, so take the first.
    return hessian, grads[0], values[0]
def hessian_grad_and_value(fun, x):
    """
    Compute the Hessian, gradient and value of `fun` at `x`.

    Assumes that `fun` broadcasts along the first dimension of the input
    being differentiated, so that a batch of outputs can be computed
    concurrently for a batch of inputs.

    NOTE(review): the previous summary described this as returning a
    function; as written it takes `(fun, x)` and returns the results
    directly, so that wording presumably refers to a decorator applied
    outside this block - confirm.

    Returns a 3-tuple `(hessian, grad, val)` where `hessian` has shape
    `vspace(x).shape + vspace(x).shape`.
    """
    def grad_and_value(batch):
        # A cotangent of ones gives the gradient for every batch entry at
        # once (relies on `fun` broadcasting over the leading axis).
        pullback, value = make_vjp(fun, batch)
        return pullback(vspace(value).ones()), value

    x_space = vspace(x)

    # Replicate `x` once per input component along a new leading axis.
    tile_reps = (x_space.size,) + (1,) * x_space.ndim
    x_tiled = np.tile(x, tile_reps)

    pullback_grad, (grads, values) = make_vjp(
        lambda batch: atuple(grad_and_value(batch)), x_tiled
    )

    # Seed replica i with the i-th standard basis vector and give the value
    # output a zero cotangent, so only the gradient is differentiated.
    unit_vectors = np.array(list(x_space.standard_basis()))
    hessian_rows = pullback_grad((unit_vectors, vspace(values).zeros()))

    # All replicas share the same gradient and value; return the first.
    return (
        np.reshape(hessian_rows, x_space.shape + x_space.shape),
        grads[0],
        values[0],
    )
def mhp_jacobian_and_value(fun, x):
    """
    Build the matrix-Hessian-product (MHP) of `fun` at `x`, together with
    the Jacobian and value computed along the way.

    For a vector-valued function `fun` the MHP is defined here as a function
    of a matrix `m` corresponding to

        mhp(m) = sum(m[:, :] * h[:, :, :], axis=(-1, -2))

    where `h` is the vector-Hessian of `f = fun(x)` wrt `x`, i.e. the rank-3
    tensor of second-order partial derivatives of the vector-valued function,
    such that

        h[k, i, j] = (d**2 f[i]) / (dx[j] * dx[k])

    Assumes that `fun` broadcasts along the first dimension of the input
    being differentiated, so that a batch of outputs can be computed
    concurrently for a batch of inputs.

    Returns a 3-tuple `(mhp, jacob, val)` where `mhp` is a callable taking
    the matrix `m`.
    """
    # NOTE(review): `jacobian_and_value(fun)` is called with the function
    # only and its result is then applied to the point; this presumably
    # relies on a decorator that is not visible here - confirm.
    def jacobian_value_pair(point):
        return atuple(jacobian_and_value(fun)(point))

    mhp, (jacob, val) = _make_vjp(jacobian_value_pair, x)

    def matrix_hessian_product(m):
        # Seed only the Jacobian output; the value output gets a zero
        # cotangent so it does not contribute to the product.
        return mhp((m, vspace(val).zeros()))

    return matrix_hessian_product, jacob, val
def mhp_jacobian_and_value(fun, x):
    """
    Build the matrix-Hessian-product (MHP) of `fun` at `x`, together with
    the Jacobian and value computed along the way.

    For a vector-valued function `fun` the MHP is defined here as a function
    of a matrix `m` corresponding to

        mhp(m) = sum(m[:, :] * h[:, :, :], axis=(-1, -2))

    where `h` is the vector-Hessian of `f = fun(x)` wrt `x`, i.e. the rank-3
    tensor of second-order partial derivatives of the vector-valued function,
    such that

        h[k, i, j] = (d**2 f[i]) / (dx[j] * dx[k])

    Assumes that `fun` broadcasts along the first dimension of the input
    being differentiated, so that a batch of outputs can be computed
    concurrently for a batch of inputs.

    Returns a 3-tuple `(mhp, jacob, val)` where `mhp` is a callable taking
    the matrix `m`.
    """
    # NOTE(review): `jacobian_and_value(fun)` is called with the function
    # only and its result is then applied to the point; this presumably
    # relies on a decorator that is not visible here - confirm.
    def jacobian_then_value(point):
        return atuple(jacobian_and_value(fun)(point))

    mhp, (jacob, val) = make_vjp(jacobian_then_value, x)

    def apply_mhp(m):
        # The value output receives a zero cotangent, so only the Jacobian
        # output is contracted with `m`.
        zero_val = vspace(val).zeros()
        return mhp((m, zero_val))

    return apply_mhp, jacob, val
def mtp_hessian_grad_and_value(fun, x):
    """
    Build the matrix-Tressian-product (MTP) of `fun` at `x`, together with
    the Hessian, gradient and value computed along the way.

    For a scalar-valued function `fun` the MTP is defined here as a function
    of a matrix `m` corresponding to

        mtp(m) = sum(m[:, :] * t[:, :, :], axis=(-1, -2))

    where `t` is the 'Tressian' of `f = fun(x)` wrt `x`, i.e. the rank-3
    tensor of third-order partial derivatives of the scalar-valued function
    such that

        t[i, j, k] = d**3 f / (dx[i] * dx[j] * dx[k])

    Assumes that `fun` broadcasts along the first dimension of the input
    being differentiated, so that a batch of outputs can be computed
    concurrently for a batch of inputs.

    Returns a 4-tuple `(mtp, hessian, grad, val)` where `mtp` is a callable
    taking the matrix `m`.
    """
    # NOTE(review): `hessian_grad_and_value(fun)` is called with the function
    # only and its result is then applied to the point; this presumably
    # relies on a decorator that is not visible here - confirm.
    def hessian_grad_value_triple(point):
        return atuple(hessian_grad_and_value(fun)(point))

    mtp, (hessian, grad, val) = make_vjp(hessian_grad_value_triple, x)

    def apply_mtp(m):
        # Contract `m` against the Hessian output only: the gradient and
        # value outputs are given zero cotangents.
        cotangents = (m, vspace(grad).zeros(), vspace(val).zeros())
        return mtp(cotangents)

    return apply_mtp, hessian, grad, val
def solve_riccati(A, B, Q, R):
    """
    Solve for the fixed point of the update `k -> k + riccati_operator(k, p)`.

    The parameters `(A, B, Q, R)` are bundled into a tuple `p` and handed to
    `fixed_point` together with the update rule, an identity matrix of size
    `A.shape[0]` as the starting iterate, and a stopping predicate built with
    `distance_predicate(tol=1e-5)`.

    Returns whatever `fixed_point` returns.
    NOTE(review): presumably the converged iterate - confirm against
    `fixed_point`.
    """
    params = atuple((A, B, Q, R))

    def update(k, p):
        # A fixed point of this map is a root of `riccati_operator`.
        return k + riccati_operator(k, p)

    initial_iterate = np.eye(A.shape[0])
    stop_condition = distance_predicate(tol=1e-5)
    return fixed_point(update, params, initial_iterate, stop_condition)