Example 1
def mtp_hessian_grad_and_value(fun, x):
    """
    Returns the MTP function, Hessian, gradient and value of a function.

    For a scalar-valued function `fun` the matrix-Tressian-product (MTP) is
    here defined as a function of a matrix `m` corresponding to

        mtp(m) = sum(m[:, :] * t[:, :, :], axis=(-1, -2))

    where `t` is the 'Tressian' of `f = fun(x)` wrt `x` i.e. the 3D array of
    third-order partial derivatives of the scalar-valued function such that

        t[i, j, k] = ∂³f / (∂x[i] ∂x[j] ∂x[k])

    Assumes that the function `fun` broadcasts along the first dimension of the
    input being differentiated with respect to, so that a batch of outputs can
    be computed concurrently for a batch of inputs.
    """
    mtp, (hessian, grad, val) = make_vjp(
        lambda x: atuple(hessian_grad_and_value(fun)(x)), x)
    return (
        lambda m: mtp((m, vspace(grad).zeros(), vspace(val).zeros())),
        hessian,
        grad,
        val,
    )
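As a sanity check on the contraction convention, the MTP definition in the docstring can be exercised with plain NumPy on a function whose Tressian is known in closed form. The sketch below (all names illustrative, not from the source) uses f(x) = sum(x**3) / 6, whose Tressian is 1 where i == j == k and 0 elsewhere:

import numpy as np

# Tressian of f(x) = sum(x**3) / 6: t[i, j, k] = 1 if i == j == k else 0.
n = 3
t = np.zeros((n, n, n))
for i in range(n):
    t[i, i, i] = 1.0

m = np.arange(n * n, dtype=float).reshape(n, n)
# mtp(m)[i] = sum_{j,k} m[j, k] * t[i, j, k], matching the docstring formula.
mtp_m = np.sum(m * t, axis=(-1, -2))
assert np.allclose(mtp_m, np.diag(m))  # for this t it reduces to m's diagonal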
Example 2
    def performance_measure(Ahat):
        params = atuple((Ahat, B, Q, R))
        K = solve_riccati(*params)
        riccati_policy = make_riccati_policy(K, *params)

        # Add Gaussian exploration noise to the deterministic Riccati policy.
        smooth_riccati_policy = make_smooth_policy(riccati_policy,
                                                   standard_deviation, rng)
        # Draw 100 samples from a rollout; the columns are the two state
        # components, the action and the reward.
        samples = take_samples(
            generate_trajectory(smooth_riccati_policy, *env), 100)
        states = samples[:, :2]
        actions = samples[:, 2]
        rewards = samples[:, 3]

        # Reward-to-go: the reversed cumulative sum gives, at each step, the
        # sum of rewards from that step to the end of the trajectory.
        reward_accumulation = np.cumsum(rewards[::-1])[::-1]
        logpdf = log_diagonal_normal_pdf(riccati_policy, standard_deviation,
                                         states, actions[:, np.newaxis])

        # Score-function (REINFORCE-style) surrogate objective: the mean of
        # the return-weighted log-density of the sampled actions.
        return np.mean(reward_accumulation[:, np.newaxis] * logpdf)
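The reversed cumulative sum above is the usual reward-to-go computation. A minimal standalone check of just that step, with made-up rewards:

import numpy as np

rewards = np.array([1.0, 2.0, 3.0])
# reward_to_go[t] = sum of rewards from step t to the end of the trajectory.
reward_to_go = np.cumsum(rewards[::-1])[::-1]
assert np.allclose(reward_to_go, [6.0, 5.0, 3.0])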
Example 3
def hessian_grad_and_value(fun, x):
    """
    Returns the Hessian, gradient and value of a function at `x`.

    Assumes that the function `fun` broadcasts along the first dimension of the
    input being differentiated with respect to, so that a batch of outputs can
    be computed concurrently for a batch of inputs.
    """
    def grad_fun(x):
        vjp, val = _make_vjp(fun, x)
        return vjp(vspace(val).ones()), val

    x_vspace = vspace(x)
    # Tile the input into a batch of size `x_vspace.size` so that a single
    # reverse pass against the batch of standard basis vectors yields every
    # row of the Hessian at once.
    x_rep = np.tile(x, (x_vspace.size,) + (1,) * x_vspace.ndim)
    vjp_grad, (grad, val) = _make_vjp(lambda x: atuple(grad_fun(x)), x_rep)
    hessian_shape = x_vspace.shape + x_vspace.shape
    basis_vectors = np.array([b for b in x_vspace.standard_basis()])
    hessian = vjp_grad((basis_vectors, vspace(val).zeros()))
    return np.reshape(hessian, hessian_shape), grad[0], val[0]
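A sketch of how this might be called, assuming `_make_vjp`, `vspace` and `atuple` are autograd's `make_vjp`, `vspace` and `builtins.tuple` (the imports are inferred from the identifiers; they are not shown in the snippet):

import autograd.numpy as np
from autograd.core import make_vjp as _make_vjp
from autograd.builtins import tuple as atuple
from autograd.extend import vspace

# Batched quadratic: maps a (batch, 2) input to a (batch,) output, which
# satisfies the broadcasting assumption in the docstring.
def fun(x):
    return 0.5 * np.sum(x**2, axis=-1)

x = np.array([1.0, 2.0])
hessian, grad, val = hessian_grad_and_value(fun, x)
assert np.allclose(hessian, np.eye(2))  # Hessian of 0.5 * x.x is the identity
assert np.allclose(grad, x)             # gradient is x itself
assert np.allclose(val, 2.5)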
Example 4
    def hessian_grad_and_value(fun, x):
        """
        Returns the Hessian, gradient and value of a function at `x`.

        Assumes that the function `fun` broadcasts along the first dimension
        of the input being differentiated with respect to, so that a batch of
        outputs can be computed concurrently for a batch of inputs.
        """
        def grad_fun(x):
            vjp, val = make_vjp(fun, x)
            return vjp(vspace(val).ones()), val
        x_vspace = vspace(x)
        x_rep = np.tile(x, (x_vspace.size,) + (1,) * x_vspace.ndim)
        vjp_grad, (grad, val) = make_vjp(lambda x: atuple(grad_fun(x)), x_rep)
        hessian_shape = x_vspace.shape + x_vspace.shape
        basis_vectors = np.array([b for b in x_vspace.standard_basis()])
        hessian = vjp_grad((basis_vectors, vspace(val).zeros()))
        return np.reshape(hessian, hessian_shape), grad[0], val[0]
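This indented variant is identical to Example 3 up to the `make_vjp` spelling. As a cross-check, its output can be compared against autograd's built-in `hessian`; the sketch below assumes the module-level definition from Example 3 is in scope:

import autograd.numpy as np
from autograd import hessian

def fun(x):
    return 0.5 * np.sum(x**2, axis=-1)

x = np.array([1.0, 2.0])
h, g, v = hessian_grad_and_value(fun, x)
# The single-reverse-pass Hessian should agree with autograd's built-in
# `hessian` (fun reduces a 1D input to a scalar, so it applies directly).
assert np.allclose(h, hessian(fun)(x))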
Example 5
def mhp_jacobian_and_value(fun, x):
    """
    Returns the MHP function, Jacobian and value of a function.

    For a vector-valued function `fun` the matrix-Hessian-product (MHP) is here
    defined as a function of a matrix `m` corresponding to

        mhp(m) = sum(m[:, :] * h[:, :, :], axis=(-1, -2))

    where `h` is the vector-Hessian of `f = fun(x)` wrt `x` i.e. the rank-3
    tensor of second-order partial derivatives of the vector-valued function,
    such that

        h[k, i, j] = (d**2 f[i]) / (dx[j] * dx[k])

    Assumes that the function `fun` broadcasts along the first dimension of the
    input being differentiated with respect to, so that a batch of outputs can
    be computed concurrently for a batch of inputs.
    """
    mhp, (jacob, val) = _make_vjp(lambda x: atuple(jacobian_and_value(fun)(x)),
                                  x)
    return lambda m: mhp((m, vspace(val).zeros())), jacob, val
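The index convention in the formula above (output index `k` leading, summation over the two trailing axes) is easy to misread; the broadcasted sum is equivalent to an einsum, as this plain-NumPy check with arbitrary random arrays shows:

import numpy as np

rng = np.random.default_rng(0)
m = rng.standard_normal((4, 3))      # m[i, j]
h = rng.standard_normal((5, 4, 3))   # h[k, i, j]
# mhp(m)[k] = sum_{i,j} m[i, j] * h[k, i, j]
assert np.allclose(np.sum(m * h, axis=(-1, -2)),
                   np.einsum('ij,kij->k', m, h))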
Example 6
    def mhp_jacobian_and_value(fun, x):
        """
        Returns the MHP function, Jacobian and value of a function.

        For a vector-valued function `fun` the matrix-Hessian-product (MHP) is
        here defined as a function of a matrix `m` corresponding to

            mhp(m) = sum(m[:, :] * h[:, :, :], axis=(-1, -2))

        where `h` is the vector-Hessian of `f = fun(x)` wrt `x` i.e. the rank-3
        tensor of second-order partial derivatives of the vector-valued
        function, such that

            h[k, i, j] = (d**2 f[i]) / (dx[j] * dx[k])

        Assumes that the function `fun` broadcasts along the first dimension
        of the input being differentiated with respect to, so that a batch of
        outputs can be computed concurrently for a batch of inputs.
        """
        mhp, (jacob, val) = make_vjp(
            lambda x: atuple(jacobian_and_value(fun)(x)), x)
        return lambda m: mhp((m, vspace(val).zeros())), jacob, val
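This is the same listing with the underscore-free `make_vjp` spelling. A detail shared by both: the returned pair is wrapped in `atuple` so that autograd can trace through the tuple construction, since a raw Python tuple of traced values is not itself traced. The import presumably looks like this (a guess; it is not shown in the source):

# `autograd.builtins.tuple` is a traced container type, unlike a raw Python
# tuple, so VJPs can flow through tuple-valued functions.
from autograd.builtins import tuple as atuple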
Example 7
    def mtp_hessian_grad_and_value(fun, x):
        """
        Returns the MTP function, Hessian, gradient and value of a function.

        For a scalar-valued function `fun` the matrix-Tressian-product (MTP) is
        here defined as a function of a matrix `m` corresponding to

            mtp(m) = sum(m[:, :] * t[:, :, :], axis=(-1, -2))

        where `t` is the 'Tressian' of `f = fun(x)` wrt `x` i.e. the rank-3
        tensor of third-order partial derivatives of the scalar-valued function
        such that

            t[i, j, k] = d**3 f / (dx[i] * dx[j] * dx[k])

        Assumes that the function `fun` broadcasts along the first dimension
        of the input being differentiated with respect to, so that a batch of
        outputs can be computed concurrently for a batch of inputs.
        """
        mtp, (hessian, grad, val) = make_vjp(
            lambda x: atuple(hessian_grad_and_value(fun)(x)), x)
        return (
            lambda m: mtp((m, vspace(grad).zeros(), vspace(val).zeros())),
            hessian, grad, val)
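A hypothetical usage sketch: the `hessian_grad_and_value(fun)(x)` call above implies a curried wrapper (e.g. the Example 3 definition wrapped with autograd's `unary_to_nary`), which is assumed, not shown, here:

import autograd.numpy as np

def fun(x):
    # f(x) = sum(x**3) / 6 has Tressian t[i, j, k] = 1 iff i == j == k.
    return np.sum(x**3, axis=-1) / 6.0

x = np.array([1.0, 2.0])
mtp, hessian, grad, val = mtp_hessian_grad_and_value(fun, x)
# From the docstring's definition, mtp(eye(2))[i] = t[i, i, i] = 1, so the
# result should be approximately [1., 1.].
print(mtp(np.eye(2)))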
Example 8
def solve_riccati(A, B, Q, R):
    params = atuple((A, B, Q, R))
    # Iterate k <- k + riccati_operator(k, params) from the identity matrix
    # until successive iterates agree to within the 1e-5 tolerance.
    return fixed_point(lambda k, p: k + riccati_operator(k, p), params,
                       np.eye(A.shape[0]), distance_predicate(tol=1e-5))
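`fixed_point`, `riccati_operator` and `distance_predicate` are not shown in this snippet; the call implies an iteration protocol along these lines (hypothetical signatures, for illustration only):

import numpy as np

def distance_predicate(tol):
    # Converged once successive iterates agree to within `tol` in max-norm.
    return lambda k_new, k_old: np.max(np.abs(k_new - k_old)) < tol

def fixed_point(update, params, init, converged):
    # Repeatedly apply `update` until the convergence predicate is satisfied.
    k = init
    while True:
        k_new = update(k, params)
        if converged(k_new, k):
            return k_new
        k = k_new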