Example #1
def gradient(Q, R, A, B):
    """Compute the gradient of th -> Tr(P(th))

    It is assumed that Q, R are both invertible.

    """

    P, K = utils.dlqr(A, B, Q, R)
    A_c = A + B.dot(K)

    n, p = B.shape

    ret = np.zeros((n, n + p))

    # TODO: we should invert the operator X -> A_c^T X A_c - X
    # once and then use it to solve many linear equations,
    # rather than repeating the inversion many times

    # See Eq. (13) of
    # https://arxiv.org/pdf/1703.08972.pdf
    for idx in range(n):
        for jdx in range(n + p):
            U = np.zeros((n, n + p))
            U[idx, jdx] = 1
            target = A_c.T.dot(P.dot(U)).dot(np.vstack((np.eye(n), K)))
            target += target.T
            DU = utils.solve_discrete_lyapunov(A_c, target)
            ret[idx, jdx] = np.trace(DU)

    return ret
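
A finite-difference sanity check for the analytic gradient above. This is a minimal sketch, not part of the original code: it assumes gradient and function_value (Example #9) are importable from the same module, and that utils.dlqr follows the A_c = A + B K convention used above.

import numpy as np

def check_gradient(Q, R, A, B, h=1e-6):
    """Return the largest absolute gap between the analytic gradient and a
    central finite-difference estimate of theta -> Tr(P(theta))."""
    n, p = B.shape
    G = gradient(Q, R, A, B)
    G_fd = np.zeros((n, n + p))
    theta = np.hstack((A, B))
    for i in range(n):
        for j in range(n + p):
            E = np.zeros_like(theta)
            E[i, j] = h
            tp, tm = theta + E, theta - E
            f_plus = function_value(Q, R, tp[:, :n], tp[:, n:])
            f_minus = function_value(Q, R, tm[:, :n], tm[:, n:])
            G_fd[i, j] = (f_plus - f_minus) / (2.0 * h)
    return np.max(np.abs(G - G_fd))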
Example #2
def _main():
    import examples
    A_star, B_star = examples.unstable_laplacian_dynamics()

    # define costs
    Q = 1e-3 * np.eye(3)
    R = np.eye(3)

    # initial controller
    _, K_init = utils.dlqr(A_star, B_star, 1e-3*np.eye(3), np.eye(3))

    rng = np.random

    env = OFUStrategy(Q=Q,
                      R=R,
                      A_star=A_star,
                      B_star=B_star,
                      sigma_w=1,
                      reg=1e-5,
                      actual_error_multiplier=1, 
                      rls_lam=None)

    env.reset(rng)
    env.prime(100, K_init, 0.1, rng)
    for idx in range(500):
        env.step(rng)
Example #3
    def _design_controller(self, states, inputs, transitions, rng):
        logger = self._get_logger()
        logger.debug("_design_controller: have {} points for regression".format(inputs.shape[0]))

        # TODO(stephentu):
        # Currently I am using the algorithm of Abbasi-Yadkori and Szepesvari.
        # We should also try the subtly different algorithm in
        # https://arxiv.org/pdf/1711.07230.pdf.

        # fit the data
        Anom, Bnom, emp_cov = utils.solve_least_squares(states, inputs, transitions, reg=self._reg)

        if not self._has_primed:
            self._emp_cov = np.array(emp_cov)
            self._last_emp_cov = np.array(emp_cov)

        emp_cov /= inputs.shape[0] # normalize by T to improve numerics

        theta_nom = np.hstack((Anom, Bnom))
        theta_star = np.hstack((self._A_star, self._B_star))
        delta = theta_nom - theta_star
        actual_error = np.trace(delta.dot(emp_cov.dot(delta.T)))
        eps = self._actual_error_multiplier * actual_error
        logger.info("_design_controller: actual weighted error is {}, eps is {}".format(actual_error, eps))

        n, p = self._n, self._p

        def projection_operator(A, B):
            M = np.hstack((A, B))
            theta = utils.project_weighted_ball(M, theta_nom, emp_cov, eps)
            return theta[:, :n], theta[:, n:]

        A_ofu, B_ofu = ofu_pgd(
                Q=self._Q,
                R=self._R,
                Ahat=Anom,
                Bhat=Bnom,
                projection_operator=projection_operator,
                logger=logger,
                num_restarts=self._num_restarts)

        theta_ofu = np.hstack((A_ofu, B_ofu))
        delta_ofu = theta_ofu - theta_nom
        TOL = 1e-5
        assert np.trace(delta_ofu.dot(emp_cov.dot(delta_ofu.T))) <= eps + TOL

        _, K = utils.dlqr(A_ofu, B_ofu, self._Q, self._R)
        self._current_K = K

        # compute the infinite horizon cost of this controller
        Jnom = utils.LQR_cost(self._A_star, self._B_star, self._current_K, self._Q, self._R, self._sigma_w)

        # for debugging purposes,
        # check to see if this controller will stabilize the true system
        rho_true = utils.spectral_radius(self._A_star + self._B_star.dot(self._current_K))
        logger.info("_design_controller(epoch={}): rho(A_* + B_* K)={}".format(
            self._epoch_idx + 1 if self._has_primed else 0,
            rho_true))

        return (Anom, Bnom, Jnom)
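
The projection_operator above delegates to utils.project_weighted_ball. Below is a minimal sketch of one such projection, under the assumption that it is taken with respect to the Sigma-weighted Frobenius norm (the actual implementation may use a different metric): points inside the ball are returned unchanged, points outside are radially shrunk toward theta_nom.

import numpy as np

def project_weighted_ball_sketch(M, theta_nom, Sigma, eps):
    """Project M onto {theta : Tr((theta - theta_nom) Sigma (theta - theta_nom)^T) <= eps}
    in the Sigma-weighted Frobenius metric."""
    delta = M - theta_nom
    dist_sq = np.trace(delta.dot(Sigma).dot(delta.T))
    if dist_sq <= eps:
        return M
    # outside the ball: rescale the offset so the weighted distance equals sqrt(eps)
    return theta_nom + np.sqrt(eps / dist_sq) * delta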
Example #4
def test_sls_common_lyapunov():

    rng = np.random.RandomState(237853)

    Ahat = np.array([[1.01, 0.01, 0], [-0.01, 1.01, 0.01], [0, -0.01, 1.01]])
    Bhat = np.eye(3)

    eps_A = 0.0001
    eps_B = 0.0001
    Ahat = utils.sample_2_to_2_ball(Ahat, eps_A, rng)
    Bhat = utils.sample_2_to_2_ball(Bhat, eps_B, rng)

    Q = np.eye(3)
    R = np.eye(3)

    n = 3
    p = 3

    is_feasible, _, P, K = sls_common_lyapunov(Ahat, Bhat, Q, R, eps_A, eps_B,
                                               0.999, None)

    assert is_feasible

    P_nom, K_nom = utils.dlqr(Ahat, Bhat, Q, R)

    # THIS FAILS
    #assert np.allclose(np.trace(P), np.trace(P_nom))

    assert np.allclose(K, K_nom, atol=1e-6)
Example #5
def test_sls_synth():

    rng = np.random.RandomState(893754)

    Ahat = np.array([[1.01, 0.01, 0], [-0.01, 1.01, 0.01], [0, -0.01, 1.01]])
    Bhat = np.eye(3)

    eps_A = 0.0001
    eps_B = 0.0001
    Ahat = utils.sample_2_to_2_ball(Ahat, eps_A, rng)
    Bhat = utils.sample_2_to_2_ball(Bhat, eps_B, rng)

    Q = np.eye(3)
    R = np.eye(3)

    alpha = 0.5
    gamma = 0.98

    n = 3
    p = 3

    T = 15

    is_feasible, sqrt_htwo_cost, Phi_x, Phi_u = sls_synth(
        Q, R, Ahat, Bhat, eps_A, eps_B, T, gamma, alpha)

    assert is_feasible, "should be feasible"

    P_nom, K_nom = utils.dlqr(Ahat, Bhat, Q, R)

    assert np.allclose(sqrt_htwo_cost**2, np.trace(P_nom))

    L = Ahat + Bhat.dot(K_nom)
    cur = np.eye(L.shape[0])
    coeffs = [np.array(cur)]
    for _ in range(T):
        cur = L.dot(cur)
        coeffs.append(np.array(cur))

    for idx in range(T):
        expected = coeffs[idx]
        actual = Phi_x[idx * n:(idx + 1) * n, :]
        assert np.allclose(expected, actual, atol=1e-5)

    A_k, B_k, C_k, D_k = make_state_space_controller(Phi_x, Phi_u, n, p)

    A_cl = np.block([[Ahat + Bhat.dot(D_k), Bhat.dot(C_k)], [B_k, A_k]])
    cur = np.eye(A_cl.shape[0])
    cl_coeffs = [np.eye(n)]
    for _ in range(T):
        cur = A_cl.dot(cur)
        cl_coeffs.append(np.array(cur[:n, :n]))

    for idx in range(T):
        expected = coeffs[idx]
        actual = cl_coeffs[idx]
        assert np.allclose(expected, actual, atol=1e-5)
Example #6
def _main():
    import examples
    A_star, B_star = examples.unstable_laplacian_dynamics()

    # define costs
    Q = 1e-3 * np.eye(3)
    R = np.eye(3)

    # initial controller
    _, K_init = utils.dlqr(A_star, B_star, 1e-3 * np.eye(3), np.eye(3))

    rng = np.random

    env = SLS_FIRStrategy(Q=Q,
                          R=R,
                          A_star=A_star,
                          B_star=B_star,
                          sigma_w=1,
                          sigma_explore=0.1,
                          reg=1e-5,
                          epoch_multiplier=10,
                          truncation_length=12,
                          actual_error_multiplier=1,
                          rls_lam=None)

    env.reset(rng)
    env.prime(250, K_init, 0.5, rng)
    for idx in range(500):
        env.step(rng)

    env = SLS_CommonLyapunovStrategy(Q=Q,
                                     R=R,
                                     A_star=A_star,
                                     B_star=B_star,
                                     sigma_w=1,
                                     sigma_explore=0.1,
                                     reg=1e-5,
                                     epoch_multiplier=10,
                                     actual_error_multiplier=1,
                                     rls_lam=None)

    env.reset(rng)
    env.prime(250, K_init, 0.5, rng)
    for idx in range(500):
        env.step(rng)
Example #7
def test_sls_h2_cost():

    rng = np.random.RandomState(805238)

    Astar = np.array([[1.01, 0.01, 0], [-0.01, 1.01, 0.01], [0, -0.01, 1.01]])
    Bstar = np.eye(3)

    eps_A = 0.00001
    eps_B = 0.00001
    Ahat = utils.sample_2_to_2_ball(Astar, eps_A, rng)
    Bhat = utils.sample_2_to_2_ball(Bstar, eps_B, rng)

    Q = np.eye(3)
    R = np.eye(3)

    n = 3
    p = 3

    T = 15

    is_feasible, _, _, K_cl = sls_common_lyapunov(Ahat, Bhat, Q, R, eps_A,
                                                  eps_B, 0.999, None)

    assert is_feasible

    P_star, K_star = utils.dlqr(Astar, Bstar, Q, R)
    J_star = np.trace(P_star)

    assert np.allclose(J_star, utils.LQR_cost(Astar, Bstar, K_star, Q, R, 1))
    assert np.allclose(J_star,
                       utils.LQR_cost(Astar, Bstar, K_cl, Q, R, 1),
                       atol=1e-6)

    is_feasible, _, Phi_x, Phi_u = sls_synth(Q, R, Ahat, Bhat, eps_A, eps_B, T,
                                             0.999, 0.5)
    assert is_feasible

    assert np.allclose(J_star,
                       h2_squared_norm(Astar, Bstar, Phi_x, Phi_u, Q, R, 1),
                       atol=1e-6)
Example #8
def test_rls():

    rng = np.random.RandomState(657423)

    n, p = 3, 2

    A = rng.normal(size=(n, n))
    B = rng.normal(size=(n, p))
    _, K = utils.dlqr(A, B)
    assert utils.spectral_radius(A + B.dot(K)) < 1  # closed loop must be strictly stable

    lam = 1e-5

    rls = utils.RecursiveLeastSquaresEstimator(n, p, lam)

    states = []
    inputs = []
    transitions = []
    xcur = np.zeros((n, ))
    for _ in range(100):
        ucur = K.dot(xcur) + rng.normal(size=(p, ))
        xnext = A.dot(xcur) + B.dot(ucur) + rng.normal(size=(n, ))
        states.append(xcur)
        inputs.append(ucur)
        transitions.append(xnext)
        rls.update(xcur, ucur, xnext)
        xcur = xnext

    # LS estimate
    Ahat_ls, Bhat_ls, Cov_ls = utils.solve_least_squares(np.array(states),
                                                         np.array(inputs),
                                                         np.array(transitions),
                                                         reg=lam)

    # RLS estimate
    Ahat_rls, Bhat_rls, Cov_rls = rls.get_estimate()

    assert np.allclose(Ahat_ls, Ahat_rls)
    assert np.allclose(Bhat_ls, Bhat_rls)
    assert np.allclose(Cov_ls, Cov_rls)

    for _ in range(100):
        ucur = K.dot(xcur) + rng.normal(size=(p, ))
        xnext = A.dot(xcur) + B.dot(ucur) + rng.normal(size=(n, ))
        states.append(xcur)
        inputs.append(ucur)
        transitions.append(xnext)
        rls.update(xcur, ucur, xnext)
        xcur = xnext

    # LS estimate
    Ahat_ls, Bhat_ls, Cov_ls = utils.solve_least_squares(np.array(states),
                                                         np.array(inputs),
                                                         np.array(transitions),
                                                         reg=lam)

    # RLS estimate
    Ahat_rls, Bhat_rls, Cov_rls = rls.get_estimate()

    assert np.allclose(Ahat_ls, Ahat_rls)
    assert np.allclose(Bhat_ls, Bhat_rls)
    assert np.allclose(Cov_ls, Cov_rls)
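
A minimal sketch of how a recursive least-squares estimator like the one exercised above might be implemented (an assumption about its internals; the repo's RecursiveLeastSquaresEstimator, and the exact meaning of its returned covariance, may differ): keep the regularized regressor covariance and its inverse, updating the inverse with rank-one Sherman-Morrison steps.

import numpy as np

class RLSSketch:
    def __init__(self, n, p, lam):
        self._n, self._p = n, p
        self._V = lam * np.eye(n + p)       # sum_t z_t z_t^T + lam * I, with z_t = [x_t; u_t]
        self._V_inv = np.eye(n + p) / lam
        self._S = np.zeros((n, n + p))      # sum_t x_{t+1} z_t^T

    def update(self, xcur, ucur, xnext):
        z = np.concatenate((xcur, ucur))
        Vz = self._V_inv.dot(z)
        # Sherman-Morrison rank-one update of the inverse covariance
        self._V_inv -= np.outer(Vz, Vz) / (1.0 + z.dot(Vz))
        self._V += np.outer(z, z)
        self._S += np.outer(xnext, z)

    def get_estimate(self):
        theta = self._S.dot(self._V_inv)
        return theta[:, :self._n], theta[:, self._n:], self._V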
Example #9
def function_value(Q, R, A, B):
    P, K = utils.dlqr(A, B, Q, R)
    return np.trace(P)
Example #10
        plt.xlabel('$x_1$')
        plt.ylabel('$x_2$')
        plt.legend()
        plt.title("x_0 = {0}, N = {1}".format(str(x0), str(N)))
        plt.savefig("problem_2/approach1_figs/HW2_pb2_1_approach1_N" + str(N) +
                    ".png")

    plt.show()

    # =======================================================================================
    # ============== Approach 2 =============================================================
    # =======================================================================================
    # Hint: the dlqr function returns: i) P, the solution to the DARE,
    # ii) the optimal feedback gain K, and iii) the closed-loop system matrix Acl = (A - BK)
    P, K, Acl = dlqr(A, B, Q, R)
    Ftot = np.vstack((Fx, np.dot(Fu, -K)))
    btot = np.hstack((bx, bu))
    Qf = np.eye(n)  # filled in here

    poli = polytope(Ftot, btot)
    F, b = poli.computeO_inf(Acl)
    # Hint: this function returns F and b such that O_inf = \{ x | Fx <= b \}.
    # The returned F and b define the maximal invariant set O_inf, and we reuse
    # the same matrices below to define the terminal set.

    # Hint: the terminal set is X_f =\{x | F_f x <= b_f\}
    Ff = F  # filled in here
    bf = b  # filled in here
Example #11
    def _design_controller(self, states, inputs, transitions, rng):
        P, self._optimal_K = utils.dlqr(self._A_star, self._B_star, self._Q,
                                        self._R)
        return (self._A_star, self._B_star, (self._sigma_w**2) * np.trace(P))
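
The returned cost (self._sigma_w**2) * np.trace(P) is the infinite-horizon average LQR cost of the optimal controller. A minimal simulation sketch (not part of the original code) that checks this empirically for a stabilizing gain K and Gaussian process noise of standard deviation sigma_w:

import numpy as np

def empirical_lqr_cost(A, B, K, Q, R, sigma_w, T=200000, rng=np.random):
    """Long-run average of x'Qx + u'Ru under u = Kx; approaches
    sigma_w**2 * Tr(P) for the optimal LQR gain as T grows."""
    n = A.shape[0]
    x = np.zeros(n)
    total = 0.0
    for _ in range(T):
        u = K.dot(x)
        total += x.dot(Q).dot(x) + u.dot(R).dot(u)
        x = A.dot(x) + B.dot(u) + sigma_w * rng.normal(size=n)
    return total / T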
Example #12
    def _design_controller(self, states, inputs, transitions, rng):

        logger = self._get_logger()

        epoch_id = self._epoch_idx + 1 if self._has_primed else 0

        logger.debug(
            "_design_controller(epoch={}): have {} points for regression".
            format(epoch_id, inputs.shape[0]))

        # do a least squares fit and design based on the nominal
        Anom, Bnom, emp_cov = utils.solve_least_squares(states,
                                                        inputs,
                                                        transitions,
                                                        reg=self._reg)

        if not self._has_primed:
            self._emp_cov = np.array(emp_cov)
            self._last_emp_cov = np.array(emp_cov)

        emp_cov /= inputs.shape[0]  # normalize by T to improve numerics

        theta_nom = np.hstack((Anom, Bnom))
        theta_star = np.hstack((self._A_star, self._B_star))
        delta = theta_nom - theta_star
        actual_error = np.trace(delta.dot(emp_cov.dot(delta.T)))
        eps = self._actual_error_multiplier * actual_error
        logger.info(
            "_design_controller(epoch={}): actual weighted error is {}, eps is {}"
            .format(epoch_id, actual_error, eps))

        def is_contained_in_confidence_set(A, B):
            theta_ab = np.hstack((A, B))
            this_delta = theta_ab - theta_nom
            return np.trace(this_delta.dot(emp_cov).dot(this_delta.T)) <= eps

        inv_sqrt_emp_cov = utils.pd_inv_sqrt(emp_cov)
        MAX_TRIES = 100000
        rng = self._get_rng(rng)
        success = False
        for rejection_idx in range(MAX_TRIES):
            eta = rng.normal(size=theta_nom.shape)
            eta *= np.power(
                rng.uniform(), 1 /
                (theta_nom.shape[0] * theta_nom.shape[1])) / np.linalg.norm(
                    eta, ord="fro")
            theta_tilde = theta_nom + np.sqrt(eps) * eta.dot(inv_sqrt_emp_cov)
            A_tilde = theta_tilde[:, :self._n]
            B_tilde = theta_tilde[:, self._n:]
            if is_contained_in_confidence_set(A_tilde, B_tilde):
                A_ts = A_tilde
                B_ts = B_tilde
                success = True
                break

        if not success:
            logger.warning(
                "_design_controller(epoch={}): was unable to rejection sample after {} attempts"
                .format(epoch_id, MAX_TRIES))
            raise Exception("this is a very low probability event")

        else:
            logger.info(
                "_design_controller(epoch={}): took {} attempts to rejection sample"
                .format(epoch_id, rejection_idx + 1))

        _, K = utils.dlqr(A_ts, B_ts, self._Q, self._R)
        self._current_K = K

        # compute the infinite horizon cost of this controller
        Jnom = utils.LQR_cost(self._A_star, self._B_star, self._current_K,
                              self._Q, self._R, self._sigma_w)

        rho_true = utils.spectral_radius(self._A_star +
                                         self._B_star.dot(self._current_K))
        logger.info("_design_controller(epoch={}): rho(A_* + B_* K)={}".format(
            self._epoch_idx + 1 if self._has_primed else 0, rho_true))

        return (Anom, Bnom, Jnom)
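
The rejection-sampling loop above draws theta_tilde uniformly from the confidence ellipsoid around theta_nom. Below is a minimal sketch of that sampler in isolation (assuming utils.pd_inv_sqrt returns a symmetric inverse square root of a positive definite matrix): normalize a Gaussian matrix to get a uniform direction on the Frobenius sphere, draw a radius as u**(1/d) with d the number of entries to be uniform in the unit ball, then map the ball into the ellipsoid.

import numpy as np

def sample_confidence_ellipsoid(theta_nom, inv_sqrt_emp_cov, eps, rng):
    """Draw theta uniformly from
    {theta : Tr((theta - theta_nom) emp_cov (theta - theta_nom)^T) <= eps}."""
    n, d = theta_nom.shape
    eta = rng.normal(size=(n, d))
    eta /= np.linalg.norm(eta, ord="fro")          # uniform direction on the Frobenius sphere
    eta *= np.power(rng.uniform(), 1.0 / (n * d))  # uniform radius in the unit ball
    return theta_nom + np.sqrt(eps) * eta.dot(inv_sqrt_emp_cov)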