コード例 #1
0
    def _design_controller(self, states, inputs, transitions, rng):
        """Extend the cached features, accumulate a new G estimate, update the gain.

        Feature rows (``phi`` of each state, ``phi`` of each next state and
        ``psi`` of each state/input pair) and the per-step quadratic costs are
        cached on the instance between calls; only the rows beyond those
        already cached are computed here. The whole cache is handed to
        ``self._estimate_G``, the result is added to ``self._G_sum`` and
        ``self._Kt`` is recomputed from that running sum.

        Args:
            states: 2-D array with one state per row.
            inputs: 2-D array with one input per row, indexed like ``states``.
            transitions: next states, indexed like ``states``.
            rng: not used by this method.

        Returns:
            The tuple ``(self._A_star, self._B_star, Jnom)`` where ``Jnom`` is
            ``utils.LQR_cost`` evaluated for the updated gain.
        """
        _, n = states.shape
        _, d = inputs.shape
        num_rows = states.shape[0]

        phi_dim = n * (n + 1) // 2
        psi_dim = (n + d) * (n + d + 1) // 2

        logger = self._get_logger()
        epoch_id = self._epoch_idx + 1 if self._has_primed else 0
        logger.info("_design_controller(epoch={}): n_transitions={}".format(
            epoch_id, num_rows))

        is_first_call = self._Phis is None
        if is_first_call:
            # Nothing cached yet: every cache must still be unset.
            assert self._Phis_plus is None
            assert self._costs is None
            assert self._Psis is None
            assert self._G_sum is None
            base_idx = 0
        else:
            # Caches must exist and agree on how many rows they hold.
            assert self._Phis_plus is not None
            assert self._costs is not None
            assert self._Psis is not None
            assert self._Phis.shape[0] == self._Psis.shape[0]
            assert self._Phis.shape[0] == self._Phis_plus.shape[0]
            base_idx = self._Phis.shape[0]

        # Featurise only the rows that are not cached yet.
        num_new = num_rows - base_idx
        new_phis = np.zeros((num_new, phi_dim))
        new_phis_plus = np.zeros((num_new, phi_dim))
        new_psis = np.zeros((num_new, psi_dim))
        for i in range(num_new):
            row = base_idx + i
            new_phis[i] = phi(states[row])
            new_phis_plus[i] = phi(transitions[row])
            new_psis[i] = psi(states[row], inputs[row])

        # Row-wise x^T Q x + u^T R u. Equivalent to taking the diagonal of
        # X Q X^T (and U R U^T) without materialising the T-by-T product.
        new_states = states[base_idx:]
        new_inputs = inputs[base_idx:]
        new_costs = (np.sum((new_states @ self._Q) * new_states, axis=1) +
                     np.sum((new_inputs @ self._R) * new_inputs, axis=1))

        if is_first_call:
            self._Phis = new_phis
            self._Phis_plus = new_phis_plus
            self._Psis = new_psis
            self._costs = new_costs
            self._G_sum = np.zeros((n + d, n + d))
        else:
            self._Phis = np.vstack((self._Phis, new_phis))
            self._Phis_plus = np.vstack((self._Phis_plus, new_phis_plus))
            self._Psis = np.vstack((self._Psis, new_psis))
            self._costs = np.hstack((self._costs, new_costs))

        Gt = self._estimate_G(self._Phis, self._Phis_plus, self._Psis,
                              self._costs, self._sigma_w, n)
        self._G_sum += Gt

        # K = -G_uu^{-1} G_ux. The input block starts at row/column n (after
        # the n state coordinates); slicing at d only coincides with this when
        # n == d.
        # NOTE(review): assumes psi orders the state before the input in the
        # (n + d)-dimensional lifted vector -- confirm against psi/_estimate_G.
        self._Kt = -np.linalg.solve(self._G_sum[n:, n:], self._G_sum[n:, :n])

        rho_true = utils.spectral_radius(self._A_star +
                                         self._B_star @ self._Kt)
        logger.info("_design_controller(epoch={}): rho(A_* + B_* K)={}".format(
            epoch_id, rho_true))
        Jnom = utils.LQR_cost(self._A_star, self._B_star, self._Kt, self._Q,
                              self._R, self._sigma_w)
        return (self._A_star, self._B_star, Jnom)
コード例 #2
0
    def _design_controller(self, states, inputs, transitions, rng):
        """Fit a nominal model, pick an optimistic model near it, and set the gain.

        A least-squares fit gives the nominal ``(A, B)``. ``ofu_pgd`` is then
        run with a projection onto the weighted ball around the nominal
        estimate, and the gain returned by ``utils.dlqr`` for the resulting
        model is stored in ``self._current_K``.

        Args:
            states, inputs, transitions: regression data forwarded to
                ``utils.solve_least_squares``.
            rng: not used by this method.

        Returns:
            ``(Anom, Bnom, Jnom)``: the nominal estimates and
            ``utils.LQR_cost`` of the new gain on ``(self._A_star, self._B_star)``.
        """
        logger = self._get_logger()
        num_points = inputs.shape[0]
        logger.debug("_design_controller: have {} points for regression".format(num_points))

        # TODO(stephentu):
        # Currently I am using the algorithm of Abbasi-Yadkori and Szepesvari.
        # We should also try the subtly different algorithm in
        # https://arxiv.org/pdf/1711.07230.pdf.

        # Nominal fit.
        Anom, Bnom, emp_cov = utils.solve_least_squares(
            states, inputs, transitions, reg=self._reg)

        # On the very first design, remember the un-normalised covariance.
        if not self._has_primed:
            self._emp_cov = np.array(emp_cov)
            self._last_emp_cov = np.array(emp_cov)

        # Normalise by T (in place) to improve numerics.
        emp_cov /= num_points

        theta_nom = np.hstack((Anom, Bnom))
        theta_star = np.hstack((self._A_star, self._B_star))
        nominal_gap = theta_nom - theta_star
        actual_error = np.trace(nominal_gap @ (emp_cov @ nominal_gap.T))
        eps = self._actual_error_multiplier * actual_error
        logger.info("_design_controller: actual weighted error is {}, eps is {}".format(actual_error, eps))

        # self._p is read alongside self._n as before, but only n is needed.
        state_dim, _ = self._n, self._p

        def projection_operator(A, B):
            # Project the stacked [A B] onto the weighted ball, then split it.
            projected = utils.project_weighted_ball(
                np.hstack((A, B)), theta_nom, emp_cov, eps)
            return projected[:, :state_dim], projected[:, state_dim:]

        A_ofu, B_ofu = ofu_pgd(
            Q=self._Q,
            R=self._R,
            Ahat=Anom,
            Bhat=Bnom,
            projection_operator=projection_operator,
            logger=logger,
            num_restarts=self._num_restarts)

        # Sanity check: the optimistic model must lie in the ball (up to TOL).
        TOL = 1e-5
        ofu_gap = np.hstack((A_ofu, B_ofu)) - theta_nom
        ofu_weighted_error = np.trace(ofu_gap @ (emp_cov @ ofu_gap.T))
        assert ofu_weighted_error <= eps + TOL

        _, K = utils.dlqr(A_ofu, B_ofu, self._Q, self._R)
        self._current_K = K

        # Infinite-horizon cost of this controller on the true system.
        Jnom = utils.LQR_cost(
            self._A_star, self._B_star, self._current_K,
            self._Q, self._R, self._sigma_w)

        # For debugging: spectral radius of the true closed loop.
        closed_loop = self._A_star + self._B_star.dot(self._current_K)
        rho_true = utils.spectral_radius(closed_loop)
        epoch_id = self._epoch_idx + 1 if self._has_primed else 0
        logger.info("_design_controller(epoch={}): rho(A_* + B_* K)={}".format(
            epoch_id, rho_true))

        return (Anom, Bnom, Jnom)
コード例 #3
0
    def _design_controller(self, states, inputs, transitions, rng):
        """Fit a nominal model and synthesise a gain with ``sls_common_lyapunov``.

        The operator-norm distances between the least-squares estimates and
        ``(self._A_star, self._B_star)``, scaled by
        ``self._actual_error_multiplier``, are passed to the synthesis as error
        bounds. When the synthesis is feasible its gain replaces
        ``self._current_K``; when it is not, the existing gain is kept and
        ``self._midway_infeasible`` is incremented.

        Args:
            states, inputs, transitions: regression data forwarded to
                ``utils.solve_least_squares``.
            rng: not used by this method.

        Returns:
            ``(Anom, Bnom, Jnom)``: the nominal estimates and
            ``utils.LQR_cost`` of the gain in ``self._current_K``.

        Raises:
            SLSInfeasibleException: the synthesis is infeasible and the
                instance has no ``_current_K`` attribute to fall back on.
        """
        logger = self._get_logger()

        Anom, Bnom, _ = utils.solve_least_squares(states,
                                                  inputs,
                                                  transitions,
                                                  reg=self._reg)
        eps_A = np.linalg.norm(Anom - self._A_star, ord=2)
        eps_B = np.linalg.norm(Bnom - self._B_star, ord=2)

        effective_eps_A = self._actual_error_multiplier * eps_A
        effective_eps_B = self._actual_error_multiplier * eps_B

        epoch_id = self._epoch_idx + 1 if self._has_primed else 0

        logger.info(
            "_design_controller(epoch={}): effective_eps_A={}, effective_eps_B={}"
            .format(epoch_id, effective_eps_A, effective_eps_B))

        is_feasible, _, _, K = sls_common_lyapunov(Anom,
                                                   Bnom,
                                                   self._Q,
                                                   self._R,
                                                   effective_eps_A,
                                                   effective_eps_B,
                                                   tau=0.999,
                                                   logger=logger)

        if is_feasible:
            logger.info(
                "_design_controller(epoch={}): SLS was feasible. updating controller"
                .format(epoch_id))
            self._current_K = K
        elif not hasattr(self, "_current_K"):
            # Only the absence of a previous controller is treated as fatal.
            # Checking the attribute explicitly (instead of wrapping the whole
            # fallback path in try/except AttributeError) keeps unrelated
            # AttributeErrors from being reported as "no controller".
            logger.warning(
                "_design_controller(epoch={}): SLS not feasible: no existing controller to fallback on, effective_eps_A={}, effective_eps_B={}"
                .format(epoch_id, effective_eps_A, effective_eps_B))
            raise SLSInfeasibleException()
        else:
            # Keep the current controller.
            assert self._current_K is not None
            logger.warning(
                "_design_controller(epoch={}): SLS not feasible: keeping current controller"
                .format(epoch_id))
            self._midway_infeasible += 1

        # Infinite-horizon cost of the controller now in use.
        Jnom = utils.LQR_cost(self._A_star, self._B_star, self._current_K,
                              self._Q, self._R, self._sigma_w)

        return Anom, Bnom, Jnom
コード例 #4
0
    def _design_controller(self, states, inputs, transitions, rng):
        """Extend the cached features and run several gain-improvement iterations.

        Lifted features ``phi(state, input)`` and per-step quadratic costs are
        cached on the instance between calls; only rows beyond those already
        cached are computed here. Each iteration calls ``self._lstdq`` with
        the current gain and replaces ``self._Kt`` with the gain derived from
        the returned matrix.

        Args:
            states: 2-D array with one state per row.
            inputs: 2-D array with one input per row, indexed like ``states``.
            transitions: next states, passed unchanged to ``self._lstdq``.
            rng: not used by this method.

        Returns:
            The tuple ``(self._A_star, self._B_star, Jnom)`` where ``Jnom`` is
            ``utils.LQR_cost`` evaluated for the final gain.
        """
        T, n = states.shape
        _, d = inputs.shape

        lifted_dim = (n + d) * (n + d + 1) // 2

        logger = self._get_logger()
        epoch_id = self._epoch_idx + 1 if self._has_primed else 0
        logger.info("_design_controller(epoch={}): n_transitions={}".format(
            epoch_id, T))

        is_first_call = self._Phis is None
        if is_first_call:
            assert self._costs is None
            base_idx = 0
        else:
            assert self._costs is not None
            base_idx = self._Phis.shape[0]

        # Featurise only the rows that are not cached yet.
        num_new = T - base_idx
        new_phis = np.zeros((num_new, lifted_dim))
        for i in range(num_new):
            new_phis[i] = phi(states[base_idx + i], inputs[base_idx + i])

        # Row-wise x^T Q x + u^T R u. Equivalent to taking the diagonal of
        # X Q X^T (and U R U^T) without materialising the T-by-T product.
        new_states = states[base_idx:]
        new_inputs = inputs[base_idx:]
        new_costs = (np.sum((new_states @ self._Q) * new_states, axis=1) +
                     np.sum((new_inputs @ self._R) * new_inputs, axis=1))

        if is_first_call:
            self._Phis = new_phis
            self._costs = new_costs
        else:
            self._Phis = np.vstack((self._Phis, new_phis))
            self._costs = np.hstack((self._costs, new_costs))

        # TODO(stephentu):
        # this is a hack
        # One extra iteration per additional 2000 transitions, capped at +3.
        if T <= 2000:
            num_iters = self._num_PI_iters
        elif T <= 4000:
            num_iters = self._num_PI_iters + 1
        elif T <= 6000:
            num_iters = self._num_PI_iters + 2
        else:
            num_iters = self._num_PI_iters + 3

        logger.info("num_iters={}".format(num_iters))
        for _ in range(num_iters):
            Qt = self._lstdq(self._Phis, transitions, self._costs, self._Kt,
                             self._sigma_w, self._mu, self._L)
            # K = -Q_uu^{-1} Q_ux. assume_a="pos" is the supported spelling of
            # the removed sym_pos=True keyword (symmetric positive definite).
            self._Kt = -scipy.linalg.solve(Qt[n:, n:], Qt[:n, n:].T,
                                           assume_a="pos")

        rho_true = utils.spectral_radius(self._A_star +
                                         self._B_star @ self._Kt)
        logger.info("_design_controller(epoch={}): rho(A_* + B_* K)={}".format(
            epoch_id, rho_true))
        Jnom = utils.LQR_cost(self._A_star, self._B_star, self._Kt, self._Q,
                              self._R, self._sigma_w)
        return (self._A_star, self._B_star, Jnom)
コード例 #5
0
def test_sls_h2_cost():
    """Check that SLS controllers built from a near-exact model match the LQR cost.

    With estimates drawn within 1e-5 of the true ``(A, B)``, both the gain
    from ``sls_common_lyapunov`` and the response from ``sls_synth`` are
    expected to reproduce the optimal cost ``trace(P_star)`` to ``atol=1e-6``.
    """
    rng = np.random.RandomState(805238)

    Astar = np.array([[1.01, 0.01, 0], [-0.01, 1.01, 0.01], [0, -0.01, 1.01]])
    Bstar = np.eye(3)

    eps_A = 0.00001
    eps_B = 0.00001
    Ahat = utils.sample_2_to_2_ball(Astar, eps_A, rng)
    Bhat = utils.sample_2_to_2_ball(Bstar, eps_B, rng)

    Q = np.eye(3)
    R = np.eye(3)

    T = 15

    is_feasible, _, _, K_cl = sls_common_lyapunov(Ahat, Bhat, Q, R, eps_A,
                                                  eps_B, 0.999, None)

    assert is_feasible

    P_star, K_star = utils.dlqr(Astar, Bstar, Q, R)
    J_star = np.trace(P_star)

    # Baseline: the optimal gain must reproduce the optimal cost.
    assert np.allclose(J_star, utils.LQR_cost(Astar, Bstar, K_star, Q, R, 1))
    assert np.allclose(J_star,
                       utils.LQR_cost(Astar, Bstar, K_cl, Q, R, 1),
                       atol=1e-6)

    is_feasible, _, Phi_x, Phi_u = sls_synth(Q, R, Ahat, Bhat, eps_A, eps_B, T,
                                             0.999, 0.5)

    # Check feasibility before using the synthesised response; previously the
    # flag from this second call was assigned but never inspected.
    assert is_feasible

    assert np.allclose(J_star,
                       h2_squared_norm(Astar, Bstar, Phi_x, Phi_u, Q, R, 1),
                       atol=1e-6)
コード例 #6
0
    def _design_controller(self, states, inputs, transitions, rng):
        """Fit a nominal model, sample a model from the confidence set, set the gain.

        A least-squares fit gives the nominal ``(A, B)``. A perturbed model is
        then drawn around it and accepted once it satisfies the weighted-error
        bound ``eps``; the gain returned by ``utils.dlqr`` for the accepted
        model is stored in ``self._current_K``.

        Args:
            states, inputs, transitions: regression data forwarded to
                ``utils.solve_least_squares``.
            rng: passed through ``self._get_rng`` to obtain the generator used
                for sampling.

        Returns:
            ``(Anom, Bnom, Jnom)``: the nominal estimates and
            ``utils.LQR_cost`` of the new gain on ``(self._A_star, self._B_star)``.

        Raises:
            Exception: no sample was accepted within ``MAX_TRIES`` attempts.
        """
        logger = self._get_logger()

        epoch_id = self._epoch_idx + 1 if self._has_primed else 0

        logger.debug(
            "_design_controller(epoch={}): have {} points for regression".
            format(epoch_id, inputs.shape[0]))

        # do a least squares fit and design based on the nominal
        Anom, Bnom, emp_cov = utils.solve_least_squares(states,
                                                        inputs,
                                                        transitions,
                                                        reg=self._reg)

        # On the very first design, remember the un-normalised covariance.
        if not self._has_primed:
            self._emp_cov = np.array(emp_cov)
            self._last_emp_cov = np.array(emp_cov)

        emp_cov /= inputs.shape[0]  # normalize by T to improve numerics

        theta_nom = np.hstack((Anom, Bnom))
        theta_star = np.hstack((self._A_star, self._B_star))
        delta = theta_nom - theta_star
        actual_error = np.trace(delta.dot(emp_cov.dot(delta.T)))
        eps = self._actual_error_multiplier * actual_error
        logger.info(
            "_design_controller(epoch={}): actual weighted error is {}, eps is {}"
            .format(epoch_id, actual_error, eps))

        def is_contained_in_confidence_set(A, B):
            # Weighted squared error of [A B] relative to the nominal fit.
            this_delta = np.hstack((A, B)) - theta_nom
            return np.trace(this_delta.dot(emp_cov).dot(this_delta.T)) <= eps

        inv_sqrt_emp_cov = utils.pd_inv_sqrt(emp_cov)
        MAX_TRIES = 100000
        rng = self._get_rng(rng)
        for rejection_idx in range(MAX_TRIES):
            # Gaussian direction rescaled to Frobenius norm U**(1/size), with
            # U uniform on [0, 1); the draws keep their original order
            # (normal, then uniform) so the random stream is unchanged.
            eta = rng.normal(size=theta_nom.shape)
            radius = np.power(rng.uniform(), 1 / theta_nom.size)
            eta *= radius / np.linalg.norm(eta, ord="fro")
            theta_tilde = theta_nom + np.sqrt(eps) * eta.dot(inv_sqrt_emp_cov)
            A_tilde = theta_tilde[:, :self._n]
            B_tilde = theta_tilde[:, self._n:]
            if is_contained_in_confidence_set(A_tilde, B_tilde):
                A_ts = A_tilde
                B_ts = B_tilde
                break
        else:
            # Loop exhausted without a break: no sample was accepted.
            logger.warning(
                "_design_controller(epoch={}): was unable to rejection sample after {} attempts"
                .format(epoch_id, MAX_TRIES))
            raise Exception("this is a very low probability event")

        logger.info(
            "_design_controller(epoch={}): took {} attempts to rejection sample"
            .format(epoch_id, rejection_idx + 1))

        _, K = utils.dlqr(A_ts, B_ts, self._Q, self._R)
        self._current_K = K

        # compute the infinite horizon cost of this controller
        Jnom = utils.LQR_cost(self._A_star, self._B_star, self._current_K,
                              self._Q, self._R, self._sigma_w)

        rho_true = utils.spectral_radius(self._A_star +
                                         self._B_star.dot(self._current_K))
        logger.info("_design_controller(epoch={}): rho(A_* + B_* K)={}".format(
            epoch_id, rho_true))

        return (Anom, Bnom, Jnom)