def _design_controller(self, states, inputs, transitions, rng):
        """Refresh the feature/cost caches, update ``self._G_sum``, set ``self._Kt``.

        On the first call (``self._Phis is None``) the caches are built from
        every row of the arguments. On later calls only the rows past the
        number already cached are featurised and appended, so the arrays
        passed in are expected to be cumulative across calls.

        Args:
            states: 2-D array, one state per row (``T x n``).
            inputs: 2-D array, one input per row (``T x d``).
            transitions: per-row next states; row ``i`` is fed to ``phi``.
            rng: not used by this method.

        Returns:
            ``(self._A_star, self._B_star, Jnom)`` where ``Jnom`` is
            ``utils.LQR_cost`` evaluated for the updated gain.
        """
        T, n = states.shape
        _, d = inputs.shape

        phi_dim = n * (n + 1) // 2
        psi_dim = (n + d) * (n + d + 1) // 2

        logger = self._get_logger()
        logger.info("_design_controller(epoch={}): n_transitions={}".format(
            self._epoch_idx + 1 if self._has_primed else 0, T))

        first_call = self._Phis is None
        if first_call:
            assert self._Phis_plus is None
            assert self._costs is None
            assert self._Psis is None
            assert self._G_sum is None
            base_idx = 0
        else:
            assert self._Phis_plus is not None
            assert self._costs is not None
            assert self._Psis is not None
            assert self._Phis.shape[0] == self._Psis.shape[0]
            assert self._Phis.shape[0] == self._Phis_plus.shape[0]
            base_idx = self._Phis.shape[0]

        # Featurise only the rows that have not been cached yet.
        n_new = T - base_idx
        newPhis = np.zeros((n_new, phi_dim))
        newPhis_plus = np.zeros((n_new, phi_dim))
        newPsis = np.zeros((n_new, psi_dim))
        for i in range(n_new):
            row = base_idx + i
            newPhis[i] = phi(states[row])
            newPhis_plus[i] = phi(transitions[row])
            newPsis[i] = psi(states[row], inputs[row])

        # Row-wise x'Qx + u'Ru. This equals diag(X Q X^T) + diag(U R U^T)
        # without materialising the (rows x rows) products.
        new_states = states[base_idx:]
        new_inputs = inputs[base_idx:]
        newCosts = (np.sum((new_states @ self._Q) * new_states, axis=1) +
                    np.sum((new_inputs @ self._R) * new_inputs, axis=1))

        if first_call:
            self._Phis = newPhis
            self._Phis_plus = newPhis_plus
            self._Psis = newPsis
            self._costs = newCosts
            self._G_sum = np.zeros((n + d, n + d))
        else:
            self._Phis = np.vstack((self._Phis, newPhis))
            self._Phis_plus = np.vstack((self._Phis_plus, newPhis_plus))
            self._Psis = np.vstack((self._Psis, newPsis))
            self._costs = np.hstack((self._costs, newCosts))

        Gt = self._estimate_G(self._Phis, self._Phis_plus, self._Psis,
                              self._costs, self._sigma_w, n)
        self._G_sum += Gt

        # The (n + d) x (n + d) matrix is split at index n: the trailing
        # d x d block and the d x n block beside it give a d x n gain, which
        # is the shape ``self._B_star @ self._Kt`` below requires. Slicing at
        # ``d`` (as before) only coincides with this when n == d.
        # NOTE(review): assumes G is ordered state-block first -- confirm
        # against psi / _estimate_G.
        self._Kt = -np.linalg.solve(self._G_sum[n:, n:], self._G_sum[n:, :n])

        rho_true = utils.spectral_radius(self._A_star +
                                         self._B_star @ self._Kt)
        logger.info("_design_controller(epoch={}): rho(A_* + B_* K)={}".format(
            self._epoch_idx + 1 if self._has_primed else 0, rho_true))
        Jnom = utils.LQR_cost(self._A_star, self._B_star, self._Kt, self._Q,
                              self._R, self._sigma_w)
        return (self._A_star, self._B_star, Jnom)
    def _design_controller(self, states, inputs, transitions, rng):
        """Fit a nominal model, pick a model via ``ofu_pgd``, and design a gain.

        The nominal ``(A, B)`` comes from ``utils.solve_least_squares``. The
        search in ``ofu_pgd`` is constrained, through a projection callback,
        to a weighted ball around the nominal estimate whose radius is
        ``self._actual_error_multiplier`` times the weighted error of the
        nominal estimate with respect to ``(self._A_star, self._B_star)``.
        The resulting gain is stored in ``self._current_K``.

        Returns:
            ``(Anom, Bnom, Jnom)``: the nominal estimates and the value of
            ``utils.LQR_cost`` for the new gain.
        """
        log = self._get_logger()
        log.debug("_design_controller: have {} points for regression".format(inputs.shape[0]))

        # TODO(stephentu):
        # Currently I am using the algorithm of Abbasi-Yadkori and Szepesvari.
        # We should also try the subtly different algorithm in
        # https://arxiv.org/pdf/1711.07230.pdf.

        # least-squares fit of the dynamics
        Anom, Bnom, emp_cov = utils.solve_least_squares(
            states, inputs, transitions, reg=self._reg)

        if not self._has_primed:
            # keep un-normalized copies on the first (priming) call
            self._emp_cov = np.array(emp_cov)
            self._last_emp_cov = np.array(emp_cov)

        # normalize by T to improve numerics
        emp_cov /= inputs.shape[0]

        nominal = np.hstack((Anom, Bnom))
        truth = np.hstack((self._A_star, self._B_star))
        gap = nominal - truth
        actual_error = np.trace(gap @ (emp_cov @ gap.T))
        eps = self._actual_error_multiplier * actual_error
        log.info("_design_controller: actual weighted error is {}, eps is {}".format(actual_error, eps))

        state_dim, _input_dim = self._n, self._p

        def projection_operator(A, B):
            # project the stacked [A B] and split it back into its two parts
            stacked = np.hstack((A, B))
            projected = utils.project_weighted_ball(stacked, nominal, emp_cov, eps)
            return projected[:, :state_dim], projected[:, state_dim:]

        A_ofu, B_ofu = ofu_pgd(
            Q=self._Q,
            R=self._R,
            Ahat=Anom,
            Bhat=Bnom,
            projection_operator=projection_operator,
            logger=log,
            num_restarts=self._num_restarts,
        )

        # sanity check: the returned model must lie in the ball (up to TOL)
        TOL = 1e-5
        offset = np.hstack((A_ofu, B_ofu)) - nominal
        assert np.trace(offset @ (emp_cov @ offset.T)) <= eps + TOL

        _, gain = utils.dlqr(A_ofu, B_ofu, self._Q, self._R)
        self._current_K = gain

        # infinite horizon cost of this controller on (A_star, B_star)
        Jnom = utils.LQR_cost(self._A_star, self._B_star, self._current_K,
                              self._Q, self._R, self._sigma_w)

        # for debugging purposes: closed-loop spectral radius on the true system
        closed_loop = self._A_star + self._B_star @ self._current_K
        rho_true = utils.spectral_radius(closed_loop)
        epoch = self._epoch_idx + 1 if self._has_primed else 0
        log.info("_design_controller(epoch={}): rho(A_* + B_* K)={}".format(epoch, rho_true))

        return (Anom, Bnom, Jnom)
Beispiel #3
0
def EHET_EH_ET(H, T, e):
    '''
    Return rho(EHET - EH + ET), where E = diag(e) and rho is whatever
    utils.spectral_radius computes.
    '''
    mask = np.diag(e)
    masked_T = mask @ T
    masked_H = mask @ H
    return utils.spectral_radius(masked_H @ masked_T - masked_H + masked_T)
Beispiel #4
0
    def _design_controller(self, states, inputs, transitions, rng):
        """Refresh the feature/cost caches and run the gain-update iterations.

        On the first call (``self._Phis is None``) the caches are built from
        every row of ``states``/``inputs``. On later calls only the rows past
        the number already cached are featurised and appended, so the arrays
        passed in are expected to be cumulative across calls. Each iteration
        calls ``self._lstdq`` and derives a new ``self._Kt`` from its result.

        Args:
            states: 2-D array, one state per row (``T x n``).
            inputs: 2-D array, one input per row (``T x d``).
            transitions: forwarded unchanged to ``self._lstdq``.
            rng: not used by this method.

        Returns:
            ``(self._A_star, self._B_star, Jnom)`` where ``Jnom`` is
            ``utils.LQR_cost`` evaluated for the updated gain.
        """
        T, n = states.shape
        _, d = inputs.shape

        lifted_dim = (n + d) * (n + d + 1) // 2

        logger = self._get_logger()
        logger.info("_design_controller(epoch={}): n_transitions={}".format(
            self._epoch_idx + 1 if self._has_primed else 0, T))

        first_call = self._Phis is None
        if first_call:
            assert self._costs is None
            base_idx = 0
        else:
            assert self._costs is not None
            base_idx = self._Phis.shape[0]

        # Featurise only the rows that have not been cached yet.
        newPhis = np.zeros((T - base_idx, lifted_dim))
        for i in range(newPhis.shape[0]):
            newPhis[i] = phi(states[base_idx + i], inputs[base_idx + i])

        # Row-wise x'Qx + u'Ru. This equals diag(X Q X^T) + diag(U R U^T)
        # without materialising the (rows x rows) products.
        new_states = states[base_idx:]
        new_inputs = inputs[base_idx:]
        newCosts = (np.sum((new_states @ self._Q) * new_states, axis=1) +
                    np.sum((new_inputs @ self._R) * new_inputs, axis=1))

        if first_call:
            self._Phis = newPhis
            self._costs = newCosts
        else:
            self._Phis = np.vstack((self._Phis, newPhis))
            self._costs = np.hstack((self._costs, newCosts))

        # TODO(stephentu):
        # this is a hack
        # One extra iteration per additional 2000 transitions, capped at +3
        # (T <= 2000: +0, <= 4000: +1, <= 6000: +2, otherwise +3).
        num_iters = self._num_PI_iters + min(max((T - 1) // 2000, 0), 3)

        logger.info("num_iters={}".format(num_iters))
        for _ in range(num_iters):
            Qt = self._lstdq(self._Phis, transitions, self._costs, self._Kt,
                             self._sigma_w, self._mu, self._L)
            # ``assume_a='pos'`` is the supported spelling of the removed
            # ``sym_pos=True`` keyword.
            Ktp1 = -scipy.linalg.solve(Qt[n:, n:], Qt[:n, n:].T,
                                       assume_a='pos')
            self._Kt = Ktp1

        rho_true = utils.spectral_radius(self._A_star +
                                         self._B_star @ self._Kt)
        logger.info("_design_controller(epoch={}): rho(A_* + B_* K)={}".format(
            self._epoch_idx + 1 if self._has_primed else 0, rho_true))
        Jnom = utils.LQR_cost(self._A_star, self._B_star, self._Kt, self._Q,
                              self._R, self._sigma_w)
        return (self._A_star, self._B_star, Jnom)
Beispiel #5
0
 def _generate_w(self, w):
     """
     Build the W matrix and rescale it to ``self.spectral_radius``.

     :param w: ``None`` (a matrix is created with ``self.generate_w``), a
         callable (invoked with ``self.output_dim``), or a ready matrix,
         which is rescaled in place.
     :return: the result of ``w.requires_grad_(requires_grad=False)``
     """
     if w is None:
         w = self.generate_w(self.output_dim, self.w_sparsity)
     elif callable(w):
         w = w(self.output_dim)

     # Scale it to spectral radius
     target_radius = self.spectral_radius
     current_radius = utils.spectral_radius(w)
     w *= target_radius / current_radius
     return w.requires_grad_(requires_grad=False)
Beispiel #6
0
def spectral(opt, model, run_random_search=False):
    '''
    Construct the update matrices and check their spectral radii.

    Builds T from utils.fd_step and H from model.H on a 10 x 10 wraparound
    grid, saves both under tmp/, prints rho(HT + T - H), asserts that the
    radius for a square mask (border of width 2 zeroed) is below 1, and then
    runs test_specific.

    opt: unused by this function.
    model: object providing get_activation(), change_activation() and H.
        Its activation is switched to 'none' and is not switched back.
    run_random_search: when True, additionally tries random masks that zero
        1..10 interior cells (1000 draws each) and prints every mask whose
        radius is >= 1. Defaults to False, which matches the previous
        behaviour where this search sat after an unconditional return and
        could never run.
    '''
    image_size = 10
    # Remove activation and bc_mask
    # NOTE(review): the previous activation is queried but never restored;
    # confirm whether callers expect it to be put back.
    model.get_activation()
    model.change_activation('none')
    T = utils.construct_matrix_wraparound(image_size, utils.fd_step)
    H = utils.construct_matrix_wraparound(image_size, model.H)
    np.save('tmp/H.npy', H)
    np.save('tmp/T.npy', T)

    r = utils.spectral_radius(H.dot(T) + T - H)
    print('rho(HT + T - H):', r)

    print('Square geometry')
    pad = 2
    e_square = np.zeros((image_size, image_size))
    e_square[pad:-pad, pad:-pad] = 1
    r = EHET_EH_ET(H, T, e_square.flatten())
    assert r < 1

    test_specific(H, T, image_size)
    if not run_random_search:
        return

    # Random search: knock out n_zeros interior cells and look for masks
    # whose spectral radius reaches 1.
    inner = image_size - 2 * pad
    for n_zeros in range(1, 11):
        print('\n###############################\n{} zeros\n'.format(n_zeros))
        for _ in range(1000):
            e2 = np.ones(inner**2)
            indices = np.random.choice(inner**2, n_zeros, replace=False)
            e2[indices] = 0
            e1 = e_square.copy()
            e1[pad:-pad, pad:-pad] = e2.reshape(inner, inner)
            r = EHET_EH_ET(H, T, e1.flatten())
            if r >= 1:
                print('************ Rho > 1 ************')
                print(e1.reshape((image_size, image_size)))
                print('*********************************')
def test_rls():
    """Check that the recursive estimator agrees with batch least squares.

    A random system is driven by a ``utils.dlqr`` gain plus noise. After 100
    steps, and again after 200, the estimate from
    ``utils.RecursiveLeastSquaresEstimator`` must match
    ``utils.solve_least_squares`` on the same data (A, B and covariance).
    """
    rng = np.random.RandomState(657423)

    n, p = 3, 2

    A = rng.normal(size=(n, n))
    B = rng.normal(size=(n, p))
    _, K = utils.dlqr(A, B)
    assert utils.spectral_radius(A + B.dot(K)) <= 1

    lam = 1e-5
    rls = utils.RecursiveLeastSquaresEstimator(n, p, lam)

    states, inputs, transitions = [], [], []

    def rollout(x, steps):
        # Simulate `steps` transitions from x, recording each one and feeding
        # it to the recursive estimator; returns the final state.
        for _ in range(steps):
            u = K.dot(x) + rng.normal(size=(p, ))
            x_next = A.dot(x) + B.dot(u) + rng.normal(size=(n, ))
            states.append(x)
            inputs.append(u)
            transitions.append(x_next)
            rls.update(x, u, x_next)
            x = x_next
        return x

    def check_estimates_agree():
        # LS estimate
        Ahat_ls, Bhat_ls, Cov_ls = utils.solve_least_squares(
            np.array(states),
            np.array(inputs),
            np.array(transitions),
            reg=lam)

        # RLS estimate
        Ahat_rls, Bhat_rls, Cov_rls = rls.get_estimate()

        pairs = ((Ahat_ls, Ahat_rls), (Bhat_ls, Bhat_rls), (Cov_ls, Cov_rls))
        for batch_value, recursive_value in pairs:
            assert np.allclose(batch_value, recursive_value)

    xcur = rollout(np.zeros((n, )), 100)
    check_estimates_agree()

    rollout(xcur, 100)
    check_estimates_agree()
Beispiel #8
0
 def get_spectral_radius(self):
     """Return ``utils.spectral_radius`` evaluated on ``self.w``."""
     radius = utils.spectral_radius(self.w)
     return radius
Beispiel #9
0
    def _design_controller(self, states, inputs, transitions, rng):
        """Fit a nominal model, sample a model near it, and design a gain.

        The nominal ``(A, B)`` comes from ``utils.solve_least_squares``. A
        candidate is drawn as ``theta_nom + sqrt(eps) * eta @ inv_sqrt_emp_cov``
        where ``eta`` is a Gaussian matrix rescaled to Frobenius norm
        ``U ** (1 / eta.size)`` with ``U`` uniform. It is accepted once its
        weighted distance to the nominal estimate is at most ``eps``. The gain
        designed for the accepted model is stored in ``self._current_K``.

        Args:
            states, inputs, transitions: regression data forwarded to
                ``utils.solve_least_squares``.
            rng: passed through ``self._get_rng`` to obtain the sampler.

        Returns:
            ``(Anom, Bnom, Jnom)``: the nominal estimates and the value of
            ``utils.LQR_cost`` for the new gain.

        Raises:
            RuntimeError: if no candidate is accepted within ``MAX_TRIES``
                draws.
        """
        logger = self._get_logger()

        epoch_id = self._epoch_idx + 1 if self._has_primed else 0

        logger.debug(
            "_design_controller(epoch={}): have {} points for regression".
            format(epoch_id, inputs.shape[0]))

        # do a least squares fit and design based on the nominal
        Anom, Bnom, emp_cov = utils.solve_least_squares(states,
                                                        inputs,
                                                        transitions,
                                                        reg=self._reg)

        if not self._has_primed:
            # keep un-normalized copies on the first (priming) call
            self._emp_cov = np.array(emp_cov)
            self._last_emp_cov = np.array(emp_cov)

        emp_cov /= inputs.shape[0]  # normalize by T to improve numerics

        theta_nom = np.hstack((Anom, Bnom))
        theta_star = np.hstack((self._A_star, self._B_star))
        delta = theta_nom - theta_star
        actual_error = np.trace(delta.dot(emp_cov.dot(delta.T)))
        eps = self._actual_error_multiplier * actual_error
        logger.info(
            "_design_controller(epoch={}): actual weighted error is {}, eps is {}"
            .format(epoch_id, actual_error, eps))

        def is_contained_in_confidence_set(A, B):
            theta_ab = np.hstack((A, B))
            this_delta = theta_ab - theta_nom
            return np.trace(this_delta.dot(emp_cov).dot(this_delta.T)) <= eps

        inv_sqrt_emp_cov = utils.pd_inv_sqrt(emp_cov)
        MAX_TRIES = 100000
        rng = self._get_rng(rng)
        n_entries = theta_nom.shape[0] * theta_nom.shape[1]
        for rejection_idx in range(MAX_TRIES):
            eta = rng.normal(size=theta_nom.shape)
            # rescale eta to Frobenius norm U ** (1 / n_entries)
            eta *= np.power(rng.uniform(),
                            1 / n_entries) / np.linalg.norm(eta, ord="fro")
            theta_tilde = theta_nom + np.sqrt(eps) * eta.dot(inv_sqrt_emp_cov)
            A_ts = theta_tilde[:, :self._n]
            B_ts = theta_tilde[:, self._n:]
            if is_contained_in_confidence_set(A_ts, B_ts):
                break
        else:
            # every draw was rejected
            logger.warning(
                "_design_controller(epoch={}): was unable to rejection sample after {} attempts"
                .format(epoch_id, MAX_TRIES))
            raise RuntimeError("this is a very low probability event")

        logger.info(
            "_design_controller(epoch={}): took {} attempts to rejection sample"
            .format(epoch_id, rejection_idx + 1))

        _, K = utils.dlqr(A_ts, B_ts, self._Q, self._R)
        self._current_K = K

        # compute the infinite horizon cost of this controller
        Jnom = utils.LQR_cost(self._A_star, self._B_star, self._current_K,
                              self._Q, self._R, self._sigma_w)

        rho_true = utils.spectral_radius(self._A_star +
                                         self._B_star.dot(self._current_K))
        logger.info("_design_controller(epoch={}): rho(A_* + B_* K)={}".format(
            epoch_id, rho_true))

        return (Anom, Bnom, Jnom)