def gradient(Q, R, A, B):
    """Compute the gradient of the map th -> Tr(P(th)), where th = [A, B].

    P (and the gain K) come from ``utils.dlqr(A, B, Q, R)``.
    It is assumed that Q, R are both invertible.

    Returns an array of shape (n, n + p), with (n, p) = B.shape. Entry (i, j)
    is the trace of the discrete Lyapunov solution obtained for the unit
    perturbation direction that is 1 at (i, j) and 0 elsewhere.
    """
    P, K = utils.dlqr(A, B, Q, R)
    closed_loop = A + B.dot(K)
    n, p = B.shape
    grad_shape = (n, n + p)

    # The stacked matrix [I; K] does not depend on the perturbation direction.
    stacked_gain = np.vstack((np.eye(n), K))

    # TODO: we should invert the operator X -> A_c^T X A_c - X
    # once and then use it to solve many linear equations,
    # rather than repeating the inversion many times
    # See Eq. (13) of
    # https://arxiv.org/pdf/1703.08972.pdf
    grad = np.zeros(grad_shape)
    for position in np.ndindex(*grad_shape):
        direction = np.zeros(grad_shape)
        direction[position] = 1
        half = closed_loop.T.dot(P.dot(direction)).dot(stacked_gain)
        # Symmetrize the right-hand side before the Lyapunov solve.
        rhs = half + half.T
        solution = utils.solve_discrete_lyapunov(closed_loop, rhs)
        grad[position] = np.trace(solution)
    return grad
def _main():
    """Run the OFU strategy on the unstable Laplacian example system.

    The strategy is reset, primed for 100 steps with the LQR gain of the
    true system, and then stepped 500 times.
    """
    import examples

    A_star, B_star = examples.unstable_laplacian_dynamics()

    # define costs
    Q = 1e-3 * np.eye(3)
    R = np.eye(3)

    # initial controller
    _, K_init = utils.dlqr(A_star, B_star, 1e-3 * np.eye(3), np.eye(3))

    rng = np.random

    strategy_kwargs = dict(
        Q=Q,
        R=R,
        A_star=A_star,
        B_star=B_star,
        sigma_w=1,
        reg=1e-5,
        actual_error_multiplier=1,
        rls_lam=None,
    )
    env = OFUStrategy(**strategy_kwargs)

    env.reset(rng)
    env.prime(100, K_init, 0.1, rng)
    for _ in range(500):
        env.step(rng)
def _design_controller(self, states, inputs, transitions, rng):
    """Design a controller from an optimistic model inside the confidence set.

    Steps:
      1. Fit nominal (A, B) and the empirical covariance by regularized
         least squares.
      2. Set the confidence radius ``eps`` to ``actual_error_multiplier``
         times the covariance-weighted error between the nominal and the
         true parameters.
      3. Run ``ofu_pgd`` with a projection onto that weighted ball around
         the nominal estimate.
      4. Store the LQR gain of the selected model in ``self._current_K``.

    ``rng`` is accepted for interface compatibility and is not used here.

    Returns the tuple ``(Anom, Bnom, Jnom)``, where ``Jnom`` is
    ``utils.LQR_cost`` of the new gain evaluated on the true system.
    """
    logger = self._get_logger()
    num_points = inputs.shape[0]
    logger.debug("_design_controller: have {} points for regression".format(num_points))

    # TODO(stephentu):
    # Currently I am using the algorithm of Abbasi-Yadkori and Szepesvari.
    # We should also try the subtly different algorithm in
    # https://arxiv.org/pdf/1711.07230.pdf.

    # fit the data
    Anom, Bnom, emp_cov = utils.solve_least_squares(
        states, inputs, transitions, reg=self._reg)

    # Keep un-normalized copies before emp_cov is rescaled in place below.
    if not self._has_primed:
        self._emp_cov = np.array(emp_cov)
    self._last_emp_cov = np.array(emp_cov)

    emp_cov /= inputs.shape[0]  # normalize by T to improve numerics

    def weighted_sq_error(diff):
        # Tr(diff * emp_cov * diff^T)
        return np.trace(diff.dot(emp_cov.dot(diff.T)))

    theta_nom = np.hstack((Anom, Bnom))
    theta_star = np.hstack((self._A_star, self._B_star))

    actual_error = weighted_sq_error(theta_nom - theta_star)
    eps = self._actual_error_multiplier * actual_error
    logger.info("_design_controller: actual weighted error is {}, eps is {}".format(actual_error, eps))

    n, p = self._n, self._p

    def projection_operator(A, B):
        stacked = np.hstack((A, B))
        projected = utils.project_weighted_ball(stacked, theta_nom, emp_cov, eps)
        return projected[:, :n], projected[:, n:]

    A_ofu, B_ofu = ofu_pgd(
        Q=self._Q,
        R=self._R,
        Ahat=Anom,
        Bhat=Bnom,
        projection_operator=projection_operator,
        logger=logger,
        num_restarts=self._num_restarts)

    # Sanity check: the selected model must lie in the confidence set.
    tolerance = 1e-5
    delta_ofu = np.hstack((A_ofu, B_ofu)) - theta_nom
    assert weighted_sq_error(delta_ofu) <= eps + tolerance

    _, gain = utils.dlqr(A_ofu, B_ofu, self._Q, self._R)
    self._current_K = gain

    # compute the infinite horizon cost of this controller
    Jnom = utils.LQR_cost(
        self._A_star, self._B_star, self._current_K,
        self._Q, self._R, self._sigma_w)

    # for debugging purposes,
    # check to see if this controller will stabilize the true system
    rho_true = utils.spectral_radius(
        self._A_star + self._B_star.dot(self._current_K))
    epoch_label = self._epoch_idx + 1 if self._has_primed else 0
    logger.info("_design_controller(epoch={}): rho(A_* + B_* K)={}".format(
        epoch_label, rho_true))

    return (Anom, Bnom, Jnom)
def test_sls_common_lyapunov():
    """With a tiny uncertainty radius, the gain from the common-Lyapunov
    synthesis should match the nominal LQR gain."""
    rng = np.random.RandomState(237853)

    eps_A = 0.0001
    eps_B = 0.0001

    # Perturb the nominal system inside the respective uncertainty balls.
    Ahat = utils.sample_2_to_2_ball(
        np.array([[1.01, 0.01, 0],
                  [-0.01, 1.01, 0.01],
                  [0, -0.01, 1.01]]),
        eps_A, rng)
    Bhat = utils.sample_2_to_2_ball(np.eye(3), eps_B, rng)

    Q = np.eye(3)
    R = np.eye(3)

    is_feasible, _, P, K = sls_common_lyapunov(
        Ahat, Bhat, Q, R, eps_A, eps_B, 0.999, None)
    assert is_feasible

    P_nom, K_nom = utils.dlqr(Ahat, Bhat, Q, R)

    # THIS FAILS
    #assert np.allclose(np.trace(P), np.trace(P_nom))

    assert np.allclose(K, K_nom, atol=1e-6)
def test_sls_synth():
    """Check the SLS synthesis against the nominal LQR solution.

    Verifies (a) the squared H2 cost equals Tr(P_nom), (b) the first T
    blocks of Phi_x equal the powers of the nominal closed loop, and
    (c) the state-space realization of the controller reproduces the same
    closed-loop response in its top-left n x n block.
    """
    rng = np.random.RandomState(893754)

    eps_A = 0.0001
    eps_B = 0.0001
    Ahat = utils.sample_2_to_2_ball(
        np.array([[1.01, 0.01, 0],
                  [-0.01, 1.01, 0.01],
                  [0, -0.01, 1.01]]),
        eps_A, rng)
    Bhat = utils.sample_2_to_2_ball(np.eye(3), eps_B, rng)

    Q = np.eye(3)
    R = np.eye(3)

    alpha = 0.5
    gamma = 0.98
    n = 3
    p = 3
    T = 15

    is_feasible, sqrt_htwo_cost, Phi_x, Phi_u = sls_synth(
        Q, R, Ahat, Bhat, eps_A, eps_B, T, gamma, alpha)
    assert is_feasible, "should be feasible"

    P_nom, K_nom = utils.dlqr(Ahat, Bhat, Q, R)
    assert np.allclose(sqrt_htwo_cost**2, np.trace(P_nom))

    def left_powers(M):
        # [I, M, M^2, ..., M^T], built by repeated left-multiplication.
        current = np.eye(M.shape[0])
        sequence = [np.array(current)]
        for _ in range(T):
            current = M.dot(current)
            sequence.append(np.array(current))
        return sequence

    # Expected response: powers of the nominal LQR closed loop.
    coeffs = left_powers(Ahat + Bhat.dot(K_nom))

    for k, expected in enumerate(coeffs[:T]):
        actual = Phi_x[k * n:(k + 1) * n, :]
        assert np.allclose(expected, actual, atol=1e-5)

    # Closed loop of the plant with the state-space controller realization.
    A_k, B_k, C_k, D_k = make_state_space_controller(Phi_x, Phi_u, n, p)
    A_cl = np.block([[Ahat + Bhat.dot(D_k), Bhat.dot(C_k)],
                     [B_k, A_k]])
    cl_coeffs = [np.array(M[:n, :n]) for M in left_powers(A_cl)]

    for expected, actual in zip(coeffs[:T], cl_coeffs[:T]):
        assert np.allclose(expected, actual, atol=1e-5)
def _main():
    """Run the two SLS strategies back to back on the unstable Laplacian
    example: first the FIR variant, then the common-Lyapunov variant.

    Each strategy is reset, primed for 250 steps with the LQR gain of the
    true system, and then stepped 500 times.
    """
    import examples

    A_star, B_star = examples.unstable_laplacian_dynamics()

    # define costs
    Q = 1e-3 * np.eye(3)
    R = np.eye(3)

    # initial controller
    _, K_init = utils.dlqr(A_star, B_star, 1e-3 * np.eye(3), np.eye(3))

    rng = np.random

    def run(env):
        env.reset(rng)
        env.prime(250, K_init, 0.5, rng)
        for _ in range(500):
            env.step(rng)

    # Settings common to both strategies.
    common = dict(
        Q=Q,
        R=R,
        A_star=A_star,
        B_star=B_star,
        sigma_w=1,
        sigma_explore=0.1,
        reg=1e-5,
        epoch_multiplier=10,
        actual_error_multiplier=1,
        rls_lam=None,
    )

    fir_kwargs = dict(common)
    fir_kwargs["truncation_length"] = 12
    run(SLS_FIRStrategy(**fir_kwargs))

    run(SLS_CommonLyapunovStrategy(**common))
def test_sls_h2_cost():
    """Check that SLS controllers achieve the optimal LQR cost on the true
    system when the model uncertainty is tiny.

    Three quantities are compared against J_star = Tr(P_star):
      * ``utils.LQR_cost`` of the optimal gain K_star (consistency check),
      * ``utils.LQR_cost`` of the common-Lyapunov gain,
      * ``h2_squared_norm`` of the FIR responses from ``sls_synth``.
    """
    rng = np.random.RandomState(805238)

    Astar = np.array([[1.01, 0.01, 0],
                      [-0.01, 1.01, 0.01],
                      [0, -0.01, 1.01]])
    Bstar = np.eye(3)

    eps_A = 0.00001
    eps_B = 0.00001

    # Estimates are drawn from small balls around the true system.
    Ahat = utils.sample_2_to_2_ball(Astar, eps_A, rng)
    Bhat = utils.sample_2_to_2_ball(Bstar, eps_B, rng)

    Q = np.eye(3)
    R = np.eye(3)

    T = 15

    is_feasible, _, _, K_cl = sls_common_lyapunov(
        Ahat, Bhat, Q, R, eps_A, eps_B, 0.999, None)
    assert is_feasible

    P_star, K_star = utils.dlqr(Astar, Bstar, Q, R)
    J_star = np.trace(P_star)

    assert np.allclose(J_star, utils.LQR_cost(Astar, Bstar, K_star, Q, R, 1))
    assert np.allclose(
        J_star, utils.LQR_cost(Astar, Bstar, K_cl, Q, R, 1), atol=1e-6)

    is_feasible, _, Phi_x, Phi_u = sls_synth(
        Q, R, Ahat, Bhat, eps_A, eps_B, T, 0.999, 0.5)
    # Fail with a clear message if the synthesis itself was infeasible,
    # rather than with an obscure error when the responses are used below.
    assert is_feasible, "sls_synth should be feasible"

    assert np.allclose(
        J_star,
        h2_squared_norm(Astar, Bstar, Phi_x, Phi_u, Q, R, 1),
        atol=1e-6)
def test_rls():
    """The recursive least-squares estimator must agree with the batch
    least-squares solution, both after 100 and after 200 closed-loop steps."""
    rng = np.random.RandomState(657423)

    n, p = 3, 2
    A = rng.normal(size=(n, n))
    B = rng.normal(size=(n, p))

    _, K = utils.dlqr(A, B)
    assert utils.spectral_radius(A + B.dot(K)) <= 1

    lam = 1e-5
    rls = utils.RecursiveLeastSquaresEstimator(n, p, lam)

    states, inputs, transitions = [], [], []
    xcur = np.zeros((n, ))

    # Two phases of 100 steps; the histories keep accumulating, so the
    # second comparison covers all 200 samples.
    for _phase in range(2):
        for _step in range(100):
            ucur = K.dot(xcur) + rng.normal(size=(p, ))
            xnext = A.dot(xcur) + B.dot(ucur) + rng.normal(size=(n, ))
            states.append(xcur)
            inputs.append(ucur)
            transitions.append(xnext)
            rls.update(xcur, ucur, xnext)
            xcur = xnext

        # LS estimate
        batch_estimate = utils.solve_least_squares(
            np.array(states), np.array(inputs), np.array(transitions),
            reg=lam)
        Ahat_ls, Bhat_ls, Cov_ls = batch_estimate

        # RLS estimate
        Ahat_rls, Bhat_rls, Cov_rls = rls.get_estimate()

        assert np.allclose(Ahat_ls, Ahat_rls)
        assert np.allclose(Bhat_ls, Bhat_rls)
        assert np.allclose(Cov_ls, Cov_rls)
def function_value(Q, R, A, B):
    """Return Tr(P), where P is the first output of ``utils.dlqr(A, B, Q, R)``."""
    riccati_solution, _ = utils.dlqr(A, B, Q, R)
    return np.trace(riccati_solution)
# Label and title the current figure, save it for this horizon N, then display it.
plt.xlabel('$x_1$')
plt.ylabel('$x_2$')
plt.legend()
plt.title("x_0 = {0}, N = {1}".format(str(x0), str(N)))
plt.savefig("problem_2/approach1_figs/HW2_pb2_1_approach1_N" + str(N) + ".png")
plt.show()

# =======================================================================================
# ============== Approach 2 =============================================================
# =======================================================================================

# Hint: the dlqr function returns: i) P, the solution to the DARE,
# ii) the optimal feedback gain K, and iii) the closed-loop system matrix Acl = (A - BK).
P, K, Acl = dlqr(A, B, Q, R)

# Stack the state-constraint rows (Fx, bx) with the input-constraint rows
# (Fu, bu) composed with -K, so that both are expressed in terms of x.
# Presumably the feedback is u = -Kx, consistent with Acl = (A - BK) above.
Ftot = np.vstack((Fx, np.dot(Fu, -K)))
btot = np.hstack((bx, bu))

# NOTE(review): P from the DARE is not used in the visible part of this script;
# confirm that the identity (rather than P) is the intended terminal weight.
Qf = np.eye(n)  # filled in here

poli = polytope(Ftot, btot)
# Hint: this function returns F and b such that O_inf = \{ x | F x <= b \}.
F, b = poli.computeO_inf(Acl)

# Hint: the terminal set is X_f = \{ x | F_f x <= b_f \}.
# The matrices describing O_inf are reused directly as the terminal set.
Ff = F  # filled in here
bf = b  # filled in here
def _design_controller(self, states, inputs, transitions, rng):
    """Design the LQR controller directly from the true system.

    The data arguments and ``rng`` are ignored. The gain from
    ``utils.dlqr`` on (A_star, B_star) is stored in ``self._optimal_K``.

    Returns ``(A_star, B_star, sigma_w**2 * Tr(P))``.
    """
    riccati_solution, self._optimal_K = utils.dlqr(
        self._A_star, self._B_star, self._Q, self._R)
    optimal_cost = (self._sigma_w**2) * np.trace(riccati_solution)
    return (self._A_star, self._B_star, optimal_cost)
def _design_controller(self, states, inputs, transitions, rng):
    """Design a controller for a model sampled from the confidence set.

    Steps:
      1. Fit nominal (A, B) and the empirical covariance by regularized
         least squares.
      2. Set the confidence radius ``eps`` to ``actual_error_multiplier``
         times the covariance-weighted error between the nominal and the
         true parameters.
      3. Draw a candidate ``theta_nom + sqrt(eps) * eta * inv_sqrt_emp_cov``
         where ``eta`` has Frobenius norm at most one, and keep the first
         candidate that passes the explicit containment check.
      4. Store the LQR gain of the accepted model in ``self._current_K``.

    Returns the tuple ``(Anom, Bnom, Jnom)``, where ``Jnom`` is
    ``utils.LQR_cost`` of the new gain evaluated on the true system.

    Raises:
        RuntimeError: if no candidate is accepted within MAX_TRIES draws.
    """
    logger = self._get_logger()

    epoch_id = self._epoch_idx + 1 if self._has_primed else 0

    logger.debug(
        "_design_controller(epoch={}): have {} points for regression".
        format(epoch_id, inputs.shape[0]))

    # do a least squares fit and design based on the nominal
    Anom, Bnom, emp_cov = utils.solve_least_squares(
        states, inputs, transitions, reg=self._reg)

    # Keep un-normalized copies before emp_cov is rescaled in place below.
    if not self._has_primed:
        self._emp_cov = np.array(emp_cov)
    self._last_emp_cov = np.array(emp_cov)

    emp_cov /= inputs.shape[0]  # normalize by T to improve numerics

    theta_nom = np.hstack((Anom, Bnom))
    theta_star = np.hstack((self._A_star, self._B_star))
    delta = theta_nom - theta_star
    actual_error = np.trace(delta.dot(emp_cov.dot(delta.T)))
    eps = self._actual_error_multiplier * actual_error
    logger.info(
        "_design_controller(epoch={}): actual weighted error is {}, eps is {}"
        .format(epoch_id, actual_error, eps))

    def is_contained_in_confidence_set(A, B):
        theta_ab = np.hstack((A, B))
        this_delta = theta_ab - theta_nom
        return np.trace(this_delta.dot(emp_cov).dot(this_delta.T)) <= eps

    # Presumably emp_cov^{-1/2}; it maps the unit ball onto the ellipsoid.
    inv_sqrt_emp_cov = utils.pd_inv_sqrt(emp_cov)

    MAX_TRIES = 100000
    # Float exponent so the radius scaling is also correct under
    # Python 2 integer division (where 1 / d would truncate to 0).
    radial_exponent = 1.0 / theta_nom.size

    rng = self._get_rng(rng)

    for rejection_idx in range(MAX_TRIES):
        # Gaussian direction rescaled to a random radius in [0, 1].
        eta = rng.normal(size=theta_nom.shape)
        eta *= np.power(rng.uniform(), radial_exponent) / np.linalg.norm(
            eta, ord="fro")
        theta_tilde = theta_nom + np.sqrt(eps) * eta.dot(inv_sqrt_emp_cov)
        A_tilde = theta_tilde[:, :self._n]
        B_tilde = theta_tilde[:, self._n:]
        if is_contained_in_confidence_set(A_tilde, B_tilde):
            A_ts = A_tilde
            B_ts = B_tilde
            break
    else:
        # Every draw was rejected.
        logger.warning(
            "_design_controller(epoch={}): was unable to rejection sample after {} attempts"
            .format(epoch_id, MAX_TRIES))
        raise RuntimeError("this is a very low probability event")

    logger.info(
        "_design_controller(epoch={}): took {} attempts to rejection sample"
        .format(epoch_id, rejection_idx + 1))

    _, K = utils.dlqr(A_ts, B_ts, self._Q, self._R)
    self._current_K = K

    # compute the infinite horizon cost of this controller
    Jnom = utils.LQR_cost(self._A_star, self._B_star, self._current_K,
                          self._Q, self._R, self._sigma_w)

    # Spectral radius of the true closed loop, logged for debugging.
    rho_true = utils.spectral_radius(
        self._A_star + self._B_star.dot(self._current_K))
    logger.info("_design_controller(epoch={}): rho(A_* + B_* K)={}".format(
        epoch_id, rho_true))

    return (Anom, Bnom, Jnom)