def __init__(self):
    """Build the wing-rock environment: plant, linear model, gains, logger.

    Reads ``cfg.env_kwargs``, ``cfg.Q``, ``cfg.R``, ``cfg.Qb``, ``cfg.Rb``
    and ``cfg.dir`` from the module-level configuration, and the linear
    model (``Am``, ``B``) from ``wingrock.cfg``.
    """
    env_kwargs = vars(cfg.env_kwargs)
    super().__init__(**env_kwargs)

    # Load the wingrock configuration, then create the plant.
    wingrock.load_config()
    self.x = wingrock.System()
    # Replace the plant's uncertainty term by a function returning 0.
    self.x.unc = lambda t, x: 0

    # Linear model taken from the wingrock configuration.
    model_cfg = wingrock.cfg
    self.A = model_cfg.Am
    self.B = model_cfg.B

    # Gain (and second clqr output) for the target weights (Q, R).
    target_design = LQR.clqr(self.A, self.B, cfg.Q, cfg.R)
    self.Kopt, self.Popt = target_design

    # Behaviour-policy gain for the weights (Qb, Rb).
    # NOTE(review): this design uses ``A - 3`` (the scalar 3 is subtracted
    # from A), not A itself -- confirm this is intended.
    behaviour_design = LQR.clqr(self.A - 3, self.B, cfg.Qb, cfg.Rb)
    self.behave_K, _ = behaviour_design

    log_path = Path(cfg.dir, "env.h5")
    self.logger = fym.logging.Logger(log_path)
    self.logger.set_info(cfg=cfg)
def __init__(self):
    """Build the morphing-aircraft environment and its LQR gains.

    Creates the ``MorphingLon`` plant and an auxiliary ``BaseSystem``
    (``self.PI``), linearises the plant dynamics at the trim point and
    computes two gains: one for ``(cfg.Q, cfg.R)`` and a behaviour gain
    for ``(cfg.Qb, cfg.Rb)``.
    """
    env_kwargs = vars(cfg.env_kwargs)
    super().__init__(**env_kwargs)

    self.x = MorphingLon()
    self.PI = BaseSystem()

    # Trim values, also stored by name for later lookup.
    trims = self.x.get_trim()
    trim_names = ("x", "u", "eta")
    self.trim = dict(zip(trim_names, trims))

    # Jacobians of the plant dynamics evaluated at the trim values
    # (presumably 0 / 1 select the state / input argument -- TODO confirm).
    state_jacobian = jacob_analytic(self.x.deriv, 0)
    self.A = state_jacobian(*trims)
    input_jacobian = jacob_analytic(self.x.deriv, 1)
    self.B = input_jacobian(*trims)

    target_design = LQR.clqr(self.A, self.B, cfg.Q, cfg.R)
    self.Kopt, self.Popt = target_design

    behaviour_design = LQR.clqr(self.A, self.B, cfg.Qb, cfg.Rb)
    self.behave_K, _ = behaviour_design

    self.add_noise = True
def __init__(self):
    """Build the Q-learner environment: plant, behaviour gain and logger.

    The behaviour gain is the LQR gain for the configured model
    ``(cfg.A, cfg.B)`` with both weights scaled by 0.1.
    """
    learner_cfg = cfg.QLearner
    super().__init__(**learner_cfg.env_kwargs)

    self.x = LinearSystem()

    # Behaviour policy: same model, weights scaled down by a factor of 10.
    weight_scale = 0.1
    self.behave_K, *_ = LQR.clqr(
        cfg.A,
        cfg.B,
        cfg.Q * weight_scale,
        cfg.R * weight_scale,
    )

    log_path = Path(cfg.dir, "qlearner-env.h5")
    self.logger = fym.logging.Logger(log_path)
    self.logger.set_info(cfg=cfg)
def load_config():
    """Fill the module-level ``cfg`` with the experiment configuration.

    Sets the output directory, final time, plant matrices ``A`` / ``B``,
    the matrix ``F``, the weights ``Q`` / ``R``, the initial state and the
    Q-learner settings, then calls ``calc_config()`` for the derived values.
    """
    cfg.dir = "data"
    cfg.final_time = 40

    # Plant model.  Alternatives that were tried for A:
    #   [[0, 1, 0], [0, 0, 0], [1, 0, 0]]
    #   [[2, 1, 0], [0, 1, 0], [1, 0, 3]]
    cfg.A = np.array([
        [0, 1, 0],
        [0, -2, -1],
        [1, 0, -1],
    ])
    cfg.B = np.array([
        [0, 1],
        [1, 0],
        [0, 0],
    ])

    # F is Fp minus the LQR gain designed for (Fp, I) with identity weights.
    # Alternatives that were tried: -1 * np.eye(2), -1 * np.eye(1).
    n_inputs = cfg.B.shape[1]
    Fp = np.array([[-1, 1], [0, 1]])
    filter_gain, *_ = LQR.clqr(
        Fp, np.eye(n_inputs), np.eye(n_inputs), np.eye(n_inputs))
    cfg.F = Fp - filter_gain

    cfg.Q = np.diag([1, 10, 10])
    cfg.R = np.diag([1, 10])

    cfg.x_init = np.vstack((0.3, 0, 0))

    # Q-learner settings.  An "odeint" solver setup
    # (dt=20, ode_step_len=int(20/0.01)) was used as an alternative.
    learner = SN()
    cfg.QLearner = learner
    learner.env_kwargs = {
        "max_t": cfg.final_time,
        "solver": "rk4",
        "dt": 0.001,
    }
    learner.memory_len = 10000
    learner.batch_size = 400
    learner.train_epoch = 10

    calc_config()
def get_random_stable_gain(self):
    """Return the LQR gain for the model with random diagonal weights.

    The weights are diagonal matrices with entries drawn from
    ``np.random.rand``; their sizes follow ``cfg.agent.Q`` and
    ``cfg.agent.R``.  (An alternative, ``R = np.random.rand() * cfg.agent.R``,
    was tried for the input weight.)
    """
    # The model perturbation is scaled by 0, so the values of A and B are
    # unchanged; the draws are still made, which advances the global RNG.
    perturbation_scale = 0
    A = self.A + perturbation_scale * np.random.randn(*self.A.shape)
    B = self.B + perturbation_scale * np.random.randn(*self.B.shape)

    n_state_weights = cfg.agent.Q.shape[0]
    Q = np.diag(np.random.rand(n_state_weights))
    n_input_weights = cfg.agent.R.shape[0]
    R = np.diag(np.random.rand(n_input_weights))

    gain, _ = LQR.clqr(A, B, Q, R)
    return gain
def calc_config():
    """Compute the configuration entries derived from ``A``, ``B``, ``Q``, ``R``.

    Stores the first two outputs of ``LQR.clqr`` as ``cfg.K`` and ``cfg.P``,
    initialises the Q-learner gain (all ones) and weights (all zeros) with
    matching shapes, and prints the eigenvalues of ``A - B K_init``.
    """
    cfg.K, cfg.P, *_ = LQR.clqr(cfg.A, cfg.B, cfg.Q, cfg.R)

    learner = cfg.QLearner
    learner.K_init = np.ones_like(cfg.K)
    learner.W1_init = np.zeros_like(cfg.P)
    learner.W2_init = np.zeros_like(cfg.K)
    learner.W3_init = np.zeros_like(cfg.R)

    # Show the closed-loop eigenvalues obtained with the initial gain.
    closed_loop = cfg.A - cfg.B.dot(learner.K_init)
    print(np.linalg.eigvals(closed_loop))
def __init__(self):
    """Build the quadrotor environment, its linear model and controller.

    The plant is linearised at the trim point returned by
    ``self.get_trims(alt=1)``; the gain for ``(cfg.agent.Q, cfg.agent.R)``
    is stored in ``self.K`` / ``self.P``.  The base controller is a
    ``NoisyLQR`` with an all-zero gain.
    """
    env_kwargs = vars(cfg.env.kwargs)
    super().__init__(**env_kwargs)

    plant_kwargs = vars(cfg.quad.init)
    self.plant = Quadrotor(**plant_kwargs)

    # Linear model: Jacobians of ``self.deriv`` evaluated at the trim point
    # (presumably 0 / 1 select the state / input argument -- TODO confirm).
    self.xtrim, self.utrim = self.get_trims(alt=1)
    state_jacobian = jacob_analytic(self.deriv, 0)
    self.A = state_jacobian(self.xtrim, self.utrim)
    input_jacobian = jacob_analytic(self.deriv, 1)
    self.B = input_jacobian(self.xtrim, self.utrim)

    # Gain for the agent's weights.
    design = LQR.clqr(self.A, self.B, cfg.agent.Q, cfg.agent.R)
    self.K, self.P = design

    # Base controller: zero gain with the shape of B transposed.
    zero_gain = np.zeros_like(self.B.T)
    self.controller = NoisyLQR(zero_gain, self.xtrim, self.utrim)
def __init__(self):
    """Build the level-flight environment and its LQR gain.

    The initial state is the stacked vector
    ``[VT0 + 5, gamma0, h0 + 30, alpha0, Q0]``; the linear model is
    ``(A_trim, B_trim)`` and the gain is stored in ``self.K``.
    """
    super().__init__(dt=0.05, max_t=50)

    # Initial condition for level flight, offset from VT0 and h0.
    # NOTE: an unfinished, commented-out "VT tracking" variant (switching
    # the initial state on ``self.clock.get() < 10``) used to live here.
    initial_state = np.vstack([VT0 + 5, gamma0, h0 + 30, alpha0, Q0])
    self.x = BaseSystem(initial_state)

    self.A = A_trim
    self.B = B_trim

    state_weight = np.diag([0.003, 0.4, 0.002, 0.4, 1.7])
    input_weight = 1.7 * np.identity(2)
    self.K, *_ = LQR.clqr(self.A, self.B, state_weight, input_weight)
def exp5():
    """Compare our SQL iteration with the Kleinman iteration.

    A random plant ``(A, B)`` is drawn with a fixed seed.  Both iterations
    are run from a gain obtained from an LQR design with identity weights
    and from a random gain, and every iterate is recorded to an HDF5 file
    under ``data/exp5``:

    * ``kleinman-stable.h5`` / ``sql-stable.h5``
    * ``kleinman-unstable.h5`` / ``sql-unstable.h5``

    Each record holds ``i``, ``P``, ``K``, ``next_K`` and the reference
    ``Popt`` / ``Kopt`` from ``LQR.clqr(A, B, Q, R)``.
    """
    basedir = Path("data", "exp5")

    # Setup
    np.random.seed(3)
    A = np.random.rand(5, 5)
    B = np.random.rand(5, 3)
    Q = np.diag([100, 10, 1, 20, 30])
    R = np.diag([1, 3, 8])
    Kopt, Popt, *_ = LQR.clqr(A, B, Q, R)

    maxiter = 70
    tol = 1e-10
    # Dimensions are taken from B instead of being hard-coded.
    n, m = B.shape

    def finished(K, next_K, i):
        # Stop when the gain update is small or the iteration budget is spent.
        return ((K - next_K) ** 2).sum() < tol or i > maxiter

    # Kleinman Iteration
    def kleinman(K, name):
        logger = fym.logging.Logger(path=Path(basedir, name))
        # Close the logger even if an iteration raises.
        try:
            for i in itertools.count(0):
                P = scipy.linalg.solve_continuous_lyapunov(
                    (A - B.dot(K)).T, -(Q + K.T.dot(R).dot(K)))
                next_K = np.linalg.inv(R).dot(B.T).dot(P)

                logger.record(i=i, P=P, K=K, next_K=next_K,
                              Popt=Popt, Kopt=Kopt)

                if finished(K, next_K, i):
                    break

                K = next_K
        finally:
            logger.close()

    # SQL Iteration
    def sql(K, name):
        F = -3.0 * np.eye(m)

        logger = fym.logging.Logger(path=Path(basedir, name))
        # Close the logger even if an iteration raises.
        try:
            for i in itertools.count(0):
                # Lyapunov equation of the augmented (state, input) system.
                blkA = np.block([[A - B.dot(K), B],
                                 [np.zeros_like(B.T), F]])
                blkK = np.block([[np.eye(n), np.zeros_like(B)],
                                 [-K, np.eye(m)]])
                blkQ = blkK.T.dot(scipy.linalg.block_diag(Q, R)).dot(blkK)
                blkP = scipy.linalg.solve_continuous_lyapunov(blkA.T, -blkQ)

                P = blkP[:n, :n]
                next_K = K + np.linalg.inv(blkP[n:, n:]).dot(blkP[n:, :n])

                logger.record(i=i, P=P, K=K, next_K=next_K,
                              Popt=Popt, Kopt=Kopt)

                if finished(K, next_K, i):
                    break

                K = next_K
        finally:
            logger.close()

    # Initial gain from an LQR design with identity weights
    K, *_ = LQR.clqr(A, B, np.eye(n), np.eye(m))
    kleinman(K, "kleinman-stable.h5")
    sql(K, "sql-stable.h5")

    # Random initial gain (used for the "unstable" runs)
    K = np.random.rand(m, n)
    kleinman(K, "kleinman-unstable.h5")
    sql(K, "sql-unstable.h5")
def exp5(debug=False):
    """Compare our SQL iteration with the Kleinman iteration.

    The plant matrix ``A`` is built as ``v diag(2, ..., 6) v^-1`` from a
    seeded random ``v``, and ``B`` is drawn at random.  Both iterations are
    run from the zero gain and from a gain obtained from an LQR design with
    weights ``2 I``.  Every iterate is recorded to an HDF5 file under
    ``data/exp5``:

    * ``kleinman-unstable.h5`` / ``sql-unstable.h5``
    * ``kleinman-stable.h5`` / ``sql-stable.h5``

    Args:
        debug: When true, the SQL iteration additionally computes a set of
            diagnostic quantities and stops in the debugger on every
            iteration after the first.  Defaults to ``False`` so that the
            experiment runs unattended; the recorded data is the same in
            both modes.
    """
    basedir = Path("data", "exp5")

    # Setup
    np.random.seed(3000)
    v = np.random.randn(5, 5) * 3
    A = np.diag([2, 3, 4, 5, 6])
    A = v.dot(A).dot(np.linalg.inv(v))
    B = np.random.randn(5, 3) * 3
    Q = np.diag([100, 0, 0, 20, 30])
    R = np.diag([1, 3, 8])
    Kopt, Popt, *_ = LQR.clqr(A, B, Q, R)

    eps = 1e-16
    maxiter = 1000
    n, m = B.shape

    def finished(K, next_K, i):
        # Stop when the gain update is small or the iteration budget is spent.
        return ((K - next_K) ** 2).sum() < eps or i > maxiter

    # Kleinman Iteration
    def kleinman(K, name):
        logger = fym.logging.Logger(path=Path(basedir, name))
        # Close the logger even if an iteration raises.
        try:
            for i in itertools.count(0):
                P = scipy.linalg.solve_continuous_lyapunov(
                    (A - B.dot(K)).T, -(Q + K.T.dot(R).dot(K)))
                next_K = np.linalg.inv(R).dot(B.T).dot(P)

                logger.record(
                    i=i, P=P, K=K, next_K=next_K,
                    Popt=Popt, Kopt=Kopt,
                )

                if finished(K, next_K, i):
                    break

                K = next_K
        finally:
            logger.close()

    # SQL Iteration
    def sql(K, name):
        F = -np.eye(m) * 1
        K0 = K

        logger = fym.logging.Logger(path=Path(basedir, name))
        # Close the logger even if an iteration raises.
        try:
            for i in itertools.count(0):
                # Lyapunov equation of the augmented (state, input) system.
                blkA = np.block([[A - B.dot(K), B],
                                 [np.zeros_like(B.T), F]])
                blkK = np.block([[np.eye(n), np.zeros_like(B)],
                                 [-K, np.eye(m)]])
                blkQ = blkK.T.dot(scipy.linalg.block_diag(Q, R)).dot(blkK)
                blkH = scipy.linalg.solve_continuous_lyapunov(blkA.T, -blkQ)
                H11, H21, H22 = blkH[:n, :n], blkH[n:, :n], blkH[n:, n:]
                next_K = K + np.linalg.inv(H22).dot(H21)

                if debug:
                    # Diagnostics for interactive inspection only; none of
                    # these locals feeds into the recorded data.
                    V = np.linalg.inv(A - B.dot(K) - np.eye(n)) @ B
                    next_V = np.linalg.inv(
                        A - B.dot(next_K) - np.eye(n)) @ B

                    if i == 0:
                        # NOTE(review): the "prev_*" references are stored
                        # at iteration 0 only and never refreshed -- confirm
                        # that comparing against the first iterate is
                        # intended.
                        prev_H11, prev_H21, prev_H22 = H11, H21, H22
                        prev_K = K
                        prev_V = V
                    else:
                        eigvals, eigvecs = np.linalg.eig(H11)
                        eigvec = eigvecs[:, [eigvals.argmin()]]

                        Kk_tilde = K - prev_K
                        V_error = V - prev_V @ (np.eye(m) + Kk_tilde @ V)
                        H22_error = (H22 - prev_H22
                                     - prev_H21 @ V - V.T @ prev_H21.T)

                        breakpoint()

                # Schur complement of H22 in the block solution.
                P = H11 - H21.T.dot(np.linalg.inv(H22)).dot(H21)
                next_H11 = P

                # Same cost with the dynamics transformed by the gain error
                # with respect to Kopt.
                Kt = Kopt - K
                blkKt = np.block([[np.eye(n), np.zeros_like(B)],
                                  [-Kt, np.eye(m)]])
                blkA_s = blkKt.dot(
                    np.block([[A - B.dot(K), B], [F.dot(Kt), F]]))
                blkH_s = scipy.linalg.solve_continuous_lyapunov(
                    blkA_s.T, -blkQ)
                H11_s, H22_s = blkH_s[:n, :n], blkH_s[n:, n:]
                P_s = H11_s - Kt.T.dot(H22_s).dot(Kt)

                # Smallest and largest real part of the eigenvalues of P.
                eigs = np.linalg.eigvals(P)
                Peig = [eigs.min().real, eigs.max().real]

                logger.record(
                    i=i, P=P, K=K, next_K=next_K, Popt=Popt, Kopt=Kopt,
                    P_s=P_s, Peig=Peig, K0=K0,
                    H11=H11, next_H11=next_H11,
                )

                if finished(K, next_K, i):
                    break

                K = next_K
        finally:
            logger.close()

    # Nonstabilizing initial gain (A has eigenvalues 2..6, so K = 0 leaves
    # the closed loop unstable)
    K = np.zeros((m, n))
    kleinman(K, "kleinman-unstable.h5")
    sql(K, "sql-unstable.h5")

    # Initial gain from an LQR design with weights 2 I
    K, *_ = LQR.clqr(A, B, 2 * np.eye(n), 2 * np.eye(m))
    kleinman(K, "kleinman-stable.h5")
    sql(K, "sql-stable.h5")