Python LQR Examples, fym.agents.LQR Python Examples

Example #1

0

Show file

        def __init__(self):
            """Build the wing-rock environment, its linear model and LQR gains.

            The base environment is initialised from ``cfg.env_kwargs``, the
            wing-rock configuration is loaded, and two gains are computed with
            ``LQR.clqr``: one from ``cfg.Q``/``cfg.R`` (stored with its second
            return value as ``Kopt``/``Popt``) and a behaviour gain from
            ``cfg.Qb``/``cfg.Rb``.  A logger writing to ``<cfg.dir>/env.h5`` is
            opened last and given ``cfg`` as its info.
            """
            env_options = vars(cfg.env_kwargs)
            super().__init__(**env_options)
            wingrock.load_config()

            # Plant whose ``unc`` callable is overridden to always return 0.
            self.x = wingrock.System()
            self.x.unc = lambda t, x: 0

            # Linear model taken from the wing-rock configuration.
            model_cfg = wingrock.cfg
            self.A = model_cfg.Am
            self.B = model_cfg.B

            optimal = LQR.clqr(self.A, self.B, cfg.Q, cfg.R)
            self.Kopt, self.Popt = optimal

            # NOTE(review): ``self.A - 3`` subtracts the scalar 3 from A
            # (element-wise if A is an ndarray) -- confirm this is intended
            # rather than a shift by ``3 * I``.
            behaviour = LQR.clqr(self.A - 3, self.B, cfg.Qb, cfg.Rb)
            self.behave_K, _ = behaviour

            log_path = Path(cfg.dir, "env.h5")
            self.logger = fym.logging.Logger(log_path)
            self.logger.set_info(cfg=cfg)

Example #2

0

Show file

        def __init__(self):
            """Create the morphing-aircraft environment linearised at trim.

            Two systems are attached (``x`` and ``PI``), the trim returned by
            ``self.x.get_trim()`` is stored under the keys ``"x"``, ``"u"`` and
            ``"eta"``, and ``A``/``B`` are obtained by evaluating
            ``jacob_analytic`` of the plant derivative at that trim
            (presumably the Jacobians w.r.t. arguments 0 and 1 -- confirm
            against ``jacob_analytic``).  The optimal and behaviour gains come
            from ``LQR.clqr``.
            """
            super().__init__(**vars(cfg.env_kwargs))
            self.x = MorphingLon()
            self.PI = BaseSystem()

            trims = self.x.get_trim()
            self.trim = dict(zip(("x", "u", "eta"), trims))

            # Linear model evaluated at the trim point.
            state_jacobian = jacob_analytic(self.x.deriv, 0)
            self.A = state_jacobian(*trims)
            input_jacobian = jacob_analytic(self.x.deriv, 1)
            self.B = input_jacobian(*trims)

            optimal = LQR.clqr(self.A, self.B, cfg.Q, cfg.R)
            self.Kopt, self.Popt = optimal
            behaviour = LQR.clqr(self.A, self.B, cfg.Qb, cfg.Rb)
            self.behave_K, _ = behaviour

            self.add_noise = True

Example #3

0

Show file

    def __init__(self):
        """Set up the Q-learner environment around a ``LinearSystem``.

        The behaviour gain is the first value returned by ``LQR.clqr`` for the
        configured plant with both weights scaled by 0.1.  A logger writing to
        ``<cfg.dir>/qlearner-env.h5`` is opened and given ``cfg`` as its info.
        """
        env_options = cfg.QLearner.env_kwargs
        super().__init__(**env_options)
        self.x = LinearSystem()

        # Behaviour policy: LQR with the configured weights scaled down.
        scaled_Q = cfg.Q * 0.1
        scaled_R = cfg.R * 0.1
        self.behave_K, *_ = LQR.clqr(cfg.A, cfg.B, scaled_Q, scaled_R)

        log_path = Path(cfg.dir, "qlearner-env.h5")
        self.logger = fym.logging.Logger(log_path)
        self.logger.set_info(cfg=cfg)

Example #4

0

Show file

def load_config():
    """Fill the shared ``cfg`` namespace with the experiment settings.

    Sets the output directory, final time, plant matrices ``A``/``B``, the
    matrix ``F``, the weights ``Q``/``R``, the initial state and the
    ``QLearner`` sub-namespace, then calls ``calc_config()`` to add the
    entries derived from them.
    """
    cfg.dir = "data"
    cfg.final_time = 40

    # Plant matrices.  Alternatives previously tried for A:
    #   np.array([[0, 1, 0], [0, 0, 0], [1, 0, 0]])
    #   np.array([[2, 1, 0], [0, 1, 0], [1, 0, 3]])
    cfg.A = np.array([
        [0, 1, 0],
        [0, -2, -1],
        [1, 0, -1],
    ])
    cfg.B = np.array([[0, 1], [1, 0], [0, 0]])

    # F is Fp minus the LQR gain computed for (Fp, I) with identity weights.
    # Alternatives previously tried: -1 * np.eye(2), -1 * np.eye(1).
    n_inputs = cfg.B.shape[1]
    F_open = np.array([[-1, 1], [0, 1]])
    Kf, *_ = LQR.clqr(
        F_open,
        np.eye(n_inputs),
        np.eye(n_inputs),
        np.eye(n_inputs),
    )
    cfg.F = F_open - Kf

    cfg.Q = np.diag([1, 10, 10])
    cfg.R = np.diag([1, 10])

    # Initial state as a 3x1 column.
    cfg.x_init = np.vstack((0.3, 0, 0))

    cfg.QLearner = SN()
    # Alternative solver settings previously tried:
    #   solver="odeint", dt=20, ode_step_len=int(20/0.01)
    cfg.QLearner.env_kwargs = {
        "max_t": cfg.final_time,
        "solver": "rk4",
        "dt": 0.001,
    }
    cfg.QLearner.memory_len = 10000
    cfg.QLearner.batch_size = 400
    cfg.QLearner.train_epoch = 10

    calc_config()

Example #5

0

Show file

File: exp7.py Project: seong-hun/structured-q-learning

 def get_random_stable_gain(self):
     """Return an LQR gain computed with randomly drawn diagonal weights.

     The plant matrices are perturbed by Gaussian noise multiplied by zero
     (so the perturbation is currently switched off, but the random numbers
     are still drawn).  ``Q`` and ``R`` are diagonal with entries from
     ``np.random.rand``, sized like ``cfg.agent.Q`` and ``cfg.agent.R``.
     An alternative previously tried for R: ``np.random.rand() * cfg.agent.R``.
     """
     noise_scale = 0
     A_pert = self.A + noise_scale * np.random.randn(*self.A.shape)
     B_pert = self.B + noise_scale * np.random.randn(*self.B.shape)

     q_diag = np.random.rand(cfg.agent.Q.shape[0])
     r_diag = np.random.rand(cfg.agent.R.shape[0])

     gain, _ = LQR.clqr(A_pert, B_pert, np.diag(q_diag), np.diag(r_diag))
     return gain

Example #6

0

Show file

def calc_config():
    """Derive the configuration entries that depend on the plant and weights.

    Stores the first two values returned by ``LQR.clqr`` as ``cfg.K`` and
    ``cfg.P``, initialises the Q-learner's gain (all ones) and weight
    matrices (all zeros, shaped like ``P``, ``K`` and ``R``), and prints the
    eigenvalues of ``A - B K_init``.
    """
    riccati_result = LQR.clqr(cfg.A, cfg.B, cfg.Q, cfg.R)
    cfg.K, cfg.P, *_ = riccati_result

    learner = cfg.QLearner
    learner.K_init = np.ones_like(cfg.K)
    learner.W1_init = np.zeros_like(cfg.P)
    learner.W2_init = np.zeros_like(cfg.K)
    learner.W3_init = np.zeros_like(cfg.R)

    # Closed-loop eigenvalues under the initial gain.
    closed_loop = cfg.A - cfg.B.dot(learner.K_init)
    print(np.linalg.eigvals(closed_loop))

Example #7

0

Show file

File: exp7.py Project: seong-hun/structured-q-learning

    def __init__(self):
        """Create the quadrotor environment, its linear model and controllers.

        The plant is built from ``cfg.quad.init``.  ``A`` and ``B`` come from
        ``jacob_analytic`` of ``self.deriv`` evaluated at the trim returned by
        ``self.get_trims(alt=1)``, and ``K``/``P`` from ``LQR.clqr`` with the
        agent weights.  The base controller is a ``NoisyLQR`` with an all-zero
        gain.
        """
        env_options = vars(cfg.env.kwargs)
        super().__init__(**env_options)

        plant_options = vars(cfg.quad.init)
        self.plant = Quadrotor(**plant_options)

        # Linear model evaluated at the trim point.
        self.xtrim, self.utrim = self.get_trims(alt=1)
        state_jacobian = jacob_analytic(self.deriv, 0)
        self.A = state_jacobian(self.xtrim, self.utrim)
        input_jacobian = jacob_analytic(self.deriv, 1)
        self.B = input_jacobian(self.xtrim, self.utrim)

        # Optimal gain for the agent's weights.
        optimal = LQR.clqr(self.A, self.B, cfg.agent.Q, cfg.agent.R)
        self.K, self.P = optimal

        # Base controller built with a zero gain shaped like B transposed.
        zero_gain = np.zeros_like(self.B.T)
        self.controller = NoisyLQR(zero_gain, self.xtrim, self.utrim)

Example #8

0

Show file

File: main.py Project: miaerosae/Pitch-Autopilot-Design-FixedWingUAV

    def __init__(self):
        """Set up the environment with an offset initial state and LQR gain.

        The environment runs with ``dt=0.05`` up to ``max_t=50``.  The state
        starts at ``[VT0 + 5, gamma0, h0 + 30, alpha0, Q0]`` stacked as a
        column, the linear model is ``A_trim``/``B_trim``, and ``K`` is the
        first value returned by ``LQR.clqr`` for the weights defined below.
        """
        super().__init__(dt=0.05, max_t=50)

        # Level-flight case: first entry offset by +5 and third by +30.
        initial_state = np.vstack([VT0 + 5, gamma0, h0 + 30, alpha0, Q0])
        self.x = BaseSystem(initial_state)

        # NOTE: an unfinished, commented-out "VT tracking" variant of the
        # initial state (switching on the clock time t < 10) used to sit here.
        self.A = A_trim
        self.B = B_trim

        state_weight = np.diag([0.003, 0.4, 0.002, 0.4, 1.7])
        input_weight = 1.7 * np.identity(2)
        self.K, *_ = LQR.clqr(self.A, self.B, state_weight, input_weight)

Example #9

0

Show file

def exp5():
    """
    This experiment compares our algorithms and the Kleinman algorithm.

    A random 5-state, 3-input plant is generated with a fixed seed. Both
    iterations are run twice: once from an LQR gain computed with identity
    weights, and once from a random gain. Every iteration is recorded to
    an HDF5 file under ``data/exp5``.
    """
    basedir = Path("data", "exp5")

    # Setup
    np.random.seed(3)
    A = np.random.rand(5, 5)
    B = np.random.rand(5, 3)
    Q = np.diag([100, 10, 1, 20, 30])
    R = np.diag([1, 3, 8])
    Kopt, Popt, *_ = LQR.clqr(A, B, Q, R)
    maxiter = 70
    # Dimensions are derived from B so the inner routines carry no
    # hard-coded sizes.
    n, m = B.shape

    # Kleinman Iteration
    def kleinman(K, name):
        """Run the Kleinman iteration from gain ``K``, logging to ``name``."""
        logger = fym.logging.Logger(path=Path(basedir, name))

        # try/finally guarantees the log file is closed even if a solver
        # call raises part-way through the iteration.
        try:
            for i in itertools.count(0):
                P = scipy.linalg.solve_lyapunov(
                    (A - B.dot(K)).T, -(Q + K.T.dot(R).dot(K)))
                next_K = np.linalg.inv(R).dot(B.T).dot(P)

                logger.record(
                    i=i, P=P, K=K, next_K=next_K, Popt=Popt, Kopt=Kopt)

                # Stop on convergence of the gain or on the iteration cap.
                if ((K - next_K)**2).sum() < 1e-10 or i > maxiter:
                    break

                K = next_K
        finally:
            logger.close()

    # SQL Iteration
    def sql(K, name):
        """Run the SQL iteration from gain ``K``, logging to ``name``."""
        F = - np.diag([3] * m) * 1
        logger = fym.logging.Logger(path=Path(basedir, name))

        try:
            for i in itertools.count(0):
                # Augmented (state, input) system and its quadratic cost.
                blkA = np.block([[A - B.dot(K), B], [np.zeros_like(B.T), F]])
                blkK = np.block(
                    [[np.eye(n), np.zeros_like(B)], [-K, np.eye(m)]])
                blkQ = blkK.T.dot(scipy.linalg.block_diag(Q, R)).dot(blkK)
                blkP = scipy.linalg.solve_lyapunov(blkA.T, -blkQ)
                P = blkP[:n, :n]
                next_K = K + np.linalg.inv(blkP[n:, n:]).dot(blkP[n:, :n])

                logger.record(
                    i=i, P=P, K=K, next_K=next_K, Popt=Popt, Kopt=Kopt)

                # Stop on convergence of the gain or on the iteration cap.
                if ((K - next_K)**2).sum() < 1e-10 or i > maxiter:
                    break

                K = next_K
        finally:
            logger.close()

    # Stabilizing initial gain
    K, *_ = LQR.clqr(A, B, np.eye(n), np.eye(m))
    kleinman(K, "kleinman-stable.h5")
    sql(K, "sql-stable.h5")

    # Non-stabilizing initial gain
    K = np.random.rand(m, n)
    kleinman(K, "kleinman-unstable.h5")
    sql(K, "sql-unstable.h5")

Example #10

0

Show file

def exp5():
    """
    This experiment compares our algorithms and the Kleinman algorithm.

    The plant matrix is built as ``v diag(2, 3, 4, 5, 6) v^-1`` with a
    random ``v``, so it is similar to a diagonal matrix with positive
    entries. Both iterations are run from the zero gain and from an LQR
    gain computed with ``2 * I`` weights, and every iteration is recorded
    to an HDF5 file under ``data/exp5``.
    """
    basedir = Path("data", "exp5")

    # Setup
    np.random.seed(3000)
    v = np.random.randn(5, 5) * 3
    A = np.diag([2, 3, 4, 5, 6])
    A = v.dot(A).dot(np.linalg.inv(v))
    B = np.random.randn(5, 3) * 3
    Q = np.diag([100, 0, 0, 20, 30])
    R = np.diag([1, 3, 8])
    Kopt, Popt, *_ = LQR.clqr(A, B, Q, R)
    eps = 1e-16
    maxiter = 1000
    n, m = B.shape

    # Kleinman Iteration
    def kleinman(K, name):
        """Run the Kleinman iteration from gain ``K``, logging to ``name``."""
        logger = fym.logging.Logger(path=Path(basedir, name))

        # try/finally guarantees the log file is closed even if a solver
        # call raises part-way through the iteration.
        try:
            for i in itertools.count(0):
                P = scipy.linalg.solve_lyapunov((A - B.dot(K)).T,
                                                -(Q + K.T.dot(R).dot(K)))
                next_K = np.linalg.inv(R).dot(B.T).dot(P)

                logger.record(
                    i=i,
                    P=P,
                    K=K,
                    next_K=next_K,
                    Popt=Popt,
                    Kopt=Kopt,
                )

                # Stop on convergence of the gain or on the iteration cap.
                if ((K - next_K)**2).sum() < eps or i > maxiter:
                    break

                K = next_K
        finally:
            logger.close()

    # SQL Iteration
    def sql(K, name):
        """Run the SQL iteration from gain ``K``, logging to ``name``."""
        F = -np.eye(m) * 1

        # The initial gain is recorded with every iteration.
        K0 = K

        logger = fym.logging.Logger(path=Path(basedir, name))

        try:
            for i in itertools.count(0):
                # Augmented (state, input) system and its quadratic cost.
                blkA = np.block([[A - B.dot(K), B], [np.zeros_like(B.T), F]])
                blkK = np.block(
                    [[np.eye(n), np.zeros_like(B)], [-K, np.eye(m)]])
                blkQ = blkK.T.dot(scipy.linalg.block_diag(Q, R)).dot(blkK)
                blkH = scipy.linalg.solve_lyapunov(blkA.T, -blkQ)
                H11, H21, H22 = blkH[:n, :n], blkH[n:, :n], blkH[n:, n:]
                next_K = K + np.linalg.inv(H22).dot(H21)

                # The interactive-debugging diagnostics and the
                # ``breakpoint()`` call that used to sit here were removed:
                # they stopped every iteration after the first in the
                # debugger and their results were never recorded.

                # Schur complement of H22 in the block solution.
                P = H11 - H21.T.dot(np.linalg.inv(H22)).dot(H21)

                next_H11 = P

                # Same Lyapunov solve on the system transformed by the gain
                # error relative to the optimal gain.
                Kt = Kopt - K
                blkKt = np.block(
                    [[np.eye(n), np.zeros_like(B)], [-Kt, np.eye(m)]])
                blkA_s = blkKt.dot(
                    np.block([[A - B.dot(K), B], [F.dot(Kt), F]]))
                blkH_s = scipy.linalg.solve_lyapunov(blkA_s.T, -blkQ)
                H11_s, H22_s = blkH_s[:n, :n], blkH_s[n:, n:]

                P_s = H11_s - Kt.T.dot(H22_s).dot(Kt)

                # Smallest and largest real parts of the eigenvalues of P.
                eigs = np.linalg.eigvals(P)
                Peig = [eigs.min().real, eigs.max().real]

                logger.record(
                    i=i,
                    P=P,
                    K=K,
                    next_K=next_K,
                    Popt=Popt,
                    Kopt=Kopt,
                    P_s=P_s,
                    Peig=Peig,
                    K0=K0,
                    H11=H11,
                    next_H11=next_H11,
                )

                # Stop on convergence of the gain or on the iteration cap.
                if ((K - next_K)**2).sum() < eps or i > maxiter:
                    break

                K = next_K
        finally:
            logger.close()

    # Non-stabilizing initial gain (zero feedback on a plant whose
    # eigenvalues are all positive)
    K = np.zeros((m, n))
    kleinman(K, "kleinman-unstable.h5")
    sql(K, "sql-unstable.h5")

    # Stabilizing initial gain
    K, *_ = LQR.clqr(A, B, 2 * np.eye(n), 2 * np.eye(m))
    kleinman(K, "kleinman-stable.h5")
    sql(K, "sql-stable.h5")