Example #1
import numpy as np
import lqr_control as control

A = np.array([[1.01, 0.01, 0], [0.01, 1.01, 0.01], [0, 0.01, 1.01]])
B = np.identity(3)
Q = np.identity(3)
R1 = np.identity(3) * 100
R2 = np.identity(3) * 1000

x0 = np.array([[1], [0], [-1]])  # initial state; (0, 0, 0) is the stable point
u0 = np.zeros((3, 1))  # initial action defaults to zero

# number of time steps to simulate
T = 100

K_1, _, _ = control.dlqr(A, B, Q, R1)

K_2, _, _ = control.dlqr(A, B, Q, R2)

x_1, u_1 = control.simulate_discrete(A, B, K_1, x0, u0, T)
x_2, u_2 = control.simulate_discrete(A, B, K_2, x0, u0, T)

control.plot_states(x_1, 'State Temps', R1)
control.plot_states(x_2, 'State Temps', R2)
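
For reference, a discrete-time LQR gain such as the one control.dlqr returns is conventionally obtained by solving the discrete algebraic Riccati equation. The sketch below uses scipy.linalg.solve_discrete_are and is only an assumption about what the lqr_control wrapper does internally; dlqr_sketch and its return order are illustrative, not the module's actual API.

import numpy as np
from scipy.linalg import solve_discrete_are

def dlqr_sketch(A, B, Q, R):
    """Discrete-time LQR sketch: gain K for u = -K x minimizing sum of x'Qx + u'Ru."""
    # Solve the discrete algebraic Riccati equation for the cost-to-go matrix P
    P = solve_discrete_are(A, B, Q, R)
    # Optimal feedback gain K = (R + B' P B)^{-1} B' P A
    K = np.linalg.solve(R + B.T @ P @ B, B.T @ P @ A)
    # Closed-loop eigenvalues of (A - B K), useful as a stability check
    eigvals = np.linalg.eigvals(A - B @ K)
    return K, P, eigvals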
Example #2
            running_cost += cost.item()
            if done:
                break

        avg_length += t

        # logging
        if i_episode % log_interval == 0:
            avg_length = avg_length / log_interval
            running_cost = running_cost / log_interval

            print(
                'Episode {} \t Avg length: {:.2f} \t Avg cost: {:.2f}'.format(
                    i_episode, avg_length, running_cost))
            running_cost = 0
            avg_length = 0

    # random initial state to compare how the two controllers act
    x0 = np.random.uniform(-5, 5, (1, ))
    u0 = np.zeros((1, ))
    T = 50

    # roll out the optimal LQR controller and the learned PPO policy from the same x0
    x_star, u_star = control.simulate_discrete(A, B, K, x0.reshape(1, 1),
                                               u0.reshape(1, 1), T)
    x_sim, u_sim = simulate(A, B, ppo.policy.actor, x0, u0, T)

    compare_paths(np.array(x_sim), np.squeeze(x_star[:, :-1]), "state")
    compare_paths(np.array(u_sim), np.squeeze(u_star[:, :-1]), "action")
    compare_V(ppo.policy.critic, A, B, Q, R, K, T, gamma, alpha)
    compare_P(ppo.policy.actor, K)
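
For context, the LQR rollout compared against the PPO policy above presumably applies the linear feedback u_t = -K x_t to the dynamics x_{t+1} = A x_t + B u_t. A minimal sketch of such a rollout follows; simulate_discrete_sketch is a hypothetical stand-in, and the actual simulate_discrete in lqr_control may treat shapes and the initial action differently.

import numpy as np

def simulate_discrete_sketch(A, B, K, x0, u0, T):
    """Roll out x_{t+1} = A x_t + B u_t with u_t = -K x_t for T steps."""
    xs, us = [x0], [u0]
    x = x0
    for _ in range(T):
        u = -K @ x          # linear state feedback
        x = A @ x + B @ u   # discrete-time dynamics
        xs.append(x)
        us.append(u)
    # stack columns so time runs along axis 1, matching x_star[:, :-1] style slicing
    return np.hstack(xs), np.hstack(us)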