# Example 1
# Example 1 setup: white-box model plus a pre-trained black-box model
# of the 2-state / 2-input linear system.
nb_states, nb_actions = 2, 2
WB_model = WB_Model(dt)
BB_model = BB_Model(nb_states, nb_actions)
loaded = BB_model.load_model(path='linear_2D_')
if loaded:
    print("2 Input Linear model loaded!")

# create the SUSD learner and a policy-gradient baseline sharing the
# same cost weights and the same initial gain matrix
R = np.eye(2)
Q = np.eye(2)
K0 = np.array([[-10., -10.],
               [-10., -10.]])
T_rollout = 10        # rollout horizon
N_agents = 25         # SUSD swarm size
N_trajectories = 10   # rollouts per policy-gradient update

# each learner gets its own copy of K0 so neither mutates the other's start
S = SUSD(WB_model, Q, R, K0.copy(), N_agents, T_rollout)
PG = Policy_Gradient(WB_model, Q, R, K0.copy(), N_trajectories, T_rollout)

# compute the optimal K from the white-box system matrices via LQR
A, B = WB_model.system()
K_lqr, _, _ = control.lqr(A, B, Q, R)

# get optimal K cost by rolling out the LQR controller from x0
C_lqr = 0
x = x0.copy()
Nsteps = int(T_rollout/dt)
for t in range(Nsteps):
    # compute input (negative feedback: u = -K_lqr x)
    u = np.dot(-K_lqr, x)

    # compute cost
    # NOTE(review): this loop appears truncated — C_lqr is never
    # accumulated and x is never advanced, so the rollout cost is not
    # actually computed. Confirm against the original notebook cell.
# Sweep the number of SUSD rollouts (one entry of `n` per run) and record
# each run's cost history in z_list so convergence can be compared below.
for idn, N_rollout in enumerate(n):
    # define simulation length
    Nsteps = 500

    # define starting state
    x0 = np.array([-8, 9])

    # load model predictor (white-box, 2-state / 1-input)
    nb_states = 2
    nb_actions = 1
    wb_model = WB_Model(dt)

    # create the SUSD learner
    Q = np.eye(2)
    R = np.array([1])
    K0 = np.array([0.1, 0.1])
    T_rollout = 20

    S = SUSD(wb_model, Q, R, K0, N_rollout, T_rollout, alpha=0.35, term_len=100)

    # estimate the optimal K using SUSD
    converged, iters = S.search(x0, max_iter=100)
    K = S.K.reshape((nb_actions, nb_states))
    print("SUSD Search took", iters, "iterations")
    print("Estimated K:")
    print(K)

    # keep this run's cost trace for the comparison plot
    z_list[idn] = S.z_buf

# compare convergence across rollout counts
cost_plot(z_list, n)
plt.show()
# Example 3
# Example 3: 4-state / 2-input system searched with a larger SUSD swarm.
nb_states = 4
nb_actions = 2
WB_model = WB_Model(dt)

# create the SUSD learner
Q = np.eye(4)
R = np.eye(2)
K0 = np.array([[-2., -2., -2., -2.],
               [-2., -2., -2., -2.]])
N_agents = 30
T_rollout = 10

S = SUSD(WB_model,
         Q,
         R,
         K0,
         N_agents,
         T_rollout,
         dt=dt,
         alpha=0.1,
         term_len=200)

# compute the optimal K (state-space with full-state output, dt = 1)
A, B = WB_model.system()
sys = control.StateSpace(A, B, np.eye(4), np.zeros((4, 2)), 1)
K_lqr, _, _ = control.lqr(sys, Q, R)

# estimate optimal K using SUSD
converged, iters = S.search(x0, r=0.001, max_iter=100000)
K = S.K.reshape((nb_actions, nb_states))
print("SUSD Search took", iters, "iterations")
# Example 4
# Example 4: compare Random Search, SUSD, and Policy Gradient on the same
# task, against the analytic LQR solution as reference.
R = np.array([1])
K0 = np.array([-10., -10.])
N_points = 10         # random-search samples per iteration
N_trajectories = 100  # policy-gradient rollouts per update
N_agents = 5          # SUSD swarm size
T_rollout = 10        # rollout horizon

# each learner gets its own copy of K0 so no learner mutates another's start
RS = Random_Search(WB_model,
                   Q,
                   R,
                   K0.copy(),
                   N_points,
                   T_rollout,
                   0.01 * np.eye(2),  # exploration covariance
                   dt=dt)
S = SUSD(WB_model, Q, R, K0.copy(), N_agents, T_rollout, dt=dt, alpha=0.9)
PG = Policy_Gradient(WB_model,
                     Q,
                     R,
                     K0.copy(),
                     N_trajectories,
                     T_rollout,
                     dt=dt,
                     alpha=9E-1)

# compute the optimal K for reference
A, B = WB_model.system()
K_lqr, _, _ = control.lqr(A, B, Q, R)
print("LQR Gains:", -K_lqr)

# re-create the three learners with a larger SUSD swarm and different
# step sizes before the next comparison run
N_agents = 30
T_rollout = 10

# NOTE(review): the random-search exploration covariance here is
# np.eye(12) while K0 has only 2 entries — earlier runs use eye(2);
# confirm the intended gain dimension.
RS = Random_Search(WB_model, Q, R, K0.copy(), N_points, T_rollout,
                   0.5 * np.eye(12), dt=dt)
S = SUSD(WB_model, Q, R, K0.copy(), N_agents, T_rollout,
         dt=dt, alpha=0.1, term_len=100)
PG = Policy_Gradient(WB_model, Q, R, K0.copy(), N_trajectories, T_rollout,
                     dt=dt, alpha=0.8)

# compute the optimal K
A, B = WB_model.system()
# load model predictor: black-box model trained on the 'said' linear
# system, plus the white-box model used for the LQR reference
nb_states = 2
nb_actions = 1
model = BB_Model(nb_states, nb_actions)
model.load_model(path='linear_said_')
WB_model = WB_Model(dt)

# create the SUSD learner
Q = np.eye(2)
R = np.array([1])
K0 = np.array([-10., -10.])
N_rollout = 3
T_rollout = 30

S = SUSD(WB_model, Q, R, K0, N_rollout, T_rollout)

# compute the optimal K for reference
A, B = WB_model.system()
K_lqr, _, _ = control.lqr(A, B, Q, R)

# estimate optimal K using SUSD
converged, iters = S.search(x0)
K = S.K.reshape((nb_actions, nb_states))
print("SUSD Search took", iters, "iterations")

print("Estimated K:", K)
print("LQR K:", -K_lqr)

# simulate the trajectory (state history buffer, one column per step)
x = np.zeros([nb_states, Nsteps])
# load model predictor: the 2-input black-box model drives the SUSD
# search while the white-box system supplies the LQR reference gain
nb_states = 2
nb_actions = 2
bb_model = BB_Model(nb_states, nb_actions)
loaded = bb_model.load_model(path='linear_2D_')
if loaded:
    print("2 Input Linear model loaded!")
WB_model = WB_Model(dt)

# create the SUSD learner
Q = np.eye(2)
R = np.eye(2)
K0 = np.array([[-1., -1.], [-1., -1.]])
N_agents = 10
T_rollout = 15

S = SUSD(bb_model, Q, R, K0, N_agents, T_rollout)

# compute the optimal K for reference
A, B = WB_model.system()
K_lqr, _, _ = control.lqr(A, B, Q, R)

# estimate optimal K using SUSD
converged, iters = S.search(x0)
K = S.K.reshape((nb_actions, nb_states))
print("SUSD Search took", iters, "iterations")

print("Estimated K:", K)
print("LQR K:", -K_lqr)

# simulate the trajectory (state history buffer, one column per step)
x = np.zeros([nb_states, Nsteps])