for idn in range(len(n)):
    # define simulation length
    Nsteps = 500

    # define starting state
    x0 = np.array([-8, 9])

    # load model predictor
    nb_states = 2
    nb_actions = 1
    wb_model = WB_Model(dt)

    # create the SUSD learner
    Q = np.eye(2)
    R = np.array([1])
    K0 = np.array([0.1, 0.1])
    N_rollout = n[idn]
    T_rollout = 20

    S = SUSD(wb_model, Q, R, K0, N_rollout, T_rollout, alpha=0.35, term_len=100)

    # estimate optimal K using SUSD
    converged, iters = S.search(x0, max_iter=100)
    K = S.K.reshape((nb_actions, nb_states))
    print "SUSD Search took", iters, "iterations"
    print "Estimated K:"; print K
    z_list[idn] = S.z_buf

cost_plot(z_list, n)
plt.show()
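
# A possible follow-up check (sketch): compare the last SUSD gain against the
# analytic LQR solution. Assumes wb_model exposes the same system() method used
# in the later examples and that the python-control package is available.
import control
A_wb, B_wb = wb_model.system()
K_lqr_check, _, _ = control.lqr(A_wb, B_wb, Q, R)
print "LQR K (control.lqr assumes u = -Kx):"
print -K_lqr_check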
Example #2
    else: Ct += np.sum(x*np.matmul(Q, x), axis=0)

    if nb_actions == 1: Ct += (u*R*u).flatten()
    else: Ct += np.sum(u*np.matmul(R, u), axis=0)

    C_lqr += Ct

    # simulate forward in time
    x = WB_model.predict(x, u).flatten()
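
# The branching above evaluates the quadratic LQR stage cost c(x, u) = x'Qx + u'Ru,
# with special cases for scalar states or inputs. A dimension-agnostic sketch of
# the same per-step cost for a single state/input pair (assumes numpy as np):
def quadratic_stage_cost(x_t, u_t, Q, R):
    x_t = np.asarray(x_t, dtype=float).flatten()
    u_t = np.asarray(u_t, dtype=float).flatten()
    return x_t.dot(np.atleast_2d(Q)).dot(x_t) + u_t.dot(np.atleast_2d(R)).dot(u_t)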


# estimate optimal K using PG
print "Search using Natural Policy Gradient..."
converged, PG_iters = PG.search(x0, r=1)
K_PG = PG.K.reshape((nb_actions, nb_states))

# estimate optimal K using SUSD
print "Search using SUSD..."
converged, S_iters = S.search(x0, r=1)
K_S = S.K.reshape((nb_actions, nb_states))
        
print "LQR Gains:"; print -K_lqr
print "Natural Policy Gradient Search took", PG_iters, "iterations"
print "Natural Policy Gradient Gains:"; print K_PG
print "SUSD Search took", S_iters, "iterations"
print "SUSD Gains:"; print K_S

# plot the cost results
cost_plot(PG.z_buf, S.z_buf, N_trajectories, N_agents, int(T_rollout/dt), C_lqr)
plt.show()
Example #3
         Q,
         R,
         K0,
         N_agents,
         T_rollout,
         dt=dt,
         alpha=0.1,
         term_len=200)

# compute the optimal K
A, B = WB_model.system()
sys = control.StateSpace(A, B, np.eye(4), np.zeros((4, 2)), 1)
K_lqr, _, _ = control.lqr(sys, Q, R)
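
# Sign convention note (sketch): control.lqr returns the gain K of the stabilizing
# law u = -Kx, which is why the LQR gain is negated wherever it is compared with
# the learned K. Minimal standalone illustration on a double integrator
# (the *_demo names are illustrative only, not the WB model):
A_demo = np.array([[0., 1.], [0., 0.]])
B_demo = np.array([[0.], [1.]])
K_demo, _, _ = control.lqr(A_demo, B_demo, np.eye(2), np.array([[1.]]))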

# estimate optimal K using SUSD
converged, iters = S.search(x0, r=0.001, max_iter=100000)
K = S.K.reshape((nb_actions, nb_states))
print "SUSD Search took", iters, "iterations"

#print "Estimated K:", K
#print "LQR K:", -K_lqr
print "Error K:", K - K_lqr

# simulate the trajectory
x = np.zeros([nb_states, Nsteps])
x_lqr = np.zeros([nb_states, Nsteps])
x[:, 0] = x0.flatten()
x_lqr[:, 0] = x0.flatten()
for t in range(Nsteps - 1):
    u = np.dot(K, x[:, t])
    x[:, t + 1] = WB_model.predict(x[:, t].reshape(-1, 1),
Example #4
#print "Finite Difference Gains:", K

# estimate optimal K using RS
converged, iters = RS.search(x0.copy(), max_iter=250)
K = RS.K.reshape((nb_actions, nb_states))
print "Random Search took", iters, "iterations"
print "Random Search Gains:", K

# estimate optimal K using PG
converged, iters = PG.search(x0.copy(), r=0.5, epsilon=0.1, max_iter=250)
K = PG.K.reshape((nb_actions, nb_states))
print "Natural Policy Gradient Search took", iters, "iterations"
print "Natural Policy Gradient Gains:", K

# estimate optimal K using SUSD
converged, iters = S.search(x0.copy(), r=0.5, max_iter=250)
K = S.K.reshape((nb_actions, nb_states))
print "SUSD Search took", iters, "iterations"
print "SUSD Gains:", K

C_lqr = 0
x = x0.copy()
Nsteps = int(T_rollout / dt)
for t in range(Nsteps - 1):
    # compute input
    u = np.dot(-K_lqr, x)

    # compute cost
    Ct = 0
    if nb_states == 1: Ct += (x * Q * x).flatten()
    else: Ct += np.sum(x * np.matmul(Q, x), axis=0)
    if nb_actions == 1: Ct += (u * R * u).flatten()
    else: Ct += np.sum(u * np.matmul(R, u), axis=0)

    C_lqr += Ct

    # simulate forward in time
    x = WB_model.predict(x, u).flatten()

# estimate optimal K using RS
print "Search using RS..."
converged, RS_iters = RS.search(x0.copy(), max_iter=400)
K_RS = RS.K.reshape((nb_actions, nb_states))

# estimate optimal K using SUSD
print "Search using SUSD..."
converged, S_iters = S.search(x0, r=0.1, max_iter=400)
K_S = S.K.reshape((nb_actions, nb_states))

# estimate optimal K using PG
print "Search using Natural Policy Gradient..."
converged, PG_iters = PG.search(x0, r=1, epsilon=0.1, max_iter=400)
K_PG = PG.K.reshape((nb_actions, nb_states))

print "LQR Gains:"
print -K_lqr
print "Random Search took", RS_iters, "iterations"
print "Random Search Gains:"
print K_RS
print "Natural Policy Gradient Search took", PG_iters, "iterations"
print "Natural Policy Gradient Gains:"
print K_PG
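
# A quick stability check (sketch): under the LQR law u = -K_lqr x the closed loop
# should have eigenvalues with negative real parts. This assumes WB_model.system()
# returns the continuous-time (A, B) pair, as in the other examples.
A_chk, B_chk = WB_model.system()
print "closed-loop eigenvalues:", np.linalg.eigvals(A_chk - np.matmul(B_chk, K_lqr))
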
# create the SUSD learner
Q = np.eye(2)
R = np.array([1])
K0 = np.array([-10., -10.])
N_rollout = 3
T_rollout = 30

S = SUSD(WB_model, Q, R, K0, N_rollout, T_rollout)

# compute the optimal K
A, B = WB_model.system()
K_lqr, _, _ = control.lqr(A, B, Q, R)

# estimate optimal K using SUSD
converged, iters = S.search(x0)
K = S.K.reshape((nb_actions, nb_states))
print "SUSD Search took", iters, "iterations"

print "Estimated K:", K
print "LQR K:", -K_lqr

# simulate the trajectory
x = np.zeros([nb_states, Nsteps])
x_lqr = np.zeros([nb_states, Nsteps])
x[:, 0] = x0.flatten()
x_lqr[:, 0] = x0.flatten()
for t in range(Nsteps - 1):
    u = np.dot(K, x[:, t])
    x[:, t + 1] = WB_model.predict(x[:, t].reshape(-1, 1),
                                   u.reshape(-1, 1)).flatten()
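
# A minimal plotting sketch (assumes matplotlib.pyplot is imported as plt, as in
# the earlier examples): show the states of the learned-gain trajectory over time.
t_axis = np.arange(Nsteps)
for i in range(nb_states):
    plt.plot(t_axis, x[i, :], label="x%d (SUSD gain)" % i)
plt.xlabel("time step")
plt.ylabel("state")
plt.legend()
plt.show()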