# --- 2-state / 2-input linear system: SUSD vs. policy gradient vs. LQR ---
nb_states = 2
nb_actions = 2

# White-box model wraps the known dynamics; the black-box model is a learned
# predictor restored from disk (load_model returns a success flag).
WB_model = WB_Model(dt)
BB_model = BB_Model(nb_states, nb_actions)
loaded = BB_model.load_model(path='linear_2D_')
if loaded:
    print("2 Input Linear model loaded!")

# create the SUSD learner (plus a policy-gradient baseline) from the same
# cost weights and initial gain guess
Q = np.eye(2)
R = np.eye(2)
K0 = np.array([[-10., -10.],[-10., -10.]])  # initial feedback-gain guess
N_trajectories = 10
N_agents = 25
T_rollout = 10
S = SUSD(WB_model, Q, R, K0.copy(), N_agents, T_rollout)
PG = Policy_Gradient(WB_model, Q, R, K0.copy(), N_trajectories, T_rollout)

# compute the optimal K from the true (A, B) via continuous-time LQR
A, B = WB_model.system()
K_lqr, _, _ = control.lqr(A, B, Q, R)

# get optimal K cost: roll the LQR controller forward from x0
C_lqr = 0
x = x0.copy()
Nsteps = int(T_rollout/dt)
for t in range(Nsteps):
    # compute input (note lqr returns K for u = -Kx)
    u = np.dot(-K_lqr, x)
    # compute cost
    # NOTE(review): the cost accumulation and state update are not visible in
    # this excerpt -- the loop body continues past the end of this chunk.
# Sweep the number of SUSD rollout agents (values in `n`, defined earlier) and
# record each run's cost trace so convergence can be compared across counts.
for idn, N_rollout in enumerate(n):
    # define simulation length
    Nsteps = 500
    # define starting state
    x0 = np.array([-8, 9])
    # load model predictor
    nb_states = 2
    nb_actions = 1
    wb_model = WB_Model(dt)
    # create the SUSD learner
    Q = np.eye(2)
    R = np.array([1])
    K0 = np.array([0.1, 0.1])  # initial feedback-gain guess
    T_rollout = 20
    S = SUSD(wb_model, Q, R, K0, N_rollout, T_rollout, alpha=0.35, term_len=100)
    # estimate optimal K using SUSD
    converged, iters = S.search(x0, max_iter=100)
    K = S.K.reshape((nb_actions, nb_states))
    print("SUSD Search took", iters, "iterations")
    print("Estimated K:")
    print(K)
    # keep this run's cost buffer so all agent counts can be plotted together
    z_list[idn] = S.z_buf
cost_plot(z_list, n)
plt.show()
# --- 4-state / 2-input system: SUSD search vs. discrete-time LQR reference ---
nb_states = 4
nb_actions = 2
WB_model = WB_Model(dt)

# create the SUSD learner
Q = np.eye(4)
R = np.eye(2)
K0 = np.array([[-2., -2., -2., -2.], [-2., -2., -2., -2.]])  # initial gain guess
N_agents = 30
T_rollout = 10
S = SUSD(WB_model, Q, R, K0, N_agents, T_rollout, dt=dt, alpha=0.1, term_len=200)

# compute the optimal K: wrap (A, B) in a state-space object with unit sample
# time so control.lqr solves the discrete-time problem.
# Renamed local from `sys` to avoid shadowing the stdlib `sys` module.
A, B = WB_model.system()
plant = control.StateSpace(A, B, np.eye(4), np.zeros((4, 2)), 1)
K_lqr, _, _ = control.lqr(plant, Q, R)

# estimate optimal K using SUSD
converged, iters = S.search(x0, r=0.001, max_iter=100000)
K = S.K.reshape((nb_actions, nb_states))
print("SUSD Search took", iters, "iterations")
# Shared LQR weights and initial gain for the learner comparison
# (Q and WB_model are defined earlier in the file).
R = np.array([1])
K0 = np.array([-10., -10.])  # initial feedback-gain guess
N_points = 10
N_trajectories = 100
N_agents = 5
T_rollout = 10

# three learners sharing the same model, cost weights, and starting gain;
# 0.01 * np.eye(2) is the random-search sampling covariance
RS = Random_Search(WB_model, Q, R, K0.copy(), N_points, T_rollout, 0.01 * np.eye(2), dt=dt)
S = SUSD(WB_model, Q, R, K0.copy(), N_agents, T_rollout, dt=dt, alpha=0.9)
PG = Policy_Gradient(WB_model, Q, R, K0.copy(), N_trajectories, T_rollout, dt=dt, alpha=9E-1)

# compute the optimal K as the ground-truth reference
A, B = WB_model.system()
K_lqr, _, _ = control.lqr(A, B, Q, R)
print("LQR Gains:", -K_lqr)
# estimate optimal K using FD
# Learner setup: random search, SUSD, and policy gradient share the same
# model, cost weights (Q, R), and initial gain K0 defined earlier in the file.
N_agents = 30
T_rollout = 10
# 0.5 * np.eye(12) is the random-search sampling covariance
# (12 presumably equals the number of gain entries -- TODO confirm vs. K0)
RS = Random_Search(WB_model, Q, R, K0.copy(), N_points, T_rollout, 0.5 * np.eye(12), dt=dt)
S = SUSD(WB_model, Q, R, K0.copy(), N_agents, T_rollout, dt=dt, alpha=0.1, term_len=100)
PG = Policy_Gradient(WB_model, Q, R, K0.copy(), N_trajectories, T_rollout, dt=dt, alpha=0.8)
# compute the optimal K
A, B = WB_model.system()
# --- 2-state / 1-input system: SUSD search vs. continuous-time LQR ---
# load model predictor; check the success flag like the other experiments do
# instead of silently ignoring it
nb_states = 2
nb_actions = 1
model = BB_Model(nb_states, nb_actions)
loaded = model.load_model(path='linear_said_')
if not loaded:
    print("Warning: black-box model 'linear_said_' failed to load")
WB_model = WB_Model(dt)

# create the SUSD learner
Q = np.eye(2)
R = np.array([1])
K0 = np.array([-10., -10.])  # initial feedback-gain guess
N_rollout = 3
T_rollout = 30
S = SUSD(WB_model, Q, R, K0, N_rollout, T_rollout)

# compute the optimal K from the true (A, B)
A, B = WB_model.system()
K_lqr, _, _ = control.lqr(A, B, Q, R)

# estimate optimal K using SUSD
converged, iters = S.search(x0)
K = S.K.reshape((nb_actions, nb_states))
print("SUSD Search took", iters, "iterations")
print("Estimated K:", K)
print("LQR K:", -K_lqr)

# simulate the trajectory (columns are per-timestep states)
x = np.zeros([nb_states, Nsteps])
# --- 2-state / 2-input system: SUSD searches on the learned (black-box)
# model, while the LQR reference uses the white-box dynamics ---
# load model predictor
nb_states = 2
nb_actions = 2
bb_model = BB_Model(nb_states, nb_actions)
loaded = bb_model.load_model(path='linear_2D_')
if loaded:
    print("2 Input Linear model loaded!")
WB_model = WB_Model(dt)

# create the SUSD learner on the black-box model
Q = np.eye(2)
R = np.eye(2)
K0 = np.array([[-1., -1.], [-1., -1.]])  # initial feedback-gain guess
N_agents = 10
T_rollout = 15
S = SUSD(bb_model, Q, R, K0, N_agents, T_rollout)

# compute the optimal K from the true (A, B)
A, B = WB_model.system()
K_lqr, _, _ = control.lqr(A, B, Q, R)

# estimate optimal K using SUSD
converged, iters = S.search(x0)
K = S.K.reshape((nb_actions, nb_states))
print("SUSD Search took", iters, "iterations")
print("Estimated K:", K)
print("LQR K:", -K_lqr)

# simulate the trajectory (columns are per-timestep states)
x = np.zeros([nb_states, Nsteps])