x_goal = dynamics.augment_state(np.array([0.0, 0.0, 0.0, 0.0]))

# Instantaneous state cost.
Q = np.eye(dynamics.state_size)
Q[0, 0] = 1.0
Q[1, 1] = Q[4, 4] = 0.0
Q[0, 2] = Q[2, 0] = 1.0
Q[2, 2] = Q[3, 3] = 1.0**2

# Instantaneous control cost.
R = 0.1 * np.eye(dynamics.action_size)

# Terminal state cost.
Q_terminal = 100 * np.eye(dynamics.state_size)

# Path-tracking alternatives:
# cost = PathQRCost(Q, R, x_path=x_path, u_path=u_path)
# cost = PathQsRCost(Q, R, x_path=x_path, u_path=u_path)
# Regulate towards the goal state instead.
cost = QRCost(Q, R, Q_terminal=Q_terminal, x_goal=x_goal)

# Random initial action path.
us_init = np.random.uniform(-1, 1, (N - 1, dynamics.action_size))

J_hist = []
ilqr = iLQR(dynamics, cost, N - 1)
# xs, us = ilqr.fit(x0, us_init, on_iteration=on_iteration)
controller = RecedingHorizonController(x0, ilqr)

count = 0
plt.ion()
fig = plt.figure()
ax = fig.add_subplot(111)
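# The `on_iteration` callback passed to `ilqr.fit` is never defined in these
# snippets. A minimal sketch is given here, assuming the callback signature
# used by the ilqr package (iteration count, current trajectories, cost, and
# acceptance/convergence flags); it just records the cost in J_hist.
def on_iteration(iteration_count, xs, us, J_opt, accepted, converged):
    J_hist.append(J_opt)
    info = "converged" if converged else ("accepted" if accepted else "failed")
    print("iteration", iteration_count, info, J_opt)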
N = 200  # Number of time steps in trajectory.
x0 = np.array([10.0, 10.0, 5.0, 0.0])  # Initial state.

# Random initial action path.
us_init = np.random.uniform(-1, 1, (N, dynamics.action_size))

J_hist = []
ilqr = iLQR(dynamics, cost, N)
xs, us = ilqr.fit(x0, us_init, on_iteration=on_iteration)

cost2 = PathQRCost(Q, R, xs, us)
ilqr2 = iLQR(dynamics, cost2, N)
cntrl = RecedingHorizonController(x0, ilqr2)

plt.ion()
count = 0
fig = plt.figure()
ax = fig.add_subplot(111)
ax.set_xlim([-3, 30])
ax.set_ylim([-3, 30])
x = []
y = []
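# The stepping loop that drives `cntrl` is not shown in this snippet. Below is
# a minimal sketch, assuming the generator-style `control()` API of the ilqr
# package's RecedingHorizonController; the step size, iteration counts, and
# stopping condition are illustrative assumptions, not the original settings.
for xs_step, us_step in cntrl.control(us, step_size=1,
                                      initial_n_iterations=100,
                                      subsequent_n_iterations=1):
    # Record and plot the current (x, y) position of the system.
    x.append(xs_step[0][0])
    y.append(xs_step[0][1])
    ax.plot(x, y, "b-")
    plt.pause(0.001)
    count += 1
    if count >= N:
        break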
x_path = np.array(x_path)
u_path = np.array(u_path)
x0 = x_path[0]

# R = 0.05 * np.eye(dynamics.action_size)  # +/- 10
R = 0.1 * np.eye(dynamics.action_size)
R[1, 1] = 40.0
R[4, 4] = 40.0
print(R)

cost2 = PathQRCost(Q[0], R, x_path=x_path, u_path=u_path)

# Random initial action path.
us_init = np.random.uniform(-1, 1, (N - 1, dynamics.action_size))

J_hist = []
ilqr = iLQR(dynamics, cost2, N - 1)
xs, us = ilqr.fit(x0, us_init, on_iteration=on_iteration)

# Constrain the actions to see what's actually applied to the system.
controller = RecedingHorizonController(x0, ilqr)

count = 0
plt.ion()
fig = plt.figure()
])
dynamics = AutoDiffDynamics(f, x_inputs, u_inputs)

Q = np.eye(dynamics.state_size)
# Q[0, 2] = Q[2, 0] = -1
# Q[1, 3] = Q[3, 1] = -1
R = 0.1 * np.eye(dynamics.action_size)

N = 199  # Number of time steps in trajectory.
x0 = np.array([10.0, 10.0, 5.0, 0.0])  # Initial state.

# Random initial action path.
us_init = np.random.uniform(-1, 1, (N, dynamics.action_size))

cost = PathQRCost(Q, R, traj, us_init)

J_hist = []
ilqr = iLQR(dynamics, cost, N)
xs, us = ilqr.fit(x0, us_init, on_iteration=on_iteration)

cost2 = PathQRCost(Q, R, xs, us)
ilqr2 = iLQR(dynamics, cost2, N)
cntrl = RecedingHorizonController(x0, ilqr2)

plt.ion()
count = 0
fig = plt.figure()
ax = fig.add_subplot(111)