def setup(self): J_hist = [] def on_iteration(iteration_count, xs, us, J_opt, accepted, converged): J_hist.append(J_opt) info = "converged" if converged else ("accepted" if accepted else "failed") print("iteration", iteration_count, info, J_opt) def f(x, us, i): max_bounds = 8.0 min_bounds = -8.0 diff = (max_bounds - min_bounds) / 2.0 mean = (max_bounds + min_bounds) / 2.0 us = diff * np.tanh(us) + mean y = Model.runge_integrator(self._model.get_rbdl_model(), x, 0.01, us) return np.array(y) dynamics = FiniteDiffDynamics(f, 12, 6) x_path = [] u_path = [] count = 0 N = self.runner.get_length() while count < self.runner.get_length(): count += 1 self.runner.step() u_path.append(self.runner.ddx.flatten().tolist()) self.x.append(self.runner.x.flatten()) x = self.runner.x.flatten().tolist() + self.runner.dx.flatten().tolist() x_path.append(x) u_path = u_path[:-1] expSigma = self.runner.get_expSigma() size = expSigma[0].shape[0] Q = [np.zeros((size * 2, size * 2))] * len(expSigma) for ii in range(len(expSigma) - 2, -1, -1): Q[ii][:size, :size] = np.linalg.pinv(expSigma[ii]) x0 = x_path[0] x_path = np.array(x_path) us_path = np.array(u_path) us_init = np.random.uniform(-1, 1, (N - 1, dynamics.action_size)) R = 0.1 * np.eye(dynamics.action_size) cost = PathQsRCost(Q, R, x_path=x_path, u_path=us_path) ilqr = iLQR(dynamics, cost, N - 1) xs, self.us = ilqr.fit(x0, us_init, on_iteration=on_iteration) R = 0.5 * np.eye(dynamics.action_size) cost2 = PathQsRCost(Q, R, x_path=x_path, u_path=self.us) ilqr2 = iLQR(dynamics, cost2, N - 1) self.cntrl = RecedingHorizonController(x0, ilqr2)
def main(T, dt): T = T dt = dt x_init = np.array([1.5, 1.5, -0.05, -0.05]) U_init = np.zeros((T, 2)) Q_f = np.eye(4) # terminal cost Q = np.zeros((4, 4)) # cost matrix for states ilqr_solver = iLQR(f, T, dt, x_init, U_init, compute_quadratic_approx_cost_torque, Q=Q, Q_f=Q_f) threshold = pow(10, -3) U = ilqr_solver.run_algorithm( threshold) # column i of U should be control input at time i # Execute the control sequence traj = execute_trajectory(x_init, U, dt) plot_trajectory(traj)
def setup(self): J_hist = [] def on_iteration(iteration_count, xs, us, J_opt, accepted, converged): J_hist.append(J_opt) info = "converged" if converged else ("accepted" if accepted else "failed") print("iteration", iteration_count, info, J_opt) def f(x, u, i): y = Model.runge_integrator(self._model.get_rbdl_model(), x, 0.01, u) return np.array(y) dynamics = FiniteDiffDynamics(f, 12, 6) x_path = [] u_path = [] count = 0 N = self.runner.get_length() while count < self.runner.get_length(): count += 1 self.runner.step() u_path.append(self.runner.ddx.flatten().tolist()) self.x.append(self.runner.x.flatten()) x = self.runner.x.flatten().tolist() + self.runner.dx.flatten().tolist() x_path.append(x) u_path = u_path[:-1] expSigma = self.runner.get_expSigma() size = expSigma[0].shape[0] Q = [np.zeros((size * 2, size * 2))] * len(expSigma) for ii in range(len(expSigma) - 2, -1, -1): Q[ii][:size, :size] = np.linalg.pinv(expSigma[ii]) x0 = x_path[0] x_path = np.array(x_path) u_path = np.array(u_path) #####change################################!!!!! changed R = 5.0e-4 * np.eye(dynamics.action_size) R[3,3] = 3.0e-3 R[4,4] = 3.0e-3 R[5,5] = 3.0e-3 #R = 0.1 * np.eye(dynamics.action_size) # cost2 = PathQsRCost(Q, R, x_path=x_path, u_path=u_path) # # # Random initial action path. us_init = np.random.uniform(-1, 1, (N-1, dynamics.action_size)) # J_hist = [] ilqr = iLQR(dynamics, cost2, N-1) self.xs, self.us = ilqr.fit(x0, us_init, on_iteration=on_iteration)
def main(): # (TODO)Consider a particular example (maybe an example from the paper)? T = 50 nX = 10 # Get a sequence of control input u_1,..., u_T solver = iLQR() U = solver.run_algorithm() # column i of U should be control input at time i X = np.zeros((nX, T)) # (TODO)Execute the control sequence X[:,0] = starting_state for t in range(T - 1): X[:,t+1] = f(X[:,t], U[:,t])
def test_lqr_mujoco(dynamics_cls): """Smoke test for MujcooFiniteDiff{Dynamics,Cost}. Jupyter notebook experiments/mujoco_control.ipynb has quantitative results attained; for efficiency, we only run for a few iterations here.""" env = gym.make("Reacher-v2").unwrapped env.seed(42) env.reset() dynamics = dynamics_cls(env) cost = MujocoFiniteDiffCost(env) N = 10 ilqr = iLQR(dynamics, cost, N) x0 = dynamics.get_state() us_init = np.array([env.action_space.sample() for _ in range(N)]) xs, us = ilqr.fit(x0, us_init, n_iterations=3) assert x0.shape == xs[0].shape assert xs.shape[0] == N + 1 assert us.shape == (N, 2) assert env.action_space.contains(us[0])
def main(T, dt): T = T dt = dt x_init = np.array([1.5, 1.5, -0.05, -0.05]) U_init = np.zeros((T, 2)) Q_f = np.eye(4) # terminal cost Q = np.zeros((4, 4)) # cost matrix for states ilqr_solver = iLQR(f, T, dt, x_init, U_init, compute_quadratic_approx_cost_torque, Q = Q, Q_f = Q_f) threshold = pow(10, -3) U = ilqr_solver.run_algorithm(threshold) # column i of U should be control input at time i # Execute the control sequence traj = execute_trajectory(x_init, U, dt) plot_trajectory(traj)
def control_ilqr(N=1000): pend = Pendulum() dynamics = FiniteDiffDynamics(pend.f, pend.state_size, pend.action_size) # The coefficients weigh how much your state error is worth to you vs # the size of your controls. You can favor a solution that uses smaller # controls by increasing R's coefficient. Q = 100 * np.eye(pend.state_size) R = 50 * np.eye(pend.action_size) Q_terminal = np.diag((1000, 1000, 1000)) cost = QRCost(Q, R, Q_terminal=Q_terminal, x_goal=pend.x_goal) x0 = pend.initial_state # Initial state. us_init = np.random.uniform(-2, 2, (N, 1)) # Random initial action path. ilqr = iLQR(dynamics, cost, N) xs, us = ilqr.fit(x0, us_init) pend.close() return us
def control_ilqr(): cart = MountainCar() dynamics = FiniteDiffDynamics(cart.f, cart.state_size, cart.action_size) # The coefficients weigh how much your state error is worth to you vs # the size of your controls. You can favor a solution that uses smaller # controls by increasing R's coefficient. Q = 500 * np.eye(cart.state_size) R = 100 * np.eye(cart.action_size) Q_terminal = np.eye(cart.state_size)*1000 cost = QRCost(Q, R, Q_terminal=Q_terminal, x_goal=cart.x_goal) # cost = BatchAutoDiffCost(cart.cost_function, cart.state_size, cart.action_size) N = 1000 # Number of time-steps in trajectory. x0 = cart.initial_state # Initial state. us_init = np.random.uniform(-1, 1, (N, 1)) # Random initial action path. ilqr = iLQR(dynamics, cost, N) xs, us = ilqr.fit(x0, us_init) cart.close() return us
def run_IterLinQuadReg_matrix(self, A, B, C, dist_info_sharing='AM', us_init=None): x_input, u_input = self.state_inputs() if np.ndim(A) != 2: if dist_info_sharing == 'GM': A = gmean(A, axis=0) B = gmean(B, axis=0) C = gmean(C, axis=0) print(A.shape, 'A', B.shape, 'B', C.shape, 'C') elif dist_info_sharing == 'AM': A = np.sum(A, axis=0) / A.shape[0] B = np.sum(B, axis=0, keepdims=True) / B.shape[0] B = B.T C = np.sum(C, axis=0) / C.shape[0] else: pass f = self.next_states_matrix(x_input, u_input, A, B, C) dynamics = AutoDiffDynamics(f, x_input, u_input) x_goal = self.augment_state(self.x_goal) if self.Q_terminal.all() == None: cost = QRCost(self.Q, self.R) else: cost = QRCost(self.Q, self.R, Q_terminal=self.Q_terminal, x_goal=x_goal) x0 = self.augment_state(self.x0) if us_init == None: us_init = np.random.uniform(-1, 1, (self.N, dynamics.action_size)) ilqr = iLQR(dynamics, cost, self.N) xs, us = ilqr.fit(x0, us_init, on_iteration=self.on_iteration) return xs, us
def run_IterLinQuadReg(self, us_init=None): x_input, u_input = self.state_inputs() x_dot_dot, theta_dot_dot, theta_prime = self.accel( x_input, u_input, self.xd) f = self.next_states(x_input, x_dot_dot, theta_dot_dot, theta_prime) dynamics = AutoDiffDynamics(f, x_input, u_input, hessians=False) x_goal = self.augment_state(self.x_goal) if self.Q_terminal.all() == None: cost = QRCost(self.Q, self.R) else: cost = QRCost(self.Q, self.R, Q_terminal=self.Q_terminal, x_goal=x_goal) x0 = self.augment_state(self.x0) if us_init == None: us_init = np.random.uniform(-1, 1, (self.N, dynamics.action_size)) ilqr = iLQR(dynamics, cost, self.N, hessians=False) xs, us = ilqr.fit(x0, us_init, on_iteration=self.on_iteration, n_iterations=1000) # print(ilqr._K,'this is capital K') return xs, us, ilqr._k, ilqr._K
def main_fuc(R0): # print(R0) R1 = R0[0] R2 = R0[1] R[0, 0] = R1 R[1, 1] = R1 R[2, 2] = R1 R[3, 3] = R2 R[4, 4] = R2 R[5, 5] = R2 us_init = np.random.uniform(-1, 1, (N - 1, dynamics.action_size)) cost = PathQsRCost(Q, R, x_path=x_path, u_path=u_path) #print(cost) J_hist = [] ilqr = iLQR(dynamics, cost, N - 1) xs, us = ilqr.fit(x0, us_init, on_iteration=on_iteration) # len_us = len(us) # print(f"len(us)={len_us}") # print(us) # print(xs) # print(us) cost2 = PathQsRCost(Q, R, x_path, us) ilqr2 = iLQR(dynamics, cost2, N - 1) cntrl = RecedingHorizonControllerPath(x0, ilqr2) # print(J_hist) ####### if want to smooth J_hist # plt.figure() # plt.plot(J_hist) # plt.xlabel('Iteration') # plt.ylabel('Cost') # plt.grid() # plt.show( plt.ion() count = 0 fig = plt.figure() ax1 = fig.add_subplot(321) ax1.set_xlim([0, 177]) ax1.set_ylim([-0.4, 0.6]) ax2 = fig.add_subplot(323) ax2.set_xlim([0, 177]) ax2.set_ylim([-0.2, 2.0]) ax3 = fig.add_subplot(325) ax3.set_xlim([0, 177]) ax3.set_ylim([-0.3, 0.3]) ax4 = fig.add_subplot(322) ax4.set_xlim([0, 177]) ax4.set_ylim([-0.8, 0.6]) ax5 = fig.add_subplot(324) ax5.set_xlim([0, 177]) ax5.set_ylim([0.0, 1.7]) ax6 = fig.add_subplot(326) ax6.set_xlim([0, 177]) ax6.set_ylim([-0.3, 0.3]) x1 = [] y1 = [] x1_follow = [] y1_follow = [] x2 = [] y2 = [] x2_follow = [] y2_follow = [] x3 = [] y3 = [] x3_follow = [] y3_follow = [] x4 = [] y4 = [] x4_follow = [] y4_follow = [] x5 = [] y5 = [] x5_follow = [] y5_follow = [] x6 = [] y6 = [] x6_follow = [] y6_follow = [] us3 = [] line1, = ax1.plot(x1, y1, 'r-') line2, = ax1.plot(x1_follow, y1_follow, 'b-') line3, = ax2.plot(x2, y2, 'r-') line4, = ax2.plot(x2_follow, y2_follow, 'b-') line5, = ax3.plot(x3, y3, 'r-') line6, = ax3.plot(x3_follow, y3_follow, 'b-') line7, = ax4.plot(x4, y4, 'r-') line8, = ax4.plot(x4_follow, y4_follow, 'b-') line9, = ax5.plot(x5, y5, 'r-') line10, = ax5.plot(x5_follow, y5_follow, 'b-') line11, = ax6.plot(x6, y6, 'r-') line12, = ax6.plot(x6_follow, y6_follow, 'b-') #print(sys.getsizeof(cntrl.control(us))) # # with open('test.npy', 'wb') as f: # np.save(f, us) # print(len(us)) # print('step1') for xs2, us2 in tqdm(cntrl.control(us)): # print(us2[0]) # print(type(us2[0])) us3.append(us2[0]) x1.append(count) y1.append(xs2[0][0]) x1_follow.append(count) y1_follow.append(x_path[count][0]) x2.append(count) y2.append(xs2[0][1]) x2_follow.append(count) y2_follow.append(x_path[count][1]) x3.append(count) y3.append(xs2[0][2]) x3_follow.append(count) y3_follow.append(x_path[count][2]) x4.append(count) y4.append(xs2[0][3]) x4_follow.append(count) y4_follow.append(x_path[count][3]) x5.append(count) y5.append(xs2[0][4]) x5_follow.append(count) y5_follow.append(x_path[count][4]) x6.append(count) y6.append(xs2[0][5]) x6_follow.append(count) y6_follow.append(x_path[count][5]) # print(xs2[0][0:6]) # print(x_path[count][0:6]) count += 1 cntrl.set_state(xs2[1]) line1.set_ydata(y1) line1.set_xdata(x1) line2.set_ydata(y1_follow) line2.set_xdata(x1_follow) line3.set_ydata(y2) line3.set_xdata(x2) line4.set_ydata(y2_follow) line4.set_xdata(x2_follow) line5.set_ydata(y3) line5.set_xdata(x3) line6.set_ydata(y3_follow) line6.set_xdata(x3_follow) line7.set_ydata(y4) line7.set_xdata(x4) line8.set_ydata(y4_follow) line8.set_xdata(x4_follow) line9.set_ydata(y5) line9.set_xdata(x5) line10.set_ydata(y5_follow) line10.set_xdata(x5_follow) line11.set_ydata(y6) line11.set_xdata(x6) line12.set_ydata(y6_follow) line12.set_xdata(x6_follow) fig.canvas.draw() fig.canvas.flush_events() us = us[1:] if count == 177: break plt.ioff() # plt.show() KL1 = measure_d(y1_follow, y1) KL2 = measure_d(y2_follow, y2) KL3 = measure_d(y3_follow, y3) KL4 = measure_d(y4_follow, y4) KL5 = measure_d(y5_follow, y5) KL6 = measure_d(y6_follow, y6) KL = KL1 + KL2 + KL3 + KL4 + KL5 + KL6 fname = f'R1_{R1}_R2_{R2}_KL_{KL}.png' s_path = f'./figures_new/{fname}' plt.savefig(s_path) plt.close('all') print(KL) # if KL > KL0: # KL = KL0 gc.collect() return KL
Q[2, 2] = Q[3, 3] = 1.0**2 R = 0.1 * np.eye(dynamics.action_size) # Terminal state cost. Q_terminal = 100 * np.eye(dynamics.state_size) # Instantaneous control cost. R = np.array([[0.1]]) cost = PathQRCost(Q, R, x_path=x_path, u_path=u_path) #cost = PathQsRCost(Q,R,x_path=x_path,u_path=u_path) cost = QRCost(Q, R, Q_terminal=Q[0], x_goal=x_goal) # Random initial action path. us_init = np.random.uniform(-1, 1, (N - 1, dynamics.action_size)) J_hist = [] ilqr = iLQR(dynamics, cost, N - 1) #xs, us = ilqr.fit(x0, us_init, on_iteration=on_iteration) controller = RecedingHorizonController(x0, ilqr) count = 0 plt.ion() fig = plt.figure() ax = fig.add_subplot(111) ax.set_xlim([4, 12]) ax.set_ylim([5, 12]) x = [] y = [] line1, = ax.plot(x, y, 'r-') for xs2, us2 in controller.control(us_init):
lqr.solve() lqr_states, lqr_controls, lqr_costs = lqr.forward_pass(x0) #Us2 = np.random.random((T, control_dim)) Us2 = np.ones((T, control_dim)) #Us2 = np.asarray(lqr_controls) xt = x0.copy(); Xs2 = xt; for t in range(0, T-1): xt1 = dyn_func(xt, Us2[t]) Xs2 = np.vstack((Xs2, xt1)) xt = xt1 #Xs = lqr_states; Us = lqr_controls; Xs = Xs2; Us = Us2; original_ilqr_cost = np.sum([cost_func(x, u) for (x,u) in zip(Xs, Us)]) ilqr = iLQR(dyn_func, cost_func, Xs, Us); for _ in range(1): ilqr.backwards_pass() ilqr_states, ilqr_controls, ilqr_costs = ilqr.forward_pass(x0, update=True) for t in range(T): lqr_x = lqr_states[t] lqr_u = lqr_controls[t] ilqr_x = ilqr_states[t] ilqr_u = ilqr_controls[t] print("t={}, xlqr: {}".format(t, lqr_x)) print(" {}, xilqr: {} ".format(t, ilqr_x)); print(" {}, xilqr_orig: {} ".format(t, Xs[t])); print(" {}, ulqr: {}".format(t, lqr_u)) print(" {}, uilqr: {} ".format(t, ilqr_u));
print(R) # print(u_path) us_init = np.random.uniform(-1, 1, (N-1, dynamics.action_size)) # # cost = PathQRCost(Q[0], R, x_path=x_path, u_path=u_path) #print(cost) # # # Random initial action path. # J_hist = [] ilqr = iLQR(dynamics, cost, N-1) xs, us = ilqr.fit(x0, us_init, on_iteration=on_iteration) # len_us = len(us) # print(f"len(us)={len_us}") # print(us) # print(xs) #R = 0.01 * np.eye(dynamics.action_size) # # R[1,1] = 40.0 # R[4,4] = 40.0 # us = np.where(us>max_bounds,max_bounds,us) # us = np.where(us<min_bounds,min_bounds,us)
l_og = X0[:, 0] theta_og = X0[:, 1] x_og = (l_og + a) * np.cos(theta_og) y_og = (l_og + a) * np.sin(theta_og) Fl_og = np.append(U0[:, 0], 0) Ft_og = np.append(U0[:, 1], 0) # %% Generate Costs Q = np.diag([275, 275, 275, 275]) R = np.diag([.1, .1]) Q_terminal = np.diag([1000, 1000, 1000, 1000]) cost = QRCost(Q, R, Q_terminal=Q_terminal, x_goal=xe) #%% Optimizing Trajectory J_hist = [] ilqr1 = iLQR(dynamics, cost, N) xs, us = ilqr1.fit(x0, U0, on_iteration=on_iteration) us = constrain(us, -max_torque, max_torque) # %% Post Processing l = xs[:, 0] theta = xs[:, 1] l_dot = xs[:, 2] theta_dot = xs[:, 3] Fl = np.append(us[:, 0], 0) Ft = np.append(us[:, 1], 0) x = (l + a + d) * np.cos(theta) y = (l + a + d) * np.sin(theta) ax = plt.figure(1) plt.clf() plt.plot(x, y, label="iLQR")