import time

import gym
import numpy as np

import ilqr  # project iLQR solver providing calc_ilqr_input (import path is an assumption)


def run_ilqr_controller(env, render_flag, sim_env, tN):
    """Run iLQR in a receding-horizon loop, applying the first planned control each step."""
    env.reset()
    if render_flag:
        env.render()
        time.sleep(0.01)
    total_reward = 0
    num_steps = 0
    q = [env.q]
    dq = [env.dq]
    u = []
    total_cost = []
    while True:
        # Replan from the current state and apply only the first control of the plan.
        U, cost = ilqr.calc_ilqr_input(env, sim_env, tN)
        total_cost = total_cost + cost
        action = U[0]
        u.append(np.copy(action))
        _, reward, is_terminal, _ = env._step(action)
        q.append(np.copy(env.q))
        dq.append(np.copy(env.dq))
        if render_flag:
            env.render()
            time.sleep(0.01)
        total_reward += reward
        num_steps += 1
        if is_terminal:
            break
    return q, dq, u, total_reward, num_steps, total_cost
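# Minimal usage sketch (not from the original source): build the real and simulation
# copies of the environment and run the receding-horizon controller above. The
# environment id 'TwoLinkArm-v0' and tN=50 are borrowed from the other snippets here.
def example_run_ilqr(env_name='TwoLinkArm-v0', tN=50):
    env, sim_env = gym.make(env_name), gym.make(env_name)
    q, dq, u, total_reward, num_steps, total_cost = run_ilqr_controller(
        env, render_flag=False, sim_env=sim_env, tN=tN)
    print('iLQR finished in {} steps, total reward = {}'.format(num_steps, total_reward))
    return q, dq, u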
def control_ilqr(env_name="TwoLinkArm-v0"):
    env, sim_env = gym.make(env_name), gym.make(env_name)
    U, X, costs, rewards = ilqr.calc_ilqr_input(env, sim_env, tN=100, max_iter=1000000)
    plot_costs_ilqr(costs, "iLQR: " + env_name)
    plot_rewards_ilqr(rewards, "iLQR: " + env_name)
    plot_states_and_control_ilqr(X, U, "iLQR: " + env_name)
    print("\nShowing optimal trajectory")
    final_reward = show_optimal_trajectory(env, U)
    print("Total Reward for optimal trajectory: {}".format(final_reward))
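# The plotting helpers called in control_ilqr are not defined in this file; below is a
# minimal sketch of what plot_costs_ilqr could look like, assuming 'costs' is a flat
# sequence of per-iteration trajectory costs. plot_rewards_ilqr would be analogous.
import matplotlib.pyplot as plt


def plot_costs_ilqr(costs, title):
    plt.figure()
    plt.plot(costs)
    plt.xlabel('iLQR iteration')
    plt.ylabel('trajectory cost')
    plt.title(title)
    plt.show()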
import copy

import matplotlib.patches as mpatches

env_name = 'TwoLinkArm-v1'
env = gym.make(env_name)
sim_env = gym.make(env_name)
initial_state = env.reset()

total_cost = 0
num_steps = 0
tN = 50
max_iter = 100
x0 = copy.copy(env.state)

# Solve for the open-loop control sequence once, then execute it on the real env.
X, U, cost, list_of_costs = calc_ilqr_input(env, sim_env, tN=tN, max_iter=max_iter, x0=x0)

R = []
for i in range(tN):
    print("Control u = {}, accumulated cost = {}".format(str(U[i]), total_cost))
    x_next, cost_i, is_terminal, debug_info = env.step(U[i])
    env.render()
    total_cost += cost_i
    R.append(cost_i)
    if is_terminal:
        break
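# Small follow-up sketch (not part of the original script): plot the per-step costs
# collected in R above; mpatches, imported earlier, supplies the legend patch.
import matplotlib.pyplot as plt

plt.figure()
plt.plot(R, color='tab:blue')
plt.xlabel('time step')
plt.ylabel('step cost')
plt.title('iLQR rollout on {}'.format(env_name))
plt.legend(handles=[mpatches.Patch(color='tab:blue', label='step cost')])
plt.show()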
# Debug output for the running-cost derivatives computed earlier in the script.
print(l_uu)
print(l_ux.shape)
print(l_ux)

print('check final cost.....................')
fl, flx, flxx = cost_final(env, X[100])
print(fl)
print(flx.shape)
print(flx)
print(flxx.shape)
print(flxx)

# Solve for the optimal control sequence, simulate it once, then replay it on the
# environment while logging states and controls to the open files f_q and f_u.
optimal_U = calc_ilqr_input(env, sim_env, tN=100, max_iter=1000)
X, cost_list, inter_cost_list, accu_inter_cost_list, inter_cost_sum, final_cost, cost_sum = simulate(
    env, x0, optimal_U)
for i in range(len(optimal_U)):
    # print(i)
    # env.render()
    time.sleep(0.2)
    x, reward, done, info = env._step(optimal_U[i])
    # print(x)
    # print(done)
    # f.write(str(optimal_U[i]) + ';' + str(x) + ';' + str(reward) + ';' + str(done) + '\n')
    f_q.write(str(x) + '\n')
    f_u.write(str(optimal_U[i]) + '\n')
f_q.close()
f_u.close()
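# cost_final is used above but not defined in this file. The sketch below shows one
# plausible quadratic terminal cost together with its gradient and Hessian, i.e. the
# three values (fl, flx, flxx) whose shapes are printed above. The env.goal attribute
# and the weight are assumptions, not the project's actual definitions.
def cost_final(env, x):
    """Terminal cost l_f(x) = w * ||x - x_goal||^2 with its gradient and Hessian."""
    weight = 1e4                       # assumed terminal-cost weight
    x_goal = np.asarray(env.goal)      # assumed: the environment exposes a goal state
    diff = x - x_goal
    l = weight * np.sum(diff ** 2)               # scalar terminal cost
    l_x = 2.0 * weight * diff                    # gradient, shape (state_dim,)
    l_xx = 2.0 * weight * np.eye(x.shape[0])     # Hessian, shape (state_dim, state_dim)
    return l, l_x, l_xx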