def lunar_illustration(): env = gym.make('LunarLanderContinuous-v2') env._max_episode_steps = 1000 # We don't want it to time out. agent = build_lunar_lander(env) env = VideoMonitor(env) stats, score, traj = train(env, agent, return_trajectory=True, num_episodes=10) env.close() states = traj[0].state plt.plot(states[:, 0], label='x') plt.plot(states[:, 1], label='y') plt.plot(states[:, 2], label='vx') plt.plot(states[:, 3], label='vy') plt.plot(states[:, 4], label='theta') plt.plot(states[:, 5], label='vtheta') plt.legend() plt.grid() plt.ylim(-1.1, 1.1) plt.title('PID Control') plt.ylabel('Value') plt.xlabel('Steps') savepdf("pid_lunar_trajectory") plt.show()
def partB_discretization(): """ Part 2: Comparing Euler discretization and Exponential discretization for the Harmonic Oscillator. See (Her21, Subsection 8.2.6), and keep in mind that the following is the level of precision that your discrete planning method will experience. We will therefore use the > model.f_discrete(x, u) method which corresponds to f_k(x_k, u_k). """ plt.figure() N = 20 env = HarmonicOscilatorEnvironment(dt=T / N, k=k, m=m) # By default, the linear models will default to exponential discretization. dmodel = env.discrete_model # Get the discrete model; this is what we will use for planning. x_ei = [dmodel.reset()] # Get starting state steps = N u = [0] # No action for i in range(N): x_ei.append(dmodel.f_discrete(x_ei[-1], u)) x_ei = np.stack(x_ei) # Plotting: ts, x_true = solve_harmonic(dmodel.continuous_model, d=dmodel.reset()[0], tF=100) plt.plot(ts, x_true, 'k-', label="True solution") plt.plot(np.linspace(0, T, x_ei.shape[0]), x_ei[:, 0], 'o', label=f"using $f_k$ (Exponential integrator), $N={steps}$") # Same as above, but force system to use the Euler discretization N = 500 env = HarmonicOscilatorEnvironment(dt=T / N, k=k, m=m, discretization_method='euler', Tmax=100) # By default, the linear models will default to exponential discretization. dmodel = env.discrete_model # Get the discrete model; this is what we will use for planning. x_euler = [dmodel.reset()] # Get starting state steps = N u = [0] # No action for i in range(N): x_euler.append(dmodel.f_discrete(x_euler[-1], u)) x_euler = np.stack(x_euler) plt.plot(np.linspace(0, T, x_euler.shape[0]), x_euler[:, 0], '-',label=f"Using Euler discretization, $N={steps}$") setax() plt.legend() savepdf("harmonicC.pdf") plt.show()
def pid_explicit(): env = LocomotiveEnvironment(m=70, slope=0, dt=0.05, Tmax=15) # Can also visualize #from irlc import VideoMonitor #env = VideoMonitor(env) # Continue pid = PID(dt=0.05, Kp=40, Kd=0, Ki=0, target=0) x = [env.reset()] for _ in range(200): # Simulate for 200 steps, i.e. 0.05 * 200 seconds. x_cur = x[-1] # last state [position, velocity] # Use pid class to compute action u = pid.pi(x_cur[0]) u = np.clip(u, -100, 100) # clip actions. xp_, reward, done, _ = env.step(u) x.append(xp_) x = np.stack(x) plt.plot(x[:, 0], 'k-', label="PID state trajectory") savepdf("pid_basic") plt.show()
def partA_simulation(): """ Setup environment and obtain the path of the true solution """ env = HarmonicOscilatorEnvironment(dt=2, k=k, m=m, discretization_method='euler') model = env.discrete_model.continuous_model x0 = env.reset() N = 100 ts, x_true = solve_harmonic(model, d=x0[0], tF=100) """ Part 1: Obtain simulation paths using Euler and RK4 integration. See (Her21, Section 8.1) """ N = 500 plt.plot(ts, x_true, 'k-', label="True solution") x, u, tt = model.simulate(x0=x0, u_fun=0, t0=0, tF=T, N_steps=N, method='euler') plt.plot(tt, x[:, 0], '-', label=f"Euler integration, $N={N}$") N = 40 x, u, tt = model.simulate(x0=x0, u_fun=0, t0=0, tF=T, N_steps=N, method='rk4') # RK4 is the default plt.plot(tt, x[:, 0], 'o-', label=f"RK4 integration, $N={N}$") setax() plt.legend() savepdf("harmonicB.pdf") plt.show()
def cartpole_balance(RecedingHorizon=True, sigma=0.0, time_horizon_length=30, ddp=False): N = 300 ## Simulation Time steps np.random.seed(1) dt = 0.05 pole_length = 1.0 action_size = 1 max_force = 5.0 goal_angle = 0 init_angle = np.pi x_goal = np.array([0.0, 0.0, np.sin(goal_angle), np.cos(goal_angle), 0.0]) x0 = np.array([0, 0, np.sin(init_angle), np.cos(init_angle), 0]) Q = np.diag([1, 0.01, 30, 30, 0.01]) # diag([1.0, 1.0, 1.0, 1.0, 1.0]) R = 0.01 * np.eye(action_size) cost = QRCost(Q, R, QN=None, x_goal=x_goal) env = CartpoleSinCosEnvironment(dt=dt, cost=cost, l=pole_length, min_bounds=-max_force, max_bounds=max_force, ddp=ddp) x = [ x0, ] x_model = [ x0, ] x_current, x_ = x0, x0 u = np.zeros((N, 1)) for i in range(N): if RecedingHorizon: xs, us, J_hist = ilqr(env, time_horizon_length, x_current, n_iter=300, use_linesearch=True, verbose=False, ddp=ddp) u[i] = np.copy(us[0]) elif i == 0: xs, us, J_hist = ilqr(env, N + 1, x_current, n_iter=2000, use_linesearch=True, verbose=True, ddp=ddp) u = np.copy(us) x = np.vstack( (np.asarray(x0), env.step(x0, us + np.random.normal(0, sigma, N + 1).reshape(-1, 1), N_steps=N))) x_model = xs[:-1] if RecedingHorizon: """ Euler dynamics = bad """ x_ = env.f(x_current, u[i], i) x_model.append(x_) """ Truer dynamics RK4 = good """ us += np.random.normal(0, sigma, time_horizon_length).reshape(-1, 1) x_current = env.step(x_current, us, 1)[0] x.append(x_current) #env.render(x_current) print(f"Iteration {i}, x={x[i][0]}") #render_(x_model,env) #env.viewer.close() render_(x, env) env.viewer.close() import os, sys os.chdir(sys.path[0]) ss = "ddp" if ddp else "ilqr" ss1 = "mpc" if RecedingHorizon else "no_mpc" pickle.dump(x, open(f"Trajectories/xs_{ss}_{ss1}300.pkl", "wb")) pickle.dump(u, open(f"Trajectories/us_{ss}_{ss1}300.pkl", "wb")) pickle.dump(x_model, open(f"Trajectories/xs_predicted_{ss}_{ss1}300.pkl", "wb")) plt.plot(np.squeeze(x)[:, 4]) plt.plot(np.squeeze(u)) plt.legend(["Angle", "Action"], ) plt.title(ss + " " + ss1) from irlc import savepdf savepdf(f"cartpole_{ss}") plt.show()