def lunar_illustration():
    env = gym.make('LunarLanderContinuous-v2')
    env._max_episode_steps = 1000  # We don't want it to time out.

    agent = build_lunar_lander(env)
    env = VideoMonitor(env)
    stats, score, traj = train(env,
                               agent,
                               return_trajectory=True,
                               num_episodes=10)
    env.close()

    states = traj[0].state
    plt.plot(states[:, 0], label='x')
    plt.plot(states[:, 1], label='y')
    plt.plot(states[:, 2], label='vx')
    plt.plot(states[:, 3], label='vy')
    plt.plot(states[:, 4], label='theta')
    plt.plot(states[:, 5], label='vtheta')
    plt.legend()
    plt.grid()
    plt.ylim(-1.1, 1.1)
    plt.title('PID Control')
    plt.ylabel('Value')
    plt.xlabel('Steps')
    savepdf("pid_lunar_trajectory")
    plt.show()
def partB_discretization():
    """
    Part 2: Comparing Euler discretization and Exponential discretization for the Harmonic Oscillator.
    See (Her21, Subsection 8.2.6), and keep in mind that the following is the level of precision that your 
    discrete planning method will experience. We will therefore use the
    
    > model.f_discrete(x, u) 
    
    method which corresponds to f_k(x_k, u_k).
    """

    plt.figure()
    N = 20
    env = HarmonicOscilatorEnvironment(dt=T / N, k=k, m=m)  # By default, the linear models will default to exponential discretization.
    dmodel = env.discrete_model  # Get the discrete model; this is what we will use for planning.
    x_ei = [dmodel.reset()]  # Get starting state
    steps = N
    u = [0]  # No action
    for i in range(N):
        x_ei.append(dmodel.f_discrete(x_ei[-1], u))
    x_ei = np.stack(x_ei)

    # Plotting:
    ts, x_true = solve_harmonic(dmodel.continuous_model, d=dmodel.reset()[0], tF=100)
    plt.plot(ts, x_true, 'k-', label="True solution")
    plt.plot(np.linspace(0, T, x_ei.shape[0]), x_ei[:, 0], 'o', label=f"using $f_k$ (Exponential integrator), $N={steps}$")

    # Same as above, but force system to use the Euler discretization
    N = 500
    env = HarmonicOscilatorEnvironment(dt=T / N, k=k, m=m, discretization_method='euler', Tmax=100)  # By default, the linear models will default to exponential discretization.
    dmodel = env.discrete_model  # Get the discrete model; this is what we will use for planning.
    x_euler = [dmodel.reset()]  # Get starting state
    steps = N
    u = [0]  # No action
    for i in range(N):
        x_euler.append(dmodel.f_discrete(x_euler[-1], u))
    x_euler = np.stack(x_euler)
    plt.plot(np.linspace(0, T, x_euler.shape[0]), x_euler[:, 0], '-',label=f"Using Euler discretization, $N={steps}$")
    setax()
    plt.legend()
    savepdf("harmonicC.pdf")
    plt.show()
Ejemplo n.º 3
0
def pid_explicit():
    env = LocomotiveEnvironment(m=70, slope=0, dt=0.05, Tmax=15)
    # Can also visualize
    #from irlc import VideoMonitor
    #env = VideoMonitor(env)
    # Continue
    pid = PID(dt=0.05, Kp=40, Kd=0, Ki=0, target=0)
    x = [env.reset()]
    for _ in range(200):  # Simulate for 200 steps, i.e. 0.05 * 200 seconds.
        x_cur = x[-1]  # last state [position, velocity]
        # Use pid class to compute action
        u = pid.pi(x_cur[0])
        u = np.clip(u, -100, 100)  # clip actions.
        xp_, reward, done, _ = env.step(u)
        x.append(xp_)

    x = np.stack(x)
    plt.plot(x[:, 0], 'k-', label="PID state trajectory")
    savepdf("pid_basic")
    plt.show()
def partA_simulation():
    """  Setup environment and obtain the path of the true solution """
    env = HarmonicOscilatorEnvironment(dt=2, k=k, m=m, discretization_method='euler')
    model = env.discrete_model.continuous_model
    x0 = env.reset()
    N = 100
    ts, x_true = solve_harmonic(model, d=x0[0], tF=100)

    """
    Part 1: Obtain simulation paths using Euler and RK4 integration. See (Her21, Section 8.1)
    """
    N = 500
    plt.plot(ts, x_true, 'k-', label="True solution")
    x, u, tt = model.simulate(x0=x0, u_fun=0, t0=0, tF=T, N_steps=N, method='euler')
    plt.plot(tt, x[:, 0], '-', label=f"Euler integration, $N={N}$")

    N = 40
    x, u, tt = model.simulate(x0=x0, u_fun=0, t0=0, tF=T, N_steps=N, method='rk4') # RK4 is the default
    plt.plot(tt, x[:, 0], 'o-', label=f"RK4 integration, $N={N}$")
    setax()
    plt.legend()
    savepdf("harmonicB.pdf")
    plt.show()
def cartpole_balance(RecedingHorizon=True,
                     sigma=0.0,
                     time_horizon_length=30,
                     ddp=False):
    N = 300  ## Simulation Time steps
    np.random.seed(1)
    dt = 0.05
    pole_length = 1.0
    action_size = 1
    max_force = 5.0

    goal_angle = 0
    init_angle = np.pi
    x_goal = np.array([0.0, 0.0, np.sin(goal_angle), np.cos(goal_angle), 0.0])
    x0 = np.array([0, 0, np.sin(init_angle), np.cos(init_angle), 0])
    Q = np.diag([1, 0.01, 30, 30, 0.01])  # diag([1.0, 1.0, 1.0, 1.0, 1.0])
    R = 0.01 * np.eye(action_size)

    cost = QRCost(Q, R, QN=None, x_goal=x_goal)
    env = CartpoleSinCosEnvironment(dt=dt,
                                    cost=cost,
                                    l=pole_length,
                                    min_bounds=-max_force,
                                    max_bounds=max_force,
                                    ddp=ddp)

    x = [
        x0,
    ]
    x_model = [
        x0,
    ]
    x_current, x_ = x0, x0
    u = np.zeros((N, 1))
    for i in range(N):
        if RecedingHorizon:
            xs, us, J_hist = ilqr(env,
                                  time_horizon_length,
                                  x_current,
                                  n_iter=300,
                                  use_linesearch=True,
                                  verbose=False,
                                  ddp=ddp)
            u[i] = np.copy(us[0])

        elif i == 0:
            xs, us, J_hist = ilqr(env,
                                  N + 1,
                                  x_current,
                                  n_iter=2000,
                                  use_linesearch=True,
                                  verbose=True,
                                  ddp=ddp)
            u = np.copy(us)
            x = np.vstack(
                (np.asarray(x0),
                 env.step(x0,
                          us +
                          np.random.normal(0, sigma, N + 1).reshape(-1, 1),
                          N_steps=N)))
            x_model = xs[:-1]

        if RecedingHorizon:
            """ Euler dynamics = bad """
            x_ = env.f(x_current, u[i], i)
            x_model.append(x_)
            """ Truer dynamics RK4 = good """
            us += np.random.normal(0, sigma,
                                   time_horizon_length).reshape(-1, 1)
            x_current = env.step(x_current, us, 1)[0]
            x.append(x_current)

        #env.render(x_current)
        print(f"Iteration {i}, x={x[i][0]}")

    #render_(x_model,env)
    #env.viewer.close()

    render_(x, env)
    env.viewer.close()
    import os, sys
    os.chdir(sys.path[0])

    ss = "ddp" if ddp else "ilqr"
    ss1 = "mpc" if RecedingHorizon else "no_mpc"

    pickle.dump(x, open(f"Trajectories/xs_{ss}_{ss1}300.pkl", "wb"))
    pickle.dump(u, open(f"Trajectories/us_{ss}_{ss1}300.pkl", "wb"))
    pickle.dump(x_model,
                open(f"Trajectories/xs_predicted_{ss}_{ss1}300.pkl", "wb"))
    plt.plot(np.squeeze(x)[:, 4])
    plt.plot(np.squeeze(u))
    plt.legend(["Angle", "Action"], )

    plt.title(ss + " " + ss1)
    from irlc import savepdf
    savepdf(f"cartpole_{ss}")
    plt.show()