예제 #1
0
def run(num_envs=16,
        hidden_dim=256,
        batch_size=1024,
        iterations=1000,
        log_interval=10,
        runs=1):
    """Train agents on the 3D waypoint task and record results to CSV.

    Builds ``num_envs`` environments via ``tl.make_nh_waypoint_3d`` wrapped
    in a ``SubprocVecEnv``, plus a single ``tenv.WaypointEnv3D`` whose
    observation/action spaces size the agent. For each of ``runs``
    repetitions a fresh ``ag.Agent`` and Adam optimizer (lr=1e-4) are
    created and handed to ``tl.train_mp``.

    The first value returned by ``tl.train_mp`` on the first run becomes
    the "timesteps" column; the second value of every run becomes column
    "run<i>". The CSV is rewritten after every run.

    Args:
        num_envs: number of environments passed to ``SubprocVecEnv``.
        hidden_dim: hidden size forwarded to ``ag.Agent``.
        batch_size: forwarded to ``tl.train_mp``.
        iterations: forwarded to ``tl.train_mp``.
        log_interval: forwarded to ``tl.train_mp``.
        runs: number of independent training repetitions.
    """
    env_fns = []
    for _ in range(num_envs):
        env_fns.append(tl.make_nh_waypoint_3d())
    vec_envs = SubprocVecEnv(env_fns)

    eval_env = tenv.WaypointEnv3D()
    obs_size = eval_env.observation_space.shape[0]
    act_size = eval_env.action_space.shape[0]

    out_dir = os.getcwd() + "/nh_waypoint_3d/"
    # NOTE(review): the same fname is passed for every run, so later runs
    # presumably overwrite earlier checkpoints — confirm against train_mp.
    checkpoint = out_dir + "gaussian_" + str(2)

    results = None
    for run_idx in range(runs):
        agent = ag.Agent(obs_size, hidden_dim, act_size, dim=3)
        optimizer = torch.optim.Adam(agent.parameters(), lr=1e-4)
        timesteps, rewards, agent = tl.train_mp(
            vec_envs,
            eval_env,
            agent,
            optimizer,
            batch_size,
            iterations,
            log_interval,
            render=False,
            fname=checkpoint,
        )
        # The table is created lazily so the first run supplies the
        # shared "timesteps" axis.
        if results is None:
            results = pd.DataFrame()
            results["timesteps"] = timesteps
        results["run" + str(run_idx)] = rewards
        results.to_csv(out_dir + "data.csv", index=False)
예제 #2
0
def run(hidden_dim=256):
    """Load a saved 3D waypoint agent and print its mean test reward.

    The state dict is read from
    ``<cwd>/nh_waypoint_3d/gaussian_2_term.pth.tar`` with every storage
    kept as loaded (the ``map_location`` callable returns its first
    argument unchanged). ``tl.test`` is then called 100 times with
    rendering enabled and the arithmetic mean of its return values is
    printed.

    Args:
        hidden_dim: hidden size forwarded to ``ag.Agent``; must match the
            saved weights for ``load_state_dict`` to succeed.
    """
    def keep_storage(storage, loc):
        # Identity map_location: return the storage unchanged.
        return storage

    checkpoint_dir = os.getcwd()+"/nh_waypoint_3d/"
    checkpoint_file = checkpoint_dir+"gaussian_2_term.pth.tar"

    eval_env = tenv.WaypointEnv3D()
    obs_size = eval_env.observation_space.shape[0]
    act_size = eval_env.action_space.shape[0]

    agent = ag.Agent(obs_size, hidden_dim, act_size, dim=3)
    print("Agent initialized, loading state dictionary.")
    weights = torch.load(checkpoint_file, map_location=keep_storage)
    agent.load_state_dict(weights)
    print()
    print("State dictionary loaded")

    episode_rewards = []
    for _ in range(100):
        episode_rewards.append(tl.test(eval_env, agent, render=True))
    rewards = sum(episode_rewards)/len(episode_rewards)
    print("Mean reward: ", rewards)
예제 #3
0
def run(hidden_dim=256):
    """Load a saved 2D trajectory agent and print its mean test reward.

    The state dict is read from ``<cwd>/_2d/soft_2d/3-wps_.pth.tar`` with
    an identity ``map_location``. The agent is built with ``dim=2`` and
    ``lookahead=3``. ``tl.test`` is called 100 times with rendering
    enabled and the arithmetic mean of its return values is printed.

    Args:
        hidden_dim: hidden size forwarded to ``ag.Agent``; must match the
            saved weights for ``load_state_dict`` to succeed.
    """
    def keep_storage(storage, loc):
        # Identity map_location: return the storage unchanged.
        return storage

    checkpoint_dir = os.getcwd() + "/_2d/soft_2d/"
    checkpoint_file = checkpoint_dir + "3-wps_.pth.tar"

    eval_env = tenv.TrajectoryEnv2D()
    obs_size = eval_env.observation_space.shape[0]
    act_size = eval_env.action_space.shape[0]

    agent = ag.Agent(obs_size, hidden_dim, act_size, dim=2, lookahead=3)
    print("Agent initialized, loading state dictionary.")
    weights = torch.load(checkpoint_file, map_location=keep_storage)
    agent.load_state_dict(weights)
    print()
    print("State dictionary loaded")

    episode_rewards = []
    for _ in range(100):
        episode_rewards.append(tl.test(eval_env, agent, render=True))
    rewards = sum(episode_rewards) / len(episode_rewards)
    print("Mean reward: ", rewards)
예제 #4
0
def run(num_envs=16, hidden_dim=256, batch_size=1024, iterations=1000, log_interval=10, runs=3):
    """Train agents on the 2D trajectory task and record results to CSV.

    Builds ``num_envs`` environments via ``tl.make_traj_2d`` wrapped in a
    ``SubprocVecEnv`` and a single ``tenv.TrajectoryEnv2D``. That
    environment's ``num_fut_wp`` is set to ``cfg.waypoints - 1`` and its
    observation space is replaced by a ``Box(-1, 1)`` of length
    ``10 + 7 * (num_fut_wp + 1)``.

    For each of ``runs`` repetitions a fresh ``ag.Agent`` and Adam
    optimizer (lr=``cfg.lr``) are passed to ``tl.train_mp``. The first
    returned value of the first run becomes the "timesteps" column; the
    second returned value of each run becomes column "run<i>". The CSV is
    rewritten after every run.

    NOTE(review): ``lookahead``, ``wps`` and ``cfg`` are not defined in
    this function; they are presumably module-level names. ``wps`` must be
    a ``str`` because it is concatenated into file names — confirm.

    Args:
        num_envs: number of environments passed to ``SubprocVecEnv``.
        hidden_dim: hidden size forwarded to ``ag.Agent``.
        batch_size: forwarded to ``tl.train_mp``.
        iterations: forwarded to ``tl.train_mp``.
        log_interval: forwarded to ``tl.train_mp``.
        runs: number of independent training repetitions.
    """
    env_fns = []
    for _ in range(num_envs):
        env_fns.append(tl.make_traj_2d())
    vec_envs = SubprocVecEnv(env_fns)
    eval_env = tenv.TrajectoryEnv2D()

    # Resize the observation space to match the configured waypoint count.
    eval_env.num_fut_wp = int(cfg.waypoints-1)
    obs_len = 10+7*(eval_env.num_fut_wp+1)
    eval_env.observation_space = gym.spaces.Box(-1, 1, shape=(obs_len,))

    obs_size = eval_env.observation_space.shape[0]
    act_size = eval_env.action_space.shape[0]
    out_dir = os.getcwd()+"/_2d/traj_2d/"

    results = None
    for run_idx in range(runs):
        agent = ag.Agent(obs_size, hidden_dim, act_size, dim=2, lookahead=lookahead)
        optimizer = torch.optim.Adam(agent.parameters(), lr=cfg.lr)
        timesteps, rewards, agent = tl.train_mp(
            vec_envs,
            eval_env,
            agent,
            optimizer,
            batch_size,
            iterations,
            log_interval,
            render=False,
            fname=out_dir+wps+"-wps_",
        )
        # The table is created lazily so the first run supplies the
        # shared "timesteps" axis.
        if results is None:
            results = pd.DataFrame()
            results["timesteps"] = timesteps
        results["run"+str(run_idx)] = rewards
        results.to_csv(out_dir+"data_wp-"+wps+".csv", index=False)
예제 #5
0
def run(hidden_dim=256):
    """Load a saved 3D waypoint agent, roll it out, and plot the result.

    The state dict is read from
    ``<cwd>/waypoint_3d/gaussian_2_term.pth.tar`` with an identity
    ``map_location``. ``test(t_env, agent)`` returns three values; the
    first is plotted against the third and the second is not used. The
    figure is written to ``./figures/value_density.png``.

    NOTE(review): the output file name is the same as the one used for
    the value-density plot elsewhere — confirm this is intentional.

    Args:
        hidden_dim: hidden size forwarded to ``ag.Agent``; must match the
            saved weights for ``load_state_dict`` to succeed.
    """
    def keep_storage(storage, loc):
        # Identity map_location: return the storage unchanged.
        return storage

    checkpoint_dir = os.getcwd() + "/waypoint_3d/"
    checkpoint_file = checkpoint_dir + "gaussian_2_term.pth.tar"

    eval_env = tenv.WaypointEnv3D()
    obs_size = eval_env.observation_space.shape[0]
    act_size = eval_env.action_space.shape[0]

    agent = ag.Agent(obs_size, hidden_dim, act_size, dim=3)
    print("Agent initialized, loading state dictionary.")
    weights = torch.load(checkpoint_file, map_location=keep_storage)
    agent.load_state_dict(weights)
    print()
    print("State dictionary loaded")

    # The middle return value is not needed for this plot.
    velocities, _pqrs, times = test(eval_env, agent)

    figure = plt.figure(figsize=(5, 5))
    axes = figure.add_subplot(111)
    axes.plot(times, velocities)
    axes.set_xlabel('time (s)')
    axes.set_ylabel('u (m/s)')
    axes.set_xlim([0, 3])
    axes.set_ylim([-1.5, 1.5])
    plt.savefig('./figures/value_density.png')
    print("figure saved")
예제 #6
0
def run(hidden_dim=256):
    """Plot ``exp(-value)`` of a saved 2D waypoint agent over an (x, y) grid.

    The state dict is read from
    ``<cwd>/waypoint_2d/gaussian_2_term.pth.tar`` with an identity
    ``map_location``. ``agent.get_integrated_value`` is evaluated on a
    30x30 grid with x in [0, 3] and y in [-1.5, 1.5]. The result is drawn
    with ``pcolormesh`` and written to ``./figures/value_density.png``.

    NOTE(review): each grid state is the 2-vector ``[x, y]``, while the
    agent is sized from the environment's observation space — confirm that
    ``get_integrated_value`` accepts this input.

    Args:
        hidden_dim: hidden size forwarded to ``ag.Agent``; must match the
            saved weights for ``load_state_dict`` to succeed.
    """
    path = os.getcwd() + "/waypoint_2d/"
    fname = path + "gaussian_2_term.pth.tar"
    t_env = tenv.WaypointEnv2D()

    state_dim = t_env.observation_space.shape[0]
    action_dim = t_env.action_space.shape[0]

    agent = ag.Agent(state_dim, hidden_dim, action_dim)
    print("Agent initialized, loading state dictionary.")
    agent.load_state_dict(
        torch.load(fname, map_location=lambda storage, loc: storage))
    print()
    print("State dictionary loaded")

    xs = np.linspace(0, 3, 30)
    ys = np.linspace(-1.5, 1.5, 30)
    # Default 'xy' indexing: XS/YS have shape (len(ys), len(xs)), so the
    # row index follows y and the column index follows x.
    XS, YS = np.meshgrid(xs, ys)
    VALUE = np.zeros(XS.shape)

    for i, x in enumerate(xs):
        for j, y in enumerate(ys):
            arr = np.array([x, y])
            state = torch.Tensor(arr).to(device)
            value = agent.get_integrated_value(state).item()
            # Row = y index, column = x index, matching XS[j, i] == x and
            # YS[j, i] == y. Writing VALUE[i, j] would transpose the plot,
            # and the square grid hides that from any shape check.
            VALUE[j, i] = exp(-value)

    fig = plt.figure(figsize=(5, 5))
    ax = fig.add_subplot(111)
    ax.pcolormesh(XS, YS, VALUE, cmap="plasma")
    ax.set_xlabel('body x distance (m)')
    ax.set_ylabel('body y distance (m)')
    ax.set_xlim([0, 3])
    ax.set_ylim([-1.5, 1.5])
    plt.savefig('./figures/value_density.png')
    print("figure saved")
    print("figure saved")