Example 1
def run(hidden_dim=256):
    # Evaluate a trained 3D waypoint-tracking agent.
    # Relies on module-level imports of os, torch, and the project's
    # environment (tenv), agent (ag), and test-loop (tl) modules.
    path = os.getcwd() + "/nh_waypoint_3d/"
    fname = path + "gaussian_2_term.pth.tar"

    # Build the environment and read its observation/action sizes.
    t_env = tenv.WaypointEnv3D()
    state_dim = t_env.observation_space.shape[0]
    action_dim = t_env.action_space.shape[0]

    # Initialize the agent and restore the trained weights, mapped to CPU.
    agent = ag.Agent(state_dim, hidden_dim, action_dim, dim=3)
    print("Agent initialized, loading state dictionary.")
    agent.load_state_dict(torch.load(fname, map_location=lambda storage, loc: storage))
    print()
    print("State dictionary loaded")

    # Run 100 rendered test episodes and report the mean episode reward.
    episode_rewards = [tl.test(t_env, agent, render=True) for _ in range(100)]
    mean_reward = sum(episode_rewards) / len(episode_rewards)
    print("Mean reward: ", mean_reward)
Example 2
def run(hidden_dim=256):
    # Evaluate a trained 2D trajectory-tracking agent with a 3-waypoint lookahead.
    path = os.getcwd() + "/_2d/soft_2d/"
    fname = path + "3-wps_.pth.tar"

    # Build the environment and read its observation/action sizes.
    t_env = tenv.TrajectoryEnv2D()
    state_dim = t_env.observation_space.shape[0]
    action_dim = t_env.action_space.shape[0]

    # Initialize the agent and restore the trained weights, mapped to CPU.
    agent = ag.Agent(state_dim, hidden_dim, action_dim, dim=2, lookahead=3)
    print("Agent initialized, loading state dictionary.")
    agent.load_state_dict(
        torch.load(fname, map_location=lambda storage, loc: storage))
    print()
    print("State dictionary loaded")

    # Run 100 rendered test episodes and report the mean episode reward.
    episode_rewards = [tl.test(t_env, agent, render=True) for _ in range(100)]
    mean_reward = sum(episode_rewards) / len(episode_rewards)
    print("Mean reward: ", mean_reward)