Example #1
import gym
from gym.spaces import Box
from gym.wrappers import TimeLimit, ClipAction  # import paths assumed; lagom ships equivalent wrappers

def _make_env():
    # `config` is supplied by the surrounding experiment script
    env = gym.make(config['env.id'])
    env = env.env  # strip the built-in gym TimeLimit; TODO: remove once gym updates it
    env = TimeLimit(env, env.spec.max_episode_steps)
    if config['env.clip_action'] and isinstance(env.action_space, Box):
        env = ClipAction(env)
    return env
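A usage sketch under stated assumptions: the config values are placeholders, and _make_env is passed to lagom's make_vec_env (called with the same positional arguments as in the tests below):

config = {'env.id': 'Pendulum-v0', 'env.clip_action': True}  # hypothetical values for illustration
env = make_vec_env(_make_env, 4, 0)  # 4 parallel environments, initial seed 0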
Example #2
import gym
import numpy as np
import pytest

# import paths below are assumed from the lagom test suite
from lagom import RandomAgent, EpisodeRunner
from lagom.metric import Trajectory
from lagom.envs import make_vec_env
from lagom.envs.wrappers import TimeLimit, VecStepInfo
from .sanity_env import SanityEnv

def test_episode_runner(env_id, num_env, init_seed, T):
    if env_id == 'Sanity':
        make_env = lambda: TimeLimit(SanityEnv())
    else:
        make_env = lambda: gym.make(env_id)
    env = make_vec_env(make_env, num_env, init_seed)
    env = VecStepInfo(env)
    agent = RandomAgent(None, env, None)
    runner = EpisodeRunner()
    
    if num_env > 1:
        with pytest.raises(AssertionError):
            D = runner(agent, env, T)
    else:
        with pytest.raises(AssertionError):
            runner(agent, env.env, T)  # must be VecStepInfo
        D = runner(agent, env, T)
        for traj in D:
            assert isinstance(traj, Trajectory)
            assert len(traj) <= env.spec.max_episode_steps
            assert traj.numpy_observations.shape == (len(traj) + 1, *env.observation_space.shape)
            if isinstance(env.action_space, gym.spaces.Discrete):
                assert traj.numpy_actions.shape == (len(traj),)
            else:
                assert traj.numpy_actions.shape == (len(traj), *env.action_space.shape)
            assert traj.numpy_rewards.shape == (len(traj),)
            assert traj.numpy_dones.shape == (len(traj), )
            assert traj.numpy_masks.shape == (len(traj), )
            assert len(traj.step_infos) == len(traj)
            if traj.completed:
                assert np.allclose(traj.observations[-1], traj.step_infos[-1]['last_observation'])
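Outside pytest, the same pieces compose directly; a hedged sketch using only the calls exercised above:

env = VecStepInfo(make_vec_env(lambda: gym.make('CartPole-v1'), 1, 0))
agent = RandomAgent(None, env, None)
D = EpisodeRunner()(agent, env, 100)  # returns a list of Trajectory objects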
Example #3
import numpy as np
import pytest

# import paths below are assumed from the lagom test suite
from lagom.metric import Trajectory
from lagom.envs import make_vec_env
from lagom.envs.wrappers import TimeLimit, VecStepInfo
from .sanity_env import SanityEnv

def test_trajectory(init_seed, T):
    make_env = lambda: TimeLimit(SanityEnv())
    env = make_vec_env(make_env, 1, init_seed)  # single environment
    env = VecStepInfo(env)
    D = Trajectory()
    assert len(D) == 0
    assert not D.completed
    
    observation, _ = env.reset()
    D.add_observation(observation)
    for t in range(T):
        action = [env.action_space.sample()]
        next_observation, reward, step_info = env.step(action)
        # unbatch reward and step_info: with a single environment, take index 0
        reward, step_info = map(lambda x: x[0], [reward, step_info])
        if step_info.last:
            D.add_observation([step_info['last_observation']])
        else:
            D.add_observation(next_observation)
        D.add_action(action)
        D.add_reward(reward)
        D.add_step_info(step_info)
        observation = next_observation
        if step_info.last:
            with pytest.raises(AssertionError):
                D.add_observation(observation)
            break
    assert len(D) > 0
    assert len(D) <= T
    assert len(D) + 1 == len(D.observations)
    assert len(D) + 1 == len(D.numpy_observations)
    assert len(D) == len(D.actions)
    assert len(D) == len(D.numpy_actions)
    assert len(D) == len(D.rewards)
    assert len(D) == len(D.numpy_rewards)
    assert len(D) == len(D.numpy_dones)
    assert len(D) == len(D.numpy_masks)
    assert np.allclose(np.logical_not(D.numpy_dones), D.numpy_masks)
    assert len(D) == len(D.step_infos)
    if len(D) < T:
        assert step_info.last
        assert D.completed
        assert D.reach_terminal
        assert not D.reach_time_limit
        assert np.allclose(D.observations[-1], [step_info['last_observation']])
    if not step_info.last:
        assert not D.completed
        assert not D.reach_terminal
        assert not D.reach_time_limit
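The dones/masks invariant asserted above (masks are the element-wise negation of dones) is worth seeing in isolation; a tiny illustration:

import numpy as np
dones = np.array([False, False, True])  # episode terminates at the third step
masks = np.logical_not(dones)           # [True, True, False]; masks like these are commonly used to zero out value bootstrapping past terminal states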
Example #4
import gym
from gym.wrappers import TimeLimit
from lagom.envs.wrappers import NormalizeAction  # import path assumed for this lagom wrapper

def _make_env():
    # `config` is supplied by the surrounding experiment script
    env = gym.make(config['env.id'])
    env = env.env  # strip the built-in gym TimeLimit; TODO: remove once gym updates it
    env = TimeLimit(env, env.spec.max_episode_steps)
    env = NormalizeAction(env)
    return env
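A hedged usage sketch: 'Pendulum-v0' and the config values are placeholders, and NormalizeAction is assumed to map agent actions from a normalized range back to the environment's true Box bounds:

config = {'env.id': 'Pendulum-v0'}  # hypothetical values for illustration
env = _make_env()
observation = env.reset()
observation, reward, done, info = env.step(env.action_space.sample())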