Example #1
0
def test_segment_runner(env_id):
    """Check that SegmentRunner collects fixed-length segments from a rolling vec env."""
    env = make_vec_env(SerialVecEnv, make_gym_env, env_id, 3, 0, True)
    env_spec = EnvSpec(env)
    agent = RandomAgent(None, env_spec)
    runner = SegmentRunner(None, agent, env)

    D = runner(4)
    # One segment per sub-environment, each exactly 4 steps long.
    assert len(D) == 3
    for d in D:
        assert isinstance(d, Segment)
        assert d.T == 4
        # Consecutive transitions must chain: s_next of one is s of the next.
        for prev, nxt in zip(d.transitions, d.transitions[1:]):
            assert np.allclose(prev.s_next, nxt.s)

    # A much longer horizon still yields full-length segments.
    D = runner(T=1000)
    for d in D:
        assert d.T == 1000

    # SegmentRunner requires a rolling vec env; non-rolling must be rejected.
    with pytest.raises(AssertionError):
        env = make_vec_env(SerialVecEnv, make_gym_env, env_id, 3, 0, False)
        SegmentRunner(None, agent, env)
Example #2
0
 def test_make_vec_env(self):
     """Serial and parallel vec envs built with the same seed must agree."""
     serial = make_vec_env(SerialVecEnv, make_gym_env, 'CartPole-v1', 5, 1,
                           True)
     parallel = make_vec_env(ParallelVecEnv, make_gym_env, 'CartPole-v1', 5, 1,
                             True)
     assert isinstance(serial, VecEnv)
     assert isinstance(serial, SerialVecEnv)
     assert isinstance(parallel, VecEnv)
     assert isinstance(parallel, ParallelVecEnv)
     assert serial.num_env == parallel.num_env
     # EnvSpec must report the same number of environments as the env itself.
     for venv in (serial, parallel):
         assert EnvSpec(venv).num_env == venv.num_env
     # Spaces and limits agree between the two backends.
     assert serial.observation_space == parallel.observation_space
     assert serial.action_space == parallel.action_space
     assert serial.reward_range == parallel.reward_range
     assert serial.T == parallel.T
     # Same random seeds -> identical results under identical actions.
     assert np.allclose(serial.reset(), parallel.reset())
     actions = [1] * 5
     obs_s, rew_s, done_s, _ = serial.step(actions)
     obs_p, rew_p, done_p, _ = parallel.step(actions)
     assert np.allclose(obs_s, obs_p)
     assert np.allclose(rew_s, rew_p)
     assert np.allclose(done_s, done_p)
     # Closing is tracked independently per vec env.
     for venv in (serial, parallel):
         assert not venv.closed
         venv.close()
         assert venv.closed
Example #3
0
def test_equivalence_vec_env():
    """Serial and parallel vec envs seeded identically stay in lockstep."""
    serial = make_vec_env(SerialVecEnv, make_gym_env, 'CartPole-v1', 5, 1)
    parallel = make_vec_env(ParallelVecEnv, make_gym_env, 'CartPole-v1', 5, 1)
    assert serial.observation_space == parallel.observation_space
    assert serial.action_space == parallel.action_space
    assert serial.num_env == parallel.num_env
    # Identical seeds -> identical initial observations.
    assert np.allclose(serial.reset(), parallel.reset())
    # Stepping with the same actions yields identical transitions.
    actions = [1] * 5
    obs_s, rew_s, done_s, _ = serial.step(actions)
    obs_p, rew_p, done_p, _ = parallel.step(actions)
    assert np.allclose(obs_s, obs_p)
    assert np.allclose(rew_s, rew_p)
    assert np.allclose(done_s, done_p)
Example #4
0
def test_episode_runner(env_id, num_env, init_seed, T):
    """EpisodeRunner collects whole trajectories from a single VecStepInfo env."""
    if env_id == 'Sanity':
        make_env = lambda: TimeLimit(SanityEnv())
    else:
        make_env = lambda: gym.make(env_id)
    env = VecStepInfo(make_vec_env(make_env, num_env, init_seed))
    agent = RandomAgent(None, env, None)
    runner = EpisodeRunner()

    if num_env > 1:
        # EpisodeRunner only supports a single environment.
        with pytest.raises(AssertionError):
            runner(agent, env, T)
    else:
        # The env must be wrapped in VecStepInfo.
        with pytest.raises(AssertionError):
            runner(agent, env.env, T)
        D = runner(agent, env, T)
        for traj in D:
            assert isinstance(traj, Trajectory)
            assert len(traj) <= env.spec.max_episode_steps
            # Observations include the initial one, hence len(traj) + 1.
            assert traj.numpy_observations.shape == (len(traj) + 1, *env.observation_space.shape)
            if isinstance(env.action_space, gym.spaces.Discrete):
                assert traj.numpy_actions.shape == (len(traj),)
            else:
                assert traj.numpy_actions.shape == (len(traj), *env.action_space.shape)
            assert traj.numpy_rewards.shape == (len(traj),)
            assert traj.numpy_dones.shape == (len(traj),)
            assert traj.numpy_masks.shape == (len(traj),)
            assert len(traj.step_infos) == len(traj)
            if traj.completed:
                # The recorded final observation matches last_observation.
                assert np.allclose(traj.observations[-1], traj.step_infos[-1]['last_observation'])
Example #5
0
def test_final_state_from_segment():
    """final_state_from_segment yields one final state per environment:
    the terminal observation if the segment ended exactly on done,
    otherwise the last recorded observation."""
    env = make_vec_env(SerialVecEnv, make_gym_env, 'CartPole-v1', 3, 0)
    env_spec = EnvSpec(env)

    # Only BatchSegment inputs are accepted.
    with pytest.raises(AssertionError):
        final_state_from_segment([1, 2, 3])

    D = BatchSegment(env_spec, 4)
    D.obs = np.random.randn(*D.obs.shape)
    D.done.fill(False)

    # Env 0: terminates exactly at the last step -> use terminal_observation.
    D.done[0, -1] = True
    D.info[0] = [{}, {}, {}, {'terminal_observation': [0.1, 0.2, 0.3, 0.4]}]

    # Env 1: terminates mid-segment -> the final state is the last recorded
    # observation, NOT the mid-segment terminal observation.
    D.done[1, 2] = True
    D.info[1] = [{}, {}, {'terminal_observation': [1, 2, 3, 4]}, {}]

    # Env 2: terminates at the last step -> use terminal_observation.
    D.done[2, -1] = True
    D.info[2] = [{}, {}, {}, {'terminal_observation': [10, 20, 30, 40]}]

    final_states = final_state_from_segment(D)
    assert final_states.shape == (3, ) + env_spec.observation_space.shape
    assert np.allclose(final_states[0], [0.1, 0.2, 0.3, 0.4])
    assert np.allclose(final_states[1], D.numpy_observations[1, -1, ...])
    assert not np.allclose(final_states[1], [1, 2, 3, 4])
    assert np.allclose(final_states[2], [10, 20, 30, 40])

    # The episode-based variant rejects segment data.
    with pytest.raises(AssertionError):
        final_state_from_episode(D)
Example #6
0
def test_terminal_state_from_episode():
    """terminal_state_from_episode gathers terminal observations from the
    episodes that actually ended with done=True; unfinished episodes
    contribute nothing, and the result is None when none terminated."""
    env = make_vec_env(SerialVecEnv, make_gym_env, 'CartPole-v1', 3, 0)
    env_spec = EnvSpec(env)

    # Only BatchEpisode inputs are accepted.
    with pytest.raises(AssertionError):
        terminal_state_from_episode([1, 2, 3])

    D = BatchEpisode(env_spec)
    # Episode 0: terminates with terminal observation 0.3.
    D.obs[0] = [0.1, 0.2, 1.3]
    D.done[0] = [False, False, True]
    D.info[0] = [{}, {}, {'terminal_observation': 0.3}]

    # Episode 1: never terminates -> excluded from the result.
    D.obs[1] = [1, 2, 3, 4, 5, 6, 7, 8, 9]
    D.done[1] = [False] * 9

    # Episode 2: terminates with terminal observation 20.
    D.obs[2] = [10, 15]
    D.done[2] = [False, True]
    D.info[2] = [{}, {'terminal_observation': 20}]

    terminal_states = terminal_state_from_episode(D)
    # Only the two terminated episodes appear.
    assert terminal_states.shape == (2, ) + env_spec.observation_space.shape
    assert np.allclose(terminal_states[0], 0.3)
    assert np.allclose(terminal_states[1], 20)

    # With every terminal flag cleared there is nothing to collect.
    D.done[0][-1] = False
    D.done[2][-1] = False
    assert terminal_state_from_episode(D) is None

    # The segment-based variant rejects episode data.
    with pytest.raises(AssertionError):
        terminal_state_from_segment(D)
Example #7
0
def test_returns_from_episode():
    """returns_from_episode computes per-step discounted return-to-go for
    each episode, zero-padded on the right up to the longest episode."""
    env = make_vec_env(SerialVecEnv, make_gym_env, 'CartPole-v1', 3, 0)
    env_spec = EnvSpec(env)

    D = BatchEpisode(env_spec)
    D.r[0] = [1, 2, 3]
    D.done[0] = [False, False, True]
    D.r[1] = [1, 2, 3, 4, 5]
    D.done[1] = [False, False, False, False, False]
    D.r[2] = [1, 2, 3, 4, 5, 6, 7, 8]
    D.done[2] = [False, False, False, False, False, False, False, True]

    # Undiscounted (gamma=1.0): plain suffix sums, padded with zeros.
    out = returns_from_episode(D, 1.0)
    assert out.shape == (3, D.maxT)
    assert np.allclose(out[0], [6, 5, 3, 0, 0, 0, 0, 0])
    assert np.allclose(out[1], [15, 14, 12, 9, 5, 0, 0, 0])
    assert np.allclose(out[2], [36, 35, 33, 30, 26, 21, 15, 8])
    del out

    # Discounted (gamma=0.1): e.g. 1 + 0.1*2 + 0.01*3 = 1.23.
    out = returns_from_episode(D, 0.1)
    assert out.shape == (3, D.maxT)
    assert np.allclose(out[0], [1.23, 2.3, 3, 0, 0, 0, 0, 0])
    assert np.allclose(out[1], [1.2345, 2.345, 3.45, 4.5, 5, 0, 0, 0])
    assert np.allclose(
        out[2], [1.2345678, 2.345678, 3.45678, 4.5678, 5.678, 6.78, 7.8, 8])

    # The segment-based variant rejects episode data.
    with pytest.raises(AssertionError):
        returns_from_segment(D, 0.1)
Example #8
0
def test_returns_from_segment():
    """returns_from_segment computes discounted return-to-go inside a
    fixed-length segment; accumulation restarts after each done flag and
    no bootstrapping is applied."""
    env = make_vec_env(SerialVecEnv, make_gym_env, 'CartPole-v1', 3, 0)
    env_spec = EnvSpec(env)

    D = BatchSegment(env_spec, 5)
    # Env 0: one uninterrupted 5-step trajectory.
    D.r[0] = [1, 2, 3, 4, 5]
    D.done[0] = [False, False, False, False, False]
    # Env 1: episode boundary after the third step.
    D.r[1] = [1, 2, 3, 4, 5]
    D.done[1] = [False, False, True, False, False]
    # Env 2: boundaries at the first and last step.
    D.r[2] = [1, 2, 3, 4, 5]
    D.done[2] = [True, False, False, False, True]

    # gamma=1.0: suffix sums within each sub-trajectory.
    out = returns_from_segment(D, 1.0)
    assert out.shape == (3, 5)
    assert np.allclose(out[0], [15, 14, 12, 9, 5])
    assert np.allclose(out[1], [6, 5, 3, 9, 5])
    assert np.allclose(out[2], [1, 14, 12, 9, 5])
    del out

    # gamma=0.1: discounted suffix sums within each sub-trajectory.
    out = returns_from_segment(D, 0.1)
    assert out.shape == (3, 5)
    assert np.allclose(out[0], [1.2345, 2.345, 3.45, 4.5, 5])
    assert np.allclose(out[1], [1.23, 2.3, 3, 4.5, 5])
    assert np.allclose(out[2], [1, 2.345, 3.45, 4.5, 5])

    # The episode-based variant rejects segment data.
    with pytest.raises(AssertionError):
        returns_from_episode(D, 0.1)
Example #9
0
def test_bootstrapped_returns_from_episode():
    """Discounted returns per episode where episodes that were cut off
    (completed == False) are bootstrapped with the matching last state
    value; completed episodes ignore last_Vs."""
    env = make_vec_env(SerialVecEnv, make_gym_env, 'CartPole-v1', 3, 0)
    env_spec = EnvSpec(env)

    D = BatchEpisode(env_spec)
    # Episode 0: terminated -> last_Vs[0] unused.
    D.r[0] = [1, 2, 3]
    D.done[0] = [False, False, True]
    D.completed[0] = True

    # Episode 1: cut off -> bootstrapped with last_Vs[1] == 20.
    D.r[1] = [1, 2, 3, 4, 5]
    D.done[1] = [False, False, False, False, False]
    D.completed[1] = False

    # Episode 2: terminated -> last_Vs[2] unused.
    D.r[2] = [1, 2, 3, 4, 5, 6, 7, 8]
    D.done[2] = [False, False, False, False, False, False, False, True]
    D.completed[2] = True

    # One bootstrap value per episode, shape (3, 1).
    last_Vs = torch.tensor([10, 20, 30]).unsqueeze(1)

    out = bootstrapped_returns_from_episode(D, last_Vs, 1.0)
    assert out.shape == (3, D.maxT)
    assert np.allclose(out[0], [6, 5, 3, 0, 0, 0, 0, 0])
    # Episode 1: e.g. last step 5 + 20 = 25.
    assert np.allclose(out[1], [35, 34, 32, 29, 25, 0, 0, 0])
    assert np.allclose(out[2], [36, 35, 33, 30, 26, 21, 15, 8])
    del out

    out = bootstrapped_returns_from_episode(D, last_Vs, 0.1)
    assert out.shape == (3, D.maxT)
    assert np.allclose(out[0], [1.23, 2.3, 3, 0, 0, 0, 0, 0])
    assert np.allclose(out[1], [1.2347, 2.347, 3.47, 4.7, 7, 0, 0, 0])
    assert np.allclose(
        out[2], [1.2345678, 2.345678, 3.45678, 4.5678, 5.678, 6.78, 7.8, 8])

    # The segment-based variant rejects episode data.
    with pytest.raises(AssertionError):
        bootstrapped_returns_from_segment(D, last_Vs, 0.1)
Example #10
0
def test_bootstrapped_returns_from_segment():
    """Discounted returns inside a fixed-length segment: accumulation
    restarts after each done flag, and only a trajectory still running at
    the segment end is bootstrapped with last_Vs."""
    env = make_vec_env(SerialVecEnv, make_gym_env, 'CartPole-v1', 3, 0)
    env_spec = EnvSpec(env)

    D = BatchSegment(env_spec, 5)
    # Env 0: never done -> bootstrapped with last_Vs[0] == 10.
    D.r[0] = [1, 2, 3, 4, 5]
    D.done[0] = [False, False, False, False, False]
    # Env 1: done mid-segment -> only the trailing piece is bootstrapped.
    D.r[1] = [1, 2, 3, 4, 5]
    D.done[1] = [False, False, True, False, False]
    # Env 2: ends exactly on done -> last_Vs[2] unused.
    D.r[2] = [1, 2, 3, 4, 5]
    D.done[2] = [True, False, False, False, True]

    # One bootstrap value per environment, shape (3, 1).
    last_Vs = torch.tensor([10, 20, 30]).unsqueeze(1)

    out = bootstrapped_returns_from_segment(D, last_Vs, 1.0)
    assert out.shape == (3, 5)
    # Env 0: e.g. last step 5 + 10 = 15.
    assert np.allclose(out[0], [25, 24, 22, 19, 15])
    assert np.allclose(out[1], [6, 5, 3, 29, 25])
    assert np.allclose(out[2], [1, 14, 12, 9, 5])
    del out

    out = bootstrapped_returns_from_segment(D, last_Vs, 0.1)
    assert out.shape == (3, 5)
    assert np.allclose(out[0], [1.2346, 2.346, 3.46, 4.6, 6])
    assert np.allclose(out[1], [1.23, 2.3, 3, 4.7, 7])
    assert np.allclose(out[2], [1, 2.345, 3.45, 4.5, 5])

    # The episode-based variant rejects segment data.
    with pytest.raises(AssertionError):
        bootstrapped_returns_from_episode(D, last_Vs, 0.1)
Example #11
0
def make_env(config, seed):
    """Build a single normalized-action vec env from the experiment config."""
    def _make_env():
        raw = gym.make(config['env.id'])
        # Strip gym's built-in TimeLimit and re-apply it explicitly.
        # TODO: remove until gym update it
        inner = raw.env
        limited = TimeLimit(inner, inner.spec.max_episode_steps)
        return NormalizeAction(limited)
    return make_vec_env(_make_env, 1, seed)  # single environment
Example #12
0
def test_make_vec_env(env_id, num_env, init_seed):
    """make_vec_env seeds each sub-environment from a Seeder(init_seed) stream."""
    env = make_vec_env(lambda: gym.make(env_id), num_env, init_seed)
    assert isinstance(env, VecEnv)
    # Each stored factory partial carries the seed it was created with.
    actual_seeds = [factory.keywords['seed'] for factory in env.list_make_env]
    seeder = Seeder(init_seed)
    assert actual_seeds == seeder(num_env)
Example #13
0
def make_env(config, seed):
    """Build a single vec env, optionally clipping continuous actions."""
    def _make_env():
        raw = gym.make(config['env.id'])
        # Strip gym's built-in TimeLimit and re-apply it explicitly.
        # TODO: remove until gym update it
        inner = raw.env
        wrapped = TimeLimit(inner, inner.spec.max_episode_steps)
        if config['env.clip_action'] and isinstance(wrapped.action_space, Box):
            wrapped = ClipAction(wrapped)
        return wrapped
    return make_vec_env(_make_env, 1, seed)  # single environment
Example #14
0
def test_random_agent():
    """RandomAgent samples valid actions for both a single env and a vec env."""
    env = make_gym_env('Pendulum-v0', 0)
    env_spec = EnvSpec(env)
    agent = RandomAgent(None, env_spec)
    out = agent.choose_action(env.reset())
    assert isinstance(out, dict)
    assert 'action' in out and out['action'].shape == (1,)

    venv = make_vec_env(SerialVecEnv, make_gym_env, 'CartPole-v1', 3, 0)
    env_spec = EnvSpec(venv)
    agent = RandomAgent(None, env_spec)
    # Bug fix: reset the vec env under test, not the leftover single
    # Pendulum env from the first part of the test.
    out = agent.choose_action(venv.reset())
    assert isinstance(out, dict)
    assert 'action' in out and len(out['action']) == 3 and isinstance(out['action'][0], int)
Example #15
0
def test_episode_runner(vec_env, env_id):
    """EpisodeRunner's batched episodes must replay identically to stepping
    three identically-seeded single envs with the same sticky action."""
    env = make_vec_env(vec_env, make_gym_env, env_id, 3, 0)
    env_spec = EnvSpec(env)

    # A fixed action that is valid for the env's action space.
    if env_id == 'CartPole-v1':
        sticky_action = 1
    elif env_id == 'Pendulum-v0':
        sticky_action = [0.1]

    T = 30

    agent = StickyAgent(None, env_spec, sticky_action)
    runner = EpisodeRunner(None, agent, env)
    D = runner(T)

    assert D.N == 3
    assert D.maxT == max(D.Ts)

    # Rebuild three single envs with the same seeds the vec env used.
    seeder = Seeder(0)
    seed1, seed2, seed3 = seeder(3)
    env1 = make_gym_env(env_id, seed1)
    env2 = make_gym_env(env_id, seed2)
    env3 = make_gym_env(env_id, seed3)

    for n, ev in enumerate([env1, env2, env3]):
        obs = ev.reset()
        assert np.allclose(obs, D.observations[n][0])
        assert np.allclose(obs, D.numpy_observations[n, 0, ...])
        for t in range(T):
            obs, reward, done, info = ev.step(sticky_action)

            assert np.allclose(reward, D.rewards[n][t])
            assert np.allclose(reward, D.numpy_rewards[n, t])
            assert np.allclose(done, D.dones[n][t])
            assert done == D.numpy_dones[n, t]
            # Masks are the complement of dones.
            assert int(not done) == D.masks[n][t]
            assert int(not done) == D.numpy_masks[n, t]

            if done:
                assert np.allclose(obs, D.infos[n][t]['terminal_observation'])
                assert D.completes[n]
                # Everything after the episode end must be zero-padded.
                # Observations keep one extra slot for the initial obs,
                # hence the t + 1 + 1 offset.
                assert np.allclose(0.0, D.numpy_observations[n, t + 1 + 1:,
                                                             ...])
                assert np.allclose(0.0, D.numpy_actions[n, t + 1:, ...])
                assert np.allclose(0.0, D.numpy_rewards[n, t + 1:])
                assert np.allclose(True, D.numpy_dones[n, t + 1:])
                assert np.allclose(0.0, D.numpy_masks[n, t + 1:])
                break
            else:
                assert np.allclose(obs, D.observations[n][t + 1])
Example #16
0
def test_gae_from_episode():
    """gae_from_episode computes Generalized Advantage Estimation per
    episode, bootstrapping incomplete episodes with last_Vs and
    zero-padding each row up to the longest episode."""
    env = make_vec_env(SerialVecEnv, make_gym_env, 'CartPole-v1', 3, 0)
    env_spec = EnvSpec(env)

    D = BatchEpisode(env_spec)
    # Episode 0: terminates after 3 steps.
    D.r[0] = [1, 2, 3]
    D.done[0] = [False, False, True]
    D.completed[0] = True

    # Episode 1: cut off after 5 steps -> bootstrapped with last_Vs[1].
    D.r[1] = [1, 2, 3, 4, 5]
    D.done[1] = [False, False, False, False, False]
    D.completed[1] = False

    # Episode 2: terminates after 8 steps.
    D.r[2] = [1, 2, 3, 4, 5, 6, 7, 8]
    D.done[2] = [False, False, False, False, False, False, False, True]
    D.completed[2] = True

    # State values per time step; row n of each tensor belongs to episode n.
    all_Vs = [
        torch.tensor([[0.1], [0.5], [1.0]]),
        torch.tensor([[1.1], [1.5], [2.0]]),
        torch.tensor([[2.1], [2.5], [3.0]]),
        torch.tensor([[3.1], [3.5], [4.0]]),
        torch.tensor([[4.1], [4.5], [5.0]]),
        torch.tensor([[5.1], [5.5], [6.0]]),
        torch.tensor([[6.1], [6.5], [7.0]]),
        torch.tensor([[7.1], [7.5], [8.0]])
    ]
    # Bootstrap values for the state after the last recorded step.
    last_Vs = torch.tensor([10, 20, 30]).unsqueeze(1)

    # Stack the per-step values into shape (3, 8, 1).
    all_Vs = torch.stack(all_Vs, 1)

    # gamma = 1.0, GAE lambda = 0.5
    out = gae_from_episode(D, all_Vs, last_Vs, 1.0, 0.5)
    assert out.shape == (3, D.maxT)
    assert np.allclose(out[0], [3.725, 3.45, 0.9, 0, 0, 0, 0, 0])
    assert np.allclose(out[1], [6.40625, 8.8125, 11.625, 15.25, 20.5, 0, 0, 0])
    assert np.allclose(out[2],
                       [5.84375, 7.6875, 9.375, 10.75, 11.5, 11., 8, 0.])
    del out

    # gamma = 0.1, GAE lambda = 0.2
    out = gae_from_episode(D, all_Vs, last_Vs, 0.1, 0.2)
    assert out.shape == (3, D.maxT)
    assert np.allclose(out[0], [1.03256, 1.128, 0.9, 0, 0, 0, 0, 0])
    assert np.allclose(out[1], [0.665348, 0.7674, 0.87, 1, 2.5, 0, 0, 0])
    assert np.allclose(out[2], [
        0.206164098, 0.308204915, 0.410245728, 0.5122864, 0.61432, 0.716, 0.8,
        0
    ])

    # The segment-based variant rejects episode data.
    with pytest.raises(AssertionError):
        gae_from_segment(D, all_Vs, last_Vs, 0.1, 0.2)
Example #17
0
def test_trajectory(init_seed, T):
    """Exercise Trajectory bookkeeping over one (possibly truncated)
    episode collected from a single SanityEnv wrapped in VecStepInfo."""
    make_env = lambda: TimeLimit(SanityEnv())
    env = make_vec_env(make_env, 1, init_seed)  # single environment
    env = VecStepInfo(env)
    D = Trajectory()
    assert len(D) == 0
    assert not D.completed
    
    observation, _ = env.reset()
    D.add_observation(observation)
    for t in range(T):
        action = [env.action_space.sample()]
        next_observation, reward, step_info = env.step(action)
        # unbatched for [reward, step_info]
        reward, step_info = map(lambda x: x[0], [reward, step_info])
        if step_info.last:
            # Episode ended: record the true last observation instead of
            # the auto-reset one.
            D.add_observation([step_info['last_observation']])
        else:
            D.add_observation(next_observation)
        D.add_action(action)
        D.add_reward(reward)
        D.add_step_info(step_info)
        observation = next_observation
        if step_info.last:
            # No more observations may be added once the trajectory ended.
            with pytest.raises(AssertionError):
                D.add_observation(observation)
            break
    assert len(D) > 0
    assert len(D) <= T
    # One more observation than transitions (includes the initial one).
    assert len(D) + 1 == len(D.observations)
    assert len(D) + 1 == len(D.numpy_observations)
    assert len(D) == len(D.actions)
    assert len(D) == len(D.numpy_actions)
    assert len(D) == len(D.rewards)
    assert len(D) == len(D.numpy_rewards)
    assert len(D) == len(D.numpy_dones)
    assert len(D) == len(D.numpy_masks)
    # Masks are the logical complement of dones.
    assert np.allclose(np.logical_not(D.numpy_dones), D.numpy_masks)
    assert len(D) == len(D.step_infos)
    if len(D) < T:
        # Ended before the horizon -> a true terminal, not a time limit.
        assert step_info.last
        assert D.completed
        assert D.reach_terminal
        assert not D.reach_time_limit
        assert np.allclose(D.observations[-1], [step_info['last_observation']])
    if not step_info.last:
        assert not D.completed
        assert not D.reach_terminal
        assert not D.reach_time_limit
Example #18
0
def test_random_agent(env_id, num_env):
    """RandomAgent yields valid raw actions for single and vectorized envs."""
    make_env = lambda: gym.make(env_id)
    env = make_env()
    agent = RandomAgent(None, env, 'cpu')
    out = agent.choose_action(env.reset())
    assert isinstance(out, dict)
    # The raw action must be admissible for the env's action space.
    assert out['raw_action'] in env.action_space
    del env, agent, out

    vec = make_vec_env(make_env, num_env, 0)
    agent = RandomAgent(None, vec, 'cpu')
    out = agent.choose_action(vec.reset())
    assert isinstance(out, dict)
    # One raw action per sub-environment, each admissible.
    assert len(out['raw_action']) == num_env
    for action in out['raw_action']:
        assert action in vec.action_space
Example #19
0
def test_random_policy():
    """RandomPolicy outputs valid actions for single and vectorized env specs."""
    env = make_gym_env('Pendulum-v0', 0)
    env_spec = EnvSpec(env)
    policy = RandomPolicy(None, env_spec)
    out = policy(env.reset())
    assert isinstance(out, dict)
    assert 'action' in out and out['action'].shape == (1, )

    venv = make_vec_env(SerialVecEnv, make_gym_env, 'CartPole-v0', 3, 0, False)
    env_spec = EnvSpec(venv)
    policy = RandomPolicy(None, env_spec)
    # Bug fix: feed observations from the vec env being tested, not the
    # previously created single Pendulum env.
    out = policy(venv.reset())
    assert isinstance(out, dict)
    assert 'action' in out and len(out['action']) == 3 and isinstance(
        out['action'][0], int)
Example #20
0
def test_vec_clip_action():
    """VecClipAction clips out-of-bound actions before they reach the env."""
    env = make_vec_env(SerialVecEnv, make_gym_env, 'MountainCarContinuous-v0',
                       2, 0)
    clipped_env = VecClipAction(env)

    # The second action is far outside the valid range and will be clipped.
    action = [[0.5], [1000]]

    env.reset()
    _, raw_rewards, _, _ = env.step(action)

    clipped_env.reset()
    _, clipped_rewards, _, _ = clipped_env.step(action)

    # In-range action: identical reward. Out-of-range action: clipping
    # shrinks the reward magnitude.
    assert raw_rewards[0] == clipped_rewards[0]
    assert abs(raw_rewards[1]) > abs(clipped_rewards[1])
Example #21
0
def test_get_wrapper(env_id):
    """get_wrapper retrieves any wrapper in the chain by its class name."""
    env = make_vec_env(SerialVecEnv, make_gym_env, env_id, 3, 0)
    env = VecClipAction(VecStandardize(env))

    # Each layer of the wrapper chain is reachable by name, including the
    # innermost vec env itself.
    for wrapper_name in ('VecClipAction', 'VecStandardize', 'SerialVecEnv'):
        found = get_wrapper(env, wrapper_name)
        assert found.__class__.__name__ == wrapper_name
Example #22
0
def test_td0_error_from_episode():
    """td0_error_from_episode computes per-step TD(0) errors
    r_t + gamma*V_{t+1} - V_t; the value after the final step comes from
    last_Vs only for episodes that were cut off, and rows are zero-padded
    up to the longest episode."""
    env = make_vec_env(SerialVecEnv, make_gym_env, 'CartPole-v1', 3, 0)
    env_spec = EnvSpec(env)

    D = BatchEpisode(env_spec)
    # Episode 0: terminated after 3 steps.
    D.r[0] = [1, 2, 3]
    D.done[0] = [False, False, True]
    D.completed[0] = True

    # Episode 1: cut off after 5 steps -> bootstrapped with last_Vs[1].
    D.r[1] = [1, 2, 3, 4, 5]
    D.done[1] = [False, False, False, False, False]
    D.completed[1] = False

    # Episode 2: terminated after 8 steps.
    D.r[2] = [1, 2, 3, 4, 5, 6, 7, 8]
    D.done[2] = [False, False, False, False, False, False, False, True]
    D.completed[2] = True

    # State values per time step; row n of each tensor belongs to episode n.
    all_Vs = [
        torch.tensor([[0.1], [0.5], [1.0]]),
        torch.tensor([[1.1], [1.5], [2.0]]),
        torch.tensor([[2.1], [2.5], [3.0]]),
        torch.tensor([[3.1], [3.5], [4.0]]),
        torch.tensor([[4.1], [4.5], [5.0]]),
        torch.tensor([[5.1], [5.5], [6.0]]),
        torch.tensor([[6.1], [6.5], [7.0]]),
        torch.tensor([[7.1], [7.5], [8.0]])
    ]
    # Bootstrap values for the state after the last recorded step.
    last_Vs = torch.tensor([10, 20, 30]).unsqueeze(1)

    # Stack the per-step values into shape (3, 8, 1).
    all_Vs = torch.stack(all_Vs, 1)

    out = td0_error_from_episode(D, all_Vs, last_Vs, 1.0)
    assert out.shape == (3, D.maxT)
    # e.g. episode 0 at t=0: 1 + 1.1 - 0.1 = 2.0
    assert np.allclose(out[0], [2.0, 3, 0.9, 0, 0, 0, 0, 0])
    # Episode 1 last step uses last_Vs[1]: 5 + 20 - 4.5 = 20.5
    assert np.allclose(out[1], [2, 3, 4, 5, 20.5, 0, 0, 0])
    assert np.allclose(out[2], [2, 3, 4, 5, 6, 7, 8, 0])
    del out

    out = td0_error_from_episode(D, all_Vs, last_Vs, 0.1)
    assert out.shape == (3, D.maxT)
    assert np.allclose(out[0], [1.01, 1.11, 0.9, 0, 0, 0, 0, 0])
    assert np.allclose(out[1], [0.65, 0.75, 0.85, 0.95, 2.5, 0, 0, 0])
    assert np.allclose(out[2], [0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0])

    # The segment-based variant rejects episode data.
    with pytest.raises(AssertionError):
        td0_error_from_segment(D, all_Vs, last_Vs, 0.1)
Example #23
0
 def init(self, seed, config):
     """Build the environment, spec, network, policy and agent for this worker.

     The policy head is chosen from the env's control type: categorical for
     discrete action spaces, Gaussian for continuous ones.
     """
     # Make environment
     # Remember to seed it in each working function !
     self.env = make_vec_env(vec_env_class=SerialVecEnv, 
                             make_env=make_gym_env, 
                             env_id=config['env.id'], 
                             num_env=1, 
                             init_seed=seed)
     self.env_spec = EnvSpec(self.env)
     
     # Make agent
     self.network = Network(config=config, env_spec=self.env_spec)
     if self.env_spec.control_type == 'Discrete':
         self.policy = CategoricalPolicy(config=config, network=self.network, env_spec=self.env_spec)
     elif self.env_spec.control_type == 'Continuous':
         self.policy = GaussianPolicy(config=config, network=self.network, env_spec=self.env_spec)
     self.agent = Agent(policy=self.policy, config=config)
Example #24
0
    def _prepare(self, config):
        """Build the (optionally standardized) training env, its spec and the agent."""
        self.env = make_vec_env(SerialVecEnv, make_gym_env, config['env.id'],
                                config['train.N'], 0)
        self.env = VecClipAction(self.env)
        if config['env.standardize']:
            # Standardize observations only; rewards pass through unchanged.
            self.env = VecStandardize(self.env,
                                      use_obs=True,
                                      use_reward=False,
                                      clip_obs=10.0,
                                      clip_reward=10.0,
                                      gamma=0.99,
                                      eps=1e-08)
        self.env_spec = EnvSpec(self.env)

        self.device = torch.device('cpu')

        self.agent = Agent(config, self.env_spec, self.device)
Example #25
0
def test_vec_monitor(env_id, vec_env_class, num_env, seed, runner_class):
    """VecMonitor attaches an 'episode' statistics dict to terminal step infos."""
    env = make_vec_env(vec_env_class, make_gym_env, env_id, num_env, seed)
    env_spec = EnvSpec(env)
    env = VecMonitor(env)
    agent = RandomAgent(None, env_spec)
    runner = runner_class(None, agent, env)
    D = runner(1050)

    for env_infos in D.infos:
        for info in env_infos:
            if 'terminal_observation' not in info:
                continue
            # Every terminal step must carry the episode summary with
            # correctly typed statistics.
            assert 'episode' in info
            episode = info['episode']
            assert 'return' in episode and isinstance(episode['return'],
                                                      np.float32)
            assert 'horizon' in episode and isinstance(episode['horizon'],
                                                       np.int32)
            assert 'time' in episode and isinstance(episode['time'], float)
Example #26
0
def test_sticky_agent():
    """StickyAgent always returns its fixed action, for single and vec envs."""
    sticky_action = 0

    env = make_gym_env('CartPole-v1', 0)
    env_spec = EnvSpec(env)
    agent = StickyAgent(None, env_spec, sticky_action)
    out = agent.choose_action(env.reset())
    assert isinstance(out, dict)
    assert 'action' in out and isinstance(out['action'], int)
    assert out['action'] == sticky_action

    venv = make_vec_env(SerialVecEnv, make_gym_env, 'CartPole-v1', 3, 0)
    env_spec = EnvSpec(venv)
    agent = StickyAgent(None, env_spec, sticky_action)
    # Bug fix: reset the vec env under test, not the leftover single env
    # from the first part of the test.
    out = agent.choose_action(venv.reset())
    assert isinstance(out, dict)
    assert 'action' in out and len(out['action']) == 3 and isinstance(out['action'][0], int)
    assert np.allclose(out['action'], [0, 0, 0])
Example #27
0
 def test_rolling(self, vec_env_class):
     """With rolling=False, stepping past the end of every episode yields
     None placeholders until the vec env is reset again."""
     venv = make_vec_env(vec_env_class,
                         make_gym_env,
                         'CartPole-v1',
                         5,
                         1,
                         rolling=False)
     venv.reset()
     # 100 random steps — enough for every CartPole episode to finish.
     for _ in range(100):
         observations, rewards, dones, infos = venv.step(
             [venv.action_space.sample()] * 5)
     assert all(
         [len(x) == 5 for x in [observations, rewards, dones, infos]])
     # After all episodes ended, every field degrades to None placeholders.
     assert all(
         [x == [None] * 5 for x in [observations, rewards, dones, infos]])
     venv.reset()
     # A reset restores real step results (rewards, dones, infos non-None).
     result = venv.step([venv.action_space.sample()] * 5)
     assert all([None not in result[i] for i in [1, 2, 3]])
Example #28
0
def test_rolling_segment_runner(vec_env, env_id):
    """RollingSegmentRunner data must replay identically to stepping three
    identically-seeded single envs that reset immediately on done."""
    env = make_vec_env(vec_env, make_gym_env, env_id, 3, 0)
    env_spec = EnvSpec(env)

    # A fixed action that is valid for the env's action space.
    if env_id == 'CartPole-v1':
        sticky_action = 1
    elif env_id == 'Pendulum-v0':
        sticky_action = [0.1]

    T = 30

    agent = StickyAgent(None, env_spec, sticky_action)
    runner = RollingSegmentRunner(None, agent, env)
    D = runner(T)

    assert D.N == 3
    assert D.T == T

    # Rebuild three single envs with the same seeds the vec env used.
    seeder = Seeder(0)
    seed1, seed2, seed3 = seeder(3)
    env1 = make_gym_env(env_id, seed1)
    env2 = make_gym_env(env_id, seed2)
    env3 = make_gym_env(env_id, seed3)

    for n, ev in enumerate([env1, env2, env3]):
        obs = ev.reset()
        assert np.allclose(obs, D.numpy_observations[n, 0, ...])
        for t in range(T):
            obs, reward, done, info = ev.step(sticky_action)
            if done:
                # Mimic the rolling runner: stash the terminal observation
                # and continue from a fresh reset.
                info['terminal_observation'] = obs
                obs = ev.reset()

            assert np.allclose(obs, D.numpy_observations[n, t + 1, ...])
            assert np.allclose(sticky_action, D.numpy_actions[n, t, ...])
            assert np.allclose(reward, D.numpy_rewards[n, t])
            assert done == D.numpy_dones[n, t]
            # Masks are the complement of dones.
            assert int(not done) == D.numpy_masks[n, t]

            if done:
                assert np.allclose(info['terminal_observation'],
                                   D.infos[n][t]['terminal_observation'])
Example #29
0
    def test_vec_env(self, vec_env_class):
        """Sanity-check a freshly built vec env and its EnvSpec."""
        # unpack (index, class) pair
        v_id, vec_env_class = vec_env_class

        venv = make_vec_env(vec_env_class, make_gym_env, 'CartPole-v1', 5, 1,
                            True)
        assert isinstance(venv, VecEnv)
        assert v_id in [0, 1]
        if v_id == 0:
            # Bug fix: this isinstance check was missing its `assert` and
            # therefore never verified anything.
            assert isinstance(venv, SerialVecEnv)
        elif v_id == 1:
            assert isinstance(venv, ParallelVecEnv)

        assert venv.num_env == 5
        assert not venv.closed and venv.viewer is None
        assert venv.unwrapped is venv
        assert isinstance(venv.observation_space, Box)
        assert isinstance(venv.action_space, Discrete)
        assert venv.T == 500
        assert venv.max_episode_reward == 475.0
        assert venv.reward_range == (-float('inf'), float('inf'))
        obs = venv.reset()
        assert len(obs) == 5
        assert np.asarray(obs).shape == (5, 4)
        # Each sub-env gets a distinct seed, so initial observations differ.
        assert all([not np.allclose(obs[0], obs[i]) for i in [1, 2, 3, 4]])
        a = [1] * 5
        obs, rewards, dones, infos = venv.step(a)
        assert all([len(item) == 5 for item in [obs, rewards, dones, infos]])
        assert all([not np.allclose(obs[0], obs[i]) for i in [1, 2, 3, 4]])

        # EnvSpec mirrors the vec env's spaces and limits.
        env_spec = EnvSpec(venv)
        assert isinstance(env_spec.action_space, Discrete)
        assert isinstance(env_spec.observation_space, Box)
        assert env_spec.control_type == 'Discrete'
        assert env_spec.T == 500
        assert env_spec.max_episode_reward == 475.0
        assert env_spec.reward_range == (-float('inf'), float('inf'))
        assert env_spec.is_vec_env

        venv.close()
        assert venv.closed
Example #30
0
def make_env(config, seed, mode):
    """Create a single monitored env; training mode adds standardization
    wrappers and step-info bookkeeping."""
    assert mode in ['train', 'eval']

    def _make_env():
        raw = gym.make(config['env.id'])
        if config['env.clip_action'] and isinstance(raw.action_space, Box):
            raw = ClipAction(raw)
        return raw

    env = VecMonitor(make_vec_env(_make_env, 1, seed))  # single environment
    if mode == 'train':
        if config['env.standardize_obs']:
            env = VecStandardizeObservation(env, clip=5.)
        if config['env.standardize_reward']:
            env = VecStandardizeReward(env,
                                       clip=10.,
                                       gamma=config['agent.gamma'])
        env = VecStepInfo(env)
    return env