Example #1
import numpy as np
from gym.spaces import Box, Discrete

# DummyEnv and frame_stack come from the surrounding test suite and wrapper
# library; their import paths are not shown in the original snippet.

# Two agents with Discrete(5) action spaces (definition taken from Example #4);
# this is what makes the 5**4 stacked-space assertion below come out.
base_act_spaces = {"a_{}".format(idx): Discrete(5) for idx in range(2)}


def test_frame_stack():
    base_obs_space = {
        "a_{}".format(idx): Box(low=np.float32(0.),
                                high=np.float32(10.),
                                shape=[2, 3])
        for idx in range(2)
    }
    base_obs = {
        "a_{}".format(idx): np.zeros([2, 3]) + np.arange(3) + idx
        for idx in range(2)
    }
    base_env = DummyEnv(base_obs, base_obs_space, base_act_spaces)
    env = frame_stack(base_env, 4)
    obs = env.reset()
    assert obs.shape == (2, 3, 4)
    first_obs = env.step(2)
    assert np.all(np.equal(first_obs[:, :, -1], base_obs["a_1"]))
    assert np.all(np.equal(first_obs[:, :, :-1], 0))

    base_obs = {"a_{}".format(idx): idx + 3 for idx in range(2)}
    base_env = DummyEnv(base_obs, base_act_spaces, base_act_spaces)
    env = frame_stack(base_env, 4)
    obs = env.reset()
    assert env.observation_spaces[env.agent_selection].n == 5**4
    first_obs = env.step(2)
    assert first_obs == 4
    second_obs = env.step(2)
    assert second_obs == 3 + 3 * 5
    # after 100 more steps the stack is saturated with a_0's observation (3)
    for _ in range(100):
        nth_obs = env.step(2)
    assert nth_obs == ((3 * 5 + 3) * 5 + 3) * 5 + 3
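
The final assertion simply unrolls the integer encoding that frame_stack applies to Discrete observations: the last four frames become the base-5 digits of a single integer (the digit order below is inferred from the assertions themselves, not from the wrapper's source):

# Re-deriving the expected value: after many steps the stack holds four
# copies of a_0's observation (3), packed as base-5 digits of one integer.
frames = [3, 3, 3, 3]
encoded = 0
for frame in frames:
    encoded = encoded * 5 + frame  # shift existing digits, append newest frame
assert encoded == ((3 * 5 + 3) * 5 + 3) * 5 + 3 == 468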
Example #2
# game_env is the PettingZoo game module this factory is built around; resize,
# frame_stack, and agent_indicator (and the commented-out alternatives) are
# SuperSuit-style observation wrappers.
def env_creator(args):
    env = game_env.env(obs_type='grayscale_image')
    # env = clip_reward(env, lower_bound=-1, upper_bound=1)
    # env = sticky_actions(env, repeat_action_probability=0.25)
    env = resize(env, 84, 84)
    # env = color_reduction(env, mode='full')
    env = frame_stack(env, 4)
    env = agent_indicator(env, type_only=False)
    # env = frame_skip(env, 4)
    return env
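
A minimal smoke test of the factory (hypothetical, not part of the original example; args is simply passed as None, and step returns the next agent's observation as in the other examples):

env = env_creator(None)
obs = env.reset()
for _ in range(8):
    # sample a random action for whichever agent is due to act
    action = env.action_spaces[env.agent_selection].sample()
    obs = env.step(action)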
Example #3
# aec_wrappers is the wrapper module under test; the new_dummy() fixture the
# wrappers are applied to is shown in Example #4 below.
def test_basic_wrappers():
    wrappers = [
        aec_wrappers.color_reduction(new_dummy(), "R"),
        aec_wrappers.down_scale(new_dummy(), x_scale=5, y_scale=10),
        aec_wrappers.dtype(new_dummy(), np.int32),
        aec_wrappers.flatten(new_dummy()),
        aec_wrappers.reshape(new_dummy(), (64, 3)),
        aec_wrappers.normalize_obs(new_dummy(), env_min=-1, env_max=5.),
        aec_wrappers.frame_stack(new_dummy(), 8),
        aec_wrappers.pad_observations(new_dummy()),
    ]
    for env in wrappers:
        obs = env.reset()
        first_obs = env.observe("a_1")
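
A cheap way to strengthen this smoke test (a sketch, not in the original, and assuming each wrapper keeps its declared space consistent with what observe() returns) is gym's Space.contains check:

for env in wrappers:
    env.reset()
    obs = env.observe("a_1")
    space = env.observation_spaces["a_1"]
    # fail with the wrapper's name if the observation leaves the space
    assert space.contains(obs), type(env).__name__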
Example #4
import numpy as np
import pytest
from gym.spaces import Box, Discrete


def new_dummy():
    # The original snippet is truncated at the top of this function; the
    # observation values and spaces below are reconstructed from Example #1's
    # pattern, widened to the [8, 8, 3] shape this example uses.
    base_obs = {
        "a_{}".format(idx): np.zeros([8, 8, 3]) + np.arange(3) + idx
        for idx in range(2)
    }
    base_obs_space = {
        "a_{}".format(idx): Box(low=np.float32(0.),
                                high=np.float32(10.),
                                shape=[8, 8, 3])
        for idx in range(2)
    }
    base_act_spaces = {"a_{}".format(idx): Discrete(5) for idx in range(2)}

    return DummyEnv(base_obs, base_obs_space, base_act_spaces)


wrappers = [
    aec_wrappers.color_reduction(new_dummy(), "R"),
    aec_wrappers.down_scale(new_dummy(), x_scale=5, y_scale=10),
    aec_wrappers.dtype(new_dummy(), np.int32),
    aec_wrappers.flatten(new_dummy()),
    aec_wrappers.reshape(new_dummy(), (64, 3)),
    aec_wrappers.normalize_obs(new_dummy(), env_min=-1, env_max=5.),
    aec_wrappers.frame_stack(new_dummy(), 8),
    aec_wrappers.pad_observations(new_dummy()),
    aec_wrappers.pad_action_space(new_dummy()),
    aec_wrappers.continuous_actions(new_dummy()),
    aec_wrappers.agent_indicator(new_dummy(), True),
    aec_wrappers.agent_indicator(new_dummy(), False),
]


@pytest.mark.parametrize("env", wrappers)
def test_basic_wrappers(env):
    obs = env.reset()
    act_space = env.action_spaces[env.agent_selection]
    obs_space = env.observation_spaces[env.agent_selection]
    first_obs = env.observe("a_0")
    print(obs_space.low.shape)  # every wrapper in the list keeps a Box obs space
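
All four examples lean on a DummyEnv fixture whose definition never appears. A minimal sketch of what such a fixture could look like (hypothetical; the real test helper may differ in detail):

class DummyEnvSketch:
    """Hypothetical stand-in for DummyEnv: an AEC-style env whose agents
    always observe the fixed values passed in at construction time."""

    def __init__(self, observations, observation_spaces, action_spaces):
        self.agents = list(observations.keys())
        self._observations = observations
        self.observation_spaces = observation_spaces
        self.action_spaces = action_spaces
        self.agent_selection = self.agents[0]

    def reset(self):
        self.agent_selection = self.agents[0]
        return self._observations[self.agent_selection]

    def observe(self, agent):
        return self._observations[agent]

    def step(self, action):
        # rotate to the next agent and return its (fixed) observation,
        # matching the step-returns-obs convention used in the tests above
        idx = self.agents.index(self.agent_selection)
        self.agent_selection = self.agents[(idx + 1) % len(self.agents)]
        return self._observations[self.agent_selection]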