Beispiel #1
0
def test_gray_scale_observation(env_id, keep_dim):
    gray_env = AtariPreprocessing(gym.make(env_id),
                                  screen_size=84,
                                  grayscale_obs=True)
    rgb_env = AtariPreprocessing(gym.make(env_id),
                                 screen_size=84,
                                 grayscale_obs=False)
    wrapped_env = GrayScaleObservation(rgb_env, keep_dim=keep_dim)
    assert rgb_env.observation_space.shape[-1] == 3

    seed = 0
    gray_env.seed(seed)
    wrapped_env.seed(seed)

    gray_obs = gray_env.reset()
    wrapped_obs = wrapped_env.reset()

    if keep_dim:
        assert wrapped_env.observation_space.shape[-1] == 1
        assert len(wrapped_obs.shape) == 3
        wrapped_obs = wrapped_obs.squeeze(-1)
    else:
        assert len(wrapped_env.observation_space.shape) == 2
        assert len(wrapped_obs.shape) == 2

    # ALE gray scale is slightly different, but no more than by one shade
    assert np.allclose(gray_obs.astype("int32"),
                       wrapped_obs.astype("int32"),
                       atol=1)
Beispiel #2
0
    if environment_used != "CartPole-v1":
        env = GrayScaleObservation(env)
        env = ResizeObservation(env, resolution)
        env = FrameStack(env, 4)
        agent = VizdoomAgent(env.action_space, resolution , eta, test_mode, environment_used)
    else:
        agent = RandomAgent(env.action_space, env.observation_space, eta, test_mode, environment_used)

    # You provide the directory to write to (can be an existing
    # directory, including one with existing data -- all monitor files
    # will be namespaced). You can also dump to a tempdir if you'd
    # like: tempfile.mkdtemp().
    outdir = '/tmp/random-agent-results'
    env = wrappers.Monitor(env, directory=outdir, force=True)
    env.seed(0)

    mem = Memory()

    episode_count = 50
    reward = 0
    done = False

    rewardPerEp = numpy.array([])
    ep = numpy.array([])
    interaction = numpy.array([])

    for i in range(episode_count):
        env.reset()
        actual_state, reward, done, info = env.step(env.action_space.sample())