Example #1
0
def test_normalize_observation_vector_env():
    """NormalizeObservation on a vector env keeps a running mean of all observations seen."""
    # Baseline: without the wrapper, raw observations and rewards pass through untouched.
    base = gym.vector.SyncVectorEnv([make_env(0), make_env(1)])
    base.reset()
    raw_obs, raw_reward, _, _ = base.step(base.action_space.sample())
    np.testing.assert_almost_equal(raw_obs, np.array([[1], [2]]), decimal=4)
    np.testing.assert_almost_equal(raw_reward, np.array([1, 2]), decimal=4)

    # Wrapped: obs_rms.mean must track the mean of every observation seen so far.
    wrapped = NormalizeObservation(
        gym.vector.SyncVectorEnv([make_env(0), make_env(1)])
    )
    wrapped.reset()
    # Only the reset observations [[0], [1]] have been seen -> running mean 0.5.
    assert_almost_equal(
        wrapped.obs_rms.mean,
        np.mean([0.5]),  # the mean of first observations [[0, 1]]
        decimal=4,
    )
    wrapped.step(wrapped.action_space.sample())
    # After one step the stream is [[0], [1]] then [[1], [2]] -> running mean 1.0.
    assert_almost_equal(
        wrapped.obs_rms.mean,
        np.mean([
            1.0
        ]),  # the mean of first and second observations [[0, 1], [1, 2]]
        decimal=4,
    )
Example #2
0
def test_normalize_observation(return_info: bool):
    """The wrapper's running observation mean updates after every single-env step."""
    wrapped = NormalizeObservation(DummyRewardEnv(return_reward_idx=0))
    wrapped.reset(return_info=return_info)
    # Expected running mean after each of two consecutive steps.
    for expected_mean in (0.5, 1.0):
        wrapped.step(wrapped.action_space.sample())
        assert_almost_equal(wrapped.obs_rms.mean, expected_mean, decimal=4)
Example #3
0
def test_normalize_reset_info():
    """reset() yields a bare array by default and an (obs, info) pair with return_info=True."""
    wrapped = NormalizeObservation(DummyRewardEnv(return_reward_idx=0))

    # Default call and explicit return_info=False both return just the observation.
    assert isinstance(wrapped.reset(), np.ndarray)
    assert isinstance(wrapped.reset(return_info=False), np.ndarray)

    # return_info=True returns the observation together with an info dict.
    observation, info = wrapped.reset(return_info=True)
    assert isinstance(observation, np.ndarray)
    assert isinstance(info, dict)