def test_normalize_observation_vector_env():
    env_fns = [make_env(0), make_env(1)]
    envs = gym.vector.SyncVectorEnv(env_fns)
    envs.reset()
    obs, reward, _, _ = envs.step(envs.action_space.sample())
    np.testing.assert_almost_equal(obs, np.array([[1], [2]]), decimal=4)
    np.testing.assert_almost_equal(reward, np.array([1, 2]), decimal=4)

    env_fns = [make_env(0), make_env(1)]
    envs = gym.vector.SyncVectorEnv(env_fns)
    envs = NormalizeObservation(envs)
    envs.reset()
    assert_almost_equal(
        envs.obs_rms.mean,
        np.mean([0.5]),  # the mean of the first observations [[0, 1]]
        decimal=4,
    )
    obs, reward, _, _ = envs.step(envs.action_space.sample())
    assert_almost_equal(
        envs.obs_rms.mean,
        np.mean([1.0]),  # the mean of the first and second observations [[0, 1], [1, 2]]
        decimal=4,
    )
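# The tests in this file rely on two helpers defined elsewhere in the suite.
# The sketch below is a reconstruction consistent with the values asserted
# above (obs counts up from return_reward_idx, reward tracks the same
# counter); the names DummyRewardEnv and make_env come from the tests, but
# these bodies are an assumption, not the canonical implementations. The
# imports are normally at the top of the file.

import numpy as np

import gym
from numpy.testing import assert_almost_equal
from gym.wrappers import NormalizeObservation


class DummyRewardEnv(gym.Env):
    """Deterministic env whose observation and reward count up from return_reward_idx."""

    def __init__(self, return_reward_idx=0):
        self.action_space = gym.spaces.Discrete(2)
        self.observation_space = gym.spaces.Box(low=np.array([-1.0]), high=np.array([10.0]))
        self.return_reward_idx = return_reward_idx
        self.t = return_reward_idx

    def step(self, action):
        self.t += 1
        # obs 1, 2, 3, ... after a reset; reward follows the same counter
        return np.array([self.t]), self.t, False, {}

    def reset(self, *, seed=None, return_info=False, options=None):
        self.t = self.return_reward_idx
        # initial obs equals the starting index: 0 for make_env(0), 1 for make_env(1)
        return (np.array([self.t]), {}) if return_info else np.array([self.t])


def make_env(return_reward_idx):
    # Thunk factory, as SyncVectorEnv expects a list of env constructors.
    def thunk():
        return DummyRewardEnv(return_reward_idx=return_reward_idx)

    return thunk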
@pytest.mark.parametrize("return_info", [True, False])
def test_normalize_observation(return_info: bool):
    env = DummyRewardEnv(return_reward_idx=0)
    env = NormalizeObservation(env)
    env.reset(return_info=return_info)
    env.step(env.action_space.sample())
    assert_almost_equal(env.obs_rms.mean, 0.5, decimal=4)
    env.step(env.action_space.sample())
    assert_almost_equal(env.obs_rms.mean, 1.0, decimal=4)
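# Worked check of the running-mean arithmetic behind the two assertions above
# (a sketch, not part of the original suite): NormalizeObservation's obs_rms
# averages every observation seen so far -- 0 on reset, then 1 and 2 from the
# two steps.
def test_normalize_observation_running_mean_arithmetic():
    assert np.mean([0, 1]) == 0.5  # after reset + first step
    assert np.mean([0, 1, 2]) == 1.0  # after the second step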
def test_normalize_reset_info():
    env = DummyRewardEnv(return_reward_idx=0)
    env = NormalizeObservation(env)
    obs = env.reset()
    assert isinstance(obs, np.ndarray)
    del obs
    obs = env.reset(return_info=False)
    assert isinstance(obs, np.ndarray)
    del obs
    obs, info = env.reset(return_info=True)
    assert isinstance(obs, np.ndarray)
    assert isinstance(info, dict)
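# For reference, the wrapper is applied the same way outside the test suite; a
# minimal usage sketch ("CartPole-v1" is an arbitrary stand-in env, not part
# of the tests above).
def example_normalize_observation_usage():
    env = NormalizeObservation(gym.make("CartPole-v1"))
    env.reset()
    for _ in range(10):
        obs, reward, done, info = env.step(env.action_space.sample())
        # obs is shifted and scaled by the running statistics in env.obs_rms
        if done:
            env.reset()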