def _make_warmstart_cartpole():
    """Build a VecNormalize-wrapped CartPole env and step it with random actions.

    A single ``CartPole-v1`` environment is placed in a ``DummyVecEnv``,
    wrapped in ``VecNormalize`` with default arguments, reset, and then
    stepped 100 times using actions sampled from the action space.

    :return: the ``VecNormalize`` wrapper after the warm-up steps
    """
    n_warmup_steps = 100

    def _cartpole_factory():
        return gym.make("CartPole-v1")

    normalized_env = VecNormalize(DummyVecEnv([_cartpole_factory]))
    normalized_env.reset()
    # Called right after reset; the return value is not used here.
    normalized_env.get_original_obs()

    for _ in range(n_warmup_steps):
        # One sampled action, wrapped in a list because a single env is vectorized.
        normalized_env.step([normalized_env.action_space.sample()])

    return normalized_env
def test_sync_vec_normalize():
    """Check ``unwrap_vec_normalize`` and ``sync_envs_normalization``.

    First verifies that ``unwrap_vec_normalize`` returns ``None`` for a bare
    ``DummyVecEnv`` and a ``VecNormalize`` instance once that wrapper is in
    the stack (also when a ``VecFrameStack`` sits on top of it).

    Then steps a training env 100 times with sampled actions and asserts that
    an evaluation env built with ``training=False`` normalizes the same
    original observation differently before ``sync_envs_normalization`` is
    called, and identically (per ``np.allclose``) afterwards.
    """
    # Normalization settings shared by the training and evaluation wrappers.
    norm_kwargs = dict(norm_obs=True, norm_reward=True, clip_obs=10., clip_reward=10.)
    n_steps = 100

    train_env = DummyVecEnv([make_env])
    assert unwrap_vec_normalize(train_env) is None

    train_env = VecNormalize(train_env, **norm_kwargs)
    assert isinstance(unwrap_vec_normalize(train_env), VecNormalize)

    train_env = VecFrameStack(train_env, 1)
    assert isinstance(unwrap_vec_normalize(train_env), VecNormalize)

    frozen_env = DummyVecEnv([make_env])
    frozen_env = VecNormalize(frozen_env, training=False, **norm_kwargs)
    frozen_env = VecFrameStack(frozen_env, 1)

    train_env.reset()
    # Initialize running mean
    for _ in range(n_steps):
        sampled_action = train_env.action_space.sample()
        train_env.step([sampled_action])

    normalized_obs = train_env.reset()
    raw_obs = train_env.get_original_obs()

    # Normalization must be different
    before_sync = frozen_env.normalize_obs(raw_obs)
    assert not np.allclose(normalized_obs, before_sync)

    sync_envs_normalization(train_env, frozen_env)

    # Now they must be synced
    after_sync = frozen_env.normalize_obs(raw_obs)
    assert np.allclose(normalized_obs, after_sync)