Example no. 1
def _make_warmstart_dict_env():
    """Warm-start VecNormalize by stepping through BitFlippingEnv"""
    venv = DummyVecEnv([make_dict_env])
    venv = VecNormalize(venv)
    venv.reset()
    # Exercise get_original_obs() once; the return value is not used here
    venv.get_original_obs()

    # Step with random actions so the running obs/return statistics get updated
    for _ in range(100):
        actions = [venv.action_space.sample()]
        venv.step(actions)
    return venv
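The factory make_dict_env is not part of this excerpt; in the stable-baselines3 test suite it builds a small environment with a Dict observation space (the docstring above mentions BitFlippingEnv). A minimal stand-in with Box subspaces, which is what VecNormalize can normalize, might look like this (names and shapes are illustrative):

import numpy as np
import gym
from gym import spaces


class DummyDictEnv(gym.Env):
    """Toy goal-style env whose observation is a Dict of Box spaces."""

    observation_space = spaces.Dict({
        "observation": spaces.Box(low=-1.0, high=1.0, shape=(3,), dtype=np.float32),
        "achieved_goal": spaces.Box(low=-1.0, high=1.0, shape=(3,), dtype=np.float32),
        "desired_goal": spaces.Box(low=-1.0, high=1.0, shape=(3,), dtype=np.float32),
    })
    action_space = spaces.Box(low=-1.0, high=1.0, shape=(2,), dtype=np.float32)

    def reset(self):
        return self.observation_space.sample()

    def step(self, action):
        # Old-style gym API: (obs, reward, done, info)
        return self.observation_space.sample(), 0.0, False, {}


def make_dict_env():
    return DummyDictEnv()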
Example no. 2
def _make_warmstart_cartpole():
    """Warm-start VecNormalize by stepping through CartPole"""
    venv = DummyVecEnv([lambda: gym.make("CartPole-v1")])
    venv = VecNormalize(venv)
    venv.reset()
    venv.get_original_obs()

    for _ in range(100):
        actions = [venv.action_space.sample()]
        venv.step(actions)
    return venv
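After the warm start, the wrapper's running statistics reflect what it has seen; a quick check, assuming the obs_rms attribute of stable-baselines3's VecNormalize:

import numpy as np

venv = _make_warmstart_cartpole()
# The running mean/variance should have moved away from their initial values
print(venv.obs_rms.mean, venv.obs_rms.var)

obs = venv.reset()
original_obs = venv.get_original_obs()
# Normalized and raw observations should now differ
assert not np.allclose(obs, original_obs)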
Example no. 3
def _make_warmstart(env_fn, **kwargs):
    """Warm-start VecNormalize by stepping through 100 actions."""
    venv = DummyVecEnv([env_fn])
    venv = VecNormalize(venv, **kwargs)
    venv.reset()
    venv.get_original_obs()

    for _ in range(100):
        actions = [venv.action_space.sample()]
        venv.step(actions)
    return venv
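The **kwargs are forwarded to VecNormalize, so one helper covers different configurations; a small usage sketch (Pendulum-v1 is just an illustrative environment id):

import gym

venv = _make_warmstart(lambda: gym.make("Pendulum-v1"),
                       norm_reward=False,
                       clip_obs=10.0)
# The flags passed above end up on the VecNormalize wrapper
assert venv.norm_obs and not venv.norm_reward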
Example no. 4
def test_sync_vec_normalize():
    env = DummyVecEnv([make_env])

    assert unwrap_vec_normalize(env) is None

    env = VecNormalize(env, norm_obs=True, norm_reward=True, clip_obs=10., clip_reward=10.)

    assert isinstance(unwrap_vec_normalize(env), VecNormalize)

    env = VecFrameStack(env, 1)

    assert isinstance(unwrap_vec_normalize(env), VecNormalize)

    eval_env = DummyVecEnv([make_env])
    eval_env = VecNormalize(eval_env, training=False, norm_obs=True, norm_reward=True, clip_obs=10., clip_reward=10.)
    eval_env = VecFrameStack(eval_env, 1)

    env.reset()
    # Initialize running mean
    for _ in range(100):
        env.step([env.action_space.sample()])

    obs = env.reset()
    original_obs = env.get_original_obs()
    dummy_rewards = np.random.rand(10)
    # Normalization must be different
    assert not np.allclose(obs, eval_env.normalize_obs(original_obs))

    sync_envs_normalization(env, eval_env)

    # Now they must be synced
    assert np.allclose(obs, eval_env.normalize_obs(original_obs))
    assert np.allclose(env.normalize_reward(dummy_rewards), eval_env.normalize_reward(dummy_rewards))
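sync_envs_normalization copies the running statistics from the training wrapper into the evaluation wrapper; the same statistics can also be persisted and restored. A minimal save/load sketch, assuming the stable-baselines3 import path and the VecNormalize.save / VecNormalize.load API:

from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize

env.save("vecnormalize.pkl")  # store the running obs/return statistics

new_eval_env = DummyVecEnv([make_env])
new_eval_env = VecNormalize.load("vecnormalize.pkl", new_eval_env)
new_eval_env.training = False     # freeze the statistics at evaluation time
new_eval_env.norm_reward = False  # reward normalization is usually disabled for evaluation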
Example no. 5
def test_sync_vec_normalize(make_env):
    env = DummyVecEnv([make_env])

    assert unwrap_vec_normalize(env) is None

    env = VecNormalize(env,
                       norm_obs=True,
                       norm_reward=True,
                       clip_obs=100.0,
                       clip_reward=100.0)

    assert isinstance(unwrap_vec_normalize(env), VecNormalize)

    if not isinstance(env.observation_space, spaces.Dict):
        env = VecFrameStack(env, 1)
        assert isinstance(unwrap_vec_normalize(env), VecNormalize)

    eval_env = DummyVecEnv([make_env])
    eval_env = VecNormalize(eval_env,
                            training=False,
                            norm_obs=True,
                            norm_reward=True,
                            clip_obs=100.0,
                            clip_reward=100.0)

    if not isinstance(env.observation_space, spaces.Dict):
        eval_env = VecFrameStack(eval_env, 1)

    env.seed(0)
    env.action_space.seed(0)

    env.reset()
    # Initialize running mean
    latest_reward = None
    for _ in range(100):
        _, latest_reward, _, _ = env.step([env.action_space.sample()])

    # Check that unnormalized reward is same as original reward
    original_latest_reward = env.get_original_reward()
    assert np.allclose(original_latest_reward,
                       env.unnormalize_reward(latest_reward))

    obs = env.reset()
    dummy_rewards = np.random.rand(10)
    original_obs = env.get_original_obs()
    # Check that unnormalization works
    # (allclose here is a module-level test helper, e.g. functools.partial(np.allclose, atol=1e-6))
    assert allclose(original_obs, env.unnormalize_obs(obs))
    # Normalization must be different (between different environments)
    assert not allclose(obs, eval_env.normalize_obs(original_obs))

    # Test syncing of parameters
    sync_envs_normalization(env, eval_env)
    # Now they must be synced
    assert allclose(obs, eval_env.normalize_obs(original_obs))
    assert allclose(env.normalize_reward(dummy_rewards),
                    eval_env.normalize_reward(dummy_rewards))
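Unlike Example no. 4, this version takes make_env as a parameter and skips VecFrameStack for Dict observation spaces, so it is meant to run against several environment factories; with pytest that would typically be wired up as follows (make_dict_env is assumed to be a module-level factory for a Dict-observation env):

import pytest

@pytest.mark.parametrize("make_env", [make_env, make_dict_env])
def test_sync_vec_normalize(make_env):
    ...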
import os

import gym
import pybullet_envs  # noqa: F401  (registers Bullet envs such as Walker2DBulletEnv-v0)
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize  # assumed import path


def make_env(env_id,
             rank=0,
             seed=0,
             log_dir=None,
             wrapper_class=None,
             env_kwargs=None):
    if log_dir is not None:
        os.makedirs(log_dir, exist_ok=True)
    if env_kwargs is None:
        env_kwargs = {}

    def _init():
        env = gym.make(env_id, **env_kwargs)
        env.seed(seed * 128 + rank)
        return env

    return _init


env = DummyVecEnv([make_env("Walker2DBulletEnv-v0", 0, 0, log_dir="tmp")])
env = VecNormalize.load("tmp/vecnormalize1.pkl", env)
# Freeze the loaded running statistics so they are not updated during evaluation
env.training = False
print(dir(env))  # inspect the attributes exposed by the VecNormalize wrapper
env.reset()
for i in range(10):
    env.step([env.action_space.sample()])

# env.step() returns (obs, rewards, dones, infos); keep only the normalized observations
obs, _, _, _ = env.step([env.action_space.sample()])
original_obs = env.get_original_obs()
print(obs)
print(original_obs)
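To confirm the loaded statistics behave as expected, the normalized observation can be mapped back and compared with the raw one, mirroring the unnormalize_obs check from Example no. 5 (clipping via clip_obs can make this only approximate):

import numpy as np

# unnormalize_obs should (approximately) invert the normalization applied by VecNormalize
print(np.abs(env.unnormalize_obs(obs) - original_obs).max())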