def test_nested_ob(self):
    """Check sampled-batch shapes when observations are nested lists."""
    base_env = make_env('CartPole-v1')
    qf = QFunction(
        FeedForwardBase(base_env.observation_space, base_env.action_space))
    wrapped_env = NestedVecObWrapper(base_env)
    rb = ReplayBuffer(2000, 1)
    dm = ReplayBufferDataManager(rb,
                                 wrapped_env,
                                 act_fn=BufferActor(qf),
                                 device='cpu',
                                 learning_starts=50,
                                 update_period=2)
    # 11 update cycles: 50 warm-up steps + 10 * update_period = 70 frames.
    for _ in range(11):
        dm.step_until_update()
    assert rb.num_in_buffer == 70

    batch = dm.sample(32)
    dm.act(batch['obs'])
    assert batch['action'].shape == batch['reward'].shape
    assert batch['action'].shape == batch['done'].shape
    obs, next_obs = batch['obs'], batch['next_obs']
    if isinstance(obs, list):
        # Nested observations: compare the leading element of each list.
        assert obs[0].shape == next_obs[0].shape
        assert len(obs[0].shape) == 2
    else:
        assert obs.shape == next_obs.shape
        assert len(obs.shape) == 2
    assert len(batch['action'].shape) == 1
def test_nested_observations(self):
    """Test obs normalization through doubly nested observation wrappers."""
    logger.configure('./.test')
    env = NestedVecObWrapper(NestedVecObWrapper(make_env('CartPole-v1', 1)))
    env = VecObsNormWrapper(env, log_prob=1.)
    print(env.observation_space)
    env.reset()
    assert env.t == 0

    for _ in range(100):
        actions = np.array([env.action_space.sample() for _ in range(1)])
        _, _, done, _ = env.step(actions)
        if done:
            env.reset()
    assert env.t == 100

    # State round-trips through state_dict/load_state_dict.
    state = env.state_dict()
    assert state['t'] == env.t
    state['t'] = 0
    env.load_state_dict(state)
    assert env.t == 0

    # Steps taken in eval mode do not advance the counter; train mode does.
    env.eval()
    env.reset()
    for _ in range(3):
        env.step(np.array([env.action_space.sample()]))
    assert env.t == 0
    env.train()
    for _ in range(3):
        env.step(np.array([env.action_space.sample()]))
    assert env.t == 3
    print(env.mean)
    print(env.std)
    shutil.rmtree('./.test')
def test_vec(self):
    """Test obs normalization on a vectorized environment."""
    logger.configure('./.test')
    n_envs = 10
    env = VecObsNormWrapper(make_env('CartPole-v1', n_envs), log_prob=1.)
    print(env.observation_space)
    env.reset()
    assert env.t == 0

    def random_actions():
        # One random action per sub-environment.
        return np.array([env.action_space.sample() for _ in range(n_envs)])

    for _ in range(5):
        env.step(random_actions())

    # Saved state mirrors the live wrapper, and loading restores it.
    state = env.state_dict()
    assert state['t'] == env.t
    assert np.allclose(state['mean'], env.mean)
    assert np.allclose(state['std'], env.std)
    state['t'] = 0
    env.load_state_dict(state)
    assert env.t == 0

    # Eval-mode steps leave the step counter untouched.
    env.eval()
    env.reset()
    for _ in range(10):
        env.step(random_actions())
    assert env.t == 0
    env.train()
    print(env.mean)
    print(env.std)
    shutil.rmtree('./.test')
def test_rnd_env(self):
    """Test the NGU intrinsic-reward vectorized env wrapper."""
    n_envs = 2
    base = make_env('LunarLander-v2', nenv=n_envs)
    rnd = RND(RNDNet, torch.optim.Adam, 0.99, base.observation_space.shape,
              'cpu')
    emb = InverseDynamicsEmbedding(base, EmbeddingNet, PredictionNet, Loss,
                                   torch.optim.Adam, 'cpu')
    env = NGUVecEnv(base, NGU(rnd, emb, 50, 'cpu'))
    env.reset()

    def sample_actions():
        return np.array([env.action_space.sample() for _ in range(n_envs)])

    # Rewards come back as (extrinsic, intrinsic) pairs per env.
    _, rews, _, _ = env.step(sample_actions())
    assert rews.shape == (n_envs, 2)
    for _ in range(1000):
        _, rews, dones, _ = env.step(sample_actions())
        assert rews.shape == (n_envs, 2)
        if np.any(dones):
            env.reset(force=False)
def test_vec(self):
    """Test reward normalization on a vectorized environment."""
    n_envs = 10
    env = VecRewardNormWrapper(make_env('CartPole-v1', n_envs), gamma=0.99)
    env.reset()
    for _ in range(5):
        step_out = env.step(
            np.array([env.action_space.sample() for _ in range(n_envs)]))
        print(step_out[1])

    # The running-norm counter sees one sample per env per step.
    count = env.rn.count
    print(count)
    assert count == 5 * n_envs

    # State round-trips without disturbing the counter.
    state = env.state_dict()
    env.load_state_dict(state)
    assert count == env.rn.count

    # Eval-mode steps do not update the reward statistics.
    env.eval()
    env.reset()
    for _ in range(10):
        env.step(np.array([env.action_space.sample() for _ in range(n_envs)]))
    env.train()
    assert count == env.rn.count
def env_continuous(nenv):
    """Build a vectorized continuous-action environment."""
    env_id = 'LunarLanderContinuous-v2'
    return make_env(env_id, nenv=nenv)
def env_discrete(nenv):
    """Build a vectorized discrete-action environment."""
    env_id = 'CartPole-v1'
    return make_env(env_id, nenv=nenv)
def env_fn(nenv):
    """Factory returning a vectorized continuous-control environment."""
    env_id = 'LunarLanderContinuous-v2'
    return make_env(env_id, nenv=nenv)
def env_fn(nenv):
    """Factory returning a vectorized continuous-control environment."""
    env_id = 'LunarLanderContinuous-v2'
    return make_env(env_id, nenv)
def _env(nenv):
    """Create a training environment."""
    env_id = "LunarLander-v2"
    return make_env(env_id, nenv)