def test_nested_ob(self):
    """Check replay-buffer collection and sampling with nested observations.

    Builds a Q-function from the spaces of an unwrapped ``CartPole-v1``
    env, wraps the env in ``NestedVecObWrapper``, drives a
    ``ReplayBufferDataManager`` through 11 ``step_until_update`` calls and
    then verifies the buffer fill count and the shapes of a sampled batch.
    """
    raw_env = make_env('CartPole-v1')
    # The Q-function is built from the spaces of the *unwrapped* env.
    net = FeedForwardBase(raw_env.observation_space, raw_env.action_space)
    qf = QFunction(net)
    nested_env = NestedVecObWrapper(raw_env)

    replay = ReplayBuffer(2000, 1)
    manager = ReplayBufferDataManager(
        replay,
        nested_env,
        act_fn=BufferActor(qf),
        device='cpu',
        learning_starts=50,
        update_period=2,
    )

    rounds = 11
    for _ in range(rounds):
        manager.step_until_update()
    # Presumably 50 warm-up transitions plus 10 further rounds of 2 steps
    # each -- confirm against ReplayBufferDataManager.
    assert replay.num_in_buffer == 70

    batch = manager.sample(32)
    # Acting on sampled observations must not raise.
    manager.act(batch['obs'])

    action_shape = batch['action'].shape
    assert action_shape == batch['reward'].shape
    assert action_shape == batch['done'].shape

    # Observations may come back as a list; if so, check the first entry.
    obs = batch['obs']
    if isinstance(obs, list):
        obs = obs[0]
        next_obs = batch['next_obs'][0]
    else:
        next_obs = batch['next_obs']
    assert obs.shape == next_obs.shape
    assert len(obs.shape) == 2

    assert len(action_shape) == 1
def qf_fn(env):
    """Build a ``QFunction`` around a ``QFBase`` sized for ``env``.

    The base is constructed from ``env.observation_space`` and
    ``env.action_space``.
    """
    base = QFBase(env.observation_space, env.action_space)
    return QFunction(base)
def nature_dqn_fn(env):
    """Build a ``QFunction`` around a ``NatureDQN`` base sized for ``env``.

    The base is constructed from ``env.observation_space`` and
    ``env.action_space``.
    """
    base = NatureDQN(env.observation_space, env.action_space)
    return QFunction(base)
def qf_fn(env):
    """Build a ``QFunction`` around an ``AppendActionFeedForwardQFBase``.

    The base is constructed from ``env.observation_space`` and
    ``env.action_space``.
    """
    base = AppendActionFeedForwardQFBase(env.observation_space,
                                         env.action_space)
    return QFunction(base)
def qf_fn_discrete(env):
    """Build a ``QFunction`` around a ``FeedForwardQFBase`` sized for ``env``.

    The base is constructed from ``env.observation_space`` and
    ``env.action_space``.
    """
    base = FeedForwardQFBase(env.observation_space, env.action_space)
    return QFunction(base)
def qf_fn(env):
    """Build a ``QFunction`` around a ``NatureDQN`` base sized for ``env``.

    The base is constructed from ``env.observation_space`` and
    ``env.action_space``.
    """
    base = NatureDQN(env.observation_space, env.action_space)
    return QFunction(base)