def test_nested_ob(self):
            """Exercise the replay data manager on a nested-observation env.

            Fills a ReplayBuffer through ReplayBufferDataManager using a
            'CartPole-v1' env wrapped in NestedVecObWrapper, then checks the
            buffer size and the shapes of the entries in a sampled batch.
            """
            base_env = make_env('CartPole-v1')
            # The Q-function is built from the spaces of the unwrapped env.
            q_function = QFunction(
                FeedForwardBase(base_env.observation_space,
                                base_env.action_space))
            nested_env = NestedVecObWrapper(base_env)
            replay = ReplayBuffer(2000, 1)
            manager = ReplayBufferDataManager(
                replay,
                nested_env,
                act_fn=BufferActor(q_function),
                device='cpu',
                learning_starts=50,
                update_period=2,
            )

            n_rounds = 11
            for _ in range(n_rounds):
                manager.step_until_update()
            # Presumably 50 warm-up transitions plus 2 for each of the 10
            # later rounds -- TODO confirm against the data manager.
            assert replay.num_in_buffer == 70

            sample = manager.sample(32)
            # Smoke check only: the result of acting is discarded.
            manager.act(sample['obs'])

            action_shape = sample['action'].shape
            assert action_shape == sample['reward'].shape
            assert action_shape == sample['done'].shape

            # Observations may come back as a list; if so, only the first
            # element is checked.
            obs, next_obs = sample['obs'], sample['next_obs']
            if isinstance(obs, list):
                obs, next_obs = obs[0], next_obs[0]
            assert obs.shape == next_obs.shape
            assert len(obs.shape) == 2
            assert len(action_shape) == 1
Beispiel #2
0
 def qf_fn(env):
     """Build a QFunction around a QFBase network.

     Args:
         env: Object exposing ``observation_space`` and ``action_space``.

     Returns:
         A QFunction wrapping a QFBase constructed from the two spaces.
     """
     base = QFBase(env.observation_space, env.action_space)
     return QFunction(base)
Beispiel #3
0
def nature_dqn_fn(env):
    """Build a QFunction around a NatureDQN network.

    Args:
        env: Object exposing ``observation_space`` and ``action_space``.

    Returns:
        A QFunction wrapping a NatureDQN constructed from the two spaces.
    """
    obs_space, act_space = env.observation_space, env.action_space
    network = NatureDQN(obs_space, act_space)
    return QFunction(network)
Beispiel #4
0
def qf_fn(env):
    """Build a QFunction around an AppendActionFeedForwardQFBase network.

    Args:
        env: Object exposing ``observation_space`` and ``action_space``.

    Returns:
        A QFunction wrapping an AppendActionFeedForwardQFBase constructed
        from the two spaces.
    """
    base = AppendActionFeedForwardQFBase(env.observation_space,
                                         env.action_space)
    return QFunction(base)
Beispiel #5
0
def qf_fn_discrete(env):
    """Build a QFunction around a FeedForwardQFBase network.

    Args:
        env: Object exposing ``observation_space`` and ``action_space``.

    Returns:
        A QFunction wrapping a FeedForwardQFBase constructed from the two
        spaces.
    """
    obs_space = env.observation_space
    act_space = env.action_space
    return QFunction(FeedForwardQFBase(obs_space, act_space))
Beispiel #6
0
 def qf_fn(env):
     """Build a QFunction around a NatureDQN network.

     Args:
         env: Object exposing ``observation_space`` and ``action_space``.

     Returns:
         A QFunction wrapping a NatureDQN constructed from the two spaces.
     """
     network = NatureDQN(env.observation_space, env.action_space)
     return QFunction(network)