def test_per_nstep(self):
    """
    PrioritizedReplayBuffer.on_episode_end() ignores Exception

    Fills an N-step prioritized buffer with one short episode, closes the
    episode, and checks that a sampled batch carries a "discounts" entry.

    Ref: https://gitlab.com/ymd_h/cpprb/-/issues/111
    """
    env_dict = {"rew": {}, "done": {}}
    nstep_config = {"size": 4, "rew": "rew", "gamma": 0.5}
    buffer = PrioritizedReplayBuffer(32, env_dict, Nstep=nstep_config)

    # Ten non-terminal transitions followed by exactly one terminal one.
    done_flags = [0.0] * 10 + [1.0]
    for done_flag in done_flags:
        buffer.add(rew=0.5, done=done_flag)
    buffer.on_episode_end()

    batch = buffer.sample(16)

    self.assertIn("discounts", batch)
def test_save_cache_with_stack_compress(self):
    """
    Check stored values of a ``stack_compress`` field across an episode end.

    Adds three transitions of one episode (the last one terminal), closes
    the episode, adds one more transition, and compares everything returned
    by ``get_all_transitions()["a"]`` against the values that were passed
    to ``add`` at the time of each call.
    """
    buffer = PrioritizedReplayBuffer(
        32,
        env_dict={"done": {"dtype": "bool"}, "a": {"shape": 3}},
        stack_compress="a",
    )

    frame = np.array([0, 1, 2])
    final_step = 2
    for step in range(3):
        is_terminal = step == final_step
        buffer.add(a=frame, done=is_terminal)
        if is_terminal:
            buffer.on_episode_end()
        # In-place increment: the very same array object is handed to the
        # next ``add`` call, yet the expected rows below still hold the
        # values as they were when each transition was added.
        frame += 1

    # One extra transition after the episode has been closed.
    buffer.add(a=np.ones(3), done=False)

    stored = buffer.get_all_transitions()["a"]
    expected = np.asarray(
        [
            [0., 1., 2.],
            [1., 2., 3.],
            [2., 3., 4.],
            [1., 1., 1.],
        ]
    )

    np.testing.assert_allclose(stored, expected)
observation = env.reset() # Warming up for n_step in range(100): action = env.action_space.sample() # Random Action next_observation, reward, done, info = env.step(action) rb.add(obs=observation, act=action, rew=reward, next_obs=next_observation, done=done) observation = next_observation if done: observation = env.reset() rb.on_episode_end() n_episode = 0 observation = env.reset() for n_step in range(N_iteration): if np.random.rand() < egreedy: action = env.action_space.sample() else: Q = tf.squeeze(model(observation.reshape(1, -1))) action = np.argmax(Q) next_observation, reward, done, info = env.step(action) rb.add(obs=observation, act=action, rew=reward,