def test_multi_add(self):
    """Add two 2-step batches to a size-4 buffer and check the ``'obs'`` output.

    The first ``add`` is expected to return ``None``; the second is expected
    to return a mapping whose ``'obs'`` entry is all ones.
    """
    buffer = NstepBuffer({'obs': {}, 'done': {}}, {"size": 4})
    batch = {"obs": (1, 1), "done": (0, 0)}

    first = buffer.add(**batch)
    self.assertIs(first, None)

    second = buffer.add(**batch)
    # The expectation is a 0-d array; assert_allclose broadcasts it against
    # whatever shape the buffer returns.
    expected = np.array(1, dtype=np.float32)
    np.testing.assert_allclose(second['obs'], expected)
def test_gamma_multi_step(self):
    """Check ``'discounts'`` after two 2-step adds with ``gamma=0.5``.

    The first ``add`` is expected to return ``None``; the second is expected
    to report a discount of ``0.5 ** 3``.
    """
    # NOTE(review): this chunk holds a second method with this exact name
    # (it checks 'done' instead of 'discounts'). If both live in the same
    # class, the later definition replaces the earlier one — confirm.
    gamma = 0.5
    buffer = NstepBuffer({'rew': {}, 'done': {}},
                         {"size": 4, "rew": "rew", "gamma": gamma})
    batch = {"rew": (1, 1), "done": (0, 0)}

    first = buffer.add(**batch)
    self.assertIs(first, None)

    second = buffer.add(**batch)
    expected = np.array(gamma ** 3, dtype=np.float32)
    np.testing.assert_allclose(second['discounts'], expected)
def test_next(self):
    """Check the ``"next_obs"`` value returned once three steps are stored.

    Three single-step adds are each expected to return ``None``. Every
    following add of ``next_obs=i`` is expected to return ``i`` itself as a
    ``(1, 1)``-shaped value under ``"next_obs"``.
    """
    buffer = NstepBuffer({'next_obs': {}, 'done': {}},
                         {"size": 4, "next": "next_obs"})

    # Warm-up: none of the first three adds may produce a transition.
    for _ in range(3):
        self.assertIs(buffer.add(next_obs=1, done=0), None)

    for i in range(5):
        with self.subTest(i=i):
            returned = buffer.add(next_obs=i, done=0)
            expected = np.array(i, dtype=np.float32).reshape(-1, 1)
            np.testing.assert_allclose(returned["next_obs"], expected)
def test_nstep_buffer(self):
    """Smoke-test ``add`` with single-element jax arrays as inputs.

    Four identical transitions (every field is ``jnp.asarray([1])``,
    including ``done``) are added to a size-3 buffer. Nothing is asserted
    about the return value; the test only fails if ``add`` raises.
    """
    env_dict = {"obs": {}, "rew": {}, "done": {}, "next_obs": {}}
    buffer = NstepBuffer(env_dict,
                         Nstep={"size": 3, "rew": "rew", "next": "next_obs"})

    # One separate array per field, created in the same order as the keys.
    transition = {
        field: jnp.asarray([1])
        for field in ("obs", "rew", "done", "next_obs")
    }

    for i in range(4):
        with self.subTest(i=i):
            buffer.add(**transition)
def test_gamma(self):
    """Check ``"discounts"`` for single-step adds with ``gamma=0.5``.

    Three adds are each expected to return ``None``. Each of the next five
    adds is expected to report a discount of ``0.5 ** 3``.
    """
    gamma = 0.5
    buffer = NstepBuffer({'rew': {}, 'done': {}},
                         {"size": 4, "rew": "rew", "gamma": gamma})

    # Warm-up: none of the first three adds may produce a transition.
    for _ in range(3):
        self.assertIs(buffer.add(rew=1, done=0), None)

    for i in range(5):
        with self.subTest(i=i):
            returned = buffer.add(rew=1, done=0)
            np.testing.assert_allclose(returned["discounts"], gamma ** 3)
def test_rew(self):
    """Check the accumulated ``"rew"`` for unit rewards with ``gamma=0.5``.

    Three adds are each expected to return ``None``. Each of the next five
    adds is expected to report ``1 + 0.5 + 0.5**2 + 0.5**3`` under ``"rew"``.
    """
    gamma = 0.5
    buffer = NstepBuffer({"rew": {}, "done": {}},
                         {"size": 4, "rew": "rew", "gamma": gamma})

    # Warm-up: none of the first three adds may produce a transition.
    for _ in range(3):
        self.assertIs(buffer.add(rew=1, done=0), None)

    # Every term is a power of two, so this sum is exactly 1.875.
    expected = sum(gamma ** k for k in range(4))

    for i in range(5):
        with self.subTest(i=i):
            returned = buffer.add(rew=1, done=0)
            np.testing.assert_allclose(returned["rew"], expected)
def test_gamma_multi_step(self):
    """Check ``'done'`` after two 2-step adds with ``gamma=0.5``.

    The first ``add`` is expected to return ``None``; the second is expected
    to report ``[[0]]`` under ``'done'``.
    """
    # NOTE(review): this chunk holds another method with this exact name
    # (it checks 'discounts' instead of 'done'). If both live in the same
    # class, only the later definition is ever run — confirm.
    buffer = NstepBuffer({'rew': {}, 'done': {}},
                         {"size": 4, "rew": "rew", "gamma": 0.5})
    batch = {"rew": (1, 1), "done": (0, 0)}

    first = buffer.add(**batch)
    self.assertIs(first, None)

    second = buffer.add(**batch)
    np.testing.assert_allclose(second['done'], np.asarray([[0]]))
def test_gamma_with_done(self):
    """Check ``"discounts"`` when the second stored step has ``done=1``.

    Three adds (``done`` = 0, 1, 0) are each expected to return ``None``.
    The next three adds are expected to report discounts of ``0.5``, ``1``
    and ``0.5 ** 3`` in that order.
    """
    # NOTE(review): this chunk holds a second method with this exact name
    # (it checks "done" instead of "discounts"). If both live in the same
    # class, the later definition replaces the earlier one — confirm.
    gamma = 0.5
    buffer = NstepBuffer({"rew": {}, "done": {}},
                         {"size": 4, "rew": "rew", "gamma": gamma})

    # Warm-up steps; the middle one carries done=1.
    for done_flag in (0, 1, 0):
        self.assertIs(buffer.add(rew=1, done=done_flag), None)

    for expected in (gamma, 1, gamma ** 3):
        returned = buffer.add(rew=1, done=0)
        np.testing.assert_allclose(returned["discounts"], expected)
def test_gamma_large_step_add(self):
    """Check ``'discounts'`` when five steps are added in a single call.

    One ``add`` of five steps to a size-4 buffer is expected to return two
    rows of discounts, each equal to ``0.5 ** 3``.
    """
    # NOTE(review): this chunk holds a second method with this exact name
    # (it checks 'done' instead of 'discounts'). If both live in the same
    # class, the later definition replaces the earlier one — confirm.
    gamma = 0.5
    buffer = NstepBuffer({'rew': {}, 'done': {}},
                         {"size": 4, "rew": "rew", "gamma": gamma})

    returned = buffer.add(rew=(1, 1, 1, 1, 1), done=(0, 0, 0, 0, 0))

    expected = np.array([gamma ** 3] * 2, dtype=np.float32).reshape(-1, 1)
    np.testing.assert_allclose(returned['discounts'], expected)
def test_gamma_with_done(self):
    """Check ``"done"`` when the second stored step has ``done=1``.

    Three adds (``done`` = 0, 1, 0) are each expected to return ``None``.
    The next three adds are expected to report ``[[1]]``, ``[[1]]`` and
    ``[[0]]`` under ``"done"``, in that order.
    """
    # NOTE(review): this chunk holds another method with this exact name
    # (it checks "discounts" instead of "done"). If both live in the same
    # class, only the later definition is ever run — confirm.
    buffer = NstepBuffer({"rew": {}, "done": {}},
                         {"size": 4, "rew": "rew", "gamma": 0.5})

    # Warm-up steps; the middle one carries done=1.
    for done_flag in (0, 1, 0):
        self.assertIs(buffer.add(rew=1, done=done_flag), None)

    for expected_done in (1, 1, 0):
        returned = buffer.add(rew=1, done=0)
        np.testing.assert_allclose(returned["done"],
                                   np.asarray([[expected_done]]))
def test_gamma_large_step_add(self):
    """Check ``'done'`` when five steps are added in a single call.

    One ``add`` of five non-terminal steps to a size-4 buffer is expected to
    return ``[[0], [0]]`` under ``'done'``.
    """
    # NOTE(review): this chunk holds another method with this exact name
    # (it checks 'discounts' instead of 'done'). If both live in the same
    # class, only the later definition is ever run — confirm.
    buffer = NstepBuffer({'rew': {}, 'done': {}},
                         {"size": 4, "rew": "rew", "gamma": 0.5})

    returned = buffer.add(rew=(1, 1, 1, 1, 1), done=(0, 0, 0, 0, 0))

    expected = np.asarray([[0], [0]])
    np.testing.assert_allclose(returned['done'], expected)
def test_large_step_add(self):
    """Check ``'obs'`` when five steps are added in a single call.

    One ``add`` of five steps to a size-4 buffer is expected to return two
    rows under ``'obs'``, each equal to one.
    """
    buffer = NstepBuffer({'obs': {}, 'done': {}}, {"size": 4})

    returned = buffer.add(obs=(1, 1, 1, 1, 1), done=(0, 0, 0, 0, 0))

    expected = np.array((1, 1), dtype=np.float32).reshape(-1, 1)
    np.testing.assert_allclose(returned['obs'], expected)