def test_default_dtype(self):
    """Check that ``default_dtype`` is used for a field declared without a dtype.

    The ``"done"`` field is declared with an empty spec, so the value read
    back through ``sample`` is expected to carry ``np.float32``.
    """
    capacity = 256
    rb = ReplayBuffer(capacity, {"done": {}}, default_dtype=np.float32)

    rb.add(done=1)

    batch = rb.sample(1)
    self.assertEqual(batch["done"][0].dtype, np.float32)
def test_multi_processing2(self):
    """Check the buffer counters after two processes add concurrently.

    Both workers run the module-level ``add`` helper against the same
    buffer; once both have been joined, the next index and the stored size
    must each equal 200.
    """
    capacity = 256
    rb = ReplayBuffer(capacity, {"done": {}})

    # A freshly created buffer starts out empty.
    self.assertEqual(rb.get_next_index(), 0)
    self.assertEqual(rb.get_stored_size(), 0)

    # Create both workers first, then start them, then wait for both.
    workers = [Process(target=add, args=[rb]) for _ in range(2)]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()

    self.assertEqual(rb.get_next_index(), 200)
    self.assertEqual(rb.get_stored_size(), 200)
def test_dtypes(self):
    """Check that ``MPReplayBuffer`` round-trips a range of NumPy dtypes.

    For each candidate dtype a one-slot buffer is created, a single zero
    is stored, and the value read back must keep both its dtype and its
    content. Each dtype runs as its own sub-test.
    """
    capacity = 1
    candidates = (
        np.byte, np.ubyte, np.short, np.ushort,
        np.intc, np.uintc, np.int_, np.uint,
        np.longlong, np.ulonglong,
        np.half, np.double, np.longdouble,
        np.int8, np.int16, np.int32, np.int64,
        np.uint8, np.uint16, np.uint32, np.uint64,
        np.intp,
        np.float16, np.float32, np.float64,
    )

    for dtype in candidates:
        if np.dtype(dtype).itemsize > 8:
            # 128bit is not supported
            continue

        with self.subTest(dtype=dtype):
            rb = MPReplayBuffer(capacity, {"a": {"dtype": dtype}})
            rb.add(a=np.zeros((1, ), dtype=dtype))

            stored = rb.get_all_transitions()["a"].ravel()
            expected = np.zeros((1, ), dtype=dtype)

            self.assertEqual(stored.dtype, dtype)
            np.testing.assert_allclose(stored, expected)
def test_multi_processing(self):
    """Check that transitions added in a child process are visible to the parent.

    A single worker runs the module-level ``add_args`` helper with 100
    ``{"obs": i}`` records; afterwards the parent must see 100 stored
    transitions whose ``"obs"`` values are 0..99 in order.
    """
    capacity = 256
    rb = ReplayBuffer(capacity, {"obs": {"dtype": int}})

    # A freshly created buffer starts out empty.
    self.assertEqual(rb.get_next_index(), 0)
    self.assertEqual(rb.get_stored_size(), 0)

    records = [{"obs": i} for i in range(100)]
    worker = Process(target=add_args, args=[rb, records])
    worker.start()
    worker.join()

    self.assertEqual(rb.get_next_index(), 100)
    self.assertEqual(rb.get_stored_size(), 100)

    transitions = rb.get_all_transitions()
    expected = np.arange(100, dtype=int)
    np.testing.assert_allclose(transitions["obs"].ravel(), expected)
def test_np_float16(self):
    """Check that ``MPReplayBuffer`` stores ``np.float16`` values added by a child process.

    A single worker runs the module-level ``add_args`` helper with 100
    ``{"done": i}`` records; the parent then verifies the counters, the
    stored values (0..99) and that the dtype is still ``np.float16``.
    """
    capacity = 256
    rb = MPReplayBuffer(capacity, {"done": {"dtype": np.float16}})

    # A freshly created buffer starts out empty.
    self.assertEqual(rb.get_next_index(), 0)
    self.assertEqual(rb.get_stored_size(), 0)

    records = [{"done": i} for i in range(100)]
    worker = Process(target=add_args, args=[rb, records])
    worker.start()
    worker.join()

    self.assertEqual(rb.get_next_index(), 100)
    self.assertEqual(rb.get_stored_size(), 100)

    done = rb.get_all_transitions()["done"]
    expected = np.arange(100, dtype=np.float16)
    np.testing.assert_allclose(done.ravel(), expected)
    self.assertEqual(done.dtype, np.float16)
def test_add(self):
    """Check how ``add`` advances the buffer counters.

    Covers three cases in sequence: adding one transition, a rejected add
    that omits required fields (must raise ``KeyError`` and leave the
    counters untouched), and adding a stacked batch of two transitions.
    """
    capacity = 256
    obs_shape = (15, 15)
    act_dim = 5

    env_dict = {
        "obs": {"shape": obs_shape},
        "act": {"shape": act_dim},
        "rew": {},
        "next_obs": {"shape": obs_shape},
        "done": {},
    }
    rb = ReplayBuffer(capacity, env_dict)

    def assert_filled(count):
        # Next index and stored size are always checked together, in this order.
        self.assertEqual(rb.get_next_index(), count)
        self.assertEqual(rb.get_stored_size(), count)

    assert_filled(0)

    # One complete transition.
    single_obs = np.zeros(obs_shape)
    single_act = np.ones(act_dim)
    single_next_obs = single_obs + 1
    rb.add(obs=single_obs, act=single_act, rew=1,
           next_obs=single_next_obs, done=0)
    assert_filled(1)

    # Only "obs" is supplied here, so the add is expected to be rejected.
    with self.assertRaises(KeyError):
        rb.add(obs=single_obs)
    assert_filled(1)

    # Two transitions at once, stacked along a new leading axis.
    rb.add(obs=np.stack((single_obs, single_obs)),
           act=np.stack((single_act, single_act)),
           rew=(1, 0),
           next_obs=np.stack((single_next_obs, single_next_obs)),
           done=(0.0, 1.0))
    assert_filled(3)
def test_buffer(self):
    """Fill the buffer past its size, sample from it, then check ``clear``.

    512 transitions are added to a buffer created with size 256. After
    that ``_encode_sample`` and ``sample`` are called (results unused,
    the calls only have to succeed), and ``clear`` must reset both the
    next index and the stored size to 0.
    """
    capacity = 256
    obs_shape = (15, 15)
    act_dim = 5
    n_steps = 512

    env_dict = {
        "obs": {"shape": obs_shape},
        "act": {"shape": act_dim},
        "rew": {},
        "next_obs": {"shape": obs_shape},
        "done": {},
    }
    erb = ReplayBuffer(capacity, env_dict)

    for step in range(n_steps):
        current_obs = np.full(obs_shape, step, dtype=np.double)
        action = np.full(act_dim, step, dtype=np.double)
        erb.add(obs=current_obs,
                act=action,
                rew=step,
                next_obs=current_obs + 1,
                done=0)

    erb._encode_sample(range(capacity))
    erb.sample(32)

    erb.clear()
    self.assertEqual(erb.get_next_index(), 0)
    self.assertEqual(erb.get_stored_size(), 0)