def test_len_replay_buffer_no_trajectory():
    """Check ``len(buffer)`` after each of 15 additions to a capacity-10 buffer.

    The expected length is the number of transitions added so far, capped at
    the buffer capacity.
    """
    capacity = 10
    buffer = replay_buffer.ReplayBuffer(capacity, (2,), (1,))
    assert len(buffer) == 0

    for num_added in range(1, 16):
        buffer.add(np.zeros(2), np.zeros(1), np.zeros(2), 0, False)
        # Length tracks the insert count until capacity is hit, then stays put.
        assert len(buffer) == min(num_added, capacity)
def test_get_all():
    """Check the rewards returned by ``get_all()`` while filling the buffer.

    After each of the first ``capacity`` additions the rewards must equal
    ``0..i``. One extra addition (reward ``-1``) is then expected to show up
    in the first position, with rewards ``1..capacity-1`` left untouched.
    """
    capacity = 20
    buffer = replay_buffer.ReplayBuffer(capacity, (1,), (1,))
    filler = np.ones(1)

    for reward in range(capacity):
        buffer.add(filler, filler, filler, reward, False)
        stored_rewards = buffer.get_all().rewards
        assert np.allclose(stored_rewards, np.arange(reward + 1))

    # One more transition than the capacity.
    buffer.add(filler, filler, filler, -1, False)
    expected_rewards = np.array([-1] + list(range(1, capacity)))
    assert np.allclose(buffer.get_all().rewards, expected_rewards)
def test_buffer_close_trajectory_not_done():
    """Check ``close_trajectory()`` when no ``done`` flag was ever added.

    Three transitions are added without ``done``, the trajectory is closed
    explicitly, and five more are added with ``done`` set only on the last.
    The test asserts that two trajectories, ``(0, 3)`` and ``(3, 8)``, are
    recorded and that the stored rewards are ``0..7``.
    """
    capacity = 10
    filler = np.zeros(1)
    buffer = replay_buffer.ReplayBuffer(capacity, (1,), (1,), max_trajectory_length=5)

    first_len = 3
    total = 8

    # First trajectory: never flagged as done, closed by hand.
    for reward in range(first_len):
        buffer.add(filler, filler, filler, reward, False)
    buffer.close_trajectory()

    # Second trajectory: done only on its final transition.
    for reward in range(first_len, total):
        is_last = reward == total - 1
        buffer.add(filler, filler, filler, reward, is_last)

    assert buffer.trajectory_indices == [(0, first_len), (first_len, total)]
    assert np.allclose(buffer.reward[:total], np.arange(total))
def test_get_iterators():
    """Check the train/validation split produced by ``get_iterators(32, 0.1)``.

    With 900 stored transitions the test expects 810 in the training iterator
    and 90 in the validation iterator, and expects the two iterators together
    to yield every reward ``0..899`` exactly once.
    """
    num_transitions = 900
    buffer = replay_buffer.ReplayBuffer(1000, (1,), (1,))
    filler = np.ones(1)
    for reward in range(num_transitions):
        buffer.add(filler, filler, filler, reward, False)

    train_iter, val_iter = buffer.get_iterators(32, 0.1)
    assert train_iter.num_stored == 810
    assert val_iter.num_stored == 90

    # Rewards are unique per transition, so they identify what was yielded.
    seen_rewards = []
    for iterator in (train_iter, val_iter):
        for batch in iterator:
            _, _, _, batch_rewards, _ = batch.astuple()
            seen_rewards.extend(batch_rewards)
    assert sorted(seen_rewards) == list(range(num_transitions))
def test_sample_trajectories():
    """Check that ``sample_trajectory()`` returns one whole stored trajectory.

    Two trajectories are stored: 7 steps with rewards ``0..6`` (sum 21) and
    10 steps with rewards ``100..109`` (sum 1045). Each of 100 samples must
    have one of those lengths, exactly one ``done`` flag located at the end,
    and the reward sum matching its length.
    """
    buffer = replay_buffer.ReplayBuffer(15, (1,), (1,), max_trajectory_length=10)
    filler = np.zeros(1)

    for step in range(7):
        buffer.add(filler, filler, filler, step, step == 6)
    for step in range(10):
        buffer.add(filler, filler, filler, 100 + step, step == 9)

    # Trajectory length -> expected sum of its rewards.
    reward_sum_by_length = {7: 21, 10: 1045}

    for _ in range(100):
        sample = buffer.sample_trajectory().astuple()
        obs, _act, _next_obs, rewards, dones = sample
        length = len(obs)
        assert length in reward_sum_by_length
        assert dones.sum() == 1 and dones[-1]
        assert rewards.sum() == reward_sum_by_length[length]
def test_buffer_with_trajectory_len_and_loop_behavior():
    """Check ``len(buffer)``, ``cur_idx`` and stored rewards across wrap-arounds.

    The buffer has capacity 10 and ``max_trajectory_length=5``. The test runs
    three phases, each ending a trajectory at a different position and then
    adding a few more transitions, asserting the buffer length after every
    addition and the write index after every phase. It finishes by comparing
    the first 14 stored rewards against a fixed expected layout.
    """
    capacity = 10
    buffer = replay_buffer.ReplayBuffer(capacity, (2,), (1,), max_trajectory_length=5)
    assert len(buffer) == 0

    def _push(reward, done):
        # Only reward and done vary in this test; the rest are zero arrays.
        buffer.add(np.zeros(2), np.zeros(1), np.zeros(2), reward, done)

    # --- Phase 1 ---
    dones = [4, 7, 12]  # check that dones before capacity don't do anything weird
    for step in range(1, 15):
        _push(step, step in dones)
        assert len(buffer) == min(step, dones[-1])
    # Buffer should have reset and added elements 13 and 14
    assert buffer.cur_idx == 2
    assert buffer.reward[0] == 13
    assert buffer.reward[1] == 14

    # --- Phase 2 ---
    # now we'll add longer trajectory at the end, num_stored should increase
    size_before = len(buffer)
    dones[-1] = 14
    number_after_done = 3
    first_step = buffer.cur_idx + 1
    last_step = dones[-1] + number_after_done
    for step in range(first_step, last_step + 1):
        _push(100 + step, step in dones)
        if step > size_before:
            assert len(buffer) == min(step, dones[-1])
        else:
            assert len(buffer) == size_before
    assert buffer.cur_idx == number_after_done

    # --- Phase 3 ---
    # now we'll add a shorter trajectory at the end, num_stored should not change
    size_before = len(buffer)
    dones[-1] = 10
    number_after_done = 5
    first_step = buffer.cur_idx + 1
    last_step = dones[-1] + number_after_done
    for step in range(first_step, last_step + 1):
        _push(step, step in dones)
        assert len(buffer) == size_before
    assert buffer.cur_idx == number_after_done

    expected_rewards = np.array(
        [11, 12, 13, 14, 15, 6, 7, 8, 9, 10, 111, 112, 113, 114], dtype=int
    )
    assert np.all(buffer.reward[:14].astype(int) == expected_rewards)
def test_trajectory_contents():
    """Check ``trajectory_indices`` as trajectories are added and overwritten.

    Nine trajectories of varying length are added to a buffer with capacity 20
    and ``max_trajectory_length=10``. Each transition's reward is its offset
    within its trajectory. After every trajectory the test asserts which
    ``(start, end)`` ranges are still recorded and that, within each recorded
    range, the stored rewards count up from zero.
    """
    buffer = replay_buffer.ReplayBuffer(20, (1,), (1,), max_trajectory_length=10)
    filler = np.zeros(1)
    traj_lens = [4, 10, 1, 7, 8, 1, 4, 7, 5]
    # Expected (start, end) range for each trajectory, in insertion order.
    trajectories = [
        (0, 4),
        (4, 14),
        (14, 15),
        (15, 22),
        (0, 8),
        (8, 9),
        (9, 13),
        (13, 20),
        (0, 5),
    ]

    def _assert_rewards_count_from_zero():
        # Reward at each position must equal its offset inside its trajectory.
        for start, end in buffer.trajectory_indices:
            for offset in range(end - start):
                assert buffer.reward[start + offset] == offset

    for tr_idx, length in enumerate(traj_lens):
        for step in range(length):
            buffer.add(filler, filler, filler, step, step == length - 1)

        if tr_idx < 4:
            # here trajectories should just get appended
            expected = trajectories[: tr_idx + 1]
        elif tr_idx in [4, 5, 6]:
            # the next few trajectories should remove (0, 4) and (4, 14)
            expected = trajectories[2 : tr_idx + 1]
        elif tr_idx == 7:
            # the penultimate trajectory should remove everything up to (0, 8)
            expected = trajectories[4 : tr_idx + 1]
        else:
            # the last trajectory should remove (0, 8)
            # (just checking that ending at exactly capacity works well)
            expected = trajectories[5:]
        assert buffer.trajectory_indices == expected

        _assert_rewards_count_from_zero()