예제 #1
0
def test_len_replay_buffer_no_trajectory():
    """Check that ``len(buffer)`` grows with each insertion and caps at capacity.

    Fifteen transitions are pushed into a buffer of capacity 10 that was
    created without a maximum trajectory length.
    """
    capacity = 10
    buffer = replay_buffer.ReplayBuffer(capacity, (2,), (1,))
    assert len(buffer) == 0
    for num_added in range(1, 16):
        buffer.add(np.zeros(2), np.zeros(1), np.zeros(2), 0, False)
        # The size follows the insertion count until it reaches capacity,
        # after which it must stay pinned there.
        assert len(buffer) == min(num_added, capacity)
예제 #2
0
def test_get_all():
    """Check ``get_all().rewards`` while filling the buffer and after one extra add.

    Rewards 0..capacity-1 are inserted one at a time; after each insertion the
    returned rewards must match everything inserted so far. One more insertion
    (reward -1) is then expected to show up at position 0 of the result.
    """
    capacity = 20
    buffer = replay_buffer.ReplayBuffer(capacity, (1, ), (1, ))
    dummy = np.ones(1)
    for reward in range(capacity):
        buffer.add(dummy, dummy, dummy, reward, False)
        stored_rewards = buffer.get_all().rewards
        assert np.allclose(stored_rewards, np.arange(reward + 1))

    # The buffer is full now; the next reward is expected in the first slot.
    buffer.add(dummy, dummy, dummy, -1, False)
    expected_rewards = np.array([-1, *range(1, capacity)])
    assert np.allclose(buffer.get_all().rewards, expected_rewards)
예제 #3
0
def test_buffer_close_trajectory_not_done():
    """Check that ``close_trajectory()`` ends a trajectory that never saw ``done``.

    Three non-terminal transitions are added and closed explicitly, followed
    by five more whose last one is terminal. The buffer must report the two
    trajectories as (0, 3) and (3, 8), and hold rewards 0..7 in order.
    """
    capacity = 10
    dummy = np.zeros(1)
    buffer = replay_buffer.ReplayBuffer(capacity, (1,), (1,), max_trajectory_length=5)

    # First trajectory: no transition is flagged done, so close it by hand.
    for reward in range(3):
        buffer.add(dummy, dummy, dummy, reward, False)
    buffer.close_trajectory()

    # Second trajectory: only the final transition carries the done flag.
    last_step = 7
    for reward in range(3, last_step + 1):
        buffer.add(dummy, dummy, dummy, reward, reward == last_step)

    expected_indices = [(0, 3), (3, 8)]
    assert buffer.trajectory_indices == expected_indices
    assert np.allclose(buffer.reward[:8], np.arange(8))
예제 #4
0
def test_get_iterators():
    """Check that ``get_iterators`` splits stored data into train/validation sets.

    With 900 stored transitions and a validation ratio of 0.1, the iterators
    must report 810 and 90 stored items, and together yield every reward
    exactly once.
    """
    num_transitions = 900
    buffer = replay_buffer.ReplayBuffer(1000, (1,), (1,))
    dummy = np.ones(1)
    for reward in range(num_transitions):
        buffer.add(dummy, dummy, dummy, reward, False)

    train_iter, val_iter = buffer.get_iterators(32, 0.1)
    assert train_iter.num_stored == 810
    assert val_iter.num_stored == 90

    # Each reward is unique, so the sorted union reveals missing or
    # duplicated transitions.
    seen_rewards = []
    for iterator in (train_iter, val_iter):
        for batch in iterator:
            _obs, _act, _next_obs, rewards, _dones = batch.astuple()
            seen_rewards.extend(rewards)
    assert sorted(seen_rewards) == list(range(num_transitions))
예제 #5
0
def test_sample_trajectories():
    """Check that ``sample_trajectory`` returns whole, correctly bounded trajectories.

    Two trajectories are stored: 7 steps with rewards 0..6 and 10 steps with
    rewards 100..109. Every sample must be one of them in full, with a single
    done flag on its last step.
    """
    buffer = replay_buffer.ReplayBuffer(15, (1,), (1,), max_trajectory_length=10)
    dummy = np.zeros(1)

    short_len, long_len = 7, 10
    for step in range(short_len):
        buffer.add(dummy, dummy, dummy, step, step == short_len - 1)
    for step in range(long_len):
        buffer.add(dummy, dummy, dummy, 100 + step, step == long_len - 1)

    # Trajectory length -> expected sum of its rewards.
    expected_reward_sum = {7: 21, 10: 1045}

    for _ in range(100):
        obs, _act, _next_obs, rewards, dones = buffer.sample_trajectory().astuple()
        assert len(obs) in expected_reward_sum
        assert dones.sum() == 1 and dones[-1]
        assert rewards.sum() == expected_reward_sum[len(obs)]
예제 #6
0
def test_buffer_with_trajectory_len_and_loop_behavior():
    """Check ``len(buffer)`` and ``cur_idx`` as trajectories wrap around the buffer.

    The buffer has capacity 10 and ``max_trajectory_length=5``. The test runs
    three phases of insertions, each picking up from the current ``cur_idx``:

    1. Steps 1..14 with done flags at steps 4, 7 and 12.
    2. A run whose last done comes later (step 14); the reported length is
       expected to grow.
    3. A run whose last done comes earlier (step 10); the reported length is
       expected to stay the same.

    The final assertion pins the first 14 entries of ``buffer.reward``.
    """
    capacity = 10
    buffer = replay_buffer.ReplayBuffer(capacity, (2, ), (1, ),
                                        max_trajectory_length=5)
    assert len(buffer) == 0
    dones = [4, 7,
             12]  # check that dones before capacity don't do anything weird
    # Phase 1: the reward of each transition is its 1-based step number.
    for how_many in range(1, 15):
        done = how_many in dones
        buffer.add(np.zeros(2), np.zeros(1), np.zeros(2), how_many, done)
        if how_many < dones[-1]:
            assert len(buffer) == how_many
        else:
            # From the last done (step 12) onward the length is expected to
            # stay at 12, even though that exceeds ``capacity``.
            assert len(buffer) == dones[-1]
    # Buffer should have reset and added elements 13 and 14
    assert buffer.cur_idx == 2
    assert buffer.reward[0] == 13
    assert buffer.reward[1] == 14

    # now we'll add longer trajectory at the end, num_stored should increase
    old_size = len(buffer)
    dones[-1] = 14
    number_after_done = 3
    # Phase 2: rewards are offset by 100 so they can be told apart from the
    # phase-1 values in the final check.
    for how_many in range(buffer.cur_idx + 1,
                          dones[-1] + number_after_done + 1):
        done = how_many in dones
        buffer.add(np.zeros(2), np.zeros(1), np.zeros(2), 100 + how_many, done)
        if how_many <= old_size:
            assert len(buffer) == old_size
        else:
            assert len(buffer) == min(how_many, dones[-1])
    # The steps added after the last done are expected at the buffer start.
    assert buffer.cur_idx == number_after_done

    # now we'll add a shorter trajectory at the end, num_stored should not change
    old_size = len(buffer)
    dones[-1] = 10
    number_after_done = 5
    # Phase 3: rewards are the plain step numbers again.
    for how_many in range(buffer.cur_idx + 1,
                          dones[-1] + number_after_done + 1):
        done = how_many in dones
        buffer.add(np.zeros(2), np.zeros(1), np.zeros(2), how_many, done)
        assert len(buffer) == old_size
    assert buffer.cur_idx == number_after_done

    # Expected layout: indices 0-4 hold the phase-3 steps after its last done
    # (11..15), indices 5-9 hold earlier phase-3 steps (6..10), and indices
    # 10-13 still hold phase-2 values (111..114).
    assert np.all(buffer.reward[:14].astype(int) == np.array(
        [11, 12, 13, 14, 15, 6, 7, 8, 9, 10, 111, 112, 113, 114], dtype=int))
예제 #7
0
def test_trajectory_contents():
    """Check ``trajectory_indices`` bookkeeping as trajectories overwrite old ones.

    Nine trajectories of varying length are added to a buffer of capacity 20
    with ``max_trajectory_length=10``. After each one, the stored index ranges
    must equal the expected slice of ``trajectories``, and every stored range
    must still hold rewards 0, 1, 2, ... in order.
    """
    buffer = replay_buffer.ReplayBuffer(20, (1, ), (1, ),
                                        max_trajectory_length=10)
    dummy = np.zeros(1)
    traj_lens = [4, 10, 1, 7, 8, 1, 4, 7, 5]
    # (start, end) range expected for each trajectory, in insertion order.
    trajectories = [
        (0, 4),
        (4, 14),
        (14, 15),
        (15, 22),
        (0, 8),
        (8, 9),
        (9, 13),
        (13, 20),
        (0, 5),
    ]

    def _expected_indices(num_done):
        """Return the ranges expected after trajectory ``num_done`` was added."""
        if num_done < 4:
            # here trajectories should just get appended
            return trajectories[:num_done + 1]
        if num_done <= 6:
            # the next few trajectories should remove (0, 4) and (4, 14)
            return trajectories[2:num_done + 1]
        if num_done == 7:
            # the penultimate trajectory should remove everything up to (0, 8)
            return trajectories[4:num_done + 1]
        # the last trajectory should remove (0, 8)
        # (just checking that ending at exactly capacity works well)
        return trajectories[5:]

    def _check_buffer_trajectories_coherence():
        """Assert each stored range holds rewards counting up from zero."""
        for start, stop in buffer.trajectory_indices:
            for offset in range(stop - start):
                assert buffer.reward[start + offset] == offset

    for tr_idx, length in enumerate(traj_lens):
        final_step = length - 1
        for step in range(length):
            buffer.add(dummy, dummy, dummy, step, step == final_step)
        assert buffer.trajectory_indices == _expected_indices(tr_idx)
        _check_buffer_trajectories_coherence()