Ejemplo n.º 1
0
def test_per_buffer_reset_alpha():
    """Check that ``reset_alpha`` changes sample weights and nothing else.

    The buffer receives 30 ``add`` calls with random rewards and priorities,
    is sampled once, has its alpha switched from 0.1 to 0.5, and is sampled
    again. After ordering both samples by index, the indices and rewards
    must be identical while every weight must differ.
    """
    # Assign
    # NOTE(review): the two positional 10s are presumably batch size and
    # buffer size -- confirm against the PERBuffer signature.
    per_buffer = PERBuffer(10, 10, alpha=0.1)
    for _ in range(30):
        # randint expects integer bounds; 1e5 is a float literal.
        per_buffer.add(reward=np.random.randint(0, int(1e5)),
                       priority=np.random.random())

    # Act
    old_experiences = per_buffer.sample()
    per_buffer.reset_alpha(0.5)
    new_experiences = per_buffer.sample()

    # Assert
    # Separate asserts so a failure names the sample that was missing.
    assert old_experiences is not None
    assert new_experiences is not None

    old_index = np.array(old_experiences['index'])
    new_index = np.array(new_experiences['index'])
    old_weight = np.array(old_experiences['weight'])
    new_weight = np.array(new_experiences['weight'])
    old_reward = np.array(old_experiences['reward'])
    new_reward = np.array(new_experiences['reward'])

    # Pairwise comparison over zip() stops at the shorter input, so empty or
    # differently sized samples would pass vacuously. Rule that out first.
    assert old_index.size > 0
    assert old_index.shape == new_index.shape
    assert old_weight.shape == new_weight.shape

    # Order both samples by index so entries are compared like for like.
    old_sort, new_sort = np.argsort(old_index), np.argsort(new_index)

    # Same entries were drawn before and after the alpha change ...
    assert np.array_equal(old_index[old_sort], new_index[new_sort])
    # ... every importance weight was recomputed ...
    assert np.all(old_weight[old_sort] != new_weight[new_sort])
    # ... and the stored rewards were left untouched.
    assert np.array_equal(old_reward[old_sort], new_reward[new_sort])
Ejemplo n.º 2
0
def test_per_buffer_reset_alpha():
    """Check that ``reset_alpha`` changes sample weights and nothing else.

    The buffer receives 30 ``add`` calls with random rewards and priorities,
    is sampled once via ``sample_list``, has its alpha switched from 0.1 to
    0.5, and is sampled again. After ordering both samples by ``index``,
    each pair must share index and reward but have different weights.
    """
    # Assign
    # NOTE(review): the two positional 10s are presumably batch size and
    # buffer size -- confirm against the PERBuffer signature.
    per_buffer = PERBuffer(10, 10, alpha=0.1)
    for _ in range(30):
        # randint expects integer bounds; 1e5 is a float literal.
        per_buffer.add(reward=np.random.randint(0, int(1e5)),
                       priority=np.random.random())

    # Act
    old_experiences = per_buffer.sample_list()
    per_buffer.reset_alpha(0.5)
    new_experiences = per_buffer.sample_list()

    # Assert
    # Separate asserts so a failure names the sample that was missing.
    assert old_experiences is not None
    assert new_experiences is not None

    # zip() stops at the shorter input, so empty or differently sized
    # samples would skip the per-item checks below. Rule that out first.
    assert len(old_experiences) > 0
    assert len(new_experiences) == len(old_experiences)

    # Order both samples by index so entries are compared like for like.
    sorted_new_experiences = sorted(new_experiences, key=lambda k: k.index)
    sorted_old_experiences = sorted(old_experiences, key=lambda k: k.index)
    for (new_sample, old_sample) in zip(sorted_new_experiences,
                                        sorted_old_experiences):
        # Same entry drawn, weight recomputed, reward untouched.
        assert new_sample.index == old_sample.index
        assert new_sample.weight != old_sample.weight
        assert new_sample.reward == old_sample.reward