def test_per_buffer_reset_alpha():
    """Check that ``reset_alpha`` changes sample weights but not indices or rewards.

    The buffer is filled with random rewards and priorities, sampled once,
    then ``reset_alpha(0.5)`` is called and the buffer is sampled again.
    After ordering both samples by their ``index`` values, the indices and
    rewards must match pairwise while every weight must differ.

    NOTE(review): another function with this exact name is defined later in
    this file. The later definition replaces this one at module level, so
    this test would not be collected -- one of the two should be renamed.
    """

    def _columns_sorted_by_index(experiences):
        # Order every compared column by sample index so both samples line up.
        order = np.argsort(np.array(experiences['index']))
        return {
            key: np.array(experiences[key])[order]
            for key in ('index', 'weight', 'reward')
        }

    # Assign
    per_buffer = PERBuffer(10, 10, alpha=0.1)
    for _ in range(30):
        # Integer upper bound: ``randint`` is documented for integer limits.
        per_buffer.add(reward=np.random.randint(0, 10 ** 5),
                       priority=np.random.random())

    # Act
    old_experiences = per_buffer.sample()
    per_buffer.reset_alpha(0.5)
    new_experiences = per_buffer.sample()

    # Assert
    assert old_experiences is not None and new_experiences is not None
    old = _columns_sorted_by_index(old_experiences)
    new = _columns_sorted_by_index(new_experiences)

    # ``zip`` stops at the shorter input, so guard against a silent truncation
    # that would let samples of different sizes pass the pairwise checks.
    assert len(old['index']) == len(new['index'])

    assert all(i1 == i2 for (i1, i2) in zip(old['index'], new['index']))
    assert all(w1 != w2 for (w1, w2) in zip(old['weight'], new['weight']))
    assert all(r1 == r2 for (r1, r2) in zip(old['reward'], new['reward']))
def test_per_buffer_reset_alpha():
    """Check that ``reset_alpha`` changes sample weights but not indices or rewards.

    The buffer is filled with random rewards and priorities, sampled once via
    ``sample_list()``, then ``reset_alpha(0.5)`` is called and the buffer is
    sampled again. After ordering both lists by each sample's ``index``, the
    paired samples must share ``index`` and ``reward`` but differ in ``weight``.

    NOTE(review): an earlier function in this file has the same name and is
    replaced by this definition at module level -- one of the two should be
    renamed so both tests run.
    """
    # Assign
    per_buffer = PERBuffer(10, 10, alpha=0.1)
    for _ in range(30):
        # Integer upper bound: ``randint`` is documented for integer limits.
        per_buffer.add(reward=np.random.randint(0, 10 ** 5),
                       priority=np.random.random())

    # Act
    old_experiences = per_buffer.sample_list()
    per_buffer.reset_alpha(0.5)
    new_experiences = per_buffer.sample_list()

    # Assert
    assert old_experiences is not None and new_experiences is not None

    # ``zip`` stops at the shorter input, so guard against a silent truncation
    # that would let sample lists of different sizes pass the pairwise checks.
    assert len(new_experiences) == len(old_experiences)

    sorted_new_experiences = sorted(new_experiences, key=lambda k: k.index)
    sorted_old_experiences = sorted(old_experiences, key=lambda k: k.index)
    for (new_sample, old_sample) in zip(sorted_new_experiences,
                                        sorted_old_experiences):
        assert new_sample.index == old_sample.index
        assert new_sample.weight != old_sample.weight
        assert new_sample.reward == old_sample.reward