import random
from collections import Counter

import pytest

# Assumed import paths; point these at wherever Action_Balanced_Replay_Buffer
# and Replay_Buffer live in your project layout.
from utilities.data_structures.Action_Balanced_Replay_Buffer import Action_Balanced_Replay_Buffer
from utilities.data_structures.Replay_Buffer import Replay_Buffer

def test_sample_statistics_correct():
    """Tests that sampled experiences correspond to expected statistics"""
    tries = 5
    for random_seed in range(tries):
        for num_actions in range(1, 7):
            for buffer_size in [random.randint(55, 9999) for _ in range(10)]:
                for batch_size in [random.randint(8, 200) for _ in range(10)]:
                    buffer = Action_Balanced_Replay_Buffer(buffer_size, batch_size, random.randint(0, 2000000), num_actions)
                    for _ in range(500):
                        random_action = random.randint(0, num_actions - 1)
                        buffer.add_experience(1, random_action, 1, 0, 0)
                    states, actions, rewards, next_states, dones = buffer.sample()
                    actions = [action.item() for action in actions]
                    assert len(actions) == batch_size
                    count = Counter(actions)
                    action_count = count[0]
                    for action in range(num_actions):
                        assert abs(count[action] - action_count) < 2, count
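
# A minimal sketch of the balanced-sampling idea the tests in this file rely
# on. Illustration only: the deque-per-action layout, round-robin draw and
# final shuffle are assumptions, not the repo's actual implementation.
from collections import deque

class BalancedBufferSketch:
    def __init__(self, buffer_size, batch_size, seed, num_actions):
        self.rng = random.Random(seed)
        self.batch_size = batch_size
        # One bounded deque per action id, so no action can crowd out the others
        self.memories = {action: deque(maxlen=max(1, buffer_size // num_actions))
                         for action in range(num_actions)}

    def add_experience(self, state, action, reward, next_state, done):
        self.memories[action].append((state, action, reward, next_state, done))

    def sample(self):
        # A balanced batch needs at least one experience per action; this is
        # the assertion test_add_experience_throws_error expects from sample()
        assert all(self.memories.values()), "every action needs an experience"
        actions = sorted(self.memories)
        # Round-robin over actions: per-action counts in a batch differ by at
        # most 1, i.e. the abs(count[action] - action_count) < 2 bound above
        batch = [self.rng.choice(self.memories[actions[ix % len(actions)]])
                 for ix in range(self.batch_size)]
        self.rng.shuffle(batch)  # batch position should carry no action signal
        return batch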
    def put_adapted_experiences_in_a_replay_buffer(self, action_id_to_actions):
        """Adds experiences to the replay buffer after re-imagining that the actions taken were macro-actions according to
         action_rules as well as primitive actions.

         NOTE that we want to put both primitive actions and macro-actions into replay buffer so that it can learn that
         its better to do a macro-action rather than the same primitive actions (which we will enforce with reward penalty)
         """

        actions_to_action_id = {v: k for k, v in action_id_to_actions.items()}

        self.num_actions = len(action_id_to_actions)

        print(actions_to_action_id)

        for key in actions_to_action_id.keys():
            assert isinstance(key, tuple)
            assert isinstance(actions_to_action_id[key], int)

        episodes = len(self.states)
        for data_type in [
                self.states, self.next_states, self.rewards, self.actions,
                self.dones
        ]:
            assert len(data_type) == episodes

        max_action_length = self.calculate_max_action_length(
            actions_to_action_id)

        if self.action_balanced_replay_buffer:
            print("Using action balanced replay buffer")
            replay_buffer = Action_Balanced_Replay_Buffer(
                self.buffer_size,
                self.batch_size,
                self.seed,
                self.num_actions,  # 4th positional parameter, as in the tests
                self.use_GPU)
        else:
            print("Using ordinary replay buffer")
            replay_buffer = Replay_Buffer(self.buffer_size, self.batch_size,
                                          self.seed)

        for episode_ix in range(episodes):
            self.add_adapted_experience_for_an_episode(episode_ix,
                                                       actions_to_action_id,
                                                       max_action_length,
                                                       replay_buffer)

        return replay_buffer
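
# Hedged sketch of the calculate_max_action_length helper referenced above,
# assuming (as the assertions on the keys suggest) that actions_to_action_id
# is keyed by tuples of primitive actions such as (0,) or (2, 1, 1).
def calculate_max_action_length(actions_to_action_id):
    # The longest macro-action bounds how far ahead an episode must be
    # scanned when re-imagining primitive actions as macro-actions
    return max(len(action_tuple) for action_tuple in actions_to_action_id)

assert calculate_max_action_length({(0,): 0, (2, 1): 1, (0, 0, 1): 2}) == 3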
def test_add_experience():
    """Tests that add_experience works correctly"""
    buffer = Action_Balanced_Replay_Buffer(6, 4, 0, 3)

    rewards = [0 for _ in range(4)]
    next_states = [0 for _ in range(4)]
    states = [0 for _ in range(4)]
    dones = [0 for _ in range(4)]
    actions = [0, 1, 2, 0]

    for state, action, reward, next_state, done in zip(states, actions, rewards, next_states, dones):
        buffer.add_experience(state, action, reward, next_state, done)

    assert len(buffer.memories[0]) == 2
    assert len(buffer.memories[1]) == 1
    assert len(buffer.memories[2]) == 1

    buffer.add_experience(99, 0, 0, 0, 0)
    assert len(buffer.memories[0]) == 2
    assert buffer.memories[0][1].state == 99

    buffer = Action_Balanced_Replay_Buffer(6, 4, 0, 3)
    buffer.add_experience(states, actions, rewards, next_states, dones)
    assert len(buffer.memories[0]) == 2
    assert len(buffer.memories[1]) == 1
    assert len(buffer.memories[2]) == 1

    buffer.add_experience(99, 0, 0, 0, 0)
    assert len(buffer.memories[0]) == 2
    assert buffer.memories[0][1].state == 99
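
# Sketch of the dual single/list interface that test_add_experience exercises.
# The isinstance dispatch and the Experience namedtuple are assumptions based
# on the two call styles and the .state attribute access in the test above.
from collections import defaultdict, namedtuple

Experience = namedtuple("Experience", ["state", "action", "reward", "next_state", "done"])

def add_experience_sketch(memories, states, actions, rewards, next_states, dones):
    if isinstance(dones, list):
        # Batched call: parallel lists, one experience per element
        for values in zip(states, actions, rewards, next_states, dones):
            experience = Experience(*values)
            memories[experience.action].append(experience)
    else:
        # Single call: the scalar arguments describe one experience
        experience = Experience(states, actions, rewards, next_states, dones)
        memories[experience.action].append(experience)

memories = defaultdict(list)
add_experience_sketch(memories, [0, 0], [0, 1], [1, 1], [0, 0], [0, 0])
add_experience_sketch(memories, 99, 0, 0, 0, 0)
assert memories[0][1].state == 99  # mirrors the check in the test above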
def test_sample_correctly():
    """Tests that sample works correctly"""
    buffer = Action_Balanced_Replay_Buffer(20, 4, 0, 3)
    buffer.add_experience(3, 2, 1, 0, 0)
    buffer.add_experience(2, 0, 1, 0, 0)
    buffer.add_experience(1, 1, 1, 0, 0)
    states, actions, rewards, next_states, dones = buffer.sample()

    for var in [states, actions, rewards, next_states, dones]:
        assert len(var) == 4

    num_occurrences = 0
    tries = 50

    for random_seed in range(tries):
        buffer = Action_Balanced_Replay_Buffer(20, 4, random_seed, 3)
        buffer.add_experience(3, 2, 1, 0, 0)
        buffer.add_experience(2, 0, 1, 0, 0)
        buffer.add_experience(1, 1, 1, 0, 0)
        states, actions, rewards, next_states, dones = buffer.sample()
        if states[2] == 3.0:
            num_occurrences += 1
    assert tries / 5 < num_occurrences < tries / 2
def test_add_experience_throws_error():
    """Tests that add_experience works correctly"""
    buffer = Action_Balanced_Replay_Buffer(20, 4, 0, 3)
    with pytest.raises(KeyError):
        # Action 99 has no memory bucket in a 3-action buffer
        buffer.add_experience(3, 99, 1, 0, 0)

    buffer = Action_Balanced_Replay_Buffer(20, 4, 0, 3)
    buffer.add_experience(3, 2, 1, 0, 0)

    with pytest.raises(AssertionError):
        buffer.sample()