Exemple #1
0
def test_max_interval_tree():
    """Exercise MinSegmentTree range-minimum queries across a sequence of updates."""
    tree = MinSegmentTree(4)

    # Initial values; index 1 keeps the tree's neutral (default) value.
    tree[0] = 1.0
    tree[2] = 0.5
    tree[3] = 3.0

    for args, expected in [
        ((), 0.5),
        ((0, 2), 1.0),
        ((0, 3), 0.5),
        ((0, -1), 0.5),
        ((2, 4), 0.5),
        ((3, 4), 3.0),
    ]:
        assert np.isclose(tree.min(*args), expected)

    # Raising index 2 to 0.7 shifts every minimum that included it.
    tree[2] = 0.7

    for args, expected in [
        ((), 0.7),
        ((0, 2), 1.0),
        ((0, 3), 0.7),
        ((0, -1), 0.7),
        ((2, 4), 0.7),
        ((3, 4), 3.0),
    ]:
        assert np.isclose(tree.min(*args), expected)

    # Raising index 2 above all others makes indices 0 and 3 the minima.
    tree[2] = 4.0

    for args, expected in [
        ((), 1.0),
        ((0, 2), 1.0),
        ((0, 3), 1.0),
        ((0, -1), 1.0),
        ((2, 4), 3.0),
        ((2, 3), 4.0),
        ((2, -1), 4.0),
        ((3, 4), 3.0),
    ]:
        assert np.isclose(tree.min(*args), expected)
Exemple #2
0
    def __init__(self, size, alpha):
        """Create Prioritized Replay buffer.

        Parameters
        ----------
        size: int
            Max number of transitions to store in the buffer. When the buffer
            overflows the old memories are dropped.
        alpha: float
            How much prioritization is used
            (0 - no prioritization, 1 - full prioritization).

        See Also
        --------
        ReplayBuffer.__init__
        """
        super(PrioritizedReplayBuffer, self).__init__(size)
        assert alpha >= 0
        self._alpha = alpha

        # Segment trees require a power-of-two capacity; round `size` up.
        capacity = 1
        while capacity < size:
            capacity <<= 1

        self._it_sum = SumSegmentTree(capacity)
        self._it_min = MinSegmentTree(capacity)
        # Priority assigned to freshly added transitions.
        self._max_priority = 1.0
    def __init__(self, size, alpha, epsilon, timesteps, initial_p, final_p):
        """Create a replay buffer that maintains two independent priority
        distributions (two sum/min segment-tree pairs) over one storage.
        """
        super(DoublePrioritizedReplayBuffer, self).__init__(size)
        assert alpha > 0
        self._alpha = alpha
        self._epsilon = epsilon
        # Anneals the importance-sampling exponent over `timesteps` steps.
        self._beta_schedule = LinearSchedule(timesteps, initial_p=initial_p, final_p=final_p)

        # Segment trees require a power-of-two capacity; round `size` up.
        capacity = 1
        while capacity < size:
            capacity <<= 1

        # First priority distribution.
        self._it_sum = SumSegmentTree(capacity)
        self._it_min = MinSegmentTree(capacity)
        self._max_priority = 1.0

        # Second, independent priority distribution.
        self._it_sum2 = SumSegmentTree(capacity)
        self._it_min2 = MinSegmentTree(capacity)
        self._max_priority2 = 1.0
 def __init__(self, size, alpha):
     """Create a prioritized replay buffer holding at most `size` transitions."""
     super(PrioritizedReplayBuffer, self).__init__(size)
     assert alpha > 0
     self._alpha = alpha
     # Segment trees require a power-of-two capacity; round `size` up.
     capacity = 1
     while capacity < size:
         capacity <<= 1
     self._it_sum = SumSegmentTree(capacity)
     self._it_min = MinSegmentTree(capacity)
     # Priority assigned to freshly added transitions.
     self._max_priority = 1.0
Exemple #5
0
    def __init__(self, size, frame_history_len, alpha):
        """Create a prioritized frame replay buffer.

        `alpha` controls how strongly priorities affect sampling
        (0 - no prioritization, 1 - full prioritization).
        """
        super().__init__(size, frame_history_len)

        # Prioritization exponent.
        self._alpha = alpha

        # Segment trees require a power-of-two capacity; round `size` up.
        capacity = 1
        while capacity < size:
            capacity <<= 1

        self._it_sum = SumSegmentTree(capacity)
        self._it_min = MinSegmentTree(capacity)
        # Priority assigned to freshly added transitions.
        self._max_priority = 1.0
Exemple #6
0
    def __init__(self, size, frame_history_len, alpha, lander=False):
        """This is a memory efficient implementation of the replay buffer.

        The specific memory optimizations used here are:
            - only store each frame once rather than k times
              even if every observation normally consists of k last frames
            - store frames as np.uint8 (actually it is most time-performance
              to cast them back to float32 on GPU to minimize memory transfer
              time)
            - store frame_t and frame_(t+1) in the same buffer.

        For the typical use case in Atari Deep RL buffer with 1M frames the
        total memory footprint of this buffer is
        10^6 * 84 * 84 bytes ~= 7 gigabytes.

        Warning! Assumes that returning frame of zeros at the beginning
        of the episode, when there is less frames than `frame_history_len`,
        is acceptable.

        Parameters
        ----------
        size: int
            Max number of transitions to store in the buffer. When the buffer
            overflows the old memories are dropped.
        frame_history_len: int
            Number of memories to be retried for each observation.
        alpha: float
            How much prioritization is used
            (0 - no prioritization, 1 - full prioritization).
        lander: bool
            Environment-specific storage flag kept on the instance.
        """
        self.lander = lander

        self.size = size
        self.frame_history_len = frame_history_len

        # Write cursor and current fill level of the ring buffer.
        self.next_idx = 0
        self.num_in_buffer = 0

        # Storage arrays are allocated lazily when the first frame arrives.
        self.obs = None
        self.action = None
        self.reward = None
        self.done = None

        assert 0 <= alpha <= 1
        self.alpha = alpha

        # Segment trees require a power-of-two capacity; round `size` up.
        capacity = 1
        while capacity < size:
            capacity <<= 1
        self._it_sum = SumSegmentTree(capacity)
        self._it_min = MinSegmentTree(capacity)
        # Priority assigned to freshly added transitions.
        self._max_priority = 1.0
 def __init__(self,
              limit,
              alpha,
              transition_small_epsilon=1e-6,
              demo_epsilon=0.2,
              nb_rollout_steps=100):
     """Create a prioritized memory on top of the base rollout memory."""
     super(PrioritizedMemory, self).__init__(limit, nb_rollout_steps)
     assert alpha > 0
     self._alpha = alpha
     # Small constant added to TD errors so no transition has zero priority.
     self._transition_small_epsilon = transition_small_epsilon
     # Extra priority bonus applied to demonstration transitions.
     self._demo_epsilon = demo_epsilon
     # Size must be power of 2 for the segment trees; round `maxsize` up.
     capacity = 1
     while capacity < self.maxsize:
         capacity <<= 1
     self._it_sum = SumSegmentTree(capacity)
     self._it_min = MinSegmentTree(capacity)
     self._max_priority = 1.0
Exemple #8
0
    def __init__(self, buffer_shapes, size_in_transitions, T, sample_transitions, alpha, env_name):
        """Create Prioritized Replay buffer.
        """
        super(PrioritizedReplayBuffer, self).__init__(buffer_shapes, size_in_transitions, T, sample_transitions)
        assert alpha >= 0
        self._alpha = alpha

        self.size_in_transitions = size_in_transitions
        # Segment trees require a power-of-two capacity; round the
        # transition count up.
        capacity = 1
        while capacity < size_in_transitions:
            capacity <<= 1

        self._it_sum = SumSegmentTree(capacity)
        self._it_min = MinSegmentTree(capacity)
        # Priority assigned to freshly added transitions.
        self._max_priority = 1.0

        self.T = T
        # Per-episode, per-timestep TD-error ('td') and auxiliary ('e')
        # buffers alongside the base storage.
        self.buffers['td'] = np.zeros([self.size, self.T])
        self.buffers['e'] = np.zeros([self.size, self.T])
        self.env_name = env_name
Exemple #9
0
    def __init__(self, max_steps, num_processes, gamma, prio_alpha, obs_shape,
                 action_space, recurrent_hidden_state_size, device):
        """Fixed-capacity episode storage with optional prioritized sampling.

        Parameters
        ----------
        max_steps: int
            Capacity of the storage in environment steps.
        num_processes: int
            Number of parallel environments feeding the buffer.
        gamma: float
            Discount factor (stored for return computation elsewhere).
        prio_alpha: float
            Prioritization exponent; 0 disables prioritized sampling.
        obs_shape: tuple
            Shape of a single observation.
        action_space: object
            Action space; a `Discrete` space stores one long index per step,
            otherwise `action_space.shape[0]` floats are stored.
        recurrent_hidden_state_size: int
            Width of the recurrent hidden state stored per step.
        device: object
            Device handle kept on the instance (presumably a torch.device
            used when sampling — confirm against callers).
        """
        self.max_steps = max_steps
        self.num_processes = num_processes
        self.gamma = gamma
        self.device = device

        # stored episode data
        self.obs = torch.zeros(max_steps, *obs_shape)
        self.recurrent_hidden_states = torch.zeros(
            max_steps, recurrent_hidden_state_size)
        self.returns = torch.zeros(max_steps, 1)
        if action_space.__class__.__name__ == 'Discrete':
            self.actions = torch.zeros(max_steps, 1).long()
        else:
            self.actions = torch.zeros(max_steps, action_space.shape[0])
        self.masks = torch.ones(max_steps, 1)
        self.next_idx = 0
        self.num_steps = 0

        # store (full) episode stats
        self.episode_step_count = 0
        self.episode_rewards = deque()
        self.episode_steps = deque()

        # currently running (accumulating) episodes
        self.running_episodes = [[] for _ in range(num_processes)]

        if prio_alpha > 0:
            # Sampling priority is enabled if prio_alpha > 0.
            # Priority algorithm ripped from OpenAI Baselines:
            # https://github.com/openai/baselines/blob/master/baselines/deepq/replay_buffer.py
            self.prio_alpha = prio_alpha
            # Round capacity up to the next power of two with exact integer
            # arithmetic. The previous `1 << math.ceil(math.log2(max_steps))`
            # relied on float precision (it can round DOWN for very large
            # sizes) and raised ValueError for max_steps == 0; bit_length()
            # matches the doubling loop used by the other buffers in this
            # file for every positive size.
            tree_capacity = 1 << max(self.max_steps - 1, 0).bit_length()
            self.prio_sum_tree = SumSegmentTree(tree_capacity)
            self.prio_min_tree = MinSegmentTree(tree_capacity)
            self.prio_max = 1.0
        else:
            self.prio_alpha = 0
Exemple #10
0
    def __init__(self,
                 size,
                 alpha1,
                 alpha2=1.0,
                 candidates_size=5,
                 env_id='PongNoFrameskip-v4'):
        """Create a Double Prioritized State Recycled ReplayBuffer

        :param size: int
            Max number of transitions to store in the buffer.
        :param alpha1: float
            The rate of the prioritization of sampling.
        :param alpha2: float
            The rate of the prioritization of replacement.
        :param candidates_size: int
            The number of the candidates chosen in replacement.
        :param env_id: str
            The name of the gym [atari] environment.
        """
        super().__init__(size)

        # Validate and record the prioritization exponents.
        assert alpha1 >= 0
        self._alpha1 = alpha1
        assert alpha2 >= 0
        self._alpha2 = alpha2

        assert candidates_size > 0
        self.candidates_size = candidates_size
        self.env_id = env_id

        # Segment trees require a power-of-two capacity; round `size` up.
        capacity = 1
        while capacity < size:
            capacity <<= 1

        self._it_sum = SumSegmentTree(capacity)
        self._it_min = MinSegmentTree(capacity)
        # Extra sum tree over inverse priorities, used to pick replacements.
        self._it_inverse_sum = SumSegmentTree(capacity)
        self._max_priority = 1.0