import numpy as np

# Import path assumes OpenAI Baselines' segment_tree module.
from baselines.common.segment_tree import MinSegmentTree


def test_max_interval_tree():
    # Despite the historical name, this exercises MinSegmentTree:
    # range-minimum queries. Unset slots hold the neutral element (inf).
    tree = MinSegmentTree(4)

    tree[0] = 1.0
    tree[2] = 0.5
    tree[3] = 3.0

    assert np.isclose(tree.min(), 0.5)
    assert np.isclose(tree.min(0, 2), 1.0)
    assert np.isclose(tree.min(0, 3), 0.5)
    assert np.isclose(tree.min(0, -1), 0.5)
    assert np.isclose(tree.min(2, 4), 0.5)
    assert np.isclose(tree.min(3, 4), 3.0)

    # Index 2 raised to 0.7: still the global minimum.
    tree[2] = 0.7

    assert np.isclose(tree.min(), 0.7)
    assert np.isclose(tree.min(0, 2), 1.0)
    assert np.isclose(tree.min(0, 3), 0.7)
    assert np.isclose(tree.min(0, -1), 0.7)
    assert np.isclose(tree.min(2, 4), 0.7)
    assert np.isclose(tree.min(3, 4), 3.0)

    # Index 2 raised to 4.0: no longer the minimum of any range but its own slot.
    tree[2] = 4.0

    assert np.isclose(tree.min(), 1.0)
    assert np.isclose(tree.min(0, 2), 1.0)
    assert np.isclose(tree.min(0, 3), 1.0)
    assert np.isclose(tree.min(0, -1), 1.0)
    assert np.isclose(tree.min(2, 4), 3.0)
    assert np.isclose(tree.min(2, 3), 4.0)
    assert np.isclose(tree.min(2, -1), 4.0)
    assert np.isclose(tree.min(3, 4), 3.0)
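# A minimal sketch of why these trees matter for the buffers below, assuming
# the Baselines SumSegmentTree API (sum / find_prefixsum_idx): sampling an
# index with probability proportional to its stored priority reduces to a
# prefix-sum search, which the sum tree answers in O(log capacity).
import random

from baselines.common.segment_tree import SumSegmentTree

tree = SumSegmentTree(4)  # capacity must be a power of 2
tree[0], tree[1], tree[2] = 1.0, 3.0, 6.0

# Draw a mass uniformly in [0, total) and find the first index whose
# cumulative priority exceeds it: index 2 comes back ~60% of the time.
mass = random.random() * tree.sum()
idx = tree.find_prefixsum_idx(mass)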
def __init__(self, size, alpha): """Create Prioritized Replay buffer. Parameters ---------- size: int Max number of transitions to store in the buffer. When the buffer overflows the old memories are dropped. alpha: float how much prioritization is used (0 - no prioritization, 1 - full prioritization) See Also -------- ReplayBuffer.__init__ """ super(PrioritizedReplayBuffer, self).__init__(size) assert alpha >= 0 self._alpha = alpha it_capacity = 1 while it_capacity < size: it_capacity *= 2 self._it_sum = SumSegmentTree(it_capacity) self._it_min = MinSegmentTree(it_capacity) self._max_priority = 1.0
def __init__(self, size, alpha, epsilon, timesteps, initial_p, final_p):
    super(DoublePrioritizedReplayBuffer, self).__init__(size)
    assert alpha > 0
    self._alpha = alpha
    self._epsilon = epsilon
    # Anneal the importance-sampling exponent beta from initial_p to
    # final_p over `timesteps` steps.
    self._beta_schedule = LinearSchedule(timesteps, initial_p=initial_p,
                                         final_p=final_p)

    # Segment tree capacity must be a power of 2.
    it_capacity = 1
    while it_capacity < size:
        it_capacity *= 2

    # First priority set.
    self._it_sum = SumSegmentTree(it_capacity)
    self._it_min = MinSegmentTree(it_capacity)
    self._max_priority = 1.0

    # Second, independent priority set (hence "double" prioritized).
    self._it_sum2 = SumSegmentTree(it_capacity)
    self._it_min2 = MinSegmentTree(it_capacity)
    self._max_priority2 = 1.0
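# A hedged sketch of what the beta schedule above is for, assuming the usual
# PER importance-sampling correction; the helper name and its `t`/`idx`
# arguments are hypothetical, not from the source class. The min tree supplies
# the smallest sampling probability, from which the largest possible weight is
# derived for normalization.
def _is_weight(self, idx, t):
    beta = self._beta_schedule.value(t)              # initial_p -> final_p
    p_min = self._it_min.min() / self._it_sum.sum()  # rarest transition
    max_weight = (p_min * len(self)) ** (-beta)

    p_sample = self._it_sum[idx] / self._it_sum.sum()
    return (p_sample * len(self)) ** (-beta) / max_weight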
def __init__(self, size, alpha):
    super(PrioritizedReplayBuffer, self).__init__(size)
    assert alpha > 0
    self._alpha = alpha

    # Segment tree capacity must be a power of 2.
    it_capacity = 1
    while it_capacity < size:
        it_capacity *= 2

    self._it_sum = SumSegmentTree(it_capacity)
    self._it_min = MinSegmentTree(it_capacity)
    self._max_priority = 1.0
def __init__(self, size, frame_history_len, alpha):
    super().__init__(size, frame_history_len)
    # Prioritization exponent.
    self._alpha = alpha

    # Segment tree capacity must be a power of 2.
    it_capacity = 1
    while it_capacity < size:
        it_capacity *= 2

    self._it_sum = SumSegmentTree(it_capacity)
    self._it_min = MinSegmentTree(it_capacity)
    self._max_priority = 1.0
def __init__(self, size, frame_history_len, alpha, lander=False):
    """This is a memory-efficient implementation of the replay buffer.

    The specific memory optimizations used here are:
        - only store each frame once rather than k times, even if
          every observation normally consists of the k last frames
        - store frames as np.uint8 (it is usually fastest to cast them
          back to float32 on the GPU, minimizing memory transfer time)
        - store frame_t and frame_(t+1) in the same buffer.

    For the typical Atari Deep RL use case of a buffer with 1M frames,
    the total memory footprint of this buffer is
    10^6 * 84 * 84 bytes ~= 7 gigabytes.

    Warning! Assumes that returning a frame of zeros at the beginning of
    the episode, when there are fewer frames than `frame_history_len`,
    is acceptable.

    Parameters
    ----------
    size: int
        Max number of transitions to store in the buffer. When the
        buffer overflows the old memories are dropped.
    frame_history_len: int
        Number of memories to be retrieved for each observation.
    alpha: float
        How much prioritization is used (0 - none, 1 - full).
    """
    self.lander = lander

    self.size = size
    self.frame_history_len = frame_history_len
    self.next_idx = 0
    self.num_in_buffer = 0

    self.obs = None
    self.action = None
    self.reward = None
    self.done = None

    assert 0 <= alpha <= 1
    self.alpha = alpha

    # Segment tree capacity must be a power of 2.
    it_capacity = 1
    while it_capacity < size:
        it_capacity *= 2

    self._it_sum = SumSegmentTree(it_capacity)
    self._it_min = MinSegmentTree(it_capacity)
    self._max_priority = 1.0
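# Quick arithmetic behind the "~7 gigabytes" claim in the docstring above:
# storing 1M single uint8 frames of 84x84 costs about 7 GB, whereas naively
# storing 4-frame stacks for both obs and next_obs would multiply that by 8.
frames = 10**6
single_copy = frames * 84 * 84       # 7,056,000,000 bytes ~= 7 GB
naive_stacked = single_copy * 4 * 2  # stacks for obs and next_obs ~= 56 GB
print(single_copy / 1e9, naive_stacked / 1e9)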
def __init__(self, limit, alpha, transition_small_epsilon=1e-6,
             demo_epsilon=0.2, nb_rollout_steps=100):
    super(PrioritizedMemory, self).__init__(limit, nb_rollout_steps)
    assert alpha > 0
    self._alpha = alpha
    # Small constant keeping every transition's priority strictly positive.
    self._transition_small_epsilon = transition_small_epsilon
    # Extra priority bonus applied to demonstration transitions
    # (as in DDPG-from-demonstrations-style setups).
    self._demo_epsilon = demo_epsilon

    it_capacity = 1
    while it_capacity < self.maxsize:
        it_capacity *= 2  # Size must be a power of 2

    self._it_sum = SumSegmentTree(it_capacity)
    self._it_min = MinSegmentTree(it_capacity)
    self._max_priority = 1.0
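# The capacity-doubling loop recurs in every snippet here; a hedged one-line
# equivalent using Python's int.bit_length, which behaves identically for
# size >= 1 (rounds up to the next power of 2, leaving exact powers alone):
def next_power_of_two(size):
    return 1 << (size - 1).bit_length()

assert next_power_of_two(1) == 1
assert next_power_of_two(5) == 8
assert next_power_of_two(8) == 8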
def __init__(self, buffer_shapes, size_in_transitions, T,
             sample_transitions, alpha, env_name):
    """Create Prioritized Replay buffer."""
    super(PrioritizedReplayBuffer, self).__init__(
        buffer_shapes, size_in_transitions, T, sample_transitions)
    assert alpha >= 0
    self._alpha = alpha

    # Segment tree capacity must be a power of 2.
    it_capacity = 1
    self.size_in_transitions = size_in_transitions
    while it_capacity < size_in_transitions:
        it_capacity *= 2

    self._it_sum = SumSegmentTree(it_capacity)
    self._it_min = MinSegmentTree(it_capacity)
    self._max_priority = 1.0

    self.T = T
    # Auxiliary per-timestep buffers ('td' holds TD errors) stored
    # alongside the regular episode buffers for priority bookkeeping.
    self.buffers['td'] = np.zeros([self.size, self.T])
    self.buffers['e'] = np.zeros([self.size, self.T])
    self.env_name = env_name
def __init__(self, max_steps, num_processes, gamma, prio_alpha,
             obs_shape, action_space, recurrent_hidden_state_size, device):
    self.max_steps = max_steps
    self.num_processes = num_processes
    self.gamma = gamma
    self.device = device

    # stored episode data
    self.obs = torch.zeros(max_steps, *obs_shape)
    self.recurrent_hidden_states = torch.zeros(
        max_steps, recurrent_hidden_state_size)
    self.returns = torch.zeros(max_steps, 1)
    if action_space.__class__.__name__ == 'Discrete':
        self.actions = torch.zeros(max_steps, 1).long()
    else:
        self.actions = torch.zeros(max_steps, action_space.shape[0])
    self.masks = torch.ones(max_steps, 1)
    self.next_idx = 0
    self.num_steps = 0

    # store (full) episode stats
    self.episode_step_count = 0
    self.episode_rewards = deque()
    self.episode_steps = deque()

    # currently running (accumulating) episodes
    self.running_episodes = [[] for _ in range(num_processes)]

    if prio_alpha > 0:
        # Sampling priority is enabled if prio_alpha > 0.
        # Priority algorithm adapted from OpenAI Baselines:
        # https://github.com/openai/baselines/blob/master/baselines/deepq/replay_buffer.py
        self.prio_alpha = prio_alpha
        # Round capacity up to the next power of 2.
        tree_capacity = 1 << math.ceil(math.log2(self.max_steps))
        self.prio_sum_tree = SumSegmentTree(tree_capacity)
        self.prio_min_tree = MinSegmentTree(tree_capacity)
        self.prio_max = 1.0
    else:
        self.prio_alpha = 0
def __init__(self, size, alpha1, alpha2=1.0, candidates_size=5,
             env_id='PongNoFrameskip-v4'):
    """Create a Double Prioritized State Recycled replay buffer.

    :param size: int
        Max number of transitions to store in the buffer.
    :param alpha1: float
        The rate of the prioritization of sampling.
    :param alpha2: float
        The rate of the prioritization of replacement.
    :param candidates_size: int
        The number of candidates considered for replacement.
    :param env_id: str
        The name of the gym [atari] environment.
    """
    super().__init__(size)
    assert alpha1 >= 0
    self._alpha1 = alpha1
    assert alpha2 >= 0
    self._alpha2 = alpha2
    assert candidates_size > 0
    self.candidates_size = candidates_size
    self.env_id = env_id

    # Segment tree capacity must be a power of 2.
    it_capacity = 1
    while it_capacity < size:
        it_capacity *= 2

    self._it_sum = SumSegmentTree(it_capacity)
    self._it_min = MinSegmentTree(it_capacity)
    # Inverse-priority tree, used on the replacement side.
    self._it_inverse_sum = SumSegmentTree(it_capacity)
    self._max_priority = 1.0
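# A hedged sketch of what the extra _it_inverse_sum tree enables; the method
# name is hypothetical, not from the source class. By storing inverse
# priorities, prefix-sum sampling on this tree favors *low*-priority
# transitions, yielding candidates_size candidate slots to recycle when the
# buffer is full.
import random

def _sample_replacement_candidates(self):
    candidates = []
    for _ in range(self.candidates_size):
        mass = random.random() * self._it_inverse_sum.sum()
        candidates.append(self._it_inverse_sum.find_prefixsum_idx(mass))
    return candidates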