def __init__(self, capacity=None, alpha=0.6, beta0=0.4, betasteps=2e5,
             eps=0.01, normalize_by_max=True, error_min=0, error_max=1):
    """Create the prioritized storage and set up priority weighting.

    Args:
        capacity: Forwarded to ``PrioritizedBuffer`` as ``capacity``.
        alpha, beta0, betasteps, eps, normalize_by_max: Forwarded
            positionally, in this order, to
            ``PriorityWeightError.__init__``.
        error_min, error_max: Forwarded by keyword to
            ``PriorityWeightError.__init__``.
    """
    error_bounds = {'error_min': error_min, 'error_max': error_max}
    self.memory = PrioritizedBuffer(capacity=capacity)
    # The mixin initializer is called explicitly on the class rather
    # than through super().
    PriorityWeightError.__init__(
        self, alpha, beta0, betasteps, eps, normalize_by_max,
        **error_bounds)
class PrioritizedReplayBuffer(ReplayBuffer, PriorityWeightError):
    """Stochastic Prioritization

    https://arxiv.org/pdf/1511.05952.pdf Section 3.3
    proportional prioritization

    Args:
        capacity (int): passed to ``PrioritizedBuffer`` as its capacity
        alpha, beta0, betasteps, eps (float): passed through to
            ``PriorityWeightError.__init__``
        normalize_by_max (bool): passed through to
            ``PriorityWeightError.__init__``
        error_min, error_max: passed through by keyword to
            ``PriorityWeightError.__init__``
    """

    def __init__(self, capacity=None,
                 alpha=0.6, beta0=0.4, betasteps=2e5, eps=0.01,
                 normalize_by_max=True, error_min=0, error_max=1):
        """Create the prioritized storage and initialise the weighting mixin."""
        self.memory = PrioritizedBuffer(capacity=capacity)
        PriorityWeightError.__init__(
            self, alpha, beta0, betasteps, eps, normalize_by_max,
            error_min=error_min, error_max=error_max)

    def sample(self, n):
        """Sample ``n`` entries and attach a weight to each of them.

        Each sampled entry gets its ``'weight'`` key set to the value
        computed by ``weights_from_probabilities`` from the sampling
        probabilities returned by the underlying memory.

        Args:
            n (int): number of entries to sample; must not exceed
                ``len(self.memory)``.

        Returns:
            The sampled entries, as returned by ``self.memory.sample``,
            each with ``'weight'`` set.

        Raises:
            AssertionError: if the memory holds fewer than ``n`` entries.
        """
        assert len(self.memory) >= n
        sampled, probabilities, min_prob = self.memory.sample(n)
        weights = self.weights_from_probabilities(probabilities, min_prob)
        for e, w in zip(sampled, weights):
            e['weight'] = w
        return sampled

    def update_errors(self, errors):
        """Turn ``errors`` into priorities and store them in the memory.

        The priorities come from ``priority_from_errors`` and are handed
        to ``self.memory.set_last_priority``.
        """
        self.memory.set_last_priority(self.priority_from_errors(errors))
def __init__(
        self,
        capacity=None,
        alpha=0.6,
        beta0=0.4,
        betasteps=2e5,
        eps=1e-8,
        normalize_by_max=True,
        default_priority_func=None,
        uniform_ratio=0,
        wait_priority_after_sampling=True,
        return_sample_weights=True,
        error_min=None,
        error_max=None,
):
    """Set up episodic and per-transition storage plus priority weighting.

    Args:
        capacity: Used as ``maxlen`` of the ``RandomAccessQueue`` and
            as the initial value of ``capacity_left``. The episodic
            ``PrioritizedBuffer`` itself is created with
            ``capacity=None``.
        alpha, beta0, betasteps, eps, normalize_by_max: Forwarded
            positionally to ``PriorityWeightError.__init__``.
        default_priority_func: Stored on the instance unchanged.
        uniform_ratio: Stored on the instance unchanged.
        wait_priority_after_sampling: Forwarded to the episodic
            ``PrioritizedBuffer``.
        return_sample_weights: Stored on the instance unchanged.
        error_min, error_max: Forwarded by keyword to
            ``PriorityWeightError.__init__``.
    """
    # Plain configuration values.
    self.default_priority_func = default_priority_func
    self.uniform_ratio = uniform_ratio
    self.return_sample_weights = return_sample_weights
    self.capacity_left = capacity

    # Storage: episodes are prioritized, transitions live in a queue.
    self.current_episode = []
    self.episodic_memory = PrioritizedBuffer(
        capacity=None,
        wait_priority_after_sampling=wait_priority_after_sampling,
    )
    self.memory = RandomAccessQueue(maxlen=capacity)

    error_bounds = {'error_min': error_min, 'error_max': error_max}
    PriorityWeightError.__init__(
        self, alpha, beta0, betasteps, eps, normalize_by_max,
        **error_bounds)
def __init__(self, capacity=None, alpha=0.6, beta0=0.4, betasteps=2e5,
             eps=1e-8, normalize_by_max=True):
    """Create the prioritized storage and set up priority weighting.

    Args:
        capacity: Forwarded to ``PrioritizedBuffer`` as ``capacity``.
        alpha, beta0, betasteps, eps, normalize_by_max: Forwarded
            positionally, in this order, to
            ``PriorityWeightError.__init__``.
    """
    weighting_args = (alpha, beta0, betasteps, eps, normalize_by_max)
    self.memory = PrioritizedBuffer(capacity=capacity)
    PriorityWeightError.__init__(self, *weighting_args)
class PrioritizedReplayBuffer(ReplayBuffer, PriorityWeightError):
    """Replay buffer with stochastic (proportional) prioritization.

    See Section 3.3 of https://arxiv.org/pdf/1511.05952.pdf

    Args:
        capacity (int): capacity in terms of number of transitions
        alpha (float): Exponent of errors to compute probabilities to sample
        beta0 (float): Initial value of beta
        betasteps (int): Steps to anneal beta to 1
        eps (float): To revisit a step after its error becomes near zero
        normalize_by_max (bool): Method to normalize weights. ``'batch'`` or
            ``True`` (default): divide by the maximum weight in the sampled
            batch. ``'memory'``: divide by the maximum weight in the memory.
            ``False``: do not normalize
        error_min, error_max: forwarded by keyword to
            ``PriorityWeightError.__init__``
        num_steps (int): must be positive; used as ``maxlen`` of the
            per-key deques held in ``last_n_transitions``
    """

    def __init__(self, capacity=None,
                 alpha=0.6, beta0=0.4, betasteps=2e5, eps=0.01,
                 normalize_by_max=True, error_min=0, error_max=1,
                 num_steps=1):
        self.capacity = capacity
        assert num_steps > 0
        self.num_steps = num_steps
        self.memory = PrioritizedBuffer(capacity=capacity)

        def _new_window():
            # Bounded deque holding at most ``num_steps`` items.
            return collections.deque([], maxlen=num_steps)

        self.last_n_transitions = collections.defaultdict(_new_window)

        error_bounds = {'error_min': error_min, 'error_max': error_max}
        PriorityWeightError.__init__(
            self, alpha, beta0, betasteps, eps, normalize_by_max,
            **error_bounds)

    def sample(self, n):
        """Sample ``n`` entries and tag the first element of each with a weight.

        The weights are computed by ``weights_from_probabilities`` from the
        probabilities returned by the underlying memory. For every sampled
        entry, the weight is written to ``entry[0]['weight']``.
        """
        assert len(self.memory) >= n
        batch, probs, lowest_prob = self.memory.sample(n)
        batch_weights = self.weights_from_probabilities(probs, lowest_prob)
        for entry, weight in zip(batch, batch_weights):
            first_step = entry[0]
            first_step['weight'] = weight
        return batch

    def update_errors(self, errors):
        """Convert ``errors`` to priorities and store them in the memory."""
        priorities = self.priority_from_errors(errors)
        self.memory.set_last_priority(priorities)
def __init__(self, capacity=None, alpha=0.6, beta0=0.4, betasteps=2e5,
             eps=0.01, normalize_by_max=True, error_min=0, error_max=1,
             num_steps=1):
    """Set up prioritized storage, n-step windows and priority weighting.

    Args:
        capacity: Stored as ``self.capacity`` and forwarded to
            ``PrioritizedBuffer``.
        alpha, beta0, betasteps, eps, normalize_by_max: Forwarded
            positionally to ``PriorityWeightError.__init__``.
        error_min, error_max: Forwarded by keyword to
            ``PriorityWeightError.__init__``.
        num_steps: Must be positive; used as ``maxlen`` of each deque
            created on demand in ``self.last_n_transitions``.

    Raises:
        AssertionError: If ``num_steps`` is not positive.
    """
    self.capacity = capacity
    assert num_steps > 0
    self.num_steps = num_steps
    self.memory = PrioritizedBuffer(capacity=capacity)

    def _new_window():
        # Bounded deque holding at most ``num_steps`` items.
        return collections.deque([], maxlen=num_steps)

    self.last_n_transitions = collections.defaultdict(_new_window)

    error_bounds = {'error_min': error_min, 'error_max': error_max}
    PriorityWeightError.__init__(
        self, alpha, beta0, betasteps, eps, normalize_by_max,
        **error_bounds)
class PrioritizedEpisodicReplayBuffer(
        EpisodicReplayBuffer, PriorityWeightError):
    """Episodic replay buffer whose episodes are sampled by priority.

    Finished episodes are kept in a ``PrioritizedBuffer``
    (``episodic_memory``); their transitions are additionally kept in a
    ``RandomAccessQueue`` (``memory``) bounded by ``capacity``.
    """

    def __init__(self, capacity=None,
                 alpha=0.6, beta0=0.4, betasteps=2e5, eps=1e-8,
                 normalize_by_max=True,
                 default_priority_func=None,
                 uniform_ratio=0,
                 wait_priority_after_sampling=True,
                 return_sample_weights=True,
                 error_min=None,
                 error_max=None,
                 ):
        # Plain configuration values.
        self.default_priority_func = default_priority_func
        self.uniform_ratio = uniform_ratio
        self.return_sample_weights = return_sample_weights
        self.capacity_left = capacity

        # Storage for the episode in progress and for finished ones.
        self.current_episode = []
        self.episodic_memory = PrioritizedBuffer(
            capacity=None,
            wait_priority_after_sampling=wait_priority_after_sampling)
        self.memory = RandomAccessQueue(maxlen=capacity)

        error_bounds = {'error_min': error_min, 'error_max': error_max}
        PriorityWeightError.__init__(
            self, alpha, beta0, betasteps, eps, normalize_by_max,
            **error_bounds)

    def sample_episodes(self, n_episodes, max_len=None):
        """Sample ``n_episodes`` episodes from the episodic memory.

        Args:
            n_episodes: Number of episodes to draw; must not exceed the
                number of stored episodes.
            max_len: If not ``None``, each sampled episode is replaced by
                ``random_subseq(episode, max_len)``.

        Returns:
            ``(episodes, weights)`` when ``return_sample_weights`` is
            truthy, otherwise just ``episodes``.
        """
        assert len(self.episodic_memory) >= n_episodes
        drawn, probs, lowest_prob = self.episodic_memory.sample(
            n_episodes, uniform_ratio=self.uniform_ratio)
        if max_len is not None:
            drawn = [random_subseq(episode, max_len) for episode in drawn]
        if not self.return_sample_weights:
            return drawn
        sample_weights = self.weights_from_probabilities(probs, lowest_prob)
        return drawn, sample_weights

    def update_errors(self, errors):
        """Convert ``errors`` to priorities and store them for the episodes."""
        priorities = self.priority_from_errors(errors)
        self.episodic_memory.set_last_priority(priorities)

    def stop_current_episode(self):
        """Commit the episode in progress, then evict old episodes if needed.

        Does nothing when there is no episode in progress. Otherwise the
        episode is added to both memories (with a priority from
        ``default_priority_func`` when one is set, else ``None``), and
        episodes are popped from the left of ``episodic_memory`` until
        ``capacity_left`` is no longer negative.
        """
        finished = self.current_episode
        if finished:
            if self.default_priority_func is None:
                priority = None
            else:
                priority = self.default_priority_func(finished)
            self.memory.extend(finished)
            self.episodic_memory.append(finished, priority=priority)
            if self.capacity_left is not None:
                self.capacity_left -= len(finished)
            self.current_episode = []
            # Evict oldest episodes until the transition budget is met.
            while (self.capacity_left is not None
                   and self.capacity_left < 0):
                evicted = self.episodic_memory.popleft()
                self.capacity_left += len(evicted)
        assert not self.current_episode