예제 #1
0
 def __init__(
     self,
     capacity=None,
     alpha=0.6,
     beta0=0.4,
     betasteps=2e5,
     eps=0.01,
     normalize_by_max=True,
     error_min=0,
     error_max=1,
 ):
     """Create the prioritized storage and set up priority/weight handling.

     Args:
         capacity: Forwarded to ``PrioritizedBuffer`` as ``capacity``.
         alpha, beta0, betasteps, eps, normalize_by_max: Forwarded
             positionally to ``PriorityWeightError.__init__``.
         error_min, error_max: Forwarded by keyword to
             ``PriorityWeightError.__init__``.
     """
     storage = PrioritizedBuffer(capacity=capacity)
     self.memory = storage
     # The mixin initializer is called explicitly (not through super()).
     error_bounds = {'error_min': error_min, 'error_max': error_max}
     PriorityWeightError.__init__(
         self,
         alpha,
         beta0,
         betasteps,
         eps,
         normalize_by_max,
         **error_bounds
     )
예제 #2
0
class PrioritizedReplayBuffer(ReplayBuffer, PriorityWeightError):
    """Stochastic Prioritization

    https://arxiv.org/pdf/1511.05952.pdf Section 3.3
    proportional prioritization

    Args:
        capacity (int): passed to ``PrioritizedBuffer`` as its capacity
        alpha, beta0, betasteps, eps (float): forwarded to
            ``PriorityWeightError.__init__``
        normalize_by_max (bool): forwarded to
            ``PriorityWeightError.__init__``
        error_min, error_max: forwarded by keyword to
            ``PriorityWeightError.__init__`` (presumably bounds applied to
            errors before they become priorities; confirm there)
    """

    def __init__(self, capacity=None,
                 alpha=0.6, beta0=0.4, betasteps=2e5, eps=0.01,
                 normalize_by_max=True, error_min=0, error_max=1):
        """Set up the prioritized storage and the priority/weight mixin."""
        self.memory = PrioritizedBuffer(capacity=capacity)
        # The mixin initializer is called explicitly (not through super()).
        PriorityWeightError.__init__(
            self, alpha, beta0, betasteps, eps, normalize_by_max,
            error_min=error_min, error_max=error_max)

    def sample(self, n):
        """Sample ``n`` items from memory and attach a weight to each.

        Args:
            n (int): number of items to sample; the memory must hold at
                least this many.

        Returns:
            The items returned by ``self.memory.sample(n)``, each with its
            ``'weight'`` entry set from ``weights_from_probabilities``.

        Raises:
            AssertionError: if the memory holds fewer than ``n`` items.
        """
        assert len(self.memory) >= n
        sampled, probabilities, min_prob = self.memory.sample(n)
        weights = self.weights_from_probabilities(probabilities, min_prob)
        for e, w in zip(sampled, weights):
            # Mutates the sampled item in place.
            e['weight'] = w
        return sampled

    def update_errors(self, errors):
        """Convert ``errors`` to priorities and hand them to the memory.

        The priorities come from ``priority_from_errors`` and are passed to
        ``self.memory.set_last_priority`` (presumably applied to the most
        recently sampled items; confirm in ``PrioritizedBuffer``).
        """
        self.memory.set_last_priority(self.priority_from_errors(errors))
예제 #3
0
 def __init__(self, capacity=None, alpha=0.6, beta0=0.4, betasteps=2e5,
              eps=1e-8, normalize_by_max=True, default_priority_func=None,
              uniform_ratio=0, wait_priority_after_sampling=True,
              return_sample_weights=True, error_min=None, error_max=None):
     """Initialize episode storage, sampling options and the priority mixin.

     Args:
         capacity: Used as ``maxlen`` of the ``RandomAccessQueue`` stored in
             ``self.memory`` and as the initial ``self.capacity_left``.
         alpha, beta0, betasteps, eps, normalize_by_max: Forwarded
             positionally to ``PriorityWeightError.__init__``.
         default_priority_func: Stored as ``self.default_priority_func``.
         uniform_ratio: Stored as ``self.uniform_ratio``.
         wait_priority_after_sampling: Forwarded to ``PrioritizedBuffer``.
         return_sample_weights: Stored as ``self.return_sample_weights``.
         error_min, error_max: Forwarded by keyword to
             ``PriorityWeightError.__init__``.
     """
     # The episode being collected starts out empty.
     self.current_episode = []

     # The episodic store is created without a capacity of its own.
     episode_store = PrioritizedBuffer(
         capacity=None,
         wait_priority_after_sampling=wait_priority_after_sampling)
     self.episodic_memory = episode_store

     transition_store = RandomAccessQueue(maxlen=capacity)
     self.memory = transition_store
     self.capacity_left = capacity

     # Sampling-related options are kept as plain attributes.
     self.default_priority_func = default_priority_func
     self.uniform_ratio = uniform_ratio
     self.return_sample_weights = return_sample_weights

     # The mixin initializer is called explicitly (not through super()).
     PriorityWeightError.__init__(
         self, alpha, beta0, betasteps, eps, normalize_by_max,
         error_min=error_min, error_max=error_max)
예제 #4
0
 def __init__(self, capacity=None, alpha=0.6, beta0=0.4, betasteps=2e5,
              eps=1e-8, normalize_by_max=True):
     """Create the prioritized storage and set up priority/weight handling.

     Args:
         capacity: Forwarded to ``PrioritizedBuffer`` as ``capacity``.
         alpha, beta0, betasteps, eps, normalize_by_max: Forwarded
             positionally, in this order, to
             ``PriorityWeightError.__init__``.
     """
     storage = PrioritizedBuffer(capacity=capacity)
     self.memory = storage
     # The mixin initializer is called explicitly (not through super()).
     mixin_args = (alpha, beta0, betasteps, eps, normalize_by_max)
     PriorityWeightError.__init__(self, *mixin_args)
예제 #5
0
class PrioritizedReplayBuffer(ReplayBuffer, PriorityWeightError):
    """Replay buffer with proportional stochastic prioritization.

    See Section 3.3 of https://arxiv.org/pdf/1511.05952.pdf.

    Args:
        capacity (int): Capacity, counted in transitions.
        alpha (float): Exponent applied to errors when computing sampling
            probabilities.
        beta0 (float): Starting value of beta.
        betasteps (int): Number of steps over which beta is annealed to 1.
        eps (float): Keeps a step revisitable after its error gets close to
            zero.
        normalize_by_max (bool): How weights are normalized. ``'batch'`` or
            ``True`` (default) divides by the largest weight in the sampled
            batch; ``'memory'`` divides by the largest weight in the memory;
            ``False`` disables normalization.
        error_min, error_max: Forwarded by keyword to
            ``PriorityWeightError.__init__``.
        num_steps (int): Must be positive; used as ``maxlen`` of each deque
            in ``last_n_transitions``.
    """

    def __init__(
        self,
        capacity=None,
        alpha=0.6,
        beta0=0.4,
        betasteps=2e5,
        eps=0.01,
        normalize_by_max=True,
        error_min=0,
        error_max=1,
        num_steps=1,
    ):
        """Set up storage, the n-step windows and the priority mixin."""
        self.capacity = capacity
        assert num_steps > 0
        self.num_steps = num_steps
        self.memory = PrioritizedBuffer(capacity=capacity)

        def _empty_window():
            # Each new key starts with an empty, length-bounded deque.
            return collections.deque([], maxlen=num_steps)

        self.last_n_transitions = collections.defaultdict(_empty_window)

        # The mixin initializer is called explicitly (not through super()).
        PriorityWeightError.__init__(
            self, alpha, beta0, betasteps, eps, normalize_by_max,
            error_min=error_min, error_max=error_max)

    def sample(self, n):
        """Sample ``n`` entries and store a weight on each entry's first item.

        Asserts that the memory holds at least ``n`` entries. Each sampled
        entry is mutated in place: ``entry[0]['weight']`` is set to the
        weight from ``weights_from_probabilities``. Returns the sampled
        entries.
        """
        assert len(self.memory) >= n
        batch, probs, lowest_prob = self.memory.sample(n)
        batch_weights = self.weights_from_probabilities(probs, lowest_prob)
        for entry, weight in zip(batch, batch_weights):
            entry[0]['weight'] = weight
        return batch

    def update_errors(self, errors):
        """Turn ``errors`` into priorities and pass them to the memory."""
        self.memory.set_last_priority(self.priority_from_errors(errors))
예제 #6
0
 def __init__(
     self,
     capacity=None,
     alpha=0.6,
     beta0=0.4,
     betasteps=2e5,
     eps=0.01,
     normalize_by_max=True,
     error_min=0,
     error_max=1,
     num_steps=1,
 ):
     """Set up storage, the n-step windows and the priority mixin.

     Args:
         capacity: Stored as ``self.capacity`` and forwarded to
             ``PrioritizedBuffer``.
         alpha, beta0, betasteps, eps, normalize_by_max: Forwarded
             positionally to ``PriorityWeightError.__init__``.
         error_min, error_max: Forwarded by keyword to
             ``PriorityWeightError.__init__``.
         num_steps: Must be positive; used as ``maxlen`` of each deque in
             ``self.last_n_transitions``.
     """
     self.capacity = capacity
     assert num_steps > 0
     self.num_steps = num_steps
     self.memory = PrioritizedBuffer(capacity=capacity)

     def _new_window():
         # Each new key starts with an empty, length-bounded deque.
         return collections.deque([], maxlen=num_steps)

     self.last_n_transitions = collections.defaultdict(_new_window)

     # The mixin initializer is called explicitly (not through super()).
     PriorityWeightError.__init__(
         self,
         alpha,
         beta0,
         betasteps,
         eps,
         normalize_by_max,
         error_min=error_min,
         error_max=error_max,
     )
예제 #7
0
class PrioritizedEpisodicReplayBuffer(
        EpisodicReplayBuffer, PriorityWeightError):
    """Episodic replay buffer that samples episodes from a PrioritizedBuffer.

    Finished episodes are kept in ``episodic_memory`` (a
    ``PrioritizedBuffer``) and their transitions in ``memory`` (a
    ``RandomAccessQueue`` bounded by ``capacity``).
    """

    def __init__(
        self,
        capacity=None,
        alpha=0.6,
        beta0=0.4,
        betasteps=2e5,
        eps=1e-8,
        normalize_by_max=True,
        default_priority_func=None,
        uniform_ratio=0,
        wait_priority_after_sampling=True,
        return_sample_weights=True,
        error_min=None,
        error_max=None,
    ):
        """Initialize episode storage, sampling options and the mixin.

        Args:
            capacity: ``maxlen`` of ``self.memory`` and the initial value of
                ``self.capacity_left``.
            alpha, beta0, betasteps, eps, normalize_by_max: Forwarded
                positionally to ``PriorityWeightError.__init__``.
            default_priority_func: Optional callable applied to a finished
                episode to get its priority.
            uniform_ratio: Passed to ``episodic_memory.sample``.
            wait_priority_after_sampling: Forwarded to
                ``PrioritizedBuffer``.
            return_sample_weights: Whether ``sample_episodes`` also returns
                weights.
            error_min, error_max: Forwarded by keyword to
                ``PriorityWeightError.__init__``.
        """
        self.current_episode = []
        # The episodic store is created without a capacity of its own;
        # stop_current_episode trims it through capacity_left.
        self.episodic_memory = PrioritizedBuffer(
            capacity=None,
            wait_priority_after_sampling=wait_priority_after_sampling,
        )
        self.memory = RandomAccessQueue(maxlen=capacity)
        self.capacity_left = capacity
        self.default_priority_func = default_priority_func
        self.uniform_ratio = uniform_ratio
        self.return_sample_weights = return_sample_weights
        # The mixin initializer is called explicitly (not through super()).
        PriorityWeightError.__init__(
            self,
            alpha,
            beta0,
            betasteps,
            eps,
            normalize_by_max,
            error_min=error_min,
            error_max=error_max,
        )

    def sample_episodes(self, n_episodes, max_len=None):
        """Draw ``n_episodes`` episodes from the episodic memory.

        Args:
            n_episodes: Number of episodes to draw; the episodic memory must
                hold at least this many.
            max_len: When not ``None``, each drawn episode is replaced by
                ``random_subseq(episode, max_len)``.

        Returns:
            ``(episodes, weights)`` when ``return_sample_weights`` is truthy,
            otherwise just ``episodes``.
        """
        assert len(self.episodic_memory) >= n_episodes
        drawn, probs, lowest_prob = self.episodic_memory.sample(
            n_episodes, uniform_ratio=self.uniform_ratio)
        if max_len is not None:
            drawn = [random_subseq(episode, max_len) for episode in drawn]
        if not self.return_sample_weights:
            return drawn
        return drawn, self.weights_from_probabilities(probs, lowest_prob)

    def update_errors(self, errors):
        """Turn ``errors`` into priorities and pass them to episodic memory."""
        self.episodic_memory.set_last_priority(
            self.priority_from_errors(errors))

    def stop_current_episode(self):
        """Commit the episode in progress, if any, and enforce capacity.

        A non-empty current episode is appended to both stores. While the
        remaining capacity is negative, the oldest episodes are popped from
        the episodic memory.
        """
        if not self.current_episode:
            # Nothing has been collected, so there is nothing to commit.
            assert not self.current_episode
            return

        priority = (
            self.default_priority_func(self.current_episode)
            if self.default_priority_func is not None
            else None
        )
        self.memory.extend(self.current_episode)
        self.episodic_memory.append(self.current_episode, priority=priority)
        if self.capacity_left is not None:
            self.capacity_left -= len(self.current_episode)
        self.current_episode = []

        # capacity_left of None means no capacity accounting is done.
        while self.capacity_left is not None and self.capacity_left < 0:
            evicted = self.episodic_memory.popleft()
            self.capacity_left += len(evicted)
        assert not self.current_episode