コード例 #1
0
    def append_samples(self, samples):
        """Write ``samples`` into the buffer and advance the time cursor.

        The first ``self.fix_T`` time steps are treated as a fixed region:
        when the cursor reaches the end of the buffer it restarts at
        ``self.fix_T`` instead of 0, so wrapped writes land above that
        region.  The wrap boundary does not need to align with the length
        of ``samples``.  ``self.compute_returns(T)`` is called after the
        write and before the cursor is advanced.

        Args:
            samples: data whose two leading dimensions are ``[T, B]``;
                ``B`` must equal ``self.B``.

        Returns:
            ``(T, idxs)`` where ``idxs`` is the slice (no wrap) or integer
            index array (wrap) of the time steps just written; passed on
            for use by subclasses.
        """
        T, B = get_leading_dims(samples, n_dim=2)
        assert B == self.B
        t = self.t
        end = t + T

        if end > self.T:  # Wrap: fill the tail, then continue above fix_T.
            overflow = end - self.T
            idxs = np.concatenate((
                np.arange(t, self.T),
                np.arange(self.fix_T, self.fix_T + overflow),
            ))
        else:
            idxs = slice(t, end)
        self.samples[idxs] = samples

        self.compute_returns(T)
        if not self._buffer_full and end >= self.T:
            self._buffer_full = True  # Only changes on the first time around.

        # Restart above the fixed region whenever the end of the buffer is
        # reached, including when the write lands exactly on the boundary;
        # otherwise the cursor would reset to 0 and the next append would
        # overwrite the fixed region.
        t_adv = self.fix_T if end >= self.T else 0
        self.t = end % self.T + t_adv
        return T, idxs  # Pass these on to subclass.
コード例 #2
0
    def append_samples(self, samples):
        """Write ``samples`` into the buffer, saving the replay when it fills.

        Adapted from BaseNStepReturnBuffer.  Whenever a write reaches the
        end of the buffer, ``self.save_replay_buffer()`` is invoked: either
        right after the write (exact fill), or between writing the tail and
        the wrapped head (overflow), so the save happens before the oldest
        entries are overwritten.

        Returns:
            ``(T, idxs)``: number of time steps written and the slice or
            wrapped index array describing where they were written.
        """
        T, B = get_leading_dims(samples, n_dim=2)
        assert B == self.B
        start = self.t
        stop = start + T

        if stop < self.T:
            # Plain write; the buffer end is not reached.
            idxs = slice(start, stop)
            self.samples[idxs] = samples
        elif stop == self.T:
            # Exactly fills the buffer: write everything, then save.
            idxs = slice(start, stop)
            self.samples[idxs] = samples
            self.save_replay_buffer()
        else:
            # Overflow: tail first, save, then the wrapped head.
            overflow = stop - self.T
            self.samples[slice(start, self.T)] = samples[:-overflow]
            self.save_replay_buffer()
            self.samples[slice(0, overflow)] = samples[-overflow:]
            idxs = np.arange(start, stop) % self.T  # for subclasses

        self.compute_returns(T)
        if stop >= self.T and not self._buffer_full:
            self._buffer_full = True
        self.t = stop % self.T

        return T, idxs
コード例 #3
0
ファイル: mtgail.py プロジェクト: qxcv/mtil
 def __init__(self, total_n_samples, example_samples):
     """Allocate a flat circular buffer holding ``total_n_samples`` entries.

     Args:
         total_n_samples: capacity of the circular buffer.
         example_samples: sampler output providing ``env.observation``
             and ``agent.action``; its leading ``[T, B]`` dims must
             satisfy ``0 < T * B <= total_n_samples``.
     """
     self.total_n_samples = total_n_samples
     observations = example_samples.env.observation
     actions = example_samples.agent.action
     template = DiscrimReplaySamples(all_observation=observations,
                                     all_action=actions)
     T, B = get_leading_dims(template, n_dim=2)
     n_example = T * B
     assert total_n_samples >= n_example > 0, (total_n_samples, n_example)
     # A single [0, 0] element serves as the per-entry example.
     self.circ_buf = buffer_from_example(template[0, 0],
                                         (total_n_samples, ))
     # Write cursor and fill count both start at zero.
     self.ptr = 0
     self.samples_in_buffer = 0
コード例 #4
0
ファイル: rl_with_ul_replay.py プロジェクト: kevinghst/rl_ul
 def append_samples(self, samples):
     """Write ``samples`` at the time cursor, wrapping around if needed.

     Returns:
         ``(T, idxs)``: number of time steps written and the slice (no
         wrap) or modular index array (wrap) they were written to.
     """
     T, B = get_leading_dims(samples, n_dim=2)
     assert B == self.B
     start = self.t
     stop = start + T
     if stop <= self.T:
         idxs = slice(start, stop)
     else:  # Wrap.
         idxs = np.arange(start, stop) % self.T
     self.samples[idxs] = samples
     if stop >= self.T and not self._buffer_full:
         self._buffer_full = True
     self.t = stop % self.T
     return T, idxs
コード例 #5
0
ファイル: rl_with_ul_replay.py プロジェクト: kevinghst/rl_ul
 def append_samples(self, samples):
     """Record rewards/dones and priorities, then append to the wrapped buffer.

     The write positions mirror those the wrapped ``self.replay_buffer``
     is about to use: they are derived from its cursor ``t`` and its
     length ``T``.  ``samples.reward`` and ``samples.done`` are stored
     at those positions, new returns are obtained from
     ``self.compute_ul_returns(T)``, turned into priorities
     ``1 + alpha * returns ** beta``, and the priority tree is advanced
     by ``T`` before delegating the actual append.

     Returns:
         Whatever ``self.replay_buffer.append_samples(samples)`` returns.
     """
     T, B = get_leading_dims(samples, n_dim=2)
     replay = self.replay_buffer
     assert B == replay.B
     t = replay.t
     # Use the wrapped buffer's length for both the wrap test and the
     # modulo, so the two can never disagree.
     buffer_T = replay.T
     if t + T > buffer_T:  # Wrap.
         idxs = np.arange(t, t + T) % buffer_T
     else:
         idxs = slice(t, t + T)
     self.samples_reward[idxs] = samples.reward
     self.samples_done[idxs] = samples.done
     new_returns = self.compute_ul_returns(T)
     priorities = 1 + self.alpha * new_returns**self.beta
     self.priority_tree.advance(T, priorities=priorities)
     return replay.append_samples(samples)
コード例 #6
0
 def append_samples(self, samples):
     """Store ``samples`` at the cursor, compute returns, advance the cursor.

     Returns:
         ``(T, idxs)`` for use by subclasses: the number of time steps
         written and the slice or wrapped index array they occupy.
     """
     T, B = get_leading_dims(samples, n_dim=2)
     assert B == self.B
     cursor = self.t
     new_end = cursor + T
     wraps = new_end > self.T
     idxs = (np.arange(cursor, new_end) % self.T if wraps
             else slice(cursor, new_end))
     self.samples[idxs] = samples
     self.compute_returns(T)
     reached_end = new_end >= self.T
     if reached_end and not self._buffer_full:
         self._buffer_full = True  # Flips only on the first pass.
     self.t = new_end % self.T
     return T, idxs
コード例 #7
0
ファイル: rl_with_ul_replay.py プロジェクト: kevinghst/rl_ul
 def append_samples(self, samples):
     """Store ``samples``, then advance the priority tree with new priorities.

     After the write, ``self.compute_returns(T)`` supplies the new
     returns; priorities are ``1 + alpha * returns ** beta`` and are
     passed to ``self.priority_tree.advance`` once the cursor has moved.

     Returns:
         ``(T, idxs)``: time steps written and where they were written.
     """
     T, B = get_leading_dims(samples, n_dim=2)
     assert B == self.B
     start = self.t
     stop = start + T
     if stop <= self.T:
         idxs = slice(start, stop)
     else:  # Wrap.
         idxs = np.arange(start, stop) % self.T
     self.samples[idxs] = samples
     fresh_returns = self.compute_returns(T)
     if stop >= self.T and not self._buffer_full:
         self._buffer_full = True
     self.t = stop % self.T
     self.priority_tree.advance(
         T, priorities=1 + self.alpha * fresh_returns**self.beta)
     return T, idxs
コード例 #8
0
 def append_samples(self, samples):
     """Copy ``samples`` into the buffer and move the time cursor forward.

     The cursor wraps around the end of the buffer when necessary; the
     wrap point need not line up with the length of ``samples``.
     ``self.compute_returns(T)`` is called once the new data is stored.

     Returns:
         ``(T, idxs)``, handed on to subclasses: the number of time
         steps written and the slice or wrapped index array used.
     """
     T, B = get_leading_dims(samples, n_dim=2)
     assert B == self.B
     begin = self.t
     finish = begin + T
     if finish <= self.T:
         idxs = slice(begin, finish)
     else:  # Wrap.
         idxs = np.arange(begin, finish) % self.T
     self.samples[idxs] = samples
     self.compute_returns(T)
     if finish >= self.T and not self._buffer_full:
         self._buffer_full = True  # Flips only on the first pass.
     self.t = finish % self.T
     return T, idxs
コード例 #9
0
 def __init__(self, example, **kwargs):
     """Set up a buffer that stores observation frames separately.

     A module-level ``BufferSamples`` namedarraytuple is (re)defined from
     every field of ``example`` except ``observation``; the parent
     constructor receives an example built from those fields.  The
     observation frames go into ``self.samples_frames``, with
     ``self.samples_new_frames`` a view of it offset by ``n_frames - 1``.

     Args:
         example: sample example exposing ``_fields``, ``items()`` and
             an ``observation`` entry.
         **kwargs: forwarded to the parent constructor.
     """
     global BufferSamples
     kept_names = [name for name in example._fields
                   if name != "observation"]
     BufferSamples = namedarraytuple("BufferSamples", kept_names)
     kept_values = [val for key, val in example.items()
                    if key != "observation"]
     super().__init__(example=BufferSamples(*kept_values), **kwargs)
     # Equivalent to image.shape[0] if observation is image array (C,H,W):
     n_frames = get_leading_dims(example.observation, n_dim=1)[0]
     self.n_frames = n_frames
     logger.log(f"Frame-based buffer using {n_frames}-frame sequences.")
     # frames: oldest stored at t; n_frames - 1 extra leading rows hold
     # the duplicated frames.
     frames_leading_dims = (self.T + n_frames - 1, self.B)
     self.samples_frames = buffer_from_example(
         example.observation[0],
         frames_leading_dims,
         share_memory=self.async_,
     )
     # new_frames: shifted so newest stored at t; no duplication.
     self.samples_new_frames = self.samples_frames[n_frames - 1:]
     self.off_forward = max(self.off_forward, n_frames - 1)
コード例 #10
0
 def append_samples(self, samples):
     """Append new sequences along the batch axis under the write lock.

     Each of the ``B`` columns of ``samples`` is stored as one sequence
     starting at cursor ``self.t``.  Only as many sequences as fit
     before ``self.buffer_size`` are copied; the remainder of the batch
     is not written.  ``prev_rnn_state`` is stored separately, taken
     from time index 0 only.  Nothing is returned.
     """
     with self.rw_lock.write_lock:
         self._async_pull()  # Updates from other writers.
         _, B = get_leading_dims(samples, n_dim=2)
         start = self.t
         # Clip the batch so the write never runs past the buffer end.
         n_new = B
         if start + n_new >= self.buffer_size:
             n_new = self.buffer_size - start
         stop = start + n_new
         seq_idxs = np.arange(start, stop)
         self.samples_prev_rnn_state[seq_idxs] = samples.prev_rnn_state[
             0, :n_new]
         trimmed_fields = [
             v[:, :n_new] for k, v in samples.items()
             if k != "prev_rnn_state"
         ]
         self.samples[:, start:stop] = self.SamplesToBuffer(*trimmed_fields)
         self._buffer_full = self._buffer_full or stop == self.buffer_size
         self.t = stop % self.buffer_size
         self._async_push()  # Updates to other writers + readers.
コード例 #11
0
ファイル: model_based.py プロジェクト: jaztsong/rlpyt
 def append_samples(self, samples):
     """Drop steps that follow a ``done`` flag, then write the rest.

     A step is discarded when the preceding step (after rolling the
     ``done`` flags by one) was marked done; the very first step is
     always kept.  The surviving steps are written at the time cursor,
     which wraps around the end of the buffer if necessary.  Unlike the
     base buffer this method does not compute returns.

     NOTE(review): the filtering appears to assume ``samples.done`` is a
     torch tensor for a single environment (B == 1) -- confirm.

     Returns:
         ``(T, idxs)`` where ``T`` is the number of steps kept after
         filtering and ``idxs`` is the slice or wrapped index array they
         were written to; passed on to subclasses.
     """
     # Flag each step whose predecessor was terminal.
     after_done = samples.done.squeeze().roll(1)
     # The first element has no predecessor in this batch: keep it.
     after_done[0] = False
     # Squeeze only the trailing axis so the index stays 1-D even when a
     # single step survives; a bare squeeze() would yield a 0-d index
     # and strip the time dimension from ``samples``.
     valid_idxs = (after_done == False).nonzero().squeeze(-1)  # noqa: E712
     samples = samples[valid_idxs]
     T, B = get_leading_dims(samples, n_dim=2)
     assert B == self.B
     t = self.t
     if t + T > self.T:  # Wrap.
         idxs = np.arange(t, t + T) % self.T
     else:
         idxs = slice(t, t + T)
     self.samples[idxs] = samples
     if not self._buffer_full and t + T >= self.T:
         self._buffer_full = True  # Only changes on first around.
     self.t = (t + T) % self.T
     return T, idxs  # Pass these on to subclass.
コード例 #12
0
    def append_samples(self, samples):
        """
        Append ``samples``, recording only the last frame of each observation.

        Every field except ``observation`` is packed into ``BufferSamples``
        and handed to the parent ``append_samples``.  From each
        observation only index ``-1`` along the third axis is written to
        ``self.samples_new_frames``.

        Modified from `FrameBufferMixin` append_samples to appropriately
        store frames when the buffer wraps over (and is written to disk):
        on a wrap the tail frames are written before the parent call and
        the head frames after it.

        Returns:
            ``(T, idxs)``: the number of time steps in ``samples`` and
            the indices written -- taken from the parent call on a wrap,
            otherwise ``slice(t, t + T)``.
        """
        # fm1 = number of extra leading frames kept in samples_frames.
        t, fm1 = self.t, self.n_frames - 1
        buffer_samples = BufferSamples(*(v for k, v in samples.items()
                                         if k != "observation"))

        if t == 0:  # starting: write early frames
            # Seed the first fm1 rows from the first observation's frames.
            for f in range(fm1):
                self.samples_frames[f] = samples.observation[0, :, f]

        T, B = get_leading_dims(samples, n_dim=2)
        if t + T > self.T:  # wrap, store tail frames before saving
            # cutoff = number of time steps that spill past the buffer end.
            cutoff = t + T - self.T
            tail_idxs = slice(t, self.T)
            head_idxs = slice(0, cutoff)
            self.samples_new_frames[
                tail_idxs] = samples.observation[:-cutoff, :, -1]
            # Per the original note, the parent call is where the replay
            # is saved, so it must run after the tail frames are written
            # and before the head frames overwrite old data.
            _, idxs = super().append_samples(
                buffer_samples)  # saved here; idxs for subclasses
            self.samples_new_frames[head_idxs] = samples.observation[
                -cutoff:, :, -1]
            if fm1 > 0:  # copy any duplicate frames
                # Carry the last fm1 frames over to the leading rows.
                self.samples_frames[:fm1] = self.samples_frames[-fm1:]
        else:
            idxs = slice(t, t + T)
            self.samples_new_frames[idxs] = samples.observation[:, :, -1]
            super().append_samples(
                buffer_samples)  # may still save replay if new t == 0

        return T, idxs
コード例 #13
0
ファイル: mtgail.py プロジェクト: qxcv/mtil
 def append_samples(self, samples):
     """Append samples drawn from a sampler to the circular buffer.

     ``samples`` should be a namedarraytuple with leading dimensions
     ``(time_steps, batch_size)``.  Observations and actions are
     flattened to ``T * B`` entries and copied in contiguous chunks,
     each chunk stopping at the end of the buffer so the write pointer
     can wrap back to zero.
     """
     replay_samples = DiscrimReplaySamples(
         all_observation=samples.env.observation,
         all_action=samples.agent.action)
     T, B = get_leading_dims(replay_samples, n_dim=2)
     n_total = T * B
     # If a single full round of sampling does not fit, the buffer is
     # _probably_ too small.
     assert n_total <= self.total_n_samples, (
         f"There's not enough room in this buffer for a single full "
         f"batch! T*B={T*B} > total_n_samples={self.total_n_samples}")
     flat_samples = buffer_func(
         replay_samples, lambda arr: arr.reshape((n_total, ) + arr.shape[2:]))
     written = 0
     while written < n_total:
         # Never copy past the end of the buffer in a single chunk.
         room_to_end = self.total_n_samples - self.ptr
         chunk = min(n_total - written, room_to_end)
         dst_end = self.ptr + chunk
         self.circ_buf[self.ptr:dst_end] = \
             flat_samples[written:written + chunk]
         written += chunk
         self.ptr = dst_end % self.total_n_samples
         self.samples_in_buffer = min(self.total_n_samples,
                                      self.samples_in_buffer + chunk)