Example #1
    def add(
        self, transition: Tuple[np.ndarray, np.ndarray, float, np.ndarray, bool]
    ) -> Tuple[Any, ...]:
        """Add a new experience to memory.
        If the buffer is empty, it is respectively initialized by size of arguments.
        """
        self.n_step_buffer.append(transition)

        # the n-step window is not full yet, so no transition is ready
        if len(self.n_step_buffer) < self.n_step:
            return ()

        if self.length == 0:
            state, action = transition[:2]
            self._initialize_buffers(state, action)

        # add a multi step transition
        reward, next_state, done = get_n_step_info(self.n_step_buffer, self.gamma)
        curr_state, action = self.n_step_buffer[0][:2]

        self.obs_buf[self.idx] = curr_state
        self.acts_buf[self.idx] = action
        self.rews_buf[self.idx] = reward
        self.next_obs_buf[self.idx] = next_state
        self.done_buf[self.idx] = done

        self.idx += 1
        # wrap the write index past the end of the buffer, landing on
        # demo_size so demonstration transitions are never overwritten
        self.idx = self.demo_size if self.idx % self.buffer_size == 0 else self.idx
        self.length = min(self.length + 1, self.buffer_size)

        # return the single-step transition to insert into the replay buffer
        return self.n_step_buffer[0]
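Both the buffer above and the tests below rely on get_n_step_info, which is not
shown on this page. Below is a minimal sketch of what it plausibly looks like,
assuming each transition ends with (..., reward, next_state, done) and that
reward accumulation is cut at the first terminal transition:

from typing import Any, Deque, Tuple

import numpy as np


def get_n_step_info(
    n_step_buffer: Deque[Tuple[Any, ...]], gamma: float
) -> Tuple[float, np.ndarray, bool]:
    """Fold the queued transitions into one n-step (reward, next_state, done)."""
    # start from the most recent transition in the window
    reward, next_state, done = n_step_buffer[-1][-3:]

    # walk backwards, discounting the running reward; a terminal transition
    # overwrites next_state/done and zeroes everything accumulated after it
    for transition in reversed(list(n_step_buffer)[:-1]):
        r, n_s, d = transition[-3:]
        reward = r + gamma * reward * (1 - d)
        next_state, done = (n_s, d) if d else (next_state, done)

    return reward, next_state, done

Returning self.n_step_buffer[0] at the end of add then hands the caller the
matching single-step transition, presumably so 1-step and n-step losses can be
combined, as in Rainbow-style agents.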
Example #2
def check_case2(maxlen: int):
    """Test when there are no terminal within n_step."""
    done_index = maxlen
    n_step_buffer = generate_dummy_buffer(maxlen, done_index)
    reward, next_state, _ = get_n_step_info(n_step_buffer, gamma=1)
    assert reward * 2 == maxlen * (maxlen - 1)
    assert next_state == maxlen
Example #3
def check_case3(maxlen: int):
    """Test when the terminal states exist within n_step."""
    done_index = random.randint(1, maxlen - 1)
    n_step_buffer = generate_dummy_buffer(maxlen, done_index)
    reward, next_state, _ = get_n_step_info(n_step_buffer, gamma=1)
    assert reward * 2 == done_index * (done_index + 1)
    assert next_state == done_index + 1
Example #4
def check_case1(maxlen: int):
    """Test when the transition is terminal state."""
    done_index = 0
    n_step_buffer = generate_dummy_buffer(maxlen, done_index)
    reward, next_state, _ = get_n_step_info(n_step_buffer, gamma=1)
    assert reward == done_index
    assert next_state == done_index + 1
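All three tests assume a generate_dummy_buffer helper that is not shown. The
assertions pin down its convention: transition i carries reward i and next
state i + 1, with done raised only at done_index, so each expected reward is
an arithmetic series (0 + 1 + ... + k = k * (k + 1) / 2) and the assertions
compare reward * 2 to stay in integers. A hypothetical sketch under that
assumption:

from collections import deque
from typing import Any, Deque, Tuple

import numpy as np


def generate_dummy_buffer(maxlen: int, done_index: int) -> Deque[Tuple[Any, ...]]:
    """Fill a deque with maxlen dummy transitions."""
    n_step_buffer: Deque[Tuple[Any, ...]] = deque(maxlen=maxlen)
    for i in range(maxlen):
        # (state, action, reward, next_state, done)
        transition = (np.array([i]), np.array([i]), i, i + 1, i == done_index)
        n_step_buffer.append(transition)
    return n_step_buffer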
Example #5
    def add(
        self,
        transition: Tuple[np.ndarray, np.ndarray, torch.Tensor, float,
                          np.ndarray, bool],
    ) -> Tuple[Any, ...]:
        """Add a new experience to memory.
        If the buffer is empty, it is respectively initialized by size of arguments.
        Add transitions to local buffer until it's full,
        and move thoese transitions to global buffer.
        """
        self.n_step_buffer.append(transition)

        # the n-step window is not full yet, so no transition is ready
        if len(self.n_step_buffer) < self.n_step:
            return ()

        if self.length == 0 and self.idx == 0:
            state, action, hidden_state = transition[:3]
            self._initialize_buffers(state, action, hidden_state)

        # add a multi step transition
        reward, _, done = get_n_step_info(self.n_step_buffer, self.gamma)
        curr_state, action, hidden_state = self.n_step_buffer[0][:3]

        self.local_obs_buf[self.idx] = curr_state
        self.local_acts_buf[self.idx] = action
        self.local_rews_buf[self.idx] = reward
        self.local_hiddens_buf[self.idx] = hidden_state
        self.local_done_buf[self.idx] = done

        self.idx += 1
        # if the episode terminates before the sequence is full, jump the
        # index to the sequence boundary so the partial sequence is flushed
        if done and self.idx < self.sequence_size:
            self.idx = self.sequence_size

        # the local buffers hold a full sequence: move it to the global buffer
        if self.idx % self.sequence_size == 0:
            self.obs_buf[self.episode_idx] = self.local_obs_buf
            self.acts_buf[self.episode_idx] = self.local_acts_buf
            self.rews_buf[self.episode_idx] = self.local_rews_buf
            self.hiddens_buf[self.episode_idx] = self.local_hiddens_buf
            self.done_buf[self.episode_idx] = self.local_done_buf

            # restart writing after the carried-over overlap region
            self.idx = self.overlap_size
            self.episode_idx += 1
            self._overlap_local_buffers()
            if self.episode_idx % self.max_len == 0:
                self.episode_idx = 0
            self.length = min(self.length + 1, self.max_len)

        # return the single-step transition to insert into the replay buffer
        return self.n_step_buffer[0]
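Example #5 in turn depends on an _overlap_local_buffers method that is not
shown. Given that idx is reset to overlap_size after each flush, a plausible
sketch is that it copies the tail of each local buffer to its head, so that
consecutive sequences stored in the global buffer share overlap_size steps
(the overlapping-sequence trick used by recurrent replay buffers such as
R2D2's):

    def _overlap_local_buffers(self):
        """Carry the last overlap_size entries of each local buffer to its head."""
        for buf in (
            self.local_obs_buf,
            self.local_acts_buf,
            self.local_rews_buf,
            self.local_hiddens_buf,
            self.local_done_buf,
        ):
            # copy() avoids aliasing between the source and destination slices
            buf[: self.overlap_size] = buf[-self.overlap_size :].copy()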