Beispiel #1
0
    def __init__(
        self,
        py_client: types.ReverbClient,
        table_name: Text,
        max_sequence_length: int,
        priority: Union[float, int] = 1,
        bypass_partial_episodes: bool = False,
    ):
        """Initializes a ReverbAddEpisodeObserver.

        **Note**: This observer is designed to work with py_drivers only, and
        does not support batches.

        TODO(b/158865335): Optionally truncate long episodes and add to buffer.

        Args:
          py_client: Python client for the Reverb replay server. Its `writer`
            method is called here to create the writer held by this observer.
          table_name: Name of the table that samples are written to. It is
            stored as given; no type check is performed in this constructor.
          max_sequence_length: Positive integer forwarded to
            `py_client.writer` as `max_sequence_length`. It is the upper limit
            on the number of timesteps a single prioritized item can
            reference and controls the size of the writer's internal buffer,
            so very large values are discouraged. It is not a limit on how
            many timesteps or items can be inserted into the replay buffer.
          priority: Initial priority used for the table. It is stored as
            given; no type check is performed in this constructor.
          bypass_partial_episodes: Policy for episodes longer than
            `max_sequence_length`. With `False` (default) such an episode
            leads to a `ValueError`. With `True` no error is raised; the
            over-long episode is skipped (its steps are discarded and NOT
            written to the replay buffer) and an error message is shown, so
            that the replay buffer only ever contains full episodes. The
            constructor only records this flag; the policy itself is applied
            elsewhere in the class.

        Raises:
          ValueError: If `max_sequence_length` is not positive.
        """
        # Reject non-positive lengths before touching the client or `self`.
        if max_sequence_length <= 0:
            raise ValueError(
                "`max_sequence_length` must be an integer greater equal one.")

        # Plain configuration captured from the arguments.
        self._py_client = py_client
        self._table_name = table_name
        self._priority = priority
        self._max_sequence_length = max_sequence_length

        # The writer's buffer is sized by the validated sequence length.
        self._writer = py_client.writer(
            max_sequence_length=max_sequence_length)

        # Bookkeeping: nothing cached yet and no overflow observed so far.
        self._cached_steps = 0
        self._bypass_partial_episodes = bypass_partial_episodes
        self._overflow_episode = False
Beispiel #2
0
    def __init__(
        self,
        py_client: types.ReverbClient,
        table_name: Union[Text, Sequence[Text]],
        sequence_length: int,
        stride_length: int = 1,
        priority: Union[float, int] = 1,
        pad_end_of_episodes: bool = False,
    ):
        """Initializes a ReverbAddTrajectoryObserver.

        When several table names are supplied, the data is stored only once
        but can be sampled from each of the respective Reverb tables.

        **Note**: This observer is designed to work with py_drivers only, and
        does not support batches.

        Args:
          py_client: Python client for the Reverb replay server. Its
            `trajectory_writer` method is called here to create the writer.
          table_name: A single table name or a sequence of table names that
            samples are written to. A single name is wrapped in a one-element
            list; a sequence is stored as passed (not copied).
          sequence_length: Length of the sequences written to the table(s).
            The writer is created with `sequence_length + 1` keep-alive refs.
          stride_length: Integer stride of the sliding window used to build
            overlapping sequences. With the default of `1` an item is created
            for every window; writing `L = sequence_length`, items cover
            times `{0, 1, .., L-1}, {1, 2, .., L}, ...`. With
            `stride_length = L` only disjoint windows
            `{0, 1, ..., L-1}, {L, ..., 2 * L - 1}, ...` produce items.
          priority: Initial priority for new samples in the replay buffer.
          pad_end_of_episodes: By default the cache is dropped when an
            episode ends. If `True`, the cache is instead padded with
            boundary steps (last->first) holding `0` values everywhere, and
            padded items of `sequence_length` are written to Reverb; the last
            padded item starts with a boundary step from the episode. This
            keeps the final steps of an episode from being sampled less often
            than middle steps, which matters most when useful rewards occur
            at the end of episodes. No padding is applied at the start of an
            episode, so for `sequence_length = N > 1` the first `N-1` steps
            are sampled less frequently than the rest. This generally does
            not affect training, but if the only meaningful rewards are at
            the beginning of episodes, consider filing a feature request for
            front padding.
        """
        # Normalize to a list-like of names so later code handles one shape.
        self._table_names = (
            [table_name] if isinstance(table_name, Text) else table_name)

        self._py_client = py_client
        self._priority = priority
        self._sequence_length = sequence_length
        self._stride_length = stride_length
        self._pad_end_of_episodes = pad_end_of_episodes

        # TODO(b/153700282): Use a single writer with max_sequence_length=max(...)
        # once Reverb Dataset with emit_timesteps=True returns properly shaped
        # sequences.
        keep_alive_refs = sequence_length + 1
        self._writer = py_client.trajectory_writer(
            num_keep_alive_refs=keep_alive_refs)

        # Nothing has been cached or emitted yet.
        self._cached_steps = 0
        self._last_trajectory = None
Beispiel #3
0
    def __init__(
        self,
        py_client: types.ReverbClient,
        table_name: Text,
        sequence_length: int,
        stride_length: int = 1,
        priority: Union[float, int] = 1,
    ):
        """Initializes a ReverbAddTrajectoryObserver.

        **Note**: This observer is designed to work with py_drivers only, and
        does not support batches.

        The constructor performs no argument validation and raises nothing
        itself; it stores the arguments and creates a writer from
        `py_client`.

        Args:
          py_client: Python client for the Reverb replay server. Its `writer`
            method is called here to create the writer held by this observer.
          table_name: Name of the table that samples are written to.
          sequence_length: Length of the sequences written to the table. It
            is also passed to `py_client.writer` as `max_sequence_length`.
          stride_length: Integer stride of the sliding window used to build
            overlapping sequences. With the default of `1` an item is created
            for every window; writing `L = sequence_length`, items cover
            times `{0, 1, .., L-1}, {1, 2, .., L}, ...`. With
            `stride_length = L` only disjoint windows
            `{0, 1, ..., L-1}, {L, ..., 2 * L - 1}, ...` produce items.
          priority: Initial priority for new samples in the replay buffer.
        """
        self._py_client = py_client
        self._table_name = table_name
        self._priority = priority
        self._stride_length = stride_length
        self._sequence_length = sequence_length

        # TODO(b/153700282): Use a single writer with max_sequence_length=max(...)
        # once Reverb Dataset with emit_timesteps=True returns properly shaped
        # sequences.
        writer = py_client.writer(max_sequence_length=sequence_length)
        self._writer = writer

        # No steps have been cached yet.
        self._cached_steps = 0
Beispiel #4
0
    def __init__(self,
                 py_client: types.ReverbClient,
                 table_name: Union[Text, Sequence[Text]],
                 sequence_length: int,
                 stride_length: int = 1,
                 priority: Union[float, int] = 1,
                 pad_end_of_episodes: bool = False,
                 tile_end_of_episodes: bool = False):
        """Creates an instance of the ReverbAddTrajectoryObserver.

        When several table names are supplied, the data is stored only once
        but can be sampled from each of the respective Reverb tables.

        **Note**: This observer is designed to work with py_drivers only, and
        does not support batches.

        Args:
          py_client: Python client for the Reverb replay server. Its
            `trajectory_writer` method is called here to create the writer.
          table_name: The table name(s) where samples will be written to. A
            single name is wrapped in a one-element list; a sequence is stored
            as passed (not copied).
          sequence_length: The sequence_length used to write to the given
            table. The writer is created with `sequence_length + 1` keep-alive
            refs.
          stride_length: The integer stride for the sliding window for
            overlapping sequences.  The default value of `1` creates an item
            for every window. Using `L = sequence_length` this means items are
            created for times `{0, 1, .., L-1}, {1, 2, .., L}, ...`.  In
            contrast, `stride_length = L` will create an item only for
            disjoint windows `{0, 1, ..., L-1}, {L, ..., 2 * L - 1}, ...`.
          priority: Initial priority for new samples in the RB.
          pad_end_of_episodes: At the end of an episode, the cache is dropped
            by default. When `pad_end_of_episodes = True`, the cache gets
            padded with boundary steps (last->first) with `0` values
            everywhere and padded items of `sequence_length` are written to
            Reverb.
          tile_end_of_episodes: Only valid together with
            `pad_end_of_episodes=True`; in that case the last padded item
            starts with a boundary step from the episode.

            When this option is True the following items will be generated:

            F, M, L, P
            M, L, P, P
            L, P, P, P

            If False, only a single one will be generated:

            F, M, L, P

            For training recurrent models on environments where required
            information is only available at the start of the episode it is
            useful to set `tile_end_of_episodes=False` and the
            `sequence_length` to be the length of the longest episode.

        Raises:
          ValueError: If `tile_end_of_episodes` is set without
            `pad_end_of_episodes`. The check runs before any attribute is
            assigned or any writer is created.
        """
        # Validate the flag combination first so an invalid call fails fast,
        # leaving no partially-initialized state on the instance.
        if tile_end_of_episodes and not pad_end_of_episodes:
            raise ValueError("Must set `pad_end_of_episodes=True` when using "
                             "`tile_end_of_episodes`")

        # Normalize to a list-like of names so later code handles one shape.
        if isinstance(table_name, Text):
            self._table_names = [table_name]
        else:
            self._table_names = table_name
        self._sequence_length = sequence_length
        self._stride_length = stride_length
        self._priority = priority
        self._pad_end_of_episodes = pad_end_of_episodes
        self._tile_end_of_episodes = tile_end_of_episodes

        self._py_client = py_client
        # TODO(b/153700282): Use a single writer with max_sequence_length=max(...)
        # once Reverb Dataset with emit_timesteps=True returns properly shaped
        # sequences.
        # NOTE(review): `get_signature_timeout_ms=None` presumably means "no
        # timeout" when fetching table signatures -- confirm against the
        # Reverb client documentation.
        self._writer = py_client.trajectory_writer(
            num_keep_alive_refs=sequence_length + 1,
            get_signature_timeout_ms=None)
        # Nothing has been cached or emitted yet.
        self._cached_steps = 0
        self._last_trajectory = None