Example #1
def get_transition_elements(self, batch_size=None):
  """Returns a 'type signature' for sample_transition_batch.

  Args:
    batch_size: int, number of transitions returned. If None, the default
      batch_size will be used.

  Returns:
    signature: A namedtuple describing the method's return type signature.
  """
  parent_transition_type = super(
      OutOfGraphOffPolicyReplayBuffer, self).get_transition_elements(batch_size)
  update_horizon = self._update_horizon
  batch_size = self._batch_size if batch_size is None else batch_size

  trajectory_type = [
      ReplayElement('traj_state',
                    (batch_size, update_horizon) + self._state_shape,
                    self._observation_dtype),
      ReplayElement('traj_action', (batch_size, update_horizon), np.int32),
      ReplayElement('traj_reward', (batch_size, update_horizon), np.float32),
      ReplayElement('traj_prob', (batch_size, update_horizon), np.float32),
      ReplayElement('traj_discount', (batch_size, update_horizon), np.float32),
  ]
  return parent_transition_type + trajectory_type
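For context, Dopamine's circular_replay_buffer defines ReplayElement as a plain named tuple of (name, shape, type). Below is a minimal sketch of that definition and of consuming a signature like the one above; the allocate_batch helper is illustrative, not part of the Dopamine API:

import collections
import numpy as np

# Dopamine's definition: a named tuple describing one storage column.
ReplayElement = collections.namedtuple('shape_type', ['name', 'shape', 'type'])

def allocate_batch(transition_elements):
  # Hypothetical helper: pre-allocate one zeroed array per element,
  # keyed by name, with the declared shape and dtype.
  return {elem.name: np.zeros(elem.shape, dtype=elem.type)
          for elem in transition_elements}

elements = [
    ReplayElement('traj_reward', (32, 3), np.float32),
    ReplayElement('traj_discount', (32, 3), np.float32),
]
batch = allocate_batch(elements)
assert batch['traj_reward'].shape == (32, 3)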
Example #2
def __init__(self, artificial_done, **kwargs):
  extra_storage_types = kwargs.pop("extra_storage_types", None) or []
  extra_storage_types.append(
      ReplayElement("artificial_done", (), np.uint8))
  super(_OutOfGraphReplayBuffer,
        self).__init__(extra_storage_types=extra_storage_types, **kwargs)
  self._artificial_done = artificial_done
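Registering "artificial_done" through extra_storage_types means every add() call must supply one extra np.uint8 value for that slot. A usage sketch, assuming constructor arguments with illustrative defaults (they are not from the source):

import numpy as np

buf = _OutOfGraphReplayBuffer(
    artificial_done=True,       # flag from the constructor above
    observation_shape=(84, 84),  # assumed shape for the sketch
    stack_size=4,
    replay_capacity=1000,
    batch_size=32)
obs = np.zeros((84, 84), dtype=np.uint8)
# Dopamine's add(observation, action, reward, terminal, *extra_args):
# the trailing value fills the 'artificial_done' column declared above.
buf.add(obs, 0, 1.0, 0, np.uint8(0))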
Example #3
def _build_replay_buffer(self, use_staging):
  """Creates the replay buffer used by the agent.

  Args:
    use_staging: bool, if True, uses a staging area to prefetch data for
      faster training.

  Returns:
    A `WrappedPrioritizedReplayBuffer` object.

  Raises:
    ValueError: if given an invalid replay scheme.
  """
  print('in RGBGripper rainbow   _build_replay_buffer')

  if self._replay_scheme not in ['uniform', 'prioritized']:
    raise ValueError('Invalid replay scheme: {}'.format(self._replay_scheme))
  return prioritized_replay_buffer.WrappedPrioritizedReplayBuffer(
      observation_shape=dqn_agent.OBSERVATION_SHAPE,
      stack_size=dqn_agent.STACK_SIZE,
      use_staging=use_staging,
      update_horizon=self.update_horizon,
      gamma=self.gamma,
      extra_storage_types=[
          ReplayElement('gripper', (), np.uint8),
      ])  # , ReplayElement('next_gripper', (), np.uint8)
Example #4
def __init__(self, artificial_done, **kwargs):
  extra_storage_types = kwargs.pop("extra_storage_types", None) or []
  msg = "Other extra_storage_types aren't currently supported for this class."
  assert not extra_storage_types, msg
  extra_storage_types.append(ReplayElement("artificial_done", (), np.uint8))
  super(_OutOfGraphPrioritizedReplayBuffer, self).__init__(
      extra_storage_types=extra_storage_types, **kwargs)
  self._artificial_done = artificial_done
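Unlike Example #2, this prioritized variant asserts that the caller supplied no extra_storage_types of its own before appending 'artificial_done'; per the assertion message, other extra storage types simply aren't supported by this class.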
Example #5
def get_add_args_signature(self):
  """The signature of the add function.

  The signature is the same as the one for OutOfGraphReplayBuffer, with an
  added priority.

  Returns:
    list of ReplayElements defining the type of the argument signature needed
      by the add function.
  """
  parent_add_signature = super(OutOfGraphPrioritizedReplayBuffer,
                               self).get_add_args_signature()
  add_signature = parent_add_signature + [
      ReplayElement('priority', (), np.float32)
  ]
  return add_signature
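Because 'priority' is appended to the add-args signature, callers pass it as the final positional argument to add(). A minimal sketch, where prioritized_buffer is an assumed instance of the class above and the transition values are illustrative:

import numpy as np

obs = np.zeros((84, 84), dtype=np.uint8)
# (observation, action, reward, terminal, priority), matching the
# ReplayElement order declared by get_add_args_signature() above.
prioritized_buffer.add(obs, 1, 0.5, 0, 1.0)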
Example #6
def get_transition_elements(self, batch_size=None):
  """Returns a 'type signature' for sample_transition_batch.

  Args:
    batch_size: int, number of transitions returned. If None, the default
      batch_size will be used.

  Returns:
    signature: A namedtuple describing the method's return type signature.
  """
  parent_transition_type = super(
      OutOfGraphPrioritizedReplayBuffer, self).get_transition_elements(batch_size)
  probabilities_type = [
      ReplayElement('sampling_probabilities', (batch_size,), np.float32)
  ]
  return parent_transition_type + probabilities_type
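The extra 'sampling_probabilities' column is what prioritized experience replay uses to correct its sampling bias. A sketch of the standard importance-sampling weights w_i = (1 / (N * p_i))^beta from Schaul et al., computed from that column (the function and its argument names are assumptions, not Dopamine API):

import numpy as np

def importance_weights(sampling_probabilities, replay_capacity, beta=0.5):
  # Standard PER correction: rarely-sampled transitions get larger weights.
  weights = (replay_capacity * sampling_probabilities) ** (-beta)
  return weights / weights.max()  # normalize so the largest weight is 1.0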
Example #7
  def _build_replay_buffer(self, use_staging):
    """Creates the replay buffer used by the agent.

    Args:
      use_staging: bool, if True, uses a staging area to prefetch data for
        faster training.

    Returns:
      A `WrappedCSReplayBuffer` object.

    Raises:
      ValueError: if given an invalid replay scheme.
    """
    return cs_replay_buffer.WrappedCSReplayBuffer(
        observation_shape=self.observation_shape,
        stack_size=self.stack_size,
        use_staging=use_staging,
        update_horizon=self.update_horizon,
        gamma=self.gamma,
        extra_storage_types=[ReplayElement(
            'beginning', (), np.bool_)])  # np.bool was removed in NumPy 1.24
Example #8
  def _build_replay_buffer(self, use_staging):
    """Creates the replay buffer used by the agent.

    Args:
      use_staging: bool, if True, uses a staging area to prefetch data for
        faster training.

    Returns:
      A WrapperReplayBuffer object.
    """
    import numpy as np
    from dopamine.replay_memory.circular_replay_buffer import ReplayElement

    return logged_replay_buffer.WrappedLoggedReplayBuffer(
        log_dir=self._replay_log_dir,
        observation_shape=self.observation_shape,
        stack_size=self.stack_size,
        use_staging=use_staging,
        update_horizon=self.update_horizon,
        gamma=self.gamma,
        extra_storage_types=[ReplayElement('prob', [], np.float32)],
        observation_dtype=self.observation_dtype.as_numpy_dtype)
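Note that the shape [] used for 'prob' here is equivalent to the scalar shape () used in the other examples. Once built, the wrapped buffer's out-of-graph memory pairs sampled tensors with the names declared by get_transition_elements(), a common Dopamine pattern sketched below (the agent variable is assumed to hold the object whose _build_replay_buffer is shown above):

replay = agent._replay  # assumed: the WrappedLoggedReplayBuffer built above
samples = replay.memory.sample_transition_batch()
names = [e.name for e in replay.memory.get_transition_elements()]
batch = dict(zip(names, samples))
# 'prob' is the per-transition scalar column added via extra_storage_types.
print(batch['prob'].shape)  # (batch_size,)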