def _build_replay_buffer(self, use_staging):
  """Creates the replay buffer used by the agent.

  Args:
    use_staging: bool, if True, uses a staging area to prefetch data for
      faster training.

  Returns:
    A `WrappedPrioritizedReplayBuffer` object.

  Raises:
    ValueError: if given an invalid replay scheme.
  """
  print('in RGBGripper rainbow   _build_replay_buffer')  # Debug trace.

  if self._replay_scheme not in ['uniform', 'prioritized']:
    raise ValueError('Invalid replay scheme: {}'.format(self._replay_scheme))
  # Store one extra uint8 per transition for the gripper state.
  return prioritized_replay_buffer.WrappedPrioritizedReplayBuffer(
      observation_shape=dqn_agent.OBSERVATION_SHAPE,
      stack_size=dqn_agent.STACK_SIZE,
      use_staging=use_staging,
      update_horizon=self.update_horizon,
      gamma=self.gamma,
      extra_storage_types=[ReplayElement(
          'gripper', (), np.uint8)])  # , ReplayElement('next_gripper', (), np.uint8)
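Declaring extra_storage_types obliges every add() call to supply a matching value for each extra element, in order. A minimal sketch of how the agent might record the extra 'gripper' byte, assuming the usual Dopamine imports (ReplayElement comes from dopamine.replay_memory.circular_replay_buffer) and Dopamine's argument ordering, where the priority comes last for the prioritized buffer; the _store_transition override and its gripper_state argument are illustrative assumptions, not part of the original:

def _store_transition(self, last_observation, action, reward, is_terminal,
                      gripper_state, priority):
  # Extra positional args to add() must line up, in order, with the
  # extra_storage_types declared at construction time.
  if not self.eval_mode:
    self._replay.add(last_observation, action, reward, is_terminal,
                     np.uint8(gripper_state), priority)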
Example #2
def _build_replay_buffer(self, use_staging):
  """Creates the replay buffer used by the agent.

  Args:
    use_staging: bool, if True, uses a staging area to prefetch data for
      faster training.

  Returns:
    A `WrappedPrioritizedReplayBuffer` object.

  Raises:
    ValueError: if given an invalid replay scheme.
  """
  if self._replay_scheme not in ['uniform', 'prioritized']:
    raise ValueError('Invalid replay scheme: {}'.format(self._replay_scheme))
  # Both replay schemes use the same data structure, but the 'uniform' scheme
  # sets all priorities to the same value (which yields uniform sampling).
  return prioritized_replay_buffer.WrappedPrioritizedReplayBuffer(
      observation_shape=self.observation_shape,
      stack_size=self.stack_size,
      use_staging=use_staging,
      update_horizon=self.update_horizon,
      gamma=self.gamma,
      observation_dtype=self.observation_dtype.as_numpy_dtype)
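With the 'prioritized' scheme, the agent is also expected to weight its loss against the skewed sampling and to push fresh priorities back into the buffer after each update. A sketch of that pattern, mirroring what Dopamine's Rainbow agent builds into its train op; 'loss' is assumed to be the per-transition TD loss tensor and tf is TensorFlow 1.x:

# Importance-sampling weights undo the bias of non-uniform sampling.
probs = self._replay.transition['sampling_probabilities']
loss_weights = 1.0 / tf.sqrt(probs + 1e-10)
loss_weights /= tf.reduce_max(loss_weights)
weighted_loss = loss_weights * loss

# Re-prioritize the sampled transitions by their TD error.
update_priorities_op = self._replay.tf_set_priority(
    self._replay.indices, tf.sqrt(loss + 1e-10))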
Example #3
def create_default_memory(self):
  return prioritized_replay_buffer.WrappedPrioritizedReplayBuffer(
      SCREEN_SIZE,
      STACK_SIZE,
      use_staging=False,
      replay_capacity=REPLAY_CAPACITY,
      batch_size=BATCH_SIZE,
      max_sample_attempts=10)  # Fewer retries on invalid indices; faster tests.
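The module-level constants come from elsewhere in the test file; plausible definitions with illustrative values only (the originals are not shown here, and SCREEN_SIZE may be a bare int side length or an (H, W) tuple depending on the Dopamine version):

SCREEN_SIZE = 84       # Frame side length; newer versions take a tuple like (84, 84).
STACK_SIZE = 4         # Frames stacked per state.
REPLAY_CAPACITY = 100  # Tiny capacity keeps tests fast.
BATCH_SIZE = 8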
Example #4
def _build_replay_buffer(self, use_staging):
  if self._replay_scheme not in ['uniform', 'prioritized']:
    raise ValueError('Invalid replay scheme: {}'.format(self._replay_scheme))
  return prioritized_replay_buffer.WrappedPrioritizedReplayBuffer(
      observation_shape=self.observation_shape,
      stack_size=self.stack_size,
      use_staging=use_staging,
      update_horizon=self.update_horizon,
      gamma=self.gamma,
      observation_dtype=self.observation_dtype.as_numpy_dtype)
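The two schemes differ only in the priority a new transition is stored with. A sketch of the companion storing logic, closely following Dopamine's Rainbow agent:

def _store_transition(self, last_observation, action, reward, is_terminal,
                      priority=None):
  if priority is None:
    if self._replay_scheme == 'uniform':
      # A constant priority makes prioritized sampling uniform.
      priority = 1.0
    else:
      # New transitions get the max priority seen so far, so each is
      # sampled at least once before being re-prioritized by TD error.
      priority = self._replay.memory.sum_tree.max_recorded_priority
  if not self.eval_mode:
    self._replay.add(last_observation, action, reward, is_terminal, priority)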
Example #6
  def _build_replay_buffer(self, use_staging):
    """Creates the replay buffer used by the agent.

    Args:
      use_staging: bool, if True, uses a staging area to prefetch data for
        faster training.

    Returns:
      A `WrappedPrioritizedReplayBuffer` object.

    Raises:
      ValueError: if given an invalid replay scheme.
    """
    if self._replay_scheme not in ['uniform', 'prioritized']:
      raise ValueError('Invalid replay scheme: {}'.format(self._replay_scheme))
    return prioritized_replay_buffer.WrappedPrioritizedReplayBuffer(
        observation_shape=dqn_agent.OBSERVATION_SHAPE,
        stack_size=dqn_agent.STACK_SIZE,
        use_staging=use_staging,
        update_horizon=self.update_horizon,
        gamma=self.gamma)
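Once built, the wrapped buffer exposes each sampled batch as in-graph tensors, unpacked from its transition dictionary into attributes. A hedged usage sketch (attribute names follow Dopamine's WrappedReplayBuffer):

replay = self._build_replay_buffer(use_staging=True)
# Each attribute yields a freshly sampled batch per session run.
states = replay.states            # e.g. uint8, [batch, 84, 84, stack_size]
actions = replay.actions
rewards = replay.rewards
next_states = replay.next_states
terminals = replay.terminals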