def get_transition_elements(self, batch_size=None):
  """Returns a 'type signature' for sample_transition_batch.

  Args:
    batch_size: int, number of transitions returned. If None, the default
      batch_size will be used.

  Returns:
    signature: A namedtuple describing the method's return type signature.
  """
  parent_transition_type = (super(
      OutOfGraphOffPolicyReplayBuffer, self).get_transition_elements(
          batch_size))
  update_horizon = self._update_horizon
  batch_size = self._batch_size if batch_size is None else batch_size
  trajectory_type = [
      ReplayElement('traj_state',
                    (batch_size, update_horizon) + self._state_shape,
                    self._observation_dtype),
      ReplayElement('traj_action', (batch_size, update_horizon), np.int32),
      ReplayElement('traj_reward', (batch_size, update_horizon), np.float32),
      ReplayElement('traj_prob', (batch_size, update_horizon), np.float32),
      ReplayElement('traj_discount', (batch_size, update_horizon),
                    np.float32),
  ]
  return parent_transition_type + trajectory_type
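
# Illustrative sketch, not part of the original class: `ReplayElement` is a
# namedtuple with a `name` field, and `sample_transition_batch` returns its
# tensors in the same order as `get_transition_elements`, so the two can be
# zipped into a name-keyed dict. The helper name below is hypothetical.
def transition_batch_as_dict(replay_buffer, batch_size=None):
  elements = replay_buffer.get_transition_elements(batch_size)
  batch = replay_buffer.sample_transition_batch(batch_size)
  # Keys include the parent fields plus traj_state, traj_action, traj_reward,
  # traj_prob and traj_discount defined above.
  return {element.name: array for element, array in zip(elements, batch)}
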
def __init__(self, artificial_done, **kwargs):
  extra_storage_types = kwargs.pop("extra_storage_types", None) or []
  # Store an extra 'artificial_done' flag with every transition; `add` will
  # expect it after the standard (observation, action, reward, terminal)
  # arguments.
  extra_storage_types.append(ReplayElement("artificial_done", (), np.uint8))
  super(_OutOfGraphReplayBuffer, self).__init__(
      extra_storage_types=extra_storage_types, **kwargs)
  self._artificial_done = artificial_done
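
# Usage sketch under assumed constructor arguments (shapes and capacities are
# illustrative, not from the original source). Because 'artificial_done' is
# appended to extra_storage_types, `add` expects one extra positional value
# after the standard (observation, action, reward, terminal) arguments.
import numpy as np

buffer = _OutOfGraphReplayBuffer(
    artificial_done=True,
    observation_shape=(84, 84),
    stack_size=4,
    replay_capacity=100000,
    batch_size=32)
observation = np.zeros((84, 84), dtype=np.uint8)
# The trailing 0 fills the extra 'artificial_done' slot.
buffer.add(observation, 0, 0.0, 0, 0)
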
def _build_replay_buffer(self, use_staging):
  """Creates the replay buffer used by the agent.

  Args:
    use_staging: bool, if True, uses a staging area to prefetch data for
      faster training.

  Returns:
    A `WrappedPrioritizedReplayBuffer` object.

  Raises:
    ValueError: if given an invalid replay scheme.
  """
  print('in RGBGripper rainbow _build_replay_buffer')
  if self._replay_scheme not in ['uniform', 'prioritized']:
    raise ValueError('Invalid replay scheme: {}'.format(self._replay_scheme))
  return prioritized_replay_buffer.WrappedPrioritizedReplayBuffer(
      observation_shape=dqn_agent.OBSERVATION_SHAPE,
      stack_size=dqn_agent.STACK_SIZE,
      use_staging=use_staging,
      update_horizon=self.update_horizon,
      gamma=self.gamma,
      # Store the gripper state with each transition; a 'next_gripper'
      # element could be added here in the same way if needed.
      extra_storage_types=[ReplayElement('gripper', (), np.uint8)])
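
# Illustrative sketch (the gripper plumbing is an assumption, not from the
# original source): with a 'gripper' extra storage element, the agent's
# transition-storage path has to pass the gripper value to `self._replay.add`
# between the standard fields and the priority, matching the order of
# `get_add_args_signature`.
def _store_transition(self, last_observation, action, reward, is_terminal,
                      gripper_state):
  if self._replay_scheme == 'uniform':
    priority = 1.0
  else:
    priority = self._replay.memory.sum_tree.max_recorded_priority
  self._replay.add(last_observation, action, reward, is_terminal,
                   gripper_state, priority)
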
def __init__(self, artificial_done, **kwargs):
  extra_storage_types = kwargs.pop("extra_storage_types", None) or []
  msg = "Other extra_storage_types aren't currently supported for this class."
  assert not extra_storage_types, msg
  extra_storage_types.append(ReplayElement("artificial_done", (), np.uint8))
  super(_OutOfGraphPrioritizedReplayBuffer, self).__init__(
      extra_storage_types=extra_storage_types, **kwargs)
  self._artificial_done = artificial_done
def get_add_args_signature(self):
  """The signature of the add function.

  The signature is the same as the one for OutOfGraphReplayBuffer, with an
  added priority.

  Returns:
    list of ReplayElements defining the type of the argument signature needed
      by the add function.
  """
  parent_add_signature = super(OutOfGraphPrioritizedReplayBuffer,
                               self).get_add_args_signature()
  add_signature = parent_add_signature + [
      ReplayElement('priority', (), np.float32)
  ]
  return add_signature
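
# Usage sketch (shapes and capacity are illustrative assumptions): with
# 'priority' appended to the add-args signature, callers pass the priority as
# the final positional argument to `add`; internally it is routed to the sum
# tree rather than stored as a transition field.
import numpy as np

buffer = OutOfGraphPrioritizedReplayBuffer(
    observation_shape=(84, 84),
    stack_size=4,
    replay_capacity=100000,
    batch_size=32)
observation = np.zeros((84, 84), dtype=np.uint8)
buffer.add(observation, 0, 0.0, 0, 1.0)  # last argument is the priority
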
def get_transition_elements(self, batch_size=None):
  """Returns a 'type signature' for sample_transition_batch.

  Args:
    batch_size: int, number of transitions returned. If None, the default
      batch_size will be used.

  Returns:
    signature: A namedtuple describing the method's return type signature.
  """
  batch_size = self._batch_size if batch_size is None else batch_size
  parent_transition_type = (super(
      OutOfGraphPrioritizedReplayBuffer,
      self).get_transition_elements(batch_size))
  probabilities_type = [
      ReplayElement('sampling_probabilities', (batch_size,), np.float32)
  ]
  return parent_transition_type + probabilities_type
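
# Illustrative sketch (not from the original source): the extra
# 'sampling_probabilities' column returned by sample_transition_batch is
# commonly converted into importance-sampling weights that rescale the
# prioritized-replay loss.
import numpy as np

def loss_weights_from_probabilities(sampling_probabilities):
  # Inverse-sqrt weighting, normalized so the largest weight is 1.
  weights = 1.0 / np.sqrt(sampling_probabilities + 1e-10)
  return weights / np.max(weights)
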
def _build_replay_buffer(self, use_staging):
  """Creates the replay buffer used by the agent.

  Args:
    use_staging: bool, if True, uses a staging area to prefetch data for
      faster training.

  Returns:
    A `WrappedCSReplayBuffer` object.
  """
  return cs_replay_buffer.WrappedCSReplayBuffer(
      observation_shape=self.observation_shape,
      stack_size=self.stack_size,
      use_staging=use_staging,
      update_horizon=self.update_horizon,
      gamma=self.gamma,
      # np.bool was removed from NumPy; use the np.bool_ dtype instead.
      extra_storage_types=[ReplayElement('beginning', (), np.bool_)])
def _build_replay_buffer(self, use_staging):
  """Creates the replay buffer used by the agent.

  Args:
    use_staging: bool, if True, uses a staging area to prefetch data for
      faster training.

  Returns:
    A `WrappedLoggedReplayBuffer` object.
  """
  # These imports are kept local to the method, as in the original.
  import numpy as np
  from dopamine.replay_memory.circular_replay_buffer import ReplayElement

  return logged_replay_buffer.WrappedLoggedReplayBuffer(
      log_dir=self._replay_log_dir,
      observation_shape=self.observation_shape,
      stack_size=self.stack_size,
      use_staging=use_staging,
      update_horizon=self.update_horizon,
      gamma=self.gamma,
      # Log a per-transition probability alongside the standard fields.
      extra_storage_types=[ReplayElement('prob', [], np.float32)],
      observation_dtype=self.observation_dtype.as_numpy_dtype)