def _build_replay_buffer(self, use_staging):
  """Creates the replay buffer used by the agent.

  Args:
    use_staging: bool, if True, uses a staging area to prefetch data for
      faster training.

  Returns:
    A `WrappedPrioritizedReplayBuffer` object.

  Raises:
    ValueError: if given an invalid replay scheme.
  """
  # Fix: removed a leftover debug print and dead commented-out code
  # (a disabled 'next_gripper' ReplayElement) that trailed the return.
  if self._replay_scheme not in ['uniform', 'prioritized']:
    raise ValueError('Invalid replay scheme: {}'.format(
        self._replay_scheme))
  # A scalar uint8 'gripper' value is stored alongside every transition so
  # the gripper state can be replayed with the observation.
  return prioritized_replay_buffer.WrappedPrioritizedReplayBuffer(
      observation_shape=dqn_agent.OBSERVATION_SHAPE,
      stack_size=dqn_agent.STACK_SIZE,
      use_staging=use_staging,
      update_horizon=self.update_horizon,
      gamma=self.gamma,
      extra_storage_types=[ReplayElement('gripper', (), np.uint8)])
def _build_replay_buffer(self, use_staging):
  """Creates the replay buffer used by the agent.

  Args:
    use_staging: bool, if True, uses a staging area to prefetch data for
      faster training.

  Returns:
    A `WrappedPrioritizedReplayBuffer` object.

  Raises:
    ValueError: if given an invalid replay scheme.
  """
  valid_schemes = ('uniform', 'prioritized')
  if self._replay_scheme not in valid_schemes:
    raise ValueError('Invalid replay scheme: {}'.format(
        self._replay_scheme))
  # Both replay schemes share the same underlying storage; the 'uniform'
  # scheme simply assigns every transition an identical priority, which
  # makes sampling uniform.
  return prioritized_replay_buffer.WrappedPrioritizedReplayBuffer(
      observation_shape=self.observation_shape,
      stack_size=self.stack_size,
      use_staging=use_staging,
      update_horizon=self.update_horizon,
      gamma=self.gamma,
      observation_dtype=self.observation_dtype.as_numpy_dtype)
def create_default_memory(self):
  """Builds a small prioritized replay buffer configured for tests."""
  return prioritized_replay_buffer.WrappedPrioritizedReplayBuffer(
      SCREEN_SIZE,
      STACK_SIZE,
      use_staging=False,
      replay_capacity=REPLAY_CAPACITY,
      batch_size=BATCH_SIZE,
      # A low attempt cap keeps failing samples from slowing the test run.
      max_sample_attempts=10)
def _build_replay_buffer(self, use_staging):
  """Creates the prioritized replay buffer used by the agent.

  Args:
    use_staging: bool, if True, uses a staging area to prefetch data for
      faster training.

  Returns:
    A `WrappedPrioritizedReplayBuffer` object.

  Raises:
    ValueError: if given an invalid replay scheme.
  """
  if self._replay_scheme not in ('uniform', 'prioritized'):
    raise ValueError('Invalid replay scheme: {}'.format(
        self._replay_scheme))
  # Collect the constructor arguments first, then build the buffer.
  buffer_kwargs = dict(
      observation_shape=self.observation_shape,
      stack_size=self.stack_size,
      use_staging=use_staging,
      update_horizon=self.update_horizon,
      gamma=self.gamma,
      observation_dtype=self.observation_dtype.as_numpy_dtype)
  return prioritized_replay_buffer.WrappedPrioritizedReplayBuffer(
      **buffer_kwargs)
def _build_replay_buffer(self, use_staging):
  """Creates the replay buffer used by the agent.

  Args:
    use_staging: bool, if True, uses a staging area to prefetch data for
      faster training.

  Returns:
    A `WrappedPrioritizedReplayBuffer` object.

  Raises:
    ValueError: if given an invalid replay scheme.
  """
  scheme = self._replay_scheme
  if scheme != 'uniform' and scheme != 'prioritized':
    raise ValueError('Invalid replay scheme: {}'.format(scheme))
  # The same data structure backs both schemes: under 'uniform' every
  # priority is set to the same value, which yields uniform sampling.
  return prioritized_replay_buffer.WrappedPrioritizedReplayBuffer(
      observation_shape=self.observation_shape,
      stack_size=self.stack_size,
      use_staging=use_staging,
      update_horizon=self.update_horizon,
      gamma=self.gamma,
      observation_dtype=self.observation_dtype.as_numpy_dtype)
def _build_replay_buffer(self, use_staging):
  """Creates the replay buffer used by the agent.

  Args:
    use_staging: bool, if True, uses a staging area to prefetch data for
      faster training.

  Returns:
    A `WrappedPrioritizedReplayBuffer` object.

  Raises:
    ValueError: if given an invalid replay scheme.
  """
  # Only these two schemes are supported; anything else is a config error.
  if self._replay_scheme not in ('uniform', 'prioritized'):
    raise ValueError('Invalid replay scheme: {}'.format(
        self._replay_scheme))
  # NOTE(review): this variant reads the observation geometry from
  # module-level dqn_agent constants rather than instance attributes —
  # presumably the class predates configurable observation shapes.
  return prioritized_replay_buffer.WrappedPrioritizedReplayBuffer(
      observation_shape=dqn_agent.OBSERVATION_SHAPE,
      stack_size=dqn_agent.STACK_SIZE,
      use_staging=use_staging,
      update_horizon=self.update_horizon,
      gamma=self.gamma)