def __init__(self, data_spec, capacity, alpha=0.6):
    """Creates a PyPrioritizedReplayBuffer.

    Args:
      data_spec: An ArraySpec or a list/tuple/nest of ArraySpecs describing a
        single item that can be stored in this buffer.
      capacity: The maximum number of items that can be stored in the buffer.
      alpha: Determines how much prioritization is used, with alpha = 0
        corresponding to the uniform case.
    """
    super(PyPrioritizedReplayBuffer, self).__init__(data_spec, capacity)

    # The values are handed to the logger as arguments rather than being
    # pre-formatted with `%`, so the message is only rendered when the record
    # is actually emitted. `%s` already applies str() to each argument.
    # NOTE(review): assumes `logger` accepts the standard `msg, *args` call
    # form (true for `logging.Logger`) -- confirm against its definition.
    logger.info(
        "Creating an instance of %s. Params: data_spec: %s, capacity: %s, alpha: %s",
        type(self).__name__, data_spec, capacity, alpha)

    # State variables needed to maintain the replay buffer. These were copied
    # from the uniform replay buffer.
    self._storage = numpy_storage.NumpyStorage(self._encoded_data_spec(), capacity)
    self._lock = threading.Lock()
    self._np_state = numpy_storage.NumpyState()

    # Adding elements to the replay buffer is done in a circular way.
    # Keeps track of the actual size of the replay buffer and the location
    # where to add new elements.
    self._np_state.size = np.int64(0)
    self._np_state.cur_id = np.int64(0)

    # Total number of items that went through the replay buffer.
    self._np_state.item_count = np.int64(0)

    self._prioritized_buffer_alpha = alpha
    self._prioritized_buffer_capacity = capacity

    # An array in which we keep track of the priorities. Its length equals the
    # capacity of the replay buffer: the entry at a given index belongs to the
    # experience stored at the same index in the buffer. Each entry represents
    # the loss of its experience the last time that experience was used for
    # training. All priorities start at zero.
    self._prioritized_buffer_priorities = np.zeros((capacity,), dtype=np.float32)
def __init__(self, data_spec, capacity):
    """Builds an empty PyUniformReplayBuffer.

    Args:
      data_spec: An ArraySpec or a list/tuple/nest of ArraySpecs describing a
        single item that can be stored in this buffer.
      capacity: The maximum number of items that can be stored in the buffer.
    """
    super(PyUniformReplayBuffer, self).__init__(data_spec, capacity)

    # Backing storage, sized for `capacity` items of the encoded item spec.
    item_spec = self._encoded_data_spec()
    self._storage = numpy_storage.NumpyStorage(item_spec, capacity)
    self._lock = threading.Lock()
    self._np_state = numpy_storage.NumpyState()

    # Elements are added to the buffer in a circular way, so three counters
    # are kept, all starting at zero:
    #   size       -- the actual number of items held by the buffer,
    #   cur_id     -- the location where the next element is added,
    #   item_count -- the total number of items that went through the buffer.
    # Each counter gets its own freshly created np.int64 value.
    for counter_name in ("size", "cur_id", "item_count"):
        setattr(self._np_state, counter_name, np.int64(0))