Ejemplo n.º 1
0
    def __init__(self,
                 env,
                 info_key='score',
                 name='AverageScore',
                 buffer_size=10,
                 batch_size=None):
        """Creates an AverageGymInfoMetric.

        Args:
          env: Object holding the wrapped gym environment(s). It is stored
            as-is and must support `len()` whenever `batch_size` is falsy.
          info_key: str, key of the info dict entry that is being averaged.
          name: Metric name forwarded to the parent constructor.
          buffer_size: Number of episodes to compute the average over.
          batch_size: Optional batch size forwarded to the parent constructor.
            A falsy value (None or 0) is replaced by `len(env)`.
        """
        self._info_key = info_key
        self._wrapped_gym_envs = env

        # Fall back to one slot per wrapped environment.
        if not batch_size:
            batch_size = len(env)

        # The state container starts out empty; no placeholder value is
        # registered on it in this constructor.
        self._np_state = numpy_storage.NumpyState()

        parent_kwargs = dict(buffer_size=buffer_size, batch_size=batch_size)
        super(AverageGymInfoMetric, self).__init__(name, **parent_kwargs)
    def __init__(self, data_spec, capacity, alpha=0.6):
        """Creates a PyPrioritizedReplayBuffer.

        Args:
            data_spec: An ArraySpec or a list/tuple/nest of ArraySpecs describing a single item that can be stored
                       in this buffer.
            capacity: The maximum number of items that can be stored in the buffer.
            alpha: Determines how much prioritization is used, with alpha = 0 corresponding to the uniform case.
        """
        super(PyPrioritizedReplayBuffer, self).__init__(data_spec, capacity)
        # Hand the values to the logger as arguments instead of pre-formatting the string, so the message is only
        # rendered when the record is actually emitted. %s applies str() to each argument, so the text is unchanged.
        logger.info("Creating an instance of %s. Params: data_spec: %s, capacity: %s, alpha: %s",
                    type(self).__name__, data_spec, capacity, alpha)

        # State variables needed to maintain the replay buffer. These were copied from the uniform replay buffer.
        self._storage = numpy_storage.NumpyStorage(self._encoded_data_spec(), capacity)
        self._lock = threading.Lock()
        self._np_state = numpy_storage.NumpyState()

        # Adding elements to the replay buffer is done in a circular way.
        # Keeps track of the actual size of the replay buffer and the location
        # where to add new elements.
        self._np_state.size = np.int64(0)
        self._np_state.cur_id = np.int64(0)

        # Total number of items that went through the replay buffer.
        self._np_state.item_count = np.int64(0)

        self._prioritized_buffer_alpha = alpha
        self._prioritized_buffer_capacity = capacity

        # Array of priorities with one float32 slot per buffer position, all starting at zero. The entry at a given
        # index belongs to the experience stored at the same index in the buffer and represents the loss of that
        # experience the last time it was used for training.
        self._prioritized_buffer_priorities = np.zeros((capacity,), dtype=np.float32)
Ejemplo n.º 3
0
    def __init__(self,
                 env,
                 name='AverageScore',
                 buffer_size=10,
                 batch_size=None):
        """Creates a CubeAverageScoreMetric.

        Args:
          env: A single environment or a list of environments. Anything that
            is not a list is wrapped in a one-element list. The environments
            are expected to implement get_score(), which updates the metric
            (that method is not called in this constructor).
          name: Metric name.
          buffer_size: Number of episodes to compute the average over.
          batch_size: Optional batch size forwarded to the parent constructor.
            A falsy value (None or 0) is replaced by the number of
            environments held in the normalized list.
        """
        # Normalize to a list so the rest of the class can always iterate.
        self._env = env if isinstance(env, list) else [env]

        # Size the default from the normalized list rather than the raw
        # argument: a single environment need not support len().
        batch_size = batch_size or len(self._env)

        self._np_state = numpy_storage.NumpyState()
        # Counter used so that the buffer is not over-populated by returned
        # trajectories from short episodes. Setting it here gives the state a
        # value before the metric is first called.
        self._np_state.adds_to_buff = np.array(0, dtype=float)
        super(CubeAverageScoreMetric, self).__init__(name,
                                                     buffer_size=buffer_size,
                                                     batch_size=batch_size)
Ejemplo n.º 4
0
  def testSaveRestore(self):
    """Checks that NumpyState arrays round-trip through tf.train.Checkpoint."""
    ckpt_prefix = os.path.join(self.get_temp_dir(), 'ckpt')

    state = numpy_storage.NumpyState()
    ckpt = tf.train.Checkpoint(numpy_arrays=state)
    # The array is attached after the checkpoint object already exists.
    state.x = np.ones([3, 4])
    saved_path = ckpt.save(ckpt_prefix)

    # Overwrite in place, confirm the overwrite, then restore the saved values.
    state.x[:] = 0.
    self.assertAllEqual(state.x, np.zeros([3, 4]))
    ckpt.restore(saved_path).assert_consumed()
    self.assertAllEqual(state.x, np.ones([3, 4]))

    # A brand-new NumpyState has no `x`; restore() creates the attribute.
    fresh_ckpt = tf.train.Checkpoint(numpy_arrays=numpy_storage.NumpyState())
    fresh_ckpt.restore(saved_path).assert_consumed()
    self.assertAllEqual(np.ones([3, 4]), fresh_ckpt.numpy_arrays.x)
Ejemplo n.º 5
0
 def __init__(self, name='AverageReturn', buffer_size=10, batch_size=None):
     """Creates an AverageControlCostMetric.

     Args:
       name: Metric name forwarded to the parent constructor.
       buffer_size: Forwarded to the parent constructor as `buffer_size`.
       batch_size: Forwarded to the parent constructor as `batch_size`.
     """
     state = numpy_storage.NumpyState()
     # Seed control_cost with a placeholder so it gets included in the first
     # checkpoint (before the metric is first called).
     state.control_cost = np.float64(0)
     self._np_state = state

     parent_kwargs = dict(buffer_size=buffer_size, batch_size=batch_size)
     super(AverageControlCostMetric, self).__init__(name, **parent_kwargs)
Ejemplo n.º 6
0
 def __init__(self, name='QMetric', buffer_size=10):
     """Creates a QMetric.

     Args:
       name: Metric name passed to the parent constructor.
       buffer_size: `maxlen` used for each of the three internal deques.
     """
     super(QMetric, self).__init__(name)

     # Three separate float64 deques, all built with the same maxlen.
     self._buffer, self._count, self._sumcount = (
         StatsNumpyDeque(maxlen=buffer_size, dtype=np.float64)
         for _ in range(3))

     # Initial values for the most recent Q value and time step.
     state = numpy_storage.NumpyState()
     state._most_recent_q = np.float64(-100)  # pylint: disable=protected-access
     state._most_recent_time = np.int64(0)  # pylint: disable=protected-access
     self._np_state = state

     self.reset()
Ejemplo n.º 7
0
 def __init__(self,
              name: Text = 'AverageEpisodeLength',
              buffer_size: types.Int = 10,
              batch_size: Optional[types.Int] = None):
   """Creates an AverageEpisodeLengthMetric.

   Args:
     name: Metric name forwarded to the parent constructor.
     buffer_size: Forwarded to the parent constructor as `buffer_size`.
     batch_size: Forwarded to the parent constructor as `batch_size`.
   """
   state = numpy_storage.NumpyState()
   # Seed episode_steps with a placeholder so it gets included in the first
   # checkpoint (before the metric is first called).
   state.episode_steps = np.float64(0)
   self._np_state = state

   parent_kwargs = dict(buffer_size=buffer_size, batch_size=batch_size)
   super(AverageEpisodeLengthMetric, self).__init__(name, **parent_kwargs)
Ejemplo n.º 8
0
 def __init__(self,
              name='MinitaurAverageMaxSpeed',
              buffer_size=10,
              batch_size=None):
     """Creates a MinitaurAverageMaxSpeedMetric for minitaur speed stats.

     Args:
       name: Metric name forwarded to the parent constructor.
       buffer_size: Forwarded to the parent constructor as `buffer_size`.
       batch_size: Forwarded to the parent constructor as `batch_size`.
     """
     state = numpy_storage.NumpyState()
     # Seed speed with a placeholder so it gets included in the first
     # checkpoint (before the metric is first called).
     state.speed = np.array(0, dtype=float)
     self._np_state = state

     parent_kwargs = dict(buffer_size=buffer_size, batch_size=batch_size)
     super(MinitaurAverageMaxSpeedMetric, self).__init__(name, **parent_kwargs)
Ejemplo n.º 9
0
 def __init__(self,
              max_episode_len=500,
              dtype=bool,
              name='AverageEarlyFailure',
              buffer_size=10,
              batch_size=None):
   """Creates an AverageEarlyFailureMetric.

   Args:
     max_episode_len: Stored on the instance as `_max_episode_len`.
       Presumably the episode length below which an episode counts as an
       early failure -- confirm against the method that reads it.
     dtype: Not referenced by this constructor; kept so existing callers
       keep working. Defaults to the builtin `bool`, which is what the
       former `np.bool` alias referred to (that alias was removed in
       NumPy 1.24, so using it as a default breaks at definition time).
     name: Metric name forwarded to the parent constructor.
     buffer_size: Forwarded to the parent constructor as `buffer_size`.
     batch_size: Forwarded to the parent constructor as `batch_size`.
   """
   self._np_state = numpy_storage.NumpyState()
   self._max_episode_len = max_episode_len
   # Set a dummy value on self._np_state.episode_steps so it gets included in
   # the first checkpoint (before metric is first called).
   self._np_state.episode_steps = np.array(0, dtype=np.int32)
   super(AverageEarlyFailureMetric, self).__init__(
       name, buffer_size=buffer_size, batch_size=batch_size)
Ejemplo n.º 10
0
 def __init__(self,
              name='DistributionEpisodeLength',
              buffer_size=10,
              batch_size=None):
     """Creates a DistributionEpisodeLengthMetric.

     Args:
       name: Metric name forwarded to the parent constructor.
       buffer_size: Forwarded to the parent constructor and also used as the
         `maxlen` of the replacement buffer created afterwards.
       batch_size: Forwarded to the parent constructor as `batch_size`.
     """
     state = numpy_storage.NumpyState()
     # Seed both fields with placeholders so they get included in the first
     # checkpoint (before the metric is first called).
     state.episode_steps = np.float64(0)
     state.episode_end_mask = np.float64(0)
     self._np_state = state

     parent_kwargs = dict(buffer_size=buffer_size, batch_size=batch_size)
     super(DistributionEpisodeLengthMetric, self).__init__(name,
                                                           **parent_kwargs)

     # Assigned after the parent constructor has run, so this is the
     # `_buffer` the instance ends up with.
     self._buffer = StatsNumpyDeque(maxlen=buffer_size, dtype=np.float64)
Ejemplo n.º 11
0
 def __init__(self,
              name='DistributionReturn',
              buffer_size=10,
              batch_size=None):
     """Creates a DistributionReturnMetric.

     Args:
       name: Metric name forwarded to the parent constructor.
       buffer_size: Forwarded to the parent constructor and also used as the
         `maxlen` of the replacement buffer created afterwards.
       batch_size: Forwarded to the parent constructor as `batch_size`.
     """
     state = numpy_storage.NumpyState()
     # Seed both fields with placeholders so they get included in the first
     # checkpoint (before the metric is first called).
     state.episode_return = np.float64(0)
     state.episode_end_mask = np.float64(0)
     self._np_state = state

     parent_kwargs = dict(buffer_size=buffer_size, batch_size=batch_size)
     super(DistributionReturnMetric, self).__init__(name, **parent_kwargs)

     # Overwrite the buffer to enable more statistics computation.
     self._buffer = StatsNumpyDeque(maxlen=buffer_size, dtype=np.float64)
Ejemplo n.º 12
0
 def __init__(self,
              n_agents,
              name='MultiagentAverageReturn',
              buffer_size=10,
              batch_size=None):
   """Creates an AverageReturnPyMetric.

   Args:
     n_agents: Number of agents; one AverageReturnMetric is created per agent.
     name: Metric name forwarded to the parent constructor.
     buffer_size: Forwarded to the parent constructor and to every per-agent
       metric.
     batch_size: Forwarded to the parent constructor as `batch_size`.
   """
   self.n_agents = n_agents

   state = numpy_storage.NumpyState()
   # Seed episode_return with a placeholder so it gets included in the first
   # checkpoint (before the metric is first called).
   state.episode_return = np.float64(0)
   self._np_state = state

   # One average-return metric per agent, named AverageReturn0, 1, ...
   per_agent = []
   for agent_idx in range(n_agents):
     agent_metric = py_metrics.AverageReturnMetric(
         'AverageReturn%i' % agent_idx, buffer_size=buffer_size)
     per_agent.append(agent_metric)
   self._agent_metrics = per_agent

   parent_kwargs = dict(buffer_size=buffer_size, batch_size=batch_size)
   super(AverageReturnPyMetric, self).__init__(name, **parent_kwargs)
Ejemplo n.º 13
0
 def __init__(self, name, num_envs, env_batch_size, buffer_size=None):
     """Creates an AsyncStreamingMetric.

     Args:
         name (str): name of the metric
         num_envs (int): number of tf_agents.environments; each environment
                 is a batched environment (contains multiple independent
                 envs). The parent constructor receives this value
                 multiplied by `env_batch_size`.
         env_batch_size (int): the size of each batched environment
         buffer_size (int): the window size of data points we want to
                 average over. When None, `max(env_batch_size, 10)` is used.
     """
     # Total number of individual environments across all batched ones.
     total_envs = num_envs * env_batch_size
     self._env_batch_size = env_batch_size

     state = numpy_storage.NumpyState()
     # Seed episode_return with a placeholder so it gets included in the
     # first checkpoint (before the metric is first called).
     state.episode_return = np.float64(0)
     self._np_state = state

     window = max(env_batch_size, 10) if buffer_size is None else buffer_size
     super(AsyncStreamingMetric, self).__init__(
         buffer_size=window, num_envs=total_envs, name=name)
    def __init__(self, data_spec, capacity):
        """Creates a PyUniformReplayBuffer.

        Args:
          data_spec: An ArraySpec or a list/tuple/nest of ArraySpecs describing
            a single item that can be stored in this buffer.
          capacity: The maximum number of items that can be stored in the
            buffer.
        """
        super(PyUniformReplayBuffer, self).__init__(data_spec, capacity)

        encoded_spec = self._encoded_data_spec()
        self._storage = numpy_storage.NumpyStorage(encoded_spec, capacity)
        self._lock = threading.Lock()

        state = numpy_storage.NumpyState()
        # Elements are added in a circular way: `size` is the actual number of
        # items held and `cur_id` is the location where the next one goes.
        state.size = np.int64(0)
        state.cur_id = np.int64(0)
        # Running total of items that went through the replay buffer.
        state.item_count = np.int64(0)
        self._np_state = state
Ejemplo n.º 15
0
 def __init__(self, name: Text = 'Counter'):
     """Creates a CounterMetric.

     Args:
       name: Metric name passed to the parent constructor.
     """
     super(CounterMetric, self).__init__(name)
     # Empty state container; presumably filled in by reset() below --
     # confirm against reset().
     self._np_state = numpy_storage.NumpyState()
     self.reset()
Ejemplo n.º 16
0
 def __init__(self, name: Text = 'NumberOfEpisodes'):
     """Creates a NumberOfEpisodes metric.

     Args:
       name: Metric name passed to the parent constructor.
     """
     super(NumberOfEpisodes, self).__init__(name)
     # Empty state container; presumably filled in by reset() below --
     # confirm against reset().
     self._np_state = numpy_storage.NumpyState()
     self.reset()
Ejemplo n.º 17
0
 def __init__(self, name: Text = 'EnvironmentSteps'):
     """Creates an EnvironmentSteps metric.

     Args:
       name: Metric name passed to the parent constructor.
     """
     super(EnvironmentSteps, self).__init__(name)
     # Empty state container; presumably filled in by reset() below --
     # confirm against reset().
     self._np_state = numpy_storage.NumpyState()
     self.reset()
Ejemplo n.º 18
0
 def __init__(self, name='PyScoreMetric', buffer_size=10, batch_size=None):
     """Creates a PyScoreMetric.

     Args:
       name: Metric name forwarded to the parent constructor.
       buffer_size: Forwarded to the parent constructor as `buffer_size`.
       batch_size: Forwarded to the parent constructor as `batch_size`.
     """
     parent_kwargs = dict(
         name=name, buffer_size=buffer_size, batch_size=batch_size)
     super(PyScoreMetric, self).__init__(**parent_kwargs)

     # The state is attached only after the parent constructor has run.
     state = numpy_storage.NumpyState()
     state.episode_score = np.float64(0)
     self._np_state = state