Example #1
    def testBatch(self, metric_class, expected_result):
        metric = metric_class()

        metric(
            nest_utils.stack_nested_arrays([
                trajectory.boundary((), (), (), 0., 1.),
                trajectory.boundary((), (), (), 0., 1.)
            ]))
        metric(
            nest_utils.stack_nested_arrays([
                trajectory.first((), (), (), 1., 1.),
                trajectory.first((), (), (), 1., 1.)
            ]))
        metric(
            nest_utils.stack_nested_arrays([
                trajectory.mid((), (), (), 2., 1.),
                trajectory.last((), (), (), 3., 0.)
            ]))
        metric(
            nest_utils.stack_nested_arrays([
                trajectory.last((), (), (), 3., 0.),
                trajectory.boundary((), (), (), 0., 1.)
            ]))
        metric(
            nest_utils.stack_nested_arrays([
                trajectory.boundary((), (), (), 0., 1.),
                trajectory.first((), (), (), 1., 1.)
            ]))
        self.assertEqual(expected_result, metric.result())
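
The pattern above is easier to follow once the helper's contract is spelled out: `stack_nested_arrays` takes a list of identically structured nests of NumPy arrays and returns a single nest whose leaves have gained a leading batch dimension. Below is a minimal sketch of equivalent behavior; the name `stack_nested_arrays_sketch` is hypothetical and stands in for, rather than reproduces, the TF-Agents implementation.

    import numpy as np
    import tensorflow as tf

    def stack_nested_arrays_sketch(nested_arrays):
      # Stack corresponding leaves of each nest along a new leading axis.
      return tf.nest.map_structure(lambda *arrays: np.stack(arrays), *nested_arrays)

    batch = [{'obs': np.zeros((5, 8)), 'reward': np.float32(1.)} for _ in range(3)]
    stacked = stack_nested_arrays_sketch(batch)
    assert stacked['obs'].shape == (3, 5, 8)
    assert stacked['reward'].shape == (3,)
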
Example #2
    def get_single():
      """Gets a single item from the replay buffer."""
      with self._lock:
        if self._np_state.size <= 0:
          def empty_item(spec):
            return np.empty(spec.shape, dtype=spec.dtype)
          if num_steps is not None:
            item = [tf.nest.map_structure(empty_item, self.data_spec)
                    for n in range(num_steps)]
            if time_stacked:
              item = nest_utils.stack_nested_arrays(item)
          else:
            item = tf.nest.map_structure(empty_item, self.data_spec)
          return item
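        # Sample a start index so that idx + num_steps_value - 1 still lies inside the buffer.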
        idx = np.random.randint(self._np_state.size - num_steps_value + 1)
        if self._np_state.size == self._capacity:
          # If the buffer is full, add cur_id (head of circular buffer) so that
          # we sample from the range [cur_id, cur_id + size - num_steps_value].
          # We will modulo the size below.
          idx += self._np_state.cur_id

        if num_steps is not None:
          # TODO(b/120242830): Try getting data from numpy in one shot rather
          # than num_steps_value.
          item = [self._decode(self._storage.get((idx + n) % self._capacity))
                  for n in range(num_steps)]
        else:
          item = self._decode(self._storage.get(idx % self._capacity))

      if num_steps is not None and time_stacked:
        item = nest_utils.stack_nested_arrays(item)
      return item
Example #3
    def testBatchSizeProvided(self, metric_class, expected_result):
        metric = metric_class(batch_size=2)

        metric(
            nest_utils.stack_nested_arrays([
                trajectory.boundary((), (), (), 0., 1.),
                trajectory.boundary((), (), (), 0., 1.)
            ]))
        metric(
            nest_utils.stack_nested_arrays([
                trajectory.first((), (), (), 1., 1.),
                trajectory.first((), (), (), 1., 1.)
            ]))
        metric(
            nest_utils.stack_nested_arrays([
                trajectory.mid((), (), (), 2., 1.),
                trajectory.last((), (), (), 3., 0.)
            ]))
        metric(
            nest_utils.stack_nested_arrays([
                trajectory.last((), (), (), 3., 0.),
                trajectory.boundary((), (), (), 0., 1.)
            ]))
        metric(
            nest_utils.stack_nested_arrays([
                trajectory.boundary((), (), (), 0., 1.),
                trajectory.first((), (), (), 1., 1.)
            ]))
        self.assertEqual(expected_result, metric.result())
Example #4
File: dyna_ppo.py Project: samsinai/FLEXS
    def _step(self, actions):
        """Progress the agent one step in the environment."""
        actions = actions.flatten()
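        # One-hot encode each chosen action at the current sequence position;
        # column -1 is the mask token and is cleared first.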
        self.states[:, self.partial_seq_len, -1] = 0
        self.states[np.arange(self._batch_size), self.partial_seq_len,
                    actions] = 1
        self.partial_seq_len += 1

        # We have not generated the last residue in the sequence, so continue
        if self.partial_seq_len < self.seq_length - 1:
            return nest_utils.stack_nested_arrays(
                [ts.transition(seq_state, 0) for seq_state in self.states])

        # If sequence is of full length, score the sequence and end the episode
        # We need to take off the column in the matrix (-1) representing the mask token
        complete_sequences = [
            s_utils.one_hot_to_string(seq_state[:, :-1], self.alphabet)
            for seq_state in self.states
        ]
        if self.fitness_model_is_gt:
            fitnesses = self.landscape.get_fitness(complete_sequences)
        else:
            fitnesses = self.model.get_fitness(complete_sequences)
        self.all_seqs.update(zip(complete_sequences, fitnesses))

        # Reward = fitness - lambda * sequence density
        rewards = np.array([
            f - self.lam * self.sequence_density(seq)
            for seq, f in zip(complete_sequences, fitnesses)
        ])
        return nest_utils.stack_nested_arrays([
            ts.termination(seq_state, r)
            for seq_state, r in zip(self.states, rewards)
        ])
Example #5
        def get_single(idx):
            """Gets the idx item from the replay buffer."""
            with self._lock:
                if self._np_state.size <= idx:

                    def empty_item(spec):
                        return np.empty(spec.shape, dtype=spec.dtype)

                    item = [
                        tf.nest.map_structure(empty_item, self.data_spec)
                        for n in range(num_steps_value)
                    ]
                    item = nest_utils.stack_nested_arrays(item)
                    return item

                if self._np_state.size == self._capacity:
                    # If the buffer is full, add cur_id (head of circular buffer) so that
                    # we sample from the range [cur_id, cur_id + size - num_steps_value].
                    # We will modulo the size below.
                    idx += self._np_state.cur_id

                item = [
                    self._decode(self._storage.get((idx + n) % self._capacity))
                    for n in range(num_steps_value)
                ]

            item = nest_utils.stack_nested_arrays(item)
            return item
Example #6
  def get_episode(self, batch_size: Optional[int] = None,
                  truncate_episode_at: Optional[int] = None) -> Tuple[
                      EnvStep, np.ndarray]:
    if batch_size is None:
      episode = self._get_episode(truncate_episode_at)
      mask = np.ones((len(episode),))
      return nest_utils.stack_nested_arrays(episode), mask
    if batch_size <= 0:
      raise ValueError('Invalid batch size %s.' % batch_size)

    episodes = []
    episode_lengths = []
    for _ in range(batch_size):
      next_episode = self._get_episode(truncate_episode_at)
      episodes.append(next_episode)
      episode_lengths.append(len(next_episode))

    max_length = max(episode_lengths)
    for episode in episodes:
      episode.extend([episode[-1]] * (max_length - len(episode)))

    batched_episodes = nest_utils.stack_nested_arrays(
        [nest_utils.stack_nested_arrays(episode)
         for episode in episodes])

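    # Broadcast (1, max_length) against (batch_size, 1): mask[i, t] is True
    # while step t is within episode i's true length.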
    mask = np.arange(max_length)[None, :] < np.array(episode_lengths)[:, None]

    return batched_episodes, mask
Example #7
  def _add_history(self, time_step, action):
    self._observation_history.append(time_step.observation)
    self._action_history.append(action)

    if self._include_actions:
      observation = {
          'observation':
              nest_utils.stack_nested_arrays(self._observation_history),
          'action':
              nest_utils.stack_nested_arrays(self._action_history)
      }
    else:
      observation = nest_utils.stack_nested_arrays(self._observation_history)
    return time_step._replace(observation=observation)
Example #8
  def _step(self, actions):
    """Forward a batch of actions to the wrapped environments.

    Args:
      actions: Batched action, possibly nested, to apply to the environment.

    Raises:
      ValueError: Invalid actions.

    Returns:
      Batch of observations, rewards, and done flags.
    """

    if self._num_envs == 1:
      actions = nest_utils.unbatch_nested_array(actions)
      time_steps = self._envs[0].step(actions)
      return nest_utils.batch_nested_array(time_steps)
    else:
      unstacked_actions = unstack_actions(actions)
      if len(unstacked_actions) != self.batch_size:
        raise ValueError(
            "Primary dimension of action items does not match "
            "batch size: %d vs. %d" % (len(unstacked_actions), self.batch_size))
      time_steps = self._execute(
          lambda env_action: env_action[0].step(env_action[1]),
          zip(self._envs, unstacked_actions))
      return nest_utils.stack_nested_arrays(time_steps)
Example #9
 def render(self, mode="rgb_array") -> Optional[types.NestedArray]:
   if self._num_envs == 1:
     img = self._envs[0].render(mode)
     return nest_utils.batch_nested_array(img)
   else:
     imgs = self._execute(lambda env: env.render(mode), self._envs)
     return nest_utils.stack_nested_arrays(imgs)
Example #10
 def reset_random(self):
     if self._num_envs == 1:
         return nest_utils.batch_nested_array(self._envs[0].reset_random())
     else:
         time_steps = self._execute(lambda env: env.reset_random(),
                                    self._envs)
         return nest_utils.stack_nested_arrays(time_steps)
Example #11
  def _get_next(self,
                sample_batch_size=None,
                num_steps=None,
                time_stacked=True):
    num_steps_value = num_steps if num_steps is not None else 1
    def get_single():
      """Gets a single item from the replay buffer."""
      with self._lock:
        if self._np_state.size <= 0:
          raise ValueError('Read error: empty replay buffer')

        idx = np.random.randint(self._np_state.size - num_steps_value + 1)
        if self._np_state.size == self._capacity:
          # If the buffer is full, add cur_id (head of circular buffer) so that
          # we sample from the range [cur_id, cur_id + size - num_steps_value].
          # We will modulo the size below.
          idx += self._np_state.cur_id

        if num_steps is not None:
          # TODO(b/120242830): Try getting data from numpy in one shot rather
          # than num_steps_value.
          item = [self._decode(self._storage.get((idx + n) % self._capacity))
                  for n in range(num_steps)]
        else:
          item = self._decode(self._storage.get(idx % self._capacity))

      if num_steps is not None and time_stacked:
        item = nest_utils.stack_nested_arrays(item)
      return item

    if sample_batch_size is None:
      return get_single()
    else:
      samples = [get_single() for _ in range(sample_batch_size)]
      return nest_utils.stack_nested_arrays(samples)
Example #12
 def _get_initial_state(self, batch_size: int) -> types.NestedArray:
     if self._num_policies == 1:
         return nest_utils.batch_nested_array(
             self._policies[0].get_initial_state())
     else:
         infos = self._execute(_execute_get_initial_state, self._policies)
         infos = nest_utils.unbatch_nested_array(infos)
         return nest_utils.stack_nested_arrays(infos)
Example #13
 def _gather_all(self):
     data = [
         self._decode(self._storage.get(idx))
         for idx in range(self._capacity)
     ]
     stacked = nest_utils.stack_nested_arrays(data)
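     # expand_dims prepends an outer batch dimension of 1 to every stacked array.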
     batched = tf.nest.map_structure(lambda t: np.expand_dims(t, 0),
                                     stacked)
     return batched
Example #14
 def generator_fn():
   while True:
     if sample_batch_size is not None:
       batch = [self._get_next(num_steps=num_steps, time_stacked=False)
                for _ in range(sample_batch_size)]
       item = nest_utils.stack_nested_arrays(batch)
     else:
       item = self._get_next(num_steps=num_steps, time_stacked=False)
     yield tuple(tf.nest.flatten(item))
Example #15
    def get_single():
      """Gets a single item from the replay buffer."""
      if num_steps is not None and time_stacked:
        item = [self.load_item_from_bigtable() for _ in range(num_steps)]
        item = nest_utils.stack_nested_arrays(item)
      else:
        item = self.load_item_from_bigtable()

      buffer_info = BufferInfo(ids=0, probabilities=0)
      return item, buffer_info
Example #16
  def _reset(self):
    """Reset all environments and combine the resulting observation.

    Returns:
      Time step with batch dimension.
    """
    if self._num_envs == 1:
      return nest_utils.batch_nested_array(self._envs[0].reset())
    else:
      time_steps = self._execute(lambda env: env.reset(), self._envs)
      return nest_utils.stack_nested_arrays(time_steps)
Example #17
  def testStackNestedArrays(self):
    shape = (5, 8)
    batch_size = 3
    batched_shape = (batch_size,) + shape

    specs = self.nest_spec(shape)
    unstacked_arrays = [self.zeros_from_spec(specs) for _ in range(batch_size)]
    stacked_array = nest_utils.stack_nested_arrays(unstacked_arrays)

    tf.nest.assert_same_structure(specs, stacked_array)
    assert_shapes = lambda a: self.assertEqual(a.shape, batched_shape)
    tf.nest.map_structure(assert_shapes, stacked_array)
Example #18
        def generator_fn():
            while True:
                if sample_batch_size is not None:
                    batch = [self.get_next(num_steps=num_steps_value, time_stacked=False,
                                           prioritized_buffer_beta=prioritized_buffer_beta)
                             for _ in range(sample_batch_size)]
                    item, item_idx, item_weight = nest_utils.stack_nested_arrays(batch)
                else:
                    item, item_idx, item_weight = self.get_next(num_steps=num_steps_value, time_stacked=False,
                                                                prioritized_buffer_beta=prioritized_buffer_beta)

                yield {"experiences": tuple(tf.nest.flatten(item)), "indices": item_idx, "weights": item_weight}
Example #19
  def testGetOuterArrayShape(self):
    spec = (
        array_spec.ArraySpec([5, 8], np.float32),
        (array_spec.ArraySpec([1], np.int32),
         array_spec.ArraySpec([2, 2, 2], np.float32))
    )

    batch_size = 3
    unstacked_arrays = [self.zeros_from_spec(spec) for _ in range(batch_size)]

    outer_dims = nest_utils.get_outer_array_shape(unstacked_arrays[0], spec)
    self.assertEqual((), outer_dims)

    stacked_array = nest_utils.stack_nested_arrays(unstacked_arrays)
    outer_dims = nest_utils.get_outer_array_shape(stacked_array, spec)
    self.assertEqual((batch_size,), outer_dims)

    time_dim = [nest_utils.batch_nested_array(arr) for arr in unstacked_arrays]
    batch_time = nest_utils.stack_nested_arrays(time_dim)
    outer_dims = nest_utils.get_outer_array_shape(batch_time, spec)
    self.assertEqual((batch_size, 1), outer_dims)
Example #20
 def setUp(self):
     super(BatchedPyMetricTest, self).setUp()
     # Order of args for trajectory methods:
     # (observation, action, policy_info, reward, discount)
     self._ts0 = nest_utils.stack_nested_arrays([
         trajectory.boundary((), (), (), 0., 1.),
         trajectory.boundary((), (), (), 0., 1.)
     ])
     self._ts1 = nest_utils.stack_nested_arrays([
         trajectory.first((), (), (), 1., 1.),
         trajectory.first((), (), (), 2., 1.)
     ])
     self._ts2 = nest_utils.stack_nested_arrays([
         trajectory.last((), (), (), 3., 1.),
         trajectory.last((), (), (), 4., 1.)
     ])
     self._ts3 = nest_utils.stack_nested_arrays([
         trajectory.boundary((), (), (), 0., 1.),
         trajectory.boundary((), (), (), 0., 1.)
     ])
     self._ts4 = nest_utils.stack_nested_arrays([
         trajectory.first((), (), (), 5., 1.),
         trajectory.first((), (), (), 6., 1.)
     ])
     self._ts5 = nest_utils.stack_nested_arrays([
         trajectory.last((), (), (), 7., 1.),
         trajectory.last((), (), (), 8., 1.)
     ])
Example #21
    def _action(self,
                time_step: ts.TimeStep,
                policy_state: types.NestedArray,
                seed: Optional[types.Seed] = None) -> ps.PolicyStep:
        """Forward a batch of time_step and policy_states to the wrapped policies.

    Args:
      time_step: A `TimeStep` tuple corresponding to `time_step_spec()`.
      policy_state: An Array, or a nested dict, list or tuple of Arrays
        representing the previous policy_state.
      seed: Seed value used to initialize a pseudorandom number generator.

    Returns:
      A batch of `PolicyStep` named tuples, each one containing:
        `action`: A nest of action Arrays matching the `action_spec()`.
        `state`: A nest of policy states to be fed into the next call to action.
        `info`: Optional side information such as action log probabilities.

    Raises:
      NotImplementedError: if `seed` is not None.
    """
        if seed is not None:
            raise NotImplementedError(
                "seed is not supported; but saw seed: {}".format(seed))
        if self._num_policies == 1:
            time_step = nest_utils.unbatch_nested_array(time_step)
            policy_state = nest_utils.unbatch_nested_array(policy_state)
            policy_steps = self._policies[0].action(time_step, policy_state)
            return nest_utils.batch_nested_array(policy_steps)
        else:
            unstacked_time_steps = nest_utils.unstack_nested_arrays(time_step)
            if len(unstacked_time_steps) != len(self._policies):
                raise ValueError(
                    "Primary dimension of time_step items does not match "
                    "batch size: %d vs. %d" %
                    (len(unstacked_time_steps), len(self._policies)))
            unstacked_policy_states = [()] * len(unstacked_time_steps)
            if policy_state:
                unstacked_policy_states = nest_utils.unstack_nested_arrays(
                    policy_state)
                if len(unstacked_policy_states) != len(self._policies):
                    raise ValueError(
                        "Primary dimension of policy_state items does not match "
                        "batch size: %d vs. %d" %
                        (len(unstacked_policy_states), len(self._policies)))
            policy_steps = self._execute(
                _execute_policy,
                zip(self._policies, unstacked_time_steps,
                    unstacked_policy_states))
            return nest_utils.stack_nested_arrays(policy_steps)
Example #22
  def get_step(self, batch_size: Optional[int] = None,
               num_steps: Optional[int] = None) -> EnvStep:
    if batch_size is not None:
      raise ValueError('This dataset does not support batched step sampling.')

    if num_steps is None:
      return self._get_step()

    env_steps = []
    for _ in range(num_steps):
      next_step = self._get_step()
      env_steps.append(next_step)

    return nest_utils.stack_nested_arrays(env_steps)
Example #23
 def step_adversary(self, actions):
   if self._num_envs == 1:
     actions = nest_utils.unbatch_nested_array(actions)
     time_steps = self._envs[0].step_adversary(actions)
     return nest_utils.batch_nested_array(time_steps)
   else:
     unstacked_actions = batched_py_environment.unstack_actions(actions)
     if len(unstacked_actions) != self.batch_size:
       raise ValueError(
           'Primary dimension of action items does not match '
           'batch size: %d vs. %d' % (len(unstacked_actions), self.batch_size))
     time_steps = self._execute(
         lambda env_action: env_action[0].step_adversary(env_action[1]),
         zip(self._envs, unstacked_actions))
     return nest_utils.stack_nested_arrays(time_steps)
Example #24
  def _get_next(self,
                sample_batch_size=None,
                num_steps=None,
                time_stacked=True):
    num_steps_value = num_steps if num_steps is not None else 1
    def get_single():
      """Gets a single item from the replay buffer."""
      if num_steps is not None and time_stacked:
        item = [self.load_item_from_bigtable() for _ in range(num_steps)]
        item = nest_utils.stack_nested_arrays(item)
      else:
        item = self.load_item_from_bigtable()

      buffer_info = BufferInfo(ids=0, probabilities=0)
      return item, buffer_info

    if sample_batch_size is None:
      return get_single()
    else:
      samples = [get_single() for _ in range(sample_batch_size)]
      return nest_utils.stack_nested_arrays(samples)
Example #25
  def render(self, mode: Text = 'rgb_array') -> types.NestedArray:
    """Renders the environment.

    Args:
      mode: Rendering mode. Currently only 'rgb_array' is supported because
        this is a batched environment.

    Returns:
      An ndarray of shape [batch_size, width, height, 3] denoting RGB images
      (for mode=`rgb_array`).
    Raises:
      NotImplementedError: If the environment does not support rendering,
        or any other mode than `rgb_array` is given.
    """
    if mode != 'rgb_array':
      raise NotImplementedError('Only rgb_array rendering mode is supported. '
                                'Got %s' % mode)
    imgs = [env.render(mode, blocking=self._blocking) for env in self._envs]
    if not self._blocking:
      imgs = [promise() for promise in imgs]
    return nest_utils.stack_nested_arrays(imgs)
Example #26
 def _generate_batch_of_observations(self, generator_fn, num_samples):
     unstacked_obs = [generator_fn() for _ in range(num_samples)]
     return nest_utils.stack_nested_arrays(unstacked_obs)
Example #27
    def get_next(self, sample_batch_size=None, prioritized_buffer_beta=0.4, num_steps=None, time_stacked=True):
        """
        Build the next batch of experiences while computing the importance sampling weights of the selected experiences.

        Params:
            sample_batch_size (int): batch size
            prioritized_buffer_beta (float): exponent used when computing the importance sampling weights
            num_steps (int): number of steps to load. Only 1 is supported at the moment.
            time_stacked (bool): whether the timesteps are stacked or not.

        Returns:
            (Trajectory): mini batch of experiences
            (int list): the indices of the selected experiences in the replay buffer
            (float list): importance sampling weights to use when training using the experiences.
        """
        num_steps_value = num_steps if num_steps is not None else 1

        if num_steps_value != 1:
            raise NotImplementedError('PyPrioritizedReplayBuffer only supports batches with a '
                                      'num_steps size of 1, but received a batch with a '
                                      'num_steps size of {}.'.format(num_steps_value))

        def get_single_experience(b):
            """Gets a single experience from the replay buffer."""

            with self._lock:
                # return empty items if the buffer is empty
                if self._np_state.size <= 0:
                    def empty_item(spec):
                        return np.empty(spec.shape, dtype=spec.dtype)

                    item = tf.nest.map_structure(empty_item, self.data_spec)
                    selected_idx = -1
                    selected_weight = -1

                    return item, selected_idx, selected_weight

                # select index based on the priorities of the experiences in the buffer
                selected_idx, selected_weight = self.select_prioritized_experience(b)

                # get item
                item = self._decode(self._storage.get(selected_idx % self._prioritized_buffer_capacity))

            return item, selected_idx, selected_weight

        if sample_batch_size is None:
            return get_single_experience(prioritized_buffer_beta)
        else:
            experiences = []
            buffer_indices = []
            importance_sampling_weights = []
            for _ in range(sample_batch_size):
                experience, idx, weight = get_single_experience(prioritized_buffer_beta)
                experiences.append(experience)
                buffer_indices.append(idx)
                importance_sampling_weights.append(weight)

            buffer_indices = np.array(buffer_indices)
            importance_sampling_weights = np.array(importance_sampling_weights, dtype=np.float32)

            # Normalize so that the largest importance sampling weight equals 1.
            importance_sampling_weights = np.divide(importance_sampling_weights, importance_sampling_weights.max())

            trajectory = nest_utils.stack_nested_arrays(experiences)
            return trajectory, buffer_indices, importance_sampling_weights
Example #28
 def get_info(self) -> types.NestedArray:
   if self._num_envs == 1:
     return nest_utils.batch_nested_array(self._envs[0].get_info())
   else:
     infos = self._execute(lambda env: env.get_info(), self._envs)
     return nest_utils.stack_nested_arrays(infos)
Example #29
File: dyna_ppo.py Project: samsinai/FLEXS
 def _reset(self):
     self.partial_seq_len = 0
     self.states[:, :, :] = 0
     self.states[:, np.arange(self.seq_length), -1] = 1
     return nest_utils.stack_nested_arrays(
         [ts.restart(seq_state) for seq_state in self.states])
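
Across these examples, `batch_nested_array` wraps a single nest with a leading dimension of 1 (the single-environment case), while `stack_nested_arrays` stacks N nests into a batch (the multi-environment case). A minimal NumPy-only sketch of the two resulting shapes, assuming plain arrays rather than nested structures:

    import numpy as np

    single = np.zeros((5, 8))
    batched = single[np.newaxis, ...]  # batch_nested_array analogue: shape (1, 5, 8)
    stacked = np.stack([single] * 3)   # stack_nested_arrays analogue: shape (3, 5, 8)
    assert batched.shape == (1, 5, 8)
    assert stacked.shape == (3, 5, 8)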