Esempio n. 1
0
    def run_test_adder(self, adder: base.ReverbAdder, first: dm_env.TimeStep,
                       steps: Sequence[Tuple[Any, dm_env.TimeStep]],
                       expected_items: Sequence[Any]):
        """Runs a unit test case for the adder.

        Feeds one episode (`first` followed by every element of `steps`)
        through `adder`, checks the state of `self.client.writers` before and
        after the final step, compares the items recorded by the first writer
        against `expected_items`, validates every recorded timestep against
        the adder's signature, and finally starts a second episode to verify
        that a second writer shows up.

        Args:
          adder: The instance of `base.ReverbAdder` that is being tested.
          first: The first `dm_env.TimeStep` that is used to call
            `base.ReverbAdder.add_first()`.
          steps: A sequence of (action, timestep) tuples that are passed to
            `base.ReverbAdder.add()`.
          expected_items: The sequence of items that are expected to be
            created by calling the adder's `add_first()` method on `first`
            and `add()` on all of the elements in `steps`.

        Raises:
          ValueError: If `steps` is empty.
        """
        if not steps:
            raise ValueError('At least one step must be given.')

        # Derive the environment spec from the values of the first step.
        env_spec = tree.map_structure(
            _numeric_to_spec,
            specs.EnvironmentSpec(observations=steps[0][1].observation,
                                  actions=steps[0][0],
                                  rewards=steps[0][1].reward,
                                  discounts=steps[0][1].discount))
        signature = adder.signature(env_spec)

        # Add all the data up to the final step.
        adder.add_first(first)
        for action, ts in steps[:-1]:
            adder.add(action, next_timestep=ts)

        if len(steps) == 1:
            # adder.add() has not been called yet, so no writers have been
            # created.
            self.assertEmpty(self.client.writers)
        else:
            # Make sure the writer has been created but not closed.
            self.assertLen(self.client.writers, 1)
            self.assertFalse(self.client.writers[0].closed)

        # Add the final step.
        adder.add(*steps[-1])

        # Ending the episode should close the writer. No new writer should yet
        # have been created as it is constructed lazily.
        self.assertLen(self.client.writers, 1)
        self.assertTrue(self.client.writers[0].closed)

        # Make sure our expected and observed data match. The item is the
        # second element of every recorded priority entry.
        observed_items = [p[1] for p in self.client.writers[0].priorities]

        # zip() silently stops at the shorter sequence, so the item count has
        # to be checked explicitly; otherwise missing or surplus items would
        # go unnoticed.
        self.assertEqual(len(expected_items), len(observed_items))

        for expected_item, observed_item in zip(expected_items,
                                                observed_items):
            # Set check_types=False so that the expected and observed items
            # may use different sequence types (e.g. list vs. tuple) for the
            # same nested structure.
            tree.map_structure(np.testing.assert_array_almost_equal,
                               expected_item,
                               observed_item,
                               check_types=False)

        def _check_signature(spec: tf.TensorSpec, value):
            # Plain Python ints/floats are turned into numpy arrays before
            # being converted to tensors.
            value = np.asarray(value)
            self.assertTrue(
                spec.is_compatible_with(tf.convert_to_tensor(value)))

        # Every timestep recorded by the first writer must fit the signature.
        for step in self.client.writers[0].timesteps:
            tree.map_structure(_check_signature, signature, step)

        # Add the start of a second trajectory.
        adder.add_first(first)
        adder.add(*steps[0])

        # Make sure this creates a new writer.
        self.assertLen(self.client.writers, 2)
        # The writer is closed if the recently added `dm_env.TimeStep`'s
        # step_type is `dm_env.StepType.LAST`.
        if steps[0][1].last():
            self.assertTrue(self.client.writers[1].closed)
        else:
            self.assertFalse(self.client.writers[1].closed)
Esempio n. 2
0
  def run_test_adder(self,
                     adder: base.ReverbAdder,
                     first: dm_env.TimeStep,
                     steps: Sequence[Step],
                     expected_items: Sequence[Any],
                     pack_expected_items: bool = False,
                     repeat_episode_times: int = 1,
                     break_end_of_episode: bool = True):
    """Drives `adder` through complete episodes and verifies what it wrote.

    Each episode consists of `add_first(first)` followed by `add()` for every
    element of `steps`. Afterwards the items and timesteps recorded by the
    first writer of `self.client` are compared against `expected_items` and
    the adder's signature.

    Args:
      adder: The `base.ReverbAdder` instance under test.
      first: The `dm_env.TimeStep` handed to `add_first()` at the start of
        every episode.
      steps: Non-empty sequence of (action, timestep) tuples, optionally with
        a third `extras` element, that are fed to `add()`. Whether extras are
        present is decided from the length of the first tuple.
      expected_items: The items the adder is expected to have created.
      pack_expected_items: If true, every expected item is wrapped in a
        single-element list before it is compared to the observed item.
      repeat_episode_times: Number of times the episode is replayed.
      break_end_of_episode: If False, the writer is not required to be closed
        at the end of the episode and the second-trajectory check is skipped.

    Raises:
      ValueError: If `steps` is empty.
    """
    if not steps:
      raise ValueError('At least one step must be given.')

    first_step = steps[0]
    first_ts = first_step[1]
    leading_steps = steps[:-1]
    final_step = steps[-1]

    # A three-element step carries extras in its last position.
    has_extras = len(first_step) == 3

    env_spec = tree.map_structure(
        _numeric_to_spec,
        specs.EnvironmentSpec(
            observations=first_ts.observation,
            actions=first_step[0],
            rewards=first_ts.reward,
            discounts=first_ts.discount))
    extras_spec = (
        tree.map_structure(_numeric_to_spec, first_step[2])
        if has_extras else ())
    signature = adder.signature(env_spec, extras_spec=extras_spec)

    for episode_index in range(repeat_episode_times):
      # Everything except the final step goes in first.
      adder.add_first(first)
      for step in leading_steps:
        adder.add(
            step[0],
            next_timestep=step[1],
            extras=step[2] if has_extras else ())

      # The intermediate writer state is only inspected on the first episode.
      if episode_index == 0:
        if len(steps) > 1:
          # A writer exists by now and it must still be open.
          self.assertLen(self.client.writers, 1)
          self.assertFalse(self.client.writers[0].closed)
        else:
          # add() was never called, hence no writer has been created yet.
          self.assertEmpty(self.client.writers)

      # Now the final step of the episode.
      adder.add(*final_step)

    # Writers are constructed lazily, so exactly one must exist at this point;
    # it has to be closed when the end of an episode breaks the sequence.
    self.assertLen(self.client.writers, 1)
    writer = self.client.writers[0]
    if break_end_of_episode:
      self.assertTrue(writer.closed)

    # Compare the recorded items (second element of each priority entry) with
    # the expectation, count first and contents second.
    observed_items = [priority[1] for priority in writer.priorities]
    self.assertEqual(len(expected_items), len(observed_items))
    for expected_item, observed_item in zip(expected_items, observed_items):
      # check_types=False: sequence container types are not compared.
      tree.map_structure(
          np.testing.assert_array_almost_equal,
          [expected_item] if pack_expected_items else expected_item,
          observed_item,
          check_types=False)

    def _assert_matches_spec(spec: tf.TensorSpec, value):
      # np.asarray turns plain Python ints/floats into numpy arrays before the
      # tensor conversion.
      as_array = np.asarray(value)
      self.assertTrue(spec.is_compatible_with(tf.convert_to_tensor(as_array)))

    for written_step in writer.timesteps:
      tree.map_structure(_assert_matches_spec, signature, written_step)

    if not break_end_of_episode:
      return

    # Begin a second trajectory; this has to produce a second writer.
    adder.add_first(first)
    adder.add(*first_step)
    self.assertLen(self.client.writers, 2)

    # The new writer is closed exactly when the step that was just added is a
    # `dm_env.StepType.LAST` step.
    check_closed = self.assertTrue if first_ts.last() else self.assertFalse
    check_closed(self.client.writers[1].closed)
Esempio n. 3
0
    def run_test_adder(self,
                       adder: base.ReverbAdder,
                       first: dm_env.TimeStep,
                       steps: Sequence[Step],
                       expected_items: Sequence[Any],
                       pack_expected_items: bool = False,
                       stack_sequence_fields: bool = True,
                       repeat_episode_times: int = 1,
                       break_end_of_episode: bool = True):
        """Runs a unit test case for the adder.

        Replays the episode (`first` followed by every element of `steps`)
        `repeat_episode_times` times through `adder`, then compares the items
        recorded by `self.client.writer` with `expected_items` and checks the
        last recorded item against the adder's signature.

        Args:
          adder: The instance of `base.ReverbAdder` that is being tested.
          first: The first `dm_env.TimeStep` that is used to call
            `base.ReverbAdder.add_first()`.
          steps: A non-empty sequence of (action, timestep) tuples, or
            (action, timestep, extras) tuples, that are passed to
            `base.ReverbAdder.add()`. Whether extras are present is decided
            from the length of the first tuple.
          expected_items: The sequence of items that are expected to be
            created by calling the adder's `add_first()` method on `first`
            and `add()` on all of the elements in `steps`.
          pack_expected_items: Deprecated and not used. If true the expected
            items are given unpacked and need to be packed in a list before
            comparison.
          stack_sequence_fields: Whether to stack the sequence fields of the
            expected items before comparing to the observed items. Usually
            False for transition adders and True for both episode and
            sequence adders.
          repeat_episode_times: How many times to run an episode.
          break_end_of_episode: If False, an end of an episode does not break
            the sequence.

        Raises:
          ValueError: If `steps` is empty.
        """

        del pack_expected_items

        if not steps:
            raise ValueError('At least one step must be given.')

        # A three-element step carries extras in its last position.
        has_extras = len(steps[0]) == 3
        env_spec = tree.map_structure(
            _numeric_to_spec,
            specs.EnvironmentSpec(observations=steps[0][1].observation,
                                  actions=steps[0][0],
                                  rewards=steps[0][1].reward,
                                  discounts=steps[0][1].discount))
        if has_extras:
            extras_spec = tree.map_structure(_numeric_to_spec, steps[0][2])
        else:
            extras_spec = ()
        signature = adder.signature(env_spec, extras_spec=extras_spec)

        for _ in range(repeat_episode_times):
            # Add all the data up to the final step.
            adder.add_first(first)
            for step in steps[:-1]:
                action, ts = step[0], step[1]
                extras = step[2] if has_extras else ()
                adder.add(action, next_timestep=ts, extras=extras)

            # Add the final step.
            adder.add(*steps[-1])

        # When the end of an episode breaks the sequence, the writer must have
        # recorded exactly one episode per repetition.
        if break_end_of_episode:
            self.assertEqual(self.client.writer.num_episodes,
                             repeat_episode_times)

        # Make sure our expected and observed data match. The item is the
        # third element of every recorded priority entry.
        observed_items = [p[2] for p in self.client.writer.priorities]

        # Check matching number of items.
        self.assertEqual(len(expected_items), len(observed_items))

        # Check items are matching according to numpy's almost_equal.
        for expected_item, observed_item in zip(expected_items,
                                                observed_items):
            if stack_sequence_fields:
                expected_item = tree_utils.stack_sequence_fields(expected_item)

            # Set check_types=False so that the sequence container types of
            # the expected and observed structures are not compared.
            tree.map_structure(np.testing.assert_array_almost_equal,
                               expected_item,
                               tuple(observed_item),
                               check_types=False)

        # Make sure the signature matches what is being written by Reverb.
        def _check_signature(spec: tf.TensorSpec, value: np.ndarray):
            self.assertTrue(
                spec.is_compatible_with(tf.convert_to_tensor(value)))

        # Check the last transition's signature. If no items were expected
        # (and, per the length check above, none were observed) there is
        # nothing to validate; indexing [-1] would raise IndexError instead.
        if observed_items:
            tree.map_structure(_check_signature, signature,
                               observed_items[-1])