def test_unstack_sequence_fields(self):
  """Tests that `unstack_sequence_fields(stack_sequence_fields(x)) == x`."""
  num_elements = len(TEST_SEQUENCE)
  roundtripped = tree_utils.unstack_sequence_fields(
      tree_utils.stack_sequence_fields(TEST_SEQUENCE), num_elements)
  # Every leaf of every element must survive the stack/unstack round trip.
  tree.map_structure(
      np.testing.assert_array_equal, roundtripped, TEST_SEQUENCE)
def dump(self):
  """Calculates statistics over the cache and forwards them to the target logger."""
  output = {}
  stacked = acme_tree.stack_sequence_fields(self._cache)
  for key, series in stacked.items():
    if re.search(self._aggregate_regex, key) is None:
      # Keys that don't match the aggregation pattern report only the
      # most recent value.
      output[key] = series[-1]
    else:
      # Matching keys are summarized with a fixed set of statistics.
      for stat_name, stat_fn in (('mean', np.mean),
                                 ('std', np.std),
                                 ('median', np.median),
                                 ('max', np.max),
                                 ('min', np.min)):
        output[f'{key}_{stat_name}'] = stat_fn(series)
  self._to.write(output)
  self._cache.clear()
def test_stack_sequence_fields(self):
  """Tests that `stack_sequence_fields` behaves correctly on nested data."""
  stacked = tree_utils.stack_sequence_fields(TEST_SEQUENCE)

  # The stacked result must mirror the structure of a single element.
  tree.assert_same_structure(stacked, TEST_SEQUENCE[0])

  # Every leaf gains a leading batch dimension equal to the sequence length.
  self.assertEqual(stacked['reward'].shape, (3,))
  self.assertEqual(stacked['action'].shape, (3, 1))
  self.assertEqual(stacked['observation'][0].shape, (3, 3))

  # Leaf values are stacked in sequence order.
  self.assertEqual(
      stacked['observation'][0].tolist(),
      [[0., 1., 2.], [1., 2., 3.], [2., 3., 4.]])
  self.assertEqual(stacked['action'].tolist(), [[1.], [0.5], [0.3]])
  self.assertEqual(stacked['reward'].tolist(), [1., 0., 0.5])
def sample(self):
  """Removes `batch_size` experiences from the buffer and returns them stacked."""
  pop = self._buffer.popleft
  batch = [pop() for _ in range(self._batch_size)]
  return tree_utils.stack_sequence_fields(batch)
def run_test_adder(
    self,
    adder: adders_base.Adder,
    first: dm_env.TimeStep,
    steps: Sequence[Step],
    expected_items: Sequence[Any],
    signature: types.NestedSpec,
    pack_expected_items: bool = False,
    stack_sequence_fields: bool = True,
    repeat_episode_times: int = 1,
    end_behavior: adders.EndBehavior = adders.EndBehavior.ZERO_PAD,
    item_transform: Optional[Callable[[Sequence[np.ndarray]], Any]] = None):
  """Runs a unit test case for the adder.

  Feeds `first` and `steps` through the adder (possibly over several
  episodes), then compares the items the adder produced against
  `expected_items` and validates them against `signature`.

  Args:
    adder: The instance of `Adder` that is being tested.
    first: The first `dm_env.TimeStep` that is used to call
      `Adder.add_first()`.
    steps: A sequence of (action, timestep) tuples that are passed to
      `Adder.add()`.
    expected_items: The sequence of items that are expected to be created
      by calling the adder's `add_first()` method on `first` and `add()`
      on all of the elements in `steps`.
    signature: Signature that written items must be compatible with.
    pack_expected_items: Deprecated and not used. If true the expected
      items are given unpacked and need to be packed in a list before
      comparison.
    stack_sequence_fields: Whether to stack the sequence fields of the
      expected items before comparing to the observed items. Usually False
      for transition adders and True for both episode and sequence adders.
    repeat_episode_times: How many times to run an episode.
    end_behavior: How end of episode should be handled.
    item_transform: Transformation of item simulating the work done by the
      dataset pipeline on the learner in a real setup.

  Raises:
    ValueError: If `steps` is empty.
  """
  del pack_expected_items
  if not steps:
    raise ValueError('At least one step must be given.')

  # A step optionally carries an `extras` payload as a third element.
  has_extras = len(steps[0]) == 3
  for episode_id in range(repeat_episode_times):
    # Add all the data up to the final step.
    adder.add_first(first)
    for step in steps[:-1]:
      action, ts = step[0], step[1]
      if has_extras:
        extras = step[2]
      else:
        extras = ()
      adder.add(action, next_timestep=ts, extras=extras)

    # Add the final step.
    adder.add(*steps[-1])

  # Force run the destructor to trigger the flushing of all pending items.
  getattr(adder, '__del__', lambda: None)()

  # Ending the episode should close the writer. No new writer should yet have
  # been created as it is constructed lazily.
  if end_behavior is not adders.EndBehavior.CONTINUE:
    self.assertEqual(self.num_episodes(), repeat_episode_times)

  # Make sure our expected and observed data match.
  observed_items = self.items()

  # Check matching number of items.
  self.assertEqual(len(expected_items), len(observed_items))

  # Check items are matching according to numpy's almost_equal.
  for expected_item, observed_item in zip(expected_items, observed_items):
    if stack_sequence_fields:
      expected_item = tree_utils.stack_sequence_fields(expected_item)
    # Apply the transformation which would be done by the dataset in a real
    # setup.
    if item_transform:
      observed_item = item_transform(observed_item)

    tree.map_structure(np.testing.assert_array_almost_equal,
                       tree.flatten(expected_item),
                       tree.flatten(observed_item))

  # Make sure the signature matches what is being written by Reverb.
  def _check_signature(spec: tf.TensorSpec, value: np.ndarray):
    self.assertTrue(spec.is_compatible_with(tf.convert_to_tensor(value)))

  # Check that it is possible to unpack observed using the signature.
  for item in observed_items:
    tree.map_structure(_check_signature, tree.flatten(signature),
                       tree.flatten(item))
def run_test_adder(self,
                   adder: base.ReverbAdder,
                   first: dm_env.TimeStep,
                   steps: Sequence[Step],
                   expected_items: Sequence[Any],
                   pack_expected_items: bool = False,
                   stack_sequence_fields: bool = True,
                   repeat_episode_times: int = 1,
                   break_end_of_episode: bool = True):
  """Runs a unit test case for the adder.

  Feeds `first` and `steps` through the adder (possibly over several
  episodes), then compares the priorities written to the Reverb client
  against `expected_items` and checks the adder's own signature.

  Args:
    adder: The instance of `base.ReverbAdder` that is being tested.
    first: The first `dm_env.TimeStep` that is used to call
      `base.ReverbAdder.add_first()`.
    steps: A sequence of (action, timestep) tuples that are passed to
      `base.ReverbAdder.add()`.
    expected_items: The sequence of items that are expected to be created
      by calling the adder's `add_first()` method on `first` and `add()`
      on all of the elements in `steps`.
    pack_expected_items: Deprecated and not used. If true the expected
      items are given unpacked and need to be packed in a list before
      comparison.
    stack_sequence_fields: Whether to stack the sequence fields of the
      expected items before comparing to the observed items. Usually False
      for transition adders and True for both episode and sequence adders.
    repeat_episode_times: How many times to run an episode.
    break_end_of_episode: If False, an end of an episode does not break
      the sequence.

  Raises:
    ValueError: If `steps` is empty.
  """
  del pack_expected_items
  if not steps:
    raise ValueError('At least one step must be given.')

  # A step optionally carries an `extras` payload as a third element.
  has_extras = len(steps[0]) == 3

  # Infer the environment spec from the first step so the adder's signature
  # can be validated against what is actually written.
  env_spec = tree.map_structure(
      _numeric_to_spec,
      specs.EnvironmentSpec(observations=steps[0][1].observation,
                            actions=steps[0][0],
                            rewards=steps[0][1].reward,
                            discounts=steps[0][1].discount))
  if has_extras:
    extras_spec = tree.map_structure(_numeric_to_spec, steps[0][2])
  else:
    extras_spec = ()
  signature = adder.signature(env_spec, extras_spec=extras_spec)

  for episode_id in range(repeat_episode_times):
    # Add all the data up to the final step.
    adder.add_first(first)
    for step in steps[:-1]:
      action, ts = step[0], step[1]
      if has_extras:
        extras = step[2]
      else:
        extras = ()
      adder.add(action, next_timestep=ts, extras=extras)

    # Add the final step.
    adder.add(*steps[-1])

  # Ending the episode should close the writer. No new writer should yet have
  # been created as it is constructed lazily.
  if break_end_of_episode:
    self.assertEqual(self.client.writer.num_episodes, repeat_episode_times)

  # Make sure our expected and observed data match.
  observed_items = [p[2] for p in self.client.writer.priorities]

  # Check matching number of items.
  self.assertEqual(len(expected_items), len(observed_items))

  # Check items are matching according to numpy's almost_equal.
  for expected_item, observed_item in zip(expected_items, observed_items):
    if stack_sequence_fields:
      expected_item = tree_utils.stack_sequence_fields(expected_item)
    # Set check_types=False because we check them below.
    tree.map_structure(np.testing.assert_array_almost_equal,
                       expected_item,
                       tuple(observed_item),
                       check_types=False)

  # Make sure the signature matches what is being written by Reverb.
  def _check_signature(spec: tf.TensorSpec, value: np.ndarray):
    self.assertTrue(spec.is_compatible_with(tf.convert_to_tensor(value)))

  # Check the last transition's signature.
  tree.map_structure(_check_signature, signature, observed_items[-1])