def _create_trajectories(self):
    """Return six stacked trajectory steps used as metric input.

    Each entry stacks two steps of the same kind with
    `nest_utils.stack_nested_tensors`. The sequence is boundary, first,
    last, boundary, first, last; the first/last steps carry rewards 1..8 and
    every step uses a discount of 1.

    Returns:
      A list `[ts0, ts1, ts2, ts3, ts4, ts5]` of stacked trajectories.
    """

    def _stacked(make_step, reward_a, reward_b):
        # Order of args for trajectory methods:
        # observation, action, policy_info, reward, discount
        return nest_utils.stack_nested_tensors([
            make_step((), (), (), reward_a, 1.),
            make_step((), (), (), reward_b, 1.),
        ])

    layout = (
        (trajectory.boundary, 0., 0.),
        (trajectory.first, 1., 2.),
        (trajectory.last, 3., 4.),
        (trajectory.boundary, 0., 0.),
        (trajectory.first, 5., 6.),
        (trajectory.last, 7., 8.),
    )
    return [_stacked(*row) for row in layout]
def _create_misaligned_trajectories(self):
    """Return three two-entry trajectory steps whose episodes are offset.

    Every returned item is built by concatenating two single-entry
    trajectories along axis 0. The first entry goes first -> last ->
    boundary, while the second goes boundary -> first -> last, so the two
    entries never reach the same episode phase on the same step.

    Returns:
      A list `[ts1, ts2, ts3]` of concatenated trajectories.
    """

    def _concat_axis0(left_nest, right_nest):
        def _pairwise(left, right):
            return tf.concat([left, right], axis=0)

        return tf.nest.map_structure(_pairwise, left_nest, right_nest)

    def _single(make_step, action, reward):
        # Order of args for trajectory methods:
        # observation, action, policy_info, reward, discount
        return make_step(
            (),
            tf.constant([action]),
            (),
            tf.constant([reward], dtype=tf.float32),
            [1.])

    step_one = _concat_axis0(
        _single(trajectory.first, 2, 1.),
        _single(trajectory.boundary, 1, 0.))
    step_two = _concat_axis0(
        _single(trajectory.last, 1, 3.),
        _single(trajectory.first, 1, 2.))
    step_three = _concat_axis0(
        _single(trajectory.boundary, 2, 0.),
        _single(trajectory.last, 1, 4.))
    return [step_one, step_two, step_three]
def setUp(self):
    """Populate `self._ts` with six stacked trajectory steps.

    The steps follow the pattern boundary, first, last, boundary, first,
    last. Each one stacks two trajectories of the same kind via
    `nest_utils.stack_nested_tensors`, with rewards taken from the table
    below and a discount of 1 everywhere.
    """
    super(BatchedPyMetricTest, self).setUp()

    # (trajectory factory, rewards of the two stacked entries)
    spec = [
        (trajectory.boundary, (0., 0.)),
        (trajectory.first, (1., 2.)),
        (trajectory.last, (3., 4.)),
        (trajectory.boundary, (0., 0.)),
        (trajectory.first, (5., 6.)),
        (trajectory.last, (7., 8.)),
    ]

    # Order of args for trajectory methods:
    # observation, action, policy_info, reward, discount
    self._ts = [
        nest_utils.stack_nested_tensors(
            [make_step((), (), (), reward, 1.) for reward in rewards])
        for make_step, rewards in spec
    ]
def testBatchSizeProvided(self, metric_class, expected_result):
    """Feed five stacked steps to a `batch_size=2` metric and check its result.

    Args:
      metric_class: Metric type under test; constructed with `batch_size=2`.
      expected_result: Value `metric.result()` must equal after all steps.
    """
    metric = metric_class(batch_size=2)

    # One row per metric call; each row lists the two stacked entries as
    # (trajectory factory, reward, discount).
    batches = (
        ((trajectory.boundary, 0., 1.), (trajectory.boundary, 0., 1.)),
        ((trajectory.first, 1., 1.), (trajectory.first, 1., 1.)),
        ((trajectory.mid, 2., 1.), (trajectory.last, 3., 0.)),
        ((trajectory.last, 3., 0.), (trajectory.boundary, 0., 1.)),
        ((trajectory.boundary, 0., 1.), (trajectory.first, 1., 1.)),
    )
    for entries in batches:
        stacked = nest_utils.stack_nested_arrays([
            make_step((), (), (), reward, discount)
            for make_step, reward, discount in entries
        ])
        metric(stacked)

    self.assertEqual(metric.result(), expected_result)
def setUp(self):
    """Store a fixed list of seven trajectories in `self._trajectories`.

    The list holds two (first, last, boundary) triples followed by one more
    first step. Rewards and discounts are float32 NumPy scalars.
    """
    super(PyDriverTest, self).setUp()
    zero = np.array(0., dtype=np.float32)
    one = np.array(1., dtype=np.float32)

    def _first():
        # Order of args for trajectory methods:
        # (observation, action, policy_info, reward, discount)
        return trajectory.first(0, 1, 2, one, one)

    def _full_episode():
        # Called once per episode so every list entry is a distinct object.
        return [
            _first(),
            trajectory.last(1, 2, 4, one, zero),
            trajectory.boundary(3, 1, 2, zero, one),
        ]

    self._trajectories = _full_episode() + _full_episode() + [_first()]
def testAverageOneEpisode(self, metric_class, expected_result):
    """Run one boundary/mid/mid/last episode through the metric.

    Args:
      metric_class: Metric type under test; constructed with no arguments.
      expected_result: Value `metric.result()` must equal afterwards.
    """
    metric = metric_class()

    # (trajectory factory, reward, discount) for each step, in order.
    episode = (
        (trajectory.boundary, 0., 1.),
        (trajectory.mid, 1., 1.),
        (trajectory.mid, 2., 1.),
        (trajectory.last, 3., 0.),
    )
    for make_step, reward, discount in episode:
        metric(make_step((), (), (), reward, discount))

    self.assertEqual(expected_result, metric.result())
def testLastArrays(self):
    """`trajectory.last` on NumPy inputs yields non-tensor MID -> LAST steps."""
    rewards = np.array([1.0, 1.0, 2.0])
    discounts = np.array([1.0, 1.0, 1.0])

    # Observation, action and policy_info are all empty nests here.
    traj = trajectory.last((), (), (), rewards, discounts)

    self.assertFalse(tf.is_tensor(traj.step_type))
    expected_step_type = [ts.StepType.MID] * 3
    expected_next_step_type = [ts.StepType.LAST] * 3
    self.assertAllEqual(traj.step_type, expected_step_type)
    self.assertAllEqual(traj.next_step_type, expected_next_step_type)
def testLastTensors(self):
    """`trajectory.last` on tensor inputs yields tensor MID -> LAST steps."""
    rewards = tf.constant([1.0, 1.0, 2.0])
    discounts = tf.constant([1.0, 1.0, 1.0])

    # Observation, action and policy_info are all empty nests here.
    traj = trajectory.last((), (), (), rewards, discounts)
    self.assertTrue(tf.is_tensor(traj.step_type))

    # Evaluate the trajectory before comparing its step types.
    evaluated = self.evaluate(traj)
    expected_step_type = [ts.StepType.MID] * 3
    expected_next_step_type = [ts.StepType.LAST] * 3
    self.assertAllEqual(evaluated.step_type, expected_step_type)
    self.assertAllEqual(evaluated.next_step_type, expected_next_step_type)
def testAverageTwoEpisode(self, metric_class, expected_result):
    """Feed a multi-step episode followed by a single-step episode.

    The second episode is one hand-built trajectory whose step type is FIRST
    and whose next step type is LAST.

    Args:
      metric_class: Metric type under test; constructed with no arguments.
      expected_result: Value `metric.result()` must equal afterwards.
    """
    metric = metric_class()

    # (trajectory factory, reward, discount) for each step, in order.
    leading_steps = (
        (trajectory.boundary, 0., 1.),
        (trajectory.first, 1., 1.),
        (trajectory.mid, 2., 1.),
        (trajectory.last, 3., 0.),
        (trajectory.boundary, 0., 1.),
    )
    for make_step, reward, discount in leading_steps:
        metric(make_step((), (), (), reward, discount))

    # TODO(kbanoop): Add optional next_step_type arg to trajectory.first. Or
    # implement trajectory.first_last().
    single_step_episode = trajectory.Trajectory(
        ts.StepType.FIRST, (), (), (), ts.StepType.LAST, -6., 1.)
    metric(single_step_episode)

    self.assertEqual(expected_result, metric.result())
def testAverageOneEpisodeWithReset(self, metric_class, expected_result):
    """Check the metric when an episode restarts without ever seeing LAST.

    Args:
      metric_class: Metric type under test; constructed with no arguments.
      expected_result: Value `metric.result()` must equal afterwards.
    """
    metric = metric_class()

    # (trajectory factory, reward); every step uses a discount of 1.
    unfinished_episode = (
        (trajectory.first, 0.),
        (trajectory.mid, 1.),
        (trajectory.mid, 2.),
    )
    for make_step, reward in unfinished_episode:
        metric(make_step((), (), (), reward, 1.))

    # The episode is reset.
    #
    # This could happen when using the dynamic_episode_driver with
    # parallel_py_environment. When the parallel episodes are of different
    # lengths and num_episodes is reached, some episodes would be left in
    # "MID". When the driver runs again, all environments are reset at the
    # beginning of the tf.while_loop and the unfinished episodes would get
    # "FIRST" without seeing "LAST".
    restarted_episode = (
        (trajectory.first, 3.),
        (trajectory.last, 4.),
    )
    for make_step, reward in restarted_episode:
        metric(make_step((), (), (), reward, 1.))

    self.assertEqual(expected_result, metric.result())
def testSaveRestore(self):
    """Round-trip four metrics through a `tf.train.Checkpoint`.

    Every metric sees one boundary/mid/mid/last episode, is saved, then reset
    (result must read 0), and finally restored (result must be positive
    again).
    """
    metrics = [
        py_metrics.AverageReturnMetric(),
        py_metrics.AverageEpisodeLengthMetric(),
        py_metrics.EnvironmentSteps(),
        py_metrics.NumberOfEpisodes(),
    ]

    # (trajectory factory, reward, discount) for each step, in order.
    episode = (
        (trajectory.boundary, 0., 1.),
        (trajectory.mid, 1., 1.),
        (trajectory.mid, 2., 1.),
        (trajectory.last, 3., 0.),
    )
    for metric in metrics:
        for make_step, reward, discount in episode:
            metric(make_step((), (), (), reward, discount))

    # Key each metric by its name so the checkpoint tracks all of them.
    tracked = {}
    for metric in metrics:
        tracked[metric.name] = metric
    checkpoint = tf.train.Checkpoint(**tracked)
    save_path = checkpoint.save(self.get_temp_dir() + '/ckpt')

    for metric in metrics:
        metric.reset()
        self.assertEqual(0, metric.result())

    restore_status = checkpoint.restore(save_path)
    restore_status.assert_consumed()

    for metric in metrics:
        self.assertGreater(metric.result(), 0)