Python boundary 예제들, tf_agents.environments.trajectory.boundary Python 예제들

예제 #1

0

파일 보기

    def testBatchSizeProvided(self, metric_class, expected_result):
        """Checks AverageReturnMetric constructed with an explicit batch size.

        Five batches, each holding two stacked trajectory steps, are fed to
        an `AverageReturnMetric(batch_size=2)`; the result is then expected
        to equal 5.0.

        Args:
          metric_class: Not used by this test body.
          expected_result: Not used by this test body.
        """
        metric = py_metrics.AverageReturnMetric(batch_size=2)

        # One row per metric call; each cell is
        # (trajectory constructor, reward, discount). Observation, action and
        # policy_info are always passed as empty tuples.
        batches = (
            ((trajectory.boundary, 0., 1.), (trajectory.boundary, 0., 1.)),
            ((trajectory.first, 1., 1.), (trajectory.first, 1., 1.)),
            ((trajectory.mid, 2., 1.), (trajectory.last, 3., 0.)),
            ((trajectory.last, 3., 0.), (trajectory.boundary, 0., 1.)),
            ((trajectory.boundary, 0., 1.), (trajectory.first, 1., 1.)),
        )
        for batch in batches:
            stacked = nest_utils.stack_nested_arrays([
                make_step((), (), (), reward, discount)
                for make_step, reward, discount in batch
            ])
            metric(stacked)

        self.assertEqual(metric.result(), 5.0)

예제 #2

0

파일 보기

    def testBatch(self, metric_class, expected_result):
        """Checks a metric's result after feeding it batched trajectories.

        Five batches, each holding two stacked trajectory steps, are passed
        to a freshly constructed `metric_class()`; its result must equal
        `expected_result`.

        Args:
          metric_class: Zero-argument callable that builds the metric under
            test.
          expected_result: Value that `metric.result()` must equal.
        """
        metric = metric_class()

        # Order of args for trajectory methods:
        # observation, action, policy_info, reward, discount
        metric(
            nest_utils.stack_nested_arrays([
                trajectory.boundary((), (), (), 0., 1.),
                trajectory.boundary((), (), (), 0., 1.)
            ]))
        metric(
            nest_utils.stack_nested_arrays([
                trajectory.first((), (), (), 1., 1.),
                trajectory.first((), (), (), 1., 1.)
            ]))
        metric(
            nest_utils.stack_nested_arrays([
                trajectory.mid((), (), (), 2., 1.),
                trajectory.last((), (), (), 3., 0.)
            ]))
        metric(
            nest_utils.stack_nested_arrays([
                trajectory.last((), (), (), 3., 0.),
                trajectory.boundary((), (), (), 0., 1.)
            ]))
        metric(
            nest_utils.stack_nested_arrays([
                trajectory.boundary((), (), (), 0., 1.),
                trajectory.first((), (), (), 1., 1.)
            ]))
        # A third positional argument to assertEqual is the failure message,
        # not a value to compare, so only the two operands are passed.
        self.assertEqual(expected_result, metric.result())

예제 #3

0

파일 보기

    def _create_trajectories(self):
        """Builds six stacked trajectory batches, two entries per batch.

        The batches follow the pattern boundary, first, last, boundary,
        first, last. Every step uses empty tuples for observation, action
        and policy_info, and a discount of 1.

        Returns:
          A list of six values produced by `nest_utils.stack_nested_tensors`.
        """

        def _stack_pair(make_step, rewards):
            # Arg order for the trajectory constructors:
            # observation, action, policy_info, reward, discount
            return nest_utils.stack_nested_tensors(
                [make_step((), (), (), reward, 1.) for reward in rewards])

        # (trajectory constructor, reward for each of the two batch entries)
        step_specs = (
            (trajectory.boundary, (0., 0.)),
            (trajectory.first, (1., 2.)),
            (trajectory.last, (3., 4.)),
            (trajectory.boundary, (0., 0.)),
            (trajectory.first, (5., 6.)),
            (trajectory.last, (7., 8.)),
        )
        return [_stack_pair(make_step, rewards)
                for make_step, rewards in step_specs]

예제 #4

0

파일 보기

파일: tf_py_metric_test.py 프로젝트: zhenchangXia/tf-agents

    def setUp(self):
        """Stores six stacked trajectory batches on `self._ts`.

        The batches follow the pattern boundary, first, last, boundary,
        first, last, with two entries per batch. Every step uses empty
        tuples for observation, action and policy_info, and a discount of 1.
        """
        super(BatchedPyMetricTest, self).setUp()

        # (trajectory constructor, reward of entry 0, reward of entry 1)
        step_specs = [
            (trajectory.boundary, 0., 0.),
            (trajectory.first, 1., 2.),
            (trajectory.last, 3., 4.),
            (trajectory.boundary, 0., 0.),
            (trajectory.first, 5., 6.),
            (trajectory.last, 7., 8.),
        ]

        batches = []
        for make_step, reward_a, reward_b in step_specs:
            # Arg order for the trajectory constructors:
            # observation, action, policy_info, reward, discount
            pair = [
                make_step((), (), (), reward_a, 1.),
                make_step((), (), (), reward_b, 1.),
            ]
            batches.append(nest_utils.stack_nested_tensors(pair))

        self._ts = batches

예제 #5

0

파일 보기

파일: py_driver_test.py 프로젝트: zhenchangXia/tf-agents

 def setUp(self):
   """Prepares the list of trajectories stored on `self._trajectories`.

   The list is two repetitions of (first, last, boundary) followed by one
   more first step, seven entries in total.
   """
   super(PyDriverTest, self).setUp()
   zero = np.array(0., dtype=np.float32)
   one = np.array(1., dtype=np.float32)

   # Arg order for the trajectory constructors:
   # (observation, action, policy_info, reward, discount)
   steps = []
   for _ in range(2):
     steps.append(trajectory.first(0, 1, 2, one, one))
     steps.append(trajectory.last(1, 2, 4, one, zero))
     steps.append(trajectory.boundary(3, 1, 2, zero, one))
   # Trailing first step after the two repetitions.
   steps.append(trajectory.first(0, 1, 2, one, one))

   self._trajectories = steps

예제 #6

0

파일 보기

 def testZeroEpisodes(self, metric_class, expected_result):
     """Checks the metric result after only a boundary and a first step.

     Args:
       metric_class: Zero-argument callable that builds the metric under test.
       expected_result: Value that `metric.result()` must equal.
     """
     metric = metric_class()
     # Each entry: (trajectory constructor, reward, discount); observation,
     # action and policy_info are passed as empty tuples.
     step_specs = (
         (trajectory.boundary, 0., 1.),
         (trajectory.first, 1., 1.),
     )
     for make_step, reward, discount in step_specs:
         metric(make_step((), (), (), reward, discount))
     actual = metric.result()
     self.assertEqual(expected_result, actual)

예제 #7

0

파일 보기

    def testAverageTwoEpisode(self, metric_class, expected_result):
        """Checks the metric result after a multi-step and a one-step sequence.

        Feeds boundary, first, mid, last and boundary steps, then a
        `Trajectory` built directly with `StepType.FIRST` and
        `StepType.LAST`, and compares the result with `expected_result`.

        Args:
          metric_class: Zero-argument callable that builds the metric under
            test.
          expected_result: Value that `metric.result()` must equal.
        """
        metric = metric_class()

        # Each entry: (trajectory constructor, reward, discount); observation,
        # action and policy_info are passed as empty tuples.
        leading_steps = (
            (trajectory.boundary, 0., 1.),
            (trajectory.first, 1., 1.),
            (trajectory.mid, 2., 1.),
            (trajectory.last, 3., 0.),
            (trajectory.boundary, 0., 1.),
        )
        for make_step, reward, discount in leading_steps:
            metric(make_step((), (), (), reward, discount))

        # TODO(kbanoop): Add optional next_step_type arg to trajectory.first. Or
        # implement trajectory.first_last().
        first_and_last = trajectory.Trajectory(
            ts.StepType.FIRST, (), (), (), ts.StepType.LAST, -6., 1.)
        metric(first_and_last)

        actual = metric.result()
        self.assertEqual(expected_result, actual)

예제 #8

0

파일 보기

    def testAverageOneEpisode(self, metric_class, expected_result):
        """Checks the metric result after boundary, mid, mid and last steps.

        Args:
          metric_class: Zero-argument callable that builds the metric under
            test.
          expected_result: Value that `metric.result()` must equal.
        """
        metric = metric_class()

        # Each entry: (trajectory constructor, reward, discount); observation,
        # action and policy_info are passed as empty tuples.
        step_specs = (
            (trajectory.boundary, 0., 1.),
            (trajectory.mid, 1., 1.),
            (trajectory.mid, 2., 1.),
            (trajectory.last, 3., 0.),
        )
        for make_step, reward, discount in step_specs:
            metric(make_step((), (), (), reward, discount))

        actual = metric.result()
        self.assertEqual(expected_result, actual)

예제 #9

0

파일 보기

    def testSaveRestore(self):
        """Checks that metric results come back after a checkpoint round trip.

        Four metrics are fed the same boundary, mid, mid, last sequence and
        saved with `tf.train.Checkpoint`. Each metric is then reset and
        asserted to report 0; after restoring, each must report a value
        greater than 0.
        """
        metrics = [
            py_metrics.AverageReturnMetric(),
            py_metrics.AverageEpisodeLengthMetric(),
            py_metrics.EnvironmentSteps(),
            py_metrics.NumberOfEpisodes()
        ]

        # Each entry: (trajectory constructor, reward, discount); observation,
        # action and policy_info are passed as empty tuples.
        episode = (
            (trajectory.boundary, 0., 1.),
            (trajectory.mid, 1., 1.),
            (trajectory.mid, 2., 1.),
            (trajectory.last, 3., 0.),
        )
        for metric in metrics:
            for make_step, reward, discount in episode:
                metric(make_step((), (), (), reward, discount))

        # Register every metric in the checkpoint under its own name.
        tracked = {}
        for m in metrics:
            tracked[m.name] = m
        checkpoint = tf.train.Checkpoint(**tracked)

        prefix = self.get_temp_dir() + '/ckpt'
        save_path = checkpoint.save(prefix)

        # Clear the state so a non-zero result later can only come from the
        # restore.
        for metric in metrics:
            metric.reset()
            self.assertEqual(0, metric.result())

        restore_status = checkpoint.restore(save_path)
        restore_status.assert_consumed()

        for metric in metrics:
            self.assertGreater(metric.result(), 0)