Code example #1
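This test exercises the NumPy path of trajectory.from_episode: given array inputs and discount=None, the resulting Trajectory stays in NumPy, gets a FIRST/MID/.../LAST step-type pattern, and defaults every discount to 1.0.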
# Imports assumed by these tests (module paths follow the current
# tf_agents layout; tf.contrib requires TensorFlow 1.x).
import numpy as np
import tensorflow as tf

from tf_agents.trajectories import time_step as ts
from tf_agents.trajectories import trajectory


# The enclosing class name is assumed; the methods only need a
# tf.test.TestCase subclass for assertAllEqual/evaluate.
class TrajectoryTest(tf.test.TestCase):

  def testFromEpisodeArray(self):
    observation = np.random.rand(4, 5)
    action = ()
    policy_info = ()
    reward = np.random.rand(4)
    traj = trajectory.from_episode(
        observation, action, policy_info, reward, discount=None)
    # NumPy inputs should take the NumPy path: no tensors are created.
    self.assertFalse(tf.contrib.framework.is_tensor(traj.step_type))
    first = ts.StepType.FIRST
    mid = ts.StepType.MID
    last = ts.StepType.LAST
    # from_episode marks the first step FIRST and the rest MID,
    # and shifts next_step_type by one so the episode ends on LAST.
    self.assertAllEqual(
        traj.step_type, [first, mid, mid, mid])
    self.assertAllEqual(
        traj.next_step_type, [mid, mid, mid, last])
    self.assertAllEqual(traj.observation, observation)
    self.assertAllEqual(traj.reward, reward)
    # With discount=None, every step's discount defaults to 1.0.
    self.assertAllEqual(traj.discount, [1.0, 1.0, 1.0, 1.0])
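Outside the test harness, the same call can be used directly. Below is a minimal standalone sketch (not from the original test file), assuming a tf_agents version where from_episode keeps this signature and returns NumPy arrays for NumPy inputs; the printed values mirror the assertions above (StepType.FIRST = 0, MID = 1, LAST = 2), though the exact output formatting may vary by version.

import numpy as np
from tf_agents.trajectories import trajectory

# A 4-step episode: from_episode infers the step-type pattern from
# the episode length alone.
observation = np.random.rand(4, 5)
reward = np.random.rand(4)

traj = trajectory.from_episode(
    observation, action=(), policy_info=(), reward=reward, discount=None)

print(traj.step_type)       # [0 1 1 1] -> FIRST, MID, MID, MID
print(traj.next_step_type)  # [1 1 1 2] -> MID, MID, MID, LAST
print(traj.discount)        # [1. 1. 1. 1.] since discount=None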
Code example #2
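This test covers the tensor path of the same call: with tf.Tensor inputs, the resulting Trajectory fields should themselves be tensors, which the test evaluates before comparing concrete values.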
  # Part of the same TrajectoryTest class as code example #1 (imports
  # and class are shown there); tf.random_uniform is a TF 1.x API.
  def testFromEpisodeTensor(self):
    observation = tf.random_uniform((4, 5))
    action = ()
    policy_info = ()
    reward = tf.random_uniform((4,))
    traj = trajectory.from_episode(
        observation, action, policy_info, reward, discount=None)
    # Tensor inputs should produce tensor fields in the Trajectory.
    self.assertTrue(tf.contrib.framework.is_tensor(traj.step_type))
    # Evaluate everything in one call so the random inputs and the
    # resulting trajectory come from the same graph run.
    traj_val, obs_val, reward_val = self.evaluate((traj, observation, reward))
    first = ts.StepType.FIRST
    mid = ts.StepType.MID
    last = ts.StepType.LAST
    self.assertAllEqual(
        traj_val.step_type, [first, mid, mid, mid])
    self.assertAllEqual(
        traj_val.next_step_type, [mid, mid, mid, last])
    self.assertAllEqual(traj_val.observation, obs_val)
    self.assertAllEqual(traj_val.reward, reward_val)
    # discount=None again defaults the discounts to all ones.
    self.assertAllEqual(traj_val.discount, [1.0, 1.0, 1.0, 1.0])
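Together the two tests pin down the dual behavior of from_episode: NumPy inputs stay in NumPy (hence the assertFalse on is_tensor), while tensor inputs yield tensor-valued fields that must be evaluated before comparison under TF 1.x graph mode. Under TensorFlow 2.x, the deprecated calls here correspond to tf.random.uniform and tf.is_tensor, and eager tensors can be inspected directly without self.evaluate.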