def testFromEpisodeArray(self):
  """Checks `trajectory.from_episode` on NumPy array inputs.

  With a 4-step episode and `discount=None`, the test asserts that:
    * `step_type` is not a tensor;
    * `step_type` is FIRST followed by three MID entries;
    * `next_step_type` is three MID entries followed by LAST;
    * `observation` and `reward` equal the arrays passed in;
    * `discount` is all ones.
  """
  # Observation is drawn before reward, preserving the RNG call order.
  obs = np.random.rand(4, 5)
  no_action = ()
  no_policy_info = ()
  rewards = np.random.rand(4)

  episode = trajectory.from_episode(
      obs, no_action, no_policy_info, rewards, discount=None)

  # Array inputs should yield a non-tensor step_type.
  self.assertFalse(tf.contrib.framework.is_tensor(episode.step_type))

  step_first = ts.StepType.FIRST
  step_mid = ts.StepType.MID
  step_last = ts.StepType.LAST

  expected_step_type = [step_first] + [step_mid] * 3
  expected_next_step_type = [step_mid] * 3 + [step_last]
  expected_discount = [1.0] * 4

  self.assertAllEqual(episode.step_type, expected_step_type)
  self.assertAllEqual(episode.next_step_type, expected_next_step_type)
  self.assertAllEqual(episode.observation, obs)
  self.assertAllEqual(episode.reward, rewards)
  self.assertAllEqual(episode.discount, expected_discount)
def testFromEpisodeTensor(self):
  """Checks `trajectory.from_episode` on tensor inputs.

  With a 4-step episode and `discount=None`, the test asserts that
  `step_type` is a tensor. It then evaluates the trajectory together with
  the input tensors and asserts that:
    * `step_type` is FIRST followed by three MID entries;
    * `next_step_type` is three MID entries followed by LAST;
    * `observation` and `reward` equal the evaluated inputs;
    * `discount` is all ones.
  """
  obs = tf.random_uniform((4, 5))
  no_action = ()
  no_policy_info = ()
  rewards = tf.random_uniform((4,))

  episode = trajectory.from_episode(
      obs, no_action, no_policy_info, rewards, discount=None)

  # Tensor inputs should yield a tensor step_type.
  self.assertTrue(tf.contrib.framework.is_tensor(episode.step_type))

  # The trajectory and both inputs go through a single evaluate call, so
  # the comparisons below use values from the same evaluation.
  evaluated = self.evaluate((episode, obs, rewards))
  episode_np, obs_np, rewards_np = evaluated

  step_first = ts.StepType.FIRST
  step_mid = ts.StepType.MID
  step_last = ts.StepType.LAST

  expected_step_type = [step_first] + [step_mid] * 3
  expected_next_step_type = [step_mid] * 3 + [step_last]
  expected_discount = [1.0] * 4

  self.assertAllEqual(episode_np.step_type, expected_step_type)
  self.assertAllEqual(episode_np.next_step_type, expected_next_step_type)
  self.assertAllEqual(episode_np.observation, obs_np)
  self.assertAllEqual(episode_np.reward, rewards_np)
  self.assertAllEqual(episode_np.discount, expected_discount)