Example #1
0
  def testTruncation(self):
    """A scalar truncation yields a LAST step carrying the given values."""
    obs, rew, disc = -1, 2.0, 1.0
    step = ts.truncation(obs, rew, disc)

    self.assertEqual(ts.StepType.LAST, step.step_type)
    self.assertEqual(obs, step.observation)
    self.assertEqual(rew, step.reward)
    self.assertEqual(disc, step.discount)
Example #2
0
 def testTruncation(self):
   """Tensor variant: truncation produces a LAST step with the given values."""
   obs = tf.constant(-1)
   rew = tf.constant(2.0)
   disc = tf.constant(1.0)
   evaluated = self.evaluate(ts.truncation(obs, rew, disc))
   self.assertEqual(ts.StepType.LAST, evaluated.step_type)
   self.assertEqual(-1, evaluated.observation)
   self.assertEqual(2.0, evaluated.reward)
   self.assertEqual(1.0, evaluated.discount)
Example #3
0
  def testTruncationBatched(self):
    """Batched numpy inputs each become LAST steps with values passed through."""
    obs = np.array([[-1], [-1]])
    rew = np.array([2.0, 2.0])
    disc = np.array([1.0, 1.0])
    step = ts.truncation(obs, rew, disc)

    self.assertItemsEqual([ts.StepType.LAST, ts.StepType.LAST], step.step_type)
    self.assertItemsEqual(obs, step.observation)
    self.assertItemsEqual(rew, step.reward)
    self.assertItemsEqual(disc, step.discount)
Example #4
0
  def testTruncationMultiRewards(self):
    """A list of reward arrays survives truncation, with and without outer_dims."""
    obs = np.array([[-1], [-1]])
    rewards = [np.array([[2.], [2.]]),
               np.array([[3., 3.], [4., 4.]])]
    disc = np.array([1., 1.])

    plain = ts.truncation(obs, rewards, disc)
    with_dims = ts.truncation(obs, rewards, disc, outer_dims=[2])

    # Both calls must agree: LAST step types, with every field passed through.
    expected_types = [ts.StepType.LAST, ts.StepType.LAST]
    for step in (plain, with_dims):
      self.assertItemsEqual(expected_types, step.step_type)
      self.assertItemsEqual(obs, step.observation)
      self.assertAllEqual(rewards[0], step.reward[0])
      self.assertAllEqual(rewards[1], step.reward[1])
      self.assertItemsEqual(disc, step.discount)
Example #5
0
  def testTruncationMultiRewards(self):
    """Tensor variant: a list of reward tensors survives ts.truncation.

    Also checks that a scalar discount (0.5) is broadcast to the batch of 2,
    and that passing outer_dims=[2] explicitly gives the same result.
    """
    observation = tf.constant([[-1], [-1]])
    # Two reward tensors with different trailing shapes, as a list.
    reward = [tf.constant([[2.], [2.]]),
              tf.constant([[3., 3.], [4., 4.]])]
    # Scalar discount; expected below as [0.5, 0.5] after broadcasting.
    discount = tf.constant(0.5)
    time_step = ts.truncation(observation, reward, discount)
    time_step_ = self.evaluate(time_step)

    time_step_with_outerdims = ts.truncation(
        observation, reward, discount, outer_dims=[2])
    time_step_with_outerdims_ = self.evaluate(time_step_with_outerdims)

    self.assertItemsEqual([ts.StepType.LAST] * 2, time_step_.step_type)
    self.assertItemsEqual([ts.StepType.LAST] * 2,
                          time_step_with_outerdims_.step_type)
    self.assertItemsEqual([-1, -1], time_step_.observation)
    self.assertItemsEqual([-1, -1], time_step_with_outerdims_.observation)
    self.assertAllEqual(reward[0], time_step_.reward[0])
    self.assertAllEqual(reward[1], time_step_.reward[1])
    self.assertAllEqual(reward[0], time_step_with_outerdims_.reward[0])
    self.assertAllEqual(reward[1], time_step_with_outerdims_.reward[1])
    # NOTE(review): the numpy sibling test also asserts the outer-dims
    # discount; this method may be truncated at the chunk boundary — confirm.
    self.assertItemsEqual([0.5, 0.5], time_step_.discount)