Exemple #1
0
  def testLearnerLossPassExperience(self):
    """Calls `loss()` with an explicitly supplied experience batch.

    Verifies that the tracked variables compare as the same before and after
    the loss computation, and that the returned loss lies within
    [tf.float32.min, tf.float32.max].
    """
    cpu_strategy = tf.distribute.OneDeviceStrategy('/cpu:0')
    built = self._build_learner_with_strategy(
        dist_test_utils.create_dqn_agent_and_dataset_fn,
        cpu_strategy,
        sample_batch_size=4)
    learner, _, tracked_vars, _, make_dataset = built

    # Snapshot the variables before any loss computation takes place.
    vars_before = self.evaluate(tracked_vars)

    # Pull a single element from the dataset and hand it to `loss()` directly.
    experience = next(iter(make_dataset()))
    loss_info = learner.loss(experience_and_sample_info=experience)
    loss_value = loss_info.loss

    vars_after = self.evaluate(tracked_vars)
    dist_test_utils.check_variables_same(self, vars_before, vars_after)
    self.assertAllInRange(loss_value, tf.float32.min, tf.float32.max)
Exemple #2
0
  def testLearnerLoss(self):
    """Calls `loss()` with the default reduce op and with a mean reduce op.

    For each variant, verifies that the tracked variables compare as the same
    as their initial snapshot and that the returned loss lies within
    [tf.float32.min, tf.float32.max].
    """
    cpu_strategy = tf.distribute.OneDeviceStrategy('/cpu:0')
    built = self._build_learner_with_strategy(
        dist_test_utils.create_dqn_agent_and_dataset_fn,
        cpu_strategy,
        sample_batch_size=4)
    learner, _, tracked_vars, _, _ = built

    # Snapshot taken once; every loss call below is compared against it.
    baseline_vars = self.evaluate(tracked_vars)

    loss_call_kwargs = (
        # No reduce_op passed: the learner's default applies (the sum reduce
        # op, according to the original comment in this test).
        {},
        # Explicitly request a mean reduce op.
        {'reduce_op': tf.distribute.ReduceOp.MEAN},
    )
    for kwargs in loss_call_kwargs:
      loss_value = learner.loss(**kwargs).loss
      current_vars = self.evaluate(tracked_vars)

      dist_test_utils.check_variables_same(self, baseline_vars, current_vars)
      self.assertAllInRange(loss_value, tf.float32.min, tf.float32.max)