Example #1
    def testLearnerRun(self):
        strategy = tf.distribute.OneDeviceStrategy('/cpu:0')

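        # Build a learner, its dataset, and the agent variables under the
        # given strategy.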
        test_learner, _, variables, _ = (self._build_learner_with_strategy(
            dist_test_utils.create_dqn_agent_and_dataset_fn,
            strategy,
            sample_batch_size=4))
        old_vars = self.evaluate(variables)
        loss = test_learner.run().loss
        new_vars = self.evaluate(variables)

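        # Running the learner once should update the agent's variables and
        # produce a finite loss.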
        dist_test_utils.check_variables_different(self, old_vars, new_vars)
        self.assertAllInRange(loss, tf.float32.min, tf.float32.max)
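The `_build_learner_with_strategy` helper above comes from the surrounding test module and is not shown here. For context, a minimal, self-contained sketch of the pattern this test exercises (variables created under `tf.distribute.OneDeviceStrategy` and a training step dispatched through `strategy.run`) might look like the following; the toy model and data are hypothetical:

import tensorflow as tf

strategy = tf.distribute.OneDeviceStrategy('/cpu:0')

# Variables must be created inside the strategy scope so the strategy
# controls their placement.
with strategy.scope():
    weight = tf.Variable(1.0)
    optimizer = tf.keras.optimizers.SGD(learning_rate=0.1)

@tf.function
def train_step(x, y):
    def step_fn(x, y):
        with tf.GradientTape() as tape:
            loss = tf.reduce_mean(tf.square(weight * x - y))
        optimizer.apply_gradients(
            zip(tape.gradient(loss, [weight]), [weight]))
        return loss

    # strategy.run executes step_fn on the strategy's single device and
    # returns the per-replica loss.
    per_replica_loss = strategy.run(step_fn, args=(x, y))
    return strategy.reduce(
        tf.distribute.ReduceOp.MEAN, per_replica_loss, axis=None)

loss = train_step(tf.constant([1.0, 2.0]), tf.constant([2.0, 4.0]))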
Example #2
    def testLossLearnerDifferentDistStrat(self, create_agent_fn):
        # Create the strategies used in the test. The second value is the per-core
        # batch size.
        bs_multiplier = 4
        strategies = {
            'default': (tf.distribute.get_strategy(), 4 * bs_multiplier),
            'one_device':
                (tf.distribute.OneDeviceStrategy('/cpu:0'), 4 * bs_multiplier),
            'mirrored': (tf.distribute.MirroredStrategy(), 1 * bs_multiplier),
        }
        if tf.config.list_logical_devices('TPU'):
            strategies['TPU'] = (self._get_tpu_strategy(), 2 * bs_multiplier)
        else:
            logging.info(
                'TPU hardware is not available, TPU strategy test skipped.')

        learners = {
            name: self._build_learner_with_strategy(create_agent_fn, strategy,
                                                    per_core_batch_size)
            for name, (strategy, per_core_batch_size) in strategies.items()
        }

        # Verify that the initial variable values in the learners are the same.
        default_strat_trainer, _, default_vars, _ = learners['default']
        for name, (trainer, _, variables, _) in learners.items():
            if name != 'default':
                self._assign_variables(default_strat_trainer, trainer)
                self.assertLen(variables, len(default_vars))
                for default_variable, variable in zip(default_vars, variables):
                    self.assertAllEqual(default_variable, variable)

        # Calculate losses.
        losses = {}
        iterations = 1
        for name, (trainer, _, variables, train_step) in learners.items():
            old_vars = self.evaluate(variables)

            loss = trainer.run(iterations=iterations).loss
            logging.info(
                'Using strategy: %s, the loss is: %s at train step: %s', name,
                loss, train_step)

            new_vars = self.evaluate(variables)
            losses[name] = old_vars, loss, new_vars

        # Verify same dataset across learner calls.
        for item in tf.data.Dataset.zip(
                tuple([v[1] for v in learners.values()])):
            for i in range(1, len(item)):
                # Compare the default strategy observation to the other
                # datasets; the second index extracts the trajectory from the
                # (trajectory, sample_info) tuple.
                self.assertAllEqual(item[0][0].observation,
                                    item[i][0].observation)

        # Check that the losses are close to each other.
        _, default_loss, _ = losses['default']
        for name, (_, loss, _) in losses.items():
            self._compare_losses(loss, default_loss, delta=1.e-2)

        # Check that the variables changed after calling `learner.run`.
        for old_vars, _, new_vars in losses.values():
            dist_test_utils.check_variables_different(self, old_vars, new_vars)
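`_assign_variables` and `_compare_losses` are helpers defined on the test class rather than in this excerpt. A plausible sketch of what they might look like, assuming each learner exposes its policy variables via `_agent.collect_policy.variables()` as Example #3 does:

def _assign_variables(self, src_learner, dst_learner):
    # Copy variable values so both learners start from identical weights.
    src_vars = src_learner._agent.collect_policy.variables()
    dst_vars = dst_learner._agent.collect_policy.variables()
    for src, dst in zip(src_vars, dst_vars):
        dst.assign(src)

def _compare_losses(self, loss, expected_loss, delta):
    # Losses computed under different strategies should agree to within
    # an absolute tolerance of `delta`.
    self.assertAllClose(loss, expected_loss, atol=delta, rtol=0.0)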
Example #3
  def testLossLearnerDifferentDistStrat(self, create_agent_fn):
    # Create the strategies used in the test. The second value is the per-core
    # batch size.
    bs_multiplier = 4
    strategies = {
        'default': (tf.distribute.get_strategy(), 4 * bs_multiplier),
        'one_device':
            (tf.distribute.OneDeviceStrategy('/cpu:0'), 4 * bs_multiplier),
        'mirrored': (tf.distribute.MirroredStrategy(), 1 * bs_multiplier),
    }
    if tf.config.list_logical_devices('TPU'):
      strategies['TPU'] = (_get_tpu_strategy(), 2 * bs_multiplier)
    else:
      logging.info('TPU hardware is not available, TPU strategy test skipped.')

    learners = {
        name: self._build_learner_with_strategy(create_agent_fn, strategy,
                                                per_core_batch_size)
        for name, (strategy, per_core_batch_size) in strategies.items()
    }

    # Verify that the initial variable values in the learners are the same.
    default_strat_trainer, _, default_vars, _, _ = learners['default']
    for name, (trainer, _, variables, _, _) in learners.items():
      if name != 'default':
        self._assign_variables(default_strat_trainer, trainer)
        self.assertLen(variables, len(default_vars))
        for default_variable, variable in zip(default_vars, variables):
          self.assertAllEqual(default_variable, variable)

    # Calculate losses.
    losses = {}
    checkpoint_path = {}
    iterations = 1
    optimizer_variables = {}
    for name, (trainer, _, variables, train_step, _) in learners.items():
      old_vars = self.evaluate(variables)

      loss = trainer.run(iterations=iterations).loss
      logging.info('Using strategy: %s, the loss is: %s at train step: %s',
                   name, loss, train_step)

      new_vars = self.evaluate(variables)
      losses[name] = old_vars, loss, new_vars
      self.assertNotEmpty(trainer._agent._optimizer.variables())
      optimizer_variables[name] = trainer._agent._optimizer.variables()
      checkpoint_path[name] = trainer._checkpointer.manager.directory

    for name, path in checkpoint_path.items():
      logging.info('Checkpoint dir for learner %s: %s. Content: %s', name, path,
                   tf.io.gfile.listdir(path))
      checkpointer = common.Checkpointer(path)

      # Make sure that the checkpoint file exists, so the learner initialized
      # using the corresponding root directory will pick up the values in the
      # checkpoint file.
      self.assertTrue(checkpointer.checkpoint_exists)

      # Create a learner using an existing root directory containing the
      # checkpoint files.
      strategy, per_core_batch_size = strategies[name]
      learner_from_checkpoint = self._build_learner_with_strategy(
          create_agent_fn,
          strategy,
          per_core_batch_size,
          root_dir=os.path.join(path, '..', '..'))[0]

      # Check that the learner was in fact created based on an existing
      # checkpoint.
      self.assertTrue(learner_from_checkpoint._checkpointer.checkpoint_exists)

      # Check that the values of the variables of the learner initialized
      # from the checkpoint are the same as the values used to write the
      # checkpoint.
      original_learner = learners[name][0]
      self.assertAllClose(
          learner_from_checkpoint._agent.collect_policy.variables(),
          original_learner._agent.collect_policy.variables())
      self.assertAllClose(learner_from_checkpoint._agent._optimizer.variables(),
                          original_learner._agent._optimizer.variables())

    # Verify same dataset across learner calls.
    for item in tf.data.Dataset.zip(tuple([v[1] for v in learners.values()])):
      for i in range(1, len(item)):
        # Compare the default strategy observation to the other datasets;
        # the second index extracts the trajectory from the
        # (trajectory, sample_info) tuple.
        self.assertAllEqual(item[0][0].observation, item[i][0].observation)

    # Check that the losses are close to each other.
    _, default_loss, _ = losses['default']
    for name, (_, loss, _) in losses.items():
      self._compare_losses(loss, default_loss, delta=1.e-2)

    # Check that the optimizer variables are close to each other.
    default_optimizer_vars = optimizer_variables['default']
    for name, optimizer_vars in optimizer_variables.items():
      self.assertAllClose(
          optimizer_vars,
          default_optimizer_vars,
          atol=1.e-2,
          rtol=1.e-2,
          msg=('The values of the optimizer variables for the strategy '
               '{} are significantly different from the values of the '
               'optimizer variables of the default strategy.').format(name))

    # Check that the variables changed after calling `learner.run`.
    for old_vars, _, new_vars in losses.values():
      dist_test_utils.check_variables_different(self, old_vars, new_vars)
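Example #3 additionally round-trips state through `tf_agents.utils.common.Checkpointer`. A minimal sketch of that save/restore pattern, using plain `tf.Variable`s as hypothetical stand-ins for the agent and a checkpoint directory of your choosing:

import tensorflow as tf
from tf_agents.utils import common

ckpt_dir = '/tmp/learner_ckpt'  # hypothetical checkpoint directory

# Write a checkpoint containing a step counter and one value.
step = tf.Variable(0, dtype=tf.int64)
value = tf.Variable(3.0)
saver = common.Checkpointer(ckpt_dir, max_to_keep=1, step=step, value=value)
saver.save(global_step=step)

# A new Checkpointer pointed at the same directory restores the tracked
# objects on construction, which is what the learner-from-checkpoint
# assertions above rely on.
restored_step = tf.Variable(0, dtype=tf.int64)
restored_value = tf.Variable(0.0)
restorer = common.Checkpointer(
    ckpt_dir, max_to_keep=1, step=restored_step, value=restored_value)
assert restorer.checkpoint_exists
assert restored_value.numpy() == 3.0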