Ejemplo n.º 1
0
    def test_step_per_loop_callable(self):
        """Train for 10 steps with a callable `steps_per_loop`.

        The callable returns 2 while the step is at most 4 and 4 afterwards;
        the test then checks that the global step equals 10.
        """
        runner = TestRunner()

        def loop_length(global_step):
            # Loop size depends on the step value handed to the callable.
            return 4 if global_step > 4 else 2

        ckpt = tf.train.Checkpoint(
            model=runner.model, optimizer=runner.optimizer)
        manager = tf.train.CheckpointManager(
            ckpt,
            self.model_dir,
            max_to_keep=None,
            step_counter=runner.global_step,
            checkpoint_interval=10)

        ctrl = controller.Controller(
            trainer=runner,
            global_step=runner.global_step,
            steps_per_loop=loop_length,
            checkpoint_manager=manager,
        )
        ctrl.train(steps=10)

        self.assertEqual(runner.global_step, 10)
  def test_evaluate_with_nested_summaries(self):
    """Evaluate for 5 steps and check both nested summary directories.

    Each of the "dataset" and "dataset2" subdirectories of `model_dir` must
    be non-empty and contain summaries matching "loss" and "accuracy".
    """
    evaluator = TestEvaluatorWithNestedSummary()
    ctrl = controller.Controller(
        evaluator=evaluator,
        global_step=tf.Variable(0, dtype=tf.int64),
        eval_summary_dir=self.model_dir)
    ctrl.evaluate(steps=5)

    for subdir in ("dataset", "dataset2"):
      self.assertNotEmpty(
          tf.io.gfile.listdir(os.path.join(self.model_dir, subdir)))
      for keyword in ("loss", "accuracy"):
        self.assertNotEmpty(
            summaries_with_matching_keyword(
                keyword, os.path.join(self.model_dir, subdir)))
Ejemplo n.º 3
0
    def test_eval_and_checkpoint_interval(self):
        """Check checkpoint and eval counts with both intervals set to 5.

        Training runs for 10 steps with `steps_per_loop=10`, a checkpoint
        interval of 5 and an eval interval of 5.
        """
        runner = TestRunner()

        ckpt = tf.train.Checkpoint(
            model=runner.model, optimizer=runner.optimizer)
        manager = tf.train.CheckpointManager(
            ckpt,
            self.model_dir,
            max_to_keep=None,
            step_counter=runner.global_step,
            checkpoint_interval=5)
        ctrl = controller.Controller(
            trainer=runner,
            evaluator=runner,
            global_step=runner.global_step,
            steps_per_loop=10,
            checkpoint_manager=manager)
        ctrl.train_and_evaluate(
            train_steps=10, eval_steps=2, eval_interval=5)

        # Expect 3 checkpoints to be saved at step: 0, 5, 10.
        ckpt_files = tf.io.gfile.glob(
            os.path.join(self.model_dir, "ckpt-*.data*"))
        self.assertLen(ckpt_files, 3)

        # Expect evaluation is performed 2 times at step: 5, 10.
        eval_summaries = summaries_with_matching_keyword(
            "eval_loss", self.model_dir)
        self.assertLen(eval_summaries, 2)
Ejemplo n.º 4
0
    def test_train_and_evaluate_with_same_summary_dir(self):
        """Train and evaluate with one directory shared by both summary kinds.

        `summary_dir` and `eval_summary_dir` both point at
        `<model_dir>/summaries`; the test checks that the directory holds
        summaries matching "loss" and "eval_loss".
        """
        runner = TestRunner()

        ckpt = tf.train.Checkpoint(
            model=runner.model, optimizer=runner.optimizer)
        manager = tf.train.CheckpointManager(
            ckpt,
            self.model_dir,
            max_to_keep=None,
            step_counter=runner.global_step)
        ctrl = controller.Controller(
            trainer=runner,
            evaluator=runner,
            global_step=runner.global_step,
            steps_per_loop=2,
            summary_dir=os.path.join(self.model_dir, "summaries"),
            checkpoint_manager=manager,
            eval_summary_dir=os.path.join(self.model_dir, "summaries"))
        ctrl.train_and_evaluate(
            train_steps=10, eval_steps=2, eval_interval=6)

        # Loss and accuracy values should be written into summaries.
        shared_dir = os.path.join(self.model_dir, "summaries")
        self.assertNotEmpty(tf.io.gfile.listdir(shared_dir))
        for keyword in ("loss", "eval_loss"):
            self.assertNotEmpty(
                summaries_with_matching_keyword(
                    keyword, os.path.join(self.model_dir, "summaries")))
Ejemplo n.º 5
0
    def test_train_and_evaluate_reset_datasets(self):
        """Run train-and-evaluate twice, replacing the datasets in between.

        After the first run, new distributed train and eval datasets are built
        from `dataset_fn` and assigned to the runner before the second run.
        The test has no explicit assertions; it passes if nothing raises.
        """
        runner = TestRunner()

        ctrl = controller.Controller(
            trainer=runner,
            evaluator=runner,
            global_step=runner.global_step,
            steps_per_loop=2)

        ctrl.train_and_evaluate(
            train_steps=10, eval_steps=2, eval_interval=6)

        # Build both replacement datasets first, then install them.
        fresh_train = (
            runner.strategy.experimental_distribute_datasets_from_function(
                dataset_fn))
        fresh_eval = (
            runner.strategy.experimental_distribute_datasets_from_function(
                dataset_fn))
        runner.train_dataset = fresh_train
        runner.eval_dataset = fresh_eval

        ctrl.train_and_evaluate(
            train_steps=10, eval_steps=2, eval_interval=6)
  def test_has_checkpoint_eval_summary_only(self):
    """Train and evaluate with a checkpoint manager and eval summaries only.

    No training `summary_dir` is passed, so the test expects no event files in
    the checkpoint directory but does expect them under `summaries/eval`.
    """
    runner = TestRunner()
    # Checkpoint manager is provided; only an eval summary directory is set.
    ckpt = tf.train.Checkpoint(model=runner.model)
    manager = tf.train.CheckpointManager(
        ckpt,
        self.model_dir,
        max_to_keep=None,
        step_counter=runner.global_step)
    ctrl = controller.Controller(
        trainer=runner,
        evaluator=runner,
        global_step=runner.global_step,
        checkpoint_manager=manager,
        eval_summary_dir=os.path.join(self.model_dir, "summaries/eval"),
        steps_per_loop=2)
    ctrl.train_and_evaluate(train_steps=10, eval_steps=2, eval_interval=6)
    self.assertEqual(runner.global_step, 10)

    # Training summaries are not saved.
    train_events = tf.io.gfile.glob(
        os.path.join(manager.directory, "events.*"))
    self.assertEmpty(train_events)

    # Evaluation summaries are saved.
    eval_events = tf.io.gfile.glob(
        os.path.join(self.model_dir, "summaries/eval/events.*"))
    self.assertNotEmpty(eval_events)
Ejemplo n.º 7
0
    def test_summaries_inside_train_fn(self):
        """Train a summary-writing trainer and check only train summaries exist.

        Uses `TestTrainerWithSummaries` with a train `summary_dir` and
        `summary_interval=2`; no evaluator or eval summary directory is given.
        """
        runner = TestTrainerWithSummaries()

        ckpt = tf.train.Checkpoint(
            model=runner.model, optimizer=runner.optimizer)
        manager = tf.train.CheckpointManager(
            ckpt,
            self.model_dir,
            max_to_keep=None,
            step_counter=runner.global_step)
        ctrl = controller.Controller(
            trainer=runner,
            global_step=runner.global_step,
            steps_per_loop=2,
            summary_dir=os.path.join(self.model_dir, "summaries/train"),
            summary_interval=2,
            checkpoint_manager=manager,
        )
        ctrl.train(steps=10)

        # No checkpoint files are expected in the model directory.
        # NOTE(review): presumably because the manager has no
        # `checkpoint_interval` -- confirm against the Controller.
        ckpt_files = tf.io.gfile.glob(os.path.join(self.model_dir, "ckpt*"))
        self.assertEmpty(ckpt_files)

        # Only train summaries are written.
        self.assertNotEmpty(
            tf.io.gfile.listdir(
                os.path.join(self.model_dir, "summaries/train")))
        self.assertNotEmpty(
            summaries_with_matching_keyword(
                "loss", os.path.join(self.model_dir, "summaries/train")))
        eval_dir_exists = tf.io.gfile.exists(
            os.path.join(self.model_dir, "summaries/eval"))
        self.assertFalse(eval_dir_exists)
Ejemplo n.º 8
0
 def test_no_checkpoint(self):
     """Train and evaluate without a checkpoint manager, twice.

     The first run checks the step count and both summary directories. The
     global step is then reset to 0 and a second run must reach step 10 again.
     """
     runner = TestRunner()
     # No checkpoint manager and no strategy.
     ctrl = controller.Controller(
         trainer=runner,
         evaluator=runner,
         global_step=runner.global_step,
         steps_per_loop=2,
         summary_dir=os.path.join(self.model_dir, "summaries/train"),
         eval_summary_dir=os.path.join(self.model_dir, "summaries/eval"))
     ctrl.train_and_evaluate(train_steps=10, eval_steps=2, eval_interval=6)
     self.assertEqual(runner.global_step, 10)

     # Loss and accuracy values should be written into summaries.
     expectations = (
         ("summaries/train", "loss"),
         ("summaries/eval", "eval_loss"),
     )
     for subdir, keyword in expectations:
         self.assertNotEmpty(
             tf.io.gfile.listdir(os.path.join(self.model_dir, subdir)))
         self.assertNotEmpty(
             summaries_with_matching_keyword(
                 keyword, os.path.join(self.model_dir, subdir)))

     # No checkpoint, so global step starts from 0.
     runner.global_step.assign(0)
     ctrl.train_and_evaluate(train_steps=10, eval_steps=2, eval_interval=6)
     self.assertEqual(runner.global_step, 10)
Ejemplo n.º 9
0
    def test_evaluate_only(self):
        """Exercise evaluation-only controllers.

        First a single `evaluate` call is checked for eval summaries and an
        "eval_loss" result. Then `evaluate_continuously` is run with a timeout
        callback that writes a marker file, and the test checks it exists.
        """
        runner = TestRunner()

        ckpt = tf.train.Checkpoint(model=runner.model)
        ckpt.save(os.path.join(self.model_dir, "ckpt"))
        manager = tf.train.CheckpointManager(
            ckpt,
            self.model_dir,
            max_to_keep=None,
            step_counter=runner.global_step)
        ctrl = controller.Controller(
            evaluator=runner,
            global_step=runner.global_step,
            checkpoint_manager=manager,
            summary_dir=os.path.join(self.model_dir, "summaries/train"),
            eval_summary_dir=os.path.join(self.model_dir, "summaries/eval"))
        results = ctrl.evaluate(steps=2)

        # Only eval summaries are written
        train_dir_exists = tf.io.gfile.exists(
            os.path.join(self.model_dir, "summaries/train"))
        self.assertFalse(train_dir_exists)
        self.assertNotEmpty(
            tf.io.gfile.listdir(
                os.path.join(self.model_dir, "summaries/eval")))
        self.assertNotEmpty(
            summaries_with_matching_keyword(
                "eval_loss", os.path.join(self.model_dir, "summaries/eval")))
        self.assertIn("eval_loss", results)

        # Tests continuous eval with timeout and timeout_fn.
        done_file = os.path.join(self.model_dir, "summaries/eval/Done")

        def write_done_marker():
            # Leave a marker file behind, then report True to the caller.
            with tf.io.gfile.GFile(done_file, "w") as marker:
                marker.write("DONE")
            return True

        ctrl = controller.Controller(
            evaluator=runner,
            global_step=runner.global_step,
            checkpoint_manager=manager,
            eval_summary_dir=os.path.join(self.model_dir, "summaries/eval"))
        ctrl.evaluate_continuously(
            timeout=1, timeout_fn=write_done_marker, steps=2)
        self.assertNotEmpty(tf.io.gfile.glob(done_file))
 def test_no_checkpoint_and_summaries(self):
   """Train and evaluate with neither a checkpoint manager nor summary dirs."""
   runner = TestRunner()
   # No checkpoint + summary directories.
   ctrl = controller.Controller(
       trainer=runner,
       evaluator=runner,
       global_step=runner.global_step,
       steps_per_loop=2)
   ctrl.train_and_evaluate(train_steps=10, eval_steps=2, eval_interval=6)

   self.assertEqual(runner.global_step, 10)
Ejemplo n.º 11
0
    def test_no_eval_steps(self):
        """Call `evaluate()` without a `steps` argument.

        The test has no explicit assertions; it passes if the call does not
        raise.
        """
        runner = TestRunner()

        ckpt = tf.train.Checkpoint(model=runner.model)
        ckpt.save(os.path.join(self.model_dir, "ckpt"))
        manager = tf.train.CheckpointManager(
            ckpt,
            self.model_dir,
            max_to_keep=None,
            step_counter=runner.global_step)

        ctrl = controller.Controller(
            evaluator=runner,
            global_step=runner.global_step,
            checkpoint_manager=manager)
        ctrl.evaluate()
Ejemplo n.º 12
0
    def test_actions(self):
        """Check that train and eval actions receive the loop outputs.

        Recorder actions are registered for training and evaluation. After
        running, the train recorder must hold 5 outputs containing "loss" and
        the eval recorder 2 outputs containing "eval_loss", all non-negative.
        """
        runner = TestRunner()
        ckpt = tf.train.Checkpoint(
            model=runner.model, optimizer=runner.optimizer)
        manager = tf.train.CheckpointManager(
            ckpt,
            self.model_dir,
            max_to_keep=None,
            step_counter=runner.global_step,
            checkpoint_interval=10)

        class OutputRecorderAction:
            """Simple `Action` that just saves the outputs passed to `__call__`."""

            def __init__(self):
                self.outputs = []

            def __call__(self, output):
                self.outputs.append(output)

        train_recorder = OutputRecorderAction()
        eval_recorder = OutputRecorderAction()

        ctrl = controller.Controller(
            trainer=runner,
            evaluator=runner,
            train_actions=[train_recorder],
            eval_actions=[eval_recorder],
            global_step=runner.global_step,
            steps_per_loop=2,
            summary_dir=os.path.join(self.model_dir, "summaries/train"),
            checkpoint_manager=manager,
            eval_summary_dir=os.path.join(self.model_dir, "summaries/eval"))
        ctrl.train_and_evaluate(
            train_steps=10, eval_steps=2, eval_interval=6)

        # (recorder, expected number of outputs, key each output must hold)
        expectations = (
            (train_recorder, 5, "loss"),
            (eval_recorder, 2, "eval_loss"),
        )
        for recorder, expected_count, key in expectations:
            self.assertLen(recorder.outputs, expected_count)
            for recorded in recorder.outputs:
                self.assertIn(key, recorded)
                self.assertGreaterEqual(recorded[key], 0)
Ejemplo n.º 13
0
    def test_already_trained_model(self):
        """Call `train(steps=10)` when the global step is already 10.

        The test has no explicit assertions; it passes if the call does not
        raise.
        """
        runner = TestRunner()
        runner.global_step.assign(10)

        ckpt = tf.train.Checkpoint(
            model=runner.model, optimizer=runner.optimizer)
        manager = tf.train.CheckpointManager(
            ckpt,
            self.model_dir,
            max_to_keep=None,
            step_counter=runner.global_step,
            checkpoint_interval=10)
        ctrl = controller.Controller(
            trainer=runner,
            global_step=runner.global_step,
            steps_per_loop=2,
            checkpoint_manager=manager)

        # `global_step` is already `train_steps`.
        ctrl.train(steps=10)
 def test_restore_from_most_recent_checkpoint(self):
   """Train for 20 steps, then restore and expect the newest checkpoint.

   With a checkpoint interval of 5 the manager must list 4 checkpoints, and
   `restore_checkpoint()` must return the last one in that list.
   """
   runner = TestRunner()
   ckpt = tf.train.Checkpoint(model=runner.model)
   manager = tf.train.CheckpointManager(
       ckpt,
       self.model_dir,
       max_to_keep=None,
       step_counter=runner.global_step,
       checkpoint_interval=5)
   ctrl = controller.Controller(
       trainer=runner,
       global_step=runner.global_step,
       checkpoint_manager=manager,
       eval_summary_dir=os.path.join(self.model_dir, "summaries/eval"),
       steps_per_loop=5)
   ctrl.train(20)
   self.assertLen(manager.checkpoints, 4)

   restored = ctrl.restore_checkpoint()
   self.assertEqual(restored, manager.checkpoints[-1])
  def test_evaluate_with_loss_output(self):
    """Evaluate a `TestEvaluator` for 5 steps and check eval summaries.

    The eval summary directory must be non-empty and contain summaries
    matching "eval_loss".
    """
    evaluator = TestEvaluator()

    ckpt = tf.train.Checkpoint(model=evaluator.model)
    ckpt.save(os.path.join(self.model_dir, "ckpt"))
    manager = tf.train.CheckpointManager(
        ckpt, self.model_dir, max_to_keep=None)
    ctrl = controller.Controller(
        evaluator=evaluator,
        global_step=tf.Variable(0, dtype=tf.int64),
        checkpoint_manager=manager,
        eval_summary_dir=os.path.join(self.model_dir, "summaries/eval"))
    ctrl.evaluate(steps=5)

    # Only eval summaries are written
    eval_dir_entries = tf.io.gfile.listdir(
        os.path.join(self.model_dir, "summaries/eval"))
    self.assertNotEmpty(eval_dir_entries)
    matching = summaries_with_matching_keyword(
        "eval_loss", os.path.join(self.model_dir, "summaries/eval"))
    self.assertNotEmpty(matching)
Ejemplo n.º 16
0
  def test_train_and_evaluate(self, return_numpy):
    """Train and evaluate, then check checkpoints and both summary dirs.

    Args:
      return_numpy: Forwarded to `TestRunner(return_numpy=...)`.
    """
    runner = TestRunner(return_numpy=return_numpy)

    ckpt = tf.train.Checkpoint(
        model=runner.model, optimizer=runner.optimizer)
    manager = tf.train.CheckpointManager(
        ckpt,
        self.model_dir,
        max_to_keep=None,
        step_counter=runner.global_step,
        checkpoint_interval=10)
    ctrl = controller.Controller(
        trainer=runner,
        evaluator=runner,
        global_step=runner.global_step,
        steps_per_loop=2,
        summary_dir=os.path.join(self.model_dir, "summaries/train"),
        checkpoint_manager=manager,
        eval_summary_dir=os.path.join(self.model_dir, "summaries/eval"))
    ctrl.train_and_evaluate(train_steps=10, eval_steps=2, eval_interval=6)

    # Checkpoints are saved.
    ckpt_files = tf.io.gfile.glob(os.path.join(self.model_dir, "ckpt*"))
    self.assertNotEmpty(ckpt_files)

    # Loss and accuracy values should be written into summaries.
    expectations = (
        ("summaries/train", "loss"),
        ("summaries/eval", "eval_loss"),
    )
    for subdir, keyword in expectations:
      self.assertNotEmpty(
          tf.io.gfile.listdir(os.path.join(self.model_dir, subdir)))
      self.assertTrue(
          check_eventfile_for_keyword(
              keyword, os.path.join(self.model_dir, subdir)))
 def test_evaluate_with_no_output(self):
   """Check that `evaluate` returns `{}` for `TestEvaluatorNoOutput`."""
   ctrl = controller.Controller(
       evaluator=TestEvaluatorNoOutput(),
       global_step=tf.Variable(0, dtype=tf.int64),
       eval_summary_dir=os.path.join(self.model_dir, "summaries/eval"))
   eval_result = ctrl.evaluate(steps=5)
   self.assertEqual(eval_result, {})