def test_end2end(self, model_name, mock_get_input_fn_from_dataset):
        """End-to-end test of model_eval."""
        tf_test_utils.write_fake_checkpoint('inception_v3',
                                            self.test_session(),
                                            self.checkpoint_dir,
                                            FLAGS.moving_average_decay)

        # Start up eval, loading that checkpoint.
        FLAGS.batch_size = 2
        FLAGS.checkpoint_dir = self.checkpoint_dir
        FLAGS.eval_name = self.eval_name
        FLAGS.max_evaluations = 1
        FLAGS.max_examples = 2
        FLAGS.best_checkpoint_metric = 'F1/All'
        FLAGS.model_name = model_name
        FLAGS.dataset_config_pbtxt = '/path/to/mock.pbtxt'
        FLAGS.master = ''
        # Always try to read in compressed inputs to stress that case. Uncompressed
        # inputs are certain to work. This test is expensive to run, so we want to
        # minimize the number of times we need to run this.
        mock_get_input_fn_from_dataset.return_value = (
            data_providers_test.make_golden_dataset(compressed_inputs=True,
                                                    use_tpu=FLAGS.use_tpu))
        model_eval.main(0)
        mock_get_input_fn_from_dataset.assert_called_once_with(
            dataset_config_filename=FLAGS.dataset_config_pbtxt,
            mode=tf.estimator.ModeKeys.EVAL,
            use_tpu=FLAGS.use_tpu)
        self.assertTrue(
            tf_test_utils.check_file_exists('best_checkpoint.txt',
                                            eval_name=self.eval_name))
        self.assertTrue(
            tf_test_utils.check_file_exists('best_checkpoint.metrics',
                                            eval_name=self.eval_name))
Example #2
 def test_end2end_inception_v3_failed_warm_up_from(self):
     """End-to-end test of model_train script with a non-existent path."""
     with self.assertRaises(tf.errors.OpError):
         self._run_tiny_training(
             model_name='inception_v3',
             dataset=data_providers_test.make_golden_dataset(
                 use_tpu=FLAGS.use_tpu),
             warm_start_from='this/path/does/not/exist')
Example #3
 def test_end2end_inception_v3_warm_up_from(self):
   """End-to-end test of model_train script."""
   checkpoint_dir = tf_test_utils.test_tmpdir('inception_v3_warm_up_from')
   tf_test_utils.write_fake_checkpoint('inception_v3', self.test_session(),
                                       checkpoint_dir)
   self._run_tiny_training(
       model_name='inception_v3',
       dataset=data_providers_test.make_golden_dataset(use_tpu=FLAGS.use_tpu),
       warm_start_from=checkpoint_dir + '/model')
 def test_end2end_inception_v3_embedding_invalid_embedding_size(self):
     """End-to-end test of model_train script with an invalid embedding size."""
     with six.assertRaisesRegex(
             self, ValueError, 'Expected seq_type_embedding_size '
             'to be a positive number but saw -100 '
             'instead.'):
         FLAGS.seq_type_embedding_size = -100
         self._run_tiny_training(
             model_name='inception_v3_embedding',
             dataset=data_providers_test.make_golden_dataset(
                 use_tpu=FLAGS.use_tpu))
Example #5
 def test_end2end_inception_v3_warm_up_allow_different_num_channels(self):
   """End-to-end test of model_train script."""
   FLAGS.allow_warmstart_from_different_num_channels = True
   checkpoint_dir = tf_test_utils.test_tmpdir(
       'inception_v3_warm_up_allow_different_num_channels')
   tf_test_utils.write_fake_checkpoint(
       'inception_v3',
       self.test_session(),
       checkpoint_dir,
       num_channels=dv_constants.PILEUP_NUM_CHANNELS + 1)
   self._run_tiny_training(
       model_name='inception_v3',
       dataset=data_providers_test.make_golden_dataset(use_tpu=FLAGS.use_tpu),
       warm_start_from=checkpoint_dir + '/model')
Example #6
 def test_end2end_inception_v3_warm_up_from_mobilenet_v1(self):
   """Tests the behavior when warm start from mobilenet but train inception."""
   checkpoint_dir = tf_test_utils.test_tmpdir(
       'inception_v3_warm_up_from_mobilenet_v1')
   tf_test_utils.write_fake_checkpoint('mobilenet_v1', self.test_session(),
                                       checkpoint_dir)
   self.assertTrue(
       tf_test_utils.check_equals_checkpoint_top_scopes(
           checkpoint_dir + '/model', ['MobilenetV1', 'global_step']))
   self._run_tiny_training(
       model_name='inception_v3',
       dataset=data_providers_test.make_golden_dataset(use_tpu=FLAGS.use_tpu),
       warm_start_from=checkpoint_dir + '/model')
   self.assertTrue(
       tf_test_utils.check_equals_checkpoint_top_scopes(
           FLAGS.train_dir + '/model.ckpt-1', ['InceptionV3', 'global_step']))
  def test_end2end(self, model_name, mock_get_dataset):
    """End-to-end test of model_eval."""
    self._write_fake_checkpoint(model_name)

    # Start up eval, loading that checkpoint.
    FLAGS.batch_size = 2
    FLAGS.checkpoint_dir = self.checkpoint_dir
    FLAGS.eval_dir = tf.test.get_temp_dir()
    FLAGS.max_evaluations = 1
    FLAGS.max_examples = 2
    FLAGS.model_name = model_name
    FLAGS.dataset_config_pbtxt = '/path/to/mock.pbtxt'
    # Always try to read in compressed inputs to stress that case. Uncompressed
    # inputs are certain to work. This test is expensive to run, so we want to
    # minimize the number of times we need to run this.
    mock_get_dataset.return_value = data_providers_test.make_golden_dataset(
        compressed_inputs=True)
    model_eval.main(0)
    mock_get_dataset.assert_called_once_with(FLAGS.dataset_config_pbtxt)
    def test_end2end(self, model_name, mock_get_dataset):
        """End-to-end test of model_eval."""
        checkpoint_dir = tf.test.get_temp_dir()

        # Create a model with 3 classes, and save it to our checkpoint dir.
        with self.test_session() as sess:
            model = modeling.get_model(model_name)
            # Fall back to a default shape for models without an input image shape.
            h, w = getattr(model, 'input_image_shape', (100, 221))
            images = tf.placeholder(tf.float32,
                                    shape=(4, h, w,
                                           pileup_image.DEFAULT_NUM_CHANNEL))
            model.create(images, num_classes=3, is_training=True)
            # This is gross, but necessary as model_eval assumes the model was trained
            # with model_train which uses exp moving averages. Unfortunately we cannot
            # just call into model_train as it uses FLAGS which conflict with the
            # flags in use by model_eval. So we inline the creation of the EMA here.
            variable_averages = tf.train.ExponentialMovingAverage(
                FLAGS.moving_average_decay, slim.get_or_create_global_step())
            tf.add_to_collection(
                tf.GraphKeys.UPDATE_OPS,
                variable_averages.apply(slim.get_model_variables()))
            sess.run(tf.global_variables_initializer())
            save = tf.train.Saver(slim.get_variables())
            save.save(sess, os.path.join(checkpoint_dir, 'model'))

        # Start up eval, loading that checkpoint.
        FLAGS.batch_size = 2
        FLAGS.checkpoint_dir = checkpoint_dir
        FLAGS.eval_dir = tf.test.get_temp_dir()
        FLAGS.batches_per_eval_step = 1
        FLAGS.max_evaluations = 1
        FLAGS.eval_interval_secs = 0
        FLAGS.model_name = model_name
        FLAGS.dataset_config_pbtxt = '/path/to/mock.pbtxt'
        # Always try to read in compressed inputs to stress that case. Uncompressed
        # inputs are certain to work. This test is expensive to run, so we want to
        # minimize the number of times we need to run this.
        mock_get_dataset.return_value = data_providers_test.make_golden_dataset(
            compressed_inputs=True)
        model_eval.main(0)
        mock_get_dataset.assert_called_once_with(FLAGS.dataset_config_pbtxt)
Example #10
 def test_end2end_inception_v3_warm_up_by_default_fail_diff_num_channels(self):
   """End-to-end test of model_train script."""
   checkpoint_dir = tf_test_utils.test_tmpdir(
       'test_end2end_inception_v3_warm_up_by_default_fail_diff_num_channels')
   tf_test_utils.write_fake_checkpoint(
       'inception_v3',
       self.test_session(),
       checkpoint_dir,
       num_channels=dv_constants.PILEUP_NUM_CHANNELS + 1)
   with self.assertRaisesRegex(
       ValueError,
       r'Shape of variable InceptionV3/Conv2d_1a_3x3/weights:0 \(\(.*\)\) '
       r'doesn\'t match with shape of tensor '
       r'InceptionV3/Conv2d_1a_3x3/weights \(\[.*\]\) from checkpoint reader.'
   ):
     self._run_tiny_training(
         model_name='inception_v3',
         dataset=data_providers_test.make_golden_dataset(
             use_tpu=FLAGS.use_tpu),
         warm_start_from=checkpoint_dir + '/model')
 def test_training_works_with_compressed_inputs(self):
   """End-to-end test of model_train script."""
   self._run_tiny_training(
       model_name='mobilenet_v1',
       dataset=data_providers_test.make_golden_dataset(compressed_inputs=True))
 def test_end2end(self, model_name):
   """End-to-end test of model_train script."""
   self._run_tiny_training(
       model_name=model_name,
       dataset=data_providers_test.make_golden_dataset())
Example #13
  def test_fixed_eval_sees_the_same_evals(self, mock_get_dataset,
                                          mock_checkpoints_iterator):
    dataset = data_providers_test.make_golden_dataset()
    n_checkpoints = 3
    checkpoints = [
        self._write_fake_checkpoint('constant', name='model' + str(i))
        for i in range(n_checkpoints)
    ]

    # Setup our mocks.
    mock_checkpoints_iterator.return_value = checkpoints
    mock_get_dataset.return_value = dataset

    # Start up eval, loading that checkpoint.
    FLAGS.batch_size = 2
    FLAGS.checkpoint_dir = self.checkpoint_dir
    FLAGS.eval_dir = tf.test.get_temp_dir()
    FLAGS.max_evaluations = n_checkpoints
    FLAGS.model_name = 'constant'
    FLAGS.dataset_config_pbtxt = '/path/to/mock.pbtxt'
    model_eval.main(0)

    self.assertEqual(mock_get_dataset.call_args_list,
                     [mock.call(FLAGS.dataset_config_pbtxt)] * n_checkpoints)

    metrics = [
        model_eval.read_metrics(checkpoint, eval_dir=FLAGS.eval_dir)
        for checkpoint in checkpoints
    ]

    # Check that our metrics are what we expect them to be.
    # See b/62864044 for details on how to compute these counts:
    # Counts of labels in our golden dataset (count  label):
    #   1  0
    #  12  1
    #  35  2
    expected_values_for_all_exact = {
        # We call everything het, so only the 12 variants with label 1 are
        # correct out of 48 total examples (1 with label 0, 12 with label 1,
        # 35 with label 2), giving an accuracy of 12 / 48 = 0.25.
        'Accuracy/All': 0.25,
        # We don't have any FNs because we call everything het.
        'FNs/All': 0,
        # One of our labels is 0, which we call het, giving us 1 FP.
        'FPs/All': 1.0,
        # We call everything as het, so the recall has to be 1.
        'Recall/All': 1.0,
        # redacted
        # # We don't call anything but hets, so TNs has to be 0.
        # 'TNs/All': 0,
        # We find all positives, so this has to be 47.
        'TPs/All': 47,
    }
    for key, expected_value in expected_values_for_all_exact.items():
      self.assertEqual(metrics[0][key], expected_value)

    expected_values_for_all_close = {
        # We called 47 / 48 correctly.
        'Precision/All': 47. / 48,
    }
    for key, expected_value in expected_values_for_all_close.items():
      self.assertAlmostEqual(metrics[0][key], expected_value, places=6)

    for m1, m2 in zip(metrics, metrics[1:]):
      self.assertEqual(m1, m2)
Example #14
    def test_fixed_eval_sees_the_same_evals(self,
                                            mock_get_input_fn_from_dataset,
                                            mock_checkpoints_iterator):
        dataset = data_providers_test.make_golden_dataset(
            use_tpu=FLAGS.use_tpu)
        n_checkpoints = 3
        checkpoints = [
            tf_test_utils.write_fake_checkpoint('constant',
                                                self.test_session(),
                                                self.checkpoint_dir,
                                                FLAGS.moving_average_decay,
                                                name='model' + str(i))
            for i in range(n_checkpoints)
        ]

        # Setup our mocks.
        mock_checkpoints_iterator.return_value = checkpoints
        mock_get_input_fn_from_dataset.return_value = dataset

        # Start up eval, loading that checkpoint.
        FLAGS.batch_size = 2
        FLAGS.checkpoint_dir = self.checkpoint_dir
        FLAGS.eval_name = self.eval_name
        FLAGS.max_evaluations = n_checkpoints
        FLAGS.model_name = 'constant'
        FLAGS.dataset_config_pbtxt = '/path/to/mock.pbtxt'
        FLAGS.master = ''
        model_eval.main(0)

        self.assertEqual(mock_get_input_fn_from_dataset.call_args_list, [
            mock.call(use_tpu=FLAGS.use_tpu,
                      dataset_config_filename=FLAGS.dataset_config_pbtxt,
                      mode=tf.estimator.ModeKeys.EVAL)
        ])

        metrics = [
            model_eval.read_metrics(checkpoint, eval_name=FLAGS.eval_name)
            for checkpoint in checkpoints
        ]

        # Check that our metrics are what we expect them to be.
        # See b/62864044 for details on how to compute these counts:
        # Counts of labels in our golden dataset (count  label):
        #   1  0
        #  12  1
        #  35  2
        expected_values_for_all_exact = {
            # We call everything het, so only the 12 variants with label 1 are
            # correct out of 48 total examples (1 with label 0, 12 with label 1,
            # 35 with label 2), giving an accuracy of 12 / 48 = 0.25.
            'Accuracy/All': 0.25,
            # We don't have any FNs because we call everything het.
            'FNs/All': 0,
            # One of our labels is 0, which we call het, giving us 1 FP.
            'FPs/All': 1.0,
            # We call everything as het, so the recall has to be 1.
            'Recall/All': 1.0,
            # redacted
            # # We don't call anything but hets, so TNs has to be 0.
            # 'TNs/All': 0,
            # We find 47 positives, so this has to be 47.
            'TPs/All': 47,
        }
        for key, expected_value in expected_values_for_all_exact.items():
            print(str(key) + '=' + str(metrics[0][key]))

        for key, expected_value in expected_values_for_all_exact.items():
            self.assertEqual(metrics[0][key], expected_value)

        expected_values_for_all_close = {
            # Precision: 47 of the 48 het calls are correct, so 47 / 48 ~ 0.979167.
            'Precision/All': 0.979167,
            # F1 = 2 * P * R / (P + R) = (2 * 47/48) / (1 + 47/48) ~ 0.989474.
            'F1/All': 0.989474,
        }
        for key, expected_value in expected_values_for_all_close.items():
            self.assertAlmostEqual(metrics[0][key], expected_value, places=6)

        for m1, m2 in zip(metrics, metrics[1:]):
            self.assertEqual(m1, m2)
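
A minimal standalone sketch of the arithmetic behind the expected metric values above, using only the TP/FP/FN counts stated in the comments (everything is called het, so the 47 examples with label 1 or 2 are true positives and the single label-0 example is a false positive); this is not part of the test itself:

# Label counts in the golden dataset: 1 example with label 0, 12 with label 1,
# 35 with label 2, for 48 examples total.
tp, fp, fn = 47.0, 1.0, 0.0

accuracy = 12.0 / 48.0                 # only the 12 label-1 calls are exactly right
precision = tp / (tp + fp)             # 47 / 48 ~ 0.979167
recall = tp / (tp + fn)                # 1.0, since nothing is missed
f1 = 2 * precision * recall / (precision + recall)  # ~ 0.989474

assert accuracy == 0.25
assert abs(precision - 0.979167) < 1e-6
assert abs(f1 - 0.989474) < 1e-6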
Example #15
 def test_training_works_with_compressed_inputs(self):
     """End-to-end test of model_train script."""
     self._run_tiny_training(
         model_name='mobilenet_v1',
         dataset=data_providers_test.make_golden_dataset(
             compressed_inputs=True, use_tpu=FLAGS.use_tpu))
Example #16
 def test_end2end(self, model_name):
     """End-to-end test of model_train script."""
     self._run_tiny_training(
         model_name=model_name,
         dataset=data_providers_test.make_golden_dataset(
             use_tpu=FLAGS.use_tpu))