def write_fake_checkpoint(model_name,
                          session,
                          checkpoint_dir,
                          moving_average_decay=_MOVING_AVERAGE_DECAY,
                          name='model',
                          height=dv_constants.PILEUP_DEFAULT_HEIGHT,
                          width=dv_constants.PILEUP_DEFAULT_WIDTH,
                          num_channels=dv_constants.PILEUP_NUM_CHANNELS):
  """Writes a fake TensorFlow checkpoint to checkpoint_dir."""
  path = os.path.join(checkpoint_dir, name)
  with session as sess:
    model = modeling.get_model(model_name)
    # Needed to protect ourselves against models without an input image shape.
    h, w = getattr(model, 'input_image_shape', (height, width))
    images = tf.compat.v1.placeholder(tf.float32, shape=(4, h, w, num_channels))
    model.create(images, num_classes=3, is_training=True)
    # This is gross, but necessary as model_eval assumes the model was trained
    # with model_train which uses exp moving averages. Unfortunately we cannot
    # just call into model_train as it uses FLAGS which conflict with the
    # flags in use by model_eval. So we inline the creation of the EMA here.
    variable_averages = tf.train.ExponentialMovingAverage(
        moving_average_decay, tf.compat.v1.train.get_or_create_global_step())
    tf.compat.v1.add_to_collection(
        tf.compat.v1.GraphKeys.UPDATE_OPS,
        variable_averages.apply(slim.get_model_variables()))
    sess.run(tf.compat.v1.global_variables_initializer())
    save = tf.compat.v1.train.Saver(slim.get_variables())
    save.save(sess, path)
  return path
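
For reference, a minimal sketch of how a test might drive the helper above (the test class and method names are hypothetical; it assumes a tf.test.TestCase environment and the same imports as the snippet):

class FakeCheckpointTest(tf.test.TestCase):

  def test_write_fake_checkpoint(self):
    # get_temp_dir() and test_session() are standard tf.test.TestCase helpers.
    path = write_fake_checkpoint('inception_v3', self.test_session(),
                                 self.get_temp_dir())
    # Saver.save writes <path>.index and <path>.data-* next to a
    # 'checkpoint' state file, which checkpoint_exists detects.
    self.assertTrue(tf.compat.v1.train.checkpoint_exists(path))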
  def _write_fake_checkpoint(self,
                             model_name,
                             checkpoint_dir=None,
                             name='model'):
    if checkpoint_dir is None:
      checkpoint_dir = self.checkpoint_dir

    path = os.path.join(checkpoint_dir, name)
    with self.test_session() as sess:
      model = modeling.get_model(model_name)
      # Needed to protect ourselves against models without an input image shape.
      h, w = getattr(model, 'input_image_shape', (100, 221))
      images = tf.placeholder(
          tf.float32, shape=(4, h, w, pileup_image.DEFAULT_NUM_CHANNEL))
      model.create(images, num_classes=3, is_training=True)
      # This is gross, but necessary as model_eval assumes the model was trained
      # with model_train which uses exp moving averages. Unfortunately we cannot
      # just call into model_train as it uses FLAGS which conflict with the
      # flags in use by model_eval. So we inline the creation of the EMA here.
      variable_averages = tf.train.ExponentialMovingAverage(
          FLAGS.moving_average_decay, tf.train.get_or_create_global_step())
      tf.add_to_collection(tf.GraphKeys.UPDATE_OPS,
                           variable_averages.apply(
                               tf.contrib.framework.get_model_variables()))
      sess.run(tf.global_variables_initializer())
      save = tf.train.Saver(tf.contrib.framework.get_variables())
      save.save(sess, path)
    return path
Example #4
def main(argv=()):
    with errors.clean_commandline_error_exit():
        if len(argv) > 1:
            errors.log_and_raise(
                'Command line parsing failure: call_variants does not accept '
                'positional arguments but some are present on the command line: '
                '"{}".'.format(str(argv)), errors.CommandLineError)
        del argv  # Unused.
        proto_utils.uses_fast_cpp_protos_or_die()

        logging_level.set_from_flag()

        if FLAGS.use_tpu:
            master = tf_utils.resolve_master(FLAGS.master, FLAGS.tpu_name,
                                             FLAGS.tpu_zone, FLAGS.gcp_project)
        else:
            master = ''

        model = modeling.get_model(FLAGS.model_name)
        call_variants(
            examples_filename=FLAGS.examples,
            checkpoint_path=FLAGS.checkpoint,
            model=model,
            execution_hardware=FLAGS.execution_hardware,
            output_file=FLAGS.outfile,
            max_batches=FLAGS.max_batches,
            batch_size=FLAGS.batch_size,
            master=master,
            use_tpu=FLAGS.use_tpu,
        )
Example #5
def main(_):
    tensor_shape = [FLAGS.height, FLAGS.width, FLAGS.channels]
    logging.info('Processing ckpt=%s, tensor_shape=%s.', FLAGS.checkpoint,
                 tensor_shape)
    freeze_graph(modeling.get_model('inception_v3'), FLAGS.checkpoint,
                 tensor_shape, FLAGS.output)
    logging.info('Output written to %s.', FLAGS.output)
Example #7
def run(target, unused_is_chief, device_fn, use_tpu):
  """Run training.

  Args:
     target: The target of the TensorFlow standard server to use. Can be the
       empty string to run locally using an inprocess server.
     device_fn: Device function used to assign ops to devices.
     use_tpu: turn on tpu code path.
  """
  if not FLAGS.dataset_config_pbtxt:
    logging.error('Need to specify --dataset_config_pbtxt')
    return

  g = tf.Graph()
  with g.as_default():
    with tf.device(device_fn):
      # If ps_tasks is zero, the local device is used. When using multiple
      # (non-local) replicas, the ReplicaDeviceSetter distributes the variables
      # across the different devices.

      tf_dataset = data_providers.get_input_fn_from_dataset(
          dataset_config_filename=FLAGS.dataset_config_pbtxt,
          mode=tf.estimator.ModeKeys.TRAIN,
          max_examples=FLAGS.max_examples,
          use_tpu=use_tpu)
      model = modeling.get_model(FLAGS.model_name)
      logging.info('Running training on %s with model %s and tpu %s',
                   tf_dataset, FLAGS.model_name, use_tpu)

      batches_per_epoch = tf_dataset.num_examples // FLAGS.batch_size
      logging.info('Batches per epoch %s', batches_per_epoch)
      params = dict(batches_per_epoch=batches_per_epoch,)
      estimator = model.make_estimator(
          batch_size=FLAGS.batch_size,
          model_dir=FLAGS.train_dir,
          params=params,
          use_tpu=use_tpu,
          master=target,
          start_from_checkpoint=FLAGS.start_from_checkpoint,
      )

      training_hooks = None
      if FLAGS.use_early_stopping:
        # redacted
        raise ValueError('Currently not implemented.')

      estimator.train(
          input_fn=tf_dataset,
          max_steps=FLAGS.number_of_steps,
          hooks=training_hooks)
 def assertCallVariantsEmitsNRecordsForRandomGuess(self, filename,
                                                   num_examples):
   outfile = test_utils.test_tmpfile('call_variants.tfrecord')
   model = modeling.get_model('random_guess')
   call_variants.call_variants(
       examples_filename=filename,
       checkpoint_path=modeling.SKIP_MODEL_INITIALIZATION_IN_TEST,
       model=model,
       output_file=outfile,
       batch_size=4,
       max_batches=None)
   call_variants_outputs = list(
       io_utils.read_tfrecords(outfile, deepvariant_pb2.CallVariantsOutput))
   # Check that we have the right number of output protos.
   self.assertEqual(len(call_variants_outputs), num_examples)
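
A hypothetical call site for the assertion helper above (it assumes cls.examples was loaded in setUpClass from testdata.GOLDEN_CALLING_EXAMPLES, as in a later snippet on this page):

  def test_call_variants_emits_one_record_per_example(self):
    examples_path = test_utils.test_tmpfile('examples.tfrecord')
    io_utils.write_tfrecords(self.examples, examples_path)
    # One CallVariantsOutput proto is expected per input example.
    self.assertCallVariantsEmitsNRecordsForRandomGuess(
        examples_path, num_examples=len(self.examples))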
Example #10
    def test_call_variants_with_empty_input(self):
        source_path = test_utils.test_tmpfile('empty.tfrecord')
        tfrecord.write_tfrecords([], source_path)
        # Make sure that prepare_inputs doesn't crash on empty input.
        ds = call_variants.prepare_inputs(source_path)
        m = modeling.get_model('random_guess')

        # The API specifies that OutOfRangeError is thrown in this case.
        batches = list(_get_infer_batches(ds, model=m, batch_size=1))
        with self.test_session() as sess:
            sess.run(tf.compat.v1.local_variables_initializer())
            sess.run(tf.compat.v1.global_variables_initializer())
            try:
                _ = sess.run(batches)
            except tf.errors.OutOfRangeError:
                pass
Example #11
  def assertCallVariantsEmitsNRecordsForInceptionV3(self, filename,
                                                    num_examples):
    outfile = test_utils.test_tmpfile('inception_v3.call_variants.tfrecord')
    model = modeling.get_model('inception_v3')
    checkpoint_path = _LEAVE_MODEL_UNINITIALIZED

    call_variants.call_variants(
        examples_filename=filename,
        checkpoint_path=checkpoint_path,
        model=model,
        output_file=outfile,
        batch_size=4,
        max_batches=None)
    call_variants_outputs = list(
        io_utils.read_tfrecords(outfile, deepvariant_pb2.CallVariantsOutput))
    # Check that we have the right number of output protos.
    self.assertEqual(len(call_variants_outputs), num_examples)
Example #12
 def assertCallVariantsEmitsNRecordsForRandomGuess(self, filename,
                                                   num_examples):
     checkpoint_path = _LEAVE_MODEL_UNINITIALIZED
     outfile = test_utils.test_tmpfile('call_variants.tfrecord')
     model = modeling.get_model('random_guess')
     call_variants.call_variants(examples_filename=filename,
                                 checkpoint_path=checkpoint_path,
                                 model=model,
                                 output_file=outfile,
                                 batch_size=4,
                                 max_batches=None,
                                 master='',
                                 use_tpu=FLAGS.use_tpu)
     call_variants_outputs = list(
         tfrecord.read_tfrecords(outfile,
                                 deepvariant_pb2.CallVariantsOutput))
     # Check that we have the right number of output protos.
     self.assertEqual(len(call_variants_outputs), num_examples)
    def test_end2end(self, model_name, mock_get_dataset):
        """End-to-end test of model_eval."""
        checkpoint_dir = tf.test.get_temp_dir()

        # Create a model with 3 classes, and save it to our checkpoint dir.
        with self.test_session() as sess:
            model = modeling.get_model(model_name)
            # Needed to protect ourselves against models without an input image shape.
            h, w = getattr(model, 'input_image_shape', (100, 221))
            images = tf.placeholder(tf.float32,
                                    shape=(4, h, w,
                                           pileup_image.DEFAULT_NUM_CHANNEL))
            model.create(images, num_classes=3, is_training=True)
            # This is gross, but necessary as model_eval assumes the model was trained
            # with model_train which uses exp moving averages. Unfortunately we cannot
            # just call into model_train as it uses FLAGS which conflict with the
            # flags in use by model_eval. So we inline the creation of the EMA here.
            variable_averages = tf.train.ExponentialMovingAverage(
                FLAGS.moving_average_decay, slim.get_or_create_global_step())
            tf.add_to_collection(
                tf.GraphKeys.UPDATE_OPS,
                variable_averages.apply(slim.get_model_variables()))
            sess.run(tf.global_variables_initializer())
            save = tf.train.Saver(slim.get_variables())
            save.save(sess, os.path.join(checkpoint_dir, 'model'))

        # Start up eval, loading that checkpoint.
        FLAGS.batch_size = 2
        FLAGS.checkpoint_dir = checkpoint_dir
        FLAGS.eval_dir = tf.test.get_temp_dir()
        FLAGS.batches_per_eval_step = 1
        FLAGS.max_evaluations = 1
        FLAGS.eval_interval_secs = 0
        FLAGS.model_name = model_name
        FLAGS.dataset_config_pbtxt = '/path/to/mock.pbtxt'
        # Always try to read in compressed inputs to stress that case. Uncompressed
        # inputs are certain to work. This test is expensive to run, so we want to
        # minimize the number of times we need to run this.
        mock_get_dataset.return_value = data_providers_test.make_golden_dataset(
            compressed_inputs=True)
        model_eval.main(0)
        mock_get_dataset.assert_called_once_with(FLAGS.dataset_config_pbtxt)
Example #14
def main(argv=()):
  with errors.clean_commandline_error_exit():
    if len(argv) > 1:
      errors.log_and_raise(
          'Command line parsing failure: call_variants does not accept '
          'positional arguments but some are present on the command line: '
          '"{}".'.format(str(argv)), errors.CommandLineError)
    del argv  # Unused.
    proto_utils.uses_fast_cpp_protos_or_die()

    logging_level.set_from_flag()

    model = modeling.get_model(FLAGS.model_name)
    call_variants(
        examples_filename=FLAGS.examples,
        checkpoint_path=FLAGS.checkpoint,
        model=model,
        execution_hardware=FLAGS.execution_hardware,
        output_file=FLAGS.outfile,
        max_batches=FLAGS.max_batches,
        batch_size=FLAGS.batch_size)
Example #15
def main(argv=()):
    with errors.clean_commandline_error_exit():
        if len(argv) > 1:
            errors.log_and_raise(
                'Command line parsing failure: call_variants does not accept '
                'positional arguments but some are present on the command line: '
                '"{}".'.format(str(argv)), errors.CommandLineError)
        del argv  # Unused.
        proto_utils.uses_fast_cpp_protos_or_die()

        logging_level.set_from_flag()

        # Give htslib authentication access to GCS.
        htslib_gcp_oauth.init()

        model = modeling.get_model(FLAGS.model_name)
        call_variants(examples_filename=FLAGS.examples,
                      checkpoint_path=FLAGS.checkpoint,
                      model=model,
                      execution_hardware=FLAGS.execution_hardware,
                      output_file=FLAGS.outfile,
                      max_batches=FLAGS.max_batches,
                      batch_size=FLAGS.batch_size)
def eval_loop(master,
              dataset_config_pbtxt,
              checkpoint_dir,
              model_name,
              batch_size,
              max_examples,
              eval_name,
              max_evaluations,
              use_tpu=False):
    """Evaluate incoming checkpoints, until the specified end."""
    logging.info('Running fixed eval for: %s', dataset_config_pbtxt)

    tf_dataset = data_providers.get_input_fn_from_dataset(
        dataset_config_filename=dataset_config_pbtxt,
        mode=tf.estimator.ModeKeys.EVAL,
        use_tpu=use_tpu,
    )

    best_ckpt = None
    ckpt_metric = FLAGS.best_checkpoint_metric
    ckpt_metric_increasing = ckpt_metric in increasing_metrics

    model = modeling.get_model(model_name)
    logging.info('Running evaluations on %s with model %s', tf_dataset, model)

    # Compute when to stop reading, in terms of batches.
    num_examples = tf_dataset.num_examples
    if max_examples is not None:
        num_examples = min(max_examples, num_examples)
    num_batches = num_examples // batch_size
    num_samples = batch_size * num_batches
    logging.info(
        'Dataset has %s samples, doing eval over %s; '
        'max_examples is %s, num examples to be used %s; num_batches is %s',
        tf_dataset.num_examples, num_samples, max_examples, num_examples,
        num_batches)

    # This loads EMA variables.
    eval_hooks = [h(checkpoint_dir) for h in model.session_eval_hooks()]

    classifier = model.make_estimator(batch_size=batch_size,
                                      model_dir=checkpoint_dir,
                                      use_tpu=use_tpu,
                                      master=master)

    def terminate_eval():
        logging.info('Terminating eval after %d seconds of no checkpoints',
                     FLAGS.eval_timeout)
        return True

    # Run evaluation when there's a new checkpoint
    num_evaluations = 0
    for ckpt in checkpoints_iterator(
            checkpoint_dir=checkpoint_dir,
            min_interval_secs=FLAGS.min_eval_interval_s,
            timeout=FLAGS.eval_timeout,
            timeout_fn=terminate_eval):

        logging.info('Starting to evaluate.')

        # For each step, calls input_fn, which returns one batch of data.
        # Evaluates until either steps batches are processed, or input_fn raises an
        # end-of-input exception (OutOfRangeError or StopIteration).
        eval_results = classifier.evaluate(input_fn=tf_dataset,
                                           steps=num_batches,
                                           hooks=eval_hooks,
                                           checkpoint_path=ckpt,
                                           name=eval_name)
        logging.info('Eval results: %s', eval_results)

        # Track best checkpoint seen so far, measured by ckpt_metric.
        if not best_ckpt:
            # If the training jobs died, pick up where we left off.
            try:
                best_metrics = read_metrics(ckpt, eval_name,
                                            'best_checkpoint.metrics')
                logging.info('Found existing best_checkpoint: %s',
                             best_metrics)
                best_ckpt = (best_metrics, ckpt)
            except NotFoundError:
                logging.info('best_checkpoint file does not exist.')
                best_ckpt = (eval_results, ckpt)
                _write_best_checkpoint(ckpt, eval_results, eval_name)
        if ((ckpt_metric_increasing
             and eval_results[ckpt_metric] > best_ckpt[0][ckpt_metric]) or
            (not ckpt_metric_increasing
             and eval_results[ckpt_metric] < best_ckpt[0][ckpt_metric])):
            best_ckpt = (eval_results, ckpt)
            _write_best_checkpoint(ckpt, eval_results, eval_name)

        _write_checkpoint_metrics(ckpt, eval_results, eval_name)

        # An alternative strategy might check step-number-of-ckpt >= train_steps.
        num_evaluations += 1
        if max_evaluations is not None and num_evaluations >= max_evaluations:
            logging.info('Evaluation finished after %d evaluations',
                         num_evaluations)
            break

    return
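
The best-checkpoint update above hinges on whether the chosen metric improves by increasing or decreasing; the inlined condition is equivalent to this small predicate (a sketch only, not a helper that exists in the source):

def _is_better(eval_results, best_results, metric, metric_increasing):
  """Returns True if eval_results improves on best_results for metric."""
  if metric_increasing:
    return eval_results[metric] > best_results[metric]
  return eval_results[metric] < best_results[metric]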
 def test_call_variants_with_empty_input(self):
   source_path = test_utils.test_tmpfile('empty.tfrecord')
   io_utils.write_tfrecords([], source_path)
   # Make sure that prepare_inputs doesn't crash on empty input.
   call_variants.prepare_inputs(
       source_path, modeling.get_model('random_guess'), batch_size=1)
Example #20
def run(target, is_chief, device_fn):
  """Run training.

  Args:
     target: The target of the TensorFlow standard server to use. Can be the
       empty string to run locally using an in-process server.
     is_chief: Boolean indicating whether this process is the chief.
     device_fn: Device function used to assign ops to devices.
  """
  if not FLAGS.dataset_config_pbtxt:
    logging.error('Need to specify --dataset_config_pbtxt')
    return

  g = tf.Graph()
  with g.as_default():
    model = modeling.get_model(FLAGS.model_name)
    dataset = data_providers.get_dataset(FLAGS.dataset_config_pbtxt)
    print('Running training on {} with model {}\n'.format(dataset, model))

    with tf.device(device_fn):
      # If ps_tasks is zero, the local device is used. When using multiple
      # (non-local) replicas, the ReplicaDeviceSetter distributes the variables
      # across the different devices.
      images, labels, _ = data_providers.make_batches(
          dataset.get_slim_dataset(), model, FLAGS.batch_size, mode='TRAIN')
      endpoints = model.create(images, dataset.num_classes, is_training=True)
      labels = slim.one_hot_encoding(labels, dataset.num_classes)
      total_loss = loss(
          endpoints['Logits'], labels, label_smoothing=FLAGS.label_smoothing)

      # Setup the moving averages:
      moving_average_variables = slim.get_model_variables()
      moving_average_variables.extend(slim.losses.get_losses())
      moving_average_variables.append(total_loss)

      variable_averages = tf.train.ExponentialMovingAverage(
          FLAGS.moving_average_decay, slim.get_or_create_global_step())

      tf.add_to_collection(tf.GraphKeys.UPDATE_OPS,
                           variable_averages.apply(moving_average_variables))

      # Configure the learning rate using an exponential decay.
      decay_steps = int(((1.0 * dataset.num_examples) / FLAGS.batch_size) *
                        FLAGS.num_epochs_per_decay)

      learning_rate = tf.train.exponential_decay(
          FLAGS.learning_rate,
          slim.get_or_create_global_step(),
          decay_steps,
          FLAGS.learning_rate_decay_factor,
          staircase=True)

      opt = tf.train.RMSPropOptimizer(learning_rate, FLAGS.rmsprop_decay,
                                      FLAGS.rmsprop_momentum,
                                      FLAGS.rmsprop_epsilon)

      # Create training op
      train_tensor = slim.learning.create_train_op(
          total_loss,
          optimizer=opt,
          update_ops=tf.get_collection(tf.GraphKeys.UPDATE_OPS))

      # Summaries:
      slim.summaries.add_histogram_summaries(slim.get_model_variables())
      slim.summaries.add_scalar_summaries(slim.losses.get_losses(), 'losses')
      slim.summaries.add_scalar_summary(total_loss, 'Total_Loss', 'losses')
      slim.summaries.add_scalar_summary(learning_rate, 'Learning_Rate',
                                        'training')
      slim.summaries.add_histogram_summaries(endpoints.values())
      slim.summaries.add_zero_fraction_summaries(endpoints.values())
      # redacted

      # Set start-up delay
      startup_delay_steps = FLAGS.task * FLAGS.startup_delay_steps

      init_fn = model_init_function(model, dataset.num_classes,
                                    FLAGS.start_from_checkpoint)

      saver = tf.train.Saver(
          max_to_keep=FLAGS.max_checkpoints_to_keep,
          keep_checkpoint_every_n_hours=FLAGS.keep_checkpoint_every_n_hours)

      # Train model
      slim.learning.train(
          train_tensor,
          number_of_steps=FLAGS.number_of_steps,
          logdir=FLAGS.train_dir,
          master=target,
          init_fn=init_fn,
          is_chief=is_chief,
          saver=saver,
          startup_delay_steps=startup_delay_steps,
          save_summaries_secs=FLAGS.save_summaries_secs,
          save_interval_secs=FLAGS.save_interval_secs)
Example #21
def eval_loop(master,
              dataset_config_pbtxt,
              checkpoint_dir,
              model_name,
              batch_size,
              max_examples,
              eval_name,
              max_evaluations,
              use_tpu=False):
    """Evaluate incoming checkpoints, until the specified end."""
    logging.info('Running fixed eval for: %s', dataset_config_pbtxt)

    tf_dataset = data_providers.get_input_fn_from_dataset(
        dataset_config_filename=dataset_config_pbtxt,
        mode=tf.estimator.ModeKeys.EVAL,
        use_tpu=use_tpu,
    )

    model = modeling.get_model(model_name)
    logging.info('Running evaluations on %s with model %s', tf_dataset, model)

    # Compute when to stop reading, in terms of batches.
    num_batches = min(max_examples, tf_dataset.num_examples) // batch_size
    num_samples = batch_size * num_batches
    logging.info(
        'Dataset has %d samples, doing eval over %d; '
        'max_examples is %d, num_batches is %d', tf_dataset.num_examples,
        num_samples, max_examples, num_batches)
    batches_per_epoch = tf_dataset.num_examples / batch_size

    # This loads EMA variables.
    eval_hooks = [h(checkpoint_dir) for h in model.session_eval_hooks()]

    classifier = model.make_estimator(
        batch_size=batch_size,
        model_dir=checkpoint_dir,
        params={'batches_per_epoch': batches_per_epoch},
        use_tpu=use_tpu,
        master=master,
    )

    def terminate_eval():
        logging.info('Terminating eval after %d seconds of no checkpoints',
                     FLAGS.eval_timeout)
        return True

    # Run evaluation when there's a new checkpoint
    num_evaluations = 0
    for ckpt in checkpoints_iterator(
            checkpoint_dir=checkpoint_dir,
            min_interval_secs=FLAGS.min_eval_interval_s,
            timeout=FLAGS.eval_timeout,
            timeout_fn=terminate_eval):

        logging.info('Starting to evaluate.')

        # For each step, calls input_fn, which returns one batch of data.
        # Evaluates until either steps batches are processed, or input_fn raises an
        # end-of-input exception (OutOfRangeError or StopIteration).
        eval_results = classifier.evaluate(input_fn=tf_dataset,
                                           steps=num_batches,
                                           hooks=eval_hooks,
                                           checkpoint_path=ckpt,
                                           name=eval_name)
        logging.info('Eval results: %s', eval_results)

        _write_checkpoint_metrics(ckpt, eval_results, eval_name)

        # An alternative strategy might check step-number-of-ckpt >= train_steps.
        num_evaluations += 1
        if max_evaluations is not None and num_evaluations >= max_evaluations:
            logging.info('Evaluation finished after %d evaluations',
                         num_evaluations)
            break

    return
Example #22
 def setUpClass(cls):
     super(InceptionV3ModelTest, cls).setUpClass()
     cls.model = modeling.get_model('inception_v3')
Example #23
def eval_loop(master, dataset_config_pbtxt, checkpoint_dir, model_name,
              batch_size, moving_average_decay, max_examples, eval_dir,
              max_evaluations):
    logging.info('Running fixed eval for: %s', dataset_config_pbtxt)

    num_evaluations = 0
    for checkpoint_path in checkpoints_iterator(checkpoint_dir):
        logging.info('Using checkpoint %s %d', checkpoint_path,
                     num_evaluations)

        g = tf.Graph()
        with g.as_default():
            tf_global_step = tf.train.get_or_create_global_step()

            # redacted
            model = modeling.get_model(model_name)
            dataset = data_providers.get_dataset(dataset_config_pbtxt)
            logging.info('Running evaluations on %s with model %s', dataset,
                         model)

            images, labels, encoded_variant = data_providers.make_batches(
                dataset.get_slim_dataset(), model, batch_size, mode='EVAL')
            endpoints = model.create(images,
                                     dataset.num_classes,
                                     is_training=False)
            predictions = tf.argmax(endpoints['Predictions'], 1)

            # For eval, explicitly add moving_mean and moving_variance variables to
            # the MOVING_AVERAGE_VARIABLES collection.
            variable_averages = tf.train.ExponentialMovingAverage(
                moving_average_decay, tf_global_step)

            for var in tf.get_collection('moving_vars'):
                tf.add_to_collection(tf.GraphKeys.MOVING_AVERAGE_VARIABLES,
                                     var)
            for var in slim.get_model_variables():
                tf.add_to_collection(tf.GraphKeys.MOVING_AVERAGE_VARIABLES,
                                     var)

            variables_to_restore = variable_averages.variables_to_restore()
            variables_to_restore[tf_global_step.op.name] = tf_global_step

            names_to_values, names_to_updates = make_metrics(
                predictions, labels, encoded_variant)

            for name, value in names_to_values.iteritems():
                slim.summaries.add_scalar_summary(value,
                                                  name,
                                                  print_summary=True)

            num_batches = int(
                math.floor(
                    min(max_examples, dataset.num_examples) /
                    float(batch_size)))
            num_samples = batch_size * num_batches
            logging.info('Dataset has %d samples, doing eval over %d',
                         dataset.num_examples, num_samples)

            names_to_values = slim.evaluation.evaluate_once(
                master=master,
                checkpoint_path=checkpoint_path,
                logdir=eval_dir,
                variables_to_restore=variables_to_restore,
                num_evals=num_batches,
                initial_op=tf.group(tf.global_variables_initializer(),
                                    tf.local_variables_initializer()),
                eval_op=names_to_updates.values(),
                final_op=names_to_values,
            )

            # --- LOW LEVEL [WIP], hangs, initialization seems busted ---
            # This is (marginally) nicer as it can eliminate the slim dep.
            # saver = tf.train.Saver(variables_to_restore)
            # scaffold = tf.train.Scaffold(saver=saver)
            # names_to_values = tf.contrib.training.evaluate_once(
            #     checkpoint_path=checkpoint_path,
            #     master=FLAGS.master,
            #     scaffold=scaffold,
            #     eval_ops=names_to_updates.values(),
            #     final_ops=names_to_values,
            # )

            _write_checkpoint_metrics(checkpoint_path,
                                      names_to_values,
                                      eval_dir=eval_dir)

        num_evaluations += 1
        if max_evaluations is not None and num_evaluations >= max_evaluations:
            return
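
checkpoints_iterator above is presumably tf.train.checkpoints_iterator (or its tf.contrib.training equivalent), which blocks until a new checkpoint appears in the directory and yields its path; a minimal standalone use looks like this (the path, interval, and timeout are illustrative):

import tensorflow as tf

for ckpt_path in tf.train.checkpoints_iterator(
    '/tmp/train_dir', min_interval_secs=60, timeout=3600):
  print('Found new checkpoint:', ckpt_path)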
Example #24
 def setUpClass(cls):
   cls.model = modeling.get_model('resnet_v2_50')
Example #25
 def setUpClass(cls):
   cls.model = modeling.get_model('inception_v2')
Example #26
 def setUpClass(cls):
   cls.model = modeling.get_model('mobilenet_v1')
Example #27
 def test_get_model_existing_models(self, model_name, expected_class):
   self.assertIsInstance(modeling.get_model(model_name), expected_class)
Example #30
 def setUpClass(cls):
     super(InceptionV3EmbeddingModelTest, cls).setUpClass()
     cls.model = modeling.get_model('inception_v3_embedding')
def main(_):
    proto_utils.uses_fast_cpp_protos_or_die()

    if not FLAGS.dataset_config_pbtxt:
        logging.error('Need to specify --dataset_config_pbtxt')
        return
    logging_level.set_from_flag()

    g = tf.Graph()
    with g.as_default():
        tf_global_step = slim.get_or_create_global_step()

        model = modeling.get_model(FLAGS.model_name)
        dataset = data_providers.get_dataset(FLAGS.dataset_config_pbtxt)
        print('Running evaluations on {} with model {}\n'.format(
            dataset, model))

        batch = data_providers.make_training_batches(
            dataset.get_slim_dataset(), model, FLAGS.batch_size)
        images, labels, encoded_truth_variants = batch
        endpoints = model.create(images,
                                 dataset.num_classes,
                                 is_training=False)
        predictions = tf.argmax(endpoints['Predictions'], 1)

        # For eval, explicitly add moving_mean and moving_variance variables to
        # the MOVING_AVERAGE_VARIABLES collection.
        variable_averages = tf.train.ExponentialMovingAverage(
            FLAGS.moving_average_decay, tf_global_step)

        for var in tf.get_collection('moving_vars'):
            tf.add_to_collection(tf.GraphKeys.MOVING_AVERAGE_VARIABLES, var)
        for var in slim.get_model_variables():
            tf.add_to_collection(tf.GraphKeys.MOVING_AVERAGE_VARIABLES, var)

        variables_to_restore = variable_averages.variables_to_restore()
        variables_to_restore[tf_global_step.op.name] = tf_global_step

        # Define the metrics:
        metrics = {
            'Accuracy': tf.contrib.metrics.streaming_accuracy,
            'Mean_absolute_error':
            tf.contrib.metrics.streaming_mean_absolute_error,
            'FPs': tf.contrib.metrics.streaming_false_positives,
            'FNs': tf.contrib.metrics.streaming_false_negatives,
        }

        def _make_selector(func):
            return select_variants_weights(func, encoded_truth_variants)

        selectors = {
            'All': None,
            'SNPs': _make_selector(variantutils.is_snp),
            'Indels': _make_selector(variantutils.is_indel),
            'Insertions': _make_selector(variantutils.has_insertion),
            'Deletions': _make_selector(variantutils.has_deletion),
            'BiAllelic': _make_selector(variantutils.is_biallelic),
            'MultiAllelic': _make_selector(variantutils.is_multiallelic),
            # These haven't proven particularly useful, but are left commented
            # out here in case someone wants to explore them further.
            # 'HomRef': tf.equal(labels, 0),
            # 'Het': tf.equal(labels, 1),
            # 'HomAlt': tf.equal(labels, 2),
            # 'NonRef': tf.greater(labels, 0),
        }
        metrics = calling_metrics(metrics, selectors, predictions, labels)
        names_to_values, names_to_updates = slim.metrics.aggregate_metric_map(
            metrics)

        for name, value in names_to_values.iteritems():
            slim.summaries.add_scalar_summary(value, name, print_summary=True)

        slim.evaluation.evaluation_loop(
            FLAGS.master,
            FLAGS.checkpoint_dir,
            logdir=FLAGS.eval_dir,
            num_evals=FLAGS.batches_per_eval_step,
            eval_op=names_to_updates.values(),
            variables_to_restore=variables_to_restore,
            max_number_of_evaluations=FLAGS.max_evaluations,
            eval_interval_secs=FLAGS.eval_interval_secs)
Example #32
 def setUpClass(cls):
     super(InceptionV3AttentionModelTest, cls).setUpClass()
     cls.model = modeling.get_model('attention_inception_v3',
                                    attention_module='se_block',
                                    attention_position='all')
Example #35
def run(target, unused_is_chief, device_fn, use_tpu):
  """Run training.

  Args:
     target: The target of the TensorFlow standard server to use. Can be the
       empty string to run locally using an inprocess server.
     device_fn: Device function used to assign ops to devices.
     use_tpu: turn on tpu code path.
  """
  if not FLAGS.dataset_config_pbtxt:
    logging.error('Need to specify --dataset_config_pbtxt')
    return

  g = tf.Graph()
  with g.as_default():
    with tf.device(device_fn):
      # If ps_tasks is zero, the local device is used. When using multiple
      # (non-local) replicas, the ReplicaDeviceSetter distributes the variables
      # across the different devices.

      tf_dataset = data_providers.get_input_fn_from_dataset(
          dataset_config_filename=FLAGS.dataset_config_pbtxt,
          mode=tf.estimator.ModeKeys.TRAIN,
          max_examples=FLAGS.max_examples,
          use_tpu=use_tpu)
      model = modeling.get_model(FLAGS.model_name)
      logging.info('Running training on %s with model %s and tpu %s',
                   tf_dataset, FLAGS.model_name, use_tpu)

      batches_per_epoch = tf_dataset.num_examples // FLAGS.batch_size
      logging.info('Batches per epoch %s', batches_per_epoch)
      params = dict(batches_per_epoch=batches_per_epoch,)
      estimator = model.make_estimator(
          batch_size=FLAGS.batch_size,
          model_dir=FLAGS.train_dir,
          params=params,
          use_tpu=use_tpu,
          master=target,
          start_from_checkpoint=FLAGS.start_from_checkpoint,
      )

      training_hooks = None
      if FLAGS.use_early_stopping:
        # Early stopping hook depends on existence of events directory.
        eval_dir = os.path.join(FLAGS.train_dir, FLAGS.early_stopping_directory)
        tf.gfile.MakeDirs(eval_dir)

        plateau_decrease = True
        if FLAGS.early_stopping_metric_direction == 'increase':
          plateau_decrease = False

        early_stopping_hook = metrics_hook.EarlyStoppingHook(
            events_dir=eval_dir,
            tag=FLAGS.early_stopping_tag,
            num_plateau_steps=FLAGS.early_stopping_num_plateau_steps,
            plateau_delta=FLAGS.early_stopping_plateau_delta,
            plateau_decrease=plateau_decrease,
            every_n_steps=FLAGS.early_stopping_every_n_steps)

        training_hooks = [early_stopping_hook]

      estimator.train(
          input_fn=tf_dataset,
          max_steps=FLAGS.number_of_steps,
          hooks=training_hooks)
Example #36
 def test_get_model_unknown_model_signals_error(self):
     with six.assertRaisesRegex(self, ValueError, 'Unknown model'):
         modeling.get_model('unknown_model_1234')
def run(target, is_chief, device_fn):
    """Run training.

  Args:
     target: The target of the TensorFlow standard server to use. Can be the
       empty string to run locally using an inprocess server.
     is_chief: Boolean indicating whether this process is the chief.
     device_fn: Device function used to assign ops to devices.
  """
    if not FLAGS.dataset_config_pbtxt:
        logging.error('Need to specify --dataset_config_pbtxt')
        return

    g = tf.Graph()
    with g.as_default():
        model = modeling.get_model(FLAGS.model_name)
        dataset = data_providers.get_dataset(FLAGS.dataset_config_pbtxt)
        print('Running training on {} with model {}\n'.format(dataset, model))

        with tf.device(device_fn):
            # If ps_tasks is zero, the local device is used. When using multiple
            # (non-local) replicas, the ReplicaDeviceSetter distributes the variables
            # across the different devices.
            images, labels, _ = data_providers.make_training_batches(
                dataset.get_slim_dataset(), model, FLAGS.batch_size)
            endpoints = model.create(images,
                                     dataset.num_classes,
                                     is_training=True)
            labels = slim.one_hot_encoding(labels, dataset.num_classes)
            total_loss = model.loss(endpoints, labels)

            # Setup the moving averages:
            moving_average_variables = slim.get_model_variables()
            moving_average_variables.extend(slim.losses.get_losses())
            moving_average_variables.append(total_loss)

            variable_averages = tf.train.ExponentialMovingAverage(
                FLAGS.moving_average_decay, slim.get_or_create_global_step())

            tf.add_to_collection(
                tf.GraphKeys.UPDATE_OPS,
                variable_averages.apply(moving_average_variables))

            # Configure the learning rate using an exponential decay.
            decay_steps = int(
                ((1.0 * dataset.num_examples) / FLAGS.batch_size) *
                FLAGS.num_epochs_per_decay)

            learning_rate = tf.train.exponential_decay(
                FLAGS.learning_rate,
                slim.get_or_create_global_step(),
                decay_steps,
                FLAGS.learning_rate_decay_factor,
                staircase=True)

            opt = tf.train.RMSPropOptimizer(learning_rate, FLAGS.rmsprop_decay,
                                            FLAGS.rmsprop_momentum,
                                            FLAGS.rmsprop_epsilon)

            # Create training op
            train_tensor = slim.learning.create_train_op(
                total_loss,
                optimizer=opt,
                update_ops=tf.get_collection(tf.GraphKeys.UPDATE_OPS))

            # Summaries:
            slim.summaries.add_histogram_summaries(slim.get_model_variables())
            slim.summaries.add_scalar_summaries(slim.losses.get_losses(),
                                                'losses')
            slim.summaries.add_scalar_summary(total_loss, 'Total_Loss',
                                              'losses')
            slim.summaries.add_scalar_summary(learning_rate, 'Learning_Rate',
                                              'training')
            slim.summaries.add_histogram_summaries(endpoints.values())
            slim.summaries.add_zero_fraction_summaries(endpoints.values())
            # redacted

            # Set start-up delay
            startup_delay_steps = FLAGS.task * FLAGS.startup_delay_steps

            init_fn = model_init_function(model, dataset.num_classes,
                                          FLAGS.start_from_checkpoint)

            saver = tf.train.Saver(
                max_to_keep=FLAGS.max_checkpoints_to_keep,
                keep_checkpoint_every_n_hours=FLAGS.keep_checkpoint_every_n_hours)

            # Train model
            slim.learning.train(train_tensor,
                                number_of_steps=FLAGS.number_of_steps,
                                logdir=FLAGS.train_dir,
                                master=target,
                                init_fn=init_fn,
                                is_chief=is_chief,
                                saver=saver,
                                startup_delay_steps=startup_delay_steps,
                                save_summaries_secs=FLAGS.save_summaries_secs,
                                save_interval_secs=FLAGS.save_interval_secs)
Example #38
parser.add_argument('--moving_average_decay',
                    type=float,
                    help='The decay to use for the moving average')
parser.add_argument('--channels',
                    default=6,
                    type=int,
                    help='Number of channels in input tensor')
parser.add_argument('--width',
                    default=221,
                    type=int,
                    help='Width of the input tensor')
parser.add_argument('--height',
                    default=100,
                    type=int,
                    help='Height of the input tensor')
args = parser.parse_args()

model = get_model('inception_v3')

out_node = 'InceptionV3/Predictions/Reshape_1'
in_node = 'input'

inp = tf.compat.v1.placeholder(
    shape=[1, args.height, args.width, args.channels],
    dtype=tf.float32,
    name=in_node)
b = model.create(inp, num_classes=3, is_training=False)

ema = tf.train.ExponentialMovingAverage(args.moving_average_decay)
variables_to_restore = ema.variables_to_restore()

load_ema = slim.assign_from_checkpoint_fn(args.checkpoint,
                                          variables_to_restore)
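
A plausible continuation of the truncated snippet above (a sketch, not the source's code): run the returned restore function inside a session before freezing the graph.

with tf.compat.v1.Session() as sess:
  load_ema(sess)  # Restores the EMA shadow values into the model variables.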
Example #39
 def setUpClass(cls):
     cls.examples = list(
         io_utils.read_tfrecords(testdata.GOLDEN_CALLING_EXAMPLES))
     cls.variants = [tf_utils.example_variant(ex) for ex in cls.examples]
     cls.model = modeling.get_model('random_guess')
Example #40
 def test_get_model_unknown_model_signals_error(self):
   with self.assertRaisesRegexp(ValueError, 'Unknown model'):
     modeling.get_model('unknown_model_1234')