Ejemplo n.º 1
0
 def testFailedExampleImageShape(self):
     # An empty Example carries no image/shape feature, so reading the shape
     # is expected to fail with a descriptive ValueError.
     example = example_pb2.Example()
     # assertRaisesRegex replaces the deprecated assertRaisesRegexp alias,
     # which was removed from unittest in Python 3.12.
     with self.assertRaisesRegex(
             ValueError, 'Invalid image/shape: we expect to find an '
             'image/shape field with length 3.'):
         tf_utils.example_image_shape(example)
    def test_create_pileup_examples(self):
        # Verifies that create_pileup_examples returns one example per
        # (alt alleles, image) pair produced by the mocked pileup image
        # creator, and that each example round-trips through the tf_utils
        # accessors.
        self.processor.pic = mock.Mock()
        self.add_mock('_encode_tensor',
                      side_effect=[
                          ('tensor1', self.default_shape, self.default_format),
                          ('tensor2', self.default_shape, self.default_format)
                      ])
        dv_call = mock.Mock()
        dv_call.variant = test_utils.make_variant(start=10,
                                                  alleles=['A', 'C', 'G'])
        alt1, alt2 = ['C'], ['G']
        expected = [(alt1, 'tensor1'), (alt2, 'tensor2')]
        # Hand the mock its own copy so the expectations below cannot be
        # affected by anything done to the returned list.
        self.processor.pic.create_pileup_images.return_value = list(expected)

        actual = self.processor.create_pileup_examples(dv_call)

        self.processor.pic.create_pileup_images.assert_called_once_with(
            dv_call)

        # The length check guards against zip() silently truncating below.
        self.assertEqual(len(actual), 2)
        for ex, (alt, img) in zip(actual, expected):
            self.assertEqual(tf_utils.example_alt_alleles(ex), alt)
            self.assertEqual(tf_utils.example_variant(ex), dv_call.variant)
            self.assertEqual(tf_utils.example_encoded_image(ex), img)
            self.assertEqual(tf_utils.example_image_shape(ex),
                             self.default_shape)
            self.assertEqual(tf_utils.example_image_format(ex),
                             self.default_format)
Ejemplo n.º 3
0
  def test_create_pileup_examples(self):
    # Verifies that create_pileup_examples returns one example per
    # (alt alleles, image) pair produced by the mocked pileup image creator,
    # and that each example round-trips through the tf_utils accessors.
    self.processor.pic = mock.Mock()
    self.add_mock(
        '_encode_tensor',
        side_effect=[('tensor1', self.default_shape, self.default_format),
                     ('tensor2', self.default_shape, self.default_format)])
    dv_call = mock.Mock()
    dv_call.variant = test_utils.make_variant(start=10, alleles=['A', 'C', 'G'])
    alt1, alt2 = ['C'], ['G']
    expected = [(alt1, 'tensor1'), (alt2, 'tensor2')]
    # Hand the mock its own copy so the expectations below cannot be affected
    # by anything done to the returned list.
    self.processor.pic.create_pileup_images.return_value = list(expected)

    actual = self.processor.create_pileup_examples(dv_call)

    self.processor.pic.create_pileup_images.assert_called_once_with(dv_call)

    # The length check guards against zip() silently truncating below.
    self.assertEqual(len(actual), 2)
    for ex, (alt, img) in zip(actual, expected):
      self.assertEqual(tf_utils.example_alt_alleles(ex), alt)
      self.assertEqual(tf_utils.example_variant(ex), dv_call.variant)
      self.assertEqual(tf_utils.example_encoded_image(ex), img)
      self.assertEqual(tf_utils.example_image_shape(ex), self.default_shape)
      self.assertEqual(tf_utils.example_image_format(ex), self.default_format)
Ejemplo n.º 4
0
    def test_create_pileup_examples(self):
        # Verifies that create_pileup_examples forwards the call (with the
        # reads fetched from the mocked pileup image creator) to
        # create_pileup_images and wraps each returned (alt alleles, image)
        # pair in an example readable through the tf_utils accessors.
        self.processor.pic = mock.Mock()
        self.processor.pic.get_reads.return_value = []
        self.add_mock('_encode_tensor',
                      side_effect=[(six.b('tensor1'), self.default_shape,
                                    self.default_format),
                                   (six.b('tensor2'), self.default_shape,
                                    self.default_format)])
        dv_call = mock.Mock()
        dv_call.variant = test_utils.make_variant(start=10,
                                                  alleles=['A', 'C', 'G'])
        alt1, alt2 = ['C'], ['G']
        expected = [(alt1, six.b('tensor1')), (alt2, six.b('tensor2'))]
        # Hand the mock its own copy so the expectations below cannot be
        # affected by anything done to the returned list.
        self.processor.pic.create_pileup_images.return_value = list(expected)

        actual = self.processor.create_pileup_examples(dv_call)

        self.processor.pic.create_pileup_images.assert_called_once_with(
            dv_call=dv_call,
            reads_for_samples=[[]],
            haplotype_alignments_for_samples=None,
            haplotype_sequences=None,
            sample_order=None)

        # The length check guards against zip() silently truncating below.
        self.assertLen(actual, 2)
        for ex, (alt, img) in zip(actual, expected):
            self.assertEqual(tf_utils.example_alt_alleles(ex), alt)
            self.assertEqual(tf_utils.example_variant(ex), dv_call.variant)
            self.assertEqual(tf_utils.example_encoded_image(ex), img)
            self.assertEqual(tf_utils.example_image_shape(ex),
                             self.default_shape)
            # The stored image format is compared as bytes.
            self.assertEqual(tf_utils.example_image_format(ex),
                             six.b(self.default_format))
Ejemplo n.º 5
0
 def testExampleImageShape(self):
     # The shape handed to make_example must be what example_image_shape
     # reads back from the resulting example.
     built = tf_utils.make_example(
         self.variant,
         self.alts,
         self.encoded_image,
         self.default_shape,
         self.default_format)
     observed_shape = tf_utils.example_image_shape(built)
     self.assertEqual(self.default_shape, observed_shape)
Ejemplo n.º 6
0
def call_variants(examples_filename,
                  checkpoint_path,
                  model,
                  output_file,
                  execution_hardware='auto',
                  batch_size=16,
                  max_batches=None,
                  use_tpu=False,
                  master=''):
  """Main driver of call_variants.

  Validates the input examples against the checkpoint, checks the requested
  execution hardware, builds the prediction stream, and writes one variant
  call per prediction to `output_file`.

  Args:
    examples_filename: Path of the TF examples to read. If no record can be
      read from it, an empty output file is written and the function returns.
    checkpoint_path: Checkpoint to load weights from. When None, the
      checkpoint/shape compatibility checks are skipped and no predict hooks
      are installed (unit tests use this path).
    model: Object providing `make_estimator` and `session_predict_hooks`.
    output_file: Path the resulting TFRecords are written to.
    execution_hardware: Must be one of `_ALLOW_EXECUTION_HARDWARE`. 'cpu'
      sets the GPU and TPU device counts to 0; 'accelerator' requires that
      the session lists at least one non-CPU device.
    batch_size: Forwarded to `model.make_estimator` and used to derive the
      batch counter that is logged and compared against `max_batches`.
    max_batches: If not None, stop consuming predictions once the batch
      counter exceeds this value.
    use_tpu: Forwarded to `prepare_inputs`, `model.make_estimator` and
      `write_variant_call`.
    master: Forwarded to `model.make_estimator`.

  Raises:
    ValueError: If the first example's image/format is not 'raw', if the
      channel count of the examples differs from the checkpoint's first
      layer, or if `execution_hardware` is not an allowed value.
    ExecutionHardwareError: If `execution_hardware` is 'accelerator' but only
      CPU devices are available.
  """
  if FLAGS.kmp_blocktime:
    os.environ['KMP_BLOCKTIME'] = FLAGS.kmp_blocktime
    logging.vlog(3,
                 'Set KMP_BLOCKTIME to {}'.format(os.environ['KMP_BLOCKTIME']))

  # Read a single TFExample to make sure we're not loading an older version.
  first_example = tf_utils.get_one_example_from_examples_path(examples_filename)
  if first_example is None:
    logging.warning(
        'Unable to read any records from %s. Output will contain '
        'zero records.', examples_filename)
    tfrecord.write_tfrecords([], output_file)
    return

  example_format = tf_utils.example_image_format(first_example)
  example_shape = tf_utils.example_image_shape(first_example)
  if example_format != six.b('raw'):
    raise ValueError('The TF examples in {} has image/format \'{}\' '
                     '(expected \'raw\') which means you might need to rerun '
                     'make_examples to generate the examples again.'.format(
                         examples_filename, example_format))
  logging.info('Shape of input examples: %s', str(example_shape))

  if checkpoint_path is not None:
    reader = tf.compat.v1.train.NewCheckpointReader(checkpoint_path)
    shape_map_for_layers = reader.get_variable_to_shape_map()
    first_layer = 'InceptionV3/Conv2d_1a_3x3/weights'
    # For a shape map of [3, 3, 6, 32] for the Conv2d_1a_3x3 layer, the 6
    # is the number of channels.
    num_channels_in_checkpoint_model = shape_map_for_layers[first_layer][2]
    if num_channels_in_checkpoint_model != example_shape[2]:
      raise ValueError('The number of channels in examples and checkpoint '
                       'should match, but the checkpoint has {} channels while '
                       'the examples have {}.'.format(
                           num_channels_in_checkpoint_model, example_shape[2]))
    # The model checkpoint includes information on the number of channels but
    # unfortunately not the width or height.
    if example_shape[0] not in [100, 300]:
      logging.warning('The height of the input image is not 100 (standard in '
                      'DeepVariant) or 300 (standard in DeepTrio). '
                      'Please double-check that the model is trained with the '
                      'same parameters and version of DeepVariant as you '
                      'generated the examples with. An error will not appear '
                      'when these are mismatched because of how InceptionV3 '
                      'works. Note that if you set --pileup_image_height in '
                      'DeepVariant, then you must use a model trained with '
                      'that same parameter.')

    if example_shape[1] != 221:
      logging.warning('The width of the input image is not 221 (standard in '
                      'DeepVariant). '
                      'Please double-check that the model is trained with the '
                      'same parameters and version of DeepVariant as you '
                      'generated the examples with. An error will not appear '
                      'when these are mismatched because of how InceptionV3 '
                      'works. Note that if you set --pileup_image_width in '
                      'DeepVariant, then you must use a model trained with '
                      'that same parameter.')

  # Check accelerator status.
  if execution_hardware not in _ALLOW_EXECUTION_HARDWARE:
    raise ValueError(
        'Unexpected execution_hardware={} value. Allowed values are {}'.format(
            execution_hardware, ','.join(_ALLOW_EXECUTION_HARDWARE)))
  init_op = tf.group(tf.compat.v1.global_variables_initializer(),
                     tf.compat.v1.local_variables_initializer())

  config = tf.compat.v1.ConfigProto()
  if FLAGS.config_string is not None:
    text_format.Parse(FLAGS.config_string, config)
  if execution_hardware == 'cpu':
    # Don't overwrite entire dictionary.
    config.device_count['GPU'] = 0
    config.device_count['TPU'] = 0

  # Perform sanity check.
  with tf.compat.v1.Session(config=config) as sess:
    sess.run(init_op)
    if execution_hardware == 'accelerator':
      if not any(dev.device_type != 'CPU' for dev in sess.list_devices()):
        raise ExecutionHardwareError(
            'execution_hardware is set to accelerator, but no accelerator '
            'was found')
    # redacted
    # sess.list_devices here doesn't return the correct answer. That can only
    # work later, after the device (on the other VM) has been initialized,
    # which is generally not yet.

  # Prepare input stream and estimator.
  tf_dataset = prepare_inputs(source_path=examples_filename, use_tpu=use_tpu)
  if FLAGS.use_openvino:
    ie_estimator = OpenVINOEstimator(
        checkpoint_path, input_fn=tf_dataset, model=model)
    predictions = iter(ie_estimator)
  else:
    estimator = model.make_estimator(
        batch_size=batch_size,
        master=master,
        use_tpu=use_tpu,
        session_config=config,
    )

    # Instantiate the prediction "stream", and select the EMA values from
    # the model.
    if checkpoint_path is None:
      # Unit tests use this branch.
      predict_hooks = []
    else:
      predict_hooks = [
          h(checkpoint_path) for h in model.session_predict_hooks()
      ]
    predictions = iter(
        estimator.predict(
            input_fn=tf_dataset,
            checkpoint_path=checkpoint_path,
            hooks=predict_hooks))

  # Consume predictions one at a time and write them to output_file.
  logging.info('Writing calls to %s', output_file)
  writer = tfrecord.Writer(output_file)
  with writer:
    start_time = time.time()
    # Bound up front so the summary below is well-defined even when the loop
    # body never runs (empty prediction stream or a negative max_batches).
    duration = 0.0
    n_examples, n_batches = 0, 0
    while max_batches is None or n_batches <= max_batches:
      try:
        prediction = next(predictions)
      except (StopIteration, tf.errors.OutOfRangeError):
        break
      write_variant_call(writer, prediction, use_tpu)
      n_examples += 1
      # Batch counter is derived from the example count: it becomes
      # max_batches + 1 (ending the loop) once max_batches * batch_size
      # examples have been written.
      n_batches = n_examples // batch_size + 1
      duration = time.time() - start_time

      if not FLAGS.use_openvino:
        logging.log_every_n(
            logging.INFO,
            ('Processed %s examples in %s batches [%.3f sec per 100]'),
            _LOG_EVERY_N, n_examples, n_batches, (100 * duration) / n_examples)
    # One last log to capture the extra examples. Skipped when nothing was
    # processed, since the per-100 rate would divide by zero.
    if not FLAGS.use_openvino and n_examples:
      logging.info('Processed %s examples in %s batches [%.3f sec per 100]',
                   n_examples, n_batches, (100 * duration) / n_examples)

    logging.info('Done calling variants from a total of %d examples.',
                 n_examples)