def test_call_variants_runs_on_gpus(self, model):
     """Smoke-test call_variants with execution_hardware='accelerator'.

     Nothing is asserted about the output; the test passes as long as the
     call returns without raising.
     """
     call_kwargs = dict(
         examples_filename=testdata.GOLDEN_CALLING_EXAMPLES,
         checkpoint_path=modeling.SKIP_MODEL_INITIALIZATION_IN_TEST,
         model=model,
         execution_hardware='accelerator',
         output_file=test_utils.test_tmpfile('zzz.tfrecord'),
     )
     call_variants.call_variants(**call_kwargs)
 def test_call_variants_runs_on_gpus(self, model):
   """Smoke-test call_variants with execution_hardware='accelerator'.

   Nothing is asserted about the output; the test passes as long as the
   call returns without raising.
   """
   call_kwargs = dict(
       examples_filename=testdata.GOLDEN_CALLING_EXAMPLES,
       checkpoint_path=modeling.SKIP_MODEL_INITIALIZATION_IN_TEST,
       model=model,
       execution_hardware='accelerator',
       output_file=test_utils.test_tmpfile('zzz.tfrecord'),
   )
   call_variants.call_variants(**call_kwargs)
Ejemplo n.º 3
0
 def _run():
     """Invoke call_variants for a single batch of one example.

     `self` and `hardware_env` are closed over from the enclosing scope.
     """
     call_kwargs = dict(
         examples_filename=testdata.GOLDEN_CALLING_EXAMPLES,
         checkpoint_path=modeling.SKIP_MODEL_INITIALIZATION_IN_TEST,
         model=self.model,
         execution_hardware=hardware_env,
         max_batches=1,
         batch_size=1,
         output_file=test_utils.test_tmpfile('zzz.tfrecord'),
     )
     call_variants.call_variants(**call_kwargs)
Ejemplo n.º 4
0
 def _run():
   """Invoke call_variants for a single batch of one example.

   `self` and `hardware_env` are closed over from the enclosing scope.
   """
   call_kwargs = dict(
       examples_filename=testdata.GOLDEN_CALLING_EXAMPLES,
       checkpoint_path=modeling.SKIP_MODEL_INITIALIZATION_IN_TEST,
       model=self.model,
       execution_hardware=hardware_env,
       max_batches=1,
       batch_size=1,
       output_file=test_utils.test_tmpfile('zzz.tfrecord'),
   )
   call_variants.call_variants(**call_kwargs)
Ejemplo n.º 5
0
 def _run():
     """Invoke call_variants for a single batch of one example.

     `self` and `hardware_env` are closed over from the enclosing scope;
     TPU usage follows FLAGS.use_tpu.
     """
     call_kwargs = dict(
         use_tpu=FLAGS.use_tpu,
         examples_filename=testdata.GOLDEN_CALLING_EXAMPLES,
         checkpoint_path=_LEAVE_MODEL_UNINITIALIZED,
         model=self.model,
         execution_hardware=hardware_env,
         max_batches=1,
         batch_size=1,
         output_file=test_utils.test_tmpfile('zzz.tfrecord'),
     )
     call_variants.call_variants(**call_kwargs)
Ejemplo n.º 6
0
 def test_call_variants_non_accelerated_execution_runs(
         self, execution_hardware):
     """Run one single-example batch on the given `execution_hardware`.

     Kept even though it closely resembles the parameterized test below,
     because this one does not mock out the list_devices call.
     """
     call_kwargs = dict(
         examples_filename=testdata.GOLDEN_CALLING_EXAMPLES,
         checkpoint_path=modeling.SKIP_MODEL_INITIALIZATION_IN_TEST,
         model=self.model,
         execution_hardware=execution_hardware,
         max_batches=1,
         batch_size=1,
     )
     call_kwargs['output_file'] = test_utils.test_tmpfile(
         'call_variants_cpu_only.tfrecord')
     call_variants.call_variants(**call_kwargs)
Ejemplo n.º 7
0
 def test_call_variants_non_accelerated_execution_runs(self,
                                                       execution_hardware):
   """Run one single-example batch on the given `execution_hardware`.

   Kept even though it closely resembles the parameterized test below,
   because this one does not mock out the list_devices call.
   """
   call_kwargs = dict(
       examples_filename=testdata.GOLDEN_CALLING_EXAMPLES,
       checkpoint_path=modeling.SKIP_MODEL_INITIALIZATION_IN_TEST,
       model=self.model,
       execution_hardware=execution_hardware,
       max_batches=1,
       batch_size=1,
   )
   call_kwargs['output_file'] = test_utils.test_tmpfile(
       'call_variants_cpu_only.tfrecord')
   call_variants.call_variants(**call_kwargs)
 def assertCallVariantsEmitsNRecordsForRandomGuess(self, filename,
                                                   num_examples):
   """Assert the 'random_guess' model emits `num_examples` output records.

   Runs call_variants over `filename` with batch_size=4 and no batch
   limit, then counts the CallVariantsOutput protos that were written.
   """
   output_path = test_utils.test_tmpfile('call_variants.tfrecord')
   random_guess_model = modeling.get_model('random_guess')
   call_kwargs = dict(
       examples_filename=filename,
       checkpoint_path=modeling.SKIP_MODEL_INITIALIZATION_IN_TEST,
       model=random_guess_model,
       output_file=output_path,
       batch_size=4,
       max_batches=None,
   )
   call_variants.call_variants(**call_kwargs)

   # Materialize every emitted proto so the record count can be compared.
   record_reader = io_utils.read_tfrecords(
       output_path, deepvariant_pb2.CallVariantsOutput)
   emitted_records = list(record_reader)
   self.assertEqual(len(emitted_records), num_examples)
Ejemplo n.º 9
0
 def assertCallVariantsEmitsNRecordsForRandomGuess(self, filename,
                                                   num_examples):
   """Assert the 'random_guess' model emits `num_examples` output records.

   Runs call_variants over `filename` with batch_size=4 and no batch
   limit, then counts the CallVariantsOutput protos that were written.
   """
   output_path = test_utils.test_tmpfile('call_variants.tfrecord')
   random_guess_model = modeling.get_model('random_guess')
   call_kwargs = dict(
       examples_filename=filename,
       checkpoint_path=modeling.SKIP_MODEL_INITIALIZATION_IN_TEST,
       model=random_guess_model,
       output_file=output_path,
       batch_size=4,
       max_batches=None,
   )
   call_variants.call_variants(**call_kwargs)

   # Materialize every emitted proto so the record count can be compared.
   record_reader = io_utils.read_tfrecords(
       output_path, deepvariant_pb2.CallVariantsOutput)
   emitted_records = list(record_reader)
   self.assertEqual(len(emitted_records), num_examples)
Ejemplo n.º 10
0
 def test_call_variants_non_accelerated_execution_runs(
         self, execution_hardware):
     """Run a single batch of call_variants on `execution_hardware`.

     Nothing is asserted about the output; the test passes as long as the
     call returns without raising.
     """
     # With TPUs the predict batch size must be divisible by the number of
     # replicas, so use 2 there and 1 everywhere else.
     batch_size = 2 if FLAGS.use_tpu else 1
     output_path = test_utils.test_tmpfile('call_variants_cpu_only.tfrecord')
     call_kwargs = dict(
         examples_filename=testdata.GOLDEN_CALLING_EXAMPLES,
         checkpoint_path=_LEAVE_MODEL_UNINITIALIZED,
         model=self.model,
         execution_hardware=execution_hardware,
         max_batches=1,
         batch_size=batch_size,
         output_file=output_path,
         use_tpu=FLAGS.use_tpu,
     )
     call_variants.call_variants(**call_kwargs)
Ejemplo n.º 11
0
  def assertCallVariantsEmitsNRecordsForInceptionV3(self, filename,
                                                    num_examples):
    """Assert the 'inception_v3' model emits `num_examples` output records.

    Runs call_variants over `filename` with batch_size=4 and no batch
    limit, then counts the CallVariantsOutput protos that were written.
    """
    output_path = test_utils.test_tmpfile('inception_v3.call_variants.tfrecord')
    inception_model = modeling.get_model('inception_v3')
    call_kwargs = dict(
        examples_filename=filename,
        checkpoint_path=_LEAVE_MODEL_UNINITIALIZED,
        model=inception_model,
        output_file=output_path,
        batch_size=4,
        max_batches=None,
    )
    call_variants.call_variants(**call_kwargs)

    # Materialize every emitted proto so the record count can be compared.
    record_reader = io_utils.read_tfrecords(
        output_path, deepvariant_pb2.CallVariantsOutput)
    emitted_records = list(record_reader)
    self.assertEqual(len(emitted_records), num_examples)
  def test_call_variants_with_invalid_format(self, model, bad_format):
    """Expect ValueError when an example's image/format is `bad_format`."""
    # Take the first record of the golden file as a known-good starting point.
    golden_records = io_utils.read_tfrecords(test_utils.GOLDEN_CALLING_EXAMPLES)
    corrupted = next(golden_records)

    # Replace image/format with an invalid value (anything other than 'raw').
    format_feature = corrupted.features.feature['image/format']
    format_feature.bytes_list.value[0] = bad_format

    # Persist the corrupted record so call_variants can read it from disk.
    bad_examples_path = test_utils.test_tmpfile('make_examples_output.tfrecord')
    io_utils.write_tfrecords([corrupted], bad_examples_path)
    output_path = test_utils.test_tmpfile(
        'call_variants_invalid_format.tfrecord')

    with self.assertRaises(ValueError):
      call_variants.call_variants(
          examples_filename=bad_examples_path,
          checkpoint_path=modeling.SKIP_MODEL_INITIALIZATION_IN_TEST,
          model=model,
          output_file=output_path,
          batch_size=1,
          max_batches=1)
Ejemplo n.º 13
0
  def test_call_variants_with_invalid_format(self, model, bad_format):
    """Expect ValueError when an example's image/format is `bad_format`."""
    # Take the first record of the golden file as a known-good starting point.
    golden_records = io_utils.read_tfrecords(testdata.GOLDEN_CALLING_EXAMPLES)
    corrupted = next(golden_records)

    # Replace image/format with an invalid value (anything other than 'raw').
    format_feature = corrupted.features.feature['image/format']
    format_feature.bytes_list.value[0] = bad_format

    # Persist the corrupted record so call_variants can read it from disk.
    bad_examples_path = test_utils.test_tmpfile('make_examples_output.tfrecord')
    io_utils.write_tfrecords([corrupted], bad_examples_path)
    output_path = test_utils.test_tmpfile(
        'call_variants_invalid_format.tfrecord')

    with self.assertRaises(ValueError):
      call_variants.call_variants(
          examples_filename=bad_examples_path,
          checkpoint_path=modeling.SKIP_MODEL_INITIALIZATION_IN_TEST,
          model=model,
          output_file=output_path,
          batch_size=1,
          max_batches=1)
Ejemplo n.º 14
0
 def assertCallVariantsEmitsNRecordsForRandomGuess(self, filename,
                                                   num_examples):
     """Assert the 'random_guess' model emits `num_examples` output records.

     Runs call_variants over `filename` with batch_size=4 and no batch
     limit (TPU usage follows FLAGS.use_tpu), then counts the
     CallVariantsOutput protos that were written.
     """
     output_path = test_utils.test_tmpfile('call_variants.tfrecord')
     random_guess_model = modeling.get_model('random_guess')
     call_kwargs = dict(
         examples_filename=filename,
         checkpoint_path=_LEAVE_MODEL_UNINITIALIZED,
         model=random_guess_model,
         output_file=output_path,
         batch_size=4,
         max_batches=None,
         master='',
         use_tpu=FLAGS.use_tpu,
     )
     call_variants.call_variants(**call_kwargs)

     # Materialize every emitted proto so the record count can be compared.
     record_reader = tfrecord.read_tfrecords(
         output_path, deepvariant_pb2.CallVariantsOutput)
     emitted_records = list(record_reader)
     self.assertEqual(len(emitted_records), num_examples)
Ejemplo n.º 15
0
    def _call_end2end_helper(self, examples_path, model, shard_inputs):
        """Run call_variants over `examples_path` and collect what it wrote.

        Args:
          examples_path: Path of the tfrecord examples to call.
          model: Model to run; its `name` decides how many batches are run.
          shard_inputs: If truthy, the examples are first rewritten to a
            'sharded@3' temp path and call_variants reads from there.

        Returns:
          A tuple (call_variants_outputs, examples, batch_size, max_batches).
        """
        examples = list(io_utils.read_tfrecords(examples_path))

        # Read straight from the input unless a sharded copy was requested.
        source_path = examples_path
        if shard_inputs:
            source_path = test_utils.test_tmpfile('sharded@{}'.format(3))
            io_utils.write_tfrecords(examples, source_path)

        # A headless TPU server is often 2x2, i.e. 8 replicas, so the batch
        # must be that large there; a smaller batch is fine otherwise.
        batch_size = 8 if FLAGS.use_tpu else 4

        # Only the random guess model runs over everything; every other
        # model is limited to a single inference batch.
        max_batches = None if model.name == 'random_guess' else 1

        outfile = test_utils.test_tmpfile('call_variants.tfrecord')
        call_kwargs = dict(
            examples_filename=source_path,
            checkpoint_path=_LEAVE_MODEL_UNINITIALIZED,
            model=model,
            output_file=outfile,
            batch_size=batch_size,
            max_batches=max_batches,
            master='',
            use_tpu=FLAGS.use_tpu,
        )
        call_variants.call_variants(**call_kwargs)

        record_reader = io_utils.read_tfrecords(
            outfile, deepvariant_pb2.CallVariantsOutput)
        call_variants_outputs = list(record_reader)

        return call_variants_outputs, examples, batch_size, max_batches
Ejemplo n.º 16
0
    def test_call_end2end(self, model, shard_inputs, include_debug_info):
        """Run call_variants end to end and validate the emitted records.

        Args:
          model: Model under test; its `name` decides whether every example
            is processed ('random_guess') or only a single batch.
          shard_inputs: If truthy, the golden examples are first rewritten
            to a 'sharded@3' temp path and call_variants reads from there.
          include_debug_info: Value assigned to FLAGS.include_debug_info;
            also selects which debug_info checks are applied to the outputs.
        """
        # NOTE(review): the flag is not restored in this method, so the value
        # may leak into later tests unless a fixture resets it -- confirm.
        FLAGS.include_debug_info = include_debug_info
        examples = list(
            io_utils.read_tfrecords(testdata.GOLDEN_CALLING_EXAMPLES))

        if shard_inputs:
            # Create a sharded version of our golden examples.
            source_path = test_utils.test_tmpfile('sharded@{}'.format(3))
            io_utils.write_tfrecords(examples, source_path)
        else:
            source_path = testdata.GOLDEN_CALLING_EXAMPLES

        batch_size = 4
        if model.name == 'random_guess':
            # For the random guess model we can run everything.
            max_batches = None
        else:
            # For all other models we only run a single batch for inference.
            max_batches = 1

        outfile = test_utils.test_tmpfile('call_variants.tfrecord')
        call_variants.call_variants(
            examples_filename=source_path,
            checkpoint_path=modeling.SKIP_MODEL_INITIALIZATION_IN_TEST,
            model=model,
            output_file=outfile,
            batch_size=batch_size,
            max_batches=max_batches)

        call_variants_outputs = list(
            io_utils.read_tfrecords(outfile,
                                    deepvariant_pb2.CallVariantsOutput))

        # Check that we have the right number of output protos.
        self.assertEqual(
            len(call_variants_outputs),
            batch_size * max_batches if max_batches else len(examples))

        # Check that our CallVariantsOutput (CVO) have the following critical
        # properties:
        # - we have one CVO for each example we processed.
        # - the variant in the CVO is exactly what was in the example.
        # - the alt_allele_indices of the CVO match those of its corresponding
        #   example.
        # - there are 3 genotype probabilities and these are between 0.0 and 1.0.
        # We can only do this test when processing all of the variants (max_batches
        # is None), since we processed all of the examples with that model.
        if max_batches is None:
            self.assertItemsEqual(
                [cvo.variant for cvo in call_variants_outputs],
                [tf_utils.example_variant(ex) for ex in examples])

        # Check the CVO debug_info: not filled if include_debug_info is False;
        # else, filled by logic based on CVO.
        if not include_debug_info:
            for cvo in call_variants_outputs:
                self.assertEqual(
                    cvo.debug_info,
                    deepvariant_pb2.CallVariantsOutput.DebugInfo())
        else:
            for cvo in call_variants_outputs:
                self.assertEqual(cvo.debug_info.has_insertion,
                                 variant_utils.has_insertion(cvo.variant))
                self.assertEqual(cvo.debug_info.has_deletion,
                                 variant_utils.has_deletion(cvo.variant))
                self.assertEqual(cvo.debug_info.is_snp,
                                 variant_utils.is_snp(cvo.variant))
                self.assertEqual(cvo.debug_info.predicted_label,
                                 np.argmax(cvo.genotype_probabilities))

        def example_matches_call_variants_output(example,
                                                 call_variants_output):
            """Return whether `example` has the output's variant and alt indices."""
            return (tf_utils.example_variant(example)
                    == call_variants_output.variant
                    and tf_utils.example_alt_alleles_indices(example)
                    == call_variants_output.alt_allele_indices.indices)

        for call_variants_output in call_variants_outputs:
            # Find all matching examples.
            matches = [
                ex for ex in examples if example_matches_call_variants_output(
                    ex, call_variants_output)
            ]
            # We should have exactly one match.
            self.assertEqual(len(matches), 1)
            example = matches[0]
            # Check that we've faithfully copied in the alt alleles (though currently
            # as implemented we find our example using this information so it cannot
            # fail). Included here in case that changes in the future.
            self.assertEqual(
                list(tf_utils.example_alt_alleles_indices(example)),
                list(call_variants_output.alt_allele_indices.indices))
            # We should have exactly three genotype probabilities (assuming our
            # ploidy == 2).
            self.assertEqual(len(call_variants_output.genotype_probabilities),
                             3)
            # These are probabilities so they should be between 0 and 1.
            # all() is required: a bare generator expression is always truthy,
            # so assertTrue on it could never fail.
            self.assertTrue(
                all(0 <= gp <= 1
                    for gp in call_variants_output.genotype_probabilities))
Ejemplo n.º 17
0
  def test_call_end2end(self, model, shard_inputs, include_debug_info):
    """Run call_variants end to end and validate the emitted records.

    Args:
      model: Model under test; its `name` decides whether every example is
        processed ('random_guess') or only a single batch.
      shard_inputs: If truthy, the golden examples are first rewritten to a
        'sharded@3' temp path and call_variants reads from there.
      include_debug_info: Value assigned to FLAGS.include_debug_info; also
        selects which debug_info checks are applied to the outputs.
    """
    # NOTE(review): the flag is not restored in this method, so the value may
    # leak into later tests unless a fixture resets it -- confirm.
    FLAGS.include_debug_info = include_debug_info
    examples = list(io_utils.read_tfrecords(testdata.GOLDEN_CALLING_EXAMPLES))

    if shard_inputs:
      # Create a sharded version of our golden examples.
      source_path = test_utils.test_tmpfile('sharded@{}'.format(3))
      io_utils.write_tfrecords(examples, source_path)
    else:
      source_path = testdata.GOLDEN_CALLING_EXAMPLES

    batch_size = 4
    if model.name == 'random_guess':
      # For the random guess model we can run everything.
      max_batches = None
    else:
      # For all other models we only run a single batch for inference.
      max_batches = 1

    outfile = test_utils.test_tmpfile('call_variants.tfrecord')
    call_variants.call_variants(
        examples_filename=source_path,
        checkpoint_path=modeling.SKIP_MODEL_INITIALIZATION_IN_TEST,
        model=model,
        output_file=outfile,
        batch_size=batch_size,
        max_batches=max_batches)

    call_variants_outputs = list(
        io_utils.read_tfrecords(outfile, deepvariant_pb2.CallVariantsOutput))

    # Check that we have the right number of output protos.
    self.assertEqual(
        len(call_variants_outputs), batch_size * max_batches
        if max_batches else len(examples))

    # Check that our CallVariantsOutput (CVO) have the following critical
    # properties:
    # - we have one CVO for each example we processed.
    # - the variant in the CVO is exactly what was in the example.
    # - the alt_allele_indices of the CVO match those of its corresponding
    #   example.
    # - there are 3 genotype probabilities and these are between 0.0 and 1.0.
    # We can only do this test when processing all of the variants (max_batches
    # is None), since we processed all of the examples with that model.
    if max_batches is None:
      self.assertItemsEqual([cvo.variant for cvo in call_variants_outputs],
                            [tf_utils.example_variant(ex) for ex in examples])

    # Check the CVO debug_info: not filled if include_debug_info is False;
    # else, filled by logic based on CVO.
    if not include_debug_info:
      for cvo in call_variants_outputs:
        self.assertEqual(cvo.debug_info,
                         deepvariant_pb2.CallVariantsOutput.DebugInfo())
    else:
      for cvo in call_variants_outputs:
        self.assertEqual(cvo.debug_info.has_insertion,
                         variant_utils.has_insertion(cvo.variant))
        self.assertEqual(cvo.debug_info.has_deletion,
                         variant_utils.has_deletion(cvo.variant))
        self.assertEqual(cvo.debug_info.is_snp, variant_utils.is_snp(
            cvo.variant))
        self.assertEqual(cvo.debug_info.predicted_label,
                         np.argmax(cvo.genotype_probabilities))

    def example_matches_call_variants_output(example, call_variants_output):
      """Return whether `example` has the output's variant and alt indices."""
      return (tf_utils.example_variant(example) == call_variants_output.variant
              and tf_utils.example_alt_alleles_indices(
                  example) == call_variants_output.alt_allele_indices.indices)

    for call_variants_output in call_variants_outputs:
      # Find all matching examples.
      matches = [
          ex for ex in examples
          if example_matches_call_variants_output(ex, call_variants_output)
      ]
      # We should have exactly one match.
      self.assertEqual(len(matches), 1)
      example = matches[0]
      # Check that we've faithfully copied in the alt alleles (though currently
      # as implemented we find our example using this information so it cannot
      # fail). Included here in case that changes in the future.
      self.assertEqual(
          list(tf_utils.example_alt_alleles_indices(example)),
          list(call_variants_output.alt_allele_indices.indices))
      # We should have exactly three genotype probabilities (assuming our
      # ploidy == 2).
      self.assertEqual(len(call_variants_output.genotype_probabilities), 3)
      # These are probabilities so they should be between 0 and 1.
      # all() is required: a bare generator expression is always truthy, so
      # assertTrue on it could never fail.
      self.assertTrue(
          all(0 <= gp <= 1
              for gp in call_variants_output.genotype_probabilities))