예제 #1
0
def _create_cvo_proto(encoded_variant, gls, encoded_alt_allele_indices):
    """Assembles a CallVariantsOutput proto from serialized pieces.

    Args:
      encoded_variant: Serialized bytes, parsed with
        variants_pb2.Variant.FromString.
      gls: Values stored unchanged in the proto's genotype_probabilities
        field; their argmax becomes the debug predicted_label.
      encoded_alt_allele_indices: Serialized bytes, parsed as a
        CallVariantsOutput.AltAlleleIndices message.

    Returns:
      A deepvariant_pb2.CallVariantsOutput. A DebugInfo message is built only
      when FLAGS.include_debug_info is set; otherwise None is passed for
      debug_info.
    """
    parsed_variant = variants_pb2.Variant.FromString(encoded_variant)
    cvo_cls = deepvariant_pb2.CallVariantsOutput
    parsed_indices = cvo_cls.AltAlleleIndices.FromString(
        encoded_alt_allele_indices)

    if not FLAGS.include_debug_info:
        extra = None
    else:
        # Debug fields are all derived from the parsed variant and the gls.
        extra = cvo_cls.DebugInfo(
            has_insertion=variantutils.has_insertion(parsed_variant),
            has_deletion=variantutils.has_deletion(parsed_variant),
            is_snp=variantutils.is_snp(parsed_variant),
            predicted_label=np.argmax(gls))

    return cvo_cls(
        variant=parsed_variant,
        alt_allele_indices=parsed_indices,
        genotype_probabilities=gls,
        debug_info=extra)
예제 #2
0
 def write_output(encoded_variant, gls, encoded_alt_allele_indices):
     """Builds a CallVariantsOutput proto and hands it to write_fn.

     Args:
       encoded_variant: Serialized bytes, parsed with
         variants_pb2.Variant.FromString.
       gls: Values stored unchanged in genotype_probabilities; their argmax is
         used as the debug predicted_label.
       encoded_alt_allele_indices: Serialized bytes, parsed as a
         CallVariantsOutput.AltAlleleIndices message.
     """
     variant_proto = variants_pb2.Variant.FromString(encoded_variant)
     output_cls = deepvariant_pb2.CallVariantsOutput
     indices_proto = output_cls.AltAlleleIndices.FromString(
         encoded_alt_allele_indices)
     # Debug info is only materialized when the flag asks for it.
     debug_proto = (
         output_cls.DebugInfo(
             has_insertion=variantutils.has_insertion(variant_proto),
             has_deletion=variantutils.has_deletion(variant_proto),
             is_snp=variantutils.is_snp(variant_proto),
             predicted_label=np.argmax(gls))
         if FLAGS.include_debug_info else None)
     record = output_cls(
         variant=variant_proto,
         alt_allele_indices=indices_proto,
         genotype_probabilities=gls,
         debug_info=debug_proto)
     write_fn(record)
예제 #3
0
 def test_has_insertion_deletion(self, variant, has_insertion,
                                 has_deletion):
     """Checks variantutils insertion/deletion detection for one variant.

     Args:
       variant: The variant handed to variantutils.has_insertion and
         variantutils.has_deletion.
       has_insertion: Expected result of variantutils.has_insertion.
       has_deletion: Expected result of variantutils.has_deletion.
     """
     insertion_seen = variantutils.has_insertion(variant)
     self.assertEqual(insertion_seen, has_insertion)
     deletion_seen = variantutils.has_deletion(variant)
     self.assertEqual(deletion_seen, has_deletion)
  def test_call_end2end(self, model, shard_inputs, include_debug_info):
    """Runs call_variants end to end and validates the written outputs.

    Args:
      model: Model passed to call_variants.call_variants. When its name is
        'random_guess' every batch is run; otherwise only a single batch.
      shard_inputs: If true, the golden examples are first rewritten to a
        sharded temporary file and inference reads from that file instead.
      include_debug_info: Value assigned to FLAGS.include_debug_info; it also
        selects which debug_info assertions are made on the outputs.
    """
    FLAGS.include_debug_info = include_debug_info
    examples = list(io_utils.read_tfrecords(test_utils.GOLDEN_CALLING_EXAMPLES))

    if shard_inputs:
      # Create a sharded version of our golden examples.
      source_path = test_utils.test_tmpfile('sharded@{}'.format(3))
      io_utils.write_tfrecords(examples, source_path)
    else:
      source_path = test_utils.GOLDEN_CALLING_EXAMPLES

    batch_size = 4
    if model.name == 'random_guess':
      # For the random guess model we can run everything.
      max_batches = None
    else:
      # For all other models we only run a single batch for inference.
      max_batches = 1

    outfile = test_utils.test_tmpfile('call_variants.tfrecord')
    call_variants.call_variants(
        examples_filename=source_path,
        checkpoint_path=modeling.SKIP_MODEL_INITIALIZATION_IN_TEST,
        model=model,
        output_file=outfile,
        batch_size=batch_size,
        max_batches=max_batches)

    call_variants_outputs = list(
        io_utils.read_tfrecords(outfile, deepvariant_pb2.CallVariantsOutput))

    # Check that we have the right number of output protos.
    self.assertEqual(
        len(call_variants_outputs), batch_size * max_batches
        if max_batches else len(examples))

    # Check that our CallVariantsOutput (CVO) have the following critical
    # properties:
    # - we have one CVO for each example we processed.
    # - the variant in the CVO is exactly what was in the example.
    # - the alt_allele_indices of the CVO match those of its corresponding
    #   example.
    # - there are 3 genotype probabilities and these are between 0.0 and 1.0.
    # We can only do this test when processing all of the variants (max_batches
    # is None), since we processed all of the examples with that model.
    if max_batches is None:
      self.assertItemsEqual([cvo.variant for cvo in call_variants_outputs],
                            [tf_utils.example_variant(ex) for ex in examples])

    # Check the CVO debug_info: not filled if include_debug_info is False;
    # else, filled by logic based on CVO.
    if not include_debug_info:
      for cvo in call_variants_outputs:
        self.assertEqual(cvo.debug_info,
                         deepvariant_pb2.CallVariantsOutput.DebugInfo())
    else:
      for cvo in call_variants_outputs:
        self.assertEqual(cvo.debug_info.has_insertion,
                         variantutils.has_insertion(cvo.variant))
        self.assertEqual(cvo.debug_info.has_deletion,
                         variantutils.has_deletion(cvo.variant))
        self.assertEqual(cvo.debug_info.is_snp, variantutils.is_snp(
            cvo.variant))
        self.assertEqual(cvo.debug_info.predicted_label,
                         np.argmax(cvo.genotype_probabilities))

    def example_matches_call_variants_output(example, call_variants_output):
      # An example corresponds to a CVO when both the variant and the alt
      # allele indices agree.
      return (tf_utils.example_variant(example) == call_variants_output.variant
              and tf_utils.example_alt_alleles_indices(
                  example) == call_variants_output.alt_allele_indices.indices)

    for call_variants_output in call_variants_outputs:
      # Find all matching examples.
      matches = [
          ex for ex in examples
          if example_matches_call_variants_output(ex, call_variants_output)
      ]
      # We should have exactly one match.
      self.assertEqual(len(matches), 1)
      example = matches[0]
      # Check that we've faithfully copied in the alt alleles (though currently
      # as implemented we find our example using this information so it cannot
      # fail). Included here in case that changes in the future.
      self.assertEqual(
          list(tf_utils.example_alt_alleles_indices(example)),
          list(call_variants_output.alt_allele_indices.indices))
      # We should have exactly three genotype probabilities (assuming our
      # ploidy == 2).
      self.assertEqual(len(call_variants_output.genotype_probabilities), 3)
      # These are probabilities so they should be between 0 and 1. The all()
      # is required: a bare generator expression is always truthy, so passing
      # it straight to assertTrue could never fail.
      self.assertTrue(
          all(0 <= gp <= 1
              for gp in call_variants_output.genotype_probabilities))