def _create_cvo_proto(encoded_variant, gls, encoded_alt_allele_indices):
  """Builds a CallVariantsOutput proto from serialized inputs.

  Args:
    encoded_variant: Serialized Variant proto; decoded with
      variants_pb2.Variant.FromString.
    gls: Genotype probabilities. Stored as-is in the output proto's
      genotype_probabilities field; their argmax becomes the predicted label
      when debug info is requested.
    encoded_alt_allele_indices: Serialized
      CallVariantsOutput.AltAlleleIndices proto.

  Returns:
    A deepvariant_pb2.CallVariantsOutput. Its debug_info is only populated
    when FLAGS.include_debug_info is set.
  """
  cvo_cls = deepvariant_pb2.CallVariantsOutput

  # Decode both serialized messages up front.
  parsed_variant = variants_pb2.Variant.FromString(encoded_variant)
  parsed_indices = cvo_cls.AltAlleleIndices.FromString(
      encoded_alt_allele_indices)

  if FLAGS.include_debug_info:
    # Debug fields are derived from the variant itself plus the most likely
    # genotype according to gls.
    extra_info = cvo_cls.DebugInfo(
        has_insertion=variantutils.has_insertion(parsed_variant),
        has_deletion=variantutils.has_deletion(parsed_variant),
        is_snp=variantutils.is_snp(parsed_variant),
        predicted_label=np.argmax(gls))
  else:
    extra_info = None

  return cvo_cls(
      variant=parsed_variant,
      alt_allele_indices=parsed_indices,
      genotype_probabilities=gls,
      debug_info=extra_info)
def write_output(encoded_variant, gls, encoded_alt_allele_indices):
  """Assembles a CallVariantsOutput proto and passes it to write_fn.

  Args:
    encoded_variant: Serialized Variant proto; decoded with
      variants_pb2.Variant.FromString.
    gls: Genotype probabilities. Stored as-is in the output proto's
      genotype_probabilities field; their argmax becomes the predicted label
      when debug info is requested.
    encoded_alt_allele_indices: Serialized
      CallVariantsOutput.AltAlleleIndices proto.
  """
  cvo_cls = deepvariant_pb2.CallVariantsOutput

  parsed_variant = variants_pb2.Variant.FromString(encoded_variant)
  parsed_indices = cvo_cls.AltAlleleIndices.FromString(
      encoded_alt_allele_indices)

  if FLAGS.include_debug_info:
    # Debug fields are derived from the variant itself plus the most likely
    # genotype according to gls.
    extra_info = cvo_cls.DebugInfo(
        has_insertion=variantutils.has_insertion(parsed_variant),
        has_deletion=variantutils.has_deletion(parsed_variant),
        is_snp=variantutils.is_snp(parsed_variant),
        predicted_label=np.argmax(gls))
  else:
    extra_info = None

  record = cvo_cls(
      variant=parsed_variant,
      alt_allele_indices=parsed_indices,
      genotype_probabilities=gls,
      debug_info=extra_info)

  # NOTE(review): write_fn is not defined in this block; presumably it is
  # provided by an enclosing scope -- confirm against the caller.
  write_fn(record)
def test_has_insertion_deletion(self, variant, has_insertion, has_deletion):
  """Checks variantutils.has_insertion/has_deletion against expected flags.

  Args:
    variant: The variant handed to both variantutils predicates.
    has_insertion: Expected result of variantutils.has_insertion(variant).
    has_deletion: Expected result of variantutils.has_deletion(variant).
  """
  observed_insertion = variantutils.has_insertion(variant)
  self.assertEqual(observed_insertion, has_insertion)

  observed_deletion = variantutils.has_deletion(variant)
  self.assertEqual(observed_deletion, has_deletion)
def test_call_end2end(self, model, shard_inputs, include_debug_info):
  """Runs call_variants end to end and validates the emitted CVO protos.

  Args:
    model: Model passed to call_variants.call_variants. When its name is
      'random_guess' every example is processed; otherwise only one batch is.
    shard_inputs: If truthy, the golden examples are first rewritten to a
      'sharded@3' tmpfile and call_variants reads from that path instead of
      the golden file.
    include_debug_info: Value assigned to FLAGS.include_debug_info before the
      run; also selects which debug_info assertions are made.
  """
  # NOTE(review): this assigns the flag without restoring it afterwards --
  # confirm that later tests do not depend on its previous value.
  FLAGS.include_debug_info = include_debug_info
  examples = list(io_utils.read_tfrecords(test_utils.GOLDEN_CALLING_EXAMPLES))

  if shard_inputs:
    # Create a sharded version of our golden examples.
    source_path = test_utils.test_tmpfile('sharded@{}'.format(3))
    io_utils.write_tfrecords(examples, source_path)
  else:
    source_path = test_utils.GOLDEN_CALLING_EXAMPLES

  batch_size = 4
  if model.name == 'random_guess':
    # For the random guess model we can run everything.
    max_batches = None
  else:
    # For all other models we only run a single batch for inference.
    max_batches = 1

  outfile = test_utils.test_tmpfile('call_variants.tfrecord')
  call_variants.call_variants(
      examples_filename=source_path,
      checkpoint_path=modeling.SKIP_MODEL_INITIALIZATION_IN_TEST,
      model=model,
      output_file=outfile,
      batch_size=batch_size,
      max_batches=max_batches)

  call_variants_outputs = list(
      io_utils.read_tfrecords(outfile, deepvariant_pb2.CallVariantsOutput))

  # Check that we have the right number of output protos.
  self.assertEqual(
      len(call_variants_outputs), batch_size * max_batches
      if max_batches else len(examples))

  # Check that our CallVariantsOutput (CVO) have the following critical
  # properties:
  # - we have one CVO for each example we processed.
  # - the variant in the CVO is exactly what was in the example.
  # - the alt_allele_indices of the CVO match those of its corresponding
  #   example.
  # - there are 3 genotype probabilities and these are between 0.0 and 1.0.
  # We can only do this test when processing all of the variants (max_batches
  # is None), since we processed all of the examples with that model.
  if max_batches is None:
    self.assertItemsEqual([cvo.variant for cvo in call_variants_outputs],
                          [tf_utils.example_variant(ex) for ex in examples])

  # Check the CVO debug_info: not filled if include_debug_info is False;
  # else, filled by logic based on CVO.
  if not include_debug_info:
    for cvo in call_variants_outputs:
      self.assertEqual(cvo.debug_info,
                       deepvariant_pb2.CallVariantsOutput.DebugInfo())
  else:
    for cvo in call_variants_outputs:
      self.assertEqual(cvo.debug_info.has_insertion,
                       variantutils.has_insertion(cvo.variant))
      self.assertEqual(cvo.debug_info.has_deletion,
                       variantutils.has_deletion(cvo.variant))
      self.assertEqual(cvo.debug_info.is_snp, variantutils.is_snp(
          cvo.variant))
      self.assertEqual(cvo.debug_info.predicted_label,
                       np.argmax(cvo.genotype_probabilities))

  def example_matches_call_variants_output(example, call_variants_output):
    # An example corresponds to a CVO when both the variant and the alt
    # allele indices agree.
    return (tf_utils.example_variant(example) == call_variants_output.variant
            and tf_utils.example_alt_alleles_indices(
                example) == call_variants_output.alt_allele_indices.indices)

  for call_variants_output in call_variants_outputs:
    # Find all matching examples.
    matches = [
        ex for ex in examples
        if example_matches_call_variants_output(ex, call_variants_output)
    ]
    # We should have exactly one match.
    self.assertEqual(len(matches), 1)
    example = matches[0]
    # Check that we've faithfully copied in the alt alleles (though currently
    # as implemented we find our example using this information so it cannot
    # fail). Included here in case that changes in the future.
    self.assertEqual(
        list(tf_utils.example_alt_alleles_indices(example)),
        list(call_variants_output.alt_allele_indices.indices))
    # We should have exactly three genotype probabilities (assuming our
    # ploidy == 2).
    self.assertEqual(len(call_variants_output.genotype_probabilities), 3)
    # These are probabilities so they should be between 0 and 1. The all()
    # is required: a bare generator expression is always truthy, so
    # assertTrue on it could never fail.
    self.assertTrue(
        all(0 <= gp <= 1
            for gp in call_variants_output.genotype_probabilities))