def test_parse_literal_with_contig_map(self, contig_name, expected):
  """Checks ranges.parse_literal on `contig_name` given a two-contig map.

  Args:
    contig_name: The literal handed to ranges.parse_literal.
    expected: The value parse_literal is expected to return.
  """
  known_contigs = {
      name: core_pb2.ContigInfo(name=name, n_bases=n_bases)
      for name, n_bases in (('chr1', 10), ('chr2', 5))
  }
  parsed = ranges.parse_literal(contig_name, contig_map=known_contigs)
  self.assertEqual(parsed, expected)
def setUp(self):
  """Creates two ContigInfo protos, exposed individually and as a list."""
  self.protos = [
      core_pb2.ContigInfo(name=name, n_bases=n_bases, pos_in_fasta=index)
      for index, (name, n_bases) in enumerate([('p1', 10), ('p2', 20)])
  ]
  # proto1/proto2 are the very same objects stored in self.protos.
  self.proto1, self.proto2 = self.protos
def test_from_contigs(self):
  """Expects RangeSet.from_contigs to give a [0, n_bases) range per contig."""
  sizes = [('chr1', 10), ('chr2', 5)]
  contigs = [
      core_pb2.ContigInfo(name=name, n_bases=n_bases)
      for name, n_bases in sizes
  ]
  expected = [ranges.make_range(name, 0, n_bases) for name, n_bases in sizes]
  # Order is not part of the contract being checked here.
  self.assertCountEqual(expected, ranges.RangeSet.from_contigs(contigs))
def test_from_regions_not_empty(self):
  """Checks RangeSet.from_regions on a whole-contig and a sub-range literal.

  'chr1' is expected to expand to the full contig from the contig map, and
  'chr2:10-20' is expected to become make_range('chr2', 9, 20).
  """
  literals = ['chr1', 'chr2:10-20']
  contig_map = {
      'chr1': core_pb2.ContigInfo(name='chr1', n_bases=10),
      'chr2': core_pb2.ContigInfo(name='chr2', n_bases=100),
  }
  expected = [
      ranges.make_range('chr1', 0, 10),
      ranges.make_range('chr2', 9, 20),
  ]
  # assertItemsEqual only exists on Python 2's unittest.TestCase; the
  # order-insensitive comparison is spelled assertCountEqual on Python 3.
  self.assertCountEqual(expected,
                        ranges.RangeSet.from_regions(literals, contig_map))
def test_contigs_n_bases(self):
  """Checks ranges.contigs_n_bases against the summed n_bases of its input."""
  c1 = core_pb2.ContigInfo(name='c', n_bases=100, pos_in_fasta=0)
  c2 = core_pb2.ContigInfo(name='a', n_bases=50, pos_in_fasta=1)
  c3 = core_pb2.ContigInfo(name='b', n_bases=25, pos_in_fasta=2)

  # (expected total, contigs passed in), checked in this order.
  cases = [
      (100, [c1]),
      (50, [c2]),
      (25, [c3]),
      (150, [c1, c2]),
      (125, [c1, c3]),
      (175, [c1, c2, c3]),
  ]
  for expected_total, contigs in cases:
    self.assertEqual(expected_total, ranges.contigs_n_bases(contigs))
def setUp(self):
  """Prepares a VcfWriter on a temp file plus one two-sample variant."""
  self.out_fname = test_utils.test_tmpfile('output.vcf')

  contigs = [
      core_pb2.ContigInfo(name='Chr1', n_bases=50, pos_in_fasta=0),
      core_pb2.ContigInfo(name='Chr2', n_bases=25, pos_in_fasta=1),
  ]
  self.options = core_pb2.VcfWriterOptions(
      contigs=contigs, sample_names=['Fido', 'Spot'], filters=[])
  self.writer = vcf_writer.VcfWriter.to_file(self.out_fname, self.options)

  self.variant = test_utils.make_variant(
      chrom='Chr1', start=10, alleles=['A', 'C'])
  # One call per sample, added in the same order as sample_names above.
  for sample, genotype in (('Fido', [0, 0]), ('Spot', [0, 1])):
    self.variant.calls.add(genotype=genotype, call_set_name=sample)
def test_parse_literal_with_contig_map_and_bad_input_raises_exception(
    self, bad_literal):
  """Expects parse_literal to raise ValueError for `bad_literal`.

  Args:
    bad_literal: A literal that should be rejected when only 'chr1' is in
      the contig map.
  """
  only_chr1 = {'chr1': core_pb2.ContigInfo(name='chr1', n_bases=10)}
  with self.assertRaises(ValueError):
    ranges.parse_literal(bad_literal, contig_map=only_chr1)
def write_variant_to_tempfile(self, variant):
  """Writes `variant` to a temporary VCF file.

  Args:
    variant: The variant to write; its calls supply the sample names.

  Returns:
    The path of the temporary VCF file that was written.
  """
  vcf_path = test_utils.test_tmpfile('test.vcf')
  sample_names = [call.call_set_name for call in variant.calls]
  vcf_out = genomics_io.make_vcf_writer(
      outfile=vcf_path,
      contigs=[core_pb2.ContigInfo(name='20')],
      samples=sample_names,
      filters=[])
  with vcf_out:
    vcf_out.write(variant)
  return vcf_path
def setUp(self):
  """Creates two test reads (chr1, chr2) and the matching contig list."""
  read1_fields = dict(
      bases='ACCGT',
      chrom='chr1',
      start=10,
      cigar='5M',
      mapq=50,
      quals=range(30, 35),
      name='read1')
  # NOTE(review): read2 has 6 bases and 6 quals but a '7M' cigar -- confirm
  # that the mismatch is intentional.
  read2_fields = dict(
      bases='AACCTT',
      chrom='chr2',
      start=15,
      cigar='7M',
      mapq=40,
      quals=range(20, 26),
      name='read2')
  self.read1 = test_utils.make_read(**read1_fields)
  self.read2 = test_utils.make_read(**read2_fields)
  self.contigs = [core_pb2.ContigInfo(name=name) for name in ('chr1', 'chr2')]
def test_sort_ranges(self):
  """Checks ranges.sorted_ranges both with and without a contig list."""
  contigs = [
      core_pb2.ContigInfo(name=name, n_bases=n_bases, pos_in_fasta=position)
      for position, (name, n_bases) in enumerate(
          [('c', 100), ('a', 76), ('b', 121)])
  ]
  unsorted = ranges.parse_literals(
      ['a:10', 'c:20', 'b:30', 'b:10-15', 'b:10', 'a:5'])

  # No contigs given: contig names are ordered lexicographically.
  ordered_by_name = ranges.parse_literals(
      ['a:5', 'a:10', 'b:10', 'b:10-15', 'b:30', 'c:20'])
  self.assertEqual(ordered_by_name, ranges.sorted_ranges(unsorted))

  # Contigs given: ranges follow the order of the contigs themselves.
  ordered_by_contig = ranges.parse_literals(
      ['c:20', 'a:5', 'a:10', 'b:10', 'b:10-15', 'b:30'])
  self.assertEqual(ordered_by_contig,
                   ranges.sorted_ranges(unsorted, contigs))
def _make_contigs(specs): """Makes ContigInfo protos from specs. Args: specs: A list of 2- or 3-tuples. All tuples should be of the same length. If 2-element, these should be the name and length in basepairs of each contig, and their pos_in_fasta will be set to their index in the list. If the 3-element, the tuple should contain name, length, and pos_in_fasta. Returns: A list of ContigInfo protos, one for each spec in specs. """ if specs and len(specs[0]) == 3: return [ core_pb2.ContigInfo(name=name, n_bases=length, pos_in_fasta=i) for name, length, i in specs ] else: return [ core_pb2.ContigInfo(name=name, n_bases=length, pos_in_fasta=i) for i, (name, length) in enumerate(specs) ]
def test_writing_canned_variants(self):
  """Writes the first five canned variants and checks the VCF text produced."""
  # The canned variants are stored as a TFRecord file.
  tfrecord_file = test_utils.genomics_core_testdata(
      'test_samples.vcf.golden.tfrecord')
  contig_sizes = [
      ('chr1', 248956422),
      ('chr2', 242193529),
      ('chr3', 198295559),
      ('chrX', 156040895),
  ]
  filter_ids = [
      'LowQual',
      'VQSRTrancheINDEL95.00to96.00',
      'VQSRTrancheINDEL96.00to97.00',
      'VQSRTrancheINDEL97.00to99.00',
      'VQSRTrancheINDEL99.00to99.50',
      'VQSRTrancheINDEL99.50to99.90',
      'VQSRTrancheINDEL99.90to99.95',
      'VQSRTrancheINDEL99.95to100.00+',
      'VQSRTrancheINDEL99.95to100.00',
      'VQSRTrancheSNP99.50to99.60',
      'VQSRTrancheSNP99.60to99.80',
      'VQSRTrancheSNP99.80to99.90',
      'VQSRTrancheSNP99.90to99.95',
      'VQSRTrancheSNP99.95to100.00+',
      'VQSRTrancheSNP99.95to100.00',
  ]
  writer_options = core_pb2.VcfWriterOptions(
      contigs=[
          core_pb2.ContigInfo(name=name, n_bases=n_bases)
          for name, n_bases in contig_sizes
      ],
      sample_names=['NA12878_18_99'],
      filters=[
          core_pb2.VcfFilterInfo(id=filter_id) for filter_id in filter_ids
      ])

  canned_variants = list(
      io_utils.read_tfrecords(tfrecord_file, proto=variants_pb2.Variant))
  out_fname = test_utils.test_tmpfile('output.vcf')
  with vcf_writer.VcfWriter.to_file(out_fname, writer_options) as writer:
    # Only the first five records are written and compared below.
    for variant in canned_variants[:5]:
      writer.write(variant)

  # The file must match this header and these five records line for line.
  # pylint: disable=line-too-long
  expected_vcf_content = [
      '##fileformat=VCFv4.2\n',
      '##FILTER=<ID=PASS,Description="All filters passed">\n',
      '##FILTER=<ID=LowQual,Description="">\n',
      '##FILTER=<ID=VQSRTrancheINDEL95.00to96.00,Description="">\n',
      '##FILTER=<ID=VQSRTrancheINDEL96.00to97.00,Description="">\n',
      '##FILTER=<ID=VQSRTrancheINDEL97.00to99.00,Description="">\n',
      '##FILTER=<ID=VQSRTrancheINDEL99.00to99.50,Description="">\n',
      '##FILTER=<ID=VQSRTrancheINDEL99.50to99.90,Description="">\n',
      '##FILTER=<ID=VQSRTrancheINDEL99.90to99.95,Description="">\n',
      '##FILTER=<ID=VQSRTrancheINDEL99.95to100.00+,Description="">\n',
      '##FILTER=<ID=VQSRTrancheINDEL99.95to100.00,Description="">\n',
      '##FILTER=<ID=VQSRTrancheSNP99.50to99.60,Description="">\n',
      '##FILTER=<ID=VQSRTrancheSNP99.60to99.80,Description="">\n',
      '##FILTER=<ID=VQSRTrancheSNP99.80to99.90,Description="">\n',
      '##FILTER=<ID=VQSRTrancheSNP99.90to99.95,Description="">\n',
      '##FILTER=<ID=VQSRTrancheSNP99.95to100.00+,Description="">\n',
      '##FILTER=<ID=VQSRTrancheSNP99.95to100.00,Description="">\n',
      '##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n',
      '##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">\n',
      '##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read depth of all '
      'passing filters reads.">\n',
      '##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Read depth of all '
      'passing filters reads for each allele.">\n',
      '##FORMAT=<ID=VAF,Number=A,Type=Float,Description=\"Variant allele '
      'fractions.">\n',
      '##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype '
      'likelihoods, log10 encoded">\n',
      '##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Genotype '
      'likelihoods, Phred encoded">\n',
      '##INFO=<ID=END,Number=1,Type=Integer,Description="Stop position of '
      'the interval">\n',
      '##contig=<ID=chr1,length=248956422>\n',
      '##contig=<ID=chr2,length=242193529>\n',
      '##contig=<ID=chr3,length=198295559>\n',
      '##contig=<ID=chrX,length=156040895>\n',
      '#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tNA12878_18_99\n',
      'chr1\t13613\t.\tT\tA\t39.88\tVQSRTrancheSNP99.90to99.95\t.\tGT:GQ:DP:AD:PL\t0/1:16:4:1,3:68,0,16\n',
      'chr1\t13813\t.\tT\tG\t90.28\tPASS\t.\tGT:GQ:DP:AD:PL\t1/1:9:3:0,3:118,9,0\n',
      'chr1\t13838\trs28428499\tC\tT\t62.74\tPASS\t.\tGT:GQ:DP:AD:PL\t1/1:6:2:0,2:90,6,0\n',
      'chr1\t14397\trs756427959\tCTGT\tC\t37.73\tPASS\t.\tGT:GQ:DP:AD:PL\t0/1:75:5:3,2:75,0,152\n',
      'chr1\t14522\t.\tG\tA\t49.77\tVQSRTrancheSNP99.60to99.80\t.\tGT:GQ:DP:AD:PL\t0/1:78:10:6,4:78,0,118\n'
  ]
  # pylint: enable=line-too-long

  with tf.gfile.GFile(out_fname, 'r') as vcf_file:
    written_lines = vcf_file.readlines()
  self.assertEqual(written_lines, expected_vcf_content)
from __future__ import absolute_import from __future__ import division from __future__ import print_function from absl.testing import absltest from deepvariant.core import ranges from deepvariant.core import test_utils from deepvariant.core.protos import core_pb2 from deepvariant.core.python import vcf_reader expected_sites_contigs = [ core_pb2.ContigInfo(name='chr1', pos_in_fasta=0, n_bases=248956422), core_pb2.ContigInfo(name='chr2', pos_in_fasta=1, n_bases=242193529), core_pb2.ContigInfo(name='chr3', pos_in_fasta=2, n_bases=198295559), core_pb2.ContigInfo(name='chr4', pos_in_fasta=3, n_bases=190214555), core_pb2.ContigInfo(name='chr5', pos_in_fasta=4, n_bases=181538259), core_pb2.ContigInfo(name='chr6', pos_in_fasta=5, n_bases=170805979), core_pb2.ContigInfo(name='chr7', pos_in_fasta=6, n_bases=159345973), core_pb2.ContigInfo(name='chr8', pos_in_fasta=7, n_bases=145138636), core_pb2.ContigInfo(name='chr9', pos_in_fasta=8, n_bases=138394717), core_pb2.ContigInfo(name='chr10', pos_in_fasta=9, n_bases=133797422), core_pb2.ContigInfo(name='chr11', pos_in_fasta=10, n_bases=135086622), core_pb2.ContigInfo(name='chr12', pos_in_fasta=11, n_bases=133275309), core_pb2.ContigInfo(name='chr13', pos_in_fasta=12, n_bases=114364328), core_pb2.ContigInfo(name='chr14', pos_in_fasta=13, n_bases=107043718), core_pb2.ContigInfo(name='chr15', pos_in_fasta=14, n_bases=101991189), core_pb2.ContigInfo(name='chr16', pos_in_fasta=15, n_bases=90338345),
from deepvariant import postprocess_variants from deepvariant import test_utils from deepvariant.core import genomics_math from deepvariant.core import io_utils from deepvariant.core import variantutils from deepvariant.core.protos import core_pb2 from deepvariant.protos import deepvariant_pb2 from deepvariant.testing import flagsaver FLAGS = flags.FLAGS _DEFAULT_SAMPLE_NAME = 'NA12878' # Test contigs for gVCF merging code. _CONTIGS = [ core_pb2.ContigInfo(name='1', n_bases=100), core_pb2.ContigInfo(name='2', n_bases=200), core_pb2.ContigInfo(name='10', n_bases=300), ] def setUpModule(): test_utils.init() def _create_variant(ref_name, start, ref_base, alt_bases, qual, filter_field, genotype, gq, likelihoods): """Creates a Variant record for testing. Args: ref_name: reference name for this variant
def test_sam_contigs(self):
  """Checks that the reader opened on self.bam reports the expected contigs."""
  # (name, n_bases) in the expected order; pos_in_fasta is the list index.
  contig_sizes = [
      ('chrM', 16571),
      ('chr1', 249250621),
      ('chr2', 243199373),
      ('chr3', 198022430),
      ('chr4', 191154276),
      ('chr5', 180915260),
      ('chr6', 171115067),
      ('chr7', 159138663),
      ('chr8', 146364022),
      ('chr9', 141213431),
      ('chr10', 135534747),
      ('chr11', 135006516),
      ('chr12', 133851895),
      ('chr13', 115169878),
      ('chr14', 107349540),
      ('chr15', 102531392),
      ('chr16', 90354753),
      ('chr17', 81195210),
      ('chr18', 78077248),
      ('chr19', 59128983),
      ('chr20', 63025520),
      ('chr21', 48129895),
      ('chr22', 51304566),
      ('chrX', 155270560),
      ('chrY', 59373566),
  ]
  reader = sam_reader.SamReader.from_file(self.bam, self.options)
  with reader:
    expected_contigs = [
        core_pb2.ContigInfo(name=name, pos_in_fasta=position, n_bases=n_bases)
        for position, (name, n_bases) in enumerate(contig_sizes)
    ]
    self.assertEqual(expected_contigs, reader.contigs)
def write_test_protos(self, filename):
  """Writes ten ContigInfo protos (named '0' through '9') as tfrecords.

  Args:
    filename: Name of the temporary file to create.

  Returns:
    A (protos, path) tuple: the protos written and the file they went to.
  """
  out_path = test_utils.test_tmpfile(filename)
  records = [core_pb2.ContigInfo(name=str(index)) for index in range(10)]
  io.write_tfrecords(records, out_path)
  return records, out_path