예제 #1
0
 def test_parse_literal_with_contig_map(self, contig_name, expected):
   """Checks parse_literal's result for `contig_name` given a contig map.

   Args:
     contig_name: The literal handed to ranges.parse_literal.
     expected: The value parse_literal is expected to return.
   """
   # Two known contigs: chr1 with 10 bases and chr2 with 5 bases.
   contig_lengths = (('chr1', 10), ('chr2', 5))
   contig_map = {
       name: core_pb2.ContigInfo(name=name, n_bases=n_bases)
       for name, n_bases in contig_lengths
   }
   parsed = ranges.parse_literal(contig_name, contig_map=contig_map)
   self.assertEqual(parsed, expected)
예제 #2
0
 def setUp(self):
     """Creates two ContigInfo protos, kept both individually and as a list."""
     self.protos = [
         core_pb2.ContigInfo(name='p1', n_bases=10, pos_in_fasta=0),
         core_pb2.ContigInfo(name='p2', n_bases=20, pos_in_fasta=1),
     ]
     # proto1/proto2 refer to the very same objects stored in self.protos.
     self.proto1, self.proto2 = self.protos
예제 #3
0
 def test_from_contigs(self):
   """Checks RangeSet.from_contigs yields one range spanning each contig."""
   chr1 = core_pb2.ContigInfo(name='chr1', n_bases=10)
   chr2 = core_pb2.ContigInfo(name='chr2', n_bases=5)
   # Each contig is expected to map to a range from 0 to its n_bases.
   expected = [
       ranges.make_range('chr1', 0, 10),
       ranges.make_range('chr2', 0, 5),
   ]
   actual = ranges.RangeSet.from_contigs([chr1, chr2])
   self.assertCountEqual(expected, actual)
예제 #4
0
 def test_from_regions_not_empty(self):
   """Checks RangeSet.from_regions for a bare contig and a bounded literal.

   The bare literal 'chr1' is expected to produce the range 0-10 (chr1 has
   n_bases=10 in the contig map), and 'chr2:10-20' is expected to produce
   make_range('chr2', 9, 20).
   """
   literals = ['chr1', 'chr2:10-20']
   contig_map = {
       'chr1': core_pb2.ContigInfo(name='chr1', n_bases=10),
       'chr2': core_pb2.ContigInfo(name='chr2', n_bases=100),
   }
   expected = [
       ranges.make_range('chr1', 0, 10),
       ranges.make_range('chr2', 9, 20),
   ]
   # assertCountEqual is the order-insensitive comparison already used by
   # test_from_contigs; assertItemsEqual does not exist in Python 3's unittest.
   self.assertCountEqual(
       expected, ranges.RangeSet.from_regions(literals, contig_map))
예제 #5
0
 def test_contigs_n_bases(self):
   """Checks contigs_n_bases totals n_bases over several contig subsets."""
   c1 = core_pb2.ContigInfo(name='c', n_bases=100, pos_in_fasta=0)
   c2 = core_pb2.ContigInfo(name='a', n_bases=50, pos_in_fasta=1)
   c3 = core_pb2.ContigInfo(name='b', n_bases=25, pos_in_fasta=2)
   # (expected total, contigs) pairs, checked in this order.
   cases = [
       (100, [c1]),
       (50, [c2]),
       (25, [c3]),
       (150, [c1, c2]),
       (125, [c1, c3]),
       (175, [c1, c2, c3]),
   ]
   for expected_total, contigs in cases:
     self.assertEqual(expected_total, ranges.contigs_n_bases(contigs))
예제 #6
0
 def setUp(self):
   """Prepares an output path, writer options, a VcfWriter and one variant.

   The variant sits on 'Chr1' at start=10 with alleles A/C and carries one
   call per sample: 'Fido' with genotype [0, 0] and 'Spot' with [0, 1].
   """
   self.out_fname = test_utils.test_tmpfile('output.vcf')
   contigs = [
       core_pb2.ContigInfo(name='Chr1', n_bases=50, pos_in_fasta=0),
       core_pb2.ContigInfo(name='Chr2', n_bases=25, pos_in_fasta=1),
   ]
   self.options = core_pb2.VcfWriterOptions(
       contigs=contigs, sample_names=['Fido', 'Spot'], filters=[])
   self.writer = vcf_writer.VcfWriter.to_file(self.out_fname, self.options)
   self.variant = test_utils.make_variant(
       chrom='Chr1', start=10, alleles=['A', 'C'])
   # Calls are added in the same order as the sample names above.
   sample_genotypes = (('Fido', [0, 0]), ('Spot', [0, 1]))
   for sample_name, genotype in sample_genotypes:
     self.variant.calls.add(genotype=genotype, call_set_name=sample_name)
예제 #7
0
 def test_parse_literal_with_contig_map_and_bad_input_raises_exception(
      self, bad_literal):
   """Checks parse_literal raises ValueError for `bad_literal`.

   Args:
     bad_literal: A literal that parse_literal is expected to reject when
       only 'chr1' (10 bases) is present in the contig map.
   """
   with self.assertRaises(ValueError):
     single_contig_map = {
         'chr1': core_pb2.ContigInfo(name='chr1', n_bases=10),
     }
     ranges.parse_literal(bad_literal, contig_map=single_contig_map)
예제 #8
0
 def write_variant_to_tempfile(self, variant):
   """Writes `variant` to a temporary VCF file and returns that file's path.

   Args:
     variant: The variant to write; the call_set_name of each of its calls
       is used as a sample name for the writer.

   Returns:
     The path of the file obtained from test_utils.test_tmpfile('test.vcf').
   """
   path = test_utils.test_tmpfile('test.vcf')
   # The writer is configured with a single contig named '20'.
   contigs = [core_pb2.ContigInfo(name='20')]
   sample_names = [call.call_set_name for call in variant.calls]
   writer = genomics_io.make_vcf_writer(
       outfile=path, contigs=contigs, samples=sample_names, filters=[])
   with writer:
     writer.write(variant)
   return path
예제 #9
0
 def setUp(self):
     """Creates two test reads (on chr1 and chr2) and the matching contigs."""
     read1_fields = dict(
         bases='ACCGT',
         chrom='chr1',
         start=10,
         cigar='5M',
         mapq=50,
         quals=range(30, 35),
         name='read1',
     )
     # NOTE(review): bases has 6 characters and quals has 6 entries, but the
     # cigar is '7M' -- confirm whether this mismatch is intentional.
     read2_fields = dict(
         bases='AACCTT',
         chrom='chr2',
         start=15,
         cigar='7M',
         mapq=40,
         quals=range(20, 26),
         name='read2',
     )
     self.read1 = test_utils.make_read(**read1_fields)
     self.read2 = test_utils.make_read(**read2_fields)
     self.contigs = [
         core_pb2.ContigInfo(name=chrom) for chrom in ('chr1', 'chr2')
     ]
예제 #10
0
  def test_sort_ranges(self):
    """Checks sorted_ranges ordering both with and without contigs."""
    # pos_in_fasta follows list order, so the contig order is c, a, b.
    contig_lengths = [('c', 100), ('a', 76), ('b', 121)]
    contigs = [
        core_pb2.ContigInfo(name=name, n_bases=n_bases, pos_in_fasta=position)
        for position, (name, n_bases) in enumerate(contig_lengths)
    ]
    unsorted = ranges.parse_literals(
        ['a:10', 'c:20', 'b:30', 'b:10-15', 'b:10', 'a:5'])

    # Without contigs, contig names are ordered lexicographically.
    expected_by_name = ranges.parse_literals(
        ['a:5', 'a:10', 'b:10', 'b:10-15', 'b:30', 'c:20'])
    self.assertEqual(expected_by_name, ranges.sorted_ranges(unsorted))

    # With contigs, ordering follows the position of the contigs themselves.
    expected_by_position = ranges.parse_literals(
        ['c:20', 'a:5', 'a:10', 'b:10', 'b:10-15', 'b:30'])
    self.assertEqual(expected_by_position,
                     ranges.sorted_ranges(unsorted, contigs))
def _make_contigs(specs):
    """Builds ContigInfo protos from a list of tuple specs.

    Args:
      specs: A list of 2- or 3-tuples. All tuples should be of the same
        length. A 2-tuple holds the name and length in basepairs of a contig,
        and its pos_in_fasta is set to its index in the list. A 3-tuple holds
        the name, length, and pos_in_fasta.

    Returns:
      A list of ContigInfo protos, one for each spec in specs.
    """
    # Only the first spec is inspected to decide which tuple layout is used.
    has_explicit_positions = bool(specs) and len(specs[0]) == 3
    if not has_explicit_positions:
        # Fill in the list index as the position for (name, length) specs.
        specs = [
            (name, length, index)
            for index, (name, length) in enumerate(specs)
        ]
    return [
        core_pb2.ContigInfo(name=name, n_bases=length, pos_in_fasta=position)
        for name, length, position in specs
    ]
예제 #12
0
  def test_writing_canned_variants(self):
    """Writes the first five canned variants and checks the resulting VCF text.

    The variants are read from a TFRecord file, written through
    VcfWriter.to_file, and the output file's lines are compared against the
    expected header and record lines.
    """
    # The canned variants are stored in the TF record format.
    tfrecord_file = test_utils.genomics_core_testdata(
        'test_samples.vcf.golden.tfrecord')

    # Declared once; used for both the writer options and the expected header.
    contig_lengths = [
        ('chr1', 248956422),
        ('chr2', 242193529),
        ('chr3', 198295559),
        ('chrX', 156040895),
    ]
    filter_ids = [
        'LowQual',
        'VQSRTrancheINDEL95.00to96.00',
        'VQSRTrancheINDEL96.00to97.00',
        'VQSRTrancheINDEL97.00to99.00',
        'VQSRTrancheINDEL99.00to99.50',
        'VQSRTrancheINDEL99.50to99.90',
        'VQSRTrancheINDEL99.90to99.95',
        'VQSRTrancheINDEL99.95to100.00+',
        'VQSRTrancheINDEL99.95to100.00',
        'VQSRTrancheSNP99.50to99.60',
        'VQSRTrancheSNP99.60to99.80',
        'VQSRTrancheSNP99.80to99.90',
        'VQSRTrancheSNP99.90to99.95',
        'VQSRTrancheSNP99.95to100.00+',
        'VQSRTrancheSNP99.95to100.00',
    ]

    writer_options = core_pb2.VcfWriterOptions(
        contigs=[
            core_pb2.ContigInfo(name=name, n_bases=n_bases)
            for name, n_bases in contig_lengths
        ],
        sample_names=['NA12878_18_99'],
        filters=[
            core_pb2.VcfFilterInfo(id=filter_id) for filter_id in filter_ids
        ])

    variant_records = list(
        io_utils.read_tfrecords(tfrecord_file, proto=variants_pb2.Variant))
    out_fname = test_utils.test_tmpfile('output.vcf')
    with vcf_writer.VcfWriter.to_file(out_fname, writer_options) as writer:
      for record in variant_records[:5]:
        writer.write(record)

    # Check: are the variants written as expected?
    # pylint: disable=line-too-long
    expected_vcf_content = [
        '##fileformat=VCFv4.2\n',
        '##FILTER=<ID=PASS,Description="All filters passed">\n',
    ]
    # One FILTER header line per configured filter, in configuration order.
    expected_vcf_content += [
        '##FILTER=<ID=%s,Description="">\n' % filter_id
        for filter_id in filter_ids
    ]
    expected_vcf_content += [
        '##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n',
        '##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">\n',
        '##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read depth of all '
        'passing filters reads.">\n',
        '##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Read depth of all '
        'passing filters reads for each allele.">\n',
        '##FORMAT=<ID=VAF,Number=A,Type=Float,Description="Variant allele '
        'fractions.">\n',
        '##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype '
        'likelihoods, log10 encoded">\n',
        '##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Genotype '
        'likelihoods, Phred encoded">\n',
        '##INFO=<ID=END,Number=1,Type=Integer,Description="Stop position of '
        'the interval">\n',
    ]
    # One contig header line per configured contig, in configuration order.
    expected_vcf_content += [
        '##contig=<ID=%s,length=%d>\n' % (name, n_bases)
        for name, n_bases in contig_lengths
    ]
    expected_vcf_content += [
        '#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tNA12878_18_99\n',
        'chr1\t13613\t.\tT\tA\t39.88\tVQSRTrancheSNP99.90to99.95\t.\tGT:GQ:DP:AD:PL\t0/1:16:4:1,3:68,0,16\n',
        'chr1\t13813\t.\tT\tG\t90.28\tPASS\t.\tGT:GQ:DP:AD:PL\t1/1:9:3:0,3:118,9,0\n',
        'chr1\t13838\trs28428499\tC\tT\t62.74\tPASS\t.\tGT:GQ:DP:AD:PL\t1/1:6:2:0,2:90,6,0\n',
        'chr1\t14397\trs756427959\tCTGT\tC\t37.73\tPASS\t.\tGT:GQ:DP:AD:PL\t0/1:75:5:3,2:75,0,152\n',
        'chr1\t14522\t.\tG\tA\t49.77\tVQSRTrancheSNP99.60to99.80\t.\tGT:GQ:DP:AD:PL\t0/1:78:10:6,4:78,0,118\n',
    ]
    # pylint: enable=line-too-long

    with tf.gfile.GFile(out_fname, 'r') as f:
      self.assertEqual(f.readlines(), expected_vcf_content)
예제 #13
0
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function



from absl.testing import absltest

from deepvariant.core import ranges
from deepvariant.core import test_utils
from deepvariant.core.protos import core_pb2
from deepvariant.core.python import vcf_reader

expected_sites_contigs = [
    core_pb2.ContigInfo(name='chr1', pos_in_fasta=0, n_bases=248956422),
    core_pb2.ContigInfo(name='chr2', pos_in_fasta=1, n_bases=242193529),
    core_pb2.ContigInfo(name='chr3', pos_in_fasta=2, n_bases=198295559),
    core_pb2.ContigInfo(name='chr4', pos_in_fasta=3, n_bases=190214555),
    core_pb2.ContigInfo(name='chr5', pos_in_fasta=4, n_bases=181538259),
    core_pb2.ContigInfo(name='chr6', pos_in_fasta=5, n_bases=170805979),
    core_pb2.ContigInfo(name='chr7', pos_in_fasta=6, n_bases=159345973),
    core_pb2.ContigInfo(name='chr8', pos_in_fasta=7, n_bases=145138636),
    core_pb2.ContigInfo(name='chr9', pos_in_fasta=8, n_bases=138394717),
    core_pb2.ContigInfo(name='chr10', pos_in_fasta=9, n_bases=133797422),
    core_pb2.ContigInfo(name='chr11', pos_in_fasta=10, n_bases=135086622),
    core_pb2.ContigInfo(name='chr12', pos_in_fasta=11, n_bases=133275309),
    core_pb2.ContigInfo(name='chr13', pos_in_fasta=12, n_bases=114364328),
    core_pb2.ContigInfo(name='chr14', pos_in_fasta=13, n_bases=107043718),
    core_pb2.ContigInfo(name='chr15', pos_in_fasta=14, n_bases=101991189),
    core_pb2.ContigInfo(name='chr16', pos_in_fasta=15, n_bases=90338345),
from deepvariant import postprocess_variants
from deepvariant import test_utils
from deepvariant.core import genomics_math
from deepvariant.core import io_utils
from deepvariant.core import variantutils
from deepvariant.core.protos import core_pb2
from deepvariant.protos import deepvariant_pb2
from deepvariant.testing import flagsaver

# Module-level shorthand for the FLAGS object exposed by `flags`.
FLAGS = flags.FLAGS

# Sample name constant. Its users are outside this view -- presumably the
# sample name given to test records when none is specified; TODO confirm.
_DEFAULT_SAMPLE_NAME = 'NA12878'

# Test contigs for gVCF merging code.
# Three contigs named '1', '2' and '10' with 100, 200 and 300 bases; no
# pos_in_fasta is set on any of them.
_CONTIGS = [
    core_pb2.ContigInfo(name='1', n_bases=100),
    core_pb2.ContigInfo(name='2', n_bases=200),
    core_pb2.ContigInfo(name='10', n_bases=300),
]


def setUpModule():
  """Module-level test fixture hook; delegates to test_utils.init()."""
  test_utils.init()


def _create_variant(ref_name, start, ref_base, alt_bases, qual, filter_field,
                    genotype, gq, likelihoods):
  """Creates a Variant record for testing.

  Args:
    ref_name: reference name for this variant
 def test_sam_contigs(self):
     """Checks the contigs reported by a SamReader opened on self.bam."""
     # (name, n_bases) in file order; pos_in_fasta is the index in this list.
     contig_lengths = [
         ('chrM', 16571),
         ('chr1', 249250621),
         ('chr2', 243199373),
         ('chr3', 198022430),
         ('chr4', 191154276),
         ('chr5', 180915260),
         ('chr6', 171115067),
         ('chr7', 159138663),
         ('chr8', 146364022),
         ('chr9', 141213431),
         ('chr10', 135534747),
         ('chr11', 135006516),
         ('chr12', 133851895),
         ('chr13', 115169878),
         ('chr14', 107349540),
         ('chr15', 102531392),
         ('chr16', 90354753),
         ('chr17', 81195210),
         ('chr18', 78077248),
         ('chr19', 59128983),
         ('chr20', 63025520),
         ('chr21', 48129895),
         ('chr22', 51304566),
         ('chrX', 155270560),
         ('chrY', 59373566),
     ]
     reader = sam_reader.SamReader.from_file(self.bam, self.options)
     with reader:
         expected_contigs = [
             core_pb2.ContigInfo(
                 name=name, pos_in_fasta=position, n_bases=n_bases)
             for position, (name, n_bases) in enumerate(contig_lengths)
         ]
         self.assertEqual(expected_contigs, reader.contigs)
예제 #16
0
 def write_test_protos(self, filename):
     """Writes ten ContigInfo protos to a temporary TFRecord file.

     Args:
       filename: Name passed to test_utils.test_tmpfile to obtain the path.

     Returns:
       A (protos, path) tuple: the protos written (named '0' through '9')
       and the path they were written to.
     """
     contig_names = [str(index) for index in range(10)]
     protos = [core_pb2.ContigInfo(name=name) for name in contig_names]
     path = test_utils.test_tmpfile(filename)
     io.write_tfrecords(protos, path)
     return protos, path