Ejemplo n.º 1
0
 def test_005_check_trim_ref(self):
     """Trim a multi-allelic substitution down to its differing base.

     The reference and both alternates share the prefix ``AT`` and the
     suffix ``GG``; the expected result keeps only the middle base and
     starts two positions after the original.
     """
     untrimmed = Variant('20', 14369, 'ATCGG', alt=['ATAGG', 'ATGGG'])
     expected = Variant('20', 14371, 'C', alt=['A', 'G'])
     trimmed = untrimmed.trim()
     failure_msg = 'Trimming failed for {}.'.format(expected)
     self.assertEqual(expected, trimmed, failure_msg)
Ejemplo n.º 2
0
 def test_006_check_trim_ref(self):
     """Check that trimming this deletion returns an equal variant.

     The expected variant is constructed independently of the input
     instead of being an alias of it, so the comparison still detects a
     ``trim`` that modifies the object it is called on.
     """
     v_orig = Variant('20', 14369, 'CCTG', alt=['C'])
     # Separate object: must not share state with ``v_orig``.
     v_expt = Variant('20', 14369, 'CCTG', alt=['C'])
     v_trim = v_orig.trim()
     self.assertEqual(v_expt, v_trim,
                      'Trimming failed for {}.'.format(v_expt))
Ejemplo n.º 3
0
 def test_001_check_trim_start(self):
     """Trim a shared leading ``GG`` and expect the position to shift by two."""
     untrimmed = Variant('20', 14369, 'GGC', alt=['GGA'])
     expected = Variant('20', 14371, 'C', alt=['A'])
     trimmed = untrimmed.trim()
     failure_msg = 'Trimming failed for {}.'.format(expected)
     self.assertEqual(expected, trimmed, failure_msg)
Ejemplo n.º 4
0
 def test_002_check_trim_end(self):
     """Trim a shared trailing ``GG`` and expect the position to stay put."""
     untrimmed = Variant('20', 14369, 'CGG', alt=['AGG'])
     expected = Variant('20', 14369, 'C', alt=['A'])
     trimmed = untrimmed.trim()
     failure_msg = 'Trimming failed for {}.'.format(expected)
     self.assertEqual(expected, trimmed, failure_msg)
Ejemplo n.º 5
0
 def test_raises(self):
     """Expect ``ValueError`` when the variant's ref allele is ``GT``.

     The accompanying reference sequence is ``ATGCTACTGC``; per the
     original note, the ref allele here should have been ``GC``.
     """
     reference = 'ATGCTACTGC'
     # ref should be GC
     mismatched = Variant('my_chrom', 2, 'GT', 'G')
     with self.assertRaises(ValueError):
         get_padded_haplotypes(mismatched, reference, 2)
Ejemplo n.º 6
0
    def test_003_check_merge_multi(self):
        """Merge several variants per haplotype into one two-alt variant.

        Four variants on the first haplotype and two on the second are
        passed through ``self.intervaltree_prep`` and ``_merge_variants``
        (with ``discard_phase=False``). The result is compared attribute
        by attribute with a single expected variant at position 0 whose
        two alternates are the full haplotype sequences.
        """
        ref_seq = 'ATGGTATGCGATTGACC'
        chrom = 'chrom1'

        def hom_alt(pos, ref, alt, qual):
            # All inputs differ only in these four fields; each carries
            # its own fresh ``GT: 1/1`` genotype dict.
            return Variant(chrom, pos, ref, alt=alt, qual=qual,
                           genotype_data={'GT': '1/1'})

        hap1_variants = [
            hom_alt(0, 'ATG', 'G', 5),
            hom_alt(4, 'T', 'G', 5),
            hom_alt(7, 'G', 'GG', 5),
            hom_alt(9, ref_seq[9], ref_seq[9] + 'T', 5),
        ]
        hap2_variants = [
            hom_alt(1, 'T', 'TT', 10),
            hom_alt(2, ref_seq[2:10], ref_seq[2], 10),
        ]

        # POS  0    1   2   3   4   5   6   7   8   9   10
        # REF  A    T   G   G   T   A   T   G   C   G   A
        # H1   -    -   G   G   g   A   T   Gg  C   Gt  A
        # H2   A    Tt  G   -   -   -   -   -   -   -   A

        # The merged record spans positions 0-9 of the reference, with one
        # alternate per haplotype.
        merged_expected = Variant(chrom,
                                  0,
                                  "ATGGTATGCG",
                                  alt=["GGGATGGCGT", "ATTG"],
                                  qual=7.5,
                                  genotype_data={'GT': '1|2'})

        comb_interval, trees = self.intervaltree_prep(
            hap1_variants, hap2_variants, ref_seq)
        # Phase is kept, otherwise the two alts could come back swapped.
        merged = _merge_variants(comb_interval,
                                 trees,
                                 ref_seq,
                                 discard_phase=False)

        checked_attrs = ('chrom', 'pos', 'ref', 'qual', 'alt', 'gt', 'phased')
        for attr in checked_attrs:
            want = getattr(merged_expected, attr)
            have = getattr(merged, attr)
            failure_msg = 'Merging failed for {}:{} {}.'.format(
                merged_expected.chrom, merged_expected.pos + 1, attr)
            self.assertEqual(want, have, failure_msg)
Ejemplo n.º 7
0
 def test_050_check_variant_contents(self):
     """Compare everything ``self.vcf_reader.fetch()`` yields to five records.

     The expected records are listed in the order they must be returned;
     the comparison is done on the whole sequence at once.
     """
     expected_variants = [
         Variant(
             'chr1', 14369, 'G', alt=['A'],
             ident='rs6054257', qual=29, filt='PASS',
             info=dict(NS=3, DP=14, AF=0.5, DB=True, H2=True),
             genotype_data=OrderedDict([
                 ('GT', '1|0'), ('GQ', '48'), ('DP', '8'), ('HQ', '51,51'),
             ]),
         ),
         Variant(
             'chr2', 17329, 'T', alt=['A'],
             ident='.', qual=3, filt='q10',
             info=dict(NS=3, DP=11, AF=0.017),
             genotype_data=OrderedDict([
                 ('GT', '0|0'), ('GQ', '49'), ('DP', '3'), ('HQ', '58,50'),
             ]),
         ),
         Variant(
             'chr10', 1110695, 'A', alt=['G', 'T'],
             ident='rs6040355', qual=67, filt='PASS',
             info=dict(NS=2, DP=10, AF=[0.333, 0.667], AA='T', DB=True),
             genotype_data=OrderedDict([
                 ('GT', '1|2'), ('GQ', '21'), ('DP', '6'), ('HQ', '23,27'),
             ]),
         ),
         Variant(
             'chr20', 1230236, 'T', alt=['.'],
             ident='.', qual=47, filt='PASS',
             info=dict(NS=3, DP=13, AA='T'),
             genotype_data=OrderedDict([
                 ('GT', '0|0'), ('GQ', '54'), ('DP', '7'), ('HQ', '56,60'),
             ]),
         ),
         Variant(
             'chrX', 1234566, 'GTCT', alt=['G', 'GTACT'],
             ident='microsat1', qual=50, filt='PASS',
             info=dict(NS=3, DP=9, AA='G'),
             # This record has no HQ entry.
             genotype_data=OrderedDict([
                 ('GT', '1/1'), ('GQ', '40'), ('DP', '3'),
             ]),
         ),
     ]
     fetched = list(self.vcf_reader.fetch())
     self.assertSequenceEqual(fetched, expected_variants)
Ejemplo n.º 8
0
 def test_015_empty_info(self):
     """A variant built without an ``info`` argument reports ``.`` as its info string."""
     # Work on a copy so the shared base parameters stay intact.
     kwargs = deepcopy(self.base_parameters)
     del kwargs['info']
     self.assertEqual(Variant(**kwargs).info_string, ".")
Ejemplo n.º 9
0
 def _make_variant(self, pos, ref, alt, gt):
     """Build a ``Variant`` on ``self.chrom`` with a ``|``-separated genotype.

     ``gt`` is unpacked into the two slots of the ``GT`` string; ``GQ`` and
     the info field are taken from ``self.qual`` and ``self.info``.
     """
     genotype = {
         'GT': '{}|{}'.format(*gt),
         'GQ': self.qual,
     }
     return Variant(self.chrom, pos, ref, alt,
                    genotype_data=genotype, info=self.info)