Ejemplo n.º 1
0
 def test_indels(self):
     ''' Allele calling on rows whose alleles differ in length (indels). '''
     # REF 'G' with ALTs 'GA' and 'T': the three cases share every column
     # except the sample column, whose per-allele depths (AD) decide which
     # alleles are expected in the call at min_dp=3.
     shared_columns = ['thecontig', '105000', '.', 'G', 'GA,T', '.', '.', '.', 'GT:AD']
     depth_cases = [
         ('0/1:5,10,1', {'GA'}),
         ('0/1:5,5,2', {'G', 'GA'}),
         ('0/1:5,5,3', {'G', 'GA', 'T'}),
     ]
     for sample_column, expected_alleles in depth_cases:
         vcf_row = shared_columns + [sample_column]
         calls = list(assembly.vcfrow_parse_and_call_snps(vcf_row, ['s1'], min_dp=3))
         self.assertEqual(set(calls[0][4]), expected_alleles)

     # REF 'AT' with ALT 'A', called without passing min_dp: the whole
     # result is pinned, including the 105000-105001 coordinates.
     deletion_row = [
         'thecontig', '105000', '.', 'AT', 'A', '.', '.', '.', 'GT:AD',
         '0/1:2,10'
     ]
     calls = list(assembly.vcfrow_parse_and_call_snps(deletion_row, ['s1']))
     self.assertEqual(calls, [('thecontig', 105000, 105001, 's1', ['A'])])
Ejemplo n.º 2
0
 def test_dp_inaccurate(self):
     ''' DP is allowed to disagree with the sum of the AD values. '''
     leading_columns = ['chr10', '105', '.', 'G', 'A', '.', '.', '.', 'GT:DP:AD']

     def call(sample_column, min_dp):
         # Build the full row for one sample column and collect every call.
         vcf_row = leading_columns + [sample_column]
         return list(assembly.vcfrow_parse_and_call_snps(vcf_row, ['s1'], min_dp=min_dp))

     # DP=5 while the ADs sum to 4.
     self.assertEqual(set(call('0/1/1:5:2,2', 1)[0][4]), {'G', 'A'})
     # DP=2 while the ADs sum to 6; both alleles are still expected at min_dp=3.
     self.assertEqual(set(call('0/1/1:2:3,3', 3)[0][4]), {'G', 'A'})
     # DP=10 while the ADs sum to 2; no call is expected at min_dp=3.
     self.assertEqual(call('0/1/1:10:2,0', 3), [])
Ejemplo n.º 3
0
 def test_dp_inaccurate(self):
     ''' A DP value that differs from the AD total must not break calling. '''
     # Each case: (sample column, min_dp, expected allele set).
     # An expected value of None means the result must be empty.
     cases = (
         ('0/1/1:5:2,2', 1, {'G', 'A'}),
         ('0/1/1:2:3,3', 3, {'G', 'A'}),
         ('0/1/1:10:2,0', 3, None),
     )
     for sample_column, depth_floor, expected_alleles in cases:
         vcf_row = ['chr10', '105', '.', 'G', 'A', '.', '.', '.', 'GT:DP:AD', sample_column]
         result = list(assembly.vcfrow_parse_and_call_snps(vcf_row, ['s1'], min_dp=depth_floor))
         if expected_alleles is None:
             self.assertEqual(result, [])
         else:
             self.assertEqual(set(result[0][4]), expected_alleles)
Ejemplo n.º 4
0
 def test_indels(self):
     ''' Indel rows: which alleles are called as the allele depths change. '''

     def call(ref, alt, sample_column, **options):
         # Assemble a single-sample GT:AD row at thecontig:105000 and collect the calls.
         vcf_row = ['thecontig', '105000', '.', ref, alt, '.', '.', '.', 'GT:AD', sample_column]
         return list(assembly.vcfrow_parse_and_call_snps(vcf_row, ['s1'], **options))

     # REF 'G', ALTs 'GA' and 'T', min_dp=3: only the AD values vary.
     result = call('G', 'GA,T', '0/1:5,10,1', min_dp=3)
     self.assertEqual(set(result[0][4]), {'GA'})
     result = call('G', 'GA,T', '0/1:5,5,2', min_dp=3)
     self.assertEqual(set(result[0][4]), {'G', 'GA'})
     result = call('G', 'GA,T', '0/1:5,5,3', min_dp=3)
     self.assertEqual(set(result[0][4]), {'G', 'GA', 'T'})

     # REF 'AT', ALT 'A', min_dp not passed: the full output record is checked.
     result = call('AT', 'A', '0/1:2,10')
     self.assertEqual(result, [('thecontig', 105000, 105001, 's1', ['A'])])
Ejemplo n.º 5
0
 def test_invariant_sites(self):
     ''' Invariant sites (ALT is '.') take a slightly different code path, so
         they get a dedicated test.
     '''
     ref_call = [('LASV.l', 1, 1, 's1', ['T'])]
     # Each case: (FORMAT column, sample column, min_dp, expected output).
     cases = [
         ('GT:DP', '0/0:3', 3, ref_call),
         ('GT', '0/0', 0, ref_call),
         ('GT', '0/0', 1, []),
         ('GT', './.', 1, []),
         ('GT:DP', './.:10', 1, ref_call),
     ]
     for format_column, sample_column, depth_floor, expected in cases:
         vcf_row = ['LASV.l', '1', '.', 'T', '.', '.', '.', '.', format_column, sample_column]
         result = list(assembly.vcfrow_parse_and_call_snps(vcf_row, ['s1'], min_dp=depth_floor))
         self.assertEqual(result, expected)
Ejemplo n.º 6
0
 def test_invariant_sites(self):
     ''' Dedicated coverage for invariant sites (ALT is '.'), whose handling
         differs slightly in the code.
     '''
     site_columns = ['LASV.l', '1', '.', 'T', '.', '.', '.', '.']
     reference_only = [('LASV.l', 1, 1, 's1', ['T'])]

     def call(format_column, sample_column, min_dp):
         # Complete the row with its FORMAT and sample columns, then collect the calls.
         vcf_row = site_columns + [format_column, sample_column]
         return list(assembly.vcfrow_parse_and_call_snps(vcf_row, ['s1'], min_dp=min_dp))

     # DP of 3 with min_dp=3: the reference base is expected.
     self.assertEqual(call('GT:DP', '0/0:3', 3), reference_only)
     # No DP field, min_dp=0: the reference base is still expected.
     self.assertEqual(call('GT', '0/0', 0), reference_only)
     # No DP field, min_dp=1: nothing is expected.
     self.assertEqual(call('GT', '0/0', 1), [])
     # Missing genotype and no DP field: nothing is expected.
     self.assertEqual(call('GT', './.', 1), [])
     # Missing genotype but DP of 10: the reference base is expected.
     self.assertEqual(call('GT:DP', './.:10', 1), reference_only)
Ejemplo n.º 7
0
 def test_het_edgecases(self):
     ''' How min_coverage and major_cutoff interact is not obvious; these cases
         record the desired behavior as understood from Kristian.
         With min_dp=3:
             3G, 4A, 5C ->  G/A/C
             2G, 3A, 3T -> A/T
             2A, 2T -> no call
             2G, 3C -> C
             2A, 3C, 4T -> T
         With min_dp=2:
             2A, 2T -> A/T
      '''
     leading_columns = ['thecontig', '105000', '.', 'G', 'A,C,T', '.', '.', '.', 'GT:AD']

     def call(allele_depths, min_dp):
         # allele_depths is the AD string, ordered G (REF), A, C, T.
         vcf_row = leading_columns + ['0/1:' + allele_depths]
         return list(assembly.vcfrow_parse_and_call_snps(vcf_row, ['s1'], min_dp=min_dp))

     # 3G, 4A, 5C -> G/A/C
     self.assertEqual(set(call('3,4,5,0', 3)[0][4]), {'G', 'A', 'C'})
     # 2G, 3A, 3T -> A/T
     self.assertEqual(set(call('2,3,0,3', 3)[0][4]), {'A', 'T'})
     # 2A, 2T -> no call at min_dp=3 ...
     self.assertEqual(call('0,2,0,2', 3), [])
     # ... but A/T at min_dp=2
     self.assertEqual(set(call('0,2,0,2', 2)[0][4]), {'A', 'T'})
     # 2G, 3C -> C
     self.assertEqual(call('2,0,3,0', 3)[0][4], ['C'])
     # 2A, 3C, 4T -> T
     self.assertEqual(call('0,2,3,4', 3)[0][4], ['T'])
Ejemplo n.º 8
0
 def test_het_edgecases(self):
     ''' Edge cases for heterozygous calls. The interplay between min_coverage
         and major_cutoff is not obvious; this is the desired behavior as
         understood from Kristian.
         for min_dp=3:
             3G, 4A, 5C ->  G/A/C
             2G, 3A, 3T -> A/T
             2A, 2T -> no call
             2G, 3C -> C
             2A, 3C, 4T -> T
         for min_dp=2:
             2A, 2T -> A/T
      '''

     def allele_set(calls):
         # Alleles of the first call, order ignored.
         return set(calls[0][4])

     def allele_list(calls):
         # Alleles of the first call, order significant.
         return calls[0][4]

     def everything(calls):
         # The complete list of calls.
         return calls

     # Each case: (sample column, min_dp, view applied to the output, expected value).
     cases = [
         ('0/1:3,4,5,0', 3, allele_set, {'G', 'A', 'C'}),
         ('0/1:2,3,0,3', 3, allele_set, {'A', 'T'}),
         ('0/1:0,2,0,2', 3, everything, []),
         ('0/1:0,2,0,2', 2, allele_set, {'A', 'T'}),
         ('0/1:2,0,3,0', 3, allele_list, ['C']),
         ('0/1:0,2,3,4', 3, allele_list, ['T']),
     ]
     for sample_column, depth_floor, view, expected in cases:
         vcf_row = ['thecontig', '105000', '.', 'G', 'A,C,T', '.', '.', '.', 'GT:AD', sample_column]
         result = list(assembly.vcfrow_parse_and_call_snps(vcf_row, ['s1'], min_dp=depth_floor))
         self.assertEqual(view(result), expected)
Ejemplo n.º 9
0
 def test_missing_dp(self):
     ''' A VCF row with no genotype call and no depth data is acceptable and yields nothing. '''
     # FORMAT is just 'GT' and the genotype is './.', so the row carries no DP or AD.
     uncalled_row = ['chr10', '105', '.', 'G', '.', '.', '.', '.', 'GT', './.']
     calls = assembly.vcfrow_parse_and_call_snps(uncalled_row, ['s1'], min_dp=1)
     self.assertEqual(list(calls), [])
Ejemplo n.º 10
0
 def test_missing_dp(self):
     ''' Rows without any call or depth information are tolerated and produce no output. '''
     sample_names = ['s1']
     # Only a GT field is present, and it is the missing genotype './.'.
     vcf_row = ['chr10', '105', '.', 'G', '.'] + ['.'] * 3 + ['GT', './.']
     result = list(assembly.vcfrow_parse_and_call_snps(vcf_row, sample_names, min_dp=1))
     self.assertEqual(result, [])