def test_indels(self):
    """Exercise allele calling on rows whose REF/ALT alleles differ in length."""
    # NOTE(review): a second `test_indels` is defined later in this file; if
    # both live in the same class, the later definition replaces this one.

    # REF 'G' with ALTs 'GA' and 'T': only the AD values in the sample
    # column change between cases, all evaluated with min_dp=3.
    insertion_cases = (
        ('0/1:5,10,1', {'GA'}),
        ('0/1:5,5,2', {'G', 'GA'}),
        ('0/1:5,5,3', {'G', 'GA', 'T'}),
    )
    for sample_field, expected_alleles in insertion_cases:
        vcf_row = ['thecontig', '105000', '.', 'G', 'GA,T', '.', '.', '.', 'GT:AD', sample_field]
        calls = list(assembly.vcfrow_parse_and_call_snps(vcf_row, ['s1'], min_dp=3))
        self.assertEqual(set(calls[0][4]), expected_alleles)

    # REF 'AT' with ALT 'A', using the function's default min_dp: the whole
    # output tuple is checked, including the 105000-105001 coordinates.
    deletion_row = ['thecontig', '105000', '.', 'AT', 'A', '.', '.', '.', 'GT:AD', '0/1:2,10']
    calls = list(assembly.vcfrow_parse_and_call_snps(deletion_row, ['s1']))
    self.assertEqual(calls, [('thecontig', 105000, 105001, 's1', ['A'])])
def test_dp_inaccurate(self):
    """DP is allowed to disagree with the sum of the AD values.

    Each row carries a DP field that does not match the total of its AD
    counts; the expected calls below pin the current behaviour.
    """

    def call(sample_field, min_dp):
        # Same site every time; only the GT:DP:AD sample column varies.
        vcf_row = ['chr10', '105', '.', 'G', 'A', '.', '.', '.', 'GT:DP:AD', sample_field]
        return list(assembly.vcfrow_parse_and_call_snps(vcf_row, ['s1'], min_dp=min_dp))

    # DP=5 but AD sums to 4.
    calls = call('0/1/1:5:2,2', 1)
    self.assertEqual(set(calls[0][4]), {'G', 'A'})

    # DP=2 is below min_dp=3, yet both alleles (AD 3,3) are still expected.
    calls = call('0/1/1:2:3,3', 3)
    self.assertEqual(set(calls[0][4]), {'G', 'A'})

    # DP=10 but AD is 2,0: nothing is expected at min_dp=3.
    calls = call('0/1/1:10:2,0', 3)
    self.assertEqual(calls, [])
def test_indels(self):
    """Indel handling: an insertion/SNP site at several depths, then a deletion."""
    # NOTE(review): an earlier `test_indels` appears in this file; if both
    # are in the same class, this later definition is the one that runs.

    def called_alleles(sample_field):
        # REF 'G', ALTs 'GA' and 'T'; returns the set of alleles called
        # for the first (only) output record at min_dp=3.
        vcf_row = ['thecontig', '105000', '.', 'G', 'GA,T', '.', '.', '.', 'GT:AD', sample_field]
        records = list(assembly.vcfrow_parse_and_call_snps(vcf_row, ['s1'], min_dp=3))
        return set(records[0][4])

    self.assertEqual(called_alleles('0/1:5,10,1'), {'GA'})
    self.assertEqual(called_alleles('0/1:5,5,2'), {'G', 'GA'})
    self.assertEqual(called_alleles('0/1:5,5,3'), {'G', 'GA', 'T'})

    # Deletion (REF 'AT' -> ALT 'A') with the default min_dp; compare the
    # complete output rather than just the allele list.
    deletion_row = ['thecontig', '105000', '.', 'AT', 'A', '.', '.', '.', 'GT:AD', '0/1:2,10']
    records = list(assembly.vcfrow_parse_and_call_snps(deletion_row, ['s1']))
    expected = [('thecontig', 105000, 105001, 's1', ['A'])]
    self.assertEqual(records, expected)
def test_invariant_sites(self):
    """Invariant sites (ALT is '.') are handled slightly differently in the
    code under test, so they get a dedicated test.
    """
    scenarios = (
        # (FORMAT column, sample column, min_dp, expected output)
        ('GT:DP', '0/0:3', 3, [('LASV.l', 1, 1, 's1', ['T'])]),
        ('GT', '0/0', 0, [('LASV.l', 1, 1, 's1', ['T'])]),
        ('GT', '0/0', 1, []),
        ('GT', './.', 1, []),
        ('GT:DP', './.:10', 1, [('LASV.l', 1, 1, 's1', ['T'])]),
    )
    for format_field, sample_field, depth_floor, expected in scenarios:
        vcf_row = ['LASV.l', '1', '.', 'T', '.', '.', '.', '.', format_field, sample_field]
        calls = list(assembly.vcfrow_parse_and_call_snps(vcf_row, ['s1'], min_dp=depth_floor))
        self.assertEqual(calls, expected)
def test_het_edgecases(self):
    """Pin the expected calls for heterozygous edge cases.

    The interplay between min_coverage and major_cutoff is not obvious;
    this records the desired behavior as understood from Kristian.

    for min_dp=3:
        3G, 4A, 5C  ->  G/A/C
        2G, 3A, 3T  ->  A/T
        2A, 2T      ->  no call
        2G, 3C      ->  C
        2A, 3C, 4T  ->  T
    for min_dp=2:
        2A, 2T      ->  A/T
    """

    def call(sample_field, min_dp):
        # REF 'G' with ALTs 'A,C,T', so the AD values are listed in the
        # order G, A, C, T.
        vcf_row = ['thecontig', '105000', '.', 'G', 'A,C,T', '.', '.', '.', 'GT:AD', sample_field]
        return list(assembly.vcfrow_parse_and_call_snps(vcf_row, ['s1'], min_dp=min_dp))

    # 3G, 4A, 5C -> G/A/C
    calls = call('0/1:3,4,5,0', 3)
    self.assertEqual(set(calls[0][4]), {'G', 'A', 'C'})

    # 2G, 3A, 3T -> A/T
    calls = call('0/1:2,3,0,3', 3)
    self.assertEqual(set(calls[0][4]), {'A', 'T'})

    # 2A, 2T -> no call at min_dp=3 ...
    calls = call('0/1:0,2,0,2', 3)
    self.assertEqual(calls, [])

    # ... but A/T once min_dp drops to 2.
    calls = call('0/1:0,2,0,2', 2)
    self.assertEqual(set(calls[0][4]), {'A', 'T'})

    # 2G, 3C -> C (compared as a list, not a set)
    calls = call('0/1:2,0,3,0', 3)
    self.assertEqual(calls[0][4], ['C'])

    # 2A, 3C, 4T -> T (compared as a list, not a set)
    calls = call('0/1:0,2,3,4', 3)
    self.assertEqual(calls[0][4], ['T'])
def test_missing_dp(self):
    """A VCF row with no call and no depth data of any kind is acceptable
    input and is expected to produce no output.
    """
    # FORMAT is just 'GT' and the genotype is './.'.
    empty_row = ['chr10', '105', '.', 'G', '.', '.', '.', '.', 'GT', './.']
    calls = list(assembly.vcfrow_parse_and_call_snps(empty_row, ['s1'], min_dp=1))
    self.assertEqual(calls, [])