def setUp(self):
    """Build the reference variant (chr1:14752 G>A) reused by the tests."""
    genotypes = {
        'NORMAL': '0/0:113,4:0.063',
        'TUMOR': '0/1:113,4:0.063',
    }
    self.variant = Variant(
        chrom='chr1',
        pos='14752',
        id_='.',
        ref='G',
        alt='A',
        qual='.',
        filter_='weak_evidence',
        info='DP=236;FOO',
        format_='GT:AD:AF',
        samples=genotypes,
    )
def parse_vcf(stream: Iterable[str]) -> VCF:
    """Parse the lines of a VCF file into a ``VCF`` object.

    Lines are classified by their prefix:

    * ``##`` lines are stored verbatim (without the line ending) as header.
    * The ``#`` line supplies the sample names, i.e. every tab-separated
      column after the ninth one.
    * Any other non-empty line is a variant record with at least nine
      tab-separated columns (CHROM through FORMAT) followed by one column
      per sample.

    Empty lines are skipped, and both ``\\n`` and ``\\r\\n`` line endings
    are accepted.

    Args:
        stream: Iterable yielding the lines of the VCF, with or without
            their trailing line ending.

    Returns:
        A ``VCF`` holding the header lines, the sample names and the
        variants, in the order they appear in ``stream``.

    Raises:
        ValueError: If a variant record has fewer than nine columns.
    """
    header: List[str] = []
    sample_names: List[str] = []
    variants: List[Variant] = []

    for line_number, row in enumerate(stream, start=1):
        # Strip '\r' as well so files with Windows line endings do not leak
        # a carriage return into the last column.
        line = row.rstrip('\r\n')
        if not line:
            # A blank line carries no record; skip it rather than failing
            # on the tuple unpacking below.
            continue

        if line.startswith('##'):
            header.append(line)
        elif line.startswith('#'):
            sample_names = line.split('\t')[9:]
        else:
            fields = line.split('\t')
            if len(fields) < 9:
                # Same exception type the bare unpacking would raise, but
                # with enough context to locate the offending line.
                raise ValueError(
                    'line {}: expected at least 9 tab-separated columns, '
                    'got {}'.format(line_number, len(fields)))
            (chrom, pos, id_, ref, alt, qual, filter_, info, format_,
             *samples) = fields
            variants.append(
                Variant(chrom=chrom,
                        pos=pos,
                        id_=id_,
                        ref=ref,
                        alt=alt,
                        qual=qual,
                        filter_=filter_,
                        info=info,
                        format_=format_,
                        # Pair each sample column with its name from the
                        # '#CHROM' line.
                        samples=dict(zip(sample_names, samples))))

    return VCF(header=header, sample_names=sample_names, variants=variants)
def test_variant_filter_status(self):
    """A 'PASS' variant is not filtered; the fixture variant is."""
    passing_genotypes = {
        'NORMAL': '0/0:128,0:9.577e-03',
        'TUMOR': '0/1:99,33:0.204',
    }
    passing = Variant(
        chrom='chr1',
        pos='11117039',
        id_='.',
        ref='C',
        alt='A',
        qual='.',
        filter_='PASS',
        info='DP=264',
        format_='GT:AD:AF',
        samples=passing_genotypes,
    )

    self.assertTrue(self.variant.is_filtered())
    self.assertFalse(passing.is_filtered())
def test_triager_multiple_variants_multiple_evidence(self):
    """Triage five variants against two evidence sets, 'X' and 'Y'.

    After triage, each variant is checked for its filter status and for
    the 'VTSO' and 'VTOF' INFO entries (``False`` meaning "not set").
    """

    def bare(chrom, pos, ref, alt, filter_):
        # The fixtures differ only in locus, alleles and FILTER value.
        return Variant(chrom=chrom, pos=pos, id_='.', ref=ref, alt=alt,
                       qual='.', filter_=filter_, info='', format_='',
                       samples={})

    vcf = VCF(
        header=[],
        sample_names=[],
        variants=[
            bare('chr1', '100', 'A', 'T', 'PASS'),
            bare('chr2', '500', 'G', 'GATATA', 'weak_evidence'),
            bare('chr3', '7500', 'ACT', 'A', 'germline'),
            bare('chr4', '1980', 'GGGG', 'G', 'haplotype'),
            bare('chr5', '5700', 'CTCTCT', 'C', 'slippage'),
        ],
    )
    evidence = {
        'X': [
            bare('chr2', '500', 'G', 'GATATA', 'PASS'),
            bare('chr3', '7500', 'ACT', 'A', 'contamination'),
        ],
        'Y': [
            bare('chr2', '500', 'G', 'GATATA', 'PASS'),
            bare('chr4', '1980', 'GGGG', 'G', 'PASS'),
        ],
    }

    Triager(evidence=evidence).triage(vcf=vcf)

    # One row per variant, in VCF order:
    # (expected is_filtered(), expected VTSO, expected VTOF).
    expectations = [
        (False, False, False),
        (False, 'X,Y', 'weak_evidence'),
        (True, False, False),
        (False, 'Y', 'haplotype'),
        (True, False, False),
    ]
    for index, (filtered, vtso, vtof) in enumerate(expectations):
        variant = vcf.variants[index]

        if filtered:
            self.assertTrue(variant.is_filtered())
        else:
            self.assertFalse(variant.is_filtered())

        for key, expected in (('VTSO', vtso), ('VTOF', vtof)):
            if expected is False:
                # An unset INFO key must be the False singleton itself.
                self.assertIs(variant.get_info(key), False)
            else:
                self.assertEqual(variant.get_info(key), expected)
def test_triage_filtered_variant_which_passes_in_evidence(self):
    """A filtered variant that is 'PASS' in evidence 'X' gets unfiltered.

    After triage the variant must report 'X' under 'VTSO' and its
    original filter, 'clustered_events', under 'VTOF'.
    """
    locus = dict(chrom='chr1', pos='100', id_='.', ref='A', alt='T',
                 qual='.', info='', format_='')

    filtered_call = Variant(filter_='clustered_events', samples={}, **locus)
    passing_call = Variant(filter_='PASS', samples={}, **locus)

    vcf = VCF(header=[], sample_names=[], variants=[filtered_call])
    Triager(evidence={'X': [passing_call]}).triage(vcf=vcf)

    triaged = vcf.variants[0]
    self.assertFalse(triaged.is_filtered())
    self.assertEqual(vcf.variants[0].get_info('VTSO'), 'X')
    self.assertEqual(vcf.variants[0].get_info('VTOF'), 'clustered_events')
def test_creating_multiallelic_variant_raises_exception(self):
    """Constructing a variant with ALT 'G,A' must raise VariantException."""
    multiallelic_fields = {
        'chrom': 'chr1',
        'pos': '8013449',
        'id_': '.',
        'ref': 'C',
        'alt': 'G,A',
        'qual': '.',
        'filter_': 'clustered_events;multiallelic',
        'info': 'DP=238',
        'format_': 'GT:AD:AF',
        'samples': {
            'NORMAL': '0/0:113,2,0:0.030',
            'TUMOR': '0/1/2:59,2,56:0.029,0.440',
        },
    }
    with self.assertRaises(VariantException):
        Variant(**multiallelic_fields)
def test_variant_inequality(self):
    """A variant at another position with other alleles is not equal."""
    deletion_genotypes = {
        'NORMAL': '0|0:224,3:0.015',
        'TUMOR': '0|1:225,3:0.015',
    }
    deletion = Variant(
        chrom='chr1',
        pos='1625272',
        id_='.',
        ref='GC',
        alt='G',
        qual='.',
        filter_='clustered_events;weak_evidence',
        info='DP=455;STR',
        format_='GT:AD:AF',
        samples=deletion_genotypes,
    )
    self.assertNotEqual(self.variant, deletion)
def test_variant_equality(self):
    """A variant rebuilt at chr1:14752 G>A equals the fixture variant."""
    genotypes = {
        'NORMAL': '0/0:113,4:0.063',
        'TUMOR': '0/1:113,4:0.063',
    }
    twin = Variant(
        chrom='chr1',
        pos='14752',
        id_='.',
        ref='G',
        alt='A',
        qual='.',
        filter_='weak_evidence',
        info='DP=236',
        format_='GT:AD:AF',
        samples=genotypes,
    )
    self.assertEqual(self.variant, twin)
def test_variant_equality_different_filter_status(self):
    """The same call with FILTER 'PASS' still equals the fixture variant."""
    genotypes = {
        'NORMAL': '0/0:113,4:0.063',
        'TUMOR': '0/1:113,4:0.063',
    }
    passing_twin = Variant(
        chrom='chr1',
        pos='14752',
        id_='.',
        ref='G',
        alt='A',
        qual='.',
        filter_='PASS',
        info='DP=236',
        format_='GT:AD:AF',
        samples=genotypes,
    )
    self.assertEqual(self.variant, passing_twin)
class TestVariant(unittest.TestCase):
    """Unit tests for Variant: equality, filter status, repr and INFO access."""

    def setUp(self):
        """Build the reference variant (chr1:14752 G>A) reused by the tests."""
        genotypes = {
            'NORMAL': '0/0:113,4:0.063',
            'TUMOR': '0/1:113,4:0.063',
        }
        self.variant = Variant(
            chrom='chr1',
            pos='14752',
            id_='.',
            ref='G',
            alt='A',
            qual='.',
            filter_='weak_evidence',
            info='DP=236;FOO',
            format_='GT:AD:AF',
            samples=genotypes,
        )

    def test_variant_equality(self):
        """A variant rebuilt with INFO 'DP=236' equals the fixture variant."""
        genotypes = {
            'NORMAL': '0/0:113,4:0.063',
            'TUMOR': '0/1:113,4:0.063',
        }
        twin = Variant(
            chrom='chr1',
            pos='14752',
            id_='.',
            ref='G',
            alt='A',
            qual='.',
            filter_='weak_evidence',
            info='DP=236',
            format_='GT:AD:AF',
            samples=genotypes,
        )
        self.assertEqual(self.variant, twin)

    def test_variant_equality_different_filter_status(self):
        """The same call with FILTER 'PASS' still equals the fixture."""
        genotypes = {
            'NORMAL': '0/0:113,4:0.063',
            'TUMOR': '0/1:113,4:0.063',
        }
        passing_twin = Variant(
            chrom='chr1',
            pos='14752',
            id_='.',
            ref='G',
            alt='A',
            qual='.',
            filter_='PASS',
            info='DP=236',
            format_='GT:AD:AF',
            samples=genotypes,
        )
        self.assertEqual(self.variant, passing_twin)

    def test_variant_inequality(self):
        """A variant at another position with other alleles is not equal."""
        deletion_genotypes = {
            'NORMAL': '0|0:224,3:0.015',
            'TUMOR': '0|1:225,3:0.015',
        }
        deletion = Variant(
            chrom='chr1',
            pos='1625272',
            id_='.',
            ref='GC',
            alt='G',
            qual='.',
            filter_='clustered_events;weak_evidence',
            info='DP=455;STR',
            format_='GT:AD:AF',
            samples=deletion_genotypes,
        )
        self.assertNotEqual(self.variant, deletion)

    def test_variant_comparison_with_non_variant_object(self):
        """Comparing a variant with a plain string reports inequality."""
        record_text = (
            'chr1 14752 . G A . weak_evidence DP=236 GT:AD:AF '
            '0/0:113,4:0.063 0/1:113,4:0.063'
        )
        self.assertNotEqual(self.variant, record_text)

    def test_variant_filter_status(self):
        """A 'PASS' variant is not filtered; the fixture variant is."""
        passing_genotypes = {
            'NORMAL': '0/0:128,0:9.577e-03',
            'TUMOR': '0/1:99,33:0.204',
        }
        passing = Variant(
            chrom='chr1',
            pos='11117039',
            id_='.',
            ref='C',
            alt='A',
            qual='.',
            filter_='PASS',
            info='DP=264',
            format_='GT:AD:AF',
            samples=passing_genotypes,
        )

        self.assertTrue(self.variant.is_filtered())
        self.assertFalse(passing.is_filtered())

    def test_creating_multiallelic_variant_raises_exception(self):
        """Constructing a variant with ALT 'G,A' raises VariantException."""
        multiallelic_fields = {
            'chrom': 'chr1',
            'pos': '8013449',
            'id_': '.',
            'ref': 'C',
            'alt': 'G,A',
            'qual': '.',
            'filter_': 'clustered_events;multiallelic',
            'info': 'DP=238',
            'format_': 'GT:AD:AF',
            'samples': {
                'NORMAL': '0/0:113,2,0:0.030',
                'TUMOR': '0/1/2:59,2,56:0.029,0.440',
            },
        }
        with self.assertRaises(VariantException):
            Variant(**multiallelic_fields)

    def test_variant_to_string(self):
        """The repr of the fixture variant is its VCF record text."""
        expected = (
            'chr1 14752 . G A . weak_evidence DP=236;FOO GT:AD:AF '
            '0/0:113,4:0.063 0/1:113,4:0.063'
        )
        self.assertEqual(repr(self.variant), expected)

    def test_variant_get_info(self):
        """A keyed INFO entry ('DP=236') is returned as its string value."""
        depth = self.variant.get_info('DP')
        self.assertEqual(depth, '236')

    def test_variant_get_info_not_set(self):
        """An INFO key that is absent yields the False singleton."""
        missing = self.variant.get_info('TLOD')
        self.assertIs(missing, False)

    def test_variant_set_info_with_value(self):
        """Setting a new key with a value makes it retrievable."""
        self.variant.set_info(key='TLOD', value='3.14')
        stored = self.variant.get_info('TLOD')
        self.assertEqual(stored, '3.14')

    def test_variant_set_info_with_value_already_set(self):
        """Setting an existing key replaces its previous value."""
        self.variant.set_info(key='DP', value='42')
        stored = self.variant.get_info('DP')
        self.assertEqual(stored, '42')

    def test_variant_set_info_without_value(self):
        """Setting a key without a value stores it as a flag (True)."""
        self.variant.set_info(key='STR')
        flag = self.variant.get_info('STR')
        self.assertIs(flag, True)

    def test_variant_set_info_without_value_already_set(self):
        """Re-setting the flag 'FOO' that is already present keeps it True."""
        self.variant.set_info(key='FOO')
        flag = self.variant.get_info('FOO')
        self.assertIs(flag, True)
def test_parse_vcf(self):
    """parse_vcf splits a small in-memory VCF into header, samples, variants."""
    # Show the full diff when the long header/variant lists differ.
    self.maxDiff = None

    header_lines = [
        '##fileformat=VCFv4.2',
        '##FILTER=<ID=PASS,Description="Site contains at least one allele that passes filters">',
        '##FILTER=<ID=clustered_events,Description="Clustered events observed in the tumor">',
        '##FILTER=<ID=weak_evidence,Description="Mutation does not meet likelihood threshold">',
        '##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">',
        '##FORMAT=<ID=AF,Number=A,Type=Float,Description="Allele fractions of alternate alleles in the tumor">',
        '##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">',
        '##INFO=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth; some reads may have been filtered">',
        '##INFO=<ID=STR,Number=0,Type=Flag,Description="Variant is a short tandem repeat">',
        '##contig=<ID=chr1,length=248956422>',
    ]
    column_fields = ('#CHROM', 'POS', 'ID', 'REF', 'ALT', 'QUAL', 'FILTER',
                     'INFO', 'FORMAT', 'NORMAL', 'TUMOR')
    record_fields = [
        ('chr1', '14752', '.', 'G', 'A', '.', 'weak_evidence', 'DP=236',
         'GT:AD:AF', '0/0:113,4:0.063', '0/1:113,4:0.063'),
        ('chr1', '1625272', '.', 'GC', 'G', '.',
         'clustered_events;weak_evidence', 'DP=455;STR', 'GT:AD:AF',
         '0|0:224,3:0.015', '0|1:225,3:0.015'),
        ('chr1', '11117039', '.', 'C', 'A', '.', 'PASS', 'DP=264',
         'GT:AD:AF', '0/0:128,0:9.577e-03', '0/1:99,33:0.204'),
    ]

    # Tab-join the column and record rows, then terminate every line with
    # a newline, as a file iterator would deliver them.
    tabbed_rows = ['\t'.join(fields)
                   for fields in [column_fields, *record_fields]]
    stream = ['{}\n'.format(text) for text in header_lines + tabbed_rows]

    vcf = parse_vcf(stream=stream)

    expected_variants = [
        Variant(
            chrom='chr1', pos='14752', id_='.', ref='G', alt='A', qual='.',
            filter_='weak_evidence', info='DP=236', format_='GT:AD:AF',
            samples={
                'NORMAL': '0/0:113,4:0.063',
                'TUMOR': '0/1:113,4:0.063',
            },
        ),
        Variant(
            chrom='chr1', pos='1625272', id_='.', ref='GC', alt='G',
            qual='.', filter_='clustered_events;weak_evidence',
            info='DP=455;STR', format_='GT:AD:AF',
            samples={
                'NORMAL': '0|0:224,3:0.015',
                'TUMOR': '0|1:225,3:0.015',
            },
        ),
        Variant(
            chrom='chr1', pos='11117039', id_='.', ref='C', alt='A',
            qual='.', filter_='PASS', info='DP=264', format_='GT:AD:AF',
            samples={
                'NORMAL': '0/0:128,0:9.577e-03',
                'TUMOR': '0/1:99,33:0.204',
            },
        ),
    ]

    self.assertEqual(vcf.header, header_lines)
    self.assertEqual(vcf.sample_names, ['NORMAL', 'TUMOR'])
    self.assertEqual(vcf.variants, expected_variants)