Example #1
0
 def setUp(self):
     """Create the fixture variant: chr1:14752 G>A with FILTER 'weak_evidence'.

     The INFO column carries ``DP=236`` plus a valueless ``FOO`` key, and the
     record has one genotype column each for the NORMAL and TUMOR samples.
     """
     per_sample_calls = {
         'NORMAL': '0/0:113,4:0.063',
         'TUMOR': '0/1:113,4:0.063',
     }
     self.variant = Variant(
         chrom='chr1',
         pos='14752',
         id_='.',
         ref='G',
         alt='A',
         qual='.',
         filter_='weak_evidence',
         info='DP=236;FOO',
         format_='GT:AD:AF',
         samples=per_sample_calls,
     )
Example #2
0
def parse_vcf(stream: Iterable[str]) -> VCF:
    """Parse the lines of a VCF file into a ``VCF`` object.

    Each line is classified by its prefix:

    * ``##`` lines are meta-information; they are collected, without their
      line terminator, into the header list.
    * A line starting with a single ``#`` is the column header; everything
      after its ninth tab-separated column is taken as the sample names.
    * Any other non-blank line is a data record: nine fixed columns followed
      by one column per sample.

    Blank (or whitespace-only) lines are skipped, and both LF and CRLF line
    terminators are removed, so files with a trailing empty line or Windows
    line endings parse cleanly instead of failing or leaking a carriage
    return into the last column.

    Args:
        stream: Iterable of text lines, for example an open file handle.

    Returns:
        A ``VCF`` built from the collected header lines, sample names and
        variants, in the order they were read.

    Raises:
        ValueError: If a data record has fewer than nine tab-separated
            columns (raised by the tuple unpacking).
        Any exception raised by the ``Variant`` constructor is propagated.
    """
    header: List[str] = []
    sample_names: List[str] = []
    variants: List[Variant] = []

    for row in stream:
        # Strip CR as well as LF so CRLF files do not leave '\r' on the
        # final column of every line.
        line = row.rstrip('\r\n')

        # A blank line carries no record; without this guard it would fall
        # through to the record branch and fail on unpacking.
        if not line.strip():
            continue

        if line.startswith('##'):
            header.append(line)
        elif line.startswith('#'):
            # Columns 1-9 are the fixed VCF columns; the rest name the samples.
            sample_names = line.split('\t')[9:]
        else:
            (chrom, pos, id_, ref, alt, qual, filter_, info, format_,
             *samples) = line.split('\t')
            variants.append(
                Variant(chrom=chrom,
                        pos=pos,
                        id_=id_,
                        ref=ref,
                        alt=alt,
                        qual=qual,
                        filter_=filter_,
                        info=info,
                        format_=format_,
                        # zip() stops at the shorter sequence, so surplus
                        # sample columns or names are dropped silently.
                        samples=dict(zip(sample_names, samples))))

    return VCF(header=header, sample_names=sample_names, variants=variants)
Example #3
0
 def test_variant_filter_status(self):
     """is_filtered() must be true for the fixture and false for a PASS record."""
     per_sample_calls = {
         'NORMAL': '0/0:128,0:9.577e-03',
         'TUMOR': '0/1:99,33:0.204',
     }
     passing_variant = Variant(
         chrom='chr1',
         pos='11117039',
         id_='.',
         ref='C',
         alt='A',
         qual='.',
         filter_='PASS',
         info='DP=264',
         format_='GT:AD:AF',
         samples=per_sample_calls,
     )

     # self.variant is prepared outside this method (presumably in setUp) and
     # is expected to carry a non-PASS filter.
     self.assertTrue(self.variant.is_filtered())
     self.assertFalse(passing_variant.is_filtered())
Example #4
0
    def test_triager_multiple_variants_multiple_evidence(self):
        """Triage five variants against evidence sets 'X' and 'Y'.

        Expected outcome per input variant, as asserted below:

        0. already PASS, absent from the evidence: unfiltered, no VTSO/VTOF.
        1. weak_evidence, PASS in both X and Y: unfiltered, VTSO 'X,Y',
           VTOF 'weak_evidence'.
        2. germline, present only in X where it is itself filtered: stays
           filtered, no VTSO/VTOF.
        3. haplotype, PASS in Y: unfiltered, VTSO 'Y', VTOF 'haplotype'.
        4. slippage, absent from the evidence: stays filtered, no VTSO/VTOF.
        """

        def record(chrom, pos, ref, alt, filter_):
            # Every record in this test shares placeholder ID/QUAL values and
            # empty INFO/FORMAT/sample columns; only the locus, the alleles
            # and the FILTER value vary.
            return Variant(chrom=chrom, pos=pos, id_='.', ref=ref, alt=alt,
                           qual='.', filter_=filter_, info='', format_='',
                           samples={})

        call_set = VCF(
            header=[],
            sample_names=[],
            variants=[
                record('chr1', '100', 'A', 'T', 'PASS'),
                record('chr2', '500', 'G', 'GATATA', 'weak_evidence'),
                record('chr3', '7500', 'ACT', 'A', 'germline'),
                record('chr4', '1980', 'GGGG', 'G', 'haplotype'),
                record('chr5', '5700', 'CTCTCT', 'C', 'slippage'),
            ],
        )

        evidence_by_name = {
            'X': [
                record('chr2', '500', 'G', 'GATATA', 'PASS'),
                record('chr3', '7500', 'ACT', 'A', 'contamination'),
            ],
            'Y': [
                record('chr2', '500', 'G', 'GATATA', 'PASS'),
                record('chr4', '1980', 'GGGG', 'G', 'PASS'),
            ],
        }
        triager = Triager(evidence=evidence_by_name)

        triager.triage(vcf=call_set)
        triaged = call_set.variants

        # 0: was PASS and has no evidence entry -> left untouched.
        self.assertFalse(triaged[0].is_filtered())
        self.assertIs(triaged[0].get_info('VTSO'), False)
        self.assertIs(triaged[0].get_info('VTOF'), False)

        # 1: passes in both evidence sets -> unfiltered and annotated.
        self.assertFalse(triaged[1].is_filtered())
        self.assertEqual(triaged[1].get_info('VTSO'), 'X,Y')
        self.assertEqual(triaged[1].get_info('VTOF'), 'weak_evidence')

        # 2: only evidence entry is itself filtered -> remains filtered.
        self.assertTrue(triaged[2].is_filtered())
        self.assertIs(triaged[2].get_info('VTSO'), False)
        self.assertIs(triaged[2].get_info('VTOF'), False)

        # 3: passes in Y only -> unfiltered and annotated with that source.
        self.assertFalse(triaged[3].is_filtered())
        self.assertEqual(triaged[3].get_info('VTSO'), 'Y')
        self.assertEqual(triaged[3].get_info('VTOF'), 'haplotype')

        # 4: no evidence entry at all -> remains filtered.
        self.assertTrue(triaged[4].is_filtered())
        self.assertIs(triaged[4].get_info('VTSO'), False)
        self.assertIs(triaged[4].get_info('VTOF'), False)
Example #5
0
    def test_triage_filtered_variant_which_passes_in_evidence(self):
        """A filtered variant that is PASS in evidence 'X' ends up unfiltered.

        After triage the variant must report VTSO 'X' and VTOF
        'clustered_events' (the FILTER value it started with).
        """

        def record(filter_):
            # Same chr1:100 A>T record each time; only the FILTER value differs
            # between the call under triage and the evidence entry.
            return Variant(chrom='chr1', pos='100', id_='.', ref='A', alt='T',
                           qual='.', filter_=filter_, info='', format_='',
                           samples={})

        call_set = VCF(
            header=[],
            sample_names=[],
            variants=[record('clustered_events')],
        )
        triager = Triager(evidence={'X': [record('PASS')]})

        triager.triage(vcf=call_set)
        triaged = call_set.variants

        self.assertFalse(triaged[0].is_filtered())
        self.assertEqual(triaged[0].get_info('VTSO'), 'X')
        self.assertEqual(triaged[0].get_info('VTOF'), 'clustered_events')
Example #6
0
 def test_creating_multiallelic_variant_raises_exception(self):
     """Constructing a Variant with two ALT alleles must raise VariantException."""
     multiallelic_fields = {
         'chrom': 'chr1',
         'pos': '8013449',
         'id_': '.',
         'ref': 'C',
         'alt': 'G,A',  # two comma-separated ALT alleles
         'qual': '.',
         'filter_': 'clustered_events;multiallelic',
         'info': 'DP=238',
         'format_': 'GT:AD:AF',
         'samples': {
             'NORMAL': '0/0:113,2,0:0.030',
             'TUMOR': '0/1/2:59,2,56:0.029,0.440',
         },
     }
     with self.assertRaises(VariantException):
         Variant(**multiallelic_fields)
Example #7
0
 def test_variant_inequality(self):
     """A record at a different position with different alleles is not equal."""
     per_sample_calls = {
         'NORMAL': '0|0:224,3:0.015',
         'TUMOR': '0|1:225,3:0.015',
     }
     different_variant = Variant(
         chrom='chr1',
         pos='1625272',
         id_='.',
         ref='GC',
         alt='G',
         qual='.',
         filter_='clustered_events;weak_evidence',
         info='DP=455;STR',
         format_='GT:AD:AF',
         samples=per_sample_calls,
     )
     # self.variant is the fixture prepared outside this method.
     self.assertNotEqual(self.variant, different_variant)
Example #8
0
 def test_variant_equality(self):
     """A separately built chr1:14752 G>A record must equal the fixture."""
     per_sample_calls = {
         'NORMAL': '0/0:113,4:0.063',
         'TUMOR': '0/1:113,4:0.063',
     }
     duplicate = Variant(
         chrom='chr1',
         pos='14752',
         id_='.',
         ref='G',
         alt='A',
         qual='.',
         filter_='weak_evidence',
         info='DP=236',
         format_='GT:AD:AF',
         samples=per_sample_calls,
     )
     # self.variant is the fixture prepared outside this method.
     self.assertEqual(self.variant, duplicate)
Example #9
0
 def test_variant_equality_different_filter_status(self):
     """A PASS record at chr1:14752 G>A must still compare equal to the fixture."""
     per_sample_calls = {
         'NORMAL': '0/0:113,4:0.063',
         'TUMOR': '0/1:113,4:0.063',
     }
     passing_duplicate = Variant(
         chrom='chr1',
         pos='14752',
         id_='.',
         ref='G',
         alt='A',
         qual='.',
         filter_='PASS',
         info='DP=236',
         format_='GT:AD:AF',
         samples=per_sample_calls,
     )
     # self.variant is the fixture prepared outside this method; the test
     # expects equality to hold even though this record's FILTER is PASS.
     self.assertEqual(self.variant, passing_duplicate)
Example #10
0
class TestVariant(unittest.TestCase):
    """Tests for ``Variant``: equality, filter status, repr and INFO access."""

    def setUp(self):
        """Create a fresh chr1:14752 G>A fixture filtered as 'weak_evidence'.

        Its INFO column holds ``DP=236`` plus a valueless ``FOO`` key.
        """
        per_sample_calls = {
            'NORMAL': '0/0:113,4:0.063',
            'TUMOR': '0/1:113,4:0.063',
        }
        self.variant = Variant(
            chrom='chr1',
            pos='14752',
            id_='.',
            ref='G',
            alt='A',
            qual='.',
            filter_='weak_evidence',
            info='DP=236;FOO',
            format_='GT:AD:AF',
            samples=per_sample_calls,
        )

    def test_variant_equality(self):
        """A record differing from the fixture only in INFO compares equal."""
        per_sample_calls = {
            'NORMAL': '0/0:113,4:0.063',
            'TUMOR': '0/1:113,4:0.063',
        }
        duplicate = Variant(
            chrom='chr1',
            pos='14752',
            id_='.',
            ref='G',
            alt='A',
            qual='.',
            filter_='weak_evidence',
            info='DP=236',
            format_='GT:AD:AF',
            samples=per_sample_calls,
        )
        self.assertEqual(self.variant, duplicate)

    def test_variant_equality_different_filter_status(self):
        """A PASS record at the fixture's locus and alleles still compares equal."""
        per_sample_calls = {
            'NORMAL': '0/0:113,4:0.063',
            'TUMOR': '0/1:113,4:0.063',
        }
        passing_duplicate = Variant(
            chrom='chr1',
            pos='14752',
            id_='.',
            ref='G',
            alt='A',
            qual='.',
            filter_='PASS',
            info='DP=236',
            format_='GT:AD:AF',
            samples=per_sample_calls,
        )
        self.assertEqual(self.variant, passing_duplicate)

    def test_variant_inequality(self):
        """A record at another position with other alleles is not equal."""
        per_sample_calls = {
            'NORMAL': '0|0:224,3:0.015',
            'TUMOR': '0|1:225,3:0.015',
        }
        different_variant = Variant(
            chrom='chr1',
            pos='1625272',
            id_='.',
            ref='GC',
            alt='G',
            qual='.',
            filter_='clustered_events;weak_evidence',
            info='DP=455;STR',
            format_='GT:AD:AF',
            samples=per_sample_calls,
        )
        self.assertNotEqual(self.variant, different_variant)

    def test_variant_comparison_with_non_variant_object(self):
        """Comparing the fixture with a plain VCF-line string is not equal."""
        # Tab-separated text for the same locus, but a str rather than a Variant.
        line_text = '\t'.join([
            'chr1', '14752', '.', 'G', 'A', '.', 'weak_evidence', 'DP=236',
            'GT:AD:AF', '0/0:113,4:0.063', '0/1:113,4:0.063',
        ])
        self.assertNotEqual(self.variant, line_text)

    def test_variant_filter_status(self):
        """is_filtered() is true for the fixture and false for a PASS record."""
        per_sample_calls = {
            'NORMAL': '0/0:128,0:9.577e-03',
            'TUMOR': '0/1:99,33:0.204',
        }
        passing_variant = Variant(
            chrom='chr1',
            pos='11117039',
            id_='.',
            ref='C',
            alt='A',
            qual='.',
            filter_='PASS',
            info='DP=264',
            format_='GT:AD:AF',
            samples=per_sample_calls,
        )
        self.assertTrue(self.variant.is_filtered())
        self.assertFalse(passing_variant.is_filtered())

    def test_creating_multiallelic_variant_raises_exception(self):
        """Constructing a Variant with two ALT alleles raises VariantException."""
        multiallelic_fields = {
            'chrom': 'chr1',
            'pos': '8013449',
            'id_': '.',
            'ref': 'C',
            'alt': 'G,A',  # two comma-separated ALT alleles
            'qual': '.',
            'filter_': 'clustered_events;multiallelic',
            'info': 'DP=238',
            'format_': 'GT:AD:AF',
            'samples': {
                'NORMAL': '0/0:113,2,0:0.030',
                'TUMOR': '0/1/2:59,2,56:0.029,0.440',
            },
        }
        with self.assertRaises(VariantException):
            Variant(**multiallelic_fields)

    def test_variant_to_string(self):
        """__repr__() renders the fixture as one tab-separated VCF data line."""
        expected_line = '\t'.join([
            'chr1', '14752', '.', 'G', 'A', '.', 'weak_evidence', 'DP=236;FOO',
            'GT:AD:AF', '0/0:113,4:0.063', '0/1:113,4:0.063',
        ])
        self.assertEqual(self.variant.__repr__(), expected_line)

    def test_variant_get_info(self):
        """get_info() returns the string value of a key=value INFO entry."""
        depth = self.variant.get_info('DP')
        self.assertEqual(depth, '236')

    def test_variant_get_info_not_set(self):
        """get_info() returns the False singleton for a key that is absent."""
        missing = self.variant.get_info('TLOD')
        self.assertIs(missing, False)

    def test_variant_set_info_with_value(self):
        """set_info() with a value adds a new key that get_info() then returns."""
        self.variant.set_info(key='TLOD', value='3.14')
        stored = self.variant.get_info('TLOD')
        self.assertEqual(stored, '3.14')

    def test_variant_set_info_with_value_already_set(self):
        """set_info() with a value replaces the value of an existing key."""
        self.variant.set_info(key='DP', value='42')
        stored = self.variant.get_info('DP')
        self.assertEqual(stored, '42')

    def test_variant_set_info_without_value(self):
        """set_info() without a value adds a key that reads back as True."""
        self.variant.set_info(key='STR')
        stored = self.variant.get_info('STR')
        self.assertIs(stored, True)

    def test_variant_set_info_without_value_already_set(self):
        """Setting the already-present valueless FOO key still reads back True."""
        self.variant.set_info(key='FOO')
        stored = self.variant.get_info('FOO')
        self.assertIs(stored, True)
Example #11
0
    def test_parse_vcf(self):
        """parse_vcf() splits an in-memory VCF into header, samples and variants.

        The input has ten ``##`` meta lines, one ``#CHROM`` column line naming
        the NORMAL and TUMOR samples, and three data records.
        """

        # Show complete diffs on failure; the compared structures are long.
        self.maxDiff = None

        meta_lines = [
            '##fileformat=VCFv4.2',
            '##FILTER=<ID=PASS,Description="Site contains at least one allele that passes filters">',
            '##FILTER=<ID=clustered_events,Description="Clustered events observed in the tumor">',
            '##FILTER=<ID=weak_evidence,Description="Mutation does not meet likelihood threshold">',
            '##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">',
            '##FORMAT=<ID=AF,Number=A,Type=Float,Description="Allele fractions of alternate alleles in the tumor">',
            '##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">',
            '##INFO=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth; some reads may have been filtered">',
            '##INFO=<ID=STR,Number=0,Type=Flag,Description="Variant is a short tandem repeat">',
            '##contig=<ID=chr1,length=248956422>',
        ]

        column_line = '\t'.join([
            '#CHROM', 'POS', 'ID', 'REF', 'ALT', 'QUAL', 'FILTER', 'INFO',
            'FORMAT', 'NORMAL', 'TUMOR',
        ])

        record_lines = [
            '\t'.join([
                'chr1', '14752', '.', 'G', 'A', '.', 'weak_evidence', 'DP=236',
                'GT:AD:AF', '0/0:113,4:0.063', '0/1:113,4:0.063',
            ]),
            '\t'.join([
                'chr1', '1625272', '.', 'GC', 'G', '.',
                'clustered_events;weak_evidence', 'DP=455;STR', 'GT:AD:AF',
                '0|0:224,3:0.015', '0|1:225,3:0.015',
            ]),
            '\t'.join([
                'chr1', '11117039', '.', 'C', 'A', '.', 'PASS', 'DP=264',
                'GT:AD:AF', '0/0:128,0:9.577e-03', '0/1:99,33:0.204',
            ]),
        ]

        # Terminate every line with a newline before handing it to the parser.
        raw_lines = [
            text + '\n'
            for text in meta_lines + [column_line] + record_lines
        ]

        parsed = parse_vcf(stream=raw_lines)

        expected_variants = [
            Variant(
                chrom='chr1',
                pos='14752',
                id_='.',
                ref='G',
                alt='A',
                qual='.',
                filter_='weak_evidence',
                info='DP=236',
                format_='GT:AD:AF',
                samples={
                    'NORMAL': '0/0:113,4:0.063',
                    'TUMOR': '0/1:113,4:0.063',
                },
            ),
            Variant(
                chrom='chr1',
                pos='1625272',
                id_='.',
                ref='GC',
                alt='G',
                qual='.',
                filter_='clustered_events;weak_evidence',
                info='DP=455;STR',
                format_='GT:AD:AF',
                samples={
                    'NORMAL': '0|0:224,3:0.015',
                    'TUMOR': '0|1:225,3:0.015',
                },
            ),
            Variant(
                chrom='chr1',
                pos='11117039',
                id_='.',
                ref='C',
                alt='A',
                qual='.',
                filter_='PASS',
                info='DP=264',
                format_='GT:AD:AF',
                samples={
                    'NORMAL': '0/0:128,0:9.577e-03',
                    'TUMOR': '0/1:99,33:0.204',
                },
            ),
        ]

        # Meta lines come back without their trailing newline.
        self.assertEqual(parsed.header, meta_lines)
        self.assertEqual(parsed.sample_names, ['NORMAL', 'TUMOR'])
        self.assertEqual(parsed.variants, expected_variants)