Esempio n. 1
0
    def test_minimal_VCF_definition_io(self):
        """Round-trip variants through a VCF reduced to a minimal header.

        Only the ``##fileformat`` line, the ``#CHROM`` line and the
        non-comment rows of example1.vcf are fed to the reader. Every
        variant has its SAMP1 ``GT`` value replaced, is rendered with a
        ``VCFWriter`` and read back; the variants before and after are
        compared pairwise with ``edit_equal``.
        """
        kept_prefixes = ('##fileformat', '#CHROM')
        minimal = io.StringIO()
        source_path = pkg_file('genomvar.test', 'data/example1.vcf')
        with open(source_path, 'rt') as src:
            for text in src:
                # Keep the two selected header lines and all data rows.
                if text.startswith(kept_prefixes) or not text.startswith('#'):
                    minimal.write(text)
        minimal.seek(0)

        reader = VCFReader(minimal)
        rendered = io.StringIO()
        writer = VCFWriter(format_spec=[RESERVED_FORMAT.GT],
                           samples=reader.samples)

        before = []
        for variant in reader.iter_vrt(parse_samples=True):
            sample = variant.attrib['samples']['SAMP1']
            # GT is asserted to arrive as a raw string from this input.
            self.assertTrue(isinstance(sample['GT'], str))
            sample['GT'] = (0, 1) if sample.get('GT') == '0/1' else None
            rendered.write(str(writer.get_row(variant)))
            before.append(variant)
        before = sorted(before, key=lambda item: item.start)

        rendered.seek(0)
        after = sorted(VCFReader(rendered).iter_vrt(),
                       key=lambda item: item.start)

        for expected, actual in zip(before, after):
            self.assertTrue(expected.edit_equal(actual))
Esempio n. 2
0
    def test_example3(self):
        """Check the chromosome list and a variant without ID in example3."""
        path = pkg_file('genomvar.test', 'data/example3.vcf')
        reader = VCFReader(path)

        chroms = list(reader.get_chroms(allow_no_index=True))
        self.assertEqual(chroms, ['chr1', 'chr2', 'chr10'])

        variants = list(reader.iter_vrt(parse_info=True, parse_samples=True))
        self.assertGreater(len(variants), 0)

        # The variant at index 3 is expected to carry no ID.
        fourth = variants[3]
        self.assertEqual(fourth.attrib['id'], None)
Esempio n. 3
0
    def test_iter_vrt_gzipped(self):
        """Iterate and query example2.vcf.gz opened with ``index=True``."""
        path = pkg_file('genomvar.test', 'data/example2.vcf.gz')
        reader = VCFReader(path, index=True)
        self.assertEqual(list(reader.chroms), ['chr23', 'chr24'])

        # Every variant yielded must be on one of the two chromosomes.
        allowed = ['chr24', 'chr23']
        for variant in reader.iter_vrt():
            self.assertIn(variant.chrom, allowed)

        # Query by chromosome only, then by an explicit interval.
        on_chr24 = list(reader.find_vrt(chrom='chr24'))
        self.assertEqual(len(on_chr24), 4)
        in_window = list(reader.find_vrt('chr23', 7464, 7465))
        self.assertEqual(len(in_window), 3)
Esempio n. 4
0
    def test_sv_types(self):
        """Iterating example4.vcf.gz yields 100 variants and emits warnings.

        More than one warning must be recorded while iterating, and the
        last recorded warning must mention 'Structural'.
        """
        reader = VCFReader(pkg_file('genomvar.test', 'data/example4.vcf.gz'))

        with warnings.catch_warnings(record=True) as wrn:
            # Count inside the context so warnings raised during iteration
            # are recorded. ``sum`` gives 0 for an empty reader, so the
            # check below fails as an assertion instead of a NameError on
            # an unbound loop variable.
            n_variants = sum(1 for _ in reader.iter_vrt())
            # Equivalent to the last zero-based index being 99.
            self.assertEqual(n_variants, 100)
            self.assertGreater(len(wrn), 1)
            self.assertIn('Structural', str(wrn[-1].message))
Esempio n. 5
0
    def test_check_getting_vrt_is_sorted(self):
        """Start positions are non-decreasing for iteration and lookup."""
        path = pkg_file('genomvar.test', 'data/example_gnomad_2.vcf.gz')
        reader = VCFReader(path, index=True)

        # Full iteration over the file.
        whole_file = reader.iter_vrt()
        positions = [variant.start for variant in whole_file]
        self.assertEqual(positions, sorted(positions))

        # Interval query on chr15.
        region = reader.find_vrt('chr15', 74719587, 74824401)
        region_positions = [variant.start for variant in region]
        self.assertEqual(region_positions, sorted(region_positions))
Esempio n. 6
0
    def test_from_variants_with_attributes(self):
        """Attributes read from VCF are kept by ``VariantSet.from_variants``.

        Checks the ``info`` and ``id`` attributes of the two variants found
        in chr24:1200-1210, and the ``info`` of the first exported record.
        """
        reader = VCFReader(pkg_file('genomvar.test', 'data/example1.vcf'))
        parsed = list(reader.iter_vrt(parse_info=True))
        vset = VariantSet.from_variants(parsed)

        hits = list(vset.find_vrt('chr24', 1200, 1210))
        self.assertEqual(len(hits), 2)

        first = hits[0]
        self.assertEqual(first.attrib['info']['NSV'], 1)
        self.assertEqual(first.attrib['id'], '5')

        second = hits[1]
        self.assertEqual(second.attrib['id'], None)

        # Attributes must also be present in the record export.
        records = vset.to_records()
        self.assertEqual(records[0]['attrib']['info']['NSV'], 2)
Esempio n. 7
0
    def test_from_vcf_missing_values(self):
        """A missing genotype ('./.') is parsed as ``(None, None)``.

        Builds an in-memory VCF with samples S1 and S2 and FORMAT
        definitions for AD, DP and GT, writes three rows containing
        missing values, and checks S1's GT on the first variant read.
        """
        format_fields = (RESERVED_FORMAT.AD, RESERVED_FORMAT.DP,
                         RESERVED_FORMAT.GT)
        # Each spec exposes these values as upper-case attributes.
        spec_keys = ('name', 'number', 'type', 'description')
        header = vcf_header.render(
            samples=['S1', 'S2'],
            ctg_len={},
            format=[{key: getattr(spec, key.upper()) for key in spec_keys}
                    for spec in format_fields])

        buf = io.StringIO()
        buf.write(header)
        buf.write('chr15\t17017413\t.\tA\tG\t38\t.\t.\tGT\t./.\t0/1\n')
        buf.write('chr15\t17017413\t.\tA\tG\t38\t.\t.\tGT:AD\t./.\t0/1:10,.\n')
        buf.write('chr15\t17017413\t.\tA\tG\t38\t.\t.\tGT:DP\t./.\t1/1:.\n')
        # Rewind once so the reader starts at the header.
        buf.seek(0)
        vs = VCFReader(buf)

        # Materialize all variants so every row is parsed, then check
        # the first one.
        v = list(vs.iter_vrt(parse_samples=True))[0]
        self.assertEqual(v.attrib['samples']['S1']['GT'], (None, None))
Esempio n. 8
0
    def test_to_vcf_row_from_file(self):
        """Rows rendered by ``self.writer`` match rows read from the file.

        Rows from example1.vcf are split into one row per ALT allele with
        INFO set to '.' and FORMAT/SAMPLES dropped, then compared in order
        with the rows the writer produces for the parsed variants.
        """
        def _split_multiallelic(rows):
            """Yield one rendered row string per ALT allele of each row."""
            for row in rows:
                for alt in row.ALT.split(','):
                    kwds = {field: getattr(row, field)
                            for field in VCF_FIELDS}
                    kwds['ALT'] = alt
                    kwds['INFO'] = '.'
                    kwds['FORMAT'] = None
                    kwds['SAMPLES'] = None
                    yield str(VCFRow(**kwds))

        reader = VCFReader(pkg_file('genomvar.test', 'data/example1.vcf'))
        # try/finally so the reader is closed even when an assertion fails.
        try:
            variants = list(reader.iter_vrt(
                parse_info=False, parse_samples=False))
            rows = [str(self.writer.get_row(v)) for v in variants]

            for r1, r2 in zip(
                    _split_multiallelic(reader.iter_rows()), rows):
                # Skipped in the original with the note "stripping";
                # presumably the parsed variant is trimmed so the rendered
                # row differs from the file row -- TODO confirm.
                if 'AG\tAGG' in r1:
                    continue
                self.assertEqual(r1, r2)
        finally:
            reader.close()
Esempio n. 9
0
    def test_iter_vrt_example1(self):
        """Inspect the first two variants parsed from example1.vcf.

        Covers the sample list, start/ref/alt, the originating row number,
        the allele number, the INFO ``AF`` value and SAMP1's ``GT``.
        """
        reader = VCFReader(pkg_file('genomvar.test', 'data/example1.vcf'))
        self.assertEqual(reader.samples, ['SAMP1'])

        parsed = list(reader.iter_vrt(parse_info=True, parse_samples=True))
        first, second = parsed[:2]

        # Coordinates and alleles.
        self.assertEqual([first.start, first.ref, first.alt], [23, 'G', ''])
        self.assertEqual([second.start, second.ref, second.alt],
                         [24, '', 'G'])

        # Both variants report row 0 but different allele numbers.
        first_attrib = first.attrib
        second_attrib = second.attrib
        self.assertEqual(first_attrib['vcf_notation']['row'], 0)
        self.assertEqual(second_attrib['vcf_notation']['row'], 0)
        self.assertEqual(first_attrib['allele_num'], 0)
        self.assertEqual(second_attrib['allele_num'], 1)

        # INFO values.
        self.assertEqual(first_attrib['info']['AF'], 0.5)
        self.assertEqual(second_attrib['info']['AF'], 0.5)

        # Per-sample genotype values.
        self.assertEqual(first_attrib['samples']['SAMP1']['GT'], (0, 1, 0))
        self.assertEqual(second_attrib['samples']['SAMP1']['GT'], (0, 0, 1))
Esempio n. 10
0
 def test_change_of_attributes(self):
     """Rendered rows follow changes to ``id``, ``qual`` and ``filter``.

     The first variant of example1.vcf is rendered unchanged, then with
     the three attributes set to '.', then with the same values passed as
     keyword arguments to ``get_row``, and finally with them set to
     ``None``. Each modified case must render '.' in those columns.
     """
     reader = VCFReader(
         pkg_file('genomvar.test', 'data/example1.vcf'))
     # Close exactly once, and also when an assertion fails.
     try:
         vrt = list(reader.iter_vrt())[0]
         self.assertEqual(str(self.writer.get_row(vrt)),
                          'chr24\t23\t1\tAG\tA\t100\tPASS\t.')

         # Work on deep copies so the parsed variant stays unchanged.
         vrt2 = copy.deepcopy(vrt)
         vrt2.attrib['id'] = '.'
         vrt2.attrib['qual'] = '.'
         vrt2.attrib['filter'] = '.'
         self.assertEqual(str(self.writer.get_row(vrt2)),
                          'chr24\t23\t.\tAG\tA\t.\t.\t.')
         self.assertEqual(
             str(self.writer.get_row(vrt2, id='.', qual='.', filter='.')),
             'chr24\t23\t.\tAG\tA\t.\t.\t.')

         vrt3 = copy.deepcopy(vrt)
         vrt3.attrib['id'] = None
         vrt3.attrib['qual'] = None
         vrt3.attrib['filter'] = None
         self.assertEqual(str(self.writer.get_row(vrt3)),
                          'chr24\t23\t.\tAG\tA\t.\t.\t.')
     finally:
         reader.close()