Ejemplo n.º 1
0
 def test_get_sample_names(self):
     '''Check that get_sample_names() reports the three expected samples.'''
     with vcf.VcfFile(self.example_vcf, 'r') as vcffile:
         # Move to the variants section before asking for the sample names.
         vcffile.jump2variants()
         sample_names = vcffile.get_sample_names()
         self.assertEqual(len(sample_names), 3)
         # assertIn shows the container contents on failure, whereas
         # assertTrue(x in y) only reports "False is not true".
         for expected_name in ('NA00001', 'NA00002', 'NA00003'):
             self.assertIn(expected_name, sample_names)
Ejemplo n.º 2
0
 def test_is_variant_line(self):
     '''Exercise is_variant_line() on lines read from the file and on a literal.'''
     literal_variant = '20     14370   rs6054257 G      A       29   PASS   NS=3;DP=14;AF=0.5;DB;H2           GT:GQ:DP:HQ 0|0:48:1:51,51 1|0:48:8:51,51 1/1:43:5:.,.\n'
     with vcf.VcfFile(self.example_vcf, 'r') as vcffile:
         # The first two lines read from the file must not count as variants.
         for _ in range(2):
             leading_line = vcffile.readline()
             self.assertFalse(vcffile.is_variant_line(leading_line))

         # After the jump, the next line read must be recognised as a variant.
         vcffile.jump2variants()
         first_variant_line = vcffile.readline()
         self.assertTrue(vcffile.is_variant_line(first_variant_line))

         # A hand-written variant line must be accepted too.
         self.assertTrue(vcffile.is_variant_line(literal_variant))
Ejemplo n.º 3
0
 def test_jump2variants(self):
     '''Check that jump2variants() sets column names and stops before the first variant.'''
     expected_first_variant = '20     14370   rs6054257 G      A       29   PASS   NS=3;DP=14;AF=0.5;DB;H2           GT:GQ:DP:HQ 0|0:48:1:51,51 1|0:48:8:51,51 1/1:43:5:.,.\n'
     with vcf.VcfFile(self.example_vcf, 'r') as vcffile:
         vcffile.jump2variants()

         # The jump must have populated the column names.
         parsed_columns = vcffile.column_names
         self.assertEqual(parsed_columns, self.example_vcf_column_names)

         # The very next line read must be the first variant record.
         next_line = vcffile.readline()
         self.assertEqual(expected_first_variant, next_line)
Ejemplo n.º 4
0
    def test_set_column_names(self):
        '''Check set_column_names() rejects a non-header line and parses a header line.'''
        with vcf.VcfFile(self.example_vcf, 'r') as vcffile:
            # The first line of the file is not the column header line, so
            # feeding it to set_column_names must raise ValueError.
            first_line = vcffile.readline()
            with self.assertRaises(ValueError):
                vcffile.set_column_names(first_line)

            # A proper header line must be split into the expected names.
            vcffile.set_column_names(self.example_vcf_column_names_str)
            expected_by_index = ((0, 'CHROM'), (1, 'POS'), (7, 'INFO'))
            for index, expected_name in expected_by_index:
                self.assertEqual(vcffile.column_names[index], expected_name)
Ejemplo n.º 5
0
 def test_parse_line(self):
     '''Check parse_line() on the first two variant lines of the example VCF.

     The test fails explicitly when the file yields fewer than two variant
     lines, rather than passing without having checked the second record.
     '''
     with vcf.VcfFile(self.example_vcf, 'r') as vcffile:
         vcffile.jump2variants()
         for index, line in enumerate(vcffile):
             variant = vcffile.parse_line(line)
             if index == 0:
                 self.assertEqual(variant['CHROM'], '20')
                 self.assertEqual(variant['ID'], 'rs6054257')
                 self.assertEqual(variant['NA00003'], '1/1:43:5:.,.')
             else:
                 # index == 1: second variant; later records are not checked.
                 self.assertEqual(variant['INFO'], 'NS=3;DP=11;AF=0.017')
                 self.assertEqual(variant['FORMAT'], 'GT:GQ:DP:HQ')
                 self.assertEqual(variant['NA00003'], '0/0:41:3')
                 break
         else:
             # Only reached when the loop ran out of lines before the break.
             self.fail('example VCF yielded fewer than two variant lines')
Ejemplo n.º 6
0
    def test_read_variant(self):
        '''Check read_variant() rejects a non-variant line and parses variant rows.'''
        with vcf.VcfFile(self.example_vcf, 'r') as vcffile:
            # Reading at the start of the file hits a non-variant line,
            # which must raise ValueError.
            with self.assertRaises(ValueError):
                vcffile.read_variant()

            # Once positioned at the variants, rows must parse correctly.
            vcffile.jump2variants()

            first_variant = vcffile.read_variant()
            first_expected = (
                ('CHROM', '20'),
                ('ID', 'rs6054257'),
                ('NA00003', '1/1:43:5:.,.'),
            )
            for column, expected_value in first_expected:
                self.assertEqual(first_variant[column], expected_value)

            second_variant = vcffile.read_variant()
            second_expected = (
                ('INFO', 'NS=3;DP=11;AF=0.017'),
                ('FORMAT', 'GT:GQ:DP:HQ'),
                ('NA00003', '0/0:41:3'),
            )
            for column, expected_value in second_expected:
                self.assertEqual(second_variant[column], expected_value)
Ejemplo n.º 7
0
    def test_parse_info(self):
        '''Check parse_info() on the INFO column of the first two variants.

        parse_info() returns a pair: a keyed lookup for the key=value
        entries and a collection of the standalone flags.
        '''
        with vcf.VcfFile(self.example_vcf, 'r') as vcffile:
            vcffile.jump2variants()

            # First variant: keyed values plus exactly two standalone flags.
            variant = vcffile.read_variant()
            info_map, info_single = vcffile.parse_info(variant)
            self.assertEqual(info_map['AF'], '0.5')
            self.assertEqual(info_map['DP'], '14')
            self.assertEqual(len(info_single), 2)
            # assertIn reports the container contents when the check fails.
            self.assertIn('DB', info_single)
            self.assertIn('H2', info_single)

            # Second variant: only the keyed values are checked.
            variant = vcffile.read_variant()
            info_map, _ = vcffile.parse_info(variant)
            self.assertEqual(info_map['NS'], '3')
            self.assertEqual(info_map['AF'], '0.017')
Ejemplo n.º 8
0
    def test_parse_samples(self):
        '''Check parse_samples() returns per-sample, per-field values.'''
        with vcf.VcfFile(self.example_vcf, 'r') as vcffile:
            vcffile.jump2variants()

            # First variant: all three samples present, spot-check one field each.
            variant = vcffile.read_variant()
            sample2field2val = vcffile.parse_samples(variant)
            self.assertEqual(len(sample2field2val), 3)
            # assertIn reports the container contents when the check fails.
            for sample_name in ('NA00001', 'NA00002', 'NA00003'):
                self.assertIn(sample_name, sample2field2val)
            self.assertEqual(sample2field2val['NA00001']['GT'], '0|0')
            self.assertEqual(sample2field2val['NA00002']['GQ'], '48')
            self.assertEqual(sample2field2val['NA00003']['HQ'], '.,.')

            # Second variant: spot-check a different field for each sample.
            variant = vcffile.read_variant()
            sample2field2val = vcffile.parse_samples(variant)
            self.assertEqual(sample2field2val['NA00001']['HQ'], '58,50')
            self.assertEqual(sample2field2val['NA00002']['GT'], '0|1')
            self.assertEqual(sample2field2val['NA00003']['GQ'], '41')
Ejemplo n.º 9
0
    def test_get_sample_gt(self):
        '''Check get_sample_gt() for several samples, phased and unphased.'''
        # Each case: (variants to skip first, sample name, phased flag,
        # expected genotype string). Cases consume the file sequentially.
        cases = (
            (0, 'NA00001', True, 'G|G'),
            (0, 'NA00002', True, 'T|A'),
            (0, 'NA00003', False, 'T/T'),
            (1, 'NA00001', False, 'G/GTC'),
        )
        with vcf.VcfFile(self.example_vcf, 'r') as vcffile:
            vcffile.jump2variants()
            for skipped, sample_name, is_phased, expected_gt in cases:
                # Discard the variants that are not under test.
                for _ in range(skipped):
                    vcffile.read_variant()
                record = vcffile.read_variant()
                genotype = vcffile.get_sample_gt(
                    record, sample_name, phased=is_phased)
                self.assertEqual(genotype, expected_gt)
Ejemplo n.º 10
0
def _classify_variant(ref, alt):
    '''
    Return the variant type label for a REF/ALT string pair.

    The label depends only on the two string lengths:
    'snp' when neither is longer than one character, 'del' when REF is
    longer, 'ins' when ALT is longer, and for equal lengths 'dnp' (2),
    'tnp' (3) or 'onp' (anything longer).

    NOTE(review): the length is taken from the raw ALT string, so an ALT
    holding several comma-separated alleles would be measured as a whole --
    confirm that this is the intended behaviour.
    '''
    len_ref = len(ref)
    len_alt = len(alt)
    if len_ref <= 1 and len_alt <= 1:
        return 'snp'
    if len_ref > len_alt:
        return 'del'
    if len_ref < len_alt:
        return 'ins'
    # Equal-length substitution spanning more than one base
    if len_ref == 2:
        return 'dnp'
    if len_ref == 3:
        return 'tnp'
    return 'onp'


def parse_vcf(vcf_in, fout):
    '''
    Read through the vcf file, and parse it.
    Output results in tsv format

    vcf_in is passed as-is to vcf.VcfFile (opened in 'r' mode); fout only
    needs a write() method accepting a string.

    One header row is written, then one row per variant line with the
    columns: Chrom, Position, Ref, Alt, Type, AF, NoCall, followed by one
    genotype column and one depth column per sample. AF and per-sample DP
    are written as 'NA' when absent from the record.
    '''
    with vcf.VcfFile(vcf_in, 'r') as vcffile:

        # Skip to the variants section of the vcf file
        vcffile.jump2variants()
        sample_names = vcffile.get_sample_names()

        # Output header line
        header = ['Chrom', 'Position', 'Ref', 'Alt', 'Type', 'AF', 'NoCall']
        header += [name + '_gt' for name in sample_names]
        header += [name + '_dp' for name in sample_names]
        fout.write('%s\n' % '\t'.join(header))

        # Read in the variant lines
        for line in vcffile:

            # Get parsed variant data
            variant = vcffile.parse_line(line)
            chrom = variant['CHROM']
            pos = variant['POS']
            ref = variant['REF']
            alt = variant['ALT']

            # Parse the info column; the standalone flags are not used here
            info_map, _ = vcffile.parse_info(variant)
            # A record without AF gets 'NA', mirroring the DP handling
            # below, instead of aborting the whole run with a KeyError.
            af = info_map['AF'] if 'AF' in info_map else 'NA'

            variant_type = _classify_variant(ref, alt)

            # Sample genotypes and depths
            samples2field2val = vcffile.parse_samples(variant)
            samples_gts = []
            samples_dps = []
            num_nocall = 0
            for sample in sample_names:
                sample_gt = vcffile.get_sample_gt(variant, sample)
                # 'N/N' is the genotype string counted as a no-call
                if sample_gt == 'N/N':
                    num_nocall += 1
                sample_fields = samples2field2val[sample]
                if 'DP' in sample_fields:
                    sample_dp = sample_fields['DP']
                else:
                    sample_dp = 'NA'
                samples_gts.append(sample_gt)
                samples_dps.append(sample_dp)

            # Output the variant row
            row = [chrom, pos, ref, alt, variant_type, af, str(num_nocall)]
            fout.write('%s\n' % '\t'.join(row + samples_gts + samples_dps))
Ejemplo n.º 11
0
 def test_is_header(self):
     '''Check is_header() accepts the column-names line but not a '#'-prefixed copy.'''
     with vcf.VcfFile(self.example_vcf, 'r') as vcffile:
         header_line = self.example_vcf_column_names_str
         accepted = vcffile.is_header(header_line)
         self.assertTrue(accepted)

         # Prepending one more '#' must make the line stop counting as a header.
         prefixed_line = '#' + self.example_vcf_column_names_str
         rejected = vcffile.is_header(prefixed_line)
         self.assertFalse(rejected)
Ejemplo n.º 12
0
 def test_is_meta(self):
     '''Check is_meta() accepts a '##' line and rejects other lines.'''
     with vcf.VcfFile(self.example_vcf, 'r') as vcffile:
         meta_line = '##fileDate=20120818'
         self.assertTrue(vcffile.is_meta(meta_line))

         # A single-'#' line and a plain-text line are both non-meta.
         for non_meta_line in ('# blahblah', 'foo'):
             self.assertFalse(vcffile.is_meta(non_meta_line))