Python VCF Examples, pypgen.parser.VCF Python Examples

Example #1

0

Show file

    def test_make_slices_default_settings(self):
        """Check the slice reached at index 11 when using 500 bp windows."""

        slices = VCF.get_slice_indicies(self.bgzip_path, regions=None, window_size=500)

        # Advance through the slices and stop as soon as the index exceeds
        # 10; ``window`` then holds the slice the loop stopped on.
        for index, window in enumerate(slices):
            if index > 10:
                break

        self.assertEqual(window, ('Chr01', 5501, 6000))

Example #2

0

Show file

    def test_heterozyogote_outgroup_calling(self):
        """Expect ``None`` when one outgroup is 1/1 and the other is 1/0."""
        record = self.vcf_line

        # Overwrite the genotypes of both outgroup samples on the fixture.
        record['out1']['GT'] = '1/1'
        record['out2']['GT'] = '1/0'

        outgroup_call = VCF.process_outgroup(record, self.populations)
        self.assertEqual(outgroup_call, None)

Example #3

0

Show file

    def test_diff_homozygotes_outgroups_calling(self):
        """Expect ``None`` when the outgroups are 1/1 and 0/0 respectively."""
        record = self.vcf_line

        # Give the two outgroup samples conflicting homozygous genotypes.
        record['out1']['GT'] = '1/1'
        record['out2']['GT'] = '0/0'

        outgroup_call = VCF.process_outgroup(record, self.populations)
        self.assertEqual(outgroup_call, None)

Example #4

0

Show file

    def test_make_slices_default_with_params_set(self):
        """Check the slice reached at index 11 when window_size is 1008."""

        slices = VCF.get_slice_indicies(self.bgzip_path, regions=None, window_size=1008)

        # Advance through the slices and stop as soon as the index exceeds
        # 10; ``window`` then holds the slice the loop stopped on.
        for index, window in enumerate(slices):
            if index > 10:
                break

        self.assertEqual(window, ('Chr01', 11089, 12096))

Example #5

0

Show file

    def test_home_alt_outgroup_calling(self):
        """Expect ``'1'`` when both outgroup samples are 1/1."""
        record = self.vcf_line

        # Set both outgroup samples to the same homozygous genotype.
        record['out1']['GT'] = '1/1'
        record['out2']['GT'] = '1/1'

        outgroup_call = VCF.process_outgroup(record, self.populations)
        self.assertEqual(outgroup_call, '1')

Example #6

0

Show file

    def test_header_vs_population_sample_ids(self):
        """Check that the sample IDs parsed from the populations argument
            match those in the VCF file.

            NOTE: In practice the populations argument can contain fewer
            samples and populations than actually contained in the VCF file.
        """

        header = VCF.make_empty_vcf_ordered_dict(self.bgzip_path, )
        # Skip the first nine header entries (presumably the fixed VCF
        # columns); everything after them is treated as a sample ID.
        header_sample_ids = list(header)[9:]

        populations_dict = VCF.parse_populations_list(self.populations_list)
        # Flatten the per-population sample lists into a single list.
        populations_sample_ids = []
        for members in populations_dict.values():
            populations_sample_ids.extend(members)

        # Same set of IDs and same number of entries on both sides.
        self.assertEqual(set(header_sample_ids), set(populations_sample_ids))
        self.assertEqual(len(header_sample_ids), len(populations_sample_ids))

Example #7

0

Show file

    def test_population_string_parsing(self):
        """Check that the populations list parses to the expected mapping."""
        populations = VCF.parse_populations_list(self.populations_list)

        # Population name -> list of sample IDs.
        expected = {
            'melpo': ['m523', 'm524', 'm525', 'm589', 'm675',
                      'm676', 'm682', 'm683', 'm687', 'm689'],
            'pachi': ['p516', 'p517', 'p518', 'p519', 'p520',
                      'p591', 'p596', 'p690', 'p694', 'p696'],
            'cydno': ['c511', 'c512', 'c513', 'c514', 'c515',
                      'c563', 'c614', 'c630', 'c639', 'c640'],
            'outgroups': ['h665', 'i02-210'],
        }

        self.assertEqual(populations, expected)

Example #8

0

Show file

 def test_calc_fstats_normal_allele_counts(self):
     """Check the statistics computed for the ('pachi', 'outgroups') pair."""
     f_statistics = VCF.calc_fstats(self.normal_allele_counts)

     expected = {
         'G_prime_st_est': 0.9140159767610748,
         'D_est': 0.7581699346405228,
         'G_double_prime_st_est': 0.9477124183006534,
         'Gst_est': 0.6444444444444445,
         'Hs_est': 0.1729729729729729,
         'Ht_est': 0.48648648648648646,
     }

     self.assertEqual(expected, f_statistics[('pachi', 'outgroups')])

Example #9

0

Show file

 def test_trivial_calc_multilocus_f_statistics(self):
     """Check that the trivial Hs/Ht fixtures give all-zero statistics."""
     ml_stats = VCF.calc_multilocus_f_statistics(self.trivial_Hs_est_dict, self.trivial_Ht_est_dict)

     pair_stats = {
         'G_prime_st_est': 0.0,
         'Gst_est.stdev': 0.0,
         'G_double_prime_st_est.stdev': 0.0,
         'G_double_prime_st_est': 0.0,
         'Gst_est': 0.0,
         'D_est.stdev': 0.0,
         'G_prime_st_est.stdev': 0.0,
         'D_est': 0.0,
     }
     expected = {('pop1', 'pop2'): pair_stats}

     self.assertEqual(expected, ml_stats)

Example #10

0

Show file

 def test_trivial_2_calc_multilocus_f_statistics(self):
     """Check multilocus statistics for the second trivial Hs/Ht fixtures."""
     ml_stats = VCF.calc_multilocus_f_statistics(self.trivial_2_Hs_est_dict, self.trivial_2_Ht_est_dict)

     pair_stats = {
         'G_prime_st_est': 0.8333333333333334,
         'Gst_est.stdev': 0.09999999999999998,
         'G_double_prime_st_est.stdev': 0.060586734693877375,
         'G_double_prime_st_est': 0.8888888888888888,
         'Gst_est': 0.5,
         'D_est.stdev': 0.08928571428571419,
         'G_prime_st_est.stdev': 0.07857142857142851,
         'D_est': 0.6488650338184054,
     }
     expected = {('pop1', 'pop2'): pair_stats}

     self.assertEqual(expected, ml_stats)

Example #11

0

Show file

 def test_calc_multilocus_f_statistics(self):
     """Check multilocus statistics for the main Hs/Ht fixtures."""
     ml_stats = VCF.calc_multilocus_f_statistics(self.Hs_est_dict, self.Ht_est_dict)

     pair_stats = {
         'Gst_est': 0.30312672938572266,
         'Gst_est.stdev': 1.3360742705570265,
         'G_double_prime_st_est': 0.5003506191636901,
         'G_double_prime_st_est.stdev': 2.394045897494315,
         'G_prime_st_est': 0.34889353651117816,
         'G_prime_st_est.stdev': 1.6091544573608876,
         'D_est': 0.0015629851350084287,
         'D_est.stdev': 0.25110803099568757,
     }
     expected = {('pop1', 'pop2'): pair_stats}

     self.assertEqual(expected, ml_stats)

Example #12

0

Show file

 def test_calc_fstats_trivial_allele_counts(self):
     """Check the full result of calc_fstats on the trivial allele counts."""
     f_statistics = VCF.calc_fstats(self.trivial_allele_counts)

     pair_stats = {
         'G_prime_st_est': 0.6803049722304385,
         'D_est': 0.517460317460318,
         'G_double_prime_st_est': 0.7609710550887027,
         'Gst_est': 0.33747412008281613,
         'Hs_est': 0.3368421052631577,
         'Ht_est': 0.508421052631579,
     }
     expected = {('pop2', 'pop1'): pair_stats}

     self.assertEqual(expected, f_statistics)

Example #13

0

Show file

def callSNPs(current_base, numb_of_seqs):
    """Return an array holding one base call per sequence for a VCF record.

    Records whose FILTER is 'LowQual' or whose FORMAT is exactly 'GT' are
    masked: every position in the result is set to "-". For all other
    records each sample field (everything from index 9 onwards in
    ``current_base``) is passed to ``VCF.process_snp_call`` together with
    the record's REF and ALT, and the result is stored at the sample's
    position.

    current_base -- record exposing FILTER, FORMAT, REF and ALT attributes
                    and supporting slicing (``current_base[9:]``).
    numb_of_seqs -- length of the returned array.
    """

    blanks = np.zeros(numb_of_seqs, np.string0)

    # Return immediately for masked records. Falling through to the
    # per-sample loop would overwrite the "-" fill and make the filter
    # checks ineffective.
    if current_base.FILTER == 'LowQual' or current_base.FORMAT == 'GT':
        blanks.fill("-")
        return blanks

    for count, snp_call in enumerate(current_base[9:]):
        blanks[count] = VCF.process_snp_call(snp_call, current_base.REF, current_base.ALT)

    return blanks

Example #14

0

Show file

File: vcf2phylip.py Project: tw7649116/pypgen

def callSNPs(current_base, numb_of_seqs, IUPAC_ambiguities=True):
    """Return an array holding one base call per sequence for a VCF record.

    Records whose FILTER is 'LowQual', or whose REF or ALT is longer than
    one character, are masked: every position in the result is set to
    "-". For all other records each sample field (everything from index 9
    onwards in ``current_base``) is passed to ``VCF.process_snp_call``
    together with the record's REF and ALT.

    current_base      -- record exposing FILTER, REF and ALT attributes and
                         supporting slicing (``current_base[9:]``).
    numb_of_seqs      -- length of the returned array.
    IUPAC_ambiguities -- forwarded unchanged to ``VCF.process_snp_call``.
    """

    blanks = np.zeros(numb_of_seqs, np.string0)

    if current_base.FILTER == 'LowQual':
        blanks.fill("-")

    elif len(current_base.ALT) > 1 or len(current_base.REF) > 1:
        blanks.fill("-")

    else:
        for count, snp_call in enumerate(current_base[9:]):
            # Forward the caller's choice instead of hard-coding True, so
            # that IUPAC_ambiguities=False is actually honoured.
            base = VCF.process_snp_call(snp_call, current_base.REF, current_base.ALT,
                                        IUPAC_ambiguities=IUPAC_ambiguities)
            blanks[count] = base

    return blanks

Example #15

0

Show file

 def test_homo_alt_genotype_calling(self):
     """Expect 'T' for a 1/1 call with REF 'A' and ALT 'T'."""
     sample_field = '1/1:10,9:19:99:254,0,337'
     called_base = VCF.process_snp_call(sample_field, 'A', 'T', IUPAC_ambiguities=True)
     self.assertEqual(called_base, 'T')

Example #16

0

Show file

 def test_double_alt_genotype_calling(self):
     """Expect 'K' for a 1/2 call with REF 'A' and ALT 'T,G'."""
     sample_field = '1/2:10,9:19:99:254,0,337'
     called_base = VCF.process_snp_call(sample_field, 'A', 'T,G', IUPAC_ambiguities=True)
     self.assertEqual(called_base, 'K')

Example #17

0

Show file

    def test_header_to_ordered_dict_parsing(self):
        """Check the parsed header against the ``header_dict`` fixture."""
        parsed_header = VCF.make_empty_vcf_ordered_dict(self.bgzip_path)
        self.assertEqual(parsed_header, self.header_dict)

Example #18

0

Show file

 def test_heterozygote_as_N_genotype_calling(self):
     """Expect 'N' for a 0/1 call when IUPAC ambiguities are disabled."""
     sample_field = '0/1:10,9:19:99:254,0,337'
     called_base = VCF.process_snp_call(sample_field, 'A', 'T', IUPAC_ambiguities=False)
     self.assertEqual(called_base, 'N')

Example #19

0

Show file

 def test_double_alt_het_as_N_genotype_calling(self):
     """Expect 'N' for a 1/2 call when IUPAC ambiguities are disabled."""
     sample_field = '1/2:10,9:19:99:254,0,337'
     called_base = VCF.process_snp_call(sample_field, 'A', 'T,G', IUPAC_ambiguities=False)
     self.assertEqual(called_base, 'N')

Example #20

0

Show file

 def test_home_ref_outgroup_calling(self):
     """Expect '0' when the unmodified ``vcf_line`` fixture is processed."""
     outgroup_call = VCF.process_outgroup(self.vcf_line, self.populations)
     self.assertEqual(outgroup_call, '0')

Example #21

0

Show file

 def test_second_alt_genotype_calling(self):
     """Expect 'R' for a 0/2 call with REF 'A' and ALT 'T,G'."""
     sample_field = '0/2:10,9:19:99:254,0,337'
     called_base = VCF.process_snp_call(sample_field, 'A', 'T,G', IUPAC_ambiguities=True)
     self.assertEqual(called_base, 'R')