Ejemplo n.º 1
0
    def setUp(self):
        """Create the shared fixture: raw VCF-style lines and two parsed copies.

        ``self.vcf_file1`` holds three tab-separated records on ``chr1``;
        ``self.vcf1`` and ``self.vcf2`` are each the result of calling
        ``parse_vcf_file`` on that same list of lines.
        """
        # (position, ref allele, alt allele) for each record; every other
        # column is a constant placeholder.
        variants = [
            ('16688', 'G', 'A'),
            ('186478', 'A', 'G'),
            ('16577291', 'T', 'C'),
        ]
        self.vcf_file1 = [
            '\t'.join(['chr1', pos, '.', ref, alt, '.', '', '', '', '', ''])
            for pos, ref, alt in variants
        ]

        # Parsed twice so tests have two independently parsed objects.
        self.vcf1 = parse_vcf_file(self.vcf_file1)
        self.vcf2 = parse_vcf_file(self.vcf_file1)
Ejemplo n.º 2
0
    def test_remove_duplicates_at_same_loc(self):
        # One extra record at chr1:186478 with ref 'T' / alt 'G' is prepended
        # to self.vcf1. The test then checks that remove_true_duplicates()
        # leaves the combined VCF unchanged, while remove_loc_dup() brings the
        # record count back down to len(self.vcf1).
        same_loc_lines = ['\t'.join(['chr1', '186478', '.', 'T', 'G', '.', '', '', '', '', ''])]
        combined = parse_vcf_file(same_loc_lines) + self.vcf1
        without_true_dups = combined.remove_true_duplicates()
        without_loc_dups = combined.remove_loc_dup()

        # The concatenation keeps every record from both inputs.
        expected_total = len(self.vcf1) + len(same_loc_lines)
        self.assertEqual(len(combined), expected_total)

        # Nothing is dropped by the true-duplicate filter.
        self.assertEqual(len(without_true_dups), len(combined))
        self.assertEqual(without_true_dups, combined)

        # The location-duplicate filter removes exactly one record.
        self.assertEqual(len(without_loc_dups), len(self.vcf1))
Ejemplo n.º 3
0
def merge_vcfs(ctx,
               vcf_file1,
               vcf_file2,
               vcf_out,
               ann_vcf1=None,
               ann_vcf2=None):
    """Merge two VCF files, drop true duplicates and write the result.

    Args:
        ctx: Not used inside this function.
        vcf_file1: First input, passed to ``read_vcf``.
        vcf_file2: Second input, passed to ``read_vcf``.
        vcf_out: Destination passed to the merged VCF's ``to_file``.
        ann_vcf1: If not ``None``, passed to ``annotate`` on the first VCF.
        ann_vcf2: If not ``None``, passed to ``annotate`` on the second VCF.
    """
    logging.info(f'Reading file: {vcf_file1}')
    first = parse_vcf_file(read_vcf(vcf_file1))
    logging.info(f'Reading file: {vcf_file2}')
    second = parse_vcf_file(read_vcf(vcf_file2))

    # Annotation is optional and applied per input before merging.
    first = first if ann_vcf1 is None else first.annotate(ann_vcf1)
    second = second if ann_vcf2 is None else second.annotate(ann_vcf2)

    logging.info('Merging VCFs...')
    combined = first + second
    n1 = len(combined)
    deduplicated = combined.remove_true_duplicates()
    n2 = len(deduplicated)
    # Difference in length before/after is the number of records removed.
    logging.info(f'{n1-n2} duplicates removed')
    deduplicated.to_file(vcf_out)
Ejemplo n.º 4
0
 def test_merge_two_vcfs(self):
     # Adding two parsed VCFs must equal parsing the raw lines repeated twice.
     merged = self.vcf1 + self.vcf2
     doubled_lines = self.vcf_file1 * 2
     expected = parse_vcf_file(doubled_lines)
     self.assertEqual(merged, expected)
Ejemplo n.º 5
0
 def test_parse_vcf_file_with_header(self):
     # Two '#'-prefixed lines are placed before the fixture lines; the parsed
     # object must report the fixture lines via len() and the two header
     # lines via its `header` attribute.
     header_lines = ['# This is a header', '# Multiline header here']
     record_lines = self.vcf_file
     parsed = parse_vcf_file(header_lines + record_lines)
     self.assertEqual(len(parsed), len(record_lines))
     self.assertEqual(len(parsed.header), len(header_lines))
Ejemplo n.º 6
0
 def test_parse_vcf_file(self):
     # Parsing the fixture lines must yield a VCF instance with one entry per
     # input line, whose first entry equals the first line parsed on its own.
     raw_lines = self.vcf_file
     parsed = parse_vcf_file(raw_lines)
     self.assertIsInstance(parsed, VCF)
     self.assertEqual(len(parsed), len(raw_lines))
     first_record = parsed[0]
     self.assertEqual(first_record, _parse_vcf_line(raw_lines[0]))