def setUp(self):
    """Create the shared fixture: three record lines and two parsed copies.

    ``self.vcf_file1`` is a list of three tab-separated lines of eleven
    fields each. ``self.vcf1`` and ``self.vcf2`` are produced by two
    separate ``parse_vcf_file`` calls on that same list.
    """
    # Only the second, fourth and fifth fields vary between the lines.
    variants = (
        ('16688', 'G', 'A'),
        ('186478', 'A', 'G'),
        ('16577291', 'T', 'C'),
    )
    # Every line ends with five empty fields.
    empty_tail = [''] * 5
    self.vcf_file1 = [
        '\t'.join(['chr1', position, '.', ref, alt, '.'] + empty_tail)
        for position, ref, alt in variants
    ]
    # Parse twice so each attribute comes from its own call.
    self.vcf1 = parse_vcf_file(self.vcf_file1)
    self.vcf2 = parse_vcf_file(self.vcf_file1)
def test_remove_duplicates_at_same_loc(self):
    # Checks that remove_true_duplicates() keeps, and remove_loc_dup()
    # drops, a record that shares 'chr1' / '186478' with another record
    # but differs in the fourth field ('T' here).
    # NOTE(review): relies on self.vcf1 already holding a 'chr1' / '186478'
    # record with a different fourth field - confirm against the fixture.
    extra_fields = ['chr1', '186478', '.', 'T', 'G', '.', '', '', '', '', '']
    extra_lines = ['\t'.join(extra_fields)]
    combined = parse_vcf_file(extra_lines) + self.vcf1

    # Both de-duplication calls run before any assertion.
    without_true_dups = combined.remove_true_duplicates()
    without_loc_dups = combined.remove_loc_dup()

    # Concatenation keeps every record from both operands.
    expected_total = len(self.vcf1) + len(extra_lines)
    self.assertEqual(len(combined), expected_total)

    # The extra record is not a true duplicate, so nothing is removed.
    self.assertEqual(len(without_true_dups), len(combined))
    self.assertEqual(without_true_dups, combined)

    # Location-based removal returns as many records as the fixture has.
    self.assertEqual(len(without_loc_dups), len(self.vcf1))
def merge_vcfs(ctx, vcf_file1, vcf_file2, vcf_out, ann_vcf1=None, ann_vcf2=None):
    # Read two VCF files, optionally annotate each, concatenate them,
    # remove true duplicates and write the result to ``vcf_out``.
    #
    # ctx:        accepted but not used in this body.
    # vcf_file1:  passed to read_vcf() for the first input.
    # vcf_file2:  passed to read_vcf() for the second input.
    # vcf_out:    passed to the merged object's to_file().
    # ann_vcf1:   when not None, passed to annotate() on the first input.
    # ann_vcf2:   when not None, passed to annotate() on the second input.
    logging.info(f'Reading file: {vcf_file1}')
    first_lines = read_vcf(vcf_file1)
    first = parse_vcf_file(first_lines)

    logging.info(f'Reading file: {vcf_file2}')
    second_lines = read_vcf(vcf_file2)
    second = parse_vcf_file(second_lines)

    # Annotation is applied only after both inputs have been read.
    first = first if ann_vcf1 is None else first.annotate(ann_vcf1)
    second = second if ann_vcf2 is None else second.annotate(ann_vcf2)

    logging.info('Merging VCFs...')
    merged = first + second
    n1 = len(merged)
    deduplicated = merged.remove_true_duplicates()
    n2 = len(deduplicated)
    # The size difference is the number of records dropped as duplicates.
    logging.info(f'{n1-n2} duplicates removed')

    deduplicated.to_file(vcf_out)
def test_merge_two_vcfs(self):
    # Adding the two parsed fixtures must equal parsing the fixture lines
    # repeated twice in a row.
    merged = self.vcf1 + self.vcf2
    doubled_lines = self.vcf_file1 + self.vcf_file1
    expected = parse_vcf_file(doubled_lines)
    self.assertEqual(merged, expected)
def test_parse_vcf_file_with_header(self):
    # Two '#'-prefixed lines placed before the records must be counted in
    # ``vcf.header`` and must not be counted in ``len(vcf)``.
    # NOTE(review): this reads self.vcf_file, while the setUp visible in
    # this file assigns self.vcf_file1 - confirm the attribute exists on
    # this test class.
    header_lines = ['# This is a header', '# Multiline header here']
    lines_with_header = header_lines + self.vcf_file
    parsed = parse_vcf_file(lines_with_header)

    record_count = len(parsed)
    self.assertEqual(record_count, len(self.vcf_file))

    header_count = len(parsed.header)
    self.assertEqual(header_count, len(header_lines))
def test_parse_vcf_file(self):
    # parse_vcf_file() must return a VCF with one entry per input line,
    # and its first entry must equal _parse_vcf_line() of the first line.
    # NOTE(review): this reads self.vcf_file, while the setUp visible in
    # this file assigns self.vcf_file1 - confirm the attribute exists on
    # this test class.
    parsed = parse_vcf_file(self.vcf_file)
    self.assertIsInstance(parsed, VCF)

    line_count = len(self.vcf_file)
    self.assertEqual(len(parsed), line_count)

    first_record = parsed[0]
    expected_first = _parse_vcf_line(self.vcf_file[0])
    self.assertEqual(first_record, expected_first)