def bedpeToVcf(bedpe_file, vcf_out):
    """Convert BEDPE lines to VCF records and write them to ``vcf_out``.

    Leading ``##`` lines are copied into the VCF header unchanged.  The
    single-``#`` column line is replaced by the fixed VCF columns followed
    by whatever trails the 20th tab of that line.  Every subsequent line is
    parsed as a ``Bedpe`` record and converted with ``BedpeToVcfConverter``.

    The header is written exactly once: right before the first record, or
    after the loop when the input holds no records at all, so a header-only
    (or empty) input still produces a VCF header.

    :param bedpe_file: iterable of text lines (e.g. an open file).
    :param vcf_out: writable file-like object; it is closed before returning.
    :returns: None
    """
    myvcf = Vcf()
    converter = BedpeToVcfConverter(myvcf)
    header = []

    def write_header():
        # Hand the collected header lines to the Vcf object and emit them.
        myvcf.add_header(header)
        myvcf.file_format = 'VCFv4.2'
        vcf_out.write(myvcf.get_header() + '\n')

    in_header = True
    for line in bedpe_file:
        if in_header:
            if line.startswith('##'):
                header.append(line)
                continue
            if line.startswith('#'):
                # Column line: keep only the text after the 20th tab
                # (presumably the sample columns -- confirm against the
                # BEDPE producer) and prepend the fixed VCF columns.
                sample_list_str = line.rstrip().split('\t', 20)[-1]
                header.append('\t'.join([
                    '#CHROM',
                    'POS',
                    'ID',
                    'REF',
                    'ALT',
                    'QUAL',
                    'FILTER',
                    'INFO',
                    sample_list_str,
                ]))
                continue
            # First record line: the header is complete, flush it before
            # any variant is written.
            in_header = False
            write_header()

        bedpe = Bedpe(line.rstrip().split('\t'))
        for v in converter.convert(bedpe):
            vcf_out.write(v.get_var_string() + '\n')

    # No record line was ever seen: the header has not been written yet.
    if in_header:
        write_header()

    # close the VCF output file
    vcf_out.close()
    return
class TestBedpeToVcfConverter(TestCase):
    """Unit tests for the helper methods of BedpeToVcfConverter."""

    def setUp(self):
        # Every test gets its own converter built on a fresh Vcf object.
        self.converter = BedpeToVcfConverter(Vcf())

    def test_adjust_by_tag(self):
        info = 'SVTYPE=BND;AF=0.2'
        fields = [
            '1', '200', '300',
            '2', '300', '400',
            '777_1', '57', '+', '-', 'BND', 'PASS',
        ] + ['.'] * 6 + [info, info]
        bedpe = Bedpe(fields)
        adjust = self.converter.adjust_by_tag

        # info1 carries no CIPOS tag yet.
        self.assertEqual(adjust(bedpe, 'CIPOS', '+', 200), 200)
        self.assertEqual(adjust(bedpe, 'CIPOS', '-', 200), 201)

        # Same calls once info1 carries CIPOS=-2,3.
        bedpe.info1 = 'SVTYPE=BND;AF=0.2;CIPOS=-2,3'
        self.assertEqual(adjust(bedpe, 'CIPOS', '-', 200), 203)
        self.assertEqual(adjust(bedpe, 'CIPOS', '+', 200), 202)

    def test_determine_sep(self):
        for strand, expected in (('-', '['), ('+', ']')):
            self.assertEqual(self.converter.determine_sep(strand), expected)

    def test_determine_flanks(self):
        for strand, expected in (('-', ('', 'N')), ('+', ('N', ''))):
            self.assertEqual(
                self.converter.determine_flanks(strand), expected
            )

    def test_bnd_alt_string(self):
        make_alt = self.converter.bnd_alt_string
        self.assertEqual(make_alt('+', '-', '2', '22222'), 'N[2:22222[')
        self.assertEqual(make_alt('-', '+', '2', '22222'), ']2:22222]N')
def bedpeToVcf(bedpe_file, vcf_out):
    """Translate a BEDPE line stream into VCF text written to ``vcf_out``.

    ``##`` lines are collected verbatim as header lines; the ``#`` column
    line is rewritten to the fixed VCF columns plus the text following its
    20th tab.  All later lines are parsed as ``Bedpe`` records, converted,
    and written one variant per line.  The header is emitted just before
    the first record, or at the end when no record was found.  ``vcf_out``
    is closed before returning.

    :param bedpe_file: iterable of text lines.
    :param vcf_out: writable file-like object.
    :returns: None
    """
    myvcf = Vcf()
    converter = BedpeToVcfConverter(myvcf)
    header_lines = list()

    def emit_header():
        # Shared by the "first record" and "no records" paths.
        myvcf.add_header(header_lines)
        myvcf.file_format = 'VCFv4.2'
        vcf_out.write(myvcf.get_header() + '\n')

    reading_header = True
    for raw in bedpe_file:
        if reading_header:
            if raw[0:2] == '##':
                header_lines.append(raw)
                continue
            if raw[0] == '#' and raw[1] != '#':
                trailing_columns = raw.rstrip().split('\t', 20)[-1]
                vcf_columns = [
                    '#CHROM', 'POS', 'ID', 'REF', 'ALT',
                    'QUAL', 'FILTER', 'INFO',
                    trailing_columns,
                ]
                header_lines.append('\t'.join(vcf_columns))
                continue
            # Anything else ends the header section.
            reading_header = False
            emit_header()

        record = Bedpe(raw.rstrip().split('\t'))
        for variant in converter.convert(record):
            vcf_out.write(variant.get_var_string() + '\n')

    # Header was never flushed because no variant line was encountered.
    if reading_header:
        emit_header()

    vcf_out.close()
    return
def setUp(self):
    """Build the converter under test on top of a fresh Vcf object."""
    self.converter = BedpeToVcfConverter(Vcf())