def test_retrieve_af(self):
    # retrieve_af() is expected to hand back the AF value as a string when
    # the INFO columns carry an AF tag, and None when they do not.
    shared_columns = [
        '1', '200', '300', '2', '300', '400', '777_1', '57', '+', '-',
        'BND', 'PASS', '.', '.', '.', '.', '.', '.',
    ]

    with_af = Bedpe(shared_columns + ['SVTYPE=BND;AF=0.2', 'SVTYPE=BND;AF=0.2'])
    self.assertEqual(with_af.retrieve_af(), '0.2')

    without_af = Bedpe(shared_columns + ['SVTYPE=BND', 'SVTYPE=BND'])
    self.assertIsNone(without_af.retrieve_af())
def test_retrieve_svtype(self):
    # A record carrying SVTYPE reports it; building a record whose INFO
    # columns lack SVTYPE is expected to terminate via SystemExit.
    shared_columns = [
        '1', '200', '300', '2', '300', '400', '777_1', '57', '+', '-',
        'BND', 'PASS', '.', '.', '.', '.', '.', '.',
    ]

    typed = Bedpe(shared_columns + ['SVTYPE=BND;AF=0.2', 'SVTYPE=BND;AF=0.2'])
    self.assertEqual(typed.retrieve_svtype(), 'BND')

    untyped_fields = shared_columns + ['AF=0.2', 'AF=0.2']
    self.assertRaises(SystemExit, Bedpe, untyped_fields)
def test_adjust_by_tag(self):
    # Exercise the converter's adjust_by_tag for both strands, first with
    # no CIPOS tag in the info and then with CIPOS=-2,3 present.
    bedpe = Bedpe([
        '1', '200', '300', '2', '300', '400', '777_1', '57', '+', '-',
        'BND', 'PASS', '.', '.', '.', '.', '.', '.',
        'SVTYPE=BND;AF=0.2', 'SVTYPE=BND;AF=0.2',
    ])

    for strand, expected in (('+', 200), ('-', 201)):
        self.assertEqual(
            self.converter.adjust_by_tag(bedpe, 'CIPOS', strand, 200),
            expected)

    bedpe.info1 = 'SVTYPE=BND;AF=0.2;CIPOS=-2,3'
    for strand, expected in (('-', 203), ('+', 202)):
        self.assertEqual(
            self.converter.adjust_by_tag(bedpe, 'CIPOS', strand, 200),
            expected)
def test_parse_info_tag(self):
    # (info string, tag, expected result) triples: flag tags give True/False,
    # key=value tags give the value as a string.
    cases = (
        ('SVTYPE', 'SVTYPE', True),
        ('SVTYPE', 'AF=', False),
        ('SVTYPE=BND;AF=0.2', 'AF=', '0.2'),
        ('SVTYPE=BND;AF=0.2', 'SVTYPE=', 'BND'),
        ('SVTYPE=BND;SECONDARY;AF=0.2', 'SECONDARY', True),
    )
    for info_string, tag, expected in cases:
        self.assertEqual(Bedpe.parse_info_tag(info_string, tag), expected)
def test_adjust_by_ciend(self):
    # Check the converter's adjust_by_ciend result as the second strand and
    # the CIEND tag of the record are changed step by step.
    bedpe = Bedpe([
        '1', '200', '300', '2', '300', '400', '777_1', '57', '+', '-',
        'BND', 'PASS', 'SVTYPE=BND;AF=0.2', 'SVTYPE=BND;AF=0.2',
    ])
    self.assertEqual(self.converter.adjust_by_ciend(bedpe), 301)

    # Each step sets one attribute and states the value expected afterwards.
    steps = (
        ('o2', '+', 300),
        ('info1', 'SVTYPE=BND;AF=0.2;CIEND=-2,3', 302),
        ('o2', '-', 303),
    )
    for attribute, value, expected in steps:
        setattr(bedpe, attribute, value)
        self.assertEqual(self.converter.adjust_by_ciend(bedpe), expected)
def test__combine_sname_values(self):
    # Two populated values are merged (order is not checked, hence the set
    # comparison); a None on either side leaves the other value untouched.
    merged = Bedpe._combine_sname_values('sample1:2', 'sample2:4,sample3:5')
    self.assertEqual(
        set(merged.split(',')),
        {'sample1:2', 'sample2:4', 'sample3:5'})

    only_second = Bedpe._combine_sname_values(None, 'sample2:4,sample3:5')
    self.assertEqual(only_second, 'sample2:4,sample3:5')

    only_first = Bedpe._combine_sname_values('sample2:4,sample3:5', None)
    self.assertEqual(only_first, 'sample2:4,sample3:5')
def test_adjust_by_cipos(self):
    # Track Bedpe.b1 across calls to adjust_by_cipos() while the first
    # strand and the CIPOS tag (stored in misc[0]) are modified.
    record = Bedpe([
        '1', '200', '300', '2', '300', '400', '777_1', '57', '+', '-',
        'BND', 'PASS', 'SVTYPE=BND;AF=0.2', 'SVTYPE=BND;AF=0.2',
    ])
    self.assertEqual(record.b1, 200)

    record.o1 = '-'
    record.adjust_by_cipos()
    self.assertEqual(record.b1, 201)

    record.misc[0] = 'SVTYPE=BND;AF=0.2;CIPOS=-2,3'
    record.adjust_by_cipos()
    self.assertEqual(record.b1, 203)

    record.o1 = '+'
    record.adjust_by_cipos()
    self.assertEqual(record.b1, 202)
def test_sname_value(self):
    # Plain SNAME tag: the value after '=' is returned.
    self.assertEqual(
        Bedpe.sname_value('SNAME=sample1:2,sample2:3'),
        'sample1:2,sample2:3')

    # No SNAME tag, or an empty one, is expected to give None.
    for info_string in ('AF', 'SNAME='):
        self.assertIsNone(Bedpe.sname_value(info_string))

    # A similarly named SNAME1 tag must not be picked up instead of SNAME,
    # with or without further tags following.
    lookalike_cases = (
        'SNAME1=older_sample1:2,older_sample2:2;SNAME=sample1:2,sample2:3',
        'SNAME1=older_sample 1:2,older_sample2:2;SNAME=sample1:2,sample2:3;AF=1',
    )
    for info_string in lookalike_cases:
        self.assertEqual(Bedpe.sname_value(info_string), 'sample1:2,sample2:3')
def test_adjust_by_ciend(self):
    # Track Bedpe.b2 across calls to adjust_by_ciend() while the second
    # strand and the CIEND tag (stored in misc[0]) are modified.
    record = Bedpe([
        '1', '200', '300', '2', '300', '400', '777_1', '57', '+', '-',
        'BND', 'PASS', 'SVTYPE=BND;AF=0.2', 'SVTYPE=BND;AF=0.2',
    ])
    self.assertEqual(record.b2, 301)

    record.o2 = '+'
    record.adjust_by_ciend()
    self.assertEqual(record.b2, 300)

    record.misc[0] = 'SVTYPE=BND;AF=0.2;CIEND=-2,3'
    record.adjust_by_ciend()
    self.assertEqual(record.b2, 302)

    record.o2 = '-'
    record.adjust_by_ciend()
    self.assertEqual(record.b2, 303)
def test__update_sname_field(self):
    # Both inputs carry SNAME: the result is a single SNAME tag holding
    # both samples (order is not checked).
    merged = Bedpe._update_sname_field('SNAME=sample 2:4', 'SNAME=sample3:12')
    tag_name, values = merged.split('=')
    self.assertEqual(tag_name, 'SNAME')
    self.assertEqual(set(values.split(',')), {'sample 2:4', 'sample3:12'})

    # Test to ensure we don't fail if no SNAME is present
    no_sname = Bedpe._update_sname_field('AF=0.5', 'AF=0.1')
    self.assertEqual(no_sname, 'AF=0.5')

    # Test if the first sample is missing SNAME
    first_missing = Bedpe._update_sname_field('AF=0.5', 'AF=0.1;SNAME=sample5:12')
    self.assertEqual(first_missing, 'AF=0.5;SNAME=sample5:12')
def cluster_bedpe(self, in_file, bedpe_out, is_sorted):
    # Read BEDPE lines from in_file, copy the header to bedpe_out (adding a
    # RETAINED INFO definition ahead of the #CHROM line), and assign every
    # record to the clusters it is compatible with. Clusters are flushed via
    # self.prune() every 1000 records when is_sorted is set, and once more
    # at the end before the stats report is written to stderr.
    distance_limit = self.max_distance
    evaluation = self.eval_param

    still_in_header = True
    for raw_line in in_file:
        if still_in_header and raw_line.startswith('#'):
            if raw_line.startswith('#CHROM'):
                bedpe_out.write('##INFO=<ID=RETAINED,Number=0,Type=Flag,Description="Variants clustering with this call were pruned">\n')
            bedpe_out.write(raw_line)
            continue
        still_in_header = False

        self.bedpe_lines += 1
        bedpe = Bedpe(raw_line.rstrip().split('\t'))
        if bedpe.af is None:
            sys.stderr.write('No allele frequency for variant found. This tool requires allele frequency information to function. Please add with svtools afreq and rerun\n')
            sys.exit(1)
        if bedpe.af == '.':
            # Records without a usable AF are counted and ignored.
            self.skipped_lines += 1
            continue

        # Add the record to every cluster that accepts it.
        hits = []
        for candidate in self.cluster_list:
            if candidate.can_add(bedpe, distance_limit):
                candidate.add(bedpe, evaluation)
                hits.append(candidate)

        if not hits:
            fresh = Cluster()
            fresh.add(bedpe, evaluation)
            self.cluster_list.append(fresh)
        elif len(hits) > 1:
            # The record bridged several clusters: fold later hits into
            # earlier ones where compatible and remember what was absorbed.
            absorbed = []
            keeper = 0
            while keeper < len(hits) - 1:
                doomed = []
                for other in range(keeper + 1, len(hits)):
                    representative = hits[other].elements[0]
                    if hits[keeper].can_add(representative, distance_limit):
                        hits[keeper].add(representative, evaluation)
                        absorbed.append(hits[other])
                        doomed.append(other)
                # Delete from the back so earlier indices stay valid.
                for index in reversed(doomed):
                    del hits[index]
                keeper += 1
            if absorbed:
                self.cluster_list = [
                    existing for existing in self.cluster_list
                    if existing not in absorbed
                ]

        #prune and print eligible clusters
        if self.bedpe_lines % 1000 == 0 and is_sorted:
            self.cluster_list = self.prune(bedpe, False, bedpe_out)

    self.cluster_list = self.prune(None, True, bedpe_out)
    sys.stderr.write(self.stats_report_string())
    return
def test__update_sname_field(self):
    # Both inputs carry SNAME: the result is a single SNAME tag holding
    # both samples (order is not checked, hence the set comparison).
    merged = Bedpe._update_sname_field('SNAME=sample2:4', 'SNAME=sample3:12')
    tag_name, values = merged.split('=')
    self.assertEqual(tag_name, 'SNAME')
    self.assertEqual(set(values.split(',')), {'sample2:4', 'sample3:12'})
def bedpeToVcf(bedpe_file, vcf_out):
    '''
    Convert a BEDPE stream into VCF text written to vcf_out.

    Leading '##' lines are collected as header lines and the single-'#'
    column line is rewritten as a VCF column line. The VCF header is
    emitted before the first record, or after the loop when the input
    held header lines but no records (previously such input produced no
    output at all). vcf_out is closed before returning.

    bedpe_file -- iterable of BEDPE text lines
    vcf_out    -- writable, closable file-like object
    '''
    myvcf = Vcf()
    converter = BedpeToVcfConverter(myvcf)
    header = []

    def write_header():
        # Hand the collected header to the Vcf object and emit it once.
        myvcf.add_header(header)
        myvcf.file_format = 'VCFv4.2'
        vcf_out.write(myvcf.get_header() + '\n')

    in_header = True
    # parse the bedpe data
    for line in bedpe_file:
        if in_header:
            if line.startswith('##'):
                header.append(line)
                continue
            if line.startswith('#'):
                # Column line: everything past the 20th tab is carried over
                # as the FORMAT/sample column names.
                sample_list_str = line.rstrip().split('\t', 20)[-1]
                header.append('\t'.join([
                    '#CHROM', 'POS', 'ID', 'REF', 'ALT', 'QUAL', 'FILTER',
                    'INFO', sample_list_str
                ]))
                continue
            in_header = False
            write_header()

        bedpe = Bedpe(line.rstrip().split('\t'))
        for variant in converter.convert(bedpe):
            vcf_out.write(variant.get_var_string() + '\n')

    if in_header and header:
        # Header-only input: still produce a valid, record-less VCF.
        write_header()

    # close the VCF output file
    vcf_out.close()
    return
def test_malformed(self):
    # malformedFlag is expected to be 1 when the first INFO column is
    # 'MISSING' and 2 when the second one is; info1 mirrors column 18.
    leading = [
        '1', '200', '300', '2', '300', '400', '777_1', '57', '+', '-',
        'BND', 'PASS', '.', '.', '.', '.', '.', '.',
    ]

    first_missing = Bedpe(leading + ['MISSING', 'SVTYPE=BND;AF=0.2'])
    self.assertEqual(first_missing.malformedFlag, 1)

    second_fields = leading + ['SVTYPE=BND;AF=0.2', 'MISSING']
    second_missing = Bedpe(second_fields)
    self.assertEqual(second_missing.malformedFlag, 2)
    self.assertEqual(second_missing.info1, second_fields[18])
def test_parse_info_tag(self):
    # NOTE(review): despite the name, every assertion here exercises
    # Bedpe.update_info_tag.
    replacements = (
        ('SNAME=sample', 'SNAME=sample,sample2'),
        ('SNAME=sample;AF=0.75', 'SNAME=sample,sample2;AF=0.75'),
    )
    for info_string, expected in replacements:
        self.assertEqual(
            Bedpe.update_info_tag(info_string, 'SNAME=', 'sample,sample2'),
            expected)

    # A missing tag, or a flag-style tag, is expected to raise ValueError.
    rejected = (
        ('AF=0.75', 'SNAME=', 'sample,sample2'),
        ('SECONDARY;AF=0.5', 'SECONDARY', 'NEW_VALUE'),
        ('AF=0.5;SECONDARY', 'SECONDARY', 'NEW_VALUE'),
    )
    for info_string, tag, new_value in rejected:
        self.assertRaises(
            ValueError, Bedpe.update_info_tag, info_string, tag, new_value)
def test_str(self):
    # Note that we are testing float to float equivalence. Actually passing
    # in an integer will result in it being converted to float with a
    # decimal place, which is why the score column is written as '57.0'.
    fields = ['1', '200', '300', '2', '300', '400', '777_1', '57.0', '+', '-']
    fields.extend(['BND', 'PASS'])
    fields.extend(['.'] * 6)
    fields.extend(['SVTYPE=BND;AF=0.2', 'SVTYPE=BND;AF=0.2'])

    record = Bedpe(fields)
    self.assertEqual(str(record), '\t'.join(fields))
def cluster_bedpe(self, in_file, bedpe_out, is_sorted):
    # Read BEDPE lines from in_file, copy the header to bedpe_out, and
    # assign every record to the clusters it is compatible with. Clusters
    # are flushed via self.prune() every 1000 records when is_sorted is set,
    # and once more at the end before the stats report goes to stderr.
    #
    # Exits with status 1 if a record has no allele frequency at all;
    # records whose AF is '.' are counted in self.skipped_lines and ignored.

    # Locally alias instance variables
    max_distance = self.max_distance
    eval_param = self.eval_param

    in_header = True
    for line in in_file:
        if line.startswith('#') and in_header:
            bedpe_out.write(line)
            continue
        in_header = False
        self.bedpe_lines += 1
        bedpe = Bedpe(line.rstrip().split('\t'))
        if bedpe.af is None:
            sys.stderr.write('No allele frequency for variant found. This tool requires allele frequency information to function. Please add with svtools afreq and rerun\n')
            sys.exit(1)
        if bedpe.af == '.':
            self.skipped_lines += 1
            continue

        matched_clusters = []
        for cluster in self.cluster_list:
            if cluster.can_add(bedpe, max_distance):
                cluster.add(bedpe, eval_param)
                matched_clusters.append(cluster)

        if not matched_clusters:
            new_cluster = Cluster()
            new_cluster.add(bedpe, eval_param)
            self.cluster_list.append(new_cluster)
        elif len(matched_clusters) > 1:
            # The record joined several clusters; merge the ones that are
            # now compatible with each other.
            pruned_clusters = []
            i = 0
            while i < (len(matched_clusters) - 1):
                # Collect indices first and delete afterwards: deleting
                # while advancing j would skip the element that slides
                # into the freed slot.
                to_delete = []
                for j in range(i + 1, len(matched_clusters)):
                    if matched_clusters[i].can_add(matched_clusters[j].elements[0], max_distance):
                        matched_clusters[i].add(matched_clusters[j].elements[0], eval_param)
                        pruned_clusters.append(matched_clusters[j])
                        to_delete.append(j)
                for index in reversed(to_delete):
                    del matched_clusters[index]
                i += 1
            if pruned_clusters:
                # Drop the absorbed clusters, not the ones that absorbed
                # them.
                self.cluster_list = [cluster for cluster in self.cluster_list if cluster not in pruned_clusters]

        #prune and print eligible clusters
        if self.bedpe_lines % 1000 == 0 and is_sorted:
            self.cluster_list = self.prune(bedpe, False, bedpe_out)

    self.cluster_list = self.prune(None, True, bedpe_out)
    sys.stderr.write(self.stats_report_string())
    return
def test_add(self):
    # Adding records to a Cluster should update its size, event type,
    # filter value, chromosomes, coordinate bounds and strands.
    placeholders = ['.'] * 6
    first = Bedpe(
        ['1', '200', '300', '2', '300', '400', '777_1', '57', '+', '-',
         'BND', 'PASS'] + placeholders + ['MISSING', 'SVTYPE=BND;AF=0.2'])
    second = Bedpe(
        ['1', '195', '305', '2', '295', '405', '777_1', '57', '+', '-',
         'BND', 'PASS'] + placeholders + ['MISSING', 'SVTYPE=BND;AF=0.3'])
    cluster = Cluster()

    def check_state(expected_pairs):
        # Compare cluster attributes in the listed order.
        for attribute, value in expected_pairs:
            self.assertEqual(getattr(cluster, attribute), value)

    cluster.add(first, None)
    check_state([
        ('size', 1), ('sv_event', 'BND'), ('filter', '0.2'),
        ('chrom_a', '1'), ('min_a', 200), ('max_a', 300),
        ('chrom_b', '2'), ('min_b', 300), ('max_b', 400),
        ('strand_a', '+'), ('strand_b', '-'),
    ])

    cluster.add(second, None)
    check_state([
        ('size', 2), ('sv_event', 'BND'), ('filter', '0.3'),
        ('chrom_a', '1'), ('min_a', 195), ('max_a', 305),
        ('chrom_b', '2'), ('min_b', 295), ('max_b', 405),
        ('strand_a', '+'), ('strand_b', '-'),
    ])
def processBEDPE(bedpe_stream, name, dist, output_handle):
    '''
    Convert each BEDPE record with BedpetoBlockedBedConverter and write
    the resulting lines to output_handle.

    The converter's track line is written first. Header lines (starting
    with '#') and blank lines are skipped; blank lines previously slipped
    past the `if lineList` check because ''.split('\t') yields [''].

    bedpe_stream  -- iterable of BEDPE text lines
    name, dist    -- passed through to BedpetoBlockedBedConverter
    output_handle -- writable file-like object
    '''
    converter = BedpetoBlockedBedConverter(name, dist)
    output_handle.write(converter.track_name())

    for line in bedpe_stream:
        # ignore header
        if line.startswith('#'):
            continue
        record = line.rstrip()
        if not record:
            continue
        bedpe = Bedpe(record.split('\t'))
        output_handle.write('\n'.join(converter.convert(bedpe)) + '\n')
def test_parse_info_tag(self):
    # (info string, tag, expected result) triples. The last case checks
    # that a tag merely ending in 'AF=' (BAD_AF=) is not mistaken for AF.
    cases = (
        ('SVTYPE', 'SVTYPE', True),
        ('SVTYPE', 'AF=', False),
        ('SVTYPE=BND;AF=0.2', 'AF=', '0.2'),
        ('SVTYPE=BND;AF=0.2', 'SVTYPE=', 'BND'),
        ('SVTYPE=BND;SECONDARY;AF=0.2', 'SECONDARY', True),
        ('SVTYPE=BND;SECONDARY;BAD_AF=0.3;AF=0.2', 'AF=', '0.2'),
    )
    for info_string, tag, expected in cases:
        self.assertEqual(Bedpe.parse_info_tag(info_string, tag), expected)
def test_parse_info_tag(self):
    # NOTE(review): despite the name, every assertion here exercises
    # Bedpe.update_info_tag.
    new_samples = 'sample,sample2'

    updated = Bedpe.update_info_tag('SNAME=sample', 'SNAME=', new_samples)
    self.assertEqual(updated, 'SNAME=sample,sample2')

    updated = Bedpe.update_info_tag('SNAME=sample;AF=0.75', 'SNAME=', new_samples)
    self.assertEqual(updated, 'SNAME=sample,sample2;AF=0.75')

    # A missing tag, or a flag-style tag, is expected to raise ValueError.
    self.assertRaises(
        ValueError, Bedpe.update_info_tag, 'AF=0.75', 'SNAME=', new_samples)
    self.assertRaises(
        ValueError, Bedpe.update_info_tag,
        'SECONDARY;AF=0.5', 'SECONDARY', 'NEW_VALUE')
    self.assertRaises(
        ValueError, Bedpe.update_info_tag,
        'AF=0.5;SECONDARY', 'SECONDARY', 'NEW_VALUE')
def test_can_add(self):
    # Start from a cluster whose bounds equal the record, then break one
    # property at a time and confirm can_add() rejects the record. Each
    # property is restored before the next one is broken so that every
    # assertion isolates a single cause.
    bedpe = [
        '1', '200', '300', '2', '300', '400', '777_1', '57', '+', '-',
        'BND', 'PASS', '.', '.', '.', '.', '.', '.',
        'MISSING', 'SVTYPE=BND;AF=0.2'
    ]
    b = Bedpe(bedpe)

    c = Cluster()
    c.chrom_a = b.c1
    c.chrom_b = b.c2
    c.min_a = b.s1
    c.max_a = b.e1
    c.min_b = b.s2
    c.max_b = b.e2
    c.strand_a = b.o1
    c.strand_b = b.o2

    # size and sv_event have not been set on the cluster yet
    self.assertTrue(c.can_add(b, 1))

    c.size = 1
    c.sv_event = 'DEL'
    self.assertFalse(c.can_add(b, 1))
    c.sv_event = 'BND'
    self.assertTrue(c.can_add(b, 1))

    c.chrom_a = 'X'
    self.assertFalse(c.can_add(b, 1))
    c.chrom_a = b.c1

    c.chrom_b = 'X'
    self.assertFalse(c.can_add(b, 1))
    c.chrom_b = b.c2

    c.min_a = 305
    self.assertFalse(c.can_add(b, 1))
    c.min_a = b.s1

    c.max_a = 150
    self.assertFalse(c.can_add(b, 1))
    c.max_a = b.e1

    c.min_b = 405
    self.assertFalse(c.can_add(b, 1))
    # Restore from the second breakend (was b.s1, a copy-paste slip that
    # left min_b at the wrong value for the final check).
    c.min_b = b.s2

    c.max_b = 150
    self.assertFalse(c.can_add(b, 1))
def test_get_cluster_string(self):
    # An empty cluster is expected to raise ValueError; a cluster holding
    # one record renders as that record's string form.
    record = Bedpe([
        '1', '200', '300', '2', '300', '400', '777_1', '57', '+', '-',
        'BND', 'PASS', '.', '.', '.', '.', '.', '.',
        'MISSING', 'SVTYPE=BND;AF=0.2',
    ])
    cluster = Cluster()
    self.assertRaises(ValueError, cluster.get_cluster_string)

    cluster.add(record, None)
    self.assertEqual(cluster.get_cluster_string(), str(record))
def test_info(self):
    # Whichever INFO column holds the real annotation (the other being
    # 'MISSING' or 'SECONDARY'), Bedpe.info should expose that annotation.
    leading = [
        '1', '200', '300', '2', '300', '400', '777_1', '57', '+', '-',
        'BND', 'PASS', '.', '.', '.', '.', '.', '.',
    ]
    info_column_pairs = (
        ['MISSING', 'SVTYPE=BND;AF=0.2'],
        ['SVTYPE=BND;AF=0.2', 'MISSING'],
        ['SVTYPE=BND;AF=0.2', 'SECONDARY'],
    )
    for info_columns in info_column_pairs:
        record = Bedpe(leading + info_columns)
        self.assertEqual(record.info, 'SVTYPE=BND;AF=0.2')
def test_retrieve_af(self):
    # retrieve_af() should return the AF value, None when AF is absent, and
    # must not confuse a tag that merely ends in AF (FIN_AF) with AF.
    leading = [
        '1', '200', '300', '2', '300', '400', '777_1', '57', '+', '-',
        'BND', 'PASS', '.', '.', '.', '.', '.', '.',
    ]

    plain = Bedpe(leading + ['SVTYPE=BND;AF=0.2'] * 2)
    self.assertEqual(plain.retrieve_af(), '0.2')

    absent = Bedpe(leading + ['SVTYPE=BND'] * 2)
    self.assertIsNone(absent.retrieve_af())

    lookalike = Bedpe(leading + ['SVTYPE=BND;AF=0.2;FIN_AF=0.01'] * 2)
    self.assertEqual(lookalike.retrieve_af(), '0.2')
def test_parse_score(self):
    # A numeric score string compares equal to the number; the '.'
    # placeholder is passed through unchanged.
    for raw_score, expected in (('20', 20), ('.', '.')):
        self.assertEqual(Bedpe.parse_score(raw_score), expected)
def flag_as_pruned(bedpe):
    '''
    Add the RETAINED flag to the BEDPE's info unless parse_info_tag
    reports that it is already there
    '''
    already_flagged = Bedpe.parse_info_tag(bedpe.info, 'RETAINED')
    if already_flagged:
        return
    bedpe.set_info('RETAINED', None)
def test_set_info(self):
    # set_info() should append the new tag to the populated INFO column(s)
    # and leave a '.' placeholder column alone.
    leading = [
        '1', '200', '300', '2', '300', '400', '777_1', '57', '+', '-',
        'BND', 'PASS', '.', '.', '.', '.', '.', '.',
    ]

    # One side MISSING (either order): only the real annotation matters.
    for info_columns in (['MISSING', 'SVTYPE=BND'], ['SVTYPE=BND', 'MISSING']):
        record = Bedpe(leading + info_columns)
        record.set_info('AF', '0.2')
        self.assertEqual(record.info, 'SVTYPE=BND;AF=0.2')

    # A SECONDARY second column receives the tag as well.
    with_secondary = Bedpe(leading + ['SVTYPE=BND', 'SECONDARY'])
    with_secondary.set_info('AF', '0.2')
    self.assertEqual(with_secondary.info1, 'SVTYPE=BND;AF=0.2')
    self.assertEqual(with_secondary.info2, 'SECONDARY;AF=0.2')

    # A value of None adds a bare flag; the '.' column stays untouched.
    with_placeholder = Bedpe(leading + ['SVTYPE=BND', '.'])
    with_placeholder.set_info('PRESENT', None)
    self.assertEqual(with_placeholder.info, 'SVTYPE=BND;PRESENT')
    self.assertEqual(with_placeholder.info2, '.')
def varLookup(aFile, bFile, bedpe_out, max_distance, pass_prefix, cohort_name):
    '''
    Annotate the variants of aFile with matches found in bFile.

    All records of bFile are loaded first; each record of aFile is then
    compared against every loaded record via add() and written to
    bedpe_out via get_var_string(). Exits with status 1 when a record in
    either file has no allele frequency.

    aFile, bFile -- file paths; the literal "stdin" reads standard input
                    and a '.gz' suffix is opened with gzip
    bedpe_out    -- writable file-like object receiving header and records
    max_distance -- passed through to add()
    pass_prefix  -- lines starting with this prefix are treated as header
                    lines; None disables the check for both files
    cohort_name  -- prefix for the added INFO ids; defaults to the base
                    name of bFile when None
    '''
    # FIXME The following code is heavily duplicated with vcftobedpe and bedpetovcf. Harmonize!!!

    def open_input(path):
        # Return a readable handle for "stdin", a gzip file or a plain file.
        if path == "stdin":
            return sys.stdin
        if path.endswith('.gz'):
            return gzip.open(path, 'rb')
        return open(path, 'r')

    def close_input(handle):
        # Never close stdin; it is not ours.
        if handle is not sys.stdin:
            handle.close()

    bList = list()
    headerObj = Vcf()  # co-opt the VCF header object
    b_basename = str(str(bFile).split('/')[-1])
    if cohort_name is None:
        cohort_name = b_basename

    bData = open_input(bFile)
    try:
        for bLine in bData:
            # Guard against pass_prefix=None, as the -a loop below does.
            if pass_prefix is not None and bLine.startswith(pass_prefix):
                continue
            bentry = Bedpe(bLine.rstrip().split('\t'))
            if bentry.af is None:
                sys.stderr.write(
                    'No allele frequency for variant found in -b file. This tool requires allele frequency information to function. Please add with svtools afreq and rerun\n'
                )
                sys.exit(1)
            bList.append(bentry)
    finally:
        close_input(bData)

    column_names = [
        '#CHROM_A', 'START_A', 'END_A', 'CHROM_B', 'START_B', 'END_B', 'ID',
        'QUAL', 'STRAND_A', 'STRAND_B', 'TYPE', 'FILTER', 'INFO_A', 'INFO_B'
    ]

    aData = open_input(aFile)
    try:
        in_header = True
        header_lines = []
        sample_list = None
        for aLine in aData:
            if pass_prefix is not None and aLine.startswith(pass_prefix):
                if aLine.startswith('#') and not aLine.startswith('##'):
                    # Column line: keep whatever follows the 14th tab.
                    sample_list = aLine.rstrip().split('\t', 14)[-1]
                else:
                    header_lines.append(aLine)
                continue

            if in_header:
                # First record: emit the header with the two added INFO ids.
                headerObj.add_header(header_lines)
                headerObj.add_info(
                    cohort_name + '_AF', '.', 'Float',
                    'Allele frequency(ies) for matching variants found in the '
                    + cohort_name + ' vcf' + ' (' + b_basename + ')')
                headerObj.add_info(
                    cohort_name + '_VarID', '.', 'Integer',
                    'List of Variant ID(s) for matching variants found in the '
                    + cohort_name + ' vcf' + ' (' + b_basename + ')')
                header = headerObj.get_header()
                # Drop the last header line; the BEDPE column line below
                # replaces it.
                bedpe_out.write(header[:header.rfind('\n')] + '\n')
                # sample_list is None when no column line was seen.
                if sample_list:
                    bedpe_out.write('\t'.join(column_names + [sample_list]) + '\n')
                else:
                    bedpe_out.write('\t'.join(column_names) + '\n')
                in_header = False

            a = Bedpe(aLine.rstrip().split('\t'))
            if a.af is None:
                sys.stderr.write(
                    'No allele frequency for variant found in -a file. This tool requires allele frequency information to function. Please add with svtools afreq and rerun\n'
                )
                sys.exit(1)
            for b in bList:
                add(a, b, max_distance)
            bedpe_out.write(get_var_string(a, cohort_name) + '\n')
    finally:
        close_input(aData)
def test__combine_sname_values(self):
    # Two populated values are merged (order is not checked, hence the set
    # comparison); a None on either side leaves the other value untouched.
    combined = Bedpe._combine_sname_values('sample1:2', 'sample2:4,sample3:5')
    expected_members = {'sample1:2', 'sample2:4', 'sample3:5'}
    self.assertEqual(set(combined.split(',')), expected_members)

    untouched = 'sample2:4,sample3:5'
    self.assertEqual(
        Bedpe._combine_sname_values(None, 'sample2:4,sample3:5'), untouched)
    self.assertEqual(
        Bedpe._combine_sname_values('sample2:4,sample3:5', None), untouched)
def convert(self, primary_variant, secondary_variant=None):
    '''
    Convert the passed VCF variant(s) into a BEDPE object

    primary_variant   -- VCF variant supplying the first breakend, or None
                         when only the secondary record is available
    secondary_variant -- optional mate record (used for BND variants)

    Returns a Bedpe built from the combined fields.
    Raises ValueError when no variant is given or SVTYPE is absent.
    '''
    vcf_variant = primary_variant
    if primary_variant is None:
        vcf_variant = secondary_variant
    if vcf_variant is None:
        raise ValueError('At least one VCF variant is required for conversion to BEDPE')

    try:
        sv_type = vcf_variant.info['SVTYPE']
    except KeyError:
        raise ValueError('SVTYPE field required for conversion to BEDPE')

    parser = self.simple_breakpoints
    if sv_type == 'BND':
        parser = self.bnd_breakpoints
    c1, s1, e1, c2, s2, e2, o1, o2 = parser(vcf_variant)
    s1, e1 = self.adjust_coordinate(vcf_variant, 'CIPOS', s1, e1)
    s2, e2 = self.adjust_coordinate(vcf_variant, 'CIEND', s2, e2)

    orig_name_a = vcf_variant.var_id
    orig_ref_a = vcf_variant.ref
    orig_alt_a = vcf_variant.alt
    info_a = vcf_variant.get_info_string()

    if primary_variant is None:
        # Only the secondary record exists: the A side is unknown and the
        # parsed breakends are swapped so the record lands on the B side.
        info_a = "MISSING"
        orig_name_a = orig_ref_a = orig_alt_a = '.'
        c1, s1, e1, o1, c2, s2, e2, o2 = c2, s2, e2, o2, c1, s1, e1, o1

    info_b = '.'
    orig_name_b = orig_ref_b = orig_alt_b = '.'
    if sv_type == 'BND':
        if secondary_variant is None:
            info_b = "MISSING"
        else:
            info_b = secondary_variant.get_info_string()
            orig_name_b = secondary_variant.var_id
            orig_ref_b = secondary_variant.ref
            orig_alt_b = secondary_variant.alt
            # The B interval comes from the mate's own first breakend and
            # its CIPOS.
            _, ss1, se1 = parser(secondary_variant)[:3]
            s2, e2 = self.adjust_coordinate(secondary_variant, 'CIPOS', ss1, se1)

    # For MANTA single-ended BNDs, EVENT is not present.
    # XXX This has probably already been calculated outside of this method.
    # May be a candidate to memoize or otherwise cache?
    # By adding to the variant class, perhaps?
    name = vcf_variant.var_id
    if 'EVENT' in vcf_variant.info:
        name = vcf_variant.info['EVENT']
    elif 'MATEID' in vcf_variant.info and vcf_variant.var_id.startswith('Manta'):
        # Specifically handle Manta: drop the trailing ':<suffix>' of the id.
        name = vcf_variant.var_id.rsplit(':', 1)[0]

    # Build a real list (not map()) so the += below also works on Python 3,
    # where map() returns an iterator.
    fields = [str(value) for value in (
        c1, max(s1, 0), max(e1, 0),
        c2, max(s2, 0), max(e2, 0),
        name, vcf_variant.qual, o1, o2, sv_type, vcf_variant.filter,
        orig_name_a, orig_ref_a, orig_alt_a,
        orig_name_b, orig_ref_b, orig_alt_b,
        info_a, info_b,
    )]
    format_string = vcf_variant.get_format_string()
    if format_string is not None:
        fields += [format_string, vcf_variant.get_gt_string()]
    return Bedpe(fields)
def test_sname_value(self):
    # A populated SNAME tag yields its value; a missing or empty SNAME tag
    # is expected to yield None.
    found = Bedpe.sname_value('SNAME=sample1:2,sample2:3')
    self.assertEqual(found, 'sample1:2,sample2:3')

    for info_string in ('AF', 'SNAME='):
        self.assertIsNone(Bedpe.sname_value(info_string))