def annotate_fusion(ref_f, input_f, output_f):
    """
    Align fusion junctions to gene annotations.

    Gene annotations are read from ``ref_f`` with ``parse_ref1`` and fusion
    junctions from ``input_f`` with ``parse_bed``. For every chromosome in
    the annotation, the junctions overlapping each annotated interval are
    matched against the interval's isoforms with ``map_fusion_to_iso``, and
    one tab-separated record per successful match is written to
    ``output_f``.

    Each record consists of nine BED-style columns (chrom, start, end,
    ``FUSIONJUNC/<reads>``, ``0``, the last isoform field, start, start,
    ``0,0,0``) followed by the fusion info, the first two isoform fields
    and the index returned by ``map_fusion_to_iso``.

    Matches flagged as ``edge`` (first or last exon) are held back and
    written only after all isoforms of the junction have been examined.

    Progress messages are printed to stdout; nothing is returned.
    """
    print('Start to annotate fusion junctions...')
    genes, gene_info = parse_ref1(ref_f)  # gene annotations
    fusions, fusion_index = parse_bed(input_f)  # fusion junctions
    annotated = set()  # distinct junctions that produced at least one record
    with open(output_f, 'w') as outf:
        for chrom in genes:
            # overlap gene annotations with fusion junctions
            overlaps = Interval.overlapwith(genes[chrom].interval,
                                            fusions[chrom])
            for record in overlaps:
                # entries after the first two fields: isoform labels
                # (prefixed with 'iso') come first, junctions follow
                isoforms = [entry for entry in record[2:]
                            if entry.startswith('iso')]
                junctions = record[(2 + len(isoforms)):]
                for fus in junctions:
                    reads = fus.split()[1]
                    fus_start, fus_end = fusion_index[fus]
                    start_text = str(fus_start)
                    end_text = str(fus_end)
                    deferred = []  # records hitting a first or last exon
                    for iso_id in isoforms:
                        g, i, c, s = iso_id.split()[1:]
                        exons = gene_info[iso_id]
                        iso_start = exons[0][0]
                        iso_end = exons[-1][-1]
                        # skip junctions exceeding the isoform boundaries
                        # by more than 10 positions on either side
                        if fus_start < iso_start - 10 or fus_end > iso_end + 10:
                            continue
                        fusion_info, index, edge = map_fusion_to_iso(
                            fus_start, fus_end, s, exons)
                        if not fusion_info:
                            continue
                        bed_columns = [chrom, start_text, end_text,
                                       'FUSIONJUNC/%s' % reads, '0', s,
                                       start_text, start_text, '0,0,0']
                        bed = '\t'.join(['\t'.join(bed_columns),
                                         fusion_info, g, i, index])
                        if edge:  # first or last exon: write later
                            deferred.append(bed)
                            continue
                        # not first or last exon: write immediately
                        outf.write(bed + '\n')
                        annotated.add(fus)
                    if deferred:  # first or last exon
                        outf.writelines(line + '\n' for line in deferred)
                        annotated.add(fus)
    print('Annotated %d fusion junctions!' % len(annotated))
def testOverlapwith(self):
    """Check Interval.overlapwith on the fixtures ``self.c`` and ``self.d``.

    The call must return exactly the four rows listed below; otherwise the
    assertion fails with the message 'Failed in Overlapwith'.
    """
    expected = [
        [3, 7, 'I', 'a', 'b', 'e', 'd', 'f'],
        [10, 12, 'II', 'd', 'x'],
        [16, 20, 'III', 'x', 'h', 'i'],
        [23, 25, 'IV', 'x'],
    ]
    actual = Interval.overlapwith(self.c, self.d)
    self.assertListEqual(actual, expected, 'Failed in Overlapwith')