Example #1
0
def annotate_fusion(ref_f, input_f, output_f):
    """
    Align fusion junctions to gene annotations.

    Gene annotations are read from ``ref_f`` via ``parse_ref1`` and fusion
    junctions from ``input_f`` via ``parse_bed``.  For each chromosome in the
    annotation, the gene intervals are overlapped with that chromosome's
    fusion junctions, and every junction that ``map_fusion_to_iso`` can map
    onto an isoform is written to ``output_f`` as one tab-separated line.

    Lines flagged as "edge" (first or last exon) by ``map_fusion_to_iso`` are
    held back and written only after all isoforms of the current junction
    have been processed; all other lines are written immediately.

    A summary with the number of distinct annotated junctions is printed at
    the end.  The function returns nothing.
    """
    print('Start to annotate fusion junctions...')
    genes, gene_info = parse_ref1(ref_f)  # gene annotations
    fusions, fusion_index = parse_bed(input_f)  # fusion junctions
    annotated = set()  # junctions that produced at least one output line
    with open(output_f, 'w') as outf:
        for chrom in genes:
            # overlap gene annotations with fusion junctions
            overlaps = Interval.overlapwith(genes[chrom].interval,
                                            fusions[chrom])
            for interval in overlaps:
                # entries after the two leading items whose label starts
                # with 'iso' are the gene annotations
                isoforms = [label for label in interval[2:]
                            if label.startswith('iso')]
                # everything after the isoform labels is taken as a fusion
                # junction (relies on isoform labels coming first --
                # presumably guaranteed by Interval.overlapwith; confirm)
                for junction in interval[(2 + len(isoforms)):]:
                    reads = junction.split()[1]
                    junc_start, junc_end = fusion_index[junction]
                    start_field = str(junc_start)
                    end_field = str(junc_end)
                    deferred = []  # lines flagged as first or last exon
                    for iso_id in isoforms:
                        # third field is unpacked but not used
                        gene, isoform, _unused, strand = iso_id.split()[1:]
                        iso_record = gene_info[iso_id]
                        iso_start = iso_record[0][0]
                        iso_end = iso_record[-1][-1]
                        # skip junctions reaching more than 10 positions
                        # beyond the boundaries of the gene annotation
                        if (junc_start < iso_start - 10
                                or junc_end > iso_end + 10):
                            continue
                        fusion_info, index, edge = map_fusion_to_iso(
                            junc_start, junc_end, strand, iso_record)
                        if not fusion_info:
                            continue
                        # columns 7 and 8 both carry the start coordinate
                        line = '\t'.join([chrom, start_field, end_field,
                                          'FUSIONJUNC/%s' % reads,
                                          '0', strand, start_field,
                                          start_field, '0,0,0',
                                          fusion_info, gene, isoform,
                                          index])
                        if edge:  # first or last exon
                            deferred.append(line)
                        else:  # not first or last exon
                            outf.write(line + '\n')
                            annotated.add(junction)
                    if deferred:  # first or last exon
                        outf.writelines(entry + '\n' for entry in deferred)
                        annotated.add(junction)
    print('Annotated %d fusion junctions!' % len(annotated))
Example #2
0
 def testOverlapwith(self):
     # Run Interval.overlapwith on the fixtures self.c and self.d and
     # compare the result with the expected list of records.
     observed = Interval.overlapwith(self.c, self.d)
     # Each expected record holds two numbers, a roman-numeral label and
     # one or more further labels.
     expected = [
         [3, 7, 'I', 'a', 'b', 'e', 'd', 'f'],
         [10, 12, 'II', 'd', 'x'],
         [16, 20, 'III', 'x', 'h', 'i'],
         [23, 25, 'IV', 'x'],
     ]
     self.assertListEqual(observed, expected, 'Failed in Overlapwith')