def confirm_breakpoint_contig(self, target, pos, only_uniq_frags=False, seq=False, max_depth=None, ctg_len=None):
    """Find reads whose alignment spans a breakpoint region on a contig.

    Args:
        target: name of the reference/contig handed to ``self.bam``.
        pos: two-element mutable sequence ``[start, end]`` describing the
            breakpoint region.  NOTE: ``pos[0]`` is clamped to a minimum
            of 1 *in place*, so the caller's list is modified.
        only_uniq_frags: accepted for interface compatibility; not used
            by this method.
        seq: accepted for interface compatibility; not used by this method.
        max_depth: when not None, the region is rejected (empty list
            returned) as soon as one pileup column inside ``pos`` has more
            than this many reads.
        ctg_len: forwarded unchanged to ``self.is_perfect_align``.

    Returns:
        List of read objects whose aligned span ``[read.pos + 1,
        read.pos + read.alen]`` passes ``subsume(pos, ...)`` and that
        survive the mapping-quality, mapped, perfect-alignment and
        mate-position filters.  Empty when the region is too deep.
    """
    # coordinate must start from 1 (deliberately written back into ``pos``)
    pos[0] = max(pos[0], 1)

    # check if region is suspiciously too highly covered
    # (low-complexity repeat contig)
    if max_depth is not None:
        for pilecolumn in self.bam.pileup(target, pos[0], pos[1] + 1):
            if pos[0] <= pilecolumn.pos <= pos[1] and pilecolumn.n > max_depth:
                sys.stdout.write('%s:%s-%s too deeply covered %d (limit:%d)\n' %
                                 (target, pos[0], pos[1], pilecolumn.n, max_depth))
                return []

    reads = []
    # most recent read that passed every filter below (kept or not)
    last_read = None
    for read in self.bam.fetch(target, pos[0], pos[1] + 1):
        # A read with the same start and cigar as the last vetted read is
        # not re-checked (performance shortcut): it is kept only when that
        # previous read is the one sitting at the end of ``reads``.
        if last_read is not None and read.pos == last_read.pos and read.cigar == last_read.cigar:
            if reads and reads[-1].pos == last_read.pos and reads[-1].cigar == last_read.cigar:
                reads.append(read)
            continue

        # skip if mapq lower than minimum
        if int(read.mapq) < int(self.min_mapq):
            continue

        # skip if read is unmapped
        if read.is_unmapped:
            continue

        # skip if read is not perfectly aligned
        if not self.is_perfect_align(read, ctg_len):
            continue

        # skip if read and mate are mapped to same position
        if not read.mate_is_unmapped and read.rnext == read.tid and read.pnext == read.pos:
            continue

        # keep if breakpoint region is subsumed in read alignment region
        if subsume(pos, [read.pos + 1, read.pos + read.alen]):
            reads.append(read)

        last_read = read

    return reads
def find_mates_in_genome(self, region, breakpoint, breakpoint_buffer=0, missing_mates=None,
                         outside_breakpoint=True, maximum=None, no_duplicates=True,
                         no_chastity=True, no_proper=True):
    """Find mates of fusion events in genome alignments.

    Args:
        region: indexable ``(reference, start, end)`` passed to
            ``self.check_genome_region`` and ``self.bam.fetch``.
        breakpoint: indexable whose element ``[1]`` is the breakpoint
            coordinate.
        breakpoint_buffer: how far a read may extend past the breakpoint
            and still count as lying on one side.  Replaced by each
            read's ``rlen`` when ``missing_mates`` is given.
        missing_mates: optional mapping keyed by read name.  When given
            (non-empty), only reads accepted by ``self.is_mate`` are
            considered, and the duplicate, chastity and interval filters
            are bypassed.  ``'_'`` in its keys is replaced by ``':'``
            on a private copy.
        outside_breakpoint: when True, a read must lie completely on one
            side of the breakpoint (within the buffer) to be kept.
        maximum: stop scanning once exactly this many distinct read names
            have been collected; None means no limit.
        no_duplicates: skip reads flagged as duplicates.
        no_chastity: skip reads with flag bit 512 set.
        no_proper: skip reads flagged as proper pairs.

    Returns:
        List of read objects, at most one per ``qname`` - the one whose
        ``pos`` is closest to ``breakpoint[1]``.  Empty list when
        ``self.check_genome_region(region)`` is false.
    """
    if not self.check_genome_region(region):
        return []

    # make sure read name doesn't have '_' - GSC hack
    if missing_mates:
        missing_mates = dict((name.replace('_', ':'), missing_mates[name]) for name in missing_mates)

    closest = {}
    for read in self.bam.fetch(region[0], region[1], region[2]):
        # if read is not mapped, skip
        if read.alen is None:
            continue

        # skip PCR duplicate
        if not missing_mates and no_duplicates and read.is_duplicate:
            continue

        # skip read below minimum mapq
        if int(read.mapq) < int(self.min_mapq):
            continue

        # filter out chastity-failed reads
        if not missing_mates and no_chastity and read.flag & 512 != 0:
            continue

        # if want to use 'proper_pair' flag, then make sure read is not 'proper_pair'
        if no_proper and read.is_proper_pair:
            continue

        # make sure read is in between interval
        if not missing_mates and not subsume([read.pos + 1, read.pos + read.rlen],
                                             [region[1] - read.alen, region[2] + read.alen]):
            continue

        # if given list of missing mates, make sure read is one of them
        if missing_mates and not self.is_mate(read, missing_mates):
            continue

        # allows read to overlap breakpoint by one read length if missing mates given
        allowance = read.rlen if missing_mates else breakpoint_buffer

        break_pos = breakpoint[1]
        last_base = read.pos + read.alen - 1
        if not read.is_reverse:
            # sense: must start before the breakpoint, end at/before it (+ allowance)
            pointing_correctly = read.pos < break_pos
            completely_on_one_side = last_base <= break_pos + allowance
        else:
            # anti-sense: must end after the breakpoint, start at/after it (- allowance)
            pointing_correctly = last_base > break_pos
            completely_on_one_side = read.pos >= break_pos - allowance

        if pointing_correctly and (completely_on_one_side or not outside_breakpoint):
            # if 2 mates are both candidates, choose the one closer to breakpoint
            if (read.qname not in closest or
                    abs(read.pos - break_pos) < abs(closest[read.qname].pos - break_pos)):
                closest[read.qname] = read

        # check if maximum is reached if maximum is imposed
        if maximum is not None and len(closest) == maximum:
            break

    # list() keeps the return type a list on Python 3 as well as Python 2
    return list(closest.values())