예제 #1
0
파일: bam.py 프로젝트: jmeppley/transabyss
    def confirm_breakpoint_contig(self, target, pos, only_uniq_frags=False, seq=False, max_depth=None, ctg_len=None):
        """Find reads whose alignment spans a breakpoint region (fusion events).

        Args:
            target: reference/contig name handed to ``self.bam.pileup`` and
                ``self.bam.fetch``.
            pos: two-element mutable sequence ``[start, end]`` of the breakpoint
                region.  NOTE: ``pos[0]`` is clamped to a minimum of 1 *in
                place*, so the caller's list is modified.
            only_uniq_frags: not used by this method (kept for interface
                compatibility).
            seq: not used by this method (kept for interface compatibility).
            max_depth: if not None, the search is abandoned (empty list
                returned, message written to stdout) as soon as a pileup
                column inside ``[pos[0], pos[1]]`` has more than this many
                reads.
            ctg_len: forwarded unchanged to ``self.is_perfect_align``.

        Returns:
            List of read objects accepted as spanning the region; empty if the
            region is too deeply covered or nothing qualifies.
        """
        reads = []

        # coordinate must start from 1 (this updates the caller's list)
        pos[0] = max(pos[0], 1)

        # bail out if the region is suspiciously highly covered
        # (low-complexity repeat contig)
        if max_depth is not None:
            for pilecolumn in self.bam.pileup(target, pos[0], pos[1] + 1):
                if pos[0] <= pilecolumn.pos <= pos[1] and pilecolumn.n > max_depth:
                    sys.stdout.write('%s:%s-%s too deeply covered %d (limit:%d)\n' % (target, pos[0], pos[1], pilecolumn.n, max_depth))
                    return reads

        # last read that got through every filter below (kept or not)
        last_read = None
        for read in self.bam.fetch(target, pos[0], pos[1] + 1):
            # Performance shortcut: a read with the same start and cigar as
            # the last filtered read is not re-checked.  It is kept only if
            # the most recently kept read also has that start and cigar.
            if last_read is not None and read.pos == last_read.pos and read.cigar == last_read.cigar:
                if reads and reads[-1].pos == last_read.pos and reads[-1].cigar == last_read.cigar:
                    reads.append(read)
                continue

            # skip if mapq lower than minimum
            if int(read.mapq) < int(self.min_mapq):
                continue

            # skip if read is unmapped
            if read.is_unmapped:
                continue

            # skip if read is not perfectly aligned
            if not self.is_perfect_align(read, ctg_len):
                continue

            # skip if read and mate are mapped to same position
            if not read.mate_is_unmapped and read.rnext == read.tid and read.pnext == read.pos:
                continue

            # keep if breakpoint region is subsumed in read alignment region
            # (read.pos + 1 converts the read start to the 1-based scale of pos)
            if subsume(pos, [read.pos + 1, read.pos + read.alen]):
                reads.append(read)

            last_read = read

        return reads
예제 #2
0
파일: bam.py 프로젝트: jmeppley/transabyss
    def find_mates_in_genome(self, region, breakpoint,
                             breakpoint_buffer=0, missing_mates=None, outside_breakpoint=True,
                             maximum=None, no_duplicates=True, no_chastity=True, no_proper=True):
        """Find mates of fusion events in genome alignments.

        Args:
            region: indexable whose elements 0, 1 and 2 are passed to
                ``self.bam.fetch`` (presumably reference name, start, end);
                validated first with ``self.check_genome_region``.
            breakpoint: indexable; only ``breakpoint[1]`` is used, as the
                breakpoint coordinate.
            breakpoint_buffer: how far a read may extend past the breakpoint
                and still count as lying on one side of it.  Replaced by the
                read length (``read.rlen``) when ``missing_mates`` is given.
            missing_mates: optional mapping keyed by read name.  When given
                (non-empty), only reads accepted by ``self.is_mate`` are
                considered, and the duplicate, chastity and interval filters
                are skipped.  ``'_'`` in the keys is replaced by ``':'``.
            outside_breakpoint: if true, keep only reads lying completely on
                one side of the breakpoint (within the buffer).
            maximum: if not None, stop once this many distinct read names
                have been collected.
            no_duplicates: skip reads flagged as PCR duplicates.
            no_chastity: skip reads with flag bit 512 set.
            no_proper: skip reads flagged as a proper pair.

        Returns:
            List of read objects, at most one per read name.  When several
            reads share a name, the one whose ``pos`` is closest to
            ``breakpoint[1]`` is kept.  Empty list if the region is rejected.
        """
        if not self.check_genome_region(region):
            return []

        # make sure read name doesn't have '_' - GSC hack
        if missing_mates:
            missing_mates = dict((r.replace('_', ':'), missing_mates[r]) for r in missing_mates)

        reads = {}
        for read in self.bam.fetch(region[0], region[1], region[2]):
            # if read is not mapped, skip
            if read.alen is None:
                continue

            # skip PCR duplicate
            if not missing_mates and no_duplicates and read.is_duplicate:
                continue

            # skip read below minimum mapq
            if int(read.mapq) < int(self.min_mapq):
                continue

            # filter out chastity-failed reads
            if not missing_mates and no_chastity and read.flag & 512 != 0:
                continue

            # if want to use 'proper_pair' flag, then make sure read is not 'proper_pair'
            if no_proper and read.is_proper_pair:
                continue

            # make sure read is in between interval
            if not missing_mates and not subsume([read.pos + 1, read.pos + read.rlen], [region[1] - read.alen, region[2] + read.alen]):
                continue

            # if given list of missing mates, make sure read is one of them
            if missing_mates and not self.is_mate(read, missing_mates):
                continue

            # allows read to overlap breakpoint by one read length if missing mates given
            if missing_mates:
                breakpoint_buffer = read.rlen

            # does the read point towards the breakpoint, and does it lie
            # completely on one side of it?
            pointing_correctly = False
            completely_on_one_side = False
            if not read.is_reverse:
                # sense
                if read.pos < breakpoint[1]:
                    pointing_correctly = True
                    if read.pos + read.alen - 1 <= breakpoint[1] + breakpoint_buffer:
                        completely_on_one_side = True
            elif read.pos + read.alen - 1 > breakpoint[1]:
                # anti-sense
                pointing_correctly = True
                if read.pos >= breakpoint[1] - breakpoint_buffer:
                    completely_on_one_side = True

            if pointing_correctly and (completely_on_one_side or not outside_breakpoint):
                # if 2 mates are both candidates, choose the one closer to breakpoint
                current = reads.get(read.qname)
                if current is None or abs(read.pos - breakpoint[1]) < abs(current.pos - breakpoint[1]):
                    reads[read.qname] = read

            # check if maximum is reached if maximum is imposed
            if maximum is not None and len(reads) == maximum:
                break

        # list() keeps the return type a list on Python 3 as well, matching
        # the early-exit path above
        return list(reads.values())