raise ValueError("Read pair " + str(i_pairs) + ": not filtered properly") # If the reads are mapped to different fragments, that's mismapping if read1.tid != read2.tid: raise ValueError("Read pair " + str(i_pairs) + ": not filtered properly") # Find out on what chromosome the read has been mapped fragment = read1.tid ref = refs[fragment] # Make a list of mutations for the read_pair muts = [] for read in reads: seq = read.seq good_cigar = get_ind_good_cigars(read.cigar, match_len_min=match_len_min) # The following two indices indicate the block position in the read # and in the reference sequence. Because of indels, they are updated # separately pos_read = 0 pos_ref = read.pos # TODO: include indels as 'mutations' # TODO: include CIGAR trimming (we should really filter them out!) for (block_type, block_len), is_good in izip(read.cigar, good_cigar): # if read.is_read2: # print pos_read, pos_ref # Match if block_type == 0: if is_good:
# If the reads are mapped to different fragments, that's mismapping if read1.tid != read2.tid: raise ValueError('Read pair ' + str(i_pairs) + ': not filtered properly') # Find out on what chromosome the read has been mapped fragment = read1.tid ref = refs[fragment] # Make a list of mutations for the read_pair muts = [] for read in reads: seq = read.seq good_cigar = get_ind_good_cigars(read.cigar, match_len_min=match_len_min) # The following two indices indicate the block position in the read # and in the reference sequence. Because of indels, they are updated # separately pos_read = 0 pos_ref = read.pos # TODO: include indels as 'mutations' # TODO: include CIGAR trimming (we should really filter them out!) for (block_type, block_len), is_good in izip(read.cigar, good_cigar): #if read.is_read2: # print pos_read, pos_ref # Match if block_type == 0:
def trim_bad_cigar(reads, match_len_min=match_len_min,
                   trim_left=trim_bad_cigars, trim_right=trim_bad_cigars,
                   cons=None):
    '''Clip unreliable edge CIGAR blocks off both reads of a pair, in place.

    For every read, the CIGAR blocks flagged as good by get_ind_good_cigars
    are kept, the sequence/quality strings are cut to the range reported by
    get_range_good_cigars, and pos/cigar are rewritten accordingly. After
    both reads are processed, the mate positions and insert sizes of the
    pair are recomputed.

    Parameters:
       reads: the two reads of a pair (indexed as reads[0] and reads[1])
       match_len_min: forwarded to get_ind_good_cigars/get_range_good_cigars
       trim_left: forwarded to get_range_good_cigars and subtracted from the
                  first kept block when blocks were dropped on the left
       trim_right: same as trim_left, for the right edge
       cons: not used by this function

    Returns:
       True if the pair should be discarded (a read has no good CIGAR block
       or the recomputed insert size is not positive), False otherwise.

    NOTE: reads are modified in place, so when True is returned because of
    the second read, the first read has already been trimmed.
    '''
    for read in reads:
        flags, i_first, i_last = get_ind_good_cigars(read.cigar,
                                                     match_len_min=match_len_min,
                                                     full_output=True)

        # Nothing trustworthy in this read: the whole pair is given up
        if not flags.any():
            return True

        # Coordinates of the good stretch, in the read and in the reference
        read_range, ref_range = get_range_good_cigars(read.cigar, read.pos,
                                                      match_len_min=match_len_min,
                                                      trim_left=trim_left,
                                                      trim_right=trim_right)
        start_read, end_read = read_range
        start_ref, end_ref = ref_range

        # Keep only the blocks between the first and last good one
        kept = read.cigar[i_first:i_last + 1]

        # If blocks were dropped at an edge, the new edge block is shortened
        if i_first != 0:
            op, length = kept[0]
            kept[0] = (op, length - trim_left)
        if i_last != len(read.cigar) - 1:
            op, length = kept[-1]
            kept[-1] = (op, length - trim_right)

        # Both strings are read before either is written back
        # (presumably because assigning seq can reset qual -- confirm)
        old_seq = read.seq
        old_qual = read.qual
        read.seq = old_seq[start_read:end_read]
        read.qual = old_qual[start_read:end_read]
        read.pos = start_ref
        read.cigar = kept

    # Each read points at the (possibly shifted) start of its mate
    reads[0].mpos = reads[1].pos
    reads[1].mpos = reads[0].pos

    # is_reverse of the first read is used as the index of the forward read
    i_fwd = reads[0].is_reverse
    i_rev = not i_fwd
    read_fwd = reads[i_fwd]
    read_rev = reads[i_rev]

    # Length covered by the reverse read: only block types 0 and 2 count
    covered = 0
    for (block_type, block_len) in read_rev.cigar:
        if block_type in (0, 2):
            covered += block_len
    isize = read_rev.pos + covered - read_fwd.pos

    # A non-positive insert size means a complete cross-overhang:
    #              ----->
    #   <------
    if isize <= 0:
        return True

    read_fwd.isize = isize
    read_rev.isize = -isize
    return False
def trim_bad_cigar(reads, match_len_min=match_len_min,
                   trim_left=trim_bad_cigars, trim_right=trim_bad_cigars,
                   cons=None):
    '''Remove short CIGAR blocks at the edges of a read pair (done in place).

    Each read keeps only the CIGAR blocks between the first and last good
    one (as reported by get_ind_good_cigars); seq, qual, pos and cigar are
    overwritten to match. Once both reads are done, mpos and isize are
    refreshed for the pair.

    Parameters:
       reads: pair of reads, accessed as reads[0] and reads[1]
       match_len_min: passed through to the two CIGAR helper functions
       trim_left: passed to get_range_good_cigars; also subtracted from the
                  length of the first kept block if it is not the first
                  block of the original CIGAR
       trim_right: like trim_left, for the last kept block
       cons: accepted but never used here

    Returns:
       True when the pair is to be trashed (some read has no good CIGAR, or
       the new insert size is zero or negative); False when it was trimmed
       successfully.

    NOTE: an early True can leave the first read already trimmed.
    '''
    def shorten(block, amount):
        # Same block type, length reduced by amount
        (block_type, block_len) = block
        return (block_type, block_len - amount)

    for read in reads:
        (is_good, ind_first, ind_last) = get_ind_good_cigars(
            read.cigar, match_len_min=match_len_min, full_output=True)

        # Give up on the pair as soon as one read has no good block
        has_good = is_good.any()
        if not has_good:
            return True

        # Boundaries of the good region in read and reference coordinates
        ((start_read, end_read), (start_ref, end_ref)) = get_range_good_cigars(
            read.cigar, read.pos,
            match_len_min=match_len_min,
            trim_left=trim_left,
            trim_right=trim_right)

        # Discard the bad blocks on both sides
        cigar_new = read.cigar[ind_first: ind_last + 1]

        # Blocks that became an edge through trimming lose trim_left/right
        if ind_first != 0:
            cigar_new[0] = shorten(cigar_new[0], trim_left)
        if ind_last != len(read.cigar) - 1:
            cigar_new[-1] = shorten(cigar_new[-1], trim_right)

        # Fetch seq and qual first, then write the trimmed versions back
        seq_old, qual_old = read.seq, read.qual
        read.seq = seq_old[start_read: end_read]
        read.qual = qual_old[start_read: end_read]
        read.pos = start_ref
        read.cigar = cigar_new

    # Update the mate positions with the new starts
    reads[0].mpos = reads[1].pos
    reads[1].mpos = reads[0].pos

    # Index of the forward and of the reverse read within the pair
    i_fwd = reads[0].is_reverse
    i_rev = not i_fwd

    # Insert size: from the start of the forward read to the end of the
    # reverse read, counting only CIGAR block types 0 and 2 of the latter
    len_rev = sum([bl for (bt, bl) in reads[i_rev].cigar if bt in (0, 2)])
    isize = reads[i_rev].pos + len_rev - reads[i_fwd].pos

    # Complete cross-overhang, the pair is useless:
    #              ----->
    #   <------
    if isize <= 0:
        return True

    reads[i_fwd].isize = isize
    reads[i_rev].isize = -isize
    return False