def test_sanity(reads, n_frag, len_frag):
    '''Run every sanity check on a read pair that is supposed to be good.

    The checks run in a fixed order (integrity, then test_coh, then fragment
    assignment) and evaluation stops at the first one that fails.

    Returns True only if all three checks pass, False otherwise.
    '''
    # "and" short-circuits, so a later check never runs once an earlier one
    # has failed; bool() normalises the result to a strict True/False.
    return bool(test_integrity(reads)
                and test_coh(reads)
                and test_fragment_assignment(reads, n_frag, len_frag))
def _ref_span(cigar):
    '''Return the number of reference bases covered by a CIGAR (ops 0 and 2).'''
    return sum(bl for (bt, bl) in cigar if bt in (0, 2))


def _clip_cigar_start(cigar, ref_pos, cut):
    '''Remove from the start of a CIGAR everything aligned before cut.

    Returns (new_cigar, new_ref_pos, n_read_bases_removed).
    '''
    # Reversed copy, so that the leading block sits at the end and can be
    # popped cheaply; the caller's list is never modified.
    stack = cigar[::-1]
    read_pos = 0
    for (bt, bl) in cigar:
        if bt == 0:
            # Strictly greater, because we dump the CIGAR ending at the
            # right position. In the corner case, ref_pos == cut and we keep
            # the whole block (and do not move read_pos).
            if ref_pos + bl > cut:
                stack[-1] = (bt, ref_pos + bl - cut)
                read_pos += cut - ref_pos
                ref_pos = cut
                break
            stack.pop()
            read_pos += bl
            ref_pos += bl
        elif bt == 1:
            # Insertion: consumes read bases only
            stack.pop()
            read_pos += bl
        elif bt == 2:
            # Starting with a deletion is not allowed, so drop it and start
            # after it (possibly beyond cut)
            stack.pop()
            ref_pos += bl
            if ref_pos > cut:
                break
    return stack[::-1], ref_pos, read_pos


def _clip_cigar_end(cigar, ref_end, read_end, cut):
    '''Remove from the end of a CIGAR everything aligned at or after cut.

    Returns (new_cigar, new_read_end).
    '''
    # Work on a copy: the caller's list must stay intact, in particular when
    # the read ends up being trashed.
    stack = list(cigar)
    for (bt, bl) in reversed(cigar):
        if bt == 0:
            # Strictly less, because we dump the CIGAR starting at the
            # right position. In the corner case, ref_end == cut and we keep
            # the whole block (and do not move read_end).
            if ref_end - bl < cut:
                stack[-1] = (bt, cut - (ref_end - bl))
                read_end -= ref_end - cut
                break
            stack.pop()
            read_end -= bl
            ref_end -= bl
        elif bt == 1:
            # Insertion: consumes read bases only
            stack.pop()
            read_end -= bl
        elif bt == 2:
            # Ending with a deletion is not allowed, so drop it and end
            # before it (possibly before cut)
            stack.pop()
            ref_end -= bl
            if ref_end < cut:
                break
    return stack, read_end


def trim_primers(reads, frag_pos, include_tests=False):
    '''Trim inner primers from a read pair, in place.

    Parameters:
      - reads: the two reads of a pair. Each read must expose pos, cigar (list
        of (operation, length) blocks), seq, qual, rlen, is_reverse, mpos and
        isize.
      - frag_pos: coordinates of the fragment trimmed of inner primers.
        frag_pos[0] is the first reference position that is kept, frag_pos[1]
        is the reference position at which a trimmed alignment ends
        (exclusive).
      - include_tests: if True, call test_integrity on entry and on exit and
        drop into ipdb when it returns a true value.

    Returns True if a read is trimmed away completely (the pair is trash),
    False otherwise. When True is returned, reads processed earlier in the
    loop may already have been modified.

    Only CIGAR operations 0, 1 and 2 (match, insertion, deletion in the
    BAM/pysam encoding) are handled.
    NOTE(review): any other operation met while trimming is skipped without
    being removed - presumably clipped reads are filtered before this call;
    confirm against the callers.
    '''
    # Note: this function is robust against fuzzy ends, i.e. it works also if
    # the insert reads into the adapters and crap like that.
    # Because we accept short inserts, the primer can also be at the end

    # NOTE(review): a true value from test_integrity triggers the debugger
    # here, whereas test_sanity treats a false value as the failure - confirm
    # which polarity test_integrity actually has.
    if include_tests:
        if test_integrity(reads):
            print('trim_primers (entry):')
            import ipdb
            ipdb.set_trace()

    frag_start = frag_pos[0]
    frag_end = frag_pos[1]

    tampered = False
    for read in reads:

        # FWD primer
        if read.pos < frag_start:
            tampered = True
            (cigar, ref_pos, n_cut) = _clip_cigar_start(list(read.cigar),
                                                        read.pos,
                                                        frag_start)

            # If you cut away everything, trash
            if not cigar:
                return True

            # Fetch qual before reassigning seq, as the original code did
            # (presumably because setting seq resets qual in pysam - confirm)
            seq = read.seq
            qual = read.qual
            read.pos = ref_pos
            read.seq = seq[n_cut:]
            read.qual = qual[n_cut:]
            read.cigar = cigar

        # REV primer
        ref_end = read.pos + _ref_span(read.cigar)
        if ref_end > frag_end:
            tampered = True
            (cigar, read_end) = _clip_cigar_end(list(read.cigar),
                                                ref_end,
                                                read.rlen,
                                                frag_end)

            # If you cut away everything, trash
            if not cigar:
                return True

            seq = read.seq
            qual = read.qual
            read.seq = seq[:read_end]
            read.qual = qual[:read_end]
            read.cigar = cigar

    # Fix mate pair
    if tampered:
        # Booleans double as list indices: if reads[0] is the reverse read,
        # the forward one is reads[1], and vice versa
        i_fwd = reads[0].is_reverse
        i_rev = not i_fwd
        reads[i_fwd].mpos = reads[i_rev].pos
        reads[i_rev].mpos = reads[i_fwd].pos
        isize = (reads[i_rev].pos + _ref_span(reads[i_rev].cigar)
                 - reads[i_fwd].pos)
        reads[i_fwd].isize = isize
        reads[i_rev].isize = -isize

    if include_tests:
        if test_integrity(reads):
            print('trim_primers (exit):')
            import ipdb
            ipdb.set_trace()

    return False