def genrefblks(readseq, chrom, start, stop, strand, cigar, nreads):
    """Walk a read's CIGAR string and yield one tuple per reported event.

    The read is put into reference orientation first (it is passed through
    ``reverse_complement`` when ``strand == '-'``), then every CIGAR command
    found by ``cigar_pattern`` is processed in order while tracking the
    current reference and read offsets.

    ``ENDTRIM`` positions are ignored at both ends: match sequence is
    restricted to the read window ``[ENDTRIM, len(readseq) - ENDTRIM)`` and
    insertions/deletions to the reference window
    ``[start + ENDTRIM, stop - ENDTRIM)``.

    Args:
        readseq: Read sequence; sliced and measured with ``len``.
        chrom: Unused by this function; kept for the caller's signature.
        start: Reference coordinate where the alignment begins.
        stop: Reference coordinate where the alignment ends; ``stop - 1`` is
            reported as an end position, so it is presumably exclusive.
        strand: ``'+'`` or ``'-'``. Any value other than ``'+'`` is treated
            as the reverse strand for the end and insertion positions.
        cigar: CIGAR string parsed by ``cigar_pattern.findall``.
        nreads: Value copied verbatim into every yielded tuple.

    Yields:
        ``('M', nreads, pos, seq)``: the untrimmed part of a match block;
            ``pos`` is the reference coordinate of ``seq[0]``.
        ``('D', nreads, refpos, num)``: deletion of ``num`` reference
            positions starting at ``refpos``.
        ``('I', nreads, ppos, num)``: insertion of ``num`` read bases;
            ``ppos`` is ``refpos`` on ``'+'`` and ``refpos - 1`` otherwise.
        ``('E', nreads, num, cmd, readseq)``: emitted right before the
            ``ValueError`` for an unsupported command.
        ``('5', nreads, fivep)`` and ``('3', nreads, threep)``: end
            positions, extended outward by terminal soft clips.

    Raises:
        ValueError: A command other than M, S, N, D, I or H is encountered.
        IndexError: ``cigar_pattern`` finds no command in ``cigar``.
    """
    if strand == '-':
        readseq = reverse_complement(readseq)

    # Trim windows are derived from the alignment coordinates as given,
    # i.e. before ``start``/``stop`` are widened for soft clipping below.
    tleftlim, trightlim = start + ENDTRIM, stop - ENDTRIM
    qleftlim, qrightlim = ENDTRIM, len(readseq) - ENDTRIM

    refpos = start
    readpos = 0

    cigarcommands = cigar_pattern.findall(cigar)
    if cigarcommands[0][1] == 'S':
        # shift start site for the first soft clipping
        start -= int(cigarcommands[0][0])
    if len(cigarcommands) > 1 and cigarcommands[-1][1] == 'S':
        # last soft clipping
        stop += int(cigarcommands[-1][0])

    for num, cmd in cigarcommands:
        num = int(num)
        if cmd == 'M':  # match
            mleft = max(qleftlim, readpos)
            mright = min(qrightlim, readpos + num)
            if mleft < mright:
                seq = readseq[mleft:mright]
                # Advance the coordinate by the number of bases trimmed off
                # the front of this block, so it always names the reference
                # position of seq[0] -- also when a clip, skip or indel
                # precedes the trim boundary.
                yield ('M', nreads, refpos + (mleft - readpos), seq)
            refpos += num
            readpos += num
        elif cmd == 'S':  # soft clip
            readpos += num
        elif cmd == 'N':  # skip
            refpos += num
        elif cmd == 'D':  # deletion
            if tleftlim <= refpos < trightlim:
                yield ('D', nreads, refpos, num)
            refpos += num
        elif cmd == 'I':  # insertion
            ppos = refpos if strand == '+' else refpos - 1
            if tleftlim <= ppos < trightlim:
                yield ('I', nreads, ppos, num)
            readpos += num
        elif cmd == 'H':  # hard clipping
            pass
        else:
            # Report the offending command to the consumer, then abort.
            yield ('E', nreads, num, cmd, readseq)
            raise ValueError('unsupported CIGAR command: %r' % (cmd,))

    if strand == '+':
        fivep, threep = start, stop - 1
    else:
        fivep, threep = stop - 1, start

    yield ('5', nreads, fivep)
    yield ('3', nreads, threep)
def get_refseq(self, chrom, start, stop):
    """Return the upper-cased sequence of an interval, strand taken from sign.

    A non-negative ``start`` is looked up directly in ``self.seqs``. A
    negative ``start`` marks the reverse strand: the interval is converted
    to ``(-stop, -start)``, fetched, upper-cased and passed through
    ``sequtils.reverse_complement``.

    Args:
        chrom: Sequence name forwarded to ``self.seqs.get``.
        start: Interval start; negative selects the reverse strand.
        stop: Interval end; negated together with ``start`` when negative.

    Returns:
        The upper-cased sequence, reverse-complemented for negative input.
    """
    is_forward = start >= 0
    if not is_forward:
        # Undo the sign encoding to recover the forward-strand interval.
        start, stop = -stop, -start

    fetched = self.seqs.get(chrom, start, stop).upper()
    if is_forward:
        return fetched
    return sequtils.reverse_complement(fetched)