Exemple #1
0
def _soft_clip_leading_bases(decoded_ops, num_clip):
    """Return a copy of decoded_ops with its first num_clip read bases
    soft-clipped.

    decoded_ops is walked front to back. Ops that consume read bases
    ('M', 'I', '=', 'X') are replaced with 'S' until num_clip of them
    have been replaced. Every other op is copied through unchanged:
    'S' is already a soft clip, and ops that do not consume read bases
    (e.g. 'D' or 'H') must not become 'S' because that would change
    the read length implied by the cigar.

    A num_clip of zero or less leaves the ops untouched.
    """
    clipped = []
    for op in decoded_ops:
        if num_clip > 0 and op in 'MI=X':
            clipped.append('S')
            num_clip -= 1
        else:
            clipped.append(op)
    return clipped


def clip_primer_overlap_bases(alignedread, primer_overlap_peak_pos, primer_len):
    """Clip alignedread's bases overlapping with a primer by rewriting
    its cigar so that the overlapping bases become soft clips ('S').

    Reverse reads are clipped starting from the first cigar op, forward
    reads starting from the last cigar op.

    Args:
        alignedread: read to modify in place; its pos, aend, cigar,
            rlen and is_reverse attributes are used.
        primer_overlap_peak_pos: position of the primer peak the read
            overlaps with (peak start for reverse reads, peak end for
            forward reads).
        primer_len: length of the primer.

    Raises:
        AssertionError: if the read length derived from the new cigar
            differs from alignedread.rlen.

    NOTE(review): alignedread.pos is not adjusted here even when
    leading aligned bases get soft-clipped; confirm whether callers
    expect to do that themselves.
    """

    if alignedread.is_reverse:
        # Reverse read overlapping the primer with the start of its
        # alignment: clip
        #   primer_overlap_peak_pos + primer_len - alignedread.pos + 1
        # read bases, counted from the first cigar op.
        num_clip = primer_overlap_peak_pos + primer_len - alignedread.pos + 1
        LOG.debug(
            "rv read starting at %d overlapping with fw peak start %d+%d by %d: %s",
            alignedread.pos, primer_overlap_peak_pos, primer_len,
            num_clip, alignedread)
        decoded = list(cigar.decoded_ops(alignedread.cigar))
        new_cigar_decoded = _soft_clip_leading_bases(decoded, num_clip)
    else:
        # Forward read overlapping the primer with the end of its
        # alignment: clip
        #   alignedread.aend - primer_overlap_peak_pos + primer_len + 1
        # read bases, counted backwards from the last cigar op.
        num_clip = alignedread.aend - primer_overlap_peak_pos + primer_len + 1
        LOG.debug(
            "fw read ending at %d overlapping with rv peak end %d-%d by %d: %s",
            alignedread.aend, primer_overlap_peak_pos, primer_len,
            num_clip, alignedread)
        decoded = list(cigar.decoded_ops(alignedread.cigar))
        # Clip on the reversed ops and flip the result back; this keeps
        # the work linear instead of inserting at the list front.
        new_cigar_decoded = _soft_clip_leading_bases(
            decoded[::-1], num_clip)[::-1]

    new_cigar = cigar.parse(
        cigar.cigar_from_decoded_ops(new_cigar_decoded))
    # According to spec http://samtools.sourceforge.net/SAM1.pdf:
    # "Sum of lengths of the M/I/S/=/X operations shall equal the
    # length of SEQ"
    cigar_len = sum(1 for op in new_cigar_decoded if op in 'MISX=')
    assert cigar_len == alignedread.rlen, (
        "read length derived from new cigar (%s -> %d) mismatches rlen=%d for %s read: %s" % (
            new_cigar, cigar_len, alignedread.rlen,
            "reverse" if alignedread.is_reverse else "forward", alignedread))

    alignedread.cigar = new_cigar
Exemple #2
0
def print_alignedread_info(alignedread):
    """Debugging only: print a summary of alignedread to stdout.

    Prints the read's pos, aend-1 and alen, its cigar (raw and
    decoded), the aligned length derived from the cigar (-1 if
    cigar.aligned_length() raises ValueError) and the read's query
    sequence.
    """

    # Single-argument parenthesised prints behave the same whether
    # print is a statement or a function.
    print("- pos, aend-1, alen = %d %d %s" % (
        alignedread.pos, alignedread.aend-1, alignedread.alen))
    print("- cigar = %s" % (alignedread.cigar))
    print("- decoded cigar = %s" % (
        ''.join(cigar.decoded_ops(alignedread.cigar))))
    try:
        alnlen = cigar.aligned_length(alignedread.cigar)
    except ValueError:
        # Sentinel so the summary below is still printed when the
        # aligned length cannot be derived from the cigar.
        alnlen = -1
    print("- cigar aln-len = %d" % (alnlen))
    # NOTE(review): both lines below print alignedread.query; confirm
    # whether "read sequence" was meant to show a different attribute.
    print("- read sequence = %s" % (alignedread.query))
    print("- aligned parts = %s" % (alignedread.query))