raise ValueError("Read pair " + str(i_pairs) + ": not filtered properly")

        # If the reads are mapped to different fragments, that's mismapping
        if read1.tid != read2.tid:
            raise ValueError("Read pair " + str(i_pairs) + ": not filtered properly")

        # Find out on what chromosome the read has been mapped
        fragment = read1.tid
        ref = refs[fragment]

        # Make a list of mutations for the read_pair
        muts = []
        for read in reads:

            seq = read.seq
            good_cigar = get_ind_good_cigars(read.cigar, match_len_min=match_len_min)

            # The following two indices indicate the block position in the read
            # and in the reference sequence. Because of indels, they are updated
            # separately
            pos_read = 0
            pos_ref = read.pos

            # TODO: include indels as 'mutations'
            # TODO: include CIGAR trimming (we should really filter them out!)
            for (block_type, block_len), is_good in izip(read.cigar, good_cigar):
                # if read.is_read2:
                #    print pos_read, pos_ref
                # Match
                if block_type == 0:
                    if is_good:
Example #2
0
        # If the reads are mapped to different fragments, that's mismapping
        if read1.tid != read2.tid:
            raise ValueError('Read pair ' + str(i_pairs) +
                             ': not filtered properly')

        # Find out on what chromosome the read has been mapped
        fragment = read1.tid
        ref = refs[fragment]

        # Make a list of mutations for the read_pair
        muts = []
        for read in reads:

            seq = read.seq
            good_cigar = get_ind_good_cigars(read.cigar,
                                             match_len_min=match_len_min)

            # The following two indices indicate the block position in the read
            # and in the reference sequence. Because of indels, they are updated
            # separately
            pos_read = 0
            pos_ref = read.pos

            # TODO: include indels as 'mutations'
            # TODO: include CIGAR trimming (we should really filter them out!)
            for (block_type,
                 block_len), is_good in izip(read.cigar, good_cigar):
                #if read.is_read2:
                #    print pos_read, pos_ref
                # Match
                if block_type == 0:
Example #3
0
def trim_bad_cigar(reads,
                   match_len_min=match_len_min,
                   trim_left=trim_bad_cigars,
                   trim_right=trim_bad_cigars,
                   cons=None):
    '''Clip unreliable CIGAR blocks off both ends of every read of a pair.

    Each read is modified in place: its seq, qual, pos and cigar are
    restricted to the span between the first and the last "good" CIGAR block
    (as reported by get_ind_good_cigars). Afterwards the mate positions and
    the insert sizes of the pair are recomputed.

    Parameters:
       reads: indexable pair of reads exposing seq, qual, pos, cigar, mpos,
              isize and is_reverse (presumably pysam aligned reads - confirm
              against the caller)
       match_len_min: forwarded to get_ind_good_cigars/get_range_good_cigars
       trim_left: forwarded to get_range_good_cigars; also subtracted from the
                  length of the first kept block if blocks were dropped
                  before it
       trim_right: same as trim_left, for the last kept block
       cons: not used by this function

    Returns:
       True if the pair should be discarded (a read has no good CIGAR block,
       or the recomputed insert size is not positive), False otherwise.
    '''

    for read in reads:
        good_flags, i_first, i_last = get_ind_good_cigars(
            read.cigar,
            match_len_min=match_len_min,
            full_output=True)

        # Nothing trustworthy in this read: the whole pair is given up
        if not good_flags.any():
            return True

        # Coordinates of the good region, in the read and in the reference
        read_span, ref_span = get_range_good_cigars(
            read.cigar, read.pos,
            match_len_min=match_len_min,
            trim_left=trim_left,
            trim_right=trim_right)
        start_read, end_read = read_span
        start_ref, _end_ref = ref_span

        # Keep only the blocks from the first to the last good one
        trimmed = read.cigar[i_first:i_last + 1]

        # If blocks were dropped at an edge, the new edge block is shortened
        if i_first != 0:
            op, length = trimmed[0]
            trimmed[0] = (op, length - trim_left)
        if i_last + 1 != len(read.cigar):
            op, length = trimmed[-1]
            trimmed[-1] = (op, length - trim_right)

        # Fetch both old strings before assigning any of them (if these are
        # pysam reads, setting seq would reset qual - TODO confirm)
        old_seq, old_qual = read.seq, read.qual
        read.seq = old_seq[start_read:end_read]
        read.qual = old_qual[start_read:end_read]
        read.pos = start_ref
        read.cigar = trimmed

    # Each read points to the (new) start of its mate
    read_a = reads[0]
    read_b = reads[1]
    read_a.mpos = read_b.pos
    read_b.mpos = read_a.pos

    # Index (as a bool) of the forward and of the reverse read in the pair
    i_fwd = read_a.is_reverse
    i_rev = not i_fwd

    # End of the reverse read on the reference: its start plus the length of
    # all blocks of type 0 or 2 (match and deletion in the SAM encoding)
    rev_read = reads[i_rev]
    rev_start = rev_read.pos
    rev_ref_len = sum(length for (op, length) in rev_read.cigar
                      if op in (0, 2))
    isize = rev_start + rev_ref_len - reads[i_fwd].pos

    # Trash pair if the insert size is not positive (complete cross-overhang)
    #               ----->
    #   <------
    if isize <= 0:
        return True

    reads[i_fwd].isize = isize
    reads[i_rev].isize = -isize

    return False
def trim_bad_cigar(reads,
                   match_len_min=match_len_min,
                   trim_left=trim_bad_cigars,
                   trim_right=trim_bad_cigars,
                   cons=None):
    '''Trim short edge CIGAR blocks (likely mapping artifacts) from a read pair.

    Works in place on every read in reads: seq, qual, pos and cigar are cut
    down to the region spanned by the good CIGAR blocks, then mpos and isize
    of the two reads are set again from the trimmed coordinates.

    Parameters:
       reads: the two reads of a pair; must support indexing with 0/1 and
              expose seq, qual, pos, cigar, mpos, isize, is_reverse
       match_len_min: passed through to get_ind_good_cigars and
                      get_range_good_cigars
       trim_left, trim_right: passed through to get_range_good_cigars and
                      subtracted from the first/last kept block length when
                      blocks were removed on that side
       cons: unused here

    Returns:
       True when the pair must be thrown away (no good CIGAR block in one of
       the reads, or insert size <= 0 after trimming), else False.
    '''

    for read in reads:
        flags, lo, hi = get_ind_good_cigars(read.cigar,
                                            match_len_min=match_len_min,
                                            full_output=True)

        # No good CIGARs at all: give up on the pair
        if not flags.any():
            return True

        # Boundaries of the good region in read and reference coordinates
        (read_from, read_to), (ref_from, _ref_to) = get_range_good_cigars(
            read.cigar, read.pos,
            match_len_min=match_len_min,
            trim_left=trim_left,
            trim_right=trim_right)

        # Which edges lose blocks (evaluated against the untrimmed CIGAR)
        dropped_left = lo != 0
        dropped_right = hi != len(read.cigar) - 1

        # Blocks that survive, with the edge block lengths adjusted
        kept = read.cigar[lo: hi + 1]
        if dropped_left:
            kept[0] = (kept[0][0], kept[0][1] - trim_left)
        if dropped_right:
            kept[-1] = (kept[-1][0], kept[-1][1] - trim_right)

        # Read the old seq and qual first, then overwrite the attributes
        full_seq = read.seq
        full_qual = read.qual
        window = slice(read_from, read_to)
        read.seq = full_seq[window]
        read.qual = full_qual[window]
        read.pos = ref_from
        read.cigar = kept

    # Mate position
    reads[0].mpos = reads[1].pos
    reads[1].mpos = reads[0].pos

    # Insert size: the bool is_reverse of the first read doubles as the
    # index of the forward read, its negation as that of the reverse read
    i_fwd = reads[0].is_reverse
    i_rev = not i_fwd

    isize = reads[i_rev].pos
    # Only block types 0 and 2 contribute to the length on the reference
    for block_type, block_len in reads[i_rev].cigar:
        if block_type in (0, 2):
            isize += block_len
    isize -= reads[i_fwd].pos

    # A non-positive insert size means a complete cross-overhang:
    #               ----->
    #   <------
    # such pairs are trashed
    if isize <= 0:
        return True

    reads[i_fwd].isize = isize
    reads[i_rev].isize = -isize

    return False