Beispiel #1
0
    def __call__(self, read1, read2):
        """Merge an overlapping read pair into a single read when possible.

        ``read1`` is aligned against the reverse complement of ``read2``.
        If the alignment contains at least ``min_overlap`` matches, ``read1``
        is updated in place to hold the merged sequence (and qualities) and
        its ``merged`` attribute is set to ``True``.

        ``self.min_overlap`` values of 1 or less are treated as a fraction of
        the shorter read's length (with a floor of 2 bases); larger values
        are used as an absolute number of bases.

        Args:
            read1: First read of the pair. Modified in place on a merge.
            read2: Second read of the pair.

        Returns:
            A ``(read1, read2)`` tuple. After a successful merge the second
            element is ``None``; otherwise both reads are returned as given.

        Raises:
            AtroposError: If the alignment coordinates do not correspond to
                any of the handled overlap layouts.
        """
        len1 = len(read1.sequence)
        len2 = len(read2.sequence)
        min_overlap = self.min_overlap
        if min_overlap <= 1:
            # Fractional setting: scale by the shorter read, never below 2.
            min_overlap = max(2, round(self.min_overlap * min(len1, len2)))

        # A read shorter than the required overlap can never satisfy it.
        if len1 < min_overlap or len2 < min_overlap:
            return (read1, read2)

        insert_matched = read1.insert_overlap and read2.insert_overlap

        if insert_matched:
            # If we've already determined that there is an insert overlap
            # with a 3' overhang, we can constrain our alignment
            aflags = START_WITHIN_SEQ1 | STOP_WITHIN_SEQ2
        else:
            aflags = SEMIGLOBAL
        # align read1 to read2 reverse-complement to be compatible with
        # InsertAligner
        read2_rc = reverse_complement(read2.sequence)
        aligner = Aligner(read2_rc, self.error_rate, aflags)
        alignment = aligner.locate(read1.sequence)

        if not alignment:
            return (read1, read2)

        # r2_* index into read2_rc (the aligner's reference), r1_* index into
        # read1.sequence (the query).
        r2_start, r2_stop, r1_start, r1_stop, matches, errors = alignment
        if matches < min_overlap:
            return (read1, read2)

        # Only correct errors if we haven't already done correction in
        # the InsertAligner
        if (self.mismatch_action and errors > 0 and not insert_matched
                and read1.corrected == 0 and read2.corrected == 0):
            self.correct_errors(read1, read2, alignment)
            # NOTE(review): read2_rc was computed before this call; if
            # correct_errors() rewrites read2.sequence, the merge below still
            # uses the uncorrected bases -- confirm this is intended.

        if r2_start == 0 and r2_stop == len2:
            # r2 is fully contained in r1
            pass
        elif r1_start == 0 and r1_stop == len1:
            # r1 is fully contained in r2
            read1.sequence = read2_rc
            # Reads without qualities carry None here; reversed(None) would
            # raise TypeError, so only copy qualities when read2 has them.
            if read2.qualities is not None:
                read1.qualities = "".join(reversed(read2.qualities))
        elif r1_start > 0:
            # read1 extends to the left of the overlap: append the part of
            # read2_rc that lies beyond the aligned region.
            read1.sequence += read2_rc[r2_stop:]
            if read1.qualities and read2.qualities:
                read1.qualities += "".join(reversed(
                    read2.qualities))[r2_stop:]
        elif r2_start > 0:
            # read2_rc extends to the left of the overlap: keep all of it and
            # append the part of read1 that lies beyond the aligned region.
            read1.sequence = read2_rc + read1.sequence[r1_stop:]
            if read1.qualities and read2.qualities:
                read1.qualities = ("".join(reversed(read2.qualities)) +
                                   read1.qualities[r1_stop:])
        else:
            raise AtroposError(
                "Invalid alignment while trying to merge read "
                "{}: {}".format(read1.name,
                                ",".join(str(i) for i in alignment)))

        read1.merged = True
        read2 = None

        return (read1, read2)
Beispiel #2
0
def align(seq1, seq2, min_overlap_frac=0.9):
    """Return the part of ``seq1`` that ``seq2`` aligns to, if any.

    Args:
        seq1: Sequence handed to ``Aligner`` as its first argument; the
            returned slice is taken from it.
        seq2: Sequence passed to ``Aligner.locate``.
        min_overlap_frac: Fraction of the shorter sequence's length that is
            used (rounded up) as the aligner's ``min_overlap``.

    Returns:
        ``seq1[start:stop]`` for the located match, or ``None`` when
        ``locate`` reports no match.
    """
    matcher = Aligner(seq1, 0.0, SEMIGLOBAL, False, False)
    shorter_len = min(len(seq1), len(seq2))
    matcher.min_overlap = math.ceil(shorter_len * min_overlap_frac)
    # NOTE(review): presumably large enough to rule out indels entirely --
    # confirm against the Aligner implementation.
    matcher.indel_cost = 100000

    hit = matcher.locate(seq2)
    if not hit:
        return None
    # The first two items of the result delimit the match within seq1.
    start, stop = hit[0], hit[1]
    return seq1[start:stop]
Beispiel #3
0
def align(seq1, seq2, min_overlap_frac=0.9):
    """Align two sequences and return the matching portion of the first.

    Args:
        seq1: The first sequence; it is given to ``Aligner`` on construction
            and the returned slice is taken from it.
        seq2: The second sequence; it is the one passed to ``locate``.
        min_overlap_frac: Minimum fraction of overlapping bases required for
            a match, relative to the shorter of the two sequences (the
            resulting base count is rounded up).

    Returns:
        The matching portion of ``seq1``, or ``None`` if no match is found.
    """
    required_overlap_source = (seq1, seq2)
    aligner = Aligner(seq1, 0.0, SEMIGLOBAL, False, False)
    aligner.min_overlap = math.ceil(
        min(len(s) for s in required_overlap_source) * min_overlap_frac)
    # NOTE(review): presumably chosen so that indels are never accepted --
    # confirm against the Aligner implementation.
    aligner.indel_cost = 100000

    found = aligner.locate(seq2)
    # found[0] and found[1] are used as slice bounds into seq1.
    return seq1[found[0]:found[1]] if found else None
Beispiel #4
0
 def test_100_percent_error_rate(self):
     """Smoke test: locate() with an error rate of 1.0 must not raise.

     No assertion is made on the result; the call only has to complete.
     """
     ref_seq = 'GCTTAGACATATC'
     query_seq = 'CAA'
     Aligner(ref_seq, 1.0, flags=BACK).locate(query_seq)
Beispiel #5
0
 def test(self):
     """Smoke test: locate() a two-base query with flags=BACK at rate 0.1.

     No assertion is made on the result; the call only has to complete.
     """
     ref_seq = 'CTCCAGCTTAGACATATC'
     query_seq = 'CC'
     Aligner(ref_seq, 0.1, flags=BACK).locate(query_seq)