def __call__(self, read1, read2):
    """Merge an overlapping read pair into a single read.

    read1 is aligned against the reverse complement of read2. When the
    alignment has at least ``min_overlap`` matches, read1 is extended (or
    replaced) so that it spans both reads, ``read1.merged`` is set to True
    and read2 is dropped.

    ``self.min_overlap`` values <= 1 are treated as a fraction of the
    shorter read's length (with a floor of 2 bases); larger values are used
    as-is.

    Args:
        read1: First read of the pair; modified in place when merged.
        read2: Second read of the pair.

    Returns:
        ``(read1, read2)`` when the pair is not merged, or
        ``(read1, None)`` when it is.

    Raises:
        AtroposError: If the alignment coordinates fit none of the handled
            overlap layouts.
    """
    len1 = len(read1.sequence)
    len2 = len(read2.sequence)
    min_overlap = self.min_overlap
    if min_overlap <= 1:
        min_overlap = max(2, round(self.min_overlap * min(len1, len2)))

    # Reads shorter than the required overlap can never be merged.
    if len1 < min_overlap or len2 < min_overlap:
        return (read1, read2)

    insert_matched = read1.insert_overlap and read2.insert_overlap

    if insert_matched:
        # If we've already determined that there is an insert overlap
        # with a 3' overhang, we can constrain our alignment
        aflags = START_WITHIN_SEQ1 | STOP_WITHIN_SEQ2
    else:
        aflags = SEMIGLOBAL

    # align read1 to read2 reverse-complement to be compatible with
    # InsertAligner
    read2_rc = reverse_complement(read2.sequence)
    aligner = Aligner(read2_rc, self.error_rate, aflags)
    alignment = aligner.locate(read1.sequence)

    if alignment:
        r2_start, r2_stop, r1_start, r1_stop, matches, errors = alignment
        if matches >= min_overlap:
            # Only correct errors if we haven't already done correction in
            # the InsertAligner
            if (self.mismatch_action and errors > 0 and
                    not insert_matched and
                    read1.corrected == 0 and read2.corrected == 0):
                # NOTE(review): read2_rc was computed before this call; if
                # correct_errors rewrites read2.sequence, the merged read
                # below still uses the uncorrected bases -- confirm.
                self.correct_errors(read1, read2, alignment)

            if r2_start == 0 and r2_stop == len2:
                # r2 is fully contained in r1
                pass
            elif r1_start == 0 and r1_stop == len1:
                # r1 is fully contained in r2
                read1.sequence = read2_rc
                # The other branches tolerate reads without qualities; do
                # the same here rather than calling reversed() on a falsy
                # value, which would raise TypeError for None.
                if read2.qualities:
                    read1.qualities = "".join(reversed(read2.qualities))
                else:
                    read1.qualities = read2.qualities
            elif r1_start > 0:
                # read1 starts first: append the part of read2 that extends
                # past the overlap.
                read1.sequence += read2_rc[r2_stop:]
                if read1.qualities and read2.qualities:
                    read1.qualities += "".join(
                        reversed(read2.qualities))[r2_stop:]
            elif r2_start > 0:
                # read2 (reverse-complemented) starts first: keep all of it
                # and append what remains of read1 after the overlap.
                read1.sequence = read2_rc + read1.sequence[r1_stop:]
                if read1.qualities and read2.qualities:
                    read1.qualities = (
                        "".join(reversed(read2.qualities)) +
                        read1.qualities[r1_stop:])
            else:
                raise AtroposError(
                    "Invalid alignment while trying to merge read "
                    "{}: {}".format(
                        read1.name, ",".join(str(i) for i in alignment)))

            read1.merged = True
            read2 = None

    return (read1, read2)
def __call__(self, read1, read2):
    """Merge a read pair whose ends overlap into one read.

    The reverse complement of read2 is used as the alignment reference and
    read1 as the query. If the number of matching bases reaches the
    required minimum overlap, read1 is rewritten to cover both reads,
    flagged with ``merged = True``, and read2 is replaced by None.

    A ``self.min_overlap`` of at most 1 is interpreted as a fraction of the
    shorter read's length, never resolving to fewer than 2 bases.

    Args:
        read1: First read of the pair; updated in place on a merge.
        read2: Second read of the pair.

    Returns:
        ``(read1, read2)`` if no merge took place, otherwise
        ``(read1, None)``.

    Raises:
        AtroposError: If the alignment coordinates describe a layout that
            none of the merge cases handles.
    """
    len1 = len(read1.sequence)
    len2 = len(read2.sequence)
    min_overlap = self.min_overlap
    if min_overlap <= 1:
        min_overlap = max(2, round(self.min_overlap * min(len1, len2)))

    # Either read being shorter than the required overlap rules out a merge.
    if len1 < min_overlap or len2 < min_overlap:
        return (read1, read2)

    insert_matched = read1.insert_overlap and read2.insert_overlap

    if insert_matched:
        # If we've already determined that there is an insert overlap
        # with a 3' overhang, we can constrain our alignment
        aflags = START_WITHIN_SEQ1 | STOP_WITHIN_SEQ2
    else:
        aflags = SEMIGLOBAL

    # align read1 to read2 reverse-complement to be compatible with
    # InsertAligner
    read2_rc = reverse_complement(read2.sequence)
    aligner = Aligner(read2_rc, self.error_rate, aflags)
    alignment = aligner.locate(read1.sequence)

    if alignment:
        r2_start, r2_stop, r1_start, r1_stop, matches, errors = alignment
        if matches >= min_overlap:
            # Only correct errors if we haven't already done correction in
            # the InsertAligner
            if self.mismatch_action and errors > 0 and not insert_matched:
                self.correct_errors(read1, read2, alignment)

            if r2_start == 0 and r2_stop == len2:
                # r2 is fully contained in r1
                pass
            elif r1_start == 0 and r1_stop == len1:
                # r1 is fully contained in r2
                read1.sequence = read2_rc
                # Guard against missing qualities, as the extension
                # branches below do; reversed() on None raises TypeError.
                if read2.qualities:
                    read1.qualities = "".join(reversed(read2.qualities))
                else:
                    read1.qualities = read2.qualities
            elif r1_start > 0:
                # read1 leads: add the tail of read2_rc beyond the overlap.
                read1.sequence += read2_rc[r2_stop:]
                if read1.qualities and read2.qualities:
                    read1.qualities += "".join(
                        reversed(read2.qualities))[r2_stop:]
            elif r2_start > 0:
                # read2_rc leads: prepend it to the tail of read1 beyond
                # the overlap.
                read1.sequence = read2_rc + read1.sequence[r1_stop:]
                if read1.qualities and read2.qualities:
                    read1.qualities = (
                        "".join(reversed(read2.qualities)) +
                        read1.qualities[r1_stop:])
            else:
                raise AtroposError(
                    "Invalid alignment while trying to merge read "
                    "{}: {}".format(
                        read1.name, ",".join(str(i) for i in alignment)))

            read1.merged = True
            read2 = None

    return (read1, read2)
def align(seq1, seq2, min_overlap_frac=0.9):
    """Locate ``seq2`` within ``seq1`` and return the matched part of ``seq1``.

    Args:
        seq1: Sequence the aligner is built on; the result is sliced from it.
        seq2: Sequence passed to the aligner's ``locate``.
        min_overlap_frac: Fraction of the shorter sequence's length used
            (rounded up) as the aligner's minimum overlap.

    Returns:
        The slice of ``seq1`` covered by the alignment, or None when the
        aligner reports no match.
    """
    # Error rate 0.0; the two trailing False flags are passed through
    # unchanged (their meaning is defined by Aligner -- not visible here).
    matcher = Aligner(seq1, 0.0, SEMIGLOBAL, False, False)
    shorter = min(len(seq1), len(seq2))
    matcher.min_overlap = math.ceil(shorter * min_overlap_frac)
    # Presumably large enough to rule out gapped alignments -- confirm.
    matcher.indel_cost = 100000

    hit = matcher.locate(seq2)
    if not hit:
        return None
    # The first two fields of the result delimit the match within seq1.
    return seq1[hit[0]:hit[1]]
def align(seq1, seq2, min_overlap_frac=0.9):
    """Return the region of ``seq1`` that ``seq2`` aligns to, if any.

    Args:
        seq1: Reference sequence handed to the Aligner constructor.
        seq2: Query sequence handed to ``Aligner.locate``.
        min_overlap_frac: Minimum overlap, as a fraction of the shorter of
            the two sequences (rounded up to a whole number of bases).

    Returns:
        ``seq1[start:stop]`` for the located match, or None if ``locate``
        returns a falsy result.
    """
    aln = Aligner(seq1, 0.0, SEMIGLOBAL, False, False)
    min_len = min(len(seq1), len(seq2))
    required = math.ceil(min_len * min_overlap_frac)
    aln.min_overlap = required
    aln.indel_cost = 100000  # presumably forbids indels in practice; confirm
    found = aln.locate(seq2)
    return seq1[found[0]:found[1]] if found else None
def align(seq1, seq2, min_overlap_frac=0.9):
    """Align two sequences and return the matching portion of the first.

    Args:
        seq1: The sequence the aligner is constructed with; the returned
            value is a slice of this sequence.
        seq2: The sequence that is located against ``seq1``.
        min_overlap_frac: Minimum fraction of overlapping bases required
            for a match, relative to the shorter sequence.

    Returns:
        The matching portion of ``seq1``, or None if there is no match.
    """
    finder = Aligner(seq1, 0.0, SEMIGLOBAL, False, False)
    finder.min_overlap = math.ceil(
        min(len(seq1), len(seq2)) * min_overlap_frac)
    finder.indel_cost = 100000

    bounds = finder.locate(seq2)
    if bounds:
        # Only the first two fields (start/stop within seq1) are needed.
        begin, end = bounds[0], bounds[1]
        return seq1[begin:end]
    return None
def align(seq1, seq2, min_overlap_frac=0.9):
    """Align two sequences.

    Args:
        seq1, seq2: The sequences to align. The aligner is built on
            ``seq1`` and ``seq2`` is located against it.
        min_overlap_frac: Minimum fraction of overlapping bases required
            for a match (applied to the shorter sequence, rounded up).

    Returns:
        The matching portion of ``seq1``, or None when nothing matches.
    """
    aligner = Aligner(seq1, 0.0, SEMIGLOBAL, False, False)
    shortest_length = min(len(seq1), len(seq2))
    aligner.min_overlap = math.ceil(shortest_length * min_overlap_frac)
    aligner.indel_cost = 100000

    overlap = None
    location = aligner.locate(seq2)
    if location:
        overlap = seq1[slice(location[0], location[1])]
    return overlap
def test_100_percent_error_rate(self):
    """Smoke test: locating with an error rate of 1.0 must not raise.

    There are no assertions; the test passes as long as constructing the
    aligner and calling ``locate`` complete without an exception.
    """
    Aligner('GCTTAGACATATC', 1.0, flags=BACK).locate('CAA')
def test(self):
    """Smoke test: locating a two-base query with BACK flags must not raise.

    There are no assertions; only the absence of an exception is checked.
    """
    ref_seq = 'CTCCAGCTTAGACATATC'
    query = 'CC'
    Aligner(ref_seq, 0.1, flags=BACK).locate(query)