Exemple #1
0
        def _match(insert_match, offset, insert_match_size,
                   prob):  # pylint: disable=unused-argument
            """Evaluate both adapters against the sequence after the insert.

            Relies on ``self``, ``seq1``, ``seq2``, ``len1`` and ``len2`` from
            the enclosing scope.

            Args:
                insert_match: Passed through unchanged as the first element of
                    every non-``None`` result.
                offset: Compared against ``self.min_adapter_overlap`` and used
                    as an upper bound on the adapter length that is scored.
                insert_match_size: Index into ``seq1``/``seq2`` at which the
                    adapter comparison starts.
                prob: Not used by this function.

            Returns:
                ``(insert_match, None, None)`` when ``offset`` is smaller than
                the minimum adapter overlap; ``None`` when the adapter match
                is rejected; otherwise ``(insert_match, Match, Match)`` with
                one ``Match`` built for each of the two reads.
            """
            if offset < self.min_adapter_overlap:
                # The reads overlap so much that the overhang is too short
                # for a confident adapter match. Handing back only the insert
                # match tells the caller that error correction is still
                # possible although nothing needs to be trimmed.
                return (insert_match, None, None)

            # TODO: this depends heavily on the adapter being specified
            # exactly. For example, if you specify GATCGGAA... while the real
            # adapter is AGATCGGAA..., the prefixes do not line up and the
            # comparison fails. A more forgiving comparison is needed here.
            wildcard_opts = dict(wildcard_ref=self.adapter_wildcards,
                                 wildcard_query=self.read_wildcards)
            cmp1 = compare_prefixes(seq1[insert_match_size:], self.adapter1,
                                    **wildcard_opts)
            cmp2 = compare_prefixes(seq2[insert_match_size:], self.adapter2,
                                    **wildcard_opts)

            scored_len = min(offset, self.adapter1_len, self.adapter2_len)
            mismatch_limit = round(scored_len * self.max_adapter_mismatch_frac)
            # Reject only when *both* comparisons exceed the mismatch limit,
            # i.e. when even the better of the two is over the limit.
            if min(cmp1[5], cmp2[5]) > mismatch_limit:
                return None

            prob1 = self.match_probability(cmp1[4], scored_len)
            prob2 = self.match_probability(cmp2[4], scored_len)
            if scored_len > self.adapter_check_cutoff:
                if prob1 * prob2 > self.adapter_max_rmp:
                    return None

            # Both returned Match objects carry the counts of whichever
            # comparison has the lower probability value (ties go to the
            # second adapter).
            if prob1 < prob2:
                best = cmp1
            else:
                best = cmp2
            n_matches, n_mismatches = best[4:6]

            # The adapter span in each read cannot extend past the read end.
            span1 = min(self.adapter1_len, len1 - insert_match_size)
            span2 = min(self.adapter2_len, len2 - insert_match_size)
            read1_match = Match(0, span1, insert_match_size, len1,
                                n_matches, n_mismatches)
            read2_match = Match(0, span2, insert_match_size, len2,
                                n_matches, n_mismatches)
            return (insert_match, read1_match, read2_match)
Exemple #2
0
 def _adapter_match(insert_seq, adapter_seq, adapter_len):
     """Compare an adapter with the part of a read after the insert.

     Relies on ``self``, ``_insert_match_size`` and ``_offset`` from the
     enclosing scope.

     Args:
         insert_seq: Sequence whose tail, starting at
             ``_insert_match_size``, is compared with the adapter.
         adapter_seq: The adapter sequence to compare against.
         adapter_len: Length of the adapter; capped at ``_offset``.

     Returns:
         A 3-tuple of the ``compare_prefixes`` result, the capped adapter
         length, and that length multiplied by
         ``self.max_adapter_mismatch_frac`` and rounded.
     """
     tail = insert_seq[_insert_match_size:]
     comparison = compare_prefixes(
         tail,
         adapter_seq,
         wildcard_ref=self.adapter_wildcards,
         wildcard_query=self.read_wildcards)
     capped_len = min(_offset, adapter_len)
     mismatch_limit = round(capped_len * self.max_adapter_mismatch_frac)
     return comparison, capped_len, mismatch_limit
Exemple #3
0
def compare_suffixes(suffix_ref,
                     suffix_query,
                     wildcard_ref=False,
                     wildcard_query=False):
    """Check whether one string is a suffix of the other, allowing mismatches.

    Used to find an anchored 3' adapter when no indels are allowed. Both
    inputs are reversed and passed to ``compare_prefixes``, so the suffix
    comparison is carried out as a prefix comparison on the mirrored strings.

    Args:
        suffix_ref, suffix_query: The suffixes to compare.
        wildcard_ref, wildcard_query: Whether wildcards are valid in the
            respective suffix; forwarded to ``compare_prefixes``.

    Returns:
        A 6-tuple ``(len(ref) - n, len(ref), len(query) - n, len(query),
        matches, errors)`` where ``n``, ``matches`` and ``errors`` are the
        second, fifth and sixth values returned by ``compare_prefixes``.
    """
    mirrored_ref = suffix_ref[::-1]
    mirrored_query = suffix_query[::-1]
    # Only the compared length and the match/error counts are needed; the
    # remaining positions of the prefix result are discarded.
    _r0, span, _q0, _q1, matches, errors = compare_prefixes(
        mirrored_ref, mirrored_query, wildcard_ref, wildcard_query)
    ref_end = len(mirrored_ref)
    query_end = len(mirrored_query)
    return (ref_end - span, ref_end,
            query_end - span, query_end,
            matches, errors)