コード例 #1
0
        def _match(insert_match, offset, insert_match_size, prob):
            """
            Decide whether the read overhangs beyond an insert match look
            like the configured adapters.

            Returns one of:
              * ``(insert_match, None, None)`` when ``offset`` is smaller than
                ``self.min_adapter_overlap``;
              * ``None`` when the adapter comparison is rejected;
              * ``(insert_match, Match, Match)`` otherwise, with one adapter
                ``Match`` for each read.

            ``prob`` is accepted but is not used in this function.
            """
            if offset < self.min_adapter_overlap:
                # Nearly complete overlap between the reads: the overhang is
                # too short for a confident adapter comparison. Handing back
                # only the insert match tells the caller that error
                # correction is still possible although nothing needs to be
                # trimmed.
                return (insert_match, None, None)

            # TODO: this is very sensitive to the exact correct choice of
            # adapter. For example, if you specify GATCGGAA... and the correct
            # adapter is AGATCGGAA..., the prefixes will not match exactly and
            # the alignment will fail. We need to use a comparison that is a
            # bit more forgiving.

            # Compare whatever follows the insert in each read against the
            # start of the corresponding adapter. Index 4 of each result is
            # used as the match count and index 5 as the mismatch count.
            read1_cmp = compare_prefixes(seq1[insert_match_size:], self.adapter1)
            read2_cmp = compare_prefixes(seq2[insert_match_size:], self.adapter2)

            overlap_len = min(offset, self.adapter1_len, self.adapter2_len)
            mismatch_limit = round(overlap_len * self.max_adapter_mismatch_frac)

            # Give up only if *both* comparisons exceed the mismatch budget;
            # a single acceptable adapter is enough to continue.
            read1_over_limit = read1_cmp[5] > mismatch_limit
            read2_over_limit = read2_cmp[5] > mismatch_limit
            if read1_over_limit and read2_over_limit:
                return None

            prob_read1 = self.match_probability(read1_cmp[4], overlap_len)
            prob_read2 = self.match_probability(read2_cmp[4], overlap_len)
            # The combined-probability filter applies only to overlaps longer
            # than the configured cutoff.
            if overlap_len > self.adapter_check_cutoff:
                if (prob_read1 * prob_read2) > self.adapter_max_rmp:
                    return None

            # The adapter cannot extend past the end of its read.
            read1_adapter_len = min(self.adapter1_len, l1 - insert_match_size)
            read2_adapter_len = min(self.adapter2_len, l2 - insert_match_size)

            # Both returned matches carry the counts of whichever comparison
            # has the smaller probability value.
            if prob_read1 < prob_read2:
                preferred_cmp = read1_cmp
            else:
                preferred_cmp = read2_cmp
            best_matches, best_mismatches = preferred_cmp[4:6]

            # NOTE(review): Match fields are presumably (adapter start,
            # adapter stop, read start, read stop, matches, errors) -- confirm
            # against the Match definition.
            read1_adapter_match = Match(
                0, read1_adapter_len, insert_match_size, l1,
                best_matches, best_mismatches)
            read2_adapter_match = Match(
                0, read2_adapter_len, insert_match_size, l2,
                best_matches, best_mismatches)
            return (insert_match, read1_adapter_match, read2_adapter_match)
コード例 #2
0
def compare_suffixes(s1, s2, wildcard_ref=False, wildcard_query=False):
    """
    Compare the ends of two strings, tolerating mismatches, to find out
    whether one is a suffix of the other. Used to find an anchored 3' adapter
    when no indels are allowed.

    Both strings are reversed and passed to compare_prefixes together with
    the two wildcard flags; the length it reports is then converted back into
    coordinates measured from the end of each original string.

    Returns a 6-tuple ``(start1, stop1, start2, stop2, matches, errors)``
    in which each stop equals the full length of the corresponding string and
    each start is that length minus the compared length.
    """
    reversed1, reversed2 = s1[::-1], s2[::-1]
    prefix_result = compare_prefixes(
        reversed1, reversed2, wildcard_ref, wildcard_query)
    # Only the compared length, match count and error count are needed.
    _, overlap, _, _, matches, errors = prefix_result
    end1 = len(reversed1)
    end2 = len(reversed2)
    return (end1 - overlap, end1, end2 - overlap, end2, matches, errors)
コード例 #3
0
ファイル: align.py プロジェクト: llllaaaa/atropos
def compare_suffixes(s1, s2, wildcard_ref=False, wildcard_query=False):
    """
    Suffix counterpart of compare_prefixes: check, allowing mismatches,
    whether one string is the suffix of the other one. Used to find an
    anchored 3' adapter when no indels are allowed.

    The work is delegated to compare_prefixes on the reversed inputs. The
    result is a 6-tuple
    ``(len(s1) - length, len(s1), len(s2) - length, len(s2), matches, errors)``
    where ``length``, ``matches`` and ``errors`` are taken from the second,
    fifth and sixth items of the compare_prefixes result.
    """
    flipped_a = s1[::-1]
    flipped_b = s2[::-1]
    _, span, _, _, n_matches, n_errors = compare_prefixes(
        flipped_a, flipped_b, wildcard_ref, wildcard_query)
    # Reversal keeps the lengths, so the stop coordinates are simply the
    # string lengths and the starts lie `span` characters before them.
    stop_a = len(flipped_a)
    stop_b = len(flipped_b)
    start_a = stop_a - span
    start_b = stop_b - span
    return (start_a, stop_a, start_b, stop_b, n_matches, n_errors)
コード例 #4
0
ファイル: align.py プロジェクト: llllaaaa/atropos
        def _match(insert_match, offset, insert_match_size, prob):
            """
            Validate an insert match by comparing the sequence that follows
            the insert in each read with the start of that read's adapter.

            Result:
              * ``(insert_match, None, None)`` if ``offset`` is below
                ``self.min_adapter_overlap`` (no adapter check is made);
              * ``None`` if the mismatch or probability filter rejects the
                adapters;
              * ``(insert_match, Match, Match)`` otherwise.

            The ``prob`` argument is not read by this function.
            """
            if offset < self.min_adapter_overlap:
                # The reads overlap almost entirely, leaving too little
                # overhang for a confident adapter match. Returning just the
                # insert match signals that error correction can be done
                # even though no adapter trimming is required.
                return (insert_match, None, None)

            # TODO: this is very sensitive to the exact correct choice of
            # adapter. For example, if you specify GATCGGAA... and the correct
            # adapter is AGATCGGAA..., the prefixes will not match exactly and
            # the alignment will fail. We need to use a comparison that is a
            # bit more forgiving.

            overhang1 = seq1[insert_match_size:]
            overhang2 = seq2[insert_match_size:]
            cmp1 = compare_prefixes(overhang1, self.adapter1)
            cmp2 = compare_prefixes(overhang2, self.adapter2)

            shortest = min(offset, self.adapter1_len, self.adapter2_len)
            allowed_mismatches = round(
                shortest * self.max_adapter_mismatch_frac)
            # Index 5 is used as the mismatch count. If even the smaller of
            # the two counts is over budget, then both are, and the
            # candidate is rejected.
            if min(cmp1[5], cmp2[5]) > allowed_mismatches:
                return None

            # Index 4 is used as the match count for the probability lookup.
            p1 = self.match_probability(cmp1[4], shortest)
            p2 = self.match_probability(cmp2[4], shortest)
            joint = p1 * p2
            # The joint-probability filter is skipped for overlaps at or
            # below the cutoff length.
            if shortest > self.adapter_check_cutoff and joint > self.adapter_max_rmp:
                return None

            # Report the counts from the comparison with the lower
            # probability value on both returned matches.
            chosen = cmp1 if p1 < p2 else cmp2
            n_matches, n_mismatches = chosen[4:6]

            # Each adapter length is capped by what remains of the read
            # after the insert.
            return (
                insert_match,
                Match(
                    0,
                    min(self.adapter1_len, l1 - insert_match_size),
                    insert_match_size,
                    l1,
                    n_matches,
                    n_mismatches,
                ),
                Match(
                    0,
                    min(self.adapter2_len, l2 - insert_match_size),
                    insert_match_size,
                    l2,
                    n_matches,
                    n_mismatches,
                ),
            )