def test_compare_suffixes():
    """Check the tuples returned by ``compare_suffixes`` for fixed inputs.

    Every case calls ``compare_suffixes`` with two sequences (plus optional
    keyword arguments) and compares the complete six-element result tuple
    against the expected value.
    """
    cases = [
        # 'X' in the first sequence does not equal the 'A' it is compared to.
        (('AAXAA', 'TTTTTTTAAAAA'), {}, (0, 5, 7, 12, 4, 1)),
        # With wildcard_ref=True the 'N' is expected to count as a match.
        # NOTE(review): this case used to be asserted twice verbatim; a
        # wildcard_query variant may have been intended -- confirm.
        (('AANAA', 'TTTTTTTAACAA'), {'wildcard_ref': True}, (0, 5, 7, 12, 5, 0)),
        # First sequence is six characters long; two positions differ.
        (('AAAAAX', 'TTTTTTTAAAAA'), {}, (0, 6, 6, 12, 4, 2)),
    ]
    for args, kwargs, expected in cases:
        result = compare_suffixes(*args, **kwargs)
        assert result == expected, (args, kwargs, result, expected)
def match_to(self, read):
    """Try to locate this adapter in ``read``.

    A plain string comparison is attempted first (skipped when adapter
    wildcards are enabled). If that yields nothing, the method falls back
    to approximate matching and filters the result by the configured
    thresholds.

    Args:
        read: A :class:`Sequence` instance; its ``sequence`` attribute is
            upper-cased before any comparison.

    Returns:
        A :class:`Match` instance when the adapter was found and the hit
        satisfies the minimum overlap, the maximum error rate and, if
        ``max_rmp`` is set, the maximum random-match probability;
        ``None`` otherwise.
    """
    query = read.sequence.upper()

    # Fast path: exact comparison, only meaningful without adapter wildcards.
    if not self.adapter_wildcards:
        if self.where == PREFIX:
            exact_at = 0 if query.startswith(self.sequence) else -1
        elif self.where == SUFFIX:
            if query.endswith(self.sequence):
                exact_at = len(query) - len(self.sequence)
            else:
                exact_at = -1
        else:
            exact_at = query.find(self.sequence)

        if exact_at >= 0:
            adapter_len = len(self.sequence)
            return Match(
                0, adapter_len, exact_at, exact_at + adapter_len,
                adapter_len, 0, self._front_flag, self, read)

    # Slow path: approximate matching.
    if self.indels or self.where not in (PREFIX, SUFFIX):
        hit = self.aligner.locate(query)
        if self.debug:
            print(self.aligner.dpmatrix)  # pragma: no cover
    else:
        # Anchored adapters without indels only need a positional comparison.
        if self.where == PREFIX:
            compare = align.compare_prefixes
        else:
            compare = align.compare_suffixes
        hit = compare(
            self.sequence, query,
            wildcard_ref=self.adapter_wildcards,
            wildcard_query=self.read_wildcards)

    if not hit:
        return None

    astart, astop, rstart, rstop, matches, errors = hit
    overlap = astop - astart

    # The overlap test must come first: it short-circuits the division.
    if not (overlap >= self.min_overlap
            and errors / overlap <= self.max_error_rate):
        return None

    # The random-match-probability filter is only active when max_rmp is set.
    if self.max_rmp is not None:
        if not self.match_probability(matches, overlap) <= self.max_rmp:
            return None

    return Match(
        astart, astop, rstart, rstop, matches, errors,
        self._front_flag, self, read)
def match_to(self, read):
    """Attempt to match this adapter to the given read.

    An exact string match is tried first unless adapter wildcards are
    enabled; if that does not succeed, approximate matching is used.

    Args:
        read: Object exposing the read's bases as ``read.sequence``; the
            sequence is upper-cased before matching.

    Returns:
        A Match instance if a match was found; None if no match was found
        given the matching criteria (minimum overlap length, maximum error
        rate and, when both ``match_probability`` and ``max_rmp`` are set,
        maximum random-match probability).
    """
    read_seq = read.sequence.upper()

    # try to find an exact match first unless wildcards are allowed
    pos = -1
    if not self.adapter_wildcards:
        if self.where == PREFIX:
            pos = 0 if read_seq.startswith(self.sequence) else -1
        elif self.where == SUFFIX:
            if read_seq.endswith(self.sequence):
                pos = len(read_seq) - len(self.sequence)
        else:
            pos = read_seq.find(self.sequence)
    if pos >= 0:
        seqlen = len(self.sequence)
        return Match(
            0, seqlen, pos, pos + seqlen, seqlen, 0,
            self._front_flag, self, read)

    # try approximate matching
    if not self.indels and self.where in (PREFIX, SUFFIX):
        if self.where == PREFIX:
            alignment = align.compare_prefixes(
                self.sequence, read_seq,
                wildcard_ref=self.adapter_wildcards,
                wildcard_query=self.read_wildcards)
        else:
            alignment = align.compare_suffixes(
                self.sequence, read_seq,
                wildcard_ref=self.adapter_wildcards,
                wildcard_query=self.read_wildcards)
    else:
        alignment = self.aligner.locate(read_seq)
        if self.debug:
            print(self.aligner.dpmatrix)  # pragma: no cover

    if not alignment:
        return None

    astart, astop, rstart, rstop, matches, errors = alignment
    size = astop - astart

    # The overlap test comes first so the division is skipped for
    # alignments that are too short.
    if not (size >= self.min_overlap
            and errors / size <= self.max_error_rate):
        return None

    # Apply the random-match-probability filter only when both the
    # estimator and the threshold are configured. Comparing a probability
    # against a ``None`` threshold would raise TypeError on Python 3.
    rmp_enabled = (
        self.match_probability is not None and self.max_rmp is not None)
    if rmp_enabled and not (
            self.match_probability(matches, size) <= self.max_rmp):
        return None

    return Match(
        astart, astop, rstart, rstop, matches, errors,
        self._front_flag, self, read)