예제 #1
0
파일: target.py 프로젝트: LucksLab/spats
 def longest_target_self_matches(self, minimum_length = None):
     """Report, per target, the length of its longest match against itself.

     Every window of `min_len` characters of each target's sequence is looked
     up with `find_exact`, passing the window's own (target, index) as
     `exclude`. Hits that land in the same target are extended with
     `longest_match`, and the longest total length seen is kept.

     :param minimum_length: window length to search with; when omitted (or
         falsy) `self._minimum_length` is used.
     :return: dict mapping `target.name` to the longest self-match length
         found, or to the window length when no self-match was found.
     """
     # Honor the caller's minimum length; fall back to the configured default.
     min_len = minimum_length or self._minimum_length
     matches = {}
     for target in self.targets:
         seq = target.seq
         seq_len = target.n
         index = 0
         best_len = None  # longest self-match length found so far, if any
         while True:
             query = seq[index:index + min_len]
             match_target, match_index = self.find_exact(query, exclude = (target, index))
             if match_target == target:
                 # the window's own position was excluded, so a hit must be elsewhere
                 assert(match_index != index)
                 left, right = longest_match(seq, (index, min_len), match_target.seq, (match_index, min_len))
                 total_len = min_len + left + right
                 if not best_len or total_len > best_len:
                     best_len = total_len
             index += 1
             # Stop once the remaining tail is no longer than the best match
             # (or the window). `best_len or 0` avoids comparing an int with
             # None, which raises TypeError on Python 3.
             if index >= seq_len - max(min_len, best_len or 0):
                 break
         # note that we can't say there's no self-match below min_len,
         # so if we didn't find one, just return that
         matches[target.name] = best_len or min_len
     return matches
예제 #2
0
파일: target.py 프로젝트: LucksLab/spats
 def find_partial_prefix(self, query):
     """Find the longest indexed match seeded by the first word of `query`.

     The first `self._index_word_length` characters of `query` are looked up
     in `self._index`; each (target, index) hit is extended with
     `longest_match`, and hits shorter than `self._minimum_length` are
     ignored.

     :param query: sequence whose leading word is used as the lookup key.
     :return: a 4-item list `[target, 0 - left, total_len, index - left]`
         for the longest hit, where `left` is the first value returned by
         `longest_match`. When several distinct targets tie at the longest
         length, item 0 is a list of those targets. All four items are None
         when the query is shorter than one word or nothing qualifies.
     """
     threshold = self._minimum_length
     word = self._index_word_length
     best = [None, None, None, None]
     if len(query) < word:
         return best
     prefix_key = query[:word]
     for hit_target, hit_pos in self._index.get(prefix_key, []):
         ext_left, ext_right = longest_match(query, (0, word), hit_target.seq, (hit_pos, word))
         span = ext_left + ext_right + word
         if span < threshold:
             # too short to count as a match
             continue
         if not best[2] or span > best[2]:
             # strictly better than anything so far: replace outright
             best = [hit_target, 0 - ext_left, span, hit_pos - ext_left]
             continue
         if span == best[2] and hit_target != best[0]:
             # equal-length hit: remember every distinct target sharing the max
             tied = best[0]
             if isinstance(tied, list):
                 if hit_target not in tied:
                     best[0] = [ hit_target ] + tied
             else:
                 best[0] = [ hit_target, tied ]
     return best
예제 #3
0
파일: target.py 프로젝트: LucksLab/spats
 def _find_partial(self, query, force_target, min_length_override, multiple):
     """Find the longest indexed match of at least the minimum length within `query`.

     Words of `self._index_word_length` characters are sampled from `query`
     at regular sites, looked up in `self._index`, and each (target, index)
     hit is extended with `longest_match`.

     :param query: sequence to search.
     :param force_target: when truthy, hits on any other target are skipped.
     :param min_length_override: minimum match length to use; when falsy,
         `self._minimum_length * multiple` is used instead.
     :param multiple: multiplier applied to `self._minimum_length` when no
         override is given.
     :return: a 4-item list `[target, site - left, total_len, index - left]`
         for the longest hit, where `left` is the first value returned by
         `longest_match`. When several distinct targets tie at the longest
         length, item 0 is a list of those targets. All four items are None
         when nothing qualifies.
     """
     min_len = min_length_override or self._minimum_length * multiple
     word_len = self._index_word_length
     check_every = max(min_len - word_len, 1) # norah has proved that this guarantees finding a match if it exists
     query_len = len(query)
     last = query_len - max(check_every, word_len)
     # Materialize the sites as a list: on Python 3 range() is an immutable
     # sequence without append(), so the final site is concatenated instead.
     check_sites = list(range(0, last, check_every)) + [last]
     candidate = [None, None, None, None]
     # NOTE: it's important to check all sites, and all hits -- to find the longest match.
     for site in check_sites:
         site_key = query[site:site+word_len]
         for target, index in self._index.get(site_key, []):
             if force_target and target != force_target:
                 continue
             left, right = longest_match(query, (site, word_len), target.seq, (index, word_len))
             total_len = left + right + word_len
             if total_len >= min_len:
                 if not candidate[2] or total_len > candidate[2]:
                     # keep it if it's the best match so far
                     candidate = [target, site - left, total_len, index - left]
                 elif total_len == candidate[2] and target != candidate[0]:
                     # need to keep track if multiple candidates have this same max length
                     if isinstance(candidate[0], list):
                         candidate[0] = candidate[0] if target in candidate[0] else [ target ] + candidate[0]
                     else:
                         candidate[0] = [ target, candidate[0] ]
     return candidate
예제 #4
0
 def longest_target_self_matches(self, minimum_length=None):
     """Report, per target, the length of its longest match against itself.

     Every window of `min_len` characters of each target's sequence is looked
     up with `find_exact`, passing the window's own (target, index) as
     `exclude`. Hits that land in the same target are extended with
     `longest_match`, and the longest total length seen is kept.

     :param minimum_length: window length to search with; when omitted (or
         falsy) `self._minimum_length` is used.
     :return: dict mapping `target.name` to the longest self-match length
         found, or to the window length when no self-match was found.
     """
     # Honor the caller's minimum length; fall back to the configured default.
     min_len = minimum_length or self._minimum_length
     matches = {}
     for target in self.targets:
         seq = target.seq
         seq_len = target.n
         index = 0
         best_len = None  # longest self-match length found so far, if any
         while True:
             query = seq[index:index + min_len]
             match_target, match_index = self.find_exact(query,
                                                         exclude=(target,
                                                                  index))
             if match_target == target:
                 # the window's own position was excluded, so a hit must be elsewhere
                 assert (match_index != index)
                 left, right = longest_match(seq, (index, min_len),
                                             match_target.seq,
                                             (match_index, min_len))
                 total_len = min_len + left + right
                 if not best_len or total_len > best_len:
                     best_len = total_len
             index += 1
             # Stop once the remaining tail is no longer than the best match
             # (or the window). `best_len or 0` avoids comparing an int with
             # None, which raises TypeError on Python 3.
             if index >= seq_len - max(min_len, best_len or 0):
                 break
         # note that we can't say there's no self-match below min_len,
         # so if we didn't find one, just return that
         matches[target.name] = best_len or min_len
     return matches
예제 #5
0
 def find_partial_all(self, query, min_length=0):
     """Collect every distinct indexed match of at least the minimum length in `query`.

     Words of `self._index_word_length` characters are sampled from `query`
     at regular sites, looked up in `self._index`, and each (target, index)
     hit is extended with `longest_match`.

     :param query: sequence to search.
     :param min_length: minimum match length; when falsy,
         `self._minimum_length` is used.
     :return: list of 4-item lists
         `[target, site - left, total_len, index - left]`, where `left` is
         the first value returned by `longest_match`. Each distinct
         candidate appears once, in discovery order. Empty when nothing
         qualifies.
     """
     min_len = min_length or self._minimum_length
     word_len = self._index_word_length
     check_every = max(
         min_len - word_len, 1
     )  # norah has proved that this guarantees finding a match if it exists
     query_len = len(query)
     last = query_len - max(check_every, word_len)
     # Materialize the sites as a list: on Python 3 range() is an immutable
     # sequence without append(), so the final site is concatenated instead.
     check_sites = list(range(0, last, check_every)) + [last]
     candidates = []
     # NOTE: it's important to check all sites, and all hits -- to find the longest match.
     for site in check_sites:
         site_key = query[site:site + word_len]
         for target, index in self._index.get(site_key, []):
             left, right = longest_match(query, (site, word_len),
                                         target.seq, (index, word_len))
             total_len = left + right + word_len
             if total_len >= min_len:
                 candidate = [target, site - left, total_len, index - left]
                 # different sites can seed the same extended match; keep one copy
                 if candidate not in candidates:
                     candidates.append(candidate)
     return candidates
예제 #6
0
 def find_partial_prefix(self, query):
     """Find the longest indexed match seeded by the first word of `query`.

     The first `self._index_word_length` characters of `query` are looked up
     in `self._index`; each (target, index) hit is extended with
     `longest_match`, and hits shorter than `self._minimum_length` are
     ignored.

     :param query: sequence whose leading word is used as the lookup key.
     :return: a 4-item list `[target, 0 - left, total_len, index - left]`
         for the longest hit, where `left` is the first value returned by
         `longest_match`. When several distinct targets tie at the longest
         length, item 0 is a list of those targets. All four items are None
         when the query is shorter than one word or nothing qualifies.
     """
     threshold = self._minimum_length
     word = self._index_word_length
     best = [None, None, None, None]
     if len(query) < word:
         return best
     prefix_key = query[:word]
     for hit_target, hit_pos in self._index.get(prefix_key, []):
         ext_left, ext_right = longest_match(query, (0, word),
                                             hit_target.seq, (hit_pos, word))
         span = ext_left + ext_right + word
         if span < threshold:
             # too short to count as a match
             continue
         if not best[2] or span > best[2]:
             # strictly better than anything so far: replace outright
             best = [hit_target, 0 - ext_left, span, hit_pos - ext_left]
             continue
         if span == best[2] and hit_target != best[0]:
             # equal-length hit: remember every distinct target sharing the max
             tied = best[0]
             if isinstance(tied, list):
                 if hit_target not in tied:
                     best[0] = [hit_target] + tied
             else:
                 best[0] = [hit_target, tied]
     return best
예제 #7
0
 def _find_partial(self, query, force_target, min_length_override,
                   multiple):
     """Find the longest indexed match of at least the minimum length within `query`.

     Words of `self._index_word_length` characters are sampled from `query`
     at regular sites, looked up in `self._index`, and each (target, index)
     hit is extended with `longest_match`.

     :param query: sequence to search.
     :param force_target: when truthy, hits on any other target are skipped.
     :param min_length_override: minimum match length to use; when falsy,
         `self._minimum_length * multiple` is used instead.
     :param multiple: multiplier applied to `self._minimum_length` when no
         override is given.
     :return: a 4-item list `[target, site - left, total_len, index - left]`
         for the longest hit, where `left` is the first value returned by
         `longest_match`. When several distinct targets tie at the longest
         length, item 0 is a list of those targets. All four items are None
         when nothing qualifies.
     """
     min_len = min_length_override or self._minimum_length * multiple
     word_len = self._index_word_length
     check_every = max(
         min_len - word_len, 1
     )  # norah has proved that this guarantees finding a match if it exists
     query_len = len(query)
     last = query_len - max(check_every, word_len)
     # Materialize the sites as a list: on Python 3 range() is an immutable
     # sequence without append(), so the final site is concatenated instead.
     check_sites = list(range(0, last, check_every)) + [last]
     candidate = [None, None, None, None]
     # NOTE: it's important to check all sites, and all hits -- to find the longest match.
     for site in check_sites:
         site_key = query[site:site + word_len]
         for target, index in self._index.get(site_key, []):
             if force_target and target != force_target:
                 continue
             left, right = longest_match(query, (site, word_len),
                                         target.seq, (index, word_len))
             total_len = left + right + word_len
             if total_len >= min_len:
                 if not candidate[2] or total_len > candidate[2]:
                     # keep it if it's the best match so far
                     candidate = [
                         target, site - left, total_len, index - left
                     ]
                 elif total_len == candidate[2] and target != candidate[0]:
                     # need to keep track if multiple candidates have this same max length
                     if isinstance(candidate[0], list):
                         candidate[0] = candidate[0] if target in candidate[
                             0] else [target] + candidate[0]
                     else:
                         candidate[0] = [target, candidate[0]]
     return candidate
예제 #8
0
파일: target.py 프로젝트: LucksLab/spats
 def find_partial_all(self, query, min_length = 0):
     """Collect every distinct indexed match of at least the minimum length in `query`.

     Words of `self._index_word_length` characters are sampled from `query`
     at regular sites, looked up in `self._index`, and each (target, index)
     hit is extended with `longest_match`.

     :param query: sequence to search.
     :param min_length: minimum match length; when falsy,
         `self._minimum_length` is used.
     :return: list of 4-item lists
         `[target, site - left, total_len, index - left]`, where `left` is
         the first value returned by `longest_match`. Each distinct
         candidate appears once, in discovery order. Empty when nothing
         qualifies.
     """
     min_len = min_length or self._minimum_length
     word_len = self._index_word_length
     check_every = max(min_len - word_len, 1) # norah has proved that this guarantees finding a match if it exists
     query_len = len(query)
     last = query_len - max(check_every, word_len)
     # Materialize the sites as a list: on Python 3 range() is an immutable
     # sequence without append(), so the final site is concatenated instead.
     check_sites = list(range(0, last, check_every)) + [last]
     candidates = []
     # NOTE: it's important to check all sites, and all hits -- to find the longest match.
     for site in check_sites:
         site_key = query[site:site+word_len]
         for target, index in self._index.get(site_key, []):
             left, right = longest_match(query, (site, word_len), target.seq, (index, word_len))
             total_len = left + right + word_len
             if total_len >= min_len:
                 candidate = [target, site - left, total_len, index - left]
                 # different sites can seed the same extended match; keep one copy
                 if candidate not in candidates:
                     candidates.append(candidate)
     return candidates