def search(self, pattern, sequence, max_subs, max_ins, max_dels,
           max_l_dist=None):
     """Run fnm_generic_lp() with the given limits; return a list."""
     limits = LevenshteinSearchParams(
         max_subs, max_ins, max_dels, max_l_dist)
     found = fnm_generic_lp(pattern, sequence, limits)
     return list(found)
Esempio n. 2
0
 def search(self, subsequence, sequence, max_subs):
     """Substitutions-only search via fnm_generic_ngrams(), consolidated.

     Calls self.skipTest() when max_subs >= len(subsequence), so that
     fnm_generic_ngrams() is never invoked with such parameters.
     """
     if len(subsequence) <= max_subs:
         self.skipTest("avoiding calling fnm_generic_ngrams() "
                       "with max_subs >= len(subsequence)")
     # Insertions and deletions are disallowed; only substitutions count.
     limits = LevenshteinSearchParams(max_subs, 0, 0, max_subs)
     raw_matches = fnm_generic_ngrams(subsequence, sequence, limits)
     return consolidate_overlapping_matches(raw_matches)
 def search(self, subsequence, sequence, max_subs):
     """Return the best match of each group from c_fnm_generic_ngrams().

     Only substitutions are allowed; insertions and deletions are 0.
     """
     limits = LevenshteinSearchParams(max_subs, 0, 0, max_subs)
     groups = group_matches(
         c_fnm_generic_ngrams(subsequence, sequence, limits))
     return [get_best_match_in_group(members) for members in groups]
 def search(self, pattern, sequence, max_subs, max_ins, max_dels,
            max_l_dist=None):
     """Run find_near_matches_generic() and consolidate the results."""
     return consolidate_overlapping_matches(
         find_near_matches_generic(
             pattern, sequence,
             LevenshteinSearchParams(max_subs, max_ins, max_dels,
                                     max_l_dist)))
Esempio n. 5
0
 def search(self, subsequence, sequence, max_l_dist):
     """Return the best match of each group found by fnm_generic_lp().

     max_l_dist is used as the limit for substitutions, insertions,
     deletions and the total distance alike.
     """
     limits = LevenshteinSearchParams(
         max_l_dist, max_l_dist, max_l_dist, max_l_dist)
     groups = group_matches(fnm_generic_lp(subsequence, sequence, limits))
     return [get_best_match_in_group(members) for members in groups]
Esempio n. 6
0
 def search(self, subsequence, sequence, max_subs):
     """Best match per group from c_fnm_generic_ngrams(), subs only.

     Calls self.skipTest() when max_subs >= len(subsequence), so that
     c_fnm_generic_ngrams() is never invoked with such parameters.
     """
     if len(subsequence) <= max_subs:
         self.skipTest("avoiding calling c_fnm_generic_ngrams() "
                       "with max_subs >= len(subsequence)")
     # Insertions and deletions are disallowed; only substitutions count.
     limits = LevenshteinSearchParams(max_subs, 0, 0, max_subs)
     groups = group_matches(
         c_fnm_generic_ngrams(subsequence, sequence, limits))
     return [get_best_match_in_group(members) for members in groups]
Esempio n. 7
0
 def search(self, pattern, sequence, max_subs, max_ins, max_dels,
            max_l_dist=None):
     """Return the result of hnm_generic_ngrams() for the given limits."""
     limits = LevenshteinSearchParams(
         max_subs, max_ins, max_dels, max_l_dist)
     return hnm_generic_ngrams(pattern, sequence, limits)
Esempio n. 8
0
 def search(self, pattern, sequence, max_subs, max_ins, max_dels,
            max_l_dist=None):
     """Return the results of find_near_matches_generic() as a list."""
     limits = LevenshteinSearchParams(
         max_subs, max_ins, max_dels, max_l_dist)
     found = find_near_matches_generic(pattern, sequence, limits)
     return list(found)
 def search(self, pattern, sequence, max_subs, max_ins, max_dels,
            max_l_dist=None):
     """Return the results of c_fnm_generic_lp() as a list."""
     limits = LevenshteinSearchParams(
         max_subs, max_ins, max_dels, max_l_dist)
     found = c_fnm_generic_lp(pattern, sequence, limits)
     return list(found)
 def search(self, pattern, sequence, max_subs, max_ins, max_dels,
            max_l_dist=None):
     """Return the best match of each group from c_fnm_generic_ngrams()."""
     limits = LevenshteinSearchParams(
         max_subs, max_ins, max_dels, max_l_dist)
     groups = group_matches(
         c_fnm_generic_ngrams(pattern, sequence, limits))
     return [get_best_match_in_group(members) for members in groups]
Esempio n. 11
0
def find_near_matches(subsequence, sequence,
                      max_substitutions=None, max_insertions=None,
                      max_deletions=None, max_l_dist=None):
    """Find near-matches of `subsequence` inside `sequence`.

    A part of the sequence counts as a near-match when, relative to the
    subsequence, it stays within each of these limits:

    * max_substitutions: the number of substituted characters
    * max_insertions: the number of newly inserted characters
    * max_deletions: the number of deleted characters
    * max_l_dist: the total number of substitutions, insertions and
      deletions (a.k.a. the Levenshtein distance)

    The limits are bundled into a LevenshteinSearchParams object, and the
    search function selected by choose_search_func() for those parameters
    is called; its return value is returned unchanged.
    """
    params = LevenshteinSearchParams(
        max_substitutions,
        max_insertions,
        max_deletions,
        max_l_dist,
    )
    return choose_search_func(params)(subsequence, sequence, params)
Esempio n. 12
0
def find_near_matches_in_file(subsequence,
                              sequence_file,
                              max_substitutions=None,
                              max_insertions=None,
                              max_deletions=None,
                              max_l_dist=None,
                              _chunk_size=2**20):
    """Find near-matches of `subsequence` in the contents of a file.

    A part of the file's contents counts as a near-match when, relative to
    the subsequence, it stays within each of these limits:

    * max_substitutions: the number of substituted characters
    * max_insertions: the number of newly inserted characters
    * max_deletions: the number of deleted characters
    * max_l_dist: the total number of substitutions, insertions and
      deletions (a.k.a. the Levenshtein distance)

    `sequence_file` is handed to _search_binary_file() when its `mode`
    attribute contains 'b' or when it is an io.RawIOBase instance, and to
    _search_unicode_file() otherwise. The matches found are passed through
    the chosen search class's consolidate_matches() before being returned.
    """
    search_params = LevenshteinSearchParams(
        max_substitutions, max_insertions, max_deletions, max_l_dist)
    search_class = choose_search_class(search_params)

    # Objects without a `mode` attribute fall back to the type check.
    is_binary = (
        'b' in getattr(sequence_file, 'mode', '')
        or isinstance(sequence_file, io.RawIOBase)
    )
    search_file = _search_binary_file if is_binary else _search_unicode_file

    matches = search_file(
        subsequence,
        sequence_file,
        search_params,
        search_class,
        _chunk_size=_chunk_size,
    )
    return search_class.consolidate_matches(matches)
Esempio n. 13
0
 def search(self, subsequence, sequence, max_subs):
     """Return hnm_generic_ngrams() results, allowing only substitutions."""
     # Insertions and deletions are disallowed; only substitutions count.
     limits = LevenshteinSearchParams(max_subs, 0, 0, max_subs)
     return hnm_generic_ngrams(subsequence, sequence, limits)
Esempio n. 14
0
 def search(self, subsequence, sequence, max_subs):
     """List the fnm_generic_lp() results, allowing only substitutions."""
     return list(
         fnm_generic_lp(
             subsequence, sequence,
             LevenshteinSearchParams(max_subs, 0, 0, max_subs)))
Esempio n. 15
0
 def search(self, subsequence, sequence, max_l_dist):
     """Run fnm_generic_lp() and consolidate the overlapping matches.

     max_l_dist is used as the limit for substitutions, insertions,
     deletions and the total distance alike.
     """
     return consolidate_overlapping_matches(
         fnm_generic_lp(
             subsequence, sequence,
             LevenshteinSearchParams(max_l_dist, max_l_dist, max_l_dist,
                                     max_l_dist)))
Esempio n. 16
0
 def search(self, subsequence, sequence, max_l_dist):
     """Return the result of fnm_generic_ngrams() unchanged.

     max_l_dist is used as the limit for substitutions, insertions,
     deletions and the total distance alike.
     """
     limits = LevenshteinSearchParams(
         max_l_dist, max_l_dist, max_l_dist, max_l_dist)
     return fnm_generic_ngrams(subsequence, sequence, limits)
def fnm_nodels_ngrams(sequence, subsequence, max_substitutions,
                      max_insertions, max_l_dist=None):
    """Call find_near_matches_no_deletions_ngrams() with deletions set to 0.

    The first two arguments are forwarded positionally, in the order given.
    """
    no_deletion_params = LevenshteinSearchParams(
        max_substitutions, max_insertions, 0, max_l_dist)
    return find_near_matches_no_deletions_ngrams(
        sequence, subsequence, no_deletion_params)
 def search(self, subsequence, sequence, max_subs):
     """List the c_fnm_generic_lp() results, allowing only substitutions."""
     # Insertions and deletions are disallowed; only substitutions count.
     limits = LevenshteinSearchParams(max_subs, 0, 0, max_subs)
     found = c_fnm_generic_lp(subsequence, sequence, limits)
     return list(found)