コード例 #1
0
ファイル: reading.py プロジェクト: intabeta/inta
    def terms_within(self, fieldname, text, maxdist, prefix=0):
        """Yields the words of the given field that lie within ``maxdist``
        Damerau-Levenshtein edits of ``text``.

        Candidate words are taken from
        ``self.expand_prefix(fieldname, text[:prefix])``; a candidate is
        yielded when ``distance(candidate, text, limit=maxdist)`` does not
        exceed ``maxdist``.

        Important: matches come back in **no particular order**. The only
        guarantee is that each one is within ``maxdist`` edits of ``text``.
        To see the closest matches first, call this method repeatedly with
        increasing ``maxdist`` values. Extra information (such as term
        frequency or an acoustic matching algorithm) can be used to rank
        terms that share the same edit distance.

        :param fieldname: the name of the field whose terms are examined.
        :param text: the word the field's terms are compared against.
        :param maxdist: the maximum edit distance.
        :param prefix: require suggestions to share a prefix of this length
            with the given word. This is often justifiable since most
            misspellings do not involve the first letter of the word.
            Using a prefix dramatically decreases the time it takes to generate
            the list of words.
        """

        stem = text[:prefix]
        for candidate in self.expand_prefix(fieldname, stem):
            # Skip anything further away than the caller allows.
            if distance(candidate, text, limit=maxdist) > maxdist:
                continue
            yield candidate
コード例 #2
0
ファイル: reading.py プロジェクト: sudhir-12/spoken-website
    def terms_within(self, fieldname, text, maxdist, prefix=0):
        """
        Yields the words of the given field that lie within ``maxdist``
        Damerau-Levenshtein edits of ``text``.

        Candidate terms are taken from
        ``self.expand_prefix(fieldname, text[:prefix])`` and converted with
        the field object's ``from_bytes`` before being compared. A converted
        word is yielded when ``distance(word, text, limit=maxdist)`` does not
        exceed ``maxdist``.

        Important: matches come back in **no particular order**. The only
        guarantee is that each one is within ``maxdist`` edits of ``text``.
        To see the closest matches first, call this method repeatedly with
        increasing ``maxdist`` values. Extra information (such as term
        frequency or an acoustic matching algorithm) can be used to rank
        terms that share the same edit distance.

        :param fieldname: the name of the field whose terms are examined; it
            is also used to look the field object up in ``self.schema``.
        :param text: the word the field's terms are compared against.
        :param maxdist: the maximum edit distance.
        :param prefix: require suggestions to share a prefix of this length
            with the given word. This is often justifiable since most
            misspellings do not involve the first letter of the word.
            Using a prefix dramatically decreases the time it takes to generate
            the list of words.
        """

        field = self.schema[fieldname]
        stem = text[:prefix]
        for raw_term in self.expand_prefix(fieldname, stem):
            # expand_prefix output is passed through from_bytes before it is
            # compared against ``text``.
            candidate = field.from_bytes(raw_term)
            if distance(candidate, text, limit=maxdist) > maxdist:
                continue
            yield candidate
コード例 #3
0
ファイル: reading.py プロジェクト: NimbleGiraffe/tits
    def terms_within(self, fieldname, text, maxdist, prefix=0, seen=None):
        """Yields the words of the given field that lie within ``maxdist``
        Damerau-Levenshtein edits of ``text``.

        When ``self.has_word_graph(fieldname)`` is true the search is
        delegated to ``within()`` over ``self.word_graph(fieldname)``.
        Otherwise every term from
        ``self.expand_prefix(fieldname, text[:prefix])`` is checked: a term
        equal to ``text`` is accepted immediately, any other term is accepted
        when ``distance(term, text, limit=maxdist)`` does not exceed
        ``maxdist``.

        :param fieldname: the name of the field whose terms are examined.
        :param text: the word the field's terms are compared against.
        :param maxdist: the maximum edit distance.
        :param prefix: require suggestions to share a prefix of this length
            with the given word. This is often justifiable since most
            misspellings do not involve the first letter of the word.
            Using a prefix dramatically decreases the time it takes to generate
            the list of words.
        :param seen: an optional set object. Words that appear in the set will
            not be yielded. On the non-graph path every yielded word is added
            to this set.
        """

        if self.has_word_graph(fieldname):
            graph_root = self.word_graph(fieldname)
            for match in within(graph_root, text, maxdist, prefix=prefix,
                                seen=seen):
                yield match
            return

        # No word graph: fall back to scanning the expanded prefix.
        seen = set() if seen is None else seen
        for candidate in self.expand_prefix(fieldname, text[:prefix]):
            if candidate in seen:
                continue
            # An exact match never needs the edit-distance computation.
            if (candidate != text
                and distance(candidate, text, limit=maxdist) > maxdist):
                continue
            yield candidate
            seen.add(candidate)
コード例 #4
0
    def terms_within(self, fieldname, text, maxdist, prefix=0, seen=None):
        """Yields the words of the given field that lie within ``maxdist``
        Damerau-Levenshtein edits of ``text``.

        When ``self.has_word_graph(fieldname)`` is true the search is
        delegated to ``within()`` over ``self.word_graph(fieldname)``.
        Otherwise every term from
        ``self.expand_prefix(fieldname, text[:prefix])`` is accepted when
        ``distance(term, text, limit=maxdist)`` does not exceed ``maxdist``.

        :param fieldname: the name of the field whose terms are examined.
        :param text: the word the field's terms are compared against.
        :param maxdist: the maximum edit distance.
        :param prefix: require suggestions to share a prefix of this length
            with the given word. This is often justifiable since most
            misspellings do not involve the first letter of the word.
            Using a prefix dramatically decreases the time it takes to generate
            the list of words.
        :param seen: an optional set object. Words that appear in the set will
            not be yielded. On the non-graph path every yielded word is added
            to this set.
        """

        if self.has_word_graph(fieldname):
            graph_root = self.word_graph(fieldname)
            for match in within(graph_root, text, maxdist, prefix=prefix,
                                seen=seen):
                yield match
            return

        # No word graph: fall back to scanning the expanded prefix.
        seen = set() if seen is None else seen
        for candidate in self.expand_prefix(fieldname, text[:prefix]):
            if candidate in seen:
                continue
            if distance(candidate, text, limit=maxdist) > maxdist:
                continue
            yield candidate
            seen.add(candidate)
コード例 #5
0
 def keyfn(a):
     """Return the ``distance`` between ``text`` and ``a[0]``.

     ``text`` is not a parameter; it is resolved from the surrounding scope.
     """
     candidate = a[0]
     return distance(text, candidate)
コード例 #6
0
 def keyfn(a):
     """Return a key that grows more negative as ``a[0]`` gets closer to
     ``text`` and as the weight ``a[1]`` gets larger.

     The key is ``-(1 / distance(text, a[0])) * a[1]``. ``text`` is not a
     parameter; it is resolved from the surrounding scope.

     :param a: an indexable item; ``a[0]`` is compared against ``text`` and
         ``a[1]`` is the multiplier (presumably a score -- confirm against
         the caller).
     :returns: a float key; ``-inf`` when the distance is zero.
     """
     dist = distance(text, a[0])
     if not dist:
         # A zero distance (a[0] identical to text) used to raise
         # ZeroDivisionError. The formula tends to -inf as the distance
         # approaches zero (for a positive weight), so return that limit.
         return float("-inf")
     # A float numerator keeps the reciprocal from truncating to 0 where "/"
     # is integer division (Python 2 without true division).
     return 0 - (1.0 / dist) * a[1]
コード例 #7
0
ファイル: spelling.py プロジェクト: ladiaria/utopia-cms
 def keyfn(a):
     """Return the ``distance`` between ``text`` and ``a[0]``.

     ``text`` is not a parameter; it is resolved from the surrounding scope.
     """
     candidate = a[0]
     return distance(text, candidate)
コード例 #8
0
ファイル: spelling.py プロジェクト: ladiaria/utopia-cms
 def keyfn(a):
     """Return a key that grows more negative as ``a[0]`` gets closer to
     ``text`` and as the weight ``a[1]`` gets larger.

     The key is ``-(1 / distance(text, a[0])) * a[1]``. ``text`` is not a
     parameter; it is resolved from the surrounding scope.

     :param a: an indexable item; ``a[0]`` is compared against ``text`` and
         ``a[1]`` is the multiplier (presumably a score -- confirm against
         the caller).
     :returns: a float key; ``-inf`` when the distance is zero.
     """
     dist = distance(text, a[0])
     if not dist:
         # A zero distance (a[0] identical to text) used to raise
         # ZeroDivisionError. The formula tends to -inf as the distance
         # approaches zero (for a positive weight), so return that limit.
         return float("-inf")
     # A float numerator keeps the reciprocal from truncating to 0 where "/"
     # is integer division (Python 2 without true division).
     return 0 - (1.0 / dist) * a[1]