Example #1
0
 def flatten_strings(self):
     return (utf8decode(k)[0] for k in self.flatten())
Example #2
0
    def prefix_string(self):
        """Returns the labels of the path from the root to the current arc as
        a decoded unicode string.
        """

        return utf8decode(self.prefix_bytes())[0]
Example #3
0
    def peek_key_string(self):
        """Returns the next closest key in the graph as a decoded unicode
        string.
        """

        return utf8decode(self.peek_key_bytes())[0]
Example #4
0
 def flatten_strings(self):
     return (utf8decode(k)[0] for k in self.flatten())
Example #5
0
def within(graph, text, k=1, prefix=0, address=None):
    """Yields a series of keys in the given graph within ``k`` edit distance of
    ``text``. If ``prefix`` is greater than 0, all keys must match the first
    ``prefix`` characters of ``text``.
    """

    text = to_labels(text)
    if address is None:
        address = graph._root

    sofar = emptybytes
    accept = False
    if prefix:
        prefixchars = text[:prefix]
        arc = graph.find_path(prefixchars, address=address)
        if arc is None:
            return
        sofar = emptybytes.join(prefixchars)
        address = arc.target
        accept = arc.accept

    stack = [(address, k, prefix, sofar, accept)]
    seen = set()
    while stack:
        state = stack.pop()
        # Have we already tried this state?
        if state in seen:
            continue
        seen.add(state)

        address, k, i, sofar, accept = state
        # If we're at the end of the text (or deleting enough chars would get
        # us to the end and still within K), and we're in the accept state,
        # yield the current result
        if (len(text) - i <= k) and accept:
            yield utf8decode(sofar)[0]

        # If we're in the stop state, give up
        if address is None:
            continue

        # Exact match
        if i < len(text):
            arc = graph.find_arc(address, text[i])
            if arc:
                stack.append((arc.target, k, i + 1, sofar + text[i],
                              arc.accept))
        # If K is already 0, can't do any more edits
        if k < 1:
            continue
        k -= 1

        arcs = graph.arc_dict(address)
        # Insertions
        stack.extend((arc.target, k, i, sofar + char, arc.accept)
                     for char, arc in iteritems(arcs))

        # Deletion, replacement, and transpo only work before the end
        if i >= len(text):
            continue
        char = text[i]

        # Deletion
        stack.append((address, k, i + 1, sofar, False))
        # Replacement
        for char2, arc in iteritems(arcs):
            if char2 != char:
                stack.append((arc.target, k, i + 1, sofar + char2, arc.accept))
        # Transposition
        if i < len(text) - 1:
            char2 = text[i + 1]
            if char != char2 and char2 in arcs:
                # Find arc from next char to this char
                target = arcs[char2].target
                if target:
                    arc = graph.find_arc(target, char)
                    if arc:
                        stack.append((arc.target, k, i + 2,
                                      sofar + char2 + char, arc.accept))
Example #6
0
    def peek_key_string(self):
        """Returns the next closest key in the graph as a decoded unicode
        string.
        """

        return utf8decode(self.peek_key_bytes())[0]
Example #7
0
    def prefix_string(self):
        """Returns the labels of the path from the root to the current arc as
        a decoded unicode string.
        """

        return utf8decode(self.prefix_bytes())[0]
Example #8
0
def within(graph, text, k=1, prefix=0, address=None):
    """Yields a series of keys in the given graph within ``k`` edit distance of
    ``text``. If ``prefix`` is greater than 0, all keys must match the first
    ``prefix`` characters of ``text``.
    """

    text = to_labels(text)
    if address is None:
        address = graph._root

    sofar = emptybytes
    accept = False
    if prefix:
        prefixchars = text[:prefix]
        arc = graph.find_path(prefixchars, address=address)
        if arc is None:
            return
        sofar = emptybytes.join(prefixchars)
        address = arc.target
        accept = arc.accept

    stack = [(address, k, prefix, sofar, accept)]
    seen = set()
    while stack:
        state = stack.pop()
        # Have we already tried this state?
        if state in seen:
            continue
        seen.add(state)

        address, k, i, sofar, accept = state
        # If we're at the end of the text (or deleting enough chars would get
        # us to the end and still within K), and we're in the accept state,
        # yield the current result
        if (len(text) - i <= k) and accept:
            yield utf8decode(sofar)[0]

        # If we're in the stop state, give up
        if address is None:
            continue

        # Exact match
        if i < len(text):
            arc = graph.find_arc(address, text[i])
            if arc:
                stack.append((arc.target, k, i + 1, sofar + text[i],
                              arc.accept))
        # If K is already 0, can't do any more edits
        if k < 1:
            continue
        k -= 1

        arcs = graph.arc_dict(address)
        # Insertions
        stack.extend((arc.target, k, i, sofar + char, arc.accept)
                     for char, arc in iteritems(arcs))

        # Deletion, replacement, and transpo only work before the end
        if i >= len(text):
            continue
        char = text[i]

        # Deletion
        stack.append((address, k, i + 1, sofar, False))
        # Replacement
        for char2, arc in iteritems(arcs):
            if char2 != char:
                stack.append((arc.target, k, i + 1, sofar + char2, arc.accept))
        # Transposition
        if i < len(text) - 1:
            char2 = text[i + 1]
            if char != char2 and char2 in arcs:
                # Find arc from next char to this char
                target = arcs[char2].target
                if target:
                    arc = graph.find_arc(target, char)
                    if arc:
                        stack.append((arc.target, k, i + 2,
                                      sofar + char2 + char, arc.accept))
Example #9
0
 def from_bytes(self, bs):
     return utf8decode(bs)[0]
Example #10
0
 def from_bytes(self, bs):
     return utf8decode(bs)[0]