def flatten_strings(self):
    """Returns a generator over the keys produced by ``self.flatten()``,
    each one passed through ``utf8decode`` so it comes out as a decoded
    unicode string.
    """

    # flatten() is invoked right away; only the decoding is lazy.
    raw_keys = self.flatten()
    return (utf8decode(raw_key)[0] for raw_key in raw_keys)
def prefix_string(self):
    """Returns the labels on the path from the root to the current arc,
    decoded into a unicode string.

    The raw labels come from ``self.prefix_bytes()``.
    """

    raw_prefix = self.prefix_bytes()
    # utf8decode returns a sequence whose first item is the decoded text.
    decoded = utf8decode(raw_prefix)
    return decoded[0]
def peek_key_string(self):
    """Returns the next closest key in the graph, decoded into a unicode
    string.

    The raw key comes from ``self.peek_key_bytes()``.
    """

    raw_key = self.peek_key_bytes()
    # utf8decode returns a sequence whose first item is the decoded text.
    decoded = utf8decode(raw_key)
    return decoded[0]
def within(graph, text, k=1, prefix=0, address=None):
    """Generates the keys of ``graph`` that lie within ``k`` edits of
    ``text``.

    The graph is explored depth-first using an explicit stack of search
    states ``(address, edits left, position in text, bytes built so far,
    accept flag)``. The edits tried are insertion, deletion, replacement
    and transposition of two adjacent labels.

    Only identical search states are skipped, so results are not
    de-duplicated: a key that can be reached through different edit
    sequences may be yielded more than once.

    :param graph: the graph to search; must provide ``find_path``,
        ``find_arc``, ``arc_dict`` and a ``_root`` attribute.
    :param text: the key to match against (converted with ``to_labels``).
    :param k: the maximum number of edits allowed.
    :param prefix: if greater than 0, every result must start with the
        first ``prefix`` labels of ``text``.
    :param address: the address to start searching from; defaults to
        ``graph._root``.
    """

    labels = to_labels(text)
    textlen = len(labels)
    if address is None:
        address = graph._root

    built = emptybytes
    accepting = False
    if prefix:
        # The prefix must match exactly; follow it before any fuzzy search.
        head = labels[:prefix]
        start = graph.find_path(head, address=address)
        if start is None:
            return
        built = emptybytes.join(head)
        address = start.target
        accepting = start.accept

    pending = [(address, k, prefix, built, accepting)]
    visited = set()
    while pending:
        state = pending.pop()
        # Skip states that were already expanded
        if state in visited:
            continue
        visited.add(state)

        node, budget, pos, built, accepting = state

        # Whatever is left of the text can be deleted within the remaining
        # budget, so an accepting state here is a match.
        if (textlen - pos <= budget) and accepting:
            yield utf8decode(built)[0]

        # The stop state has no outgoing arcs
        if node is None:
            continue

        at_end = pos >= textlen
        if not at_end:
            current = labels[pos]
            # Exact match: consume one label of the text at no cost
            exact = graph.find_arc(node, current)
            if exact:
                pending.append((exact.target, budget, pos + 1,
                                built + current, exact.accept))

        # Every move below costs one edit
        if budget < 1:
            continue
        budget -= 1

        outgoing = graph.arc_dict(node)

        # Insertions: follow an arc without consuming any text
        for label, arc in iteritems(outgoing):
            pending.append((arc.target, budget, pos, built + label,
                            arc.accept))

        # Deletion, replacement and transposition need a label to act on
        if at_end:
            continue

        # Deletion: consume a label of the text without moving in the graph
        pending.append((node, budget, pos + 1, built, False))

        # Replacement: follow any arc other than the expected one
        pending.extend((arc.target, budget, pos + 1, built + label,
                        arc.accept)
                       for label, arc in iteritems(outgoing)
                       if label != current)

        # Transposition: match the next two labels in swapped order
        if pos + 1 < textlen:
            following = labels[pos + 1]
            if current != following and following in outgoing:
                midpoint = outgoing[following].target
                if midpoint:
                    swapped = graph.find_arc(midpoint, current)
                    if swapped:
                        pending.append((swapped.target, budget, pos + 2,
                                        built + following + current,
                                        swapped.accept))
def from_bytes(self, bs):
    """Converts the bytes ``bs`` into a value by running them through
    ``utf8decode`` and returning the first item of its result.
    """

    decoded = utf8decode(bs)
    return decoded[0]