def test_str_includes_node_finality(self):
    """The string form of a node must distinguish final from non-final nodes,
    and two equally-final nodes must stringify identically."""
    plain = TokenSet()
    accepting = TokenSet()
    accepting_twin = TokenSet()
    accepting.final = True
    accepting_twin.final = True
    # Finality changes the string; identical finality yields identical strings.
    assert str(plain) != str(accepting)
    assert str(accepting_twin) == str(accepting)
def test_str_includes_all_edges(self):
    """The string form must reflect every outgoing edge, so nodes with
    different edge sets must stringify differently."""
    no_edges = TokenSet()
    single_edge = TokenSet()
    double_edge = TokenSet()
    single_edge.edges["a"] = 1
    double_edge.edges["a"] = 1
    double_edge.edges["b"] = 1
    # Each pairwise combination of edge counts must be distinguishable.
    assert str(no_edges) != str(single_edge)
    assert str(double_edge) != str(single_edge)
    assert str(double_edge) != str(no_edges)
def test_str_includes_edge_id(self):
    """The string form must incorporate the identity of each edge's target:
    parents pointing at the same child stringify equal, different children
    stringify different."""
    child_one = TokenSet()
    child_two = TokenSet()
    points_at_one = TokenSet()
    points_at_two = TokenSet()
    also_points_at_two = TokenSet()
    points_at_one.edges["a"] = child_one
    points_at_two.edges["a"] = child_two
    also_points_at_two.edges["a"] = child_two
    # Same target child -> same string; different target -> different string.
    assert str(points_at_two) == str(also_points_at_two)
    assert str(points_at_one) != str(also_points_at_two)
    assert str(points_at_one) != str(points_at_two)
def insert(self, word):
    """Insert `word` into the trie being built.

    Words must arrive in lexicographic order (incremental minimal-automaton
    construction depends on it); out-of-order input raises BaseLunrException.
    Only the suffix beyond the prefix shared with the previously inserted
    word creates new nodes; everything past that prefix from the previous
    word is frozen via `self.minimize`.
    """
    if word < self.previous_word:
        raise BaseLunrException("Out of order word insertion")

    # Length of the prefix shared with the previously inserted word.
    common_prefix = 0
    for prev_char, new_char in zip(self.previous_word, word):
        if prev_char != new_char:
            break
        common_prefix += 1

    # Nodes beyond the shared prefix can no longer gain edges; fold
    # duplicates into the minimized set.
    self.minimize(common_prefix)

    # Resume from the deepest still-unchecked node, or the root if none.
    if self.unchecked_nodes:
        node = self.unchecked_nodes[-1]["child"]
    else:
        node = self.root

    # Grow a fresh chain of nodes for the unshared suffix of the word.
    for char in word[common_prefix:]:
        child = TokenSet()
        node.edges[char] = child
        self.unchecked_nodes.append({
            "parent": node,
            "char": char,
            "child": child,
        })
        node = child

    node.final = True
    self.previous_word = word
def __init__(self):
    """Set up an empty builder: a bare root node, no words inserted yet,
    and empty bookkeeping for unchecked and minimized nodes."""
    self.root = TokenSet()
    # Last word inserted; insertion order is validated against it.
    self.previous_word = ""
    # Nodes whose subtrees may still change (stack, deepest last).
    self.unchecked_nodes = []
    # str(node) -> node map of frozen, deduplicated nodes.
    self.minimized_nodes = {}