예제 #1
0
    def test_str_includes_node_finality(self):
        """The string form of a TokenSet must reflect its ``final`` flag."""
        non_final, final, other_final = TokenSet(), TokenSet(), TokenSet()

        # Flag two of the three nodes as final; the first is left untouched.
        for node in (final, other_final):
            node.final = True

        final_repr = str(final)
        assert str(non_final) != final_repr
        assert str(other_final) == final_repr
예제 #2
0
    def test_str_includes_all_edges(self):
        """Every outgoing edge must contribute to the string form."""
        zero_edges, one_edge, two_edges = TokenSet(), TokenSet(), TokenSet()

        # Build nodes with one and two labelled edges; zero_edges gets none.
        one_edge.edges["a"] = 1
        for label in ("a", "b"):
            two_edges.edges[label] = 1

        none_repr = str(zero_edges)
        single_repr = str(one_edge)
        double_repr = str(two_edges)

        assert none_repr != single_repr
        assert double_repr != single_repr
        assert double_repr != none_repr
예제 #3
0
    def test_str_includes_edge_id(self):
        """The string form must depend on which child an edge points at."""
        child_a, child_b = TokenSet(), TokenSet()
        parent_a, parent_b, parent_c = TokenSet(), TokenSet(), TokenSet()

        # parent_b and parent_c share a child; parent_a points at another one.
        wiring = (
            (parent_a, child_a),
            (parent_b, child_b),
            (parent_c, child_b),
        )
        for parent, child in wiring:
            parent.edges["a"] = child

        repr_a, repr_b, repr_c = str(parent_a), str(parent_b), str(parent_c)
        assert repr_b == repr_c
        assert repr_a != repr_c
        assert repr_a != repr_b
예제 #4
0
    def insert(self, word):
        """Add ``word`` to the structure being built.

        Each call must pass a word that does not compare less than the
        previously inserted one.

        Args:
            word: The word to insert; it is compared with, and walked in
                step with, ``self.previous_word``.

        Raises:
            BaseLunrException: If ``word`` compares less than
                ``self.previous_word``.
        """
        previous = self.previous_word
        if word < previous:
            raise BaseLunrException("Out of order word insertion")

        # Count the leading characters shared with the previous word.
        shared = 0
        for current_char, previous_char in zip(word, previous):
            if current_char != previous_char:
                break
            shared += 1

        self.minimize(shared)

        # Resume from the child of the most recently recorded unchecked
        # node, or from the root when no unchecked nodes remain.
        if self.unchecked_nodes:
            node = self.unchecked_nodes[-1]["child"]
        else:
            node = self.root

        # Create one fresh node per remaining character and record each new
        # edge in unchecked_nodes.
        for char in word[shared:]:
            child = TokenSet()
            node.edges[char] = child
            self.unchecked_nodes.append(
                {"parent": node, "char": char, "child": child}
            )
            node = child

        node.final = True
        self.previous_word = word
예제 #5
0
 def __init__(self):
     """Set up the initial, empty state of the instance."""
     # Fresh root node.
     self.root = TokenSet()
     # No word has been seen yet, so start from the empty string.
     self.previous_word = ""
     # Both bookkeeping containers start out empty.
     self.minimized_nodes = {}
     self.unchecked_nodes = []