def _new_subtries(self, st, new_roots, join=True):
    """
    Break off a new subtrie at the end of every path in ``new_roots``.

    For each path, ``_break_off_node`` is called with the second-to-last and
    last path elements. Every resulting subtrie whose ``serialized_size()``
    exceeds ``self.block_size`` is passed to ``split_subtrie``.

    With ``join`` enabled, each (grandparent, parent) pair collected from
    paths longer than two elements is then examined. If every child of the
    parent carries a subtrie pointer, and the sum of the pointed-to
    subtries' serialized sizes (plus one per child) plus
    ``self.subtries[0].sizeof(parent)`` is below ``self.block_size``, the
    pair is handed to ``_join_subtries``.

    :param st: not used by this method.
    :param new_roots: iterable of node paths (indexable sequences); only the
        last three elements of each path are read.
    :param join: when true, run the join pass. Note that this also writes
        ``jtrie_old.dot`` via ``write_graphviz`` before joining.
    """
    created = set()
    join_candidates = set()
    for path in new_roots:
        created.add(self._break_off_node(path[-2], path[-1]))
        if len(path) > 2:
            join_candidates.add((path[-3], path[-2]))

    for subtrie in created:
        if subtrie.serialized_size() > self.block_size:
            self.split_subtrie(subtrie)

    if not join:
        return

    # NOTE(review): this dump looks like a debugging aid; kept because
    # callers may rely on the file being produced.
    self.write_graphviz("jtrie_old.dot")
    for grandparent, parent in join_candidates:
        children_size = 0
        for child in parent.children.values():
            if child.pointer is None:
                break
            points_to_subtrie, target = util.unpack_pointer(child.pointer, self.pointer_size)
            if not points_to_subtrie:
                break
            children_size += self.subtries[target].serialized_size() + 1
        else:
            # Reached only when no child caused a break, i.e. every child
            # holds a subtrie pointer (or the parent has no children).
            if children_size + self.subtries[0].sizeof(parent) < self.block_size:
                self._join_subtries(grandparent, parent)
def _join_subtries(self, grandparent, parent):
    """
    Merge the subtries hanging off ``parent``'s children into one subtrie.

    Each child of ``parent`` is replaced by the root of the subtrie its
    pointer refers to; the replacement root takes over the child's
    ``in_edge`` and the absorbed subtrie is removed from ``self.subtries``.
    Finally ``parent`` is broken off from ``grandparent`` with
    ``_break_off_node``.

    The pointer of every child is unpacked without checking its kind, so
    the caller must ensure that all children hold subtrie pointers.

    :param grandparent: node passed as first argument to ``_break_off_node``.
    :param parent: node whose children are replaced in place.
    """
    # Iterate over a snapshot: the loop rebinds entries of parent.children,
    # which should not happen underneath a live dict iterator.
    for key, child in list(parent.children.iteritems()):
        edge = child.in_edge
        subtrie_id = util.unpack_pointer(child.pointer, self.pointer_size)[1]
        absorbed_root = self.subtries[subtrie_id].root
        parent.children[key] = absorbed_root
        absorbed_root.in_edge = edge
        del self.subtries[subtrie_id]
    # The resulting subtrie is not needed here, so the return value is
    # intentionally discarded.
    self._break_off_node(grandparent, parent)
def _follow_subtrie_roots(self, subtrie_id, node):
    """
    Follow subtrie pointers from ``node`` until a node without one is found.

    While the current node's pointer unpacks as a subtrie pointer, move to
    the root of the referenced subtrie and repeat.

    :returns: (subtrie_id, node, ptr) for the node where the walk stopped.
        ``ptr`` is -1 if that node has no pointer, otherwise the unpacked
        value of its (non-subtrie) pointer.
    """
    current_id, current = subtrie_id, node
    while current.pointer is not None:
        crosses_subtrie, target = util.unpack_pointer(current.pointer, self.pointer_size)
        if not crosses_subtrie:
            return current_id, current, target
        current_id, current = target, self.subtries[target].root
    return current_id, current, -1
def write_graphviz(self, path="jtrie.dot"):
    """
    Write a Graphviz DOT rendering of all subtries to ``path``.

    Every entry of ``self.subtries`` becomes one ``subgraph cluster<N>``.
    Nodes without a pointer are drawn as unlabelled circles; nodes with a
    pointer are drawn as double circles labelled with the unpacked pointer
    value. Edges to children are labelled with the child's ``in_edge``
    character followed by its ``chain`` characters. A node holding a
    subtrie pointer additionally gets an edge to the root of the referenced
    subtrie, emitted after the cluster is closed. ``continuation`` nodes
    are traversed, but no edge is drawn to them.

    :param path: file to (over)write with the DOT source.
    """

    def dot_label(chars):
        # Labels are emitted inside double-quoted DOT strings, where a bare
        # '"' would terminate the string and a backslash starts an escape
        # sequence. Keys can contain either, so escape both.
        return "".join(chars).replace("\\", "\\\\").replace('"', '\\"')

    with open(path, "w") as f:
        f.write("digraph jtrie {\ngraph[margin=0, rankdir=LR];\nnode[shape=circle];\n")
        for label, subtrie in self.subtries.iteritems():
            f.write('subgraph cluster%d {\ngraph[label="%d"]' % (label, label))
            # Edges that leave this cluster are collected and written after
            # the cluster's closing brace.
            cross_subtrie_edges = []
            traversal_stack = deque([subtrie.root])
            while traversal_stack:
                node = traversal_stack.pop()
                if node.pointer is None:
                    f.write('%d[label=""];\n' % node._node_id)
                else:
                    is_subtrie_ptr, ptr = util.unpack_pointer(node.pointer, self.pointer_size)
                    # Both pointer kinds are rendered identically.
                    f.write('%d[label="%d", shape=doublecircle];\n' % (node._node_id, ptr))
                    if is_subtrie_ptr:
                        other_root = self.subtries[ptr].root
                        if other_root.starts_chain:
                            cross_subtrie_edges.append(
                                '%d->%d[label="%s"];\n'
                                % (
                                    node._node_id,
                                    other_root._node_id,
                                    dot_label(chr(c) for c in other_root.chain),
                                )
                            )
                        else:
                            cross_subtrie_edges.append(
                                "%d->%d;\n" % (node._node_id, other_root._node_id)
                            )
                children = node.children.values()
                for child in children:
                    f.write(
                        '%d->%d[label="%s"];\n'
                        % (
                            node._node_id,
                            child._node_id,
                            dot_label([chr(child.in_edge)] + [chr(c) for c in child.chain]),
                        )
                    )
                if node.continuation is not None:
                    traversal_stack.append(node.continuation)
                traversal_stack.extend(children)
            f.write("}\n")
            for line in cross_subtrie_edges:
                f.write(line)
        f.write("}\n")
def _follow_jtrie(self, string, string_pos=0, subtrie_id=0):
    """
    Look ``string`` up across subtries, starting in ``subtrie_id``.

    The current subtrie's ``follow_trie`` is asked to consume the string
    from ``string_pos``. If it stops on a node whose pointer refers to
    another subtrie while input remains, the search continues in that
    subtrie from the reported position.

    :returns: (subtrie_id, node, string_pointer, string_pos, chain_pos).
        string_pointer will be -1 if the final node has no pointer or the
        end of the string was not reached; otherwise it is the value
        produced by ``_follow_subtrie_roots``. string_pos will be -1 if the
        end of the string was reached. chain_pos is passed through
        unchanged from the last ``follow_trie`` call.
    """
    while True:
        node, chain_pos, string_pos = self.subtries[subtrie_id].follow_trie(string, string_pos)

        if node.pointer is None:
            return subtrie_id, node, -1, string_pos, chain_pos

        if string_pos == -1:
            # Whole string consumed: resolve any chain of subtrie roots.
            subtrie_id, node, ptr = self._follow_subtrie_roots(subtrie_id, node)
            return subtrie_id, node, ptr, string_pos, chain_pos

        crosses_subtrie, target = util.unpack_pointer(node.pointer, self.pointer_size)
        if not crosses_subtrie:
            return subtrie_id, node, -1, string_pos, chain_pos

        # Input remains and the node links to another subtrie: keep going.
        subtrie_id = target