def insert(self, string, next_id=0, string_pos=0, at_node=None, at_chain_pos=0):
    """
    Look up ``string`` in the subtrie and add it when it is not stored yet.

    :param string: Sequence being looked up / inserted.
    :param next_id: Pointer value to assign if a new matching node has to be
        created. When ``new_insertion`` comes back ``True`` the caller
        should advance its own ``next_id``.
    :param string_pos: Position in ``string`` to continue from. It is handed
        to ``follow_trie`` when ``at_node`` is ``None`` and used as given
        otherwise. A negative value means no new subtree is built for
        ``string``; the pointer is placed on an existing or split-off node.
    :param at_node: Node to insert at. When ``None`` the node (together with
        the chain position and string position) comes from ``follow_trie``.
    :param at_chain_pos: Position inside the chain of ``at_node``; only read
        when ``at_node`` is given.
    :returns: tuple (new_insertion (boolean), id (integer)). ``(False, id)``
        is returned only when the located non-chain node already carries a
        pointer; every other path returns ``True``.
    """
    self.size_valid = False

    if at_node is not None:
        target, split_at = at_node, at_chain_pos
    else:
        target, split_at, string_pos = self.follow_trie(string, string_pos)

    nothing_left = string_pos < 0

    # A node that starts a chain must be split before it can match, because
    # chained nodes cannot carry pointers. Only a plain node can be resolved
    # right here.
    if nothing_left and not target.starts_chain:
        if target.pointer is not None:
            return False, target.pointer
        target.pointer = next_id
        return True, target.pointer

    # Root of the subtree for the unmatched remainder of ``string``, or
    # ``None`` when an existing position merely has to become a match.
    if nothing_left:
        branch = None
    else:
        branch = self._convert_remaining_string_to_chain(string, string_pos, next_id)

    if not target.starts_chain:
        target.add_child(string[string_pos], branch)
        return True, next_id

    def mark_or_branch(owner):
        # Turn ``owner`` into the matching node, or hang the new subtree
        # off it when there is one.
        if branch is None:
            owner.pointer = next_id
        else:
            owner.children[branch.in_edge] = branch

    def split_off(tail):
        # Build the node that carries ``tail`` (the cut-off right part of
        # the chain) and takes over the current children of ``target``.
        if len(tail) > 1:
            moved = Node(True, tail[0], len(tail) - 1, chain=tail[1:])
        else:
            moved = Node(False, tail[0], 1)
        moved.children = target.children
        return moved

    def keep_all_but_last(head):
        # ``target`` keeps ``head`` minus its final element (that element
        # became a node of its own); with nothing left it stops being a
        # chain node altogether.
        if len(head) > 1:
            target.chain = head[:-1]
        else:
            target.starts_chain = False
            target.chain = []

    # Y is a branching and/or matching node; X is the newly inserted part.
    #
    # Case 1 - insert at the start of the chain
    #          (in-edge 'a', chain 'bc'):
    #   -a-*(^bc)-Y
    #   -> -a-*-b-*(c)-Y
    #         \-X
    # Case 2 - insert in the middle of the chain:
    #   -a-*(b^c)-Y
    #   -> -a-*-b-*-c-*-Y
    #             \-X
    # Case 3 - insert at the end of the chain:
    #   -a-*(bc^)-Y
    #   -> -a-*-b-*-c-*-Y
    #                 \-X
    # Case 4 - the string already ends inside the chained node; handled
    #          within each case through the ``branch is None`` path.
    #
    # Roles:
    #   branch - root of the new subtree for the new string
    #   target - the old node; its chain gets cut off
    #   pivot  - last element of the left chain part; branches at least 2
    #   moved  - right chain part; takes over the old node's children
    head, tail = target.chain[:split_at], target.chain[split_at:]

    if len(head) == 0 or (branch is None and split_at == -1):
        # Cases 1 and 4
        if branch is None:
            tail = target.chain
        moved = split_off(tail)
        target.starts_chain = False
        target.chain = []
        target.children = {moved.in_edge: moved}
        mark_or_branch(target)
    elif split_at >= 0 and len(head) < len(target.chain):
        # Case 2
        keep_all_but_last(head)
        pivot = Node(False, head[-1], 1)
        moved = split_off(tail)
        target.children = {pivot.in_edge: pivot}
        pivot.children[moved.in_edge] = moved
        mark_or_branch(pivot)
    elif len(tail) == 0:
        # Case 3
        pivot = Node(False, head[-1], 2)
        pivot.children = target.children
        target.children = {pivot.in_edge: pivot}
        mark_or_branch(pivot)
        keep_all_but_last(head)
    # NOTE(review): when none of the cases above applies the trie is left
    # untouched, yet (True, next_id) is still returned below - confirm that
    # follow_trie / callers never produce such a chain position.

    return True, next_id