Beispiel #1
0
 def insert(self, string, next_id=0, string_pos=0, at_node=None, at_chain_pos=0):
     """
     Look up ``string`` in this subtrie and store it when it is not there yet.

     :param string: Sequence to insert; it is indexed with ``string_pos``.
     :param next_id: Pointer value handed to the string if it has to be stored. When ``new_insertion`` comes back ``True`` the caller should increment its own ``next_id``.
     :param string_pos: Position in ``string`` to continue from; a negative value means nothing of ``string`` is left to add. Replaced by the result of ``follow_trie`` when ``at_node`` is ``None``.
     :param at_node: Node to insert at. When ``None`` the insertion point is located with ``self.follow_trie(string, string_pos)``.
     :param at_chain_pos: Offset into the chain of ``at_node``; only read when ``at_node`` is given.
     :returns: tuple (new_insertion (boolean), id (integer)); ``id`` is the already stored pointer when ``new_insertion`` is ``False``, otherwise ``next_id``.
     """
     # The cached size is invalidated up front, even if nothing ends up being added.
     self.size_valid = False

     if at_node is not None:
         node, chain_pos = at_node, at_chain_pos
     else:
         node, chain_pos, string_pos = self.follow_trie(string, string_pos)

     string_consumed = string_pos < 0

     # The string ends exactly on a plain node: mark it, or report the
     # pointer it already carries. Chained nodes cannot have pointers, so a
     # node that starts a chain has to be split further below instead.
     if string_consumed and not node.starts_chain:
         if node.pointer is not None:
             return False, node.pointer
         node.pointer = next_id
         return True, node.pointer

     # Root of the subtree for the part of the string that is still missing
     # (None when the whole string was matched).
     if string_consumed:
         branch = None
     else:
         branch = self._convert_remaining_string_to_chain(string, string_pos, next_id)

     # Plain node: the remainder simply hangs off it as a new child.
     if not node.starts_chain:
         node.add_child(string[string_pos], branch)
         return True, next_id

     def build_tail(letters):
         # Node for the part of the chain behind the split point: its first
         # letter becomes the in-edge, the rest (if any) stays a chain.
         if len(letters) > 1:
             return Node(True, letters[0], len(letters) - 1, chain=letters[1:])
         return Node(False, letters[0], 1)

     def attach(owner):
         # Either hang the new subtree below ``owner`` or, when the string
         # ends at ``owner``, make ``owner`` the matching node.
         if branch is None:
             owner.pointer = next_id
         else:
             owner.children[branch.in_edge] = branch

     # ``node`` starts a chain that has to be broken open at ``chain_pos``.
     # With in-edge 'a', chain 'bc', old children Y and new subtree X:
     #   split before 'b' (start of chain):  a -> [X, b -> (c) -> Y]
     #   split between 'b' and 'c':          a -> b -> [X, c -> Y]
     #   split after 'c' (end of chain):     a -> b -> c -> [X, Y]
     # When there is no X (the string ends inside the chain) the node at
     # the split point receives the pointer instead of a new child.
     head, tail = node.chain[:chain_pos], node.chain[chain_pos:]

     if len(head) == 0 or (branch is None and chain_pos == -1):
         # Split in front of the chain: ``node`` turns into a plain
         # branching node and the chain moves into a fresh child.
         # NOTE(review): chain_pos == -1 presumably means "matched up to
         # the node itself, before its chain" - confirm against follow_trie.
         moved = node.chain if branch is None else tail
         right = build_tail(moved)
         right.children = node.children
         node.starts_chain = False
         node.chain = []
         node.children = {right.in_edge: right}
         attach(node)
     elif chain_pos >= 0 and len(head) < len(node.chain):
         # Split inside the chain: ``node`` keeps everything before the last
         # letter of ``head``; that letter becomes the new branching node.
         if len(head) > 1:
             node.chain = node.chain[:chain_pos - 1]
         else:
             node.starts_chain = False
             node.chain = []
         left = Node(False, head[-1], 1)
         right = build_tail(tail)
         right.children = node.children
         node.children = {left.in_edge: left}
         left.children[right.in_edge] = right
         attach(left)
     elif len(tail) == 0:
         # Split behind the chain: the last chain letter becomes a node of
         # its own and takes over the old children.
         left = Node(False, head[-1], 2)
         left.children = node.children
         node.children = {left.in_edge: left}
         attach(left)
         if len(head) > 1:
             node.chain = head[:-1]
         else:
             node.starts_chain = False
             node.chain = []
     # NOTE(review): if none of the three cases applies the trie is left
     # untouched, yet the call still reports a new insertion below.

     return True, next_id