def dfs_iter(self, starting_node=None, starting_lru=''):
    '''
    Generator performing an iterative depth-first traversal of the trie.

    Args:
        starting_node: node to start from. When falsy, the traversal starts
            from the trie's root.
        starting_lru: lru associated with the starting node. When a starting
            node is given, its last stem is dropped before the traversal
            (presumably because it is the starting node's own stem, which is
            appended again when that node is visited -- verify with callers).

    Yields:
        (node, lru) tuples, where lru is the concatenation of the stems
        leading to the node, the node's own stem included. The very same
        node object is re-read and yielded at each step, so callers must not
        keep references to it across iterations.
    '''
    starting_from_root = not starting_node

    if starting_from_root:
        # Reading the root only once: the same node gives us both the
        # existence flag and the starting block.
        starting_node = self.root()
    else:
        starting_lru = ''.join(list(lru_iter(starting_lru))[:-1])

    starting_block = starting_node.block

    # If the starting node does not exist, there is no point in doing a DFS
    if not starting_node.exists:
        return

    stack = [(starting_block, starting_lru)]
    node = self.node()

    while stack:
        block, lru = stack.pop()
        node.read(block)

        current_lru = lru + node.stem()
        yield node, current_lru

        # The siblings of an explicitly given starting node are out of the
        # requested subtree, so they are only followed for other nodes (or
        # when the traversal starts from the root).
        if starting_from_root or block != starting_block:
            if node.has_right():
                stack.append((node.right(), lru))

            if node.has_left():
                stack.append((node.left(), lru))

        # Pushed last, hence popped first: children come before siblings
        if node.has_child():
            stack.append((node.child(), current_lru))
def lru_node(self, lru):
    '''
    Find the node matching the given lru.

    The lru is split into stems by lru_iter. For each stem, the current
    level is searched by comparing stems and moving to the left or right
    sibling, then the walk goes down to the child for the next stem.

    Returns:
        The matching node, or None as soon as a required sibling or child
        is missing. If lru_iter yields no stem, the root node is returned.
    '''
    cursor = self.root()
    parts = list(lru_iter(lru))
    last_index = len(parts) - 1

    for position, wanted in enumerate(parts):

        # Searching the wanted stem among the siblings of this level
        while True:
            found = cursor.stem()

            if found == wanted:
                break

            if wanted < found:
                if not cursor.has_left():
                    return None
                cursor.read_left()
            else:
                if not cursor.has_right():
                    return None
                cursor.read_right()

        # Going down one level, except for the very last stem
        if position < last_index:
            if not cursor.has_child():
                return None
            cursor.read_child()

    return cursor
def add_lru(self, lru):
    '''
    Walk the trie along the given lru, creating the missing nodes.

    Returns:
        A (node, history) tuple where node is the node of the last stem and
        history is the LRUTrieWalkHistory filled with the webentities and
        webentity creation rules met on the already existing part of the
        path.
    '''
    # TODO: we should be able to use an iterator and not keep a list!
    stems = list(lru_iter(lru))
    total = len(stems)
    depth = 0

    history = LRUTrieWalkHistory(lru)
    node = self.root()
    prefix = ''

    # First phase: descending the trie as long as levels exist
    while depth < total:
        stem = stems[depth]
        prefix += stem

        # NOTE(review): presumably finds the sibling holding this stem, or
        # creates it -- confirm against __ensure_stem_from_siblings
        node = self.__ensure_stem_from_siblings(node, stem)

        # Recording the webentity found on the way, with its prefix
        if node.has_webentity():
            history.update_webentity(node.webentity(), prefix, len(prefix))

        # Recording the position of webentity creation rules
        if node.has_webentity_creation_rule():
            history.add_webentity_creation_rule(len(prefix))

        depth += 1

        # Stopping when the stems are exhausted or when there is no deeper
        # level to read
        if depth >= total or not node.has_child():
            break

        node.read_child()

    # Second phase: chaining one new child per remaining stem
    for stem in stems[depth:]:
        child = self.node(stem=stem)
        child.set_parent(node.block)
        child.write()

        # The parent is linked to the child once the child is written
        node.set_child(child.block)
        node.write()

        node = child

    return node, history
def follow_lru(self, lru):
    '''
    Walk the trie along the given lru without creating anything.

    Same search as lru_node, but a LRUTrieWalkHistory is filled on the way
    (hence less efficient). Very similar to add_lru too, except that
    nothing is written when the lru is not in the trie.

    Returns:
        A (node, history) tuple. The node is None when the lru is not in
        the trie; the history then holds what was recorded before the walk
        stopped.
    '''
    node = self.root()
    history = LRUTrieWalkHistory(lru)
    stems = list(lru_iter(lru))
    last_depth = len(stems) - 1
    prefix = ''

    for depth, stem in enumerate(stems):
        prefix += stem

        # Searching the stem among the siblings of this level
        while True:
            current_stem = node.stem()

            if current_stem == stem:
                break

            if stem < current_stem:
                if not node.has_left():
                    return None, history
                node.read_left()
            else:
                if not node.has_right():
                    return None, history
                node.read_right()

        # Recording what the matched node carries
        if node.has_webentity():
            history.update_webentity(node.webentity(), prefix, len(prefix))

        if node.has_webentity_creation_rule():
            history.add_webentity_creation_rule(len(prefix))

        # Going down one level, except for the very last stem
        if depth < last_depth:
            if not node.has_child():
                return None, history
            node.read_child()

    return node, history
def webentity_dfs_iter(self, starting_node, starting_lru):
    '''
    Generator performing a depth-first traversal below the starting node,
    yielding (node, lru) tuples. Apart from the starting node itself, nodes
    holding a webentity are neither yielded nor descended into; their
    siblings are still followed.

    Note that this algorithm will peruse the webentity nodes only for
    the given prefix. We would need a refined algorithm for the cases
    when the prefixes are not given and we need to peruse the webentity's
    whole realm.
    '''
    origin_block = starting_node.block

    # The last stem is dropped here since the stem of each visited node,
    # starting node included, is appended during the traversal
    parent_lru = ''.join(list(lru_iter(starting_lru))[:-1])

    # A starting node that does not exist means there is nothing to iterate
    if not starting_node.exists:
        return

    pending = [(origin_block, parent_lru)]
    cursor = self.node()

    while pending:
        block, prefix = pending.pop()
        cursor.read(block)

        is_origin = block == origin_block
        relevant = is_origin or not cursor.has_webentity()

        current_lru = prefix + cursor.stem()

        if relevant:
            yield cursor, current_lru

        # Siblings are followed for every node but the starting one
        if not is_origin:
            if cursor.has_right():
                pending.append((cursor.right(), prefix))

            if cursor.has_left():
                pending.append((cursor.left(), prefix))

        # The child is only followed from relevant nodes
        if relevant and cursor.has_child():
            pending.append((cursor.child(), current_lru))
def test_lru_iter(self):
    # lru_iter should yield the stems of the lru one by one, in order, each
    # one keeping its trailing pipe
    lru = 's:http|h:fr|h:sciences-po|h:medialab|'
    expected_stems = ['s:http|', 'h:fr|', 'h:sciences-po|', 'h:medialab|']

    self.assertEqual(list(lru_iter(lru)), expected_stems)