예제 #1
0
    def dfs_iter(self, starting_node=None, starting_lru=''):
        '''
        Generator walking the trie depth-first and yielding `(node, lru)`
        tuples, `lru` being the concatenation of the stems leading to the
        node (the node's own stem included).

        When no starting node is given, the walk starts from the root and
        the root's siblings are followed too. When a starting node is given,
        its own siblings are skipped and `starting_lru` is expected to be the
        lru of that node (its last stem is dropped, then re-added from the
        node itself).

        Beware: the very same node object is re-read and yielded at each
        step, so it must not be stored as-is by the consumer.
        '''
        starting_from_root = not starting_node

        if starting_from_root:
            starting_node = self.root()
            starting_block = self.root().block
        else:
            starting_block = starting_node.block

            # Keeping only the prefix leading to the starting node
            parent_stems = list(lru_iter(starting_lru))[:-1]
            starting_lru = ''.join(parent_stems)

        # Nothing to traverse if the starting node does not exist
        if not starting_node.exists:
            return

        pending = [(starting_block, starting_lru)]
        cursor = self.node()

        while pending:
            block, prefix = pending.pop()
            cursor.read(block)

            path = prefix + cursor.stem()

            yield cursor, path

            # Siblings share the prefix of the current node. They are
            # ignored only for an explicitly given starting node.
            follow_siblings = starting_from_root or block != starting_block

            if follow_siblings:
                if cursor.has_right():
                    pending.append((cursor.right(), prefix))

                if cursor.has_left():
                    pending.append((cursor.left(), prefix))

            # Pushed last, hence explored first
            if cursor.has_child():
                pending.append((cursor.child(), path))
예제 #2
0
    def lru_node(self, lru):
        '''
        Walk the trie from the root, following the stems of the given lru,
        and return the node matching its last stem.

        Returns None as soon as a stem cannot be found among the siblings of
        the current level, or when a level has no child to descend into.
        '''
        node = self.root()

        stems = list(lru_iter(lru))
        last_index = len(stems) - 1

        for index, stem in enumerate(stems):

            # Searching the stem among the siblings of the current level
            current_stem = node.stem()

            while current_stem != stem:
                if stem < current_stem:
                    if not node.has_left():
                        return None
                    node.read_left()
                else:
                    if not node.has_right():
                        return None
                    node.read_right()

                current_stem = node.stem()

            # Descending one level, except for the last stem
            if index < last_index:
                if not node.has_child():
                    return None
                node.read_child()

        return node
예제 #3
0
    def add_lru(self, lru):
        '''
        Add the given lru to the trie and return a `(node, history)` tuple.

        The existing part of the path is descended first, recording in the
        LRUTrieWalkHistory every webentity and webentity creation rule met
        on the way. The stems that could not be reached are then appended
        as a chain of new children. The returned node is the one holding the
        last stem (or the root if the lru yields no stem).
        '''

        # TODO: we should be able to use an iterator and not keep a list!
        stems = list(lru_iter(lru))
        total = len(stems)
        history = LRUTrieWalkHistory(lru)
        node = self.root()

        prefix = ''
        depth = 0  # Number of stems already handled by the descent

        # Descending the trie as far as children exist
        for stem in stems:
            prefix += stem

            # NOTE: presumably finds or creates the sibling holding `stem`
            node = self.__ensure_stem_from_siblings(node, stem)

            prefix_length = len(prefix)

            # Tracking webentities
            if node.has_webentity():
                history.update_webentity(
                    node.webentity(), prefix, prefix_length
                )

            # Tracking webentity creation rules
            if node.has_webentity_creation_rule():
                history.add_webentity_creation_rule(prefix_length)

            depth += 1

            # Stopping on the last stem or when there is nowhere to descend
            if depth >= total or not node.has_child():
                break

            node.read_child()

        # Appending the stems that are not in the trie yet
        for stem in stems[depth:]:

            # The child is written first so that its block is known
            child = self.node(stem=stem)
            child.set_parent(node.block)
            child.write()

            # Then the parent is linked to it
            node.set_child(child.block)
            node.write()

            node = child

        return node, history
예제 #4
0
    def follow_lru(self, lru):
        '''
        Follow the given lru in the trie while keeping a walk history, and
        return a `(node, history)` tuple.

        This is close to `lru_node` (with the added cost of the history) and
        to `add_lru`, except that nothing is created: when the lru is not in
        the trie, the returned node is None and the history holds what was
        recorded up to that point.
        '''
        node = self.root()
        history = LRUTrieWalkHistory(lru)

        stems = list(lru_iter(lru))
        last_index = len(stems) - 1
        prefix = ''

        for index, stem in enumerate(stems):
            prefix += stem

            # Searching the stem among the siblings of the current level
            current_stem = node.stem()

            while current_stem != stem:
                if stem < current_stem:
                    if not node.has_left():
                        return None, history
                    node.read_left()
                else:
                    if not node.has_right():
                        return None, history
                    node.read_right()

                current_stem = node.stem()

            prefix_length = len(prefix)

            # Recording what the matched node carries
            if node.has_webentity():
                history.update_webentity(
                    node.webentity(), prefix, prefix_length
                )

            if node.has_webentity_creation_rule():
                history.add_webentity_creation_rule(prefix_length)

            # Descending one level, except for the last stem
            if index < last_index:
                if not node.has_child():
                    return None, history
                node.read_child()

        return node, history
예제 #5
0
    def webentity_dfs_iter(self, starting_node, starting_lru):
        '''
        Generator walking depth-first below the given starting node and
        yielding `(node, lru)` tuples, without entering the subtrees of
        other nodes holding a webentity.

        Note that this algorithm only peruses the webentity nodes for the
        given prefix. A refined algorithm would be needed for the cases when
        the prefixes are not given and the webentity's whole realm must be
        perused.

        Beware: the very same node object is re-read and yielded at each
        step, so it must not be stored as-is by the consumer.
        '''
        starting_block = starting_node.block

        # Keeping only the prefix leading to the starting node, whose own
        # stem is re-added when the node is visited
        parent_stems = list(lru_iter(starting_lru))[:-1]
        starting_lru = ''.join(parent_stems)

        # Nothing to traverse if the starting node does not exist
        if not starting_node.exists:
            return

        pending = [(starting_block, starting_lru)]
        cursor = self.node()

        while pending:
            block, prefix = pending.pop()
            cursor.read(block)

            is_start = block == starting_block

            # Nodes holding a webentity are skipped, the starting node aside
            relevant_node = is_start or not cursor.has_webentity()
            path = prefix + cursor.stem()

            if relevant_node:
                yield cursor, path

            # Siblings are followed for every node but the starting one
            if not is_start:
                if cursor.has_right():
                    pending.append((cursor.right(), prefix))

                if cursor.has_left():
                    pending.append((cursor.left(), prefix))

            # The child is only followed below relevant nodes
            if relevant_node and cursor.has_child():
                pending.append((cursor.child(), path))
예제 #6
0
 def test_lru_iter(self):
     # lru_iter should yield each stem of the lru, in order, with its
     # trailing pipe kept.
     expected = ['s:http|', 'h:fr|', 'h:sciences-po|', 'h:medialab|']
     stems = list(lru_iter('s:http|h:fr|h:sciences-po|h:medialab|'))

     self.assertEqual(stems, expected)