コード例 #1
0
    def test_find_branches(self):
        # Exercises graph.find_branches on a parsed text tree, once with a
        # label that does not occur in the tree and once with a label ('XYZ')
        # that occurs at several places.
        DATA = """
        C1
            C11
            XYZ
                C121
                C122
            C13
        XYZ
            C21
                C211
        C3
            XYZ
            C1

        """
        tree = graph.parse_text_tree(DATA)

        def collect(label):
            # Node data of the find_branches result, in the order dfs() yields it.
            found = graph.find_branches(tree, label)
            return [node.data for node, _ in found.dfs()]

        # Label absent from the tree: the current implementation prefers to
        # return ['NOT EXIST'] rather than [].
        self.assertEqual(collect('NOT EXIST'), ['NOT EXIST'])

        # 'XYZ' appears three times in DATA; the result is expected to be a
        # single 'XYZ' followed by what hangs below each occurrence.
        expected = ['XYZ', 'C121', 'C122', 'C21', 'C211']
        self.assertEqual(collect('XYZ'), expected)
コード例 #2
0
def query_by_tag(wlib, tag):
    """
    Arrange the pages of wlib under the category branches selected by tag.

    The branches returned by graph.find_branches(wlib.category.root, tag) are
    walked with dfs_ctx() and one Position is created per visited node, in
    visit order. Each Position below the root gets:
      - prefix: a dotted outline number built from the 1-based child index
        ('1', '2', ... directly under the root, '<parent prefix>.<n>' deeper);
      - parent_path: its ancestor Positions, nearest parent first;
      - an entry in its parent's children list.

    Every page in wlib.webpages that carries at least one tag found on a
    Position is then attached to Positions whose tag the page carries.
    Positions are scanned in reverse breadth-first order; placing a page on a
    Position marks all of that Position's ancestors as walked, so the page is
    not attached to them as well. Each attached item is a tuple
    (pos_rel, itags, page) where
      - pos_rel has one entry per ancestor (nearest first): 0 if the page
        carries that ancestor's tag, 1 otherwise;
      - itags is the sorted list of the page's tags that appear on no
        Position (the same list object is shared by all items of one page).

    @param wlib - object providing category.root and webpages.
    @param tag  - value handed to graph.find_branches to select the branches.
    @return - list of Position with items filled (and sorted), in dfs order.
    """

    branches = graph.find_branches(wlib.category.root, tag)
    assert branches

    positions = []
    # Builtin set instead of the deprecated sets.Set; same add/in semantics.
    tag_set = set()     # all tags found on the positions

    # build positions
    for visit_record in branches.dfs_ctx():
        node, idx, path, _ = visit_record
        pos = Position(node.data)
        positions.append(pos)
        # Store the Position in the record's 4th slot; records of deeper
        # nodes read it back through their path entries below.
        visit_record[3] = pos

        # set prefix, set parent.children (the root has an empty path and
        # gets neither)
        if path:
            parent_pos = path[-1][3]
            if len(path) == 1:
                pos.prefix = str(idx+1)
            else:
                pos.prefix = '%s.%s' % (parent_pos.prefix, str(idx+1))
            parent_pos.children.append(pos)

        # set parent_path: ancestors' Positions, nearest parent first
        pos.parent_path = [vr[3] for vr in reversed(path)]

        # update tag_set
        if pos.tag:
            tag_set.add(pos.tag)

    # note: branches is a non null Tree, so at least one position exists
    root_pos = positions[0]

    # positions in reverse breadth-first order
    pos_rbfs = [pos for pos,_ in root_pos.bfs()]
    pos_rbfs.reverse()

    for page in wlib.webpages:
        # basic filtering
        rtags = []  # relevant tags
        itags = []  # irrelevant tags
        # page_tag (not tag) so the function parameter is not shadowed
        for page_tag in page.tags:
            if page_tag in tag_set:
                rtags.append(page_tag)
            else:
                itags.append(page_tag)
        if not rtags:
            continue

        itags.sort()

        for pos in positions:           # clear markers first
            pos.trail_walked = False
        for pos in pos_rbfs:
            # already an ancestor of a position that received this page
            if pos.trail_walked:
                continue
            if pos.tag not in rtags:
                continue
            # should insert item in this position
            # calculate pos_rel with respect to this position
            pos_rel = []
            for ppos in pos.parent_path:
                ppos.trail_walked = True
                if ppos.tag in rtags:
                    pos_rel.append(0)
                else:
                    pos_rel.append(1)

            pos.items.append((pos_rel, itags, page))                    # TODO: between pos_rel and itags is irrelevant tag in tree.

    # Items are tuples, so they order by pos_rel, then itags, then page.
    for pos in positions:
        pos.items.sort()

    return positions