コード例 #1
0
    def test_treeify_keep_children(self):
        # A node that already carries children should keep them when
        # treeify nests it under its parent.
        parent = struct.Node(label=['1'])
        child = struct.Node(label=['1', 'b'], children=[1, 2, 3])

        actual = struct.treeify([parent, child])

        expected_child = struct.Node(label=['1', 'b'], children=[1, 2, 3])
        expected = [struct.Node(label=['1'], children=[expected_child])]
        self.assertEqual(actual, expected)
コード例 #2
0
def build(text, part):
    """Create a tree representing the whole interpretation.

    ``part`` is coerced to a string and used in the root label
    ``[part, Node.INTERP_MARK]``. ``text`` is split into a title and a body;
    every segment reported by ``segment_by_header`` becomes a child subtree
    and the body text before the first segment becomes the root's own text.
    When no segments are found, the root holds the entire body and has no
    children."""
    part = str(part)
    title, body = utils.title_body(text)
    header_spans = segment_by_header(body, part)

    # Nothing to split on: the root node carries the full body
    if not header_spans:
        return Node(body, [], [part, Node.INTERP_MARK], title, Node.INTERP)

    subtrees = []
    for start, end in header_spans:
        subtrees.append(segment_tree(body[start:end], part, [part]))

    #   Text ahead of the first segment belongs to the root itself
    intro = body[:header_spans[0][0]]
    return Node(intro, treeify(subtrees), [part, Node.INTERP_MARK], title,
                Node.INTERP)
コード例 #3
0
def parse_from_xml(root, xml_nodes):
    """Core of supplement processing, shared by whole-XML parsing and
    notice parsing.

    root is the root interpretation node (e.g. a Node with label
    '1005-Interp'); xml_nodes holds every XML node relevant to the
    interpretations. Each title node (other than "subpart" headers) starts
    a new interpretation subtree. The collected nodes are arranged with
    treeify and the first resulting tree is returned."""
    collected = [root]
    previous_label = root.label
    unlabeled_headers = 0

    for xml_node in xml_nodes:
        context_node = Node(label=previous_label, node_type=Node.INTERP)
        context = Label.from_node(context_node)

        #   "Subpart" headers are skipped on purpose: they are inconsistent
        #   and get reconstructed as subterps client-side
        header_text = tree_utils.get_node_text(xml_node, add_spaces=True)
        if not is_title(xml_node) or 'subpart' in header_text.lower():
            continue

        found = text_to_labels(header_text, context)
        if found:
            current_label = merge_labels(found)
        else:
            #   Header with no label of its own (an Introduction, etc.);
            #   give it a synthetic, numbered 'h' label
            unlabeled_headers += 1
            current_label = root.label[:2] + ['h%d' % unlabeled_headers]

        stack = tree_utils.NodeStack()
        #   Add whatever missing_levels reports between the two labels
        collected.extend(missing_levels(previous_label, current_label))
        previous_label = current_label

        header_node = Node(node_type=Node.INTERP, label=current_label,
                           title=header_text.strip())
        stack.add(2, header_node)
        process_inner_children(stack, xml_node, parent=header_node)

        #   Collapse the stack until only a single level is left
        while stack.size() > 1:
            stack.unwind()

        collected.append(stack.m_stack[0][0][1])

    forest = treeify(collected)

    def strip_emphasis(node):
        """Drop the '<E T="03">' marker from every label part, recursively"""
        node.label = [part.replace('<E T="03">', '') for part in node.label]
        for child in node.children:
            strip_emphasis(child)

    for top in forest:
        strip_emphasis(top)

    return forest[0]
コード例 #4
0
def parse_from_xml(root, xml_nodes):
    """Core of supplement processing; used both when parsing a whole XML
    document and when parsing a notice.

    root is the root interpretation node (e.g. a Node with label
    '1005-Interp'). xml_nodes contains all XML nodes which will be relevant
    to the interpretations. Returns the first tree that treeify builds from
    the root plus every header subtree found in xml_nodes."""
    nodes_so_far = [root]
    prev_label = root.label
    n_unlabeled = 0

    for element in xml_nodes:
        label_obj = Label.from_node(
            Node(label=prev_label, node_type=Node.INTERP))

        #   Explicitly ignore "subpart" headers, as they are inconsistent
        #   and they will be reconstructed as subterps client-side
        title_text = tree_utils.get_node_text(element, add_spaces=True)
        if not is_title(element) or 'subpart' in title_text.lower():
            continue

        parsed_labels = text_to_labels(title_text, label_obj)
        if not parsed_labels:
            #   Header without a label, like an Introduction, etc.
            n_unlabeled += 1
            this_label = root.label[:2] + ['h%d' % n_unlabeled]
        else:
            this_label = merge_labels(parsed_labels)

        node_stack = tree_utils.NodeStack()
        gap_nodes = missing_levels(prev_label, this_label)
        nodes_so_far.extend(gap_nodes)
        prev_label = this_label

        title_node = Node(node_type=Node.INTERP, label=this_label,
                          title=title_text.strip())
        node_stack.add(2, title_node)

        process_inner_children(node_stack, element, parent=title_node)

        #   Unwind until a single level remains on the stack
        while node_stack.size() > 1:
            node_stack.unwind()

        finished = node_stack.m_stack[0][0][1]
        nodes_so_far.append(finished)

    trees = treeify(nodes_so_far)

    #   Walk every node of every tree, removing the '<E T="03">' marker
    #   from each label part
    pending = list(trees)
    while pending:
        current = pending.pop()
        current.label = [piece.replace('<E T="03">', '')
                         for piece in current.label]
        pending.extend(current.children)

    return trees[0]
コード例 #5
0
    def test_treeify_interp(self):
        # Labels ending in 'Interp' should still nest parent -> child ->
        # grandchild.
        top = struct.Node(label=['1', 'Interp'])
        middle = struct.Node(label=['1', 'b', 'Interp'])
        bottom = struct.Node(label=['1', 'b', '5', 'Interp'])

        actual = struct.treeify([top, middle, bottom])

        # Build the expected tree from the leaf upwards
        want_bottom = struct.Node(label=['1', 'b', '5', 'Interp'])
        want_middle = struct.Node(label=['1', 'b', 'Interp'],
                                  children=[want_bottom])
        want_top = struct.Node(label=['1', 'Interp'], children=[want_middle])
        self.assertEqual(actual, [want_top])
コード例 #6
0
def build(text, part):
    """Create a tree representing the whole interpretation.

    The returned root Node is labelled ``[part, Node.INTERP_MARK]`` (with
    ``part`` converted to a string) and titled with the title split off
    ``text``. If ``segment_by_header`` finds segments in the body, each one
    is turned into a subtree via ``segment_tree`` and the root's text is the
    body up to the first segment; otherwise the root's text is the whole
    body and it has no children."""
    part = str(part)
    title, body = utils.title_body(text)
    spans = segment_by_header(body, part)

    if spans:
        pieces = [segment_tree(body[lo:hi], part, [part])
                  for lo, hi in spans]
        root_text = body[:spans[0][0]]
        root_children = treeify(pieces)
    else:
        root_text = body
        root_children = []

    return Node(root_text, root_children, [part, Node.INTERP_MARK], title,
                Node.INTERP)
コード例 #7
0
    def test_treeify(self):
        # Nodes handed over out of order should come back as two trees:
        # 1 -> 1-b -> 1-b-5, and a lone 2. Both sides are sorted so the
        # order of the returned roots does not matter.
        first = struct.Node(label=['1'])
        first_b = struct.Node(label=['1', 'b'])
        first_b_5 = struct.Node(label=['1', 'b', '5'])
        second = struct.Node(label=['2'])

        result = struct.treeify([first, first_b_5, second, first_b])

        want_leaf = struct.Node(label=['1', 'b', '5'])
        want_mid = struct.Node(label=['1', 'b'], children=[want_leaf])
        expected = [
            struct.Node(label=['1'], children=[want_mid]),
            struct.Node(label=['2']),
        ]
        self.assertEqual(sorted(result), sorted(expected))
コード例 #8
0
def process_without_headers(cfr_part, parent_xml, amended_labels):
    """Sometimes, we only get a list of paragraphs that have changes, but no
    header indicating with which sections they are associated. Accommodate
    by trying to match up amended_labels with paragraphs.

    The children of parent_xml are scanned in order; the n-th interpretation
    amendment is paired with the first remaining child whose text starts
    with the amendment label's last component followed by a period.

    Returns the first element of treeify's result over the parsed nodes
    (with a [cfr_part, Node.INTERP_MARK] node placed first), or None when no
    paragraph could be matched. Note that the (standardized) parent_xml has
    its matched tail removed as a side effect."""
    parent_xml = standardize_xml(parent_xml)

    relevant_labels = [al.label for al in amended_labels
                       if _is_interp_amend(al)]
    label_indices = []
    for idx, child in enumerate(parent_xml):
        text = tree_utils.get_node_text(child)
        if len(relevant_labels) > len(label_indices):
            #   Labels are matched strictly in order: only look for the
            #   next not-yet-matched label
            marker = relevant_labels[len(label_indices)][-1] + '.'
            if text.startswith(marker):
                label_indices.append(idx)

    #   zip() yields a one-shot iterator on Python 3, which reversed()
    #   rejects with a TypeError; materialise the pairs first
    label_x_index = list(zip(relevant_labels, label_indices))
    nodes = []
    #   Reverse it so we can delete from the bottom
    for label, idx in reversed(label_x_index):
        stack = tree_utils.NodeStack()
        prefix = label[:label.index(Node.INTERP_MARK) + 1]
        section = Node(node_type=Node.INTERP, label=prefix)
        stack.add(2, section)
        #   NOTE(review): processing is started from the element *before*
        #   the matched paragraph -- presumably process_inner_children
        #   walks that element's following siblings; confirm.
        interpretations.process_inner_children(stack, parent_xml[idx - 1])
        while stack.size() > 1:
            stack.unwind()

        nodes.append(stack.m_stack[0][0][1])

        # delete the tail (len() of an element is its child count; this
        # avoids the deprecated getchildren())
        while len(parent_xml) > idx:
            parent_xml.remove(parent_xml[idx])
    if nodes:
        nodes.append(Node(node_type=Node.INTERP,
                          label=[cfr_part, Node.INTERP_MARK]))
        #   Reverse it again into normal flow
        return treeify(list(reversed(nodes)))[0]
    else:
        return None