def test_treeify_keep_children(self):
    """treeify must nest a node under its parent while leaving any
    children that node already carried untouched."""
    parent = struct.Node(label=['1'])
    child = struct.Node(label=['1', 'b'], children=[1, 2, 3])

    result = struct.treeify([parent, child])

    # The child keeps its original [1, 2, 3] children after being nested
    expected_child = struct.Node(label=['1', 'b'], children=[1, 2, 3])
    expected = [struct.Node(label=['1'], children=[expected_child])]
    self.assertEqual(result, expected)
def build(text, part):
    """Create a tree representing the whole interpretation.

    text is split into a title and a body; the body is then divided into
    header-delimited segments. Each segment becomes a subtree, the
    subtrees are nested with treeify, and everything hangs off a root
    node labelled [part, Node.INTERP_MARK]. When no segments are found
    the root simply carries the whole body and has no children."""
    part = str(part)
    title, body = utils.title_body(text)
    root_label = [part, Node.INTERP_MARK]

    spans = segment_by_header(body, part)
    if not spans:
        return Node(body, [], root_label, title, Node.INTERP)

    subtrees = []
    for start, end in spans:
        subtrees.append(segment_tree(body[start:end], part, [part]))

    # Whatever precedes the first segment stays on the root node itself
    preamble = body[:spans[0][0]]
    return Node(preamble, treeify(subtrees), root_label, title, Node.INTERP)
def parse_from_xml(root, xml_nodes):
    """Core of supplement processing, shared by whole-XML parsing and
    notice parsing.

    root is the root interpretation node (e.g. a Node with label
    '1005-Interp'). xml_nodes contains all XML nodes which will be
    relevant to the interpretations. Every qualifying header found in
    xml_nodes becomes an interpretation Node that is populated through
    process_inner_children; the collected nodes are nested with treeify
    and the first resulting tree is returned."""
    collected = [root]
    previous_label = root.label
    unlabeled_headers = 0

    for xml_node in xml_nodes:
        # Header text is interpreted relative to the last label we saw
        context = Label.from_node(
            Node(label=previous_label, node_type=Node.INTERP))
        text = tree_utils.get_node_text(xml_node, add_spaces=True)

        # Explicitly ignore "subpart" headers, as they are inconsistent
        # and they will be reconstructed as subterps client-side
        if not is_title(xml_node) or 'subpart' in text.lower():
            continue

        labels = text_to_labels(text, context)
        if labels:
            label = merge_labels(labels)
        else:
            # Header without a label, like an Introduction, etc.
            unlabeled_headers += 1
            label = root.label[:2] + ['h%d' % unlabeled_headers]

        # Add whatever missing_levels reports between the previous
        # header's label and this one before the header itself
        collected.extend(missing_levels(previous_label, label))
        previous_label = label

        header = Node(node_type=Node.INTERP, label=label,
                      title=text.strip())
        stack = tree_utils.NodeStack()
        stack.add(2, header)
        process_inner_children(stack, xml_node, parent=header)

        # Keep unwinding until a single level remains on the stack
        while stack.size() > 1:
            stack.unwind()

        # NOTE(review): presumably the first entry of the remaining level
        # is a (depth, node) pair -- confirm against NodeStack
        collected.append(stack.m_stack[0][0][1])

    forest = treeify(collected)

    def strip_emphasis(current):
        # Drop leftover '<E T="03">' markup from every label component,
        # then do the same for all descendants
        current.label = [piece.replace('<E T="03">', '')
                         for piece in current.label]
        for descendant in current.children:
            strip_emphasis(descendant)

    for tree in forest:
        strip_emphasis(tree)

    return forest[0]
def test_treeify_interp(self):
    """treeify must nest labels that end in 'Interp' just like plain
    labels: each node sits under the node whose label has one fewer
    component before the trailing 'Interp'."""
    top = struct.Node(label=['1', 'Interp'])
    middle = struct.Node(label=['1', 'b', 'Interp'])
    bottom = struct.Node(label=['1', 'b', '5', 'Interp'])

    result = struct.treeify([top, middle, bottom])

    # Build the expected chain from the innermost node outwards
    expected_bottom = struct.Node(label=['1', 'b', '5', 'Interp'])
    expected_middle = struct.Node(label=['1', 'b', 'Interp'],
                                  children=[expected_bottom])
    expected_top = struct.Node(label=['1', 'Interp'],
                               children=[expected_middle])
    self.assertEqual(result, [expected_top])
def build(text, part):
    """Create a tree representing the whole interpretation.

    The title is separated from the body of text, and the body is cut
    into segments by header. A body without segments yields a childless
    root; otherwise each segment is converted to a subtree and the
    subtrees are arranged under the root by treeify. In both cases the
    root is labelled [part, Node.INTERP_MARK]."""
    part = str(part)
    title, body = utils.title_body(text)
    segments = segment_by_header(body, part)

    interp_label = [part, Node.INTERP_MARK]
    if not segments:
        return Node(body, [], interp_label, title, Node.INTERP)

    children = []
    for seg_start, seg_end in segments:
        segment_text = body[seg_start:seg_end]
        children.append(segment_tree(segment_text, part, [part]))

    # Text ahead of the first segment belongs to the root node
    first_start = segments[0][0]
    return Node(
        body[:first_start], treeify(children), interp_label, title,
        Node.INTERP)
def test_treeify(self):
    """treeify must rebuild the hierarchy even when nodes arrive out of
    order, producing one tree per top-level label."""
    one = struct.Node(label=['1'])
    one_b = struct.Node(label=['1', 'b'])
    one_b_five = struct.Node(label=['1', 'b', '5'])
    two = struct.Node(label=['2'])

    # Deliberately shuffled: the grandchild precedes its own parent
    result = struct.treeify([one, one_b_five, two, one_b])

    expected_first = struct.Node(label=['1'], children=[
        struct.Node(label=['1', 'b'], children=[
            struct.Node(label=['1', 'b', '5']),
        ]),
    ])
    expected_second = struct.Node(label=['2'])

    # Both sides are sorted so the order of the returned roots is not
    # part of the assertion
    self.assertEqual(sorted(result),
                     sorted([expected_first, expected_second]))
def process_without_headers(cfr_part, parent_xml, amended_labels):
    """Sometimes, we only get a list of paragraphs that have changes, but
    no header indicating with which sections they are associated.
    Accommodate by trying to match up amended_labels with paragraphs.

    cfr_part is used as the first component of the root interpretation
    label. parent_xml is the XML element whose children are the changed
    paragraphs; it is passed through standardize_xml and the result has
    children removed from it while processing. amended_labels is an
    iterable of objects with a ``label`` attribute; only those accepted
    by _is_interp_amend are considered.

    Returns the first tree produced by treeify for the matched
    paragraphs, or None when no paragraph could be matched."""
    parent_xml = standardize_xml(parent_xml)

    relevant_labels = [al.label for al in amended_labels
                       if _is_interp_amend(al)]

    # Labels are matched strictly in order: the n-th label is only looked
    # for once the first n - 1 have been located, by checking whether a
    # paragraph's text starts with the label's last component plus '.'
    label_indices = []
    for idx, child in enumerate(parent_xml):
        text = tree_utils.get_node_text(child)
        if len(relevant_labels) > len(label_indices):
            marker = relevant_labels[len(label_indices)][-1] + '.'
            if text.startswith(marker):
                label_indices.append(idx)

    # Materialise the pairs: on Python 3 zip() returns an iterator, which
    # reversed() cannot consume
    label_index_pairs = list(zip(relevant_labels, label_indices))
    nodes = []
    # Reverse it so we can delete from the bottom
    for label, idx in reversed(label_index_pairs):
        stack = tree_utils.NodeStack()
        prefix = label[:label.index(Node.INTERP_MARK) + 1]
        section = Node(node_type=Node.INTERP, label=prefix)
        stack.add(2, section)
        # NOTE(review): the element *before* the match is passed in --
        # presumably process_inner_children walks the siblings that
        # follow it; confirm, and check the idx == 0 case
        interpretations.process_inner_children(stack, parent_xml[idx - 1])
        while stack.size() > 1:
            stack.unwind()
        nodes.append(stack.m_stack[0][0][1])

        # delete the tail, so the next (earlier) label only sees its own
        # paragraphs; len(element) counts children without relying on the
        # deprecated getchildren()
        while len(parent_xml) > idx:
            parent_xml.remove(parent_xml[idx])

    if not nodes:
        return None

    nodes.append(Node(node_type=Node.INTERP,
                      label=[cfr_part, Node.INTERP_MARK]))
    # Reverse it again into normal flow
    return treeify(list(reversed(nodes)))[0]