def _non_interp_p_with_label(xml_node):
    """Tell whether a childless <P> starts with a label, not a marker.

    E.g. <P>22(a)</P> but not <P>ii. 22(a)</P>

    The result is truthy only when all of the following hold, checked in
    this order:

    * the tag is "P" (compared case-insensitively);
    * the element has no child elements;
    * the element has non-empty text;
    * `get_first_interp_marker` finds nothing in that text;
    * `text_to_labels`, called with an empty `Label()`, `warn=False` and
      `force_start=True`, returns something truthy for that text.

    The value returned is whichever operand decided the `and` chain, so
    callers should rely on its truthiness only.
    """
    return (
        xml_node.tag.upper() == 'P'
        # len() counts child elements. Element.getchildren() is deprecated
        # in lxml and was removed from xml.etree.ElementTree in Python 3.9.
        and len(xml_node) == 0
        and xml_node.text
        and not get_first_interp_marker(xml_node.text)
        and text_to_labels(xml_node.text, Label(), warn=False,
                           force_start=True)
    )
def _p_with_label_in_child(xml_node):
    """Tell whether a <P> consists solely of one child holding a label.

    E.g. <P><E>22(a)</E>.</P>

    The result is truthy only when all of the following hold, checked in
    this order:

    * the tag is "P" (compared case-insensitively);
    * the element's own text is missing or whitespace-only;
    * it has exactly one child element;
    * that child's tail holds nothing but spaces, newlines, tabs and
      periods;
    * `text_to_labels`, called with an empty `Label()` and `warn=False`,
      returns something truthy for the child's text.

    The value returned is whichever operand decided the `and` chain, so
    callers should rely on its truthiness only.
    """
    # list(element) yields the child elements. Element.getchildren() is
    # deprecated in lxml and was removed from xml.etree.ElementTree in
    # Python 3.9.
    children = list(xml_node)
    return (
        xml_node.tag.upper() == 'P'
        and not (xml_node.text or '').strip()
        and len(children) == 1
        and not (children[0].tail or '').strip(" \n\t.")
        # NOTE(review): children[0].text is None for an empty child;
        # confirm text_to_labels tolerates that.
        and text_to_labels(children[0].text, Label(), warn=False)
    )
def test_text_to_labels():
    """Check the labels `tree.text_to_labels` extracts from header text.

    Every case is parsed against `Label(part='1111', comment=True)`, and
    every expected label ends in 'Interp'.
    """
    # \u00a7 is the section sign, written as an escape so the literal does
    # not depend on the source file's encoding. The citation that follows
    # it is expected to contribute no labels.
    text = (u"9(c)(2)(iii) Charges not Covered by \u00a7 1026.6(b)(1) and "
            u"(b)(2)")
    result = tree.text_to_labels(text, Label(part='1111', comment=True))
    assert result == [['1111', '9', 'c', '2', 'iii', 'Interp']]

    # "X and (y)" expands into two sibling paragraph labels.
    text = "Paragraphs 4(b)(7) and (b)(8)."
    result = tree.text_to_labels(text, Label(part='1111', comment=True))
    assert result == [['1111', '4', 'b', '7', 'Interp'],
                      ['1111', '4', 'b', '8', 'Interp']]

    # Appendix letters are picked up; the "-Something" suffix is dropped.
    text = "Appendices G and H-Something"
    result = tree.text_to_labels(text, Label(part='1111', comment=True))
    assert result == [['1111', 'G', 'Interp'], ['1111', 'H', 'Interp']]

    # A deeply nested single paragraph reference.
    text = "Paragraph 38(l)(7)(i)(A)(2)."
    result = tree.text_to_labels(text, Label(part='1111', comment=True))
    assert result == [['1111', '38', 'l', '7', 'i', 'A', '2', 'Interp']]
def _p_with_label_in_child(xml_node):
    """Tell whether a <P> consists solely of one child holding a label.

    E.g. <P><E>22(a)</E>.</P>

    Truthy only when the tag is "P" (case-insensitive), the element's own
    text is missing or whitespace-only, it has exactly one child element,
    that child's tail contains nothing but spaces, newlines, tabs and
    periods, and `text_to_labels` (called with an empty `Label()` and
    `warn=False`) returns something truthy for the child's text.

    The value returned is whichever operand decided the `and` chain, so
    callers should rely on its truthiness only.
    """
    # Iterating an element yields its children. Element.getchildren() is
    # deprecated in lxml and was removed from xml.etree.ElementTree in
    # Python 3.9.
    children = list(xml_node)
    return (
        xml_node.tag.upper() == 'P'
        and not (xml_node.text or '').strip()
        and len(children) == 1
        and not (children[0].tail or '').strip(" \n\t.")
        and text_to_labels(children[0].text, Label(), warn=False)
    )
def parse_from_xml(root, xml_nodes):
    """Core of supplement processing; shared by whole XML parsing and notice
    parsing.

    root is the root interpretation node (e.g. a Node with label
    '1005-Interp'). xml_nodes contains all XML nodes which will be relevant
    to the interpretations.

    Each XML node that `is_title` accepts, and whose text does not mention
    "subpart", starts a new interpretation node. Its children are processed
    through a `NodeStack`, and the resulting node is collected alongside any
    nodes `missing_levels` reports between the previous label and this one.
    All other XML nodes are skipped by this loop. The collected nodes are
    passed to `treeify` and the first element of its result is returned.
    """
    supplement_nodes = [root]

    last_label = root.label
    header_count = 0
    for ch in xml_nodes:
        # Labels in the header text are resolved relative to the most
        # recently seen header's label.
        node = Node(label=last_label, node_type=Node.INTERP)
        label_obj = Label.from_node(node)

        # Explicitly ignore "subpart" headers, as they are inconsistent
        # and they will be reconstructed as subterps client-side
        text = tree_utils.get_node_text(ch, add_spaces=True)
        if is_title(ch) and 'subpart' not in text.lower():
            labels = text_to_labels(text, label_obj)
            if labels:
                label = merge_labels(labels)
            else:
                # Header without a label, like an Introduction, etc.
                header_count += 1
                label = root.label[:2] + ['h{0}'.format(header_count)]

            inner_stack = tree_utils.NodeStack()
            missing = missing_levels(last_label, label)
            supplement_nodes.extend(missing)
            last_label = label

            node = Node(node_type=Node.INTERP, label=label,
                        title=text.strip())
            inner_stack.add(2, node)

            process_inner_children(inner_stack, ch)

            # Collapse the stack until only its bottom level remains.
            while inner_stack.size() > 1:
                inner_stack.unwind()

            # NOTE(review): presumably the node half of the first
            # (depth, node) pair on the bottom level -- confirm against
            # NodeStack.
            ch_node = inner_stack.m_stack[0][0][1]
            supplement_nodes.append(ch_node)

    supplement_tree = treeify(supplement_nodes)

    def per_node(node):
        # Drop emphasis opening tags that leaked into label parts.
        node.label = [part.replace('<E T="03">', '') for part in node.label]
        for child in node.children:
            per_node(child)

    for node in supplement_tree:
        per_node(node)

    return supplement_tree[0]
def parse_from_xml(root, xml_nodes):
    """Build the interpretation tree from the relevant XML nodes.

    Shared by whole-XML parsing and notice parsing. `root` is the root
    interpretation node (e.g. a Node with label '1005-Interp'), and
    `xml_nodes` holds every XML node relevant to the interpretations.

    Returns the first element of what `treeify` produces from `root` plus
    the nodes gathered for each accepted header.
    """
    collected = [root]
    previous_label = root.label
    unlabeled_headers = 0

    for xml_node in xml_nodes:
        context = Label.from_node(
            Node(label=previous_label, node_type=Node.INTERP))
        text = tree_utils.get_node_text(xml_node, add_spaces=True)

        # "Subpart" headers are deliberately skipped: they are inconsistent
        # and get rebuilt as subterps client-side. Non-titles are skipped too.
        if not is_title(xml_node) or 'subpart' in text.lower():
            continue

        found = text_to_labels(text, context)
        if found:
            label = merge_labels(found)
        else:
            # A header with no label of its own (an Introduction, etc.)
            unlabeled_headers += 1
            label = root.label[:2] + ['h{0}'.format(unlabeled_headers)]

        stack = tree_utils.NodeStack()
        collected.extend(missing_levels(previous_label, label))
        previous_label = label

        stack.add(2, Node(node_type=Node.INTERP, label=label,
                          title=text.strip()))
        process_inner_children(stack, xml_node)
        while stack.size() > 1:
            stack.unwind()
        collected.append(stack.m_stack[0][0][1])

    forest = treeify(collected)

    def scrub(tree_node):
        # Remove emphasis opening tags from every label part, recursively.
        tree_node.label = [part.replace('<E T="03">', '')
                           for part in tree_node.label]
        for child in tree_node.children:
            scrub(child)

    for top in forest:
        scrub(top)

    return forest[0]
def per_node(node):
    """Record `node` in `self.lookup_table` if it is an interpretation.

    Nodes whose type is not INTERP, or whose label does not end with the
    interp marker, are ignored.
    """
    is_interp = (node.node_type == struct.Node.INTERP
                 and node.label[-1] == struct.Node.INTERP_MARK)
    if not is_interp:
        return

    # The node's own label (minus the marker) always gets an entry.
    self.lookup_table[tuple(node.label[:-1])].append(node)

    # Labels mentioned in the title get an entry as well, without
    # registering the same node twice under one key.
    title_labels = text_to_labels(node.title or '', Label.from_node(node),
                                  warn=False)
    for title_label in title_labels:
        key = tuple(title_label[:-1])   # Remove Interp marker
        registered = self.lookup_table[key]
        if node not in registered:
            registered.append(node)