def build_tree(reg_xml):
    """Parse regulation XML text and assemble the corresponding Node tree.

    ``reg_xml`` is handed to ``etree.fromstring``; the parsed document is
    run through ``preprocess_xml`` before anything is read from it.

    The first ``PART`` element found in the document drives the layout:

    * if it has direct ``SUBPART`` children, each one is built with
      ``build_subpart`` and becomes a child of the root;
    * otherwise its direct ``SECTION`` children are built with
      ``build_from_section`` and gathered under a single empty part
      obtained from ``reg_text.build_empty_part``.

    Whatever ``build_non_reg_text`` returns is appended after those
    children.  Returns the root ``Node``.

    Raises ``IndexError`` if the document contains no ``PART`` element.
    """
    xml_root = etree.fromstring(reg_xml)
    preprocess_xml(xml_root)

    cfr_part = get_reg_part(xml_root)
    # NOTE(review): positional Node arguments are presumably
    # (text, children, label, title) -- confirm against Node's definition.
    root = Node("", [], [cfr_part], get_title(xml_root))

    part_xml = xml_root.xpath('//PART')[0]
    part_children = part_xml.getchildren()
    subpart_elements = [
        child for child in part_children if child.tag == 'SUBPART'
    ]

    if not subpart_elements:
        # No subparts: collect every section under one placeholder part.
        section_elements = [
            child for child in part_children if child.tag == 'SECTION'
        ]
        section_nodes = [
            node
            for section_element in section_elements
            for node in build_from_section(cfr_part, section_element)
        ]
        placeholder_part = reg_text.build_empty_part(cfr_part)
        placeholder_part.children = section_nodes
        root.children = [placeholder_part]
    else:
        root.children = [
            build_subpart(cfr_part, subpart_element)
            for subpart_element in subpart_elements
        ]

    root.children += build_non_reg_text(xml_root, cfr_part)
    return root
def build_tree(reg_xml):
    """Assemble the Node tree for an already-parsed regulation XML element.

    ``reg_xml`` is logged, passed through ``preprocess_xml``, and then
    queried with XPath, so it must be an element object rather than text.

    The first ``PART`` element found drives the layout:

    * its direct ``SUBPART`` and ``SUBJGRP`` children are collected in
      document order, each one immediately followed by any ``SUBJGRP``
      elements nested directly inside it.  ``SUBPART`` entries are built
      with ``build_subpart``; ``SUBJGRP`` entries are built with
      ``build_subjgrp``, which also receives the list of final label
      components of the subject groups built so far;
    * if no such elements exist, the direct ``SECTION`` children are built
      with ``build_from_section`` and gathered under a single empty part
      obtained from ``reg_text.build_empty_part``.

    Whatever ``build_non_reg_text`` returns is appended after those
    children.  Returns the root ``Node``.

    Raises ``IndexError`` if the document contains no ``PART`` element.
    """
    logger.info("Build tree %s", reg_xml)
    preprocess_xml(reg_xml)

    cfr_part = get_reg_part(reg_xml)
    # NOTE(review): positional Node arguments are presumably
    # (text, children, label, title) -- confirm against Node's definition.
    root = Node("", [], [cfr_part], get_title(reg_xml))

    part_xml = reg_xml.xpath('//PART')[0]

    # Flatten SUBPARTs and SUBJGRPs into one ordered list.  SUBJGRPs can be
    # nested (particularly inside SUBPARTs), so each top-level grouping is
    # followed by the SUBJGRPs found directly beneath it.
    grouping_elements = []
    for top_level in part_xml.xpath('./SUBPART|./SUBJGRP'):
        grouping_elements.append(top_level)
        grouping_elements.extend(top_level.xpath('./SUBJGRP'))

    if not grouping_elements:
        # No groupings: collect every section under one placeholder part.
        section_elements = [
            child for child in part_xml.getchildren()
            if child.tag == 'SECTION'
        ]
        section_nodes = [
            node
            for section_element in section_elements
            for node in build_from_section(cfr_part, section_element)
        ]
        placeholder_part = reg_text.build_empty_part(cfr_part)
        placeholder_part.children = section_nodes
        root.children = [placeholder_part]
    else:
        grouping_nodes = []
        # Final label components of the subject groups built so far; passed
        # to build_subjgrp on every call and grown after each one.
        used_letters = []
        for element in grouping_elements:
            if element.tag == "SUBPART":
                grouping_nodes.append(build_subpart(cfr_part, element))
            elif element.tag == "SUBJGRP":
                subjgrp_node = build_subjgrp(cfr_part, element, used_letters)
                used_letters.append(subjgrp_node.label[-1])
                grouping_nodes.append(subjgrp_node)
        root.children = grouping_nodes

    root.children += build_non_reg_text(reg_xml, cfr_part)
    return root
def __call__(self, parent, xml_node):
    """Build section nodes from ``xml_node`` and attach them under ``parent``.

    The sections produced by ``build_from_section`` are appended to the
    children of ``parent``'s last child.  When ``parent`` has no children
    yet, an empty part (from ``build_empty_part``) is added first so there
    is a node to hold them.  Mutates ``parent`` in place; returns ``None``.
    """
    new_sections = build_from_section(parent.cfr_part, xml_node)

    siblings = parent.children
    if not siblings:
        # Nothing to attach to yet: create the container part on demand.
        siblings.append(build_empty_part(parent.cfr_part))

    container = siblings[-1]
    container.children.extend(new_sections)