def test_find_section_by_section(self):
    """Check which elements are returned for a Section-by-Section block.

    The fixture places an "X. Section-by-Section Analysis" HD1 header
    between two other HD1 sections. The expected result is the text of
    the elements that sit between that header and the following HD1,
    including the paragraph nested inside <EXTRACT>.
    """
    sxs_xml = (
        ' <HD SOURCE="HD2">Sub Section</HD>'
        ' <P>Content</P>'
        ' <HD SOURCE="HD3">Sub sub section</HD>'
        ' <EXTRACT><P>This is in an extract</P></EXTRACT>'
        ' <P>Sub Sub Content</P>'
    )
    full_xml = (
        ' <ROOT>'
        ' <SUPLINF>'
        ' <HD SOURCE="HED">Supplementary Info</HD>'
        ' <HD SOURCE="HD1">Stuff Here</HD>'
        ' <P>Some Content</P>'
        ' <HD SOURCE="HD1">X. Section-by-Section Analysis</HD>'
        ' %s'
        ' <HD SOURCE="HD1">Section that follows</HD>'
        ' <P>Following Content</P>'
        ' </SUPLINF>'
        ' </ROOT>'
    ) % sxs_xml

    # Must use text field since the nodes are not directly comparable
    sxs_texts = [
        'Sub Section',
        'Content',
        'Sub sub section',
        'This is in an extract',
        'Sub Sub Content',
    ]
    computed = sxs.find_section_by_section(etree.fromstring(full_xml))

    # Build a real list: on Python 3 ``map`` returns an iterator, which
    # never compares equal to a list, so the assertion could not pass.
    self.assertEqual(sxs_texts, [el.text for el in computed])
def process_xml(notice, notice_xml):
    """Copy selected fields from the notice XML into ``notice``.

    ``notice`` is updated in place and also returned. Keys written:

    * ``contact`` -- text of the first ``//FURINF/P`` element, only when
      at least one such element exists.
    * ``addresses`` -- result of ``fetch_addresses``, only when truthy.
    * ``section_by_section`` -- always set, built from the elements found
      by ``find_section_by_section``.
    * ``amendments`` -- collected from every ``//AMDPAR`` element, only
      when at least one amendment was produced.

    Reads ``notice['cfr_part']`` and ``notice['meta']['start_page']``.
    """
    contact_paragraphs = notice_xml.xpath('//FURINF/P')
    if contact_paragraphs:
        first_paragraph = contact_paragraphs[0]
        notice['contact'] = first_paragraph.text

    found_addresses = fetch_addresses(notice_xml)
    if found_addresses:
        notice['addresses'] = found_addresses

    sxs_elements = find_section_by_section(notice_xml)
    notice['section_by_section'] = build_section_by_section(
        sxs_elements, notice['cfr_part'], notice['meta']['start_page'])

    # The context returned by each parse_amdpar call is fed into the next
    # call, so the AMDPAR elements must be handled in document order.
    parse_context = []
    collected_amendments = []
    for amdpar in notice_xml.xpath('//AMDPAR'):
        parsed, parse_context = parse_amdpar(amdpar, parse_context)
        collected_amendments.extend(parsed)

    if collected_amendments:
        notice['amendments'] = collected_amendments

    return notice
def process_sxs(notice, notice_xml):
    """Locate the SxS elements in ``notice_xml`` and store the built result.

    Reads ``notice['meta']['start_page']`` and ``notice['cfr_parts'][0]``
    and writes ``notice['section_by_section']``. Returns nothing.
    """
    # NOTE(review): another ``process_sxs`` in this file passes
    # (cfr_part, start_page) in the opposite order -- confirm which one
    # matches build_section_by_section's signature.
    sxs_elements = find_section_by_section(notice_xml)
    start_page = notice['meta']['start_page']
    # note we will continue to use cfr_parts[0] as the default SxS label
    # until we find a counter example
    default_label_part = notice['cfr_parts'][0]
    notice['section_by_section'] = build_section_by_section(
        sxs_elements, start_page, default_label_part)
def test_find_section_by_section_not_present(self):
    """Expect an empty list when no Section-by-Section header exists.

    The only HD1 header in the fixture reads "This is not sxs Analysis",
    and the test asserts that nothing is returned for it.
    """
    notice_source = (
        ' <ROOT>'
        ' <SUPLINF>'
        ' <HD SOURCE="HED">Supplementary Info</HD>'
        ' <HD SOURCE="HD1">This is not sxs Analysis</HD>'
        ' <P>Stuff</P>'
        ' <P>Stuff2</P>'
        ' <FTNT>Foot Note</FTNT>'
        ' </SUPLINF>'
        ' </ROOT>'
    )
    root = etree.fromstring(notice_source)
    found = sxs.find_section_by_section(root)
    self.assertEqual([], found)
def test_find_section_by_section_intro_text(self):
    """Check that intro paragraphs before the first sub-header are dropped.

    Two <P> elements follow the "X. Section-by-Section Analysis" header
    before the first HD2 header; one of them mentions a section number.
    The expected result starts at the HD2 header, so neither intro
    paragraph should be returned.
    """
    sxs_xml = (
        ' <P>Some intro text</P>'
        ' <P>This text includes a reference to Section 8675.309(a)</P>'
        ' <HD SOURCE="HD2">Section 8675.309 Stuff</HD>'
        ' <P>Content</P>'
    )
    full_xml = (
        ' <ROOT>'
        ' <SUPLINF>'
        ' <HD SOURCE="HED">Supplementary Info</HD>'
        ' <HD SOURCE="HD1">Stuff Here</HD>'
        ' <P>Some Content</P>'
        ' <HD SOURCE="HD1">X. Section-by-Section Analysis</HD>'
        ' %s'
        ' <HD SOURCE="HD1">Section that follows</HD>'
        ' <P>Following Content</P>'
        ' </SUPLINF>'
        ' </ROOT>'
    ) % sxs_xml

    sxs_texts = ['Section 8675.309 Stuff', 'Content']
    computed = sxs.find_section_by_section(etree.fromstring(full_xml))

    # Compare element texts because the nodes themselves are not directly
    # comparable. Build a real list: on Python 3 ``map`` returns an
    # iterator, which never compares equal to a list.
    self.assertEqual(sxs_texts, [el.text for el in computed])
def process_sxs(notice, notice_xml):
    """Locate the SxS elements in ``notice_xml`` and store the built result.

    Reads ``notice['cfr_part']`` and ``notice['meta']['start_page']`` and
    writes ``notice['section_by_section']``. Returns nothing.
    """
    # NOTE(review): another ``process_sxs`` in this file passes
    # (start_page, cfr_parts[0]) in the opposite order -- confirm which
    # one matches build_section_by_section's signature.
    sxs_elements = find_section_by_section(notice_xml)
    cfr_part = notice['cfr_part']
    start_page = notice['meta']['start_page']
    notice['section_by_section'] = build_section_by_section(
        sxs_elements, cfr_part, start_page)