def test_process_appendix_header_is_paragraph():
    """Check how HD3 headers that look like paragraph markers are parsed.

    The "(A) ..." and "(B) ..." HD3 headers are expected to come back as
    nodes labelled 'A' and 'B' carrying the header text, while
    "I. Remains Header" is expected to come back as a titled 'h1' node.
    """
    with XMLBuilder('APPENDIX') as builder:
        builder.EAR("Pt. 1111, App. A")
        builder.HD("Appendix A to Part 1111-Awesome", SOURCE='HED')
        builder.HD("A-1 - First kind of awesome", SOURCE='HD2')
        builder.HD("(A) First Subkind", SOURCE='HD3')
        builder.P("1. Content")
        builder.HD("(B) Next Subkind", SOURCE='HD3')
        builder.P("1. Moar Contents")
        builder.HD("I. Remains Header", SOURCE='HD3')
        builder.P("1. Content tent")
    tree = NodeAccessor(appendices.process_appendix(builder.xml, 1111))

    assert tree.label == ['1111', 'A']
    assert tree.child_labels == ['1']

    part = tree['1']
    assert part.child_labels == ['A', 'B']
    assert part.title == 'A-1 - First kind of awesome'

    sub_a = part['A']
    assert sub_a.child_labels == ['1']
    assert sub_a.text == '(A) First Subkind'
    assert sub_a['1'].text == '1. Content'

    sub_b = part['B']
    assert sub_b.child_labels == ['1']
    assert sub_b.text == '(B) Next Subkind'

    b_first = sub_b['1']
    assert b_first.text == '1. Moar Contents'
    assert b_first.child_labels == ['h1']

    kept_header = b_first['h1']
    assert kept_header.title == 'I. Remains Header'
    assert kept_header.child_labels == ['1']
    assert kept_header['1'].text == '1. Content tent'
def test_build_from_section_example():
    """Paragraphs wrapped in EXAMPLE tags are grouped beneath paragraph (a).

    Two EXAMPLE elements follow "(a)"; each is expected to show up as a
    text-less child ('p1', 'p2') whose three paragraphs are labelled
    'p1' through 'p3'.
    """
    with section_ctx() as builder:
        builder.P("(a) aaaa")
        with builder.EXAMPLE():
            builder.P("You need a form if:")
            builder.P("1. Some content")
            builder.P("2. Other content")
        with builder.EXAMPLE():
            builder.P("You do not need a form if:")
            builder.P("1. Some content")
            builder.P("2. Other content")
    tree = NodeAccessor(section.build_from_section('8675', builder.xml)[0])

    assert tree.child_labels == ['a']
    par_a = tree['a']
    assert par_a.text == '(a) aaaa'
    assert par_a.child_labels == ['p1', 'p2']

    first_example = par_a['p1']
    assert first_example.text == ''
    assert first_example.child_labels == ['p1', 'p2', 'p3']
    assert first_example['p1'].text == 'You need a form if:'
    assert first_example['p2'].text == '1. Some content'
    assert first_example['p3'].text == '2. Other content'

    second_example = par_a['p2']
    assert second_example.text == ''
    assert second_example.child_labels == ['p1', 'p2', 'p3']
    assert second_example['p1'].text == 'You do not need a form if:'
    assert second_example['p2'].text == '1. Some content'
    assert second_example['p3'].text == '2. Other content'
def test_process_same_sub_level():
    """Repeated marker letters at different depths land at the right level.

    The "a."-"d." markers appear both directly under "1."/"2." and again
    under "2.a.ii."; the assertions pin the expected nesting of each.
    """
    paragraphs = (
        "1. 1 1 1",
        "a. 1a 1a 1a",
        "b. 1b 1b 1b",
        "c. 1c 1c 1c",
        "d. 1d 1d 1d",
        "e. 1e 1e 1e",
        "f. 1f 1f 1f",
        "2. 2 2 2",
        "a. 2a 2a 2a",
        "i. 2ai 2ai 2ai",
        "ii. 2aii 2aii 2aii",
        "a. 2aiia 2aiia 2aiia",
        "b. 2aiib 2aiib 2aiib",
        "c. 2aiic 2aiic 2aiic",
        "d. 2aiid 2aiid 2aiid",
        "b. 2b 2b 2b",
    )
    with XMLBuilder('APPENDIX') as builder:
        builder.HD("Appendix A to Part 1111-Awesome", SOURCE='HED')
        for paragraph in paragraphs:
            builder.P(paragraph)
    tree = NodeAccessor(appendices.process_appendix(builder.xml, 1111))

    assert tree.child_labels == ['1', '2']
    assert tree['1'].child_labels == ['a', 'b', 'c', 'd', 'e', 'f']

    second = tree['2']
    assert second.child_labels == ['a', 'b']
    second_a = second['a']
    assert second_a.child_labels == ['i', 'ii']
    assert second_a['i'].children == []
    assert second_a['ii'].child_labels == ['a', 'b', 'c', 'd']
    assert second['b'].children == []
def test_build_from_section_extract():
    """Paragraphs wrapped in an EXTRACT tag are grouped beneath paragraph (a).

    The EXTRACT is expected to become a text-less 'extract' node whose
    three paragraphs are 'regtext' children labelled 'p1' through 'p3'.
    """
    with section_ctx() as builder:
        builder.P("(a) aaaa")
        with builder.EXTRACT():
            builder.P("1. Some content")
            builder.P("2. Other content")
            builder.P("(3) This paragraph has parens for some reason")
    tree = NodeAccessor(section.build_from_section('8675', builder.xml)[0])

    assert tree.label == ['8675', '309']
    assert tree.child_labels == ['a']
    assert tree.text == ''
    assert tree.node_type == 'regtext'

    par_a = tree['a']
    assert par_a.text == '(a) aaaa'
    assert par_a.node_type == 'regtext'
    assert par_a.child_labels == ['p1']

    extract = par_a['p1']
    assert extract.text == ''
    assert extract.node_type == 'extract'
    assert extract.child_labels == ['p1', 'p2', 'p3']
    for extracted in extract.children:
        assert extracted.node_type == 'regtext'
    assert extract['p1'].text == "1. Some content"
    assert extract['p2'].text == "2. Other content"
    assert extract['p3'].text == (
        "(3) This paragraph has parens for some reason")
def test_build_from_section_double_collapsed():
    """Two markers collapsed after a keyterm ("(a) ...—(1)(i)") are nested.

    Expects (a) > (1) > (i), with the following "(ii)" paragraph placed
    alongside (i).
    """
    with section_ctx() as builder:
        builder.child_from_string(
            '<P>(a) <E T="03">Keyterm</E>—(1)(i) Content</P>')
        builder.P("(ii) Content2")
    tree = NodeAccessor(section.build_from_section('8675', builder.xml)[0])

    assert tree.child_labels == ['a']
    par_a = tree['a']
    assert par_a.child_labels == ['1']
    assert par_a['1'].child_labels == ['i', 'ii']
def test_build_from_section_double_collapsed(self):
    """Two markers collapsed after a keyterm ("(a) ...—(1)(i)") are nested.

    Expects (a) > (1) > (i), with the following "(ii)" paragraph placed
    alongside (i).
    """
    with self.section() as builder:
        builder.child_from_string(
            u'<P>(a) <E T="03">Keyterm</E>—(1)(i) Content</P>')
        builder.P("(ii) Content2")
    tree = NodeAccessor(reg_text.build_from_section('8675', builder.xml)[0])

    self.assertEqual(['a'], tree.child_labels)
    par_a = tree['a']
    self.assertEqual(['1'], par_a.child_labels)
    self.assertEqual(['i', 'ii'], par_a['1'].child_labels)
def test_process_collapsed_keyterm():
    """A "(1)" marker following an emphasized keyterm becomes a child of (a).

    Expects the appendix to contain 'a', which contains a childless '1'.
    """
    with XMLBuilder('APPENDIX') as builder:
        builder.EAR("Pt. 1111, App. A")
        builder.HD("Appendix A to Part 1111-Awesome", SOURCE="HED")
        builder.child_from_string(
            '<P>(a) <E T="03">Keyterm</E> (1) Content</P>')
    processor = appendices.AppendixProcessor(1111)
    tree = NodeAccessor(processor.process(builder.xml))

    assert tree.child_labels == ['a']
    par_a = tree['a']
    assert par_a.child_labels == ['1']
    assert par_a['1'].children == []
def _setup_for_ambiguous(self, final_par):
    """Build a section whose "(i)" marker could sit at two depths.

    The fixed paragraphs run (g), (h), (1), (2), (i); ``final_par`` is
    appended last as one more paragraph. Returns a ``NodeAccessor``
    wrapping the first node produced by ``reg_text.build_from_section``.
    """
    fixed_paragraphs = (
        "(g) Some Content",
        "(h) H Starts",
        "(1) H-1",
        "(2) H-2",
        "(i) Is this 8675-309-h-2-i or 8675-309-i",
    )
    with self.section() as builder:
        for paragraph in fixed_paragraphs:
            builder.P(paragraph)
        builder.P(final_par)
    built = reg_text.build_from_section('8675', builder.xml)
    return NodeAccessor(built[0])
def test_build_from_section_bad_spaces():
    """Missing spaces around an emphasized keyterm are restored in the text.

    The source paragraph has no whitespace on either side of the <E> tag;
    the resulting node text is expected to be single-spaced.
    """
    with section_ctx(section=16) as builder:
        builder.STARS()
        builder.child_from_string(
            '<P>(b)<E T="03">General.</E>Content Content.</P>')
    tree = NodeAccessor(section.build_from_section('8675', builder.xml)[0])

    assert tree.label == ['8675', '16']
    assert tree.child_labels == ['b']
    assert tree['b'].text == "(b) General. Content Content."
def test_build_from_section_intro_text(self):
    """An unmarked leading paragraph becomes the section node's own text.

    The following "(a)" paragraph is expected to be the only child and to
    have no children itself.
    """
    with self.section() as builder:
        builder.P("Some content about this section.")
        builder.P("(a) something something")
    tree = NodeAccessor(reg_text.build_from_section('8675', builder.xml)[0])

    self.assertEqual('Some content about this section.', tree.text.strip())
    self.assertEqual(['a'], tree.child_labels)
    par_a = tree['a']
    self.assertEqual('(a) something something', par_a.text.strip())
    self.assertEqual([], par_a.children)
def test_build_from_section_intro_text():
    """An unmarked leading paragraph becomes the section node's own text.

    The following "(a)" paragraph is expected to be the only child and to
    have no children itself.
    """
    with section_ctx() as builder:
        builder.P("Some content about this section.")
        builder.P("(a) something something")
    tree = NodeAccessor(section.build_from_section('8675', builder.xml)[0])

    assert tree.text == 'Some content about this section.'
    assert tree.child_labels == ['a']
    par_a = tree['a']
    assert par_a.text == '(a) something something'
    assert par_a.children == []
def test_build_from_section_bad_spaces(self):
    """Missing spaces around an emphasized keyterm are restored in the text.

    The source paragraph has no whitespace on either side of the <E> tag;
    the stripped node text is expected to be single-spaced.
    """
    with self.section(section=16) as builder:
        builder.STARS()
        builder.child_from_string(
            '<P>(b)<E T="03">General.</E>Content Content.</P>')
    tree = NodeAccessor(reg_text.build_from_section('8675', builder.xml)[0])

    self.assertEqual(['8675', '16'], tree.label)
    self.assertEqual(['b'], tree.child_labels)
    par_b_text = tree['b'].text.strip()
    self.assertEqual(par_b_text, "(b) General. Content Content.")
def test_build_from_section_collapsed(self):
    """Markers collapsed into one paragraph ("(2) ...—(i) ... (A)") nest.

    Expects (a) > (1), (2); (2) > (i); (i) > (A), (B), where (B) comes from
    the paragraph that follows the collapsed one.
    """
    with self.section() as builder:
        builder.P("(a) aaa")
        builder.P("(1) 111")
        builder.child_from_string(u'<P>(2) 222—(i) iii. (A) AAA</P>')
        builder.P("(B) BBB")
    tree = NodeAccessor(reg_text.build_from_section('8675', builder.xml)[0])

    self.assertEqual(['a'], tree.child_labels)
    par_a = tree['a']
    self.assertEqual(['1', '2'], par_a.child_labels)
    par_a2 = par_a['2']
    self.assertEqual(['i'], par_a2.child_labels)
    self.assertEqual(['A', 'B'], par_a2['i'].child_labels)
def test_build_from_section_collapsed():
    """Markers collapsed into one paragraph ("(2) ...—(i) ... (A)") nest.

    Expects (a) > (1), (2); (2) > (i); (i) > (A), (B), where (B) comes from
    the paragraph that follows the collapsed one.
    """
    with section_ctx() as builder:
        builder.P("(a) aaa")
        builder.P("(1) 111")
        builder.child_from_string('<P>(2) 222—(i) iii. (A) AAA</P>')
        builder.P("(B) BBB")
    tree = NodeAccessor(section.build_from_section('8675', builder.xml)[0])

    assert tree.child_labels == ['a']
    par_a = tree['a']
    assert par_a.child_labels == ['1', '2']
    par_a2 = par_a['2']
    assert par_a2.child_labels == ['i']
    assert par_a2['i'].child_labels == ['A', 'B']
def test_build_from_section_collapsed_level():
    """A "(1)" collapsed after a keyterm starts a new level under its parent.

    Both the dash-separated form under (a) and the space-separated form
    under (b) are expected to yield children '1' and '2'.
    """
    with section_ctx() as builder:
        builder.child_from_string(
            '<P>(a) <E T="03">Transfers </E>—(1) <E T="03">Notice.</E> '
            'follow</P>')
        builder.P("(2) More text")
        builder.child_from_string(
            '<P>(b) <E T="03">Contents</E> (1) Here</P>')
        builder.P("(2) More text")
    tree = NodeAccessor(section.build_from_section('8675', builder.xml)[0])

    assert tree.child_labels == ['a', 'b']
    assert tree['a'].child_labels == ['1', '2']
    assert tree['b'].child_labels == ['1', '2']
def test_build_from_section_collapsed_level(self):
    """A "(1)" collapsed after a keyterm starts a new level under its parent.

    Both the dash-separated form under (a) and the space-separated form
    under (b) are expected to yield children '1' and '2'.
    """
    with self.section() as builder:
        builder.child_from_string(
            u'<P>(a) <E T="03">Transfers </E>—(1) <E T="03">Notice.</E> '
            u'follow</P>')
        builder.P("(2) More text")
        builder.child_from_string(
            '<P>(b) <E T="03">Contents</E> (1) Here</P>')
        builder.P("(2) More text")
    tree = NodeAccessor(reg_text.build_from_section('8675', builder.xml)[0])

    self.assertEqual(['a', 'b'], tree.child_labels)
    self.assertEqual(['1', '2'], tree['a'].child_labels)
    self.assertEqual(['1', '2'], tree['b'].child_labels)
def test_build_from_section_collapsed_level_emph():
    """An emphasized "(1)" collapsed after "(A) AAA—" becomes a child of (A).

    Expects (A) to keep the text up to and including the dash, and its
    child '1' to hold the remainder.
    """
    with section_ctx() as builder:
        builder.P("(a) aaaa")
        builder.P("(1) 1111")
        builder.P("(i) iiii")
        builder.child_from_string('<P>(A) AAA—(<E T="03">1</E>) eeee</P>')
        builder.STARS()
    tree = NodeAccessor(section.build_from_section('8675', builder.xml)[0])

    upper_a = tree['a']['1']['i']['A']
    assert upper_a.text == "(A) AAA—"
    assert upper_a.child_labels == ['1']
    assert upper_a['1'].text == "(1) eeee"
def test_build_from_section_fp():
    """An FP element following (b) is attached to (b) as child 'p1'.

    Paragraphs (a) and (c) are expected to stay childless.
    """
    with section_ctx() as builder:
        builder.P("(a) aaa")
        builder.P("(b) bbb")
        builder.FP("fpfpfp")
        builder.P("(c) ccc")
    tree = NodeAccessor(section.build_from_section('8675', builder.xml)[0])

    assert tree.child_labels == ['a', 'b', 'c']
    assert tree['a'].child_labels == []
    par_b = tree['b']
    assert par_b.child_labels == ['p1']
    assert par_b['p1'].child_labels == []
    assert tree['c'].child_labels == []
def test_build_from_section_collapsed_level_emph(self):
    """An emphasized "(1)" collapsed after "(A) AAA—" becomes a child of (A).

    Expects (A) to keep the text up to and including the dash, and its
    child '1' to hold the remainder (compared after stripping).
    """
    with self.section() as builder:
        builder.P("(a) aaaa")
        builder.P("(1) 1111")
        builder.P("(i) iiii")
        builder.child_from_string(u'<P>(A) AAA—(<E T="03">1</E>) eeee</P>')
        builder.STARS()
    tree = NodeAccessor(reg_text.build_from_section('8675', builder.xml)[0])

    upper_a = tree['a']['1']['i']['A']
    self.assertEqual(u"(A) AAA—", upper_a.text)
    self.assertEqual(['1'], upper_a.child_labels)
    self.assertEqual("(1) eeee", upper_a['1'].text.strip())
def test_build_from_section_fp(self):
    """An FP element following (b) is attached to (b) as child 'p1'.

    Paragraphs (a) and (c) are expected to stay childless.
    """
    with self.section() as builder:
        builder.P("(a) aaa")
        builder.P("(b) bbb")
        builder.FP("fpfpfp")
        builder.P("(c) ccc")
    tree = NodeAccessor(reg_text.build_from_section('8675', builder.xml)[0])

    self.assertEqual(['a', 'b', 'c'], tree.child_labels)
    self.assertEqual([], tree['a'].child_labels)
    par_b = tree['b']
    self.assertEqual(['p1'], par_b.child_labels)
    self.assertEqual([], par_b['p1'].child_labels)
    self.assertEqual([], tree['c'].child_labels)
def test_build_from_section_notes():
    """Paragraphs wrapped in a NOTES tag are grouped under a NOTE node.

    The NOTES element also holds a PRTPAGE tag; only the two numbered
    paragraphs are expected as children ('1', '2') of the note.
    """
    with section_ctx() as builder:
        builder.P("(a) aaaa")
        with builder.NOTES():
            builder.PRTPAGE(P="8")
            builder.P("1. Some content")
            builder.P("2. Other content")
    built = section.build_from_section('8675', builder.xml)
    tree = NodeAccessor(built[0])

    assert tree.child_labels == ['a']
    par_a = tree['a']
    assert par_a.child_labels == ['p1']
    note = par_a['p1']
    assert note.node_type == Node.NOTE
    assert note.child_labels == ['1', '2']
def test_process_collapsed():
    """A "(1)" after a citation is a collapsed child; the citation is not.

    Paragraph (a) references "§ 999.2(a)(1)" and then starts "(1) Content";
    (a) is expected to have children '1' and '2' with the asserted texts.
    """
    with XMLBuilder('APPENDIX') as builder:
        builder.EAR("Pt. 1111, App. A")
        builder.HD("Appendix A to Part 1111-Awesome", SOURCE="HED")
        builder.HD("Part I - Something", SOURCE="HD1")
        builder.P(u"(a) Something referencing § 999.2(a)(1). (1) Content")
        builder.P("(2) Something else")
    processor = appendices.AppendixProcessor(1111)
    tree = NodeAccessor(processor.process(builder.xml))

    assert tree.child_labels == ['I']
    part_one = tree['I']
    assert part_one.child_labels == ['a']
    par_a = part_one['a']
    assert par_a.child_labels == ['1', '2']
    assert par_a['1'].text == '(1) Content'
    assert par_a['2'].text == '(2) Something else'
def test_process_header_depth():
    """Two HD1 headers become siblings 'h1' and 'h2' under the appendix.

    Each header is expected to own the paragraphs that follow it.
    """
    with XMLBuilder('APPENDIX') as builder:
        builder.EAR("Pt. 1111, App. A")
        builder.HD("Appendix A to Part 1111-Awesome", SOURCE="HED")
        builder.HD("Title 1", SOURCE="HD1")
        builder.P("(1) Content 1")
        builder.P("(2) Content 2")
        builder.HD("Title 2", SOURCE="HD1")
        builder.P("A. Content")
    processor = appendices.AppendixProcessor(1111)
    tree = NodeAccessor(processor.process(builder.xml))

    assert tree.child_labels == ['h1', 'h2']
    assert tree['h1'].child_labels == ['1', '2']
    assert tree['h2'].child_labels == ['A']
def test_build_from_section_image(self):
    """Images (GPH/GID) are processed into a child node.

    A GPH containing GID "a-gid" follows "(a)"; it is expected to appear
    as child 'p1' of (a) with the text '![](a-gid)'.
    """
    with XMLBuilder("SECTION", "\n\n") as builder:
        builder.SECTNO(u"§ 8675.309")
        builder.SUBJECT("subsubsub")
        builder.P("(a) aaa")
        with builder.GPH():
            builder.GID("a-gid")
        builder.P("(b) bbb")
    built = reg_text.build_from_section('8675', builder.xml)
    tree = NodeAccessor(built[0])

    self.assertEqual(['a', 'b'], tree.child_labels)
    par_a = tree['a']
    self.assertEqual(['p1'], par_a.child_labels)
    self.assertEqual('![](a-gid)', par_a['p1'].text)
def test_build_from_section_notes(self):
    """Paragraphs wrapped in a NOTES tag are grouped under a NOTE node.

    The NOTES element also holds a PRTPAGE tag; only the two numbered
    paragraphs are expected as children ('1', '2') of the note.
    """
    with self.section() as builder:
        builder.P("(a) aaaa")
        with builder.NOTES():
            builder.PRTPAGE(P="8")
            builder.P("1. Some content")
            builder.P("2. Other content")
    built = reg_text.build_from_section('8675', builder.xml)
    tree = NodeAccessor(built[0])

    self.assertEqual(['a'], tree.child_labels)
    par_a = tree['a']
    self.assertEqual(['p1'], par_a.child_labels)
    note = par_a['p1']
    self.assertEqual(Node.NOTE, note.node_type)
    self.assertEqual(['1', '2'], note.child_labels)
def test_build_from_section_italic_levels():
    """Emphasized "(1)"/"(2)" markers form a level beneath (A).

    The second italic paragraph starts with a newline before its marker;
    it is still expected to be recognized as '2'.
    """
    with section_ctx() as builder:
        builder.P("(a) aaa")
        builder.P("(1) 111")
        builder.P("(i) iii")
        builder.P("(A) AAA")
        builder.child_from_string('<P>(<E T="03">1</E>) i1i1i1</P>')
        builder.child_from_string('<P>\n(<E T="03">2</E>) i2i2i2</P>')
    tree = NodeAccessor(section.build_from_section('8675', builder.xml)[0])

    assert tree.child_labels == ['a']
    par_a = tree['a']
    assert par_a.child_labels == ['1']
    par_a1 = par_a['1']
    assert par_a1.child_labels == ['i']
    par_a1i = par_a1['i']
    assert par_a1i.child_labels == ['A']
    assert par_a1i['A'].child_labels == ['1', '2']
def test_build_from_section_italic_levels(self):
    """Emphasized "(1)"/"(2)" markers form a level beneath (A).

    The second italic paragraph starts with a newline before its marker;
    it is still expected to be recognized as '2'.
    """
    with self.section() as builder:
        builder.P("(a) aaa")
        builder.P("(1) 111")
        builder.P("(i) iii")
        builder.P("(A) AAA")
        builder.child_from_string('<P>(<E T="03">1</E>) i1i1i1</P>')
        builder.child_from_string('<P>\n(<E T="03">2</E>) i2i2i2</P>')
    tree = NodeAccessor(reg_text.build_from_section('8675', builder.xml)[0])

    self.assertEqual(['a'], tree.child_labels)
    par_a = tree['a']
    self.assertEqual(['1'], par_a.child_labels)
    par_a1 = par_a['1']
    self.assertEqual(['i'], par_a1.child_labels)
    par_a1i = par_a1['i']
    self.assertEqual(['A'], par_a1i.child_labels)
    self.assertEqual(['1', '2'], par_a1i['A'].child_labels)
def test_process_separated_by_header():
    """A header between "(1)" and "(2)" does not break their sibling level.

    Expects (a) to hold '1', '2' and the trailing markerless 'p1', with the
    header attached as 'h1' beneath '1'.
    """
    with XMLBuilder('APPENDIX') as builder:
        builder.EAR('Pt. 1111, App. A')
        builder.HD('Appendix A to Part 1111-Awesome', SOURCE='HED')
        builder.P('(a) aaaaaa')
        builder.P('(1) 111111')
        builder.HD('Random Header', SOURCE='HD1')
        builder.P('(2) 222222')
        builder.P('Markerless')
    processor = appendices.AppendixProcessor(1111)
    tree = NodeAccessor(processor.process(builder.xml))

    assert tree.child_labels == ['a']
    par_a = tree['a']
    assert par_a.child_labels == ['1', '2', 'p1']
    assert par_a['1'].child_labels == ['h1']
    assert par_a['2'].children == []
    assert par_a['p1'].children == []
def test_process_markerless_collapsed():
    """Collapsed markers inside a markerless paragraph are still found.

    The keyterm paragraph has no marker of its own but contains "(a)"; the
    root is expected to have two markerless children, the second of which
    owns 'a' and 'b'.
    """
    with XMLBuilder("ROOT") as builder:
        builder.P("Intro text")
        builder.child_from_string(
            '<P><E T="03">Some term.</E> (a) First definition</P>')
        builder.P("(b) Second definition")
    parent = Node(label=['111', '22'])
    processed = section.RegtextParagraphProcessor().process(
        builder.xml, parent)
    tree = NodeAccessor(processed)

    assert tree.label == ['111', '22']
    assert len(tree.children) == 2
    assert all(kid.is_markerless for kid in tree.children)

    # The keyterm paragraph is the second child; the test only pins that
    # its label is longer than five characters.
    keyterm_label = tree.child_labels[1]
    assert len(keyterm_label) > 5
    assert tree[keyterm_label].child_labels == ['a', 'b']
def test_header_ordering():
    """An HD3 followed by an HD2 nests by document order, not by HD number.

    Expects "Level 1" (HD3) as 'h1' and "Level 2" (HD2) as 'h2' beneath it,
    with the paragraph under 'h2'; the second HD1 starts section '1(A)'.
    """
    with XMLBuilder('APPENDIX') as builder:
        builder.EAR("Pt. 1111, App. A")
        builder.HD("Appendix A to Part 1111-Awesome", SOURCE='HED')
        builder.HD("A-1 Content", SOURCE='HD1')
        # Deliberately out of order: HD3 comes before HD2.
        builder.HD("Level 1", SOURCE='HD3')
        builder.HD("Level 2", SOURCE='HD2')
        builder.P("Paragraph")
        builder.HD("A-1(A) More Content", SOURCE='HD1')
        builder.P("A1A Paragraph")
    tree = NodeAccessor(appendices.process_appendix(builder.xml, 1111))

    assert tree.child_labels == ['1', '1(A)']
    first = tree['1']
    assert first.child_labels == ['h1']
    level_one = first['h1']
    assert level_one.child_labels == ['h2']
    level_two = level_one['h2']
    assert level_two.child_labels == ['p1']
    assert level_two['p1'].children == []