def test_build_whole_regtree_missing_interp(self):
    """Not all regs have an interpretation section.

    The sample text holds one section and one appendix but no
    "Supplement I" heading, so the expected root node has only two
    children: the EMPTYPART wrapper around the section, and the appendix.
    """
    text = "PART 200-Regulation Q\n"
    text += u"§ 200.1 First section.\n"
    text += "Section content\n"
    text += "Appendix A to Part 200 - Appendix Title\n"
    text += "Appendix content"

    node200_1 = Node("\nSection content\n", label=['200', '1'],
                     title=u"§ 200.1 First section.", children=[],
                     node_type=Node.REGTEXT)
    nodeA = Node("\nAppendix content", label=["200", "A"],
                 title="Appendix A to Part 200 - Appendix Title",
                 children=[], node_type=Node.APPENDIX)
    nodeEP = Node('', label=['200', 'Subpart'], title='',
                  children=[node200_1], node_type=Node.EMPTYPART)
    expected = Node("\n", label=["200"], title="PART 200-Regulation Q",
                    children=[nodeEP, nodeA])

    # Convert to JSON so we can ignore some unicode issues
    enc = NodeEncoder(sort_keys=True)
    # The text is parsed exactly once, directly inside the comparison.
    self.assertEqual(enc.encode(build_whole_regtree(text)),
                     enc.encode(expected))
def test_build_whole_regtree_missing_interp(self):
    """Not all regs have an interpretation section.

    Parses a regulation made of a single section plus a single appendix
    (no "Supplement I" heading) and checks the resulting tree against a
    hand-built one whose root has just the EMPTYPART wrapper and the
    appendix as children.
    """
    text = "PART 200-Regulation Q\n"
    text += u"§ 200.1 First section.\n"
    text += "Section content\n"
    text += "Appendix A to Part 200 - Appendix Title\n"
    text += "Appendix content"

    node200_1 = Node(
        "\nSection content\n",
        label=['200', '1'],
        title=u"§ 200.1 First section.",
        children=[],
        node_type=Node.REGTEXT)
    nodeA = Node(
        "\nAppendix content",
        label=["200", "A"],
        title="Appendix A to Part 200 - Appendix Title",
        children=[],
        node_type=Node.APPENDIX)
    nodeEP = Node(
        '',
        label=['200', 'Subpart'],
        title='',
        children=[node200_1],
        node_type=Node.EMPTYPART)
    expected = Node(
        "\n",
        label=["200"],
        title="PART 200-Regulation Q",
        children=[nodeEP, nodeA])

    # Parse once and assert on that result.
    actual = build_whole_regtree(text)

    # Convert to JSON so we can ignore some unicode issues
    enc = NodeEncoder(sort_keys=True)
    self.assertEqual(enc.encode(actual), enc.encode(expected))
def test_build_whole_regtree(self):
    """Integration test for the plain-text regulation tree parser

    The sample text contains two sections, one appendix and a
    "Supplement I" interpretation block; the expected root therefore has
    three children: the EMPTYPART wrapper around both sections, the
    appendix node, and the interpretation node.
    """
    text = "PART 200-Regulation Q\n"
    text += u"§ 200.1 First section.\n"
    text += "(a) First par\n"
    text += "(b) Second par\n"
    text += u"§ 200.2 Second section.\n"
    text += "Content without sub pars\n"
    text += "Appendix A to Part 200 - Appendix Title\n"
    text += "A-1 Appendix 1\n"
    text += "(a) Appendix par 1\n"
    text += "Supplement I to Part 200 - Official Interpretations\n"
    text += "Section 200.2 Second section\n"
    text += "2(a)(5) First par\n"
    text += "1. Commentary 1\n"
    text += "2. Commentary 2\n"

    # Regulation text: one section with two paragraphs, one without any.
    node201 = Node("\n", label=['200', '1'],
                   title=u"§ 200.1 First section.", children=[
                       Node(u"(a) First par\n", label=["200", "1", "a"]),
                       Node(u"(b) Second par\n", label=["200", "1", "b"])
                   ])
    node202 = Node("\nContent without sub pars\n", label=["200", "2"],
                   title=u"§ 200.2 Second section.")

    # Appendix A -> A-1 -> paragraph (a).
    nodeA = Node("\n", label=["200", "A"], node_type=Node.APPENDIX,
                 title="Appendix A to Part 200 - Appendix Title",
                 children=[
                     Node("\n", label=["200", "A", "1"],
                          title="A-1 Appendix 1",
                          node_type=Node.APPENDIX,
                          children=[
                              Node("(a) Appendix par 1\n",
                                   node_type=Node.APPENDIX,
                                   label=["200", "A", "1", "a"])
                          ])
                 ])

    # Supplement I -> Section 200.2 -> 2(a)(5) -> two commentary entries.
    nodeI1 = Node('1. Commentary 1\n', node_type=Node.INTERP,
                  label=['200', '2', 'a', '5', Node.INTERP_MARK, '1'])
    nodeI2 = Node('2. Commentary 2\n', node_type=Node.INTERP,
                  label=['200', '2', 'a', '5', Node.INTERP_MARK, '2'])
    nodeI = Node(
        '\n', label=['200', Node.INTERP_MARK], node_type=Node.INTERP,
        title='Supplement I to Part 200 - Official Interpretations',
        children=[
            Node('\n', label=['200', '2', Node.INTERP_MARK],
                 node_type=Node.INTERP,
                 title='Section 200.2 Second section',
                 children=[
                     Node('\n',
                          label=['200', '2', 'a', '5', Node.INTERP_MARK],
                          node_type=Node.INTERP,
                          title='2(a)(5) First par',
                          children=[nodeI1, nodeI2])
                 ])
        ])

    nodeEP = Node('', label=['200', 'Subpart'], title='',
                  children=[node201, node202], node_type=Node.EMPTYPART)
    expected = Node("\n", label=["200"], title="PART 200-Regulation Q",
                    children=[nodeEP, nodeA, nodeI])

    # Convert to JSON so we can ignore some unicode issues
    enc = NodeEncoder(sort_keys=True)
    # The text is parsed exactly once, directly inside the comparison.
    self.assertEqual(enc.encode(build_whole_regtree(text)),
                     enc.encode(expected))
def test_build_whole_regtree(self):
    """Integration test for the plain-text regulation tree parser

    Feeds the parser a small regulation with two sections, an appendix
    and a "Supplement I" interpretation block, then compares the parsed
    tree with a hand-built one (EMPTYPART wrapper, appendix, interp).
    """
    text = "PART 200-Regulation Q\n"
    text += u"§ 200.1 First section.\n"
    text += "(a) First par\n"
    text += "(b) Second par\n"
    text += u"§ 200.2 Second section.\n"
    text += "Content without sub pars\n"
    text += "Appendix A to Part 200 - Appendix Title\n"
    text += "A-1 Appendix 1\n"
    text += "(a) Appendix par 1\n"
    text += "Supplement I to Part 200 - Official Interpretations\n"
    text += "Section 200.2 Second section\n"
    text += "2(a)(5) First par\n"
    text += "1. Commentary 1\n"
    text += "2. Commentary 2\n"

    # Expected regulation-text sections.
    node201 = Node(
        "\n",
        label=['200', '1'],
        title=u"§ 200.1 First section.",
        children=[
            Node(u"(a) First par\n", label=["200", "1", "a"]),
            Node(u"(b) Second par\n", label=["200", "1", "b"]),
        ])
    node202 = Node(
        "\nContent without sub pars\n",
        label=["200", "2"],
        title=u"§ 200.2 Second section.")

    # Expected appendix subtree.
    appendix_par = Node(
        "(a) Appendix par 1\n",
        node_type=Node.APPENDIX,
        label=["200", "A", "1", "a"])
    nodeA = Node(
        "\n",
        label=["200", "A"],
        node_type=Node.APPENDIX,
        title="Appendix A to Part 200 - Appendix Title",
        children=[
            Node("\n", label=["200", "A", "1"], title="A-1 Appendix 1",
                 node_type=Node.APPENDIX, children=[appendix_par]),
        ])

    # Expected interpretation subtree.
    nodeI1 = Node(
        '1. Commentary 1\n',
        node_type=Node.INTERP,
        label=['200', '2', 'a', '5', Node.INTERP_MARK, '1'])
    nodeI2 = Node(
        '2. Commentary 2\n',
        node_type=Node.INTERP,
        label=['200', '2', 'a', '5', Node.INTERP_MARK, '2'])
    nodeI = Node(
        '\n',
        label=['200', Node.INTERP_MARK],
        node_type=Node.INTERP,
        title='Supplement I to Part 200 - Official Interpretations',
        children=[
            Node('\n', label=['200', '2', Node.INTERP_MARK],
                 node_type=Node.INTERP,
                 title='Section 200.2 Second section',
                 children=[
                     Node('\n',
                          label=['200', '2', 'a', '5', Node.INTERP_MARK],
                          node_type=Node.INTERP,
                          title='2(a)(5) First par',
                          children=[nodeI1, nodeI2]),
                 ]),
        ])

    nodeEP = Node(
        '',
        label=['200', 'Subpart'],
        title='',
        children=[node201, node202],
        node_type=Node.EMPTYPART)
    expected = Node(
        "\n",
        label=["200"],
        title="PART 200-Regulation Q",
        children=[nodeEP, nodeA, nodeI])

    # Parse once and assert on that result.
    actual = build_whole_regtree(text)

    # Convert to JSON so we can ignore some unicode issues
    enc = NodeEncoder(sort_keys=True)
    self.assertEqual(enc.encode(actual), enc.encode(expected))
def generate_key_terms(reg):
    """ Generate the key terms layer and print its NodeEncoder encoding.

    :param reg: object handed unchanged to ``key_terms.KeyTerms``
        (presumably the parsed regulation tree -- confirm against callers).
    """
    layer_generator = key_terms.KeyTerms(reg)
    # A single build() is enough: its return value is what gets encoded.
    print(NodeEncoder().encode(layer_generator.build()))
from regparser.tree.struct import NodeEncoder
from regparser.tree.xml_parser import reg_text

# Script: read one regulation XML file, build its tree with
# reg_text.build_tree, and print the tree as encoded by NodeEncoder.

# Hard-coded location of the XML input.
reg_xml_file = '/vagrant/data/regulations/regulation/rege-2011-31725.xml'

# Use a context manager so the file handle is closed as soon as the content
# has been read, rather than whenever the object happens to be collected.
with open(reg_xml_file, 'r') as xml_file:
    reg_xml = xml_file.read()

tree = reg_text.build_tree(reg_xml)
print(NodeEncoder().encode(tree))
def generate_interpretations(reg):
    """ Build the Interpretations layer for `reg` and print it encoded. """
    builder = interpretations.Interpretations(reg)
    encoder = NodeEncoder()
    interp_layer = builder.build()
    print(encoder.encode(interp_layer))
def generate_terms(reg):
    """ Build the Terms layer for `reg` and print it encoded. """
    builder = terms.Terms(reg)
    encoder = NodeEncoder()
    terms_layer = builder.build()
    print(encoder.encode(terms_layer))
def generate_table_of_contents(reg_json):
    """ Build the Table of Contents layer for `reg_json` and print it. """
    toc_layer = table_of_contents.TableOfContentsLayer(reg_json).build()
    encoded = NodeEncoder().encode(toc_layer)
    print(encoded)
def generate_internal_citations(reg_json):
    """ Build the internal citations layer for `reg_json` and print it. """
    citations = internal_citations.InternalCitationParser(reg_json).build()
    encoded = NodeEncoder().encode(citations)
    print(encoded)
def generate_external_citations(reg_json):
    """ Build the external citations layer for `reg_json` and print it. """
    citations = external_citations.ExternalCitationParser(reg_json).build()
    encoded = NodeEncoder().encode(citations)
    print(encoded)
import sys
from lxml import etree
from regparser.tree.struct import NodeEncoder
from regparser.notice import find_section_by_section, fetch_document_number
from regparser.notice import build_section_by_section, fetch_cfr_part

# Script: parse a rule XML file given on the command line and print its
# document number, CFR part and section-by-section data, encoded with
# NodeEncoder.
if __name__ == "__main__":
    if len(sys.argv) < 2:
        print("Usage: python generate_notice path/to/rule.xml")
        print(" e.g.: python generate_notice 28.xml")
        # sys.exit instead of the site-provided exit() helper (which is
        # meant for the interactive shell); non-zero status flags the
        # usage error to calling shells.
        sys.exit(1)

    rule = etree.parse(sys.argv[1])
    part = fetch_cfr_part(rule)

    # Locate the section-by-section material, then build it for this part.
    sxs = find_section_by_section(rule)
    sxs = build_section_by_section(sxs, part)

    print(NodeEncoder().encode({
        'document_number': fetch_document_number(rule),
        'cfr_part': part,
        'section_by_section': sxs
    }))
import codecs
import sys
from regparser.tree.appendix.tree import trees_from as appendix_trees
from regparser.tree.interpretation import build as build_interp_tree
from regparser.tree.reg_text import build_reg_text_tree
from regparser.tree.struct import NodeEncoder
from regparser.tree.supplement import find_supplement_start

# Script: build the regulation-text tree from a plain-text file, attach the
# appendix trees and the interpretation tree as children of the root, and
# print the result encoded with NodeEncoder.
if __name__ == "__main__":
    if len(sys.argv) < 3:
        print("Usage: python generate_tree.py path/to/reg.txt part")
        print(" e.g.: python generate_tree.py rege.txt 1005")
        # sys.exit instead of the site-provided exit() helper (which is
        # meant for the interactive shell); non-zero status flags the
        # usage error to calling shells.
        sys.exit(1)

    # codecs.open with an encoding already decodes to unicode text, so no
    # extra unicode() conversion is needed.
    with codecs.open(sys.argv[1], encoding='utf-8') as f:
        reg = f.read()

    # The interpretation text is everything from the supplement start on.
    interp = reg[find_supplement_start(reg):]
    part = int(sys.argv[2])

    reg_tree = build_reg_text_tree(reg, part)
    interp_tree = build_interp_tree(interp, part)
    # Bind the result to its own name rather than rebinding the imported
    # appendix_trees function.
    appendices = appendix_trees(reg, part, reg_tree.label)

    # Children order: existing reg text, then appendices, then interp.
    reg_tree.children.extend(appendices)
    reg_tree.children.append(interp_tree)

    print(NodeEncoder().encode(reg_tree))