def test_build_whole_regtree_missing_interp(self):
        """Not all regs have an interpretation section.

        The input text has a part header, one section and one appendix, but
        no "Supplement I" block, so the expected root has only two children:
        the EMPTYPART node wrapping the section, and the appendix node.
        """
        text = "PART 200-Regulation Q\n"
        text += u"§ 200.1 First section.\n"
        text += "Section content\n"
        text += "Appendix A to Part 200 - Appendix Title\n"
        text += "Appendix content"

        node200_1 = Node("\nSection content\n", label=['200', '1'],
                         title=u"§ 200.1 First section.", children=[],
                         node_type=Node.REGTEXT)
        nodeA = Node("\nAppendix content", label=["200", "A"],
                     title="Appendix A to Part 200 - Appendix Title",
                     children=[], node_type=Node.APPENDIX)
        nodeEP = Node('', label=['200', 'Subpart'], title='',
                      children=[node200_1], node_type=Node.EMPTYPART)
        expected = Node("\n", label=["200"], title="PART 200-Regulation Q",
                        children=[nodeEP, nodeA])

        # Parse once and assert on that result; previously the parsed tree
        # was stored in an unused local and the text was parsed a second time.
        actual = build_whole_regtree(text)
        #   Convert to JSON so we can ignore some unicode issues
        enc = NodeEncoder(sort_keys=True)
        self.assertEqual(enc.encode(actual), enc.encode(expected))
    def test_build_whole_regtree_missing_interp(self):
        """Not all regs have an interpretation section.

        Feeds the parser a part header, a single section and a single
        appendix (no "Supplement I" text) and checks that the resulting root
        node contains just the EMPTYPART wrapper and the appendix.
        """
        text = "PART 200-Regulation Q\n"
        text += u"§ 200.1 First section.\n"
        text += "Section content\n"
        text += "Appendix A to Part 200 - Appendix Title\n"
        text += "Appendix content"

        node200_1 = Node("\nSection content\n",
                         label=['200', '1'],
                         title=u"§ 200.1 First section.",
                         children=[],
                         node_type=Node.REGTEXT)
        nodeA = Node("\nAppendix content",
                     label=["200", "A"],
                     title="Appendix A to Part 200 - Appendix Title",
                     children=[],
                     node_type=Node.APPENDIX)
        nodeEP = Node('',
                      label=['200', 'Subpart'],
                      title='',
                      children=[node200_1],
                      node_type=Node.EMPTYPART)
        expected = Node("\n",
                        label=["200"],
                        title="PART 200-Regulation Q",
                        children=[nodeEP, nodeA])

        # The tree is built exactly once; the earlier version kept the first
        # result in an unused variable and re-ran the parser in the assert.
        actual = build_whole_regtree(text)
        #   Convert to JSON so we can ignore some unicode issues
        enc = NodeEncoder(sort_keys=True)
        self.assertEqual(enc.encode(actual), enc.encode(expected))
    def test_build_whole_regtree(self):
        """Integration test for the plain-text regulation tree parser.

        The input covers every top-level piece handled here: two sections
        (one with lettered paragraphs, one without), an appendix with a
        nested heading and paragraph, and a "Supplement I" interpretation
        block with two numbered comments. The expected tree is built by hand
        and compared to the parser output via its JSON encoding.
        """
        text = "PART 200-Regulation Q\n"
        text += u"§ 200.1 First section.\n"
        text += "(a) First par\n"
        text += "(b) Second par\n"
        text += u"§ 200.2 Second section.\n"
        text += "Content without sub pars\n"
        text += "Appendix A to Part 200 - Appendix Title\n"
        text += "A-1 Appendix 1\n"
        text += "(a) Appendix par 1\n"
        text += "Supplement I to Part 200 - Official Interpretations\n"
        text += "Section 200.2 Second section\n"
        text += "2(a)(5) First par\n"
        text += "1. Commentary 1\n"
        text += "2. Commentary 2\n"

        # Regulation text: the two sections
        node201 = Node("\n",
                       label=['200', '1'],
                       title=u"§ 200.1 First section.",
                       children=[
                           Node(u"(a) First par\n", label=["200", "1", "a"]),
                           Node(u"(b) Second par\n", label=["200", "1", "b"])
                       ])
        node202 = Node("\nContent without sub pars\n",
                       label=["200", "2"],
                       title=u"§ 200.2 Second section.")
        # Appendix A -> A-1 -> (a)
        nodeA = Node("\n",
                     label=["200", "A"],
                     node_type=Node.APPENDIX,
                     title="Appendix A to Part 200 - Appendix Title",
                     children=[
                         Node("\n",
                              label=["200", "A", "1"],
                              title="A-1 Appendix 1",
                              node_type=Node.APPENDIX,
                              children=[
                                  Node("(a) Appendix par 1\n",
                                       node_type=Node.APPENDIX,
                                       label=["200", "A", "1", "a"])
                              ])
                     ])
        # Interpretations: supplement -> section 200.2 -> 2(a)(5) -> comments
        nodeI1 = Node('1. Commentary 1\n',
                      node_type=Node.INTERP,
                      label=['200', '2', 'a', '5', Node.INTERP_MARK, '1'])
        nodeI2 = Node('2. Commentary 2\n',
                      node_type=Node.INTERP,
                      label=['200', '2', 'a', '5', Node.INTERP_MARK, '2'])
        nodeI = Node(
            '\n',
            label=['200', Node.INTERP_MARK],
            node_type=Node.INTERP,
            title='Supplement I to Part 200 - Official Interpretations',
            children=[
                Node('\n',
                     label=['200', '2', Node.INTERP_MARK],
                     node_type=Node.INTERP,
                     title='Section 200.2 Second section',
                     children=[
                         Node('\n',
                              label=['200', '2', 'a', '5', Node.INTERP_MARK],
                              node_type=Node.INTERP,
                              title='2(a)(5) First par',
                              children=[nodeI1, nodeI2])
                     ])
            ])
        nodeEP = Node('',
                      label=['200', 'Subpart'],
                      title='',
                      children=[node201, node202],
                      node_type=Node.EMPTYPART)
        expected = Node("\n",
                        label=["200"],
                        title="PART 200-Regulation Q",
                        children=[nodeEP, nodeA, nodeI])

        # Parse once and compare that result; the previous version left the
        # first parse in an unused local and parsed again inside the assert.
        actual = build_whole_regtree(text)
        #   Convert to JSON so we can ignore some unicode issues
        enc = NodeEncoder(sort_keys=True)
        self.assertEqual(enc.encode(actual), enc.encode(expected))
    def test_build_whole_regtree(self):
        """Integration test for the plain-text regulation tree parser.

        Parses a small regulation containing two sections, one appendix and
        a "Supplement I" interpretation block, then compares the JSON
        encoding of the result with a hand-built expected tree.
        """
        text = "PART 200-Regulation Q\n"
        text += u"§ 200.1 First section.\n"
        text += "(a) First par\n"
        text += "(b) Second par\n"
        text += u"§ 200.2 Second section.\n"
        text += "Content without sub pars\n"
        text += "Appendix A to Part 200 - Appendix Title\n"
        text += "A-1 Appendix 1\n"
        text += "(a) Appendix par 1\n"
        text += "Supplement I to Part 200 - Official Interpretations\n"
        text += "Section 200.2 Second section\n"
        text += "2(a)(5) First par\n"
        text += "1. Commentary 1\n"
        text += "2. Commentary 2\n"

        # Regulation text: the two sections
        node201 = Node("\n", label=['200', '1'],
                       title=u"§ 200.1 First section.", children=[
                           Node(u"(a) First par\n", label=["200", "1", "a"]),
                           Node(u"(b) Second par\n", label=["200", "1", "b"])
                       ])
        node202 = Node("\nContent without sub pars\n", label=["200", "2"],
                       title=u"§ 200.2 Second section.")
        # Appendix A -> A-1 -> (a)
        nodeA = Node(
            "\n", label=["200", "A"], node_type=Node.APPENDIX,
            title="Appendix A to Part 200 - Appendix Title", children=[
                Node("\n",
                     label=["200", "A", "1"],
                     title="A-1 Appendix 1",
                     node_type=Node.APPENDIX,
                     children=[Node("(a) Appendix par 1\n",
                                    node_type=Node.APPENDIX,
                                    label=["200", "A", "1", "a"])])
            ]
        )
        # Interpretations: supplement -> section 200.2 -> 2(a)(5) -> comments
        nodeI1 = Node('1. Commentary 1\n', node_type=Node.INTERP,
                      label=['200', '2', 'a', '5', Node.INTERP_MARK, '1'])
        nodeI2 = Node('2. Commentary 2\n', node_type=Node.INTERP,
                      label=['200', '2', 'a', '5', Node.INTERP_MARK, '2'])
        nodeI = Node(
            '\n', label=['200', Node.INTERP_MARK], node_type=Node.INTERP,
            title='Supplement I to Part 200 - Official Interpretations',
            children=[
                Node('\n', label=['200', '2', Node.INTERP_MARK],
                     node_type=Node.INTERP,
                     title='Section 200.2 Second section',
                     children=[
                         Node('\n', label=['200', '2', 'a', '5',
                                           Node.INTERP_MARK],
                              node_type=Node.INTERP, title='2(a)(5) First par',
                              children=[nodeI1, nodeI2])
                     ])
            ]
        )
        nodeEP = Node('', label=['200', 'Subpart'], title='',
                      children=[node201, node202], node_type=Node.EMPTYPART)
        expected = Node("\n", label=["200"], title="PART 200-Regulation Q",
                        children=[nodeEP, nodeA, nodeI])

        # Build the tree a single time and assert on it; before, the first
        # result sat in an unused variable while the assert parsed again.
        actual = build_whole_regtree(text)
        #   Convert to JSON so we can ignore some unicode issues
        enc = NodeEncoder(sort_keys=True)
        self.assertEqual(enc.encode(actual), enc.encode(expected))
Example #5
0
def generate_key_terms(reg):
    """Generate the key terms layer and print it.

    Builds a ``key_terms.KeyTerms`` layer from ``reg`` and writes its
    ``NodeEncoder`` encoding to standard output.

    :param reg: regulation object handed straight to ``key_terms.KeyTerms``.
    """
    layer_generator = key_terms.KeyTerms(reg)
    # Build once: the earlier version called build() twice and threw the
    # first result away.
    layer = layer_generator.build()
    # Single-argument parenthesised print behaves the same as the Python 2
    # print statement used elsewhere in this file.
    print(NodeEncoder().encode(layer))
from regparser.tree.struct import NodeEncoder
from regparser.tree.xml_parser import reg_text

# Read one regulation XML file, build its tree and print the encoded result.
reg_xml_file = '/vagrant/data/regulations/regulation/rege-2011-31725.xml'
# Use a context manager so the file handle is closed deterministically
# instead of being left to the garbage collector.
with open(reg_xml_file, 'r') as xml_handle:
    reg_xml = xml_handle.read()

tree = reg_text.build_tree(reg_xml)
# Single-argument parenthesised print is equivalent to the Python 2 print
# statement.
print(NodeEncoder().encode(tree))
Example #7
0
def generate_interpretations(reg):
    """Build the Interpretations layer for ``reg`` and print it.

    The layer comes from ``interpretations.Interpretations(reg).build()``
    and is written to standard output after encoding with ``NodeEncoder``.
    """
    builder = interpretations.Interpretations(reg)
    encoder = NodeEncoder()
    layer = builder.build()
    # Parenthesised single-argument print: same output as the print statement.
    print(encoder.encode(layer))
Example #8
0
def generate_terms(reg):
    """Build the Terms layer for ``reg`` and print it.

    The layer comes from ``terms.Terms(reg).build()`` and is written to
    standard output after encoding with ``NodeEncoder``.
    """
    builder = terms.Terms(reg)
    encoder = NodeEncoder()
    terms_layer = builder.build()
    # Parenthesised single-argument print: same output as the print statement.
    print(encoder.encode(terms_layer))
Example #9
0
def generate_table_of_contents(reg_json):
    """Build the Table of Contents layer for ``reg_json`` and print it.

    The layer comes from ``table_of_contents.TableOfContentsLayer`` and is
    written to standard output after encoding with ``NodeEncoder``.
    """
    toc_layer = table_of_contents.TableOfContentsLayer(reg_json).build()
    encoded = NodeEncoder().encode(toc_layer)
    # Parenthesised single-argument print: same output as the print statement.
    print(encoded)
Example #10
0
def generate_internal_citations(reg_json):
    """Build the internal citations layer for ``reg_json`` and print it.

    The layer comes from ``internal_citations.InternalCitationParser`` and
    is written to standard output after encoding with ``NodeEncoder``.
    """
    citations = internal_citations.InternalCitationParser(reg_json).build()
    encoded = NodeEncoder().encode(citations)
    # Parenthesised single-argument print: same output as the print statement.
    print(encoded)
Example #11
0
def generate_external_citations(reg_json):
    """Build the external citations layer for ``reg_json`` and print it.

    The layer comes from ``external_citations.ExternalCitationParser`` and
    is written to standard output after encoding with ``NodeEncoder``.
    """
    citations = external_citations.ExternalCitationParser(reg_json).build()
    encoded = NodeEncoder().encode(citations)
    # Parenthesised single-argument print: same output as the print statement.
    print(encoded)
import sys

from lxml import etree

from regparser.tree.struct import NodeEncoder
from regparser.notice import find_section_by_section, fetch_document_number
from regparser.notice import build_section_by_section, fetch_cfr_part

# Command-line entry point: parse a rule XML file and print its
# section-by-section analysis, document number and CFR part.
if __name__ == "__main__":
    if len(sys.argv) < 2:
        print("Usage: python generate_notice path/to/rule.xml")
        print(" e.g.: python generate_notice 28.xml")
        # sys.exit() rather than the bare exit() helper: exit() is injected
        # by the `site` module for interactive use and is not guaranteed to
        # exist (e.g. under `python -S`). The exit status is unchanged.
        sys.exit()
    rule = etree.parse(sys.argv[1])

    part = fetch_cfr_part(rule)

    # Locate the section-by-section portion of the rule, then build its
    # structured form for the CFR part found above.
    sxs = find_section_by_section(rule)
    sxs = build_section_by_section(sxs, part)
    print(NodeEncoder().encode({
        'document_number': fetch_document_number(rule),
        'cfr_part': part,
        'section_by_section': sxs
    }))
import codecs
import sys

from regparser.tree.appendix.tree import trees_from as appendix_trees
from regparser.tree.interpretation import build as build_interp_tree
from regparser.tree.reg_text import build_reg_text_tree
from regparser.tree.struct import NodeEncoder
from regparser.tree.supplement import find_supplement_start

# Command-line entry point: parse a plain-text regulation into a single tree
# (regulation text + appendices + interpretations) and print it encoded.
if __name__ == "__main__":
    if len(sys.argv) < 3:
        print("Usage: python generate_tree.py path/to/reg.txt part")
        print(" e.g.: python generate_tree.py rege.txt 1005")
        # sys.exit() rather than the bare exit() helper: exit() is injected
        # by the `site` module for interactive use and is not guaranteed to
        # exist (e.g. under `python -S`). The exit status is unchanged.
        sys.exit()
    # codecs.open with an encoding already yields decoded (unicode) text,
    # so no extra unicode() conversion is needed.
    with codecs.open(sys.argv[1], encoding='utf-8') as f:
        reg = f.read()

    # The interpretations are everything from the supplement start onwards.
    interp = reg[find_supplement_start(reg):]

    part = int(sys.argv[2])
    reg_tree = build_reg_text_tree(reg, part)
    interp_tree = build_interp_tree(interp, part)
    # Keep the result under its own name so the imported `appendix_trees`
    # function is not overwritten by the list it returns.
    appendices = appendix_trees(reg, part, reg_tree.label)

    # Appendices come first, then the interpretation tree as the last child.
    reg_tree.children.extend(appendices)
    reg_tree.children.append(interp_tree)

    print(NodeEncoder().encode(reg_tree))