コード例 #1
0
    def test_build_hash(self):
        """Check the keys treediff.hash_nodes returns for a small regulation.

        The sample text places one section ahead of the first subpart
        header, followed by two subpart headers with one section each.
        """
        title = u"Regulation Title"
        sect1_title = u"§ 204.1 Best Section"
        sect1 = u"(a) I believe this is (b) the (1) best section "
        sect1 += "(2) don't (c) you?"
        subpart_a = u"Subpart A—First subpart"
        sect2_title = u"§ 204.2 Second Best Section"
        sect2 = u"Some sections \ndon't have must \ndepth at all."
        subpart_b = u"Subpart B—First subpart"
        sect4_title = u"§ 204.4 I Skipped One"
        sect4 = u"Others \n(a) Skip sections for (1) No \n(2) Apparent \n"
        sect4 += "(3) Reason"

        text = "\n".join(
            (title, sect1_title, sect1, subpart_a, sect2_title, sect2,
             subpart_b, sect4_title, sect4))
        reg = reg_text.build_reg_text_tree(text, 204)

        tree_hash = treediff.hash_nodes(reg)
        # sorted() accepts both the list returned by keys() on Python 2 and
        # the view returned on Python 3; calling .sort() on the result of
        # keys() only works for the former.
        keys = sorted(tree_hash.keys())
        # assertEqual is the supported spelling; assertEquals is a deprecated
        # alias.
        self.assertEqual(
            ['204', '204-1', '204-1-a', '204-1-b',
             '204-1-b-1', '204-1-b-2', '204-1-c', '204-2', '204-4',
             '204-4-a', '204-4-a-1', '204-4-a-2', '204-4-a-3', '204-Subpart',
             '204-Subpart-A', '204-Subpart-B'], keys)
コード例 #2
0
    def test_build_hash(self):
        """Check the keys treediff.hash_nodes returns for a small regulation.

        The sample text places one section ahead of the first subpart
        header, followed by two subpart headers with one section each.
        """
        title = u"Regulation Title"
        sect1_title = u"§ 204.1 Best Section"
        sect1 = u"(a) I believe this is (b) the (1) best section "
        sect1 += "(2) don't (c) you?"
        subpart_a = u"Subpart A—First subpart"
        sect2_title = u"§ 204.2 Second Best Section"
        sect2 = u"Some sections \ndon't have must \ndepth at all."
        subpart_b = u"Subpart B—First subpart"
        sect4_title = u"§ 204.4 I Skipped One"
        sect4 = u"Others \n(a) Skip sections for (1) No \n(2) Apparent \n"
        sect4 += "(3) Reason"

        text = "\n".join((title, sect1_title, sect1, subpart_a, sect2_title,
                          sect2, subpart_b, sect4_title, sect4))
        reg = reg_text.build_reg_text_tree(text, 204)

        tree_hash = treediff.hash_nodes(reg)
        # keys() is a list on Python 2 but a view on Python 3, and a view has
        # no .sort(); sorted() handles both.
        keys = sorted(tree_hash.keys())
        # assertEquals is a deprecated alias of assertEqual.
        self.assertEqual([
            '204', '204-1', '204-1-a', '204-1-b', '204-1-b-1', '204-1-b-2',
            '204-1-c', '204-2', '204-4', '204-4-a', '204-4-a-1', '204-4-a-2',
            '204-4-a-3', '204-Subpart', '204-Subpart-A', '204-Subpart-B'
        ], keys)
コード例 #3
0
    def test_subparts(self):
        """Diff a tree without subpart headers against one that has them.

        Both texts contain the same two sections; the newer text adds a
        "Subpart A" header before the first section and a "Subpart B" header
        before the second.
        """
        title = u"Regulation Title"
        sect1_title = u"§ 204.1 First Section"
        sect1 = u"(a) I believe this is (b) the best section "
        sect2_title = u"§ 204.2 Second Section"
        sect2 = u"Some sections \ndon't have \ndepth at all."

        old_text = "\n".join([title, sect1_title, sect1, sect2_title, sect2])
        older = reg_text.build_reg_text_tree(old_text, 204)

        ntitle = u"Regulation Title"
        nsubpart_a = u"Subpart A—First subpart"
        nsect1_title = u"§ 204.1 First Section"
        nsect1 = u"(a) I believe this is (b) the best section "
        nsubpart_b = u"Subpart B—Second subpart"
        nsect2_title = u"§ 204.2 Second Section"
        nsect2 = u"Some sections \ndon't have \ndepth at all."

        new_text = "\n".join([
            ntitle, nsubpart_a, nsect1_title, nsect1, nsubpart_b, nsect2_title,
            nsect2
        ])
        newer = reg_text.build_reg_text_tree(new_text, 204)

        result = dict(
            difftree.changes_between(FrozenNode.from_node(older),
                                     FrozenNode.from_node(newer)))

        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(
            result['204-Subpart-A'], {
                "node": {
                    "text": u"",
                    "node_type": u"subpart",
                    "tagged_text": None,
                    "label": ("204", "Subpart", "A"),
                    "child_labels": ("204-1", ),
                    "title": u"First subpart"
                },
                "op": "added"
            })
        # assertIn / assertNotIn report the missing or unexpected key on
        # failure instead of a bare "False is not true".
        self.assertIn('204-Subpart-B', result)
        self.assertEqual(result['204-Subpart'], {"op": "deleted"})
        # Sections shouldn't have changed, though
        self.assertNotIn('204-1', result)
        self.assertNotIn('204-2', result)
コード例 #4
0
    def test_subparts(self):
        """Diff a tree without subpart headers against one that has them.

        Both texts contain the same two sections; the newer text adds a
        "Subpart A" header before the first section and a "Subpart B" header
        before the second.
        """
        title = u"Regulation Title"
        sect1_title = u"§ 204.1 First Section"
        sect1 = u"(a) I believe this is (b) the best section "
        sect2_title = u"§ 204.2 Second Section"
        sect2 = u"Some sections \ndon't have \ndepth at all."

        old_text = "\n".join([title, sect1_title, sect1, sect2_title, sect2])
        older = reg_text.build_reg_text_tree(old_text, 204)

        ntitle = u"Regulation Title"
        nsubpart_a = u"Subpart A—First subpart"
        nsect1_title = u"§ 204.1 First Section"
        nsect1 = u"(a) I believe this is (b) the best section "
        nsubpart_b = u"Subpart B—Second subpart"
        nsect2_title = u"§ 204.2 Second Section"
        nsect2 = u"Some sections \ndon't have \ndepth at all."

        new_text = "\n".join(
            [
                ntitle,
                nsubpart_a,
                nsect1_title,
                nsect1,
                nsubpart_b,
                nsect2_title,
                nsect2,
            ]
        )
        newer = reg_text.build_reg_text_tree(new_text, 204)

        result = dict(
            difftree.changes_between(
                FrozenNode.from_node(older), FrozenNode.from_node(newer)
            )
        )

        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(
            result["204-Subpart-A"],
            {
                "node": {
                    "text": u"",
                    "node_type": u"subpart",
                    "tagged_text": None,
                    "label": ("204", "Subpart", "A"),
                    "child_labels": ("204-1",),
                    "title": u"First subpart",
                },
                "op": "added",
            },
        )
        # Membership assertions name the offending key when they fail.
        self.assertIn("204-Subpart-B", result)
        self.assertEqual(result["204-Subpart"], {"op": "deleted"})
        # Sections shouldn't have changed, though
        self.assertNotIn("204-1", result)
        self.assertNotIn("204-2", result)
コード例 #5
0
    def test_subparts(self):
        """Compare a tree without subpart headers to one that has them.

        Both texts contain the same two sections; the newer text adds a
        "Subpart A" header before the first section and a "Subpart B" header
        before the second. The recorded changes are read from
        ``comparer.changes`` after ``compare()`` has run.
        """
        title = u"Regulation Title"
        sect1_title = u"§ 204.1 First Section"
        sect1 = u"(a) I believe this is (b) the best section "
        sect2_title = u"§ 204.2 Second Section"
        sect2 = u"Some sections \ndon't have \ndepth at all."

        old_text = "\n".join([title, sect1_title, sect1, sect2_title, sect2])
        older = reg_text.build_reg_text_tree(old_text, 204)

        ntitle = u"Regulation Title"
        nsubpart_a = u"Subpart A—First subpart"
        nsect1_title = u"§ 204.1 First Section"
        nsect1 = u"(a) I believe this is (b) the best section "
        nsubpart_b = u"Subpart B—Second subpart"
        nsect2_title = u"§ 204.2 Second Section"
        nsect2 = u"Some sections \ndon't have \ndepth at all."

        new_text = "\n".join([
            ntitle, nsubpart_a, nsect1_title, nsect1, nsubpart_b, nsect2_title,
            nsect2
        ])
        newer = reg_text.build_reg_text_tree(new_text, 204)

        comparer = treediff.Compare(older, newer)
        comparer.compare()

        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(
            comparer.changes['204-Subpart-A'], {
                "node": {
                    "text": "",
                    "node_type": "subpart",
                    "label": ["204", "Subpart", "A"],
                    "child_labels": ["204-1"],
                    "title": "First subpart"
                },
                "op": "added"
            })
        # assertIn names the missing key when the check fails.
        self.assertIn('204-Subpart-B', comparer.changes)
        self.assertEqual(comparer.changes['204-Subpart'], {"op": "deleted"})
コード例 #6
0
def build_whole_regtree(text):
    """Assemble one regulation tree from the individual tree builders.

    The regulation text tree is built first; appendix trees are then added
    to its children, followed by the interpretation tree when a supplement
    start is found in ``text``. The root of the combined tree is returned.
    """
    cfr_part = find_cfr_part(text)
    whole_tree = build_reg_text_tree(text, cfr_part)
    appendix_nodes = appendix_trees(text, cfr_part, whole_tree.label)
    whole_tree.children.extend(appendix_nodes)

    interp_offset = find_supplement_start(text)
    if interp_offset is None:
        # No supplement located: nothing further to attach.
        return whole_tree

    interp_root = build_interp_tree(text[interp_offset:], cfr_part)
    whole_tree.children.append(interp_root)
    return whole_tree
コード例 #7
0
def build_whole_regtree(text):
    """Build the regulation text tree for ``text`` and attach the rest.

    Appendix trees are appended to the root's children, and the
    interpretation tree is appended after them whenever
    ``find_supplement_start`` yields a position. Returns the root node.
    """
    part = find_cfr_part(text)
    root = build_reg_text_tree(text, part)
    root.children.extend(appendix_trees(text, part, root.label))

    start = find_supplement_start(text)
    if start is not None:
        # The interpretation builder only receives the supplement onward.
        root.children.append(build_interp_tree(text[start:], part))
    return root
コード例 #8
0
    def test_subparts(self):
        """Compare a tree without subpart headers to one that has them.

        Both texts contain the same two sections; the newer text adds a
        "Subpart A" header before the first section and a "Subpart B" header
        before the second. The recorded changes are read from
        ``comparer.changes`` after ``compare()`` has run.
        """
        title = u"Regulation Title"
        sect1_title = u"§ 204.1 First Section"
        sect1 = u"(a) I believe this is (b) the best section "
        sect2_title = u"§ 204.2 Second Section"
        sect2 = u"Some sections \ndon't have \ndepth at all."

        old_text = "\n".join([title, sect1_title, sect1, sect2_title, sect2])
        older = reg_text.build_reg_text_tree(old_text, 204)

        ntitle = u"Regulation Title"
        nsubpart_a = u"Subpart A—First subpart"
        nsect1_title = u"§ 204.1 First Section"
        nsect1 = u"(a) I believe this is (b) the best section "
        nsubpart_b = u"Subpart B—Second subpart"
        nsect2_title = u"§ 204.2 Second Section"
        nsect2 = u"Some sections \ndon't have \ndepth at all."

        new_text = "\n".join([
            ntitle, nsubpart_a, nsect1_title,
            nsect1, nsubpart_b, nsect2_title, nsect2])
        newer = reg_text.build_reg_text_tree(new_text, 204)

        comparer = treediff.Compare(older, newer)
        comparer.compare()

        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(
            comparer.changes['204-Subpart-A'],
            {"node": {
                "text": "", "node_type": "subpart",
                "label": ["204", "Subpart", "A"],
                "child_labels": ["204-1"],
                "title": "First subpart"},
                "op": "added"})
        # assertIn names the missing key when the check fails.
        self.assertIn('204-Subpart-B', comparer.changes)
        self.assertEqual(comparer.changes['204-Subpart'], {"op": "deleted"})
コード例 #9
0
    def test_build_reg_text_tree_sections(self):
        """Parse three sections split over two subparts and inspect each node.

        Subpart A holds sections 204.1 and 204.2, Subpart B holds 204.4; the
        label, title, stripped text and child counts of every section are
        checked.
        """
        title = u"Regulation Title"
        subpart_a = u"Subpart A—First subpart"
        sect1_title = u"§ 204.1 Best Section"
        sect1 = (u"(a) I believe this is (b) the (1) best section "
                 u"(2) don't (c) you?")
        sect2_title = u"§ 204.2 Second Best Section"
        sect2 = u"Some sections \ndon't have must \ndepth at all."
        subpart_b = u"Subpart B—First subpart"
        sect4_title = u"§ 204.4 I Skipped One"
        sect4 = (u"Others \n(a) Skip sections for (1) No \n(2) Apparent \n"
                 u"(3) Reason")

        pieces = [title, subpart_a, sect1_title, sect1, sect2_title, sect2,
                  subpart_b, sect4_title, sect4]
        reg = reg_text.build_reg_text_tree("\n".join(pieces), 204)

        # Root node: only the two subparts hang off it.
        self.assertEqual(["204"], reg.label)
        self.assertEqual(title, reg.title)
        self.assertEqual("", reg.text.strip())
        self.assertEqual(2, len(reg.children))

        first_subpart, second_subpart = reg.children
        sect1_tree, sect2_tree = first_subpart.children
        sect4_tree = second_subpart.children[0]

        # Section 204.1: three paragraphs, the middle one with two children.
        self.assertEqual(['204', '1'], sect1_tree.label)
        self.assertEqual(sect1_title, sect1_tree.title)
        self.assertEqual("", sect1_tree.text.strip())
        self.assertEqual(3, len(sect1_tree.children))
        for index, expected_count in enumerate((0, 2, 0)):
            self.assertEqual(
                expected_count, len(sect1_tree.children[index].children))

        # Section 204.2: plain text, no paragraphs.
        self.assertEqual(['204', '2'], sect2_tree.label)
        self.assertEqual(sect2_title, sect2_tree.title)
        self.assertEqual(sect2, sect2_tree.text.strip())
        self.assertEqual(0, len(sect2_tree.children))

        # Section 204.4: intro text plus one paragraph with three children.
        self.assertEqual(['204', '4'], sect4_tree.label)
        self.assertEqual(sect4_title, sect4_tree.title)
        self.assertEqual(u"Others", sect4_tree.text.strip())
        self.assertEqual(1, len(sect4_tree.children))
        self.assertEqual(3, len(sect4_tree.children[0].children))
コード例 #10
0
    def test_build_reg_text_empty_and_subpart(self):
        """A section that precedes the first subpart header still parses.

        The sample text has section 204.1 ahead of "Subpart A"; the root is
        expected to end up with three children.
        """
        title = u"Regulation Title"
        sect1_title = u"§ 204.1 Best Section"
        sect1 = (u"(a) I believe this is (b) the (1) best section "
                 u"(2) don't (c) you?")
        subpart_a = u"Subpart A—First subpart"
        sect2_title = u"§ 204.2 Second Best Section"
        sect2 = u"Some sections \ndon't have must \ndepth at all."
        subpart_b = u"Subpart B—First subpart"
        sect4_title = u"§ 204.4 I Skipped One"
        sect4 = (u"Others \n(a) Skip sections for (1) No \n(2) Apparent \n"
                 u"(3) Reason")

        pieces = [title, sect1_title, sect1, subpart_a, sect2_title, sect2,
                  subpart_b, sect4_title, sect4]
        reg = reg_text.build_reg_text_tree("\n".join(pieces), 204)

        self.assertEqual(["204"], reg.label)
        self.assertEqual(title, reg.title)
        self.assertEqual("", reg.text.strip())
        self.assertEqual(3, len(reg.children))
コード例 #11
0
 def test_build_reg_text_tree_no_sections(self):
     """Text without any sections yields a root holding one EMPTYPART node."""
     text = "Regulation Title\nThen some more content"
     placeholder = Node(
         '', [], ['201', 'Subpart'], '', node_type=Node.EMPTYPART)
     expected = Node(text, [placeholder], ['201'], 'Regulation Title')
     actual = reg_text.build_reg_text_tree(text, 201)
     self.assertEqual(expected, actual)
コード例 #12
0
import codecs
import sys

from regparser.tree.appendix.tree import trees_from as appendix_trees
from regparser.tree.interpretation import build as build_interp_tree
from regparser.tree.reg_text import build_reg_text_tree
from regparser.tree.struct import NodeEncoder
from regparser.tree.supplement import find_supplement_start

if __name__ == "__main__":
    # Command-line entry point: read a UTF-8 regulation text file, build the
    # regulation tree with appendices and interpretations attached, and print
    # it as encoded by NodeEncoder.
    if len(sys.argv) < 3:
        # Single-argument print(...) behaves the same as a Python 2 print
        # statement and as the Python 3 print function.
        print("Usage: python generate_tree.py path/to/reg.txt part")
        print(" e.g.: python generate_tree.py rege.txt 1005")
        # sys.exit is always available, whereas the bare exit() helper is
        # injected by the site module. A non-zero status tells the caller the
        # invocation was wrong.
        sys.exit(1)
    with codecs.open(sys.argv[1], encoding='utf-8') as f:
        # codecs.open already decodes to text; no extra conversion is needed.
        reg = f.read()

    part = int(sys.argv[2])
    reg_tree = build_reg_text_tree(reg, part)

    # Stored under a new name so the imported appendix_trees function is not
    # overwritten by its own result.
    appendices = appendix_trees(reg, part, reg_tree.label)
    reg_tree.children.extend(appendices)

    # NOTE(review): assumes find_supplement_start returns None when the text
    # has no supplement -- confirm. Slicing with None would hand the entire
    # regulation text to build_interp_tree, so the interpretation tree is
    # only built when a start position was found.
    supplement_start = find_supplement_start(reg)
    if supplement_start is not None:
        interp_tree = build_interp_tree(reg[supplement_start:], part)
        reg_tree.children.append(interp_tree)

    print(NodeEncoder().encode(reg_tree))