def test_build_hash(self):
    """Check the keys produced by treediff.hash_nodes for a small tree.

    The regulation text has one section before any subpart heading,
    followed by two subparts holding one section each.
    """
    title = u"Regulation Title"
    sect1_title = u"§ 204.1 Best Section"
    sect1 = u"(a) I believe this is (b) the (1) best section "
    sect1 += "(2) don't (c) you?"
    subpart_a = u"Subpart A—First subpart"
    sect2_title = u"§ 204.2 Second Best Section"
    sect2 = u"Some sections \ndon't have must \ndepth at all."
    subpart_b = u"Subpart B—First subpart"
    sect4_title = u"§ 204.4 I Skipped One"
    sect4 = u"Others \n(a) Skip sections for (1) No \n(2) Apparent \n"
    sect4 += "(3) Reason"

    text = "\n".join((
        title, sect1_title, sect1, subpart_a, sect2_title, sect2,
        subpart_b, sect4_title, sect4))

    reg = reg_text.build_reg_text_tree(text, 204)
    tree_hash = treediff.hash_nodes(reg)

    # sorted() takes any iterable, so this does not depend on keys()
    # returning a list that can be sorted in place.
    keys = sorted(tree_hash.keys())

    self.assertEqual(
        ['204', '204-1', '204-1-a', '204-1-b', '204-1-b-1', '204-1-b-2',
         '204-1-c', '204-2', '204-4', '204-4-a', '204-4-a-1',
         '204-4-a-2', '204-4-a-3', '204-Subpart', '204-Subpart-A',
         '204-Subpart-B'],
        keys)
def test_build_hash(self):
    """Verify which labels treediff.hash_nodes reports for a parsed tree.

    Input: a section that precedes any subpart heading, then Subpart A
    and Subpart B with one section each.
    """
    title = u"Regulation Title"
    sect1_title = u"§ 204.1 Best Section"
    sect1 = u"(a) I believe this is (b) the (1) best section "
    sect1 += "(2) don't (c) you?"
    subpart_a = u"Subpart A—First subpart"
    sect2_title = u"§ 204.2 Second Best Section"
    sect2 = u"Some sections \ndon't have must \ndepth at all."
    subpart_b = u"Subpart B—First subpart"
    sect4_title = u"§ 204.4 I Skipped One"
    sect4 = u"Others \n(a) Skip sections for (1) No \n(2) Apparent \n"
    sect4 += "(3) Reason"

    text = "\n".join((
        title, sect1_title, sect1, subpart_a, sect2_title, sect2,
        subpart_b, sect4_title, sect4))

    reg = reg_text.build_reg_text_tree(text, 204)
    tree_hash = treediff.hash_nodes(reg)

    # Build a fresh sorted list instead of sorting keys() in place; the
    # in-place form breaks when keys() is not a list.
    keys = sorted(tree_hash.keys())

    expected = [
        '204', '204-1', '204-1-a', '204-1-b', '204-1-b-1', '204-1-b-2',
        '204-1-c', '204-2', '204-4', '204-4-a', '204-4-a-1', '204-4-a-2',
        '204-4-a-3', '204-Subpart', '204-Subpart-A', '204-Subpart-B',
    ]
    self.assertEqual(expected, keys)
def test_subparts(self):
    """Create a tree with no subparts, then add subparts.

    The section text is identical in both versions; only the two subpart
    headings are new. The diff should report the named subparts as
    added, '204-Subpart' as deleted, and leave the sections untouched.
    """
    title = u"Regulation Title"
    sect1_title = u"§ 204.1 First Section"
    sect1 = u"(a) I believe this is (b) the best section "
    sect2_title = u"§ 204.2 Second Section"
    sect2 = u"Some sections \ndon't have \ndepth at all."

    old_text = "\n".join([title, sect1_title, sect1, sect2_title, sect2])
    older = reg_text.build_reg_text_tree(old_text, 204)

    # Same title and sections as above, with a subpart heading inserted
    # before each section.
    subpart_a = u"Subpart A—First subpart"
    subpart_b = u"Subpart B—Second subpart"
    new_text = "\n".join([
        title, subpart_a, sect1_title, sect1,
        subpart_b, sect2_title, sect2])
    newer = reg_text.build_reg_text_tree(new_text, 204)

    result = dict(difftree.changes_between(
        FrozenNode.from_node(older), FrozenNode.from_node(newer)))

    self.assertEqual(
        result['204-Subpart-A'],
        {
            "node": {
                "text": u"",
                "node_type": u"subpart",
                "tagged_text": None,
                "label": ("204", "Subpart", "A"),
                "child_labels": ("204-1", ),
                "title": u"First subpart",
            },
            "op": "added",
        })
    self.assertIn('204-Subpart-B', result)
    self.assertEqual(result['204-Subpart'], {"op": "deleted"})

    # Sections shouldn't have changed, though
    self.assertNotIn('204-1', result)
    self.assertNotIn('204-2', result)
def test_subparts(self):
    """Create a tree with no subparts, then add subparts.

    Both versions share the same title and section text. The newer one
    additionally has a subpart heading in front of each section.
    """
    title = u"Regulation Title"
    sect1_title = u"§ 204.1 First Section"
    sect1 = u"(a) I believe this is (b) the best section "
    sect2_title = u"§ 204.2 Second Section"
    sect2 = u"Some sections \ndon't have \ndepth at all."

    old_text = "\n".join([title, sect1_title, sect1, sect2_title, sect2])
    older = reg_text.build_reg_text_tree(old_text, 204)

    # Reuse the fixtures above so it is obvious that only the subpart
    # headings differ between the two versions.
    subpart_a = u"Subpart A—First subpart"
    subpart_b = u"Subpart B—Second subpart"
    new_text = "\n".join([
        title, subpart_a, sect1_title, sect1,
        subpart_b, sect2_title, sect2])
    newer = reg_text.build_reg_text_tree(new_text, 204)

    result = dict(difftree.changes_between(
        FrozenNode.from_node(older), FrozenNode.from_node(newer)))

    expected_subpart_a = {
        "node": {
            "text": u"",
            "node_type": u"subpart",
            "tagged_text": None,
            "label": ("204", "Subpart", "A"),
            "child_labels": ("204-1",),
            "title": u"First subpart",
        },
        "op": "added",
    }
    self.assertEqual(result["204-Subpart-A"], expected_subpart_a)
    self.assertIn("204-Subpart-B", result)
    self.assertEqual(result["204-Subpart"], {"op": "deleted"})

    # Sections shouldn't have changed, though
    self.assertNotIn("204-1", result)
    self.assertNotIn("204-2", result)
def test_subparts(self):
    """Create a tree with no subparts, then add subparts.

    The section text is the same in both versions; the newer text only
    adds a subpart heading before each section. After comparing, the
    changes should list Subpart A as added (with section 204-1 as its
    child), include Subpart B, and mark '204-Subpart' as deleted.
    """
    title = u"Regulation Title"
    sect1_title = u"§ 204.1 First Section"
    sect1 = u"(a) I believe this is (b) the best section "
    sect2_title = u"§ 204.2 Second Section"
    sect2 = u"Some sections \ndon't have \ndepth at all."

    old_text = "\n".join([title, sect1_title, sect1, sect2_title, sect2])
    older = reg_text.build_reg_text_tree(old_text, 204)

    # Same title and sections, with subpart headings inserted.
    subpart_a = u"Subpart A—First subpart"
    subpart_b = u"Subpart B—Second subpart"
    new_text = "\n".join([
        title, subpart_a, sect1_title, sect1,
        subpart_b, sect2_title, sect2])
    newer = reg_text.build_reg_text_tree(new_text, 204)

    comparer = treediff.Compare(older, newer)
    comparer.compare()

    self.assertEqual(
        comparer.changes['204-Subpart-A'],
        {
            "node": {
                "text": "",
                "node_type": "subpart",
                "label": ["204", "Subpart", "A"],
                "child_labels": ["204-1"],
                "title": "First subpart",
            },
            "op": "added",
        })
    self.assertIn('204-Subpart-B', comparer.changes)
    self.assertEqual(comparer.changes['204-Subpart'], {"op": "deleted"})
def build_whole_regtree(text):
    """Assemble a complete regulation tree from raw regulation text.

    The part is looked up with find_cfr_part, the regulation text tree is
    built for it, the appendix trees are added as further children, and,
    when find_supplement_start locates a supplement, the interpretation
    tree built from that point onward is appended last.
    """
    cfr_part = find_cfr_part(text)
    tree = build_reg_text_tree(text, cfr_part)
    tree.children.extend(appendix_trees(text, cfr_part, tree.label))

    interp_offset = find_supplement_start(text)
    if interp_offset is None:
        # No supplement found: there are no interpretations to attach.
        return tree

    tree.children.append(build_interp_tree(text[interp_offset:], cfr_part))
    return tree
def test_subparts(self):
    """Create a tree with no subparts, then add subparts.

    Only the subpart headings differ between the old and new text; the
    title and both sections are shared.
    """
    title = u"Regulation Title"
    sect1_title = u"§ 204.1 First Section"
    sect1 = u"(a) I believe this is (b) the best section "
    sect2_title = u"§ 204.2 Second Section"
    sect2 = u"Some sections \ndon't have \ndepth at all."

    old_text = "\n".join([title, sect1_title, sect1, sect2_title, sect2])
    older = reg_text.build_reg_text_tree(old_text, 204)

    # The newer version reuses the fixtures above and adds one subpart
    # heading ahead of each section.
    subpart_a = u"Subpart A—First subpart"
    subpart_b = u"Subpart B—Second subpart"
    new_text = "\n".join([
        title, subpart_a, sect1_title, sect1,
        subpart_b, sect2_title, sect2])
    newer = reg_text.build_reg_text_tree(new_text, 204)

    comparer = treediff.Compare(older, newer)
    comparer.compare()
    changes = comparer.changes

    expected_subpart_a = {
        "node": {
            "text": "",
            "node_type": "subpart",
            "label": ["204", "Subpart", "A"],
            "child_labels": ["204-1"],
            "title": "First subpart",
        },
        "op": "added",
    }
    self.assertEqual(changes['204-Subpart-A'], expected_subpart_a)
    self.assertIn('204-Subpart-B', changes)
    self.assertEqual(changes['204-Subpart'], {"op": "deleted"})
def test_build_reg_text_tree_sections(self):
    """Sections listed after a subpart heading end up under that subpart.

    Subpart A is followed by sections 204.1 and 204.2, Subpart B by
    section 204.4. The root must have exactly the two subparts as
    children, and each section's label, title, text and paragraph
    nesting is checked.
    """
    reg_title = u"Regulation Title"
    heading_a = u"Subpart A—First subpart"
    s1_heading = u"§ 204.1 Best Section"
    s1_body = (u"(a) I believe this is (b) the (1) best section "
               + "(2) don't (c) you?")
    s2_heading = u"§ 204.2 Second Best Section"
    s2_body = u"Some sections \ndon't have must \ndepth at all."
    heading_b = u"Subpart B—First subpart"
    s4_heading = u"§ 204.4 I Skipped One"
    s4_body = (u"Others \n(a) Skip sections for (1) No \n(2) Apparent \n"
               + "(3) Reason")

    lines = [reg_title, heading_a, s1_heading, s1_body, s2_heading,
             s2_body, heading_b, s4_heading, s4_body]
    root = reg_text.build_reg_text_tree("\n".join(lines), 204)

    # Root node
    self.assertEqual(["204"], root.label)
    self.assertEqual(reg_title, root.title)
    self.assertEqual("", root.text.strip())
    self.assertEqual(2, len(root.children))

    part_a, part_b = root.children
    first, second = part_a.children
    fourth = part_b.children[0]

    # Section 204.1: paragraphs a, b, c; only b has sub-paragraphs
    self.assertEqual(['204', '1'], first.label)
    self.assertEqual(s1_heading, first.title)
    self.assertEqual("", first.text.strip())
    self.assertEqual(3, len(first.children))
    for index, expected_count in enumerate((0, 2, 0)):
        self.assertEqual(
            expected_count, len(first.children[index].children))

    # Section 204.2: plain text, no paragraphs
    self.assertEqual(['204', '2'], second.label)
    self.assertEqual(s2_heading, second.title)
    self.assertEqual(s2_body, second.text.strip())
    self.assertEqual(0, len(second.children))

    # Section 204.4: intro text, then one paragraph with three children
    self.assertEqual(['204', '4'], fourth.label)
    self.assertEqual(s4_heading, fourth.title)
    self.assertEqual(u"Others", fourth.text.strip())
    self.assertEqual(1, len(fourth.children))
    self.assertEqual(3, len(fourth.children[0].children))
def test_build_reg_text_empty_and_subpart(self):
    """In some cases, we have a few sections before the first subpart.

    Here section 204.1 comes before Subpart A. The root is expected to
    have three children (presumably a holder for the leading section
    plus the two named subparts).
    """
    reg_title = u"Regulation Title"
    lines = [
        reg_title,
        u"§ 204.1 Best Section",
        (u"(a) I believe this is (b) the (1) best section "
         + "(2) don't (c) you?"),
        u"Subpart A—First subpart",
        u"§ 204.2 Second Best Section",
        u"Some sections \ndon't have must \ndepth at all.",
        u"Subpart B—First subpart",
        u"§ 204.4 I Skipped One",
        (u"Others \n(a) Skip sections for (1) No \n(2) Apparent \n"
         + "(3) Reason"),
    ]

    root = reg_text.build_reg_text_tree("\n".join(lines), 204)

    self.assertEqual(["204"], root.label)
    self.assertEqual(reg_title, root.title)
    self.assertEqual("", root.text.strip())
    self.assertEqual(3, len(root.children))
def test_build_reg_text_tree_no_sections(self):
    """Text without section headers yields a root holding one empty part.

    The whole input is kept as the root's text and its first line is
    used as the title.
    """
    text = "Regulation Title\nThen some more content"
    expected = Node(
        text,
        [Node('', [], ['201', 'Subpart'], '', node_type=Node.EMPTYPART)],
        ['201'],
        'Regulation Title')
    actual = reg_text.build_reg_text_tree(text, 201)
    self.assertEqual(expected, actual)
import codecs
import sys

from regparser.tree.appendix.tree import trees_from as appendix_trees
from regparser.tree.interpretation import build as build_interp_tree
from regparser.tree.reg_text import build_reg_text_tree
from regparser.tree.struct import NodeEncoder
from regparser.tree.supplement import find_supplement_start


def main(argv):
    """Parse a regulation text file and print its tree, encoded.

    argv -- command-line arguments: script name, path to a UTF-8 encoded
            regulation text file, and the part number.

    Returns the process exit status: 1 when too few arguments were given
    (after printing usage), 0 otherwise.
    """
    if len(argv) < 3:
        print("Usage: python generate_tree.py path/to/reg.txt part")
        print(" e.g.: python generate_tree.py rege.txt 1005")
        return 1

    # codecs.open with an encoding already yields decoded text, so no
    # further conversion is needed.
    with codecs.open(argv[1], encoding='utf-8') as f:
        reg = f.read()
    part = int(argv[2])

    reg_tree = build_reg_text_tree(reg, part)
    # Use a separate local name so the imported appendix_trees function
    # is not rebound to its own result.
    appendices = appendix_trees(reg, part, reg_tree.label)
    reg_tree.children.extend(appendices)

    # Only build interpretations when a supplement was actually found;
    # slicing with None would hand the entire regulation text to the
    # interpretation builder.
    supplement_start = find_supplement_start(reg)
    if supplement_start is not None:
        interp_tree = build_interp_tree(reg[supplement_start:], part)
        reg_tree.children.append(interp_tree)

    print(NodeEncoder().encode(reg_tree))
    return 0


if __name__ == "__main__":
    # sys.exit rather than the site-provided exit(), which is intended
    # for interactive use and may be absent.
    sys.exit(main(sys.argv))