def find_next_appendix_offsets(text):
    """Locate the next appendix in `text`, taking supplements into account.

    Returns a `(start, end)` tuple of offsets, or None when either
    `search.find_offsets` reports nothing or a supplement begins before the
    appendix start. If a supplement begins before the appendix end, the
    returned end is pulled back to the supplement's start.
    """
    appendix_span = search.find_offsets(text, find_appendix_start)
    if appendix_span is None:
        return None
    begin, finish = appendix_span

    supp_begin = find_supplement_start(text)
    if supp_begin is None:
        # No supplement to account for; the raw offsets stand.
        return (begin, finish)
    if supp_begin < begin:
        # A supplement precedes the appendix start, so report no appendix.
        return None
    if supp_begin < finish:
        # The supplement cuts the appendix short.
        finish = supp_begin
    return (begin, finish)
def build_whole_regtree(text):
    """Assemble a complete regulation tree from `text`.

    Builds the regulation-text tree, extends its children with the appendix
    trees and, when `find_supplement_start` finds a supplement, appends the
    interpretation tree built from the text starting at that offset.
    Returns the root node.
    """
    cfr_part = find_cfr_part(text)
    root = build_reg_text_tree(text, cfr_part)

    appendix_nodes = appendix_trees(text, cfr_part, root.label)
    root.children.extend(appendix_nodes)

    interp_offset = find_supplement_start(text)
    if interp_offset is None:
        # No supplement found: nothing further to attach.
        return root

    interp_root = build_interp_tree(text[interp_offset:], cfr_part)
    root.children.append(interp_root)
    return root
def next_subpart_offsets(text):
    """Find the start,end of the next subpart.

    Returns a `(start, end)` tuple of offsets into `text`, or None when
    `find_offsets` reports nothing or when the adjusted end would fall
    before the start.

    The end offset is pulled back to the appendix start if that is smaller
    than the end; otherwise to the supplement start if that is smaller.
    """
    offsets = find_offsets(text, find_next_subpart_start)
    if offsets is None:
        return None
    start, end = offsets

    appendix_start = find_appendix_start(text)
    supplement_start = find_supplement_start(text)
    if appendix_start is not None and appendix_start < end:
        end = appendix_start
    elif supplement_start is not None and supplement_start < end:
        end = supplement_start

    # An appendix/supplement that begins before the subpart itself would
    # otherwise yield an inverted range (end < start); report "no subpart"
    # instead of handing callers a nonsensical slice.
    if end < start:
        return None
    return (start, end)
def next_subpart_offsets(text):
    """Locate the next subpart in `text`.

    Returns `(start, end)` offsets, or None if `find_offsets` reports
    nothing or the trimmed end lands before the start. The end is trimmed
    to the appendix start when that is below the end; failing that, to the
    supplement start when that is below the end.
    """
    span = find_offsets(text, find_next_subpart_start)
    if span is None:
        return None
    begin, finish = span

    # Candidates are tried in priority order; only the first one that
    # falls before the current end is applied.
    cutoffs = (find_appendix_start(text), find_supplement_start(text))
    for cutoff in cutoffs:
        if cutoff is not None and cutoff < finish:
            finish = cutoff
            break

    if finish < begin:
        # Trimming produced an inverted range.
        return None
    return (begin, finish)
def next_section_offsets(text, part):
    """Find the start/end of the next section.

    `part` is forwarded to `find_next_section_start` when searching.

    Returns a `(start, end)` tuple of offsets into `text`, or None when
    `find_offsets` reports nothing or when the adjusted end would fall
    before the start.

    The end offset is pulled back to the first of these that applies:
    a subpart start strictly between start and end; an appendix start
    below the end; a supplement start below the end.
    """
    offsets = find_offsets(text, lambda t: find_next_section_start(t, part))
    if offsets is None:
        return None
    start, end = offsets

    subpart_start = find_next_subpart_start(text)
    appendix_start = find_appendix_start(text)
    supplement_start = find_supplement_start(text)

    if subpart_start is not None and start < subpart_start < end:
        end = subpart_start
    elif appendix_start is not None and appendix_start < end:
        end = appendix_start
    elif supplement_start is not None and supplement_start < end:
        end = supplement_start

    # An appendix/supplement that begins before the section itself would
    # otherwise yield an inverted range (end < start); report "no section"
    # instead of handing callers a nonsensical slice.
    if end < start:
        return None
    return (start, end)
def next_section_offsets(text, part):
    """Locate the next section of `part` in `text`.

    Returns `(start, end)` offsets, or None if `find_offsets` reports
    nothing or the trimmed end lands before the start. The end is trimmed
    to the first applicable boundary: a subpart start strictly inside the
    span, then an appendix start below the end, then a supplement start
    below the end.
    """
    def section_start(chunk):
        # Bind `part` so find_offsets can call this with text alone.
        return find_next_section_start(chunk, part)

    span = find_offsets(text, section_start)
    if span is None:
        return None
    begin, finish = span

    subpart_at = find_next_subpart_start(text)
    appendix_at = find_appendix_start(text)
    supplement_at = find_supplement_start(text)

    if subpart_at is not None and begin < subpart_at < finish:
        finish = subpart_at
    elif appendix_at is not None and appendix_at < finish:
        finish = appendix_at
    elif supplement_at is not None and supplement_at < finish:
        finish = supplement_at

    if finish < begin:
        # Trimming produced an inverted range.
        return None
    return (begin, finish)
def test_find_supplement_start(self):
    """find_supplement_start returns the offset of the requested
    supplement, or None when it is not found."""
    text = "Supplement A S\nOther\nSupplement I Thing\nXX Supplement C Q"
    find = supplement.find_supplement_start

    # With no letter given, the expected offset (21) is that of the
    # "Supplement I" line.
    self.assertEqual(21, find(text))

    # Explicit letters. 'C' only appears after "XX " in the fixture and is
    # expected not to match.
    expectations = ((21, 'I'), (0, 'A'), (None, 'C'))
    for expected, letter in expectations:
        self.assertEqual(expected, find(text, letter))
import codecs
import sys

from regparser.tree.appendix.tree import trees_from as appendix_trees
from regparser.tree.interpretation import build as build_interp_tree
from regparser.tree.reg_text import build_reg_text_tree
from regparser.tree.struct import NodeEncoder
from regparser.tree.supplement import find_supplement_start


# Command-line entry point: read a regulation text file, build its tree
# (regulation text, then appendices, then interpretations if a supplement
# is present) and print the tree as encoded by NodeEncoder.
if __name__ == "__main__":
    if len(sys.argv) < 3:
        print("Usage: python generate_tree.py path/to/reg.txt part")
        print(" e.g.: python generate_tree.py rege.txt 1005")
        # sys.exit rather than the site-provided exit(); non-zero status
        # signals the usage error to the calling shell.
        sys.exit(1)

    # codecs.open with an encoding already yields decoded text, so no
    # further unicode() conversion is needed.
    with codecs.open(sys.argv[1], encoding='utf-8') as f:
        reg = f.read()
    part = int(sys.argv[2])

    reg_tree = build_reg_text_tree(reg, part)

    # Keep the imported `appendix_trees` callable intact by binding its
    # result to a different name.
    appendices = appendix_trees(reg, part, reg_tree.label)
    reg_tree.children.extend(appendices)

    # find_supplement_start may return None; slicing with None would hand
    # the *entire* regulation to the interpretation builder, so only build
    # interpretations when a supplement was actually located.
    supplement_start = find_supplement_start(reg)
    if supplement_start is not None:
        interp_tree = build_interp_tree(reg[supplement_start:], part)
        reg_tree.children.append(interp_tree)

    print(NodeEncoder().encode(reg_tree))