def find_next_appendix_offsets(text):
    """Locate the (start, end) offsets of the next appendix in `text`.

    Returns None when `search.find_offsets` yields nothing, or when a
    supplement begins before the appendix start. If a supplement begins
    before the appendix end, the end is clipped to the supplement start."""
    appendix_span = search.find_offsets(text, find_appendix_start)
    if appendix_span is None:
        return None

    begin, finish = appendix_span
    supp = find_supplement_start(text)
    if supp is None:
        # No supplement in the text; nothing to clip against.
        return (begin, finish)
    if supp < begin:
        return None
    if supp < finish:
        finish = supp
    return (begin, finish)
Exemple #2
0
def build_whole_regtree(text):
    """Assemble a whole regulation tree from `text`: the regulation text
    tree, with the appendix trees and (when a supplement start is found) the
    interpretation tree added to its children."""
    cfr_part = find_cfr_part(text)
    root = build_reg_text_tree(text, cfr_part)
    appendix_nodes = appendix_trees(text, cfr_part, root.label)
    root.children.extend(appendix_nodes)

    interp_offset = find_supplement_start(text)
    if interp_offset is None:
        # No supplement located, so there is no interpretation tree to add.
        return root

    root.children.append(build_interp_tree(text[interp_offset:], cfr_part))
    return root
Exemple #3
0
def find_next_appendix_offsets(text):
    """Return the (start, end) offsets of the next appendix, or None.

    None is returned if `search.find_offsets` finds nothing or if the
    supplement starts ahead of the appendix. A supplement that starts inside
    the appendix range becomes the range's end."""
    found = search.find_offsets(text, find_appendix_start)
    if found is None:
        return None

    first, last = found
    supplement_at = find_supplement_start(text)
    has_supplement = supplement_at is not None
    if has_supplement and supplement_at < first:
        return None

    # Stop at the supplement when it begins before the appendix would end.
    clipped = has_supplement and supplement_at < last
    return (first, supplement_at if clipped else last)
def build_whole_regtree(text):
    """Build the complete regulation tree for `text` by combining the
    regulation text tree with its appendix trees and, if a supplement start
    is found, the interpretation tree."""
    part_number = find_cfr_part(text)
    tree = build_reg_text_tree(text, part_number)

    appendix_nodes = appendix_trees(text, part_number, tree.label)
    tree.children.extend(appendix_nodes)

    supp_offset = find_supplement_start(text)
    if supp_offset is not None:
        # Only the text from the supplement start onward is interpreted.
        supplement_text = text[supp_offset:]
        interp_node = build_interp_tree(supplement_text, part_number)
        tree.children.append(interp_node)
    return tree
def next_subpart_offsets(text):
    """Find the start,end of the next subpart.

    The end is pulled back to the appendix start, or failing that the
    supplement start, when either begins before the subpart would otherwise
    end.

    Returns a (start, end) tuple, or None when `find_offsets` finds nothing
    or when the cut-off point lies before the subpart start (which would
    otherwise yield an inverted range)."""
    offsets = find_offsets(text, find_next_subpart_start)
    if offsets is None:
        return None
    start, end = offsets
    appendix_start = find_appendix_start(text)
    supplement_start = find_supplement_start(text)
    if appendix_start is not None and appendix_start < end:
        end = appendix_start
    elif supplement_start is not None and supplement_start < end:
        end = supplement_start

    # An appendix/supplement that begins before the subpart start would make
    # end < start; report "no subpart" rather than a backwards range.
    if end < start:
        return None
    return (start, end)
def next_subpart_offsets(text):
    """Return the (start, end) offsets of the next subpart, or None.

    The end is moved up to the appendix start, or otherwise the supplement
    start, if that boundary falls before the subpart's end. None is returned
    when `find_offsets` finds nothing or the resulting end precedes the
    start."""
    span = find_offsets(text, find_next_subpart_start)
    if span is None:
        return None
    begin, stop = span
    appendix_at = find_appendix_start(text)
    supplement_at = find_supplement_start(text)

    # The appendix takes precedence; only the first applicable boundary is
    # used to cut the subpart short.
    for boundary in (appendix_at, supplement_at):
        if boundary is not None and boundary < stop:
            stop = boundary
            break

    if stop < begin:
        return None
    return (begin, stop)
def next_section_offsets(text, part):
    """Find the start/end of the next section.

    The end is pulled back to the first applicable boundary, checked in this
    order: a subpart start lying strictly inside the section, an appendix
    start before the section end, a supplement start before the section end.

    Returns a (start, end) tuple, or None when `find_offsets` finds nothing
    or when the chosen boundary lies before the section start (which would
    otherwise yield an inverted range)."""
    offsets = find_offsets(text, lambda t: find_next_section_start(t, part))
    if offsets is None:
        return None

    start, end = offsets
    subpart_start = find_next_subpart_start(text)
    appendix_start = find_appendix_start(text)
    supplement_start = find_supplement_start(text)
    if subpart_start is not None \
            and subpart_start > start and subpart_start < end:
        end = subpart_start
    elif appendix_start is not None and appendix_start < end:
        end = appendix_start
    elif supplement_start is not None and supplement_start < end:
        end = supplement_start

    # Unlike the subpart check, the appendix/supplement checks do not require
    # the boundary to come after `start`, so guard against end < start.
    if end < start:
        return None
    return (start, end)
def next_section_offsets(text, part):
    """Return the (start, end) offsets of the next section, or None.

    The end is shortened to a subpart start that lies strictly inside the
    section; otherwise to the appendix start, or otherwise the supplement
    start, if that boundary falls before the section end. None is returned
    when `find_offsets` finds nothing or the resulting end precedes the
    start."""
    def section_start(candidate_text):
        return find_next_section_start(candidate_text, part)

    span = find_offsets(text, section_start)
    if span is None:
        return None

    begin, stop = span
    subpart_at = find_next_subpart_start(text)
    appendix_at = find_appendix_start(text)
    supplement_at = find_supplement_start(text)

    if subpart_at is not None and begin < subpart_at < stop:
        stop = subpart_at
    else:
        # Appendix is considered before the supplement; first match wins.
        for boundary in (appendix_at, supplement_at):
            if boundary is not None and boundary < stop:
                stop = boundary
                break

    if stop < begin:
        return None
    return (begin, stop)
Exemple #9
0
 def test_find_supplement_start(self):
     # Sample text: "Supplement A" opens the text, "Supplement I" opens the
     # third line (offset 21), and "Supplement C" appears only mid-line.
     text = "Supplement A S\nOther\nSupplement I Thing\nXX Supplement C Q"
     # (expected offset, extra positional arguments after the text)
     cases = [
         (21, ()),
         (21, ('I',)),
         (0, ('A',)),
         (None, ('C',)),
     ]
     for expected, extra_args in cases:
         self.assertEqual(
             expected, supplement.find_supplement_start(text, *extra_args))
 def test_find_supplement_start(self):
     # Three "Supplement" mentions: A at offset 0, I at the start of the
     # third line (offset 21), and C in the middle of the last line.
     text = "Supplement A S\nOther\nSupplement I Thing\nXX Supplement C Q"
     find = supplement.find_supplement_start

     # With no letter given, the expected offset is the same as for 'I'.
     default_offset = find(text)
     self.assertEqual(21, default_offset)

     offset_i = find(text, 'I')
     self.assertEqual(21, offset_i)

     offset_a = find(text, 'A')
     self.assertEqual(0, offset_a)

     # 'C' is expected not to be found.
     offset_c = find(text, 'C')
     self.assertEqual(None, offset_c)
import codecs
import sys

from regparser.tree.appendix.tree import trees_from as appendix_trees
from regparser.tree.interpretation import build as build_interp_tree
from regparser.tree.reg_text import build_reg_text_tree
from regparser.tree.struct import NodeEncoder
from regparser.tree.supplement import find_supplement_start

if __name__ == "__main__":
    # Command-line entry point: read a regulation text file, build the
    # regulation tree (regulation text + appendices + interpretations, when a
    # supplement is present) and print its encoded form.
    if len(sys.argv) < 3:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("Usage: python generate_tree.py path/to/reg.txt part")
        print(" e.g.: python generate_tree.py rege.txt 1005")
        # Exit non-zero so callers can tell the run was rejected; sys.exit is
        # used instead of the interactive-only `exit` helper.
        sys.exit(1)
    with codecs.open(sys.argv[1], encoding='utf-8') as f:
        # codecs.open with an encoding already yields decoded text.
        reg = f.read()

    part = int(sys.argv[2])
    reg_tree = build_reg_text_tree(reg, part)

    # find_supplement_start may return None; slicing with None would hand the
    # entire regulation to the interpretation builder, so skip it instead.
    supplement_start = find_supplement_start(reg)
    interp_tree = None
    if supplement_start is not None:
        interp_tree = build_interp_tree(reg[supplement_start:], part)

    # Use a distinct name so the imported appendix_trees function is not
    # rebound at module level.
    appendix_nodes = appendix_trees(reg, part, reg_tree.label)

    reg_tree.children.extend(appendix_nodes)
    if interp_tree is not None:
        reg_tree.children.append(interp_tree)

    print(NodeEncoder().encode(reg_tree))