def section_node(self):
    """Return a small fixture tree: a root labelled ['200'] holding one
    child labelled ['200', '2'], which holds ['200', '2', 'a']."""
    section = Node('n2', label=['200', '2'])
    paragraph = Node('n2a', label=['200', '2', 'a'])
    section.children = [paragraph]

    return Node('root', label=['200'], children=[section])
Ejemplo n.º 2
0
def build_tree(reg_xml):
    """Build the Node tree for a regulation from its XML source.

    The XML is parsed and preprocessed, then a root node is created whose
    label is the value of get_reg_part(doc) and whose title is the value of
    get_title(doc). If the first PART element has SUBPART children, each one
    is built into a child of the root; otherwise every SECTION child is
    gathered under a single empty part. Nodes built from the non-regulation
    text are appended last.

    :param reg_xml: regulation XML in a form accepted by etree.fromstring
    :returns: the root Node of the constructed tree
    :raises IndexError: if the document contains no PART element
    """
    doc = etree.fromstring(reg_xml)
    preprocess_xml(doc)

    reg_part = get_reg_part(doc)
    title = get_title(doc)

    tree = Node("", [], [reg_part], title)

    part = doc.xpath('//PART')[0]

    # Iterate the element itself: getchildren() is deprecated in favour of
    # direct iteration / list(element).
    subpart_xmls = [c for c in part if c.tag == 'SUBPART']
    if subpart_xmls:
        subparts = [build_subpart(reg_part, s) for s in subpart_xmls]
        tree.children = subparts
    else:
        # No subparts: collect the sections under one placeholder part
        section_xmls = [c for c in part if c.tag == 'SECTION']
        sections = []
        for section_xml in section_xmls:
            sections.extend(build_from_section(reg_part, section_xml))
        empty_part = reg_text.build_empty_part(reg_part)
        empty_part.children = sections
        tree.children = [empty_part]

    non_reg_sections = build_non_reg_text(doc, reg_part)
    tree.children += non_reg_sections

    return tree
    def test_dict_to_node(self):
        """dict_to_node builds a Node from a dict with text, label and
        node_type, carries tagged_text over when present, and gives back a
        value equal to the input when the dict only has text."""
        dict_node = {
            'text': 'node text',
            'label': ['205', 'A'],
            'node_type': 'appendix'}

        node = compiler.dict_to_node(dict_node)

        self.assertEqual(
            node,
            Node('node text', [], ['205', 'A'], None, 'appendix'))

        dict_node['tagged_text'] = '<E> Tagged </E> text.'

        # The hand-built node is the expectation; the converted one is the
        # value under test. (A redundant, unused conversion was dropped.)
        expected_node = Node('node text', [], ['205', 'A'], None, 'appendix')
        expected_node.tagged_text = '<E> Tagged </E> text.'

        created_node = compiler.dict_to_node(dict_node)

        self.assertEqual(expected_node, created_node)
        self.assertEqual(expected_node.tagged_text, created_node.tagged_text)

        # Without label and node_type the result must equal the input dict
        dict_node = {
            'text': 'node text'
        }

        node = compiler.dict_to_node(dict_node)
        self.assertEqual(node, dict_node)
    def test_create_xml_changes_child_stars(self):
        """A PUT amendment on 200-2-a records exactly one change. With the
        paragraph text as given the change has no 'field'; once the text
        ends in a colon the change's field is '[text]'."""
        labels_amended = [Amendment('PUT', '200-2-a')]
        xml = etree.fromstring("<ROOT><P>(a) Content</P><STARS /></ROOT>")
        n2a = Node('(a) Content', label=['200', '2', 'a'],
                   source_xml=xml.xpath('//P')[0])
        n2b = Node('(b) Content', label=['200', '2', 'b'])
        n2 = Node('n2', label=['200', '2'], children=[n2a, n2b])
        root = Node('root', label=['200'], children=[n2])

        notice_changes = changes.NoticeChanges()
        build.create_xml_changes(labels_amended, root, notice_changes)

        self.assertIn('200-2-a', notice_changes.changes)
        # assertTrue(1, len(...)) could never fail: its second argument is
        # only the failure message. Compare the count for real.
        self.assertEqual(1, len(notice_changes.changes['200-2-a']))
        change = notice_changes.changes['200-2-a'][0]
        self.assertEqual('PUT', change['action'])
        self.assertNotIn('field', change)

        # Append a colon to both the node text and its source XML
        n2a.text = n2a.text + ":"
        n2a.source_xml.text = n2a.source_xml.text + ":"

        notice_changes = changes.NoticeChanges()
        build.create_xml_changes(labels_amended, root, notice_changes)

        self.assertIn('200-2-a', notice_changes.changes)
        self.assertEqual(1, len(notice_changes.changes['200-2-a']))
        change = notice_changes.changes['200-2-a'][0]
        self.assertEqual('PUT', change['action'])
        self.assertEqual('[text]', change.get('field'))
    def section_node(self):
        """Return a small fixture tree: a root labelled ["200"] holding one
        child labelled ["200", "2"], which holds ["200", "2", "a"]."""
        section = Node("n2", label=["200", "2"])
        paragraph = Node("n2a", label=["200", "2", "a"])
        section.children = [paragraph]

        return Node("root", label=["200"], children=[section])
    def test_keyterm_is_first_not_first(self):
        """Emphasized text at the end of a paragraph must not be reported
        by keyterm_is_first."""
        plain = '(a) This has a list: apples et seq.'
        tagged = '(a) This has a list: apples <E T="03">et seq.</E>'
        node = Node(plain, label=['101', '22', 'a'])
        node.tagged_text = tagged

        is_first = KeyTerms(None).keyterm_is_first(node, 'et seq.')
        self.assertFalse(is_first)
def collapsed_markers_matches(node_text, tagged_text):
    """Return the regex matches for collapsed markers in node_text, i.e.
    markers of tree paragraphs that start inside a single XML node.

    The search runs over the plain text, with the tagged text used only as
    a hint. A leading keyterm is masked with periods first so that it
    counts as an acceptable prefix. Matches at position 0, matches
    overlapping citations, matches followed by certain characters, and
    un-emphasized "1" markers are all discarded."""
    probe = Node(node_text, node_type=Node.INTERP,
                 label=[get_first_interp_marker(node_text)])
    probe.tagged_text = tagged_text
    keyterm = KeyTerms.get_keyterm(probe)
    if keyterm:
        node_text = node_text.replace(keyterm, '.'*len(keyterm))

    # A marker immediately followed by any of these is a false positive
    disqualifying = ("e.", ")", u"”", '"', "'")

    collapsed_markers = []
    for marker in _first_markers:
        candidates = ((m, m.start(), m.end())
                      for m in marker.finditer(node_text) if m.start() > 0)
        candidates = remove_citation_overlaps(node_text, candidates)
        matches = [
            m for m, _, end in candidates
            if not any(node_text[end:].startswith(following)
                       for following in disqualifying)]
        # Every genuine "1." collapsed marker is emphasized, so without
        # that emphasis in the tagged text any "1" match is dropped
        if '<E T="03">1' not in tagged_text:
            matches = [m for m in matches if m.group(1) != '1']
        collapsed_markers.extend(matches)
    return collapsed_markers
Ejemplo n.º 8
0
def test_create_xml_changes_child_stars():
    """A PUT amendment on '200-?-2-a' records exactly one change under
    200-2-a. With the paragraph text as given the change has no 'field';
    once the text ends in a colon the change's field is '[text]'."""
    labels_amended = [Amendment('PUT', '200-?-2-a')]
    with XMLBuilder("ROOT") as ctx:
        ctx.P("(a) Content")
        ctx.STARS()
    paragraph_a = Node('(a) Content', label=['200', '2', 'a'],
                       source_xml=ctx.xml.xpath('//P')[0])
    paragraph_b = Node('(b) Content', label=['200', '2', 'b'])
    section = Node('n2', label=['200', '2'],
                   children=[paragraph_a, paragraph_b])
    root = Node('root', label=['200'], children=[section])

    def sole_change():
        """Run create_xml_changes on the tree and return the single change
        recorded for 200-2-a."""
        notice_changes = changes.NoticeChanges()
        fetch.create_xml_changes(labels_amended, root, notice_changes)
        data = notice_changes[None]

        assert '200-2-a' in data
        assert len(data['200-2-a']) == 1
        return data['200-2-a'][0]

    change = sole_change()
    assert change['action'] == 'PUT'
    assert 'field' not in change

    # Append a colon to both the node text and its source XML
    paragraph_a.text = paragraph_a.text + ":"
    paragraph_a.source_xml.text = paragraph_a.source_xml.text + ":"

    change = sole_change()
    assert change['action'] == 'PUT'
    assert change.get('field') == '[text]'
 def test_no_keyterm(self):
     """A paragraph whose tagged text contains no emphasis tags produces
     no keyterm results."""
     node = Node('(a) Apples are grown in New Zealand.',
                 label=['101', '22', 'a'])
     node.tagged_text = '(a) Apples are grown in New Zealand.'
     kt = KeyTerms(None)
     results = kt.process(node)
     # assertEquals is a deprecated alias (removed in Python 3.12)
     self.assertIsNone(results)
Ejemplo n.º 10
0
    def paragraph_with_marker(self, text, tagged_text):
        """Handle a paragraph that begins with a marker such as (a) or a.

        The text is cut into pieces by split_paragraph_text and one
        appendix Node per piece is appended to self.nodes, each labelled
        with its own initial marker and carrying the whole tagged_text.
        Any keyterm is masked with semicolons while splitting and put back
        afterwards."""
        keyterm_probe = Node(text, node_type=Node.APPENDIX)
        keyterm_probe.tagged_text = tagged_text
        keyterm_probe.label = [initial_marker(text)[0]]
        keyterm = KeyTerms.get_keyterm(keyterm_probe)

        # Mask the keyterm so it does not interfere with detecting
        # collapsed paragraphs
        masked = text.replace(keyterm, ';'*len(keyterm)) if keyterm else text

        for piece in split_paragraph_text(masked):
            if keyterm:
                # Restore the original wording in this piece
                piece = piece.replace(';'*len(keyterm), keyterm)
            # NOTE: an earlier experiment relabelled pieces whose marker
            # already existed in self.nodes with a 'dupN' prefix (counted
            # by self.paragraph_counter); that handling is disabled.
            piece_label = [initial_marker(piece)[0]]
            node = Node(piece, node_type=Node.APPENDIX, label=piece_label)
            node.tagged_text = tagged_text
            self.nodes.append(node)
    def test_create_xml_changes_child_stars(self):
        """A PUT amendment on '200-?-2-a' records exactly one change under
        200-2-a. With the paragraph text as given the change has no
        'field'; once the text ends in a colon the field is '[text]'."""
        labels_amended = [Amendment('PUT', '200-?-2-a')]
        with XMLBuilder("ROOT") as ctx:
            ctx.P("(a) Content")
            ctx.STARS()
        n2a = Node('(a) Content', label=['200', '2', 'a'],
                   source_xml=ctx.xml.xpath('//P')[0])
        n2b = Node('(b) Content', label=['200', '2', 'b'])
        n2 = Node('n2', label=['200', '2'], children=[n2a, n2b])
        root = Node('root', label=['200'], children=[n2])

        notice_changes = changes.NoticeChanges()
        amendments.create_xml_changes(labels_amended, root, notice_changes)
        data = notice_changes.changes_by_xml[None]

        self.assertIn('200-2-a', data)
        # assertTrue(1, len(...)) could never fail: its second argument is
        # only the failure message. Compare the count for real.
        self.assertEqual(1, len(data['200-2-a']))
        change = data['200-2-a'][0]
        self.assertEqual('PUT', change['action'])
        self.assertNotIn('field', change)

        # Append a colon to both the node text and its source XML
        n2a.text = n2a.text + ":"
        n2a.source_xml.text = n2a.source_xml.text + ":"

        notice_changes = changes.NoticeChanges()
        amendments.create_xml_changes(labels_amended, root, notice_changes)
        data = notice_changes.changes_by_xml[None]

        self.assertIn('200-2-a', data)
        self.assertEqual(1, len(data['200-2-a']))
        change = data['200-2-a'][0]
        self.assertEqual('PUT', change['action'])
        self.assertEqual('[text]', change.get('field'))
    def test_create_xml_changes_child_stars(self):
        """A PUT amendment on 200-2-a records exactly one change. With the
        paragraph text as given the change has no "field"; once the text
        ends in a colon the change's field is "[text]"."""
        labels_amended = [Amendment("PUT", "200-2-a")]
        xml = etree.fromstring("<ROOT><P>(a) Content</P><STARS /></ROOT>")
        n2a = Node("(a) Content", label=["200", "2", "a"], source_xml=xml.xpath("//P")[0])
        n2b = Node("(b) Content", label=["200", "2", "b"])
        n2 = Node("n2", label=["200", "2"], children=[n2a, n2b])
        root = Node("root", label=["200"], children=[n2])

        notice_changes = changes.NoticeChanges()
        build.create_xml_changes(labels_amended, root, notice_changes)

        self.assertIn("200-2-a", notice_changes.changes)
        # assertTrue(1, len(...)) could never fail: its second argument is
        # only the failure message. Compare the count for real.
        self.assertEqual(1, len(notice_changes.changes["200-2-a"]))
        change = notice_changes.changes["200-2-a"][0]
        self.assertEqual("PUT", change["action"])
        self.assertNotIn("field", change)

        # Append a colon to both the node text and its source XML
        n2a.text = n2a.text + ":"
        n2a.source_xml.text = n2a.source_xml.text + ":"

        notice_changes = changes.NoticeChanges()
        build.create_xml_changes(labels_amended, root, notice_changes)

        self.assertIn("200-2-a", notice_changes.changes)
        self.assertEqual(1, len(notice_changes.changes["200-2-a"]))
        change = notice_changes.changes["200-2-a"][0]
        self.assertEqual("PUT", change["action"])
        self.assertEqual("[text]", change.get("field"))
def collapsed_markers_matches(node_text, tagged_text):
    """Return the regex matches for collapsed markers in node_text, i.e.
    markers of tree paragraphs that start inside a single XML node.

    The search runs over the plain text, with the tagged text used only as
    a hint. A keyterm found in the node is masked with periods first so
    that it counts as an acceptable prefix. Matches overlapping citations
    and matches rejected by false_collapsed_marker are discarded."""
    probe = Node(node_text, node_type=Node.INTERP,
                 label=[get_first_interp_marker(node_text)])
    probe.tagged_text = tagged_text
    keyterm = KeyTerms.keyterm_in_node(probe)
    if keyterm:
        node_text = node_text.replace(keyterm, '.' * len(keyterm))

    collapsed_markers = []
    for marker in _first_markers:
        candidates = [(m, m.start(), m.end())
                      for m in marker.finditer(node_text)]
        # Keep only the match object from each surviving triplet
        survivors = [
            triplet[0]
            for triplet in remove_citation_overlaps(node_text, candidates)]
        for match in survivors:
            if false_collapsed_marker(match, node_text, tagged_text):
                continue
            collapsed_markers.append(match)
    return collapsed_markers
def nodes_from_interp_p(xml_node):
    """Yield the interpretation Nodes contained in one XML paragraph.

    The first node covers the text up to the first collapsed marker (or
    the whole text when there is none) and carries the tagged text. One
    further node follows per collapsed marker. Whenever a yielded node's
    text ends with '* * *', an INLINE_STARS node is yielded right after
    it."""
    node_text = tree_utils.get_node_text(xml_node, add_spaces=True)
    text_with_tags = tree_utils.get_node_text_tags_preserved(xml_node)
    first_marker = get_first_interp_marker(text_with_tags)
    collapsed = collapsed_markers_matches(node_text, text_with_tags)

    # Cut points in node_text: each match end, less 2 to step back over
    # the matched character + period, then the end of the text
    boundaries = [m.end() - 2 for m in collapsed] + [len(node_text)]

    # Node for this paragraph itself
    lead = Node(node_text[0:boundaries[0]], label=[first_marker],
                node_type=Node.INTERP)
    lead.tagged_text = text_with_tags
    yield lead
    if lead.text.endswith('* * *'):
        yield Node(label=[mtypes.INLINE_STARS])

    # Collapsed-marker children, each running up to the next cut point
    for idx, match in enumerate(collapsed):
        marker = match.group(1)
        if marker == '1':
            marker = '<E T="03">1</E>'
        child = Node(node_text[boundaries[idx]:boundaries[idx + 1]],
                     label=[marker], node_type=Node.INTERP)
        yield child
        if child.text.endswith('* * *'):
            yield Node(label=[mtypes.INLINE_STARS])
Ejemplo n.º 15
0
 def test_keyterm_and_emphasis(self):
     """With emphasis both at the start and later in the paragraph, the
     first result is the leading keyterm 'Apples.' at location 0."""
     plain = '(a) Apples. Apples are grown in New Zealand.'
     tagged = ('(a) <E T="03">Apples.</E> Apples are grown in '
               'New <E T="03">Zealand.</E>')
     node = Node(plain, label=['101', '22', 'a'])
     node.tagged_text = tagged

     results = KeyTerms(None).process(node)

     self.assertNotEqual(results, None)
     first = results[0]
     self.assertEqual(first['key_term'], 'Apples.')
     self.assertEqual(first['locations'], [0])
    def tree_with_subparts(self):
        """Return a fixture tree rooted at ["205"] whose children are two
        subpart nodes (A and B) followed by one appendix node (C)."""
        subpart_a = Node(
            "nsa", label=["205", "Subpart", "A"], node_type=Node.SUBPART)
        subpart_b = Node(
            "nsb", label=["205", "Subpart", "B"], node_type=Node.SUBPART)
        appendix_c = Node(
            "nappa", label=["205", "Appendix", "C"], node_type=Node.APPENDIX)

        root = Node("", label=["205"])
        root.children = [subpart_a, subpart_b, appendix_c]
        return root
Ejemplo n.º 17
0
    def test_emphasis_close_to_front(self):
        """An emphasized phrase near the start of the paragraph, but not at
        the very start, is not treated as coming first."""
        plain = '(a) T et seq. has a list: apples'
        tagged = '(a) T <E T="03">et seq.</E> has a list: apples'
        node = Node(plain, label=['101', '22', 'a'])
        node.tagged_text = tagged

        is_first = KeyTerms(None).keyterm_is_first(node, 'et seq.')
        self.assertFalse(is_first)
Ejemplo n.º 18
0
 def test_interpretation_markers(self):
     """An interpretation paragraph with a '3.' marker followed by
     emphasized text yields that text as the keyterm at location 0."""
     interp_label = ['101', 'c', Node.INTERP_MARK, '3']
     node = Node('3. et seq. has a list: apples',
                 label=interp_label, node_type=Node.INTERP)
     node.tagged_text = '3. <E T="03">et seq.</E> has a list: apples'

     results = KeyTerms(None).process(node)

     self.assertNotEqual(results, None)
     first = results[0]
     self.assertEqual(first['key_term'], 'et seq.')
     self.assertEqual(first['locations'], [0])
Ejemplo n.º 19
0
    def paragraph_with_marker(self, text, next_text=''):
        """Handle a paragraph that starts with a marker such as (a) or a.

        Creates an appendix Node for the text, decides which marker level
        (an index into p_levels) its marker belongs to, stores that on the
        node as p_level, then adds the node to self.m_stack at a depth
        derived from the levels already on the stack.

        :param text: paragraph text beginning with a marker
        :param next_text: text of the following paragraph, used to resolve
            markers that appear in more than one level
        """
        marker, _ = initial_marker(text)
        n = Node(text, node_type=Node.APPENDIX, label=[marker])

        # NOTE(review): this check implies initial_marker returns a falsy
        # value when no marker is found, while the call above unpacks its
        # result unconditionally -- confirm text always has a marker.
        if initial_marker(next_text):
            next_marker, _ = initial_marker(next_text)
        else:
            next_marker = None

        # Indexes of every level in p_levels containing each marker
        this_p_levels = set(idx for idx, lvl in enumerate(p_levels)
                            if marker in lvl)
        next_p_levels = set(idx for idx, lvl in enumerate(p_levels)
                            if next_marker in lvl)
        # Non-empty stack levels, and the p_levels already assigned to
        # nodes within them
        previous_levels = [l for l in self.m_stack.m_stack if l]
        previous_p_levels = set()
        for stack_level in previous_levels:
            previous_p_levels.update(sn.p_level for _, sn in stack_level
                                     if hasattr(sn, 'p_level'))

        #   Ambiguity, e.g. 'i', 'v'. Disambiguate by looking forward
        if len(this_p_levels) > 1 and len(next_p_levels) == 1:
            next_p_level = next_p_levels.pop()
            #   e.g. an 'i' followed by a 'ii'
            if next_p_level in this_p_levels:
                this_p_idx = p_levels[next_p_level].index(marker)
                next_p_idx = p_levels[next_p_level].index(next_marker)
                if this_p_idx < next_p_idx:     # Heuristic
                    n.p_level = next_p_level
            #   e.g. (a)(1)(i) followed by an 'A'
            # This may overwrite the p_level chosen just above
            new_level = this_p_levels - previous_p_levels
            if next_p_level not in previous_p_levels and new_level:
                n.p_level = new_level.pop()

        #   Ambiguity. Disambiguate by looking backwards
        if len(this_p_levels) > 1 and not hasattr(n, 'p_level'):
            for stack_level in previous_levels:
                for lvl, stack_node in stack_level:
                    if getattr(stack_node, 'p_level', None) in this_p_levels:
                        #   Later levels replace earlier ones
                        n.p_level = stack_node.p_level

        #   Simple case (no ambiguity) and cases not seen above
        # NOTE(review): a p_level of 0 is falsy and so is treated as unset
        # here; min() raises ValueError when the marker is in no level.
        if not getattr(n, 'p_level', None):
            n.p_level = min(this_p_levels)  # rule of thumb: favor lower case

        #   Check if we've seen this type of marker before
        found_in_prev = False
        for stack_level in previous_levels:
            if stack_level and in_same_p_level(n, stack_level):
                found_in_prev = True
                # Reuse the depth recorded on that level's last entry; the
                # last matching level in previous_levels wins
                self.depth = stack_level[-1][0]
        if not found_in_prev:   # New type of marker
            self.depth += 1
        self.m_stack.add(self.depth, n)
 def test_node_definitions_multiple_xml(self):
     """Two emphasized terms joined by `and` are both found as
     definitions, each with its own start offset."""
     stack = ParentStack().add(0, Node(label=['9999']))
     plain = "(4) Cold and dreary mean winter."
     tagged = ('(4) <E T="03">Cold</E> and '
               '<E T="03">dreary</E> mean winter.')
     winter = Node(plain, label=['9999', '4'])
     winter.tagged_text = tagged

     included, _ = Terms(None).node_definitions(winter, stack)

     self.assertEqual(len(included), 2)
     first_ref, second_ref = included
     self.assertEqual(first_ref, Ref('cold', '9999-4', 4))
     self.assertEqual(second_ref, Ref('dreary', '9999-4', 13))
    def build_tree(self):
        """Return a fixture tree rooted at ['200'] with three sections;
        the first and third sections each hold a single child."""
        section_1 = Node('n1', label=['200', '1'])
        # NOTE(review): the middle label element below is the integer 1,
        # not the string '1' -- kept as in the original; confirm intent.
        section_1_child = Node('n1i', label=['200', 1, 'i'])
        section_2 = Node('n2', label=['200', '2'])
        section_3 = Node('n3', label=['200', '3'])
        section_3_child = Node('n3a', label=['200', '3', 'a'])

        section_1.children = [section_1_child]
        section_3.children = [section_3_child]

        top_level = [section_1, section_2, section_3]
        return Node('root', label=['200'], children=top_level)
 def assert_finds_result(self, tagged_text, parent_title, *refs):
     """Run DefinitionKeyterm (built with a parent node that has the given
     title) over a node made from tagged_text, and check that the terms
     and start offsets found match the expected refs, in order."""
     parent = Node(label=['1000', '1'], title=parent_title)
     plain_text = re.sub(r"<[^>]*>", "", tagged_text)  # strip the tags
     node = Node(plain_text)
     node.tagged_text = tagged_text

     found = def_finders.DefinitionKeyterm(parent).find(node)

     self.assertEqual(len(found), len(refs))
     for wanted, got in zip(refs, found):
         self.assertEqual(wanted.term, got.term)
         self.assertEqual(wanted.start, got.start)
    def build_tree(self):
        """Return a fixture tree rooted at ["200"] with three sections;
        the first and third sections each hold a single child."""
        section_1 = Node("n1", label=["200", "1"])
        # NOTE(review): the middle label element below is the integer 1,
        # not the string "1" -- kept as in the original; confirm intent.
        section_1_child = Node("n1i", label=["200", 1, "i"])
        section_2 = Node("n2", label=["200", "2"])
        section_3 = Node("n3", label=["200", "3"])
        section_3_child = Node("n3a", label=["200", "3", "a"])

        section_1.children = [section_1_child]
        section_3.children = [section_3_child]

        top_level = [section_1, section_2, section_3]
        return Node("root", label=["200"], children=top_level)
Ejemplo n.º 24
0
    def test_keyterm_see(self):
        """Emphasis tags sometimes also wrap phrases such as 'See also';
        only the leading 'Apples.' is expected as the keyterm."""
        plain = '(a) Apples. See Section 101.2'
        tagged = '(a) <E T="03">Apples. See also</E>'
        node = Node(plain, label=['101', '22', 'a'])
        node.tagged_text = tagged

        results = KeyTerms(None).process(node)

        self.assertEqual('Apples.', results[0]['key_term'])
Ejemplo n.º 25
0
    def test_emphasis_later(self):
        """Text emphasized late in a paragraph must not be picked up as a
        key term."""
        plain = '(a) This has a list: apples et seq.'
        tagged = '(a) This has a list: apples <E T="03">et seq.</E>'
        node = Node(plain, label=['101', '22', 'a'])
        node.tagged_text = tagged

        results = KeyTerms(None).process(node)

        self.assertEqual(results, None)
 def _assert_finds(self, tagged_text, *refs):
     """Run XMLTermMeans over a node made from tagged_text and check that
     the terms and start offsets found match the expected refs, in
     order."""
     plain_text = re.sub(r"<[^>]*>", "", tagged_text)  # strip the tags
     node = Node(plain_text)
     node.tagged_text = tagged_text

     found = def_finders.XMLTermMeans().find(node)

     self.assertEqual(len(refs), len(found))
     for wanted, got in zip(refs, found):
         self.assertEqual(wanted.term, got.term)
         self.assertEqual(wanted.start, got.start)
    def tree_with_paragraphs(self):
        """Return a fixture tree rooted at ["205"] with sections 1, 2 and
        4, where section 2 holds paragraphs a and b."""
        section_1 = Node("n1", label=["205", "1"])
        section_2 = Node("n2", label=["205", "2"])
        section_4 = Node("n4", label=["205", "4"])

        section_2.children = [
            Node("n2a", label=["205", "2", "a"]),
            Node("n2b", label=["205", "2", "b"]),
        ]

        root = Node("", label=["205"])
        root.children = [section_1, section_2, section_4]
        return root
 def test_node_definitions_xml_or(self):
     """Two emphasized terms joined by `or` are both found as
     definitions, each with its own start offset."""
     stack = ParentStack().add(0, Node(label=['9999']))
     plain = "(i) Hot tamale or tamale means nom nom"
     tagged = ('(i) <E T="03">Hot tamale</E> or <E T="03"> '
               'tamale</E> means nom nom ')
     tamale_node = Node(plain, label=['9999', '4'])
     tamale_node.tagged_text = tagged

     included, _ = Terms(None).node_definitions(tamale_node, stack)

     self.assertEqual(len(included), 2)
     first_ref, second_ref = included
     self.assertEqual(first_ref, Ref('hot tamale', '9999-4', 4))
     self.assertEqual(second_ref, Ref('tamale', '9999-4', 18))
Ejemplo n.º 29
0
    def derive_nodes(self, xml, processor=None):
        """Return one Node per (marker, plain text, tagged text) triple
        that split_by_markers produces for xml. If the final piece's plain
        text ends with '* * *', an INLINE_STARS node is appended. The
        processor argument is not used here."""
        derived = []
        last_plain = ''
        for marker, plain, tagged in split_by_markers(xml):
            last_plain = plain
            current = Node(text=plain.strip(), label=[marker],
                           source_xml=xml)
            current.tagged_text = six.text_type(tagged.strip())
            derived.append(current)

        # Only the last piece is checked for trailing stars
        if last_plain.endswith('* * *'):
            derived.append(Node(label=[mtypes.INLINE_STARS]))
        return derived
    def tree_with_paragraphs(self):
        """Return a fixture tree rooted at ['205'] with sections 1, 2 and
        4, where section 2 holds paragraphs a and b."""
        section_1 = Node('n1', label=['205', '1'])
        section_2 = Node('n2', label=['205', '2'])
        section_4 = Node('n4', label=['205', '4'])

        section_2.children = [
            Node('n2a', label=['205', '2', 'a']),
            Node('n2b', label=['205', '2', 'b']),
        ]

        root = Node('', label=['205'])
        root.children = [section_1, section_2, section_4]
        return root
Ejemplo n.º 31
0
    def test_move_interps(self):
        """Build an interpretation tree and move node 205-2-a-Interp-1 to
        the label ['205', '4', 'c', 'Interp', '5'].

        NOTE(review): no assertions follow the move in the visible code;
        as it stands the test only checks that move() does not raise."""

        def interp(text, *label):
            """Shorthand for an interpretation node with the given label."""
            return Node(text, label=list(label), node_type=Node.INTERP)

        n1 = interp('n1', '205', '1', 'Interp')
        n2 = interp('n2', '205', '2', 'Interp')
        n4 = interp('n4', '205', '4', 'Interp')

        n4.children = [interp('n4c', '205', '4', 'c', 'Interp')]

        n2a = interp('n2a', '205', '2', 'a', 'Interp')
        n2b = interp('n2b', '205', '2', 'b', 'Interp')
        n2a.children = [interp('1. First', '205', '2', 'a', 'Interp', '1')]
        n2.children = [n2a, n2b]

        root = interp('', '205', 'Interp')
        root.children = [n1, n2, n4]

        reg_tree = compiler.RegulationTree(root)
        destination = ['205', '4', 'c', 'Interp', '5']
        reg_tree.move('205-2-a-Interp-1', destination)
Ejemplo n.º 32
0
 def test_underparagraph(self):
     """A '(a)(4) through (5)' reference following the word
     'underparagraphs' is expected to yield two citations."""
     node = Node('Something something underparagraphs (a)(4) through (5)',
                 label=['1005', '6'])
     citations = self.parser.process(node)
     self.assertEqual(len(citations), 2)
Ejemplo n.º 33
0
 def test_has_parent_definitions_indicator_p_marker(self):
     """A parent whose text starts with a paragraph marker followed by
     'Definitions.' is recognised as a definitions indicator."""
     terms = Terms(None)
     parents = ParentStack()
     parent_text = "(a) Definitions. For purposes of this section except blah"
     parents.add(0, Node(parent_text))
     self.assertTrue(terms.has_parent_definitions_indicator(parents))
Ejemplo n.º 34
0
 def test_multiple_paragraph_or(self):
     """ Ensure that an 'or' between internal citations is matched
     correctly. """
     text = u"set forth in paragraphs (b)(1) or (b)(2)"
     citations = self.parser.process(Node(text, label=['1005', '6']))
     # assertEquals is a deprecated alias (removed in Python 3.12)
     self.assertEqual(2, len(citations))
Ejemplo n.º 35
0
 def derive_nodes(self, xml, processor=None):
     """Return a one-element list holding the result of running a new
     USCodeProcessor over xml with a fresh MARKERLESS node. The processor
     argument is ignored: it is always replaced by a USCodeProcessor."""
     us_code_processor = USCodeProcessor()
     markerless = Node(label=[mtypes.MARKERLESS], source_xml=xml)
     derived = us_code_processor.process(xml, markerless)
     return [derived]
    def test_pre_process(self):
        """Terms.pre_process on a small two-subpart tree fills in the
        scoped terms, the subpart map and the layer's 'referenced'
        entries."""
        # Unnamed (empty) subpart holding section 88-1
        section_1 = Node(
            u"Definition. For the purposes of this part, "
            u"“abcd” is an alphabet",
            label=['88', '1'])
        noname_subpart = Node(
            '', label=['88', 'Subpart'], node_type=Node.EMPTYPART,
            children=[section_1])

        # Subpart XQXQ holding section 88-2 with paragraphs a and b
        par_2a1 = Node(u"“AXAX” means axe-cop", label=['88', '2', 'a', '1'])
        par_2a = Node(
            label=['88', '2', 'a'],
            text="Definitions come later for the purposes of "
                 "this section ",
            children=[par_2a1])
        par_2biA = Node(
            label=['88', '2', 'b', 'i', 'A'],
            text=u"Definition. “Awesome sauce” means "
                 "great for the purposes of this "
                 "paragraph",)
        par_2bi = Node(label=['88', '2', 'b', 'i'], children=[par_2biA])
        par_2b = Node(label=['88', '2', 'b'], children=[par_2bi])
        section_2 = Node(label=['88', '2'], children=[par_2a, par_2b])
        xqxq_subpart = Node(
            '', title='Subpart XQXQ: The unreadable',
            label=['88', 'Subpart', 'XQXQ'], node_type=Node.SUBPART,
            children=[section_2])

        tree = Node(label=['88'], children=[noname_subpart, xqxq_subpart])
        t = Terms(tree)
        t.pre_process()

        # Each definition is registered under the scope it applies to
        expected_scopes = [
            (('88',), Ref('abcd', '88-1', (44, 48))),
            (('88', '2'), Ref('axax', '88-2-a-1', (1, 5))),
            (('88', '2', 'b', 'i', 'A'),
             Ref('awesome sauce', '88-2-b-i-A', (13, 26))),
        ]
        for scope, ref in expected_scopes:
            self.assertIn(scope, t.scoped_terms)
            self.assertEqual([ref], t.scoped_terms[scope])

        #   Check subparts are correct
        self.assertEqual({None: ['1'], 'XQXQ': ['2']}, dict(t.subpart_map))

        # Finally, make sure the references are added
        referenced = t.layer['referenced']
        expected_refs = [
            ('abcd', '88-1', (44, 48)),
            ('axax', '88-2-a-1', (1, 5)),
            ('awesome sauce', '88-2-b-i-A', (13, 26)),
        ]
        for term, reference, position in expected_refs:
            key = term + ':' + reference
            self.assertIn(key, referenced)
            self.assertEqual(term, referenced[key]['term'])
            self.assertEqual(reference, referenced[key]['reference'])
            self.assertEqual(position, referenced[key]['position'])
Ejemplo n.º 37
0
    def test_write_notice(self, mock_preamble, mock_fdsys,
                          mock_build_analysis):
        """write_notice raises RuntimeError when called without a reg
        tree; given changes and a tree it writes XML containing a
        changeset with both document numbers, the expected change
        elements and one analysis element.

        The three mock arguments are presumably injected by patch
        decorators that sit outside the visible code."""
        node_changes = {'1234-2': {'op': 'modified'},
                        '1234-3': {'op': 'deleted'},
                        '1234-4': {'op': 'added'}}
        reg_tree = Node("I'm the root", label=['1234'], children=[
            Node("I'll get analysis", label=['1234', '1']),
            Node("I will be modified", label=['1234', '2']),
            Node("I will be deleted", label=['1234', '3']),
            Node("I will be added", label=['1234', '4']),
        ])

        # Ensure we have some analysis just to include
        layers = {'analyses': {'1234-1': [{}]}}
        mock_build_analysis.return_value = etree.fromstring("""
          <analysisSection target="1234-1" notice="2015-12345" date="">
            This is some analysis
          </analysisSection>
        """)

        # An FDSYS
        mock_fdsys.return_value = etree.fromstring("""
            <fdsys>
                This is an fdsys
            </fdsys>
        """)

        # A preamble
        mock_preamble.return_value = etree.fromstring("""
            <preamble>
                This is the preamble
            </preamble>
        """)

        writer = XMLWriteContent("a/path", '2015-12345',
                                 layers=layers, notices={})

        # Calling without a reg_tree must fail
        with self.assertRaises(RuntimeError):
            writer.write_notice({})

        # Write a notice file, capturing the output through a mocked open
        mock_file = mock_open()
        with patch.object(builtins, 'open', mock_file, create=True):
            writer.write_notice({}, changes=node_changes, reg_tree=reg_tree,
                                left_doc_number='2015-01234')

        # Parse whatever was passed to the last write() call
        written = mock_file().write.call_args[0][0]
        notice_xml = etree.fromstring(written)

        # Introspect our changes
        changeset = notice_xml.find('.//{eregs}changeset')
        self.assertEqual('2015-01234',
                         changeset.get('leftDocumentNumber'))
        self.assertEqual('2015-12345',
                         changeset.get('rightDocumentNumber'))

        change_elements = notice_xml.findall('.//{eregs}change')
        self.assertEqual(len(change_elements), 4)
        operations = [c.get('operation') for c in change_elements]
        self.assertEqual(2, operations.count('modified'))
        self.assertEqual(1, operations.count('deleted'))
        self.assertEqual(1, operations.count('added'))

        analyses = notice_xml.findall('./{eregs}analysis')
        self.assertEqual(1, len(analyses))
    def test_node_definitions(self):
        """Check what Terms.node_definitions reports for several kinds of
        paragraph text, first with only a root node on the parent stack and
        then again with a 'Definitions' node added beneath it."""
        t = Terms(None)
        # (text, expected Refs) pairs whose terms sit inside smart quotes.
        # Each Ref is built from a lower-cased term, a label, and a
        # (start, end) pair -- presumably character offsets of the term
        # within the text; confirm against Ref's definition.
        smart_quotes = [
            (u'This has a “worD” and then more',
             [Ref('word', 'aaa', (12, 16))]),
            (u'I have “anotheR word” term and “moree”', [
                Ref('another word', 'bbb', (8, 20)),
                Ref('moree', 'bbb', (32, 37))
            ]),
            (u'But the child “DoeS sEe”?', [Ref('does see', 'ccc', (15, 23))]),
            (u'Start with “this,”', [Ref('this', 'hhh', (12, 16))]),
            (u'Start with “this;”', [Ref('this', 'iii', (12, 16))]),
            (u'Start with “this.”', [Ref('this', 'jjj', (12, 16))]),
            (u'As do “subchildren”', [Ref('subchildren', 'ddd', (7, 18))])
        ]

        # Plain texts for which no definitions are expected in either phase.
        no_defs = [
            u'This has no defs', u'Also has no terms', u'Still no terms, but',
            u'the next one does'
        ]

        # (text, tagged_text, expected Ref) triples; in the tagged text the
        # defined term is wrapped in an <E T="03"> element.
        xml_defs = [
            (u'(4) Thing means a thing that is defined',
             u'(4) <E T="03">Thing</E> means a thing that is defined',
             Ref('thing', 'eee', (4, 9))),
            (u'(e) Well-meaning lawyers means people who do weird things',
             u'(e) <E T="03">Well-meaning lawyers</E> means people who do '
             u'weird things', Ref('well-meaning lawyers', 'fff', (4, 24))),
            (u'(e) Words have the same meaning as in a dictionary',
             u'(e) <E T="03">Words</E> have the same meaning as in a '
             u'dictionary', Ref('words', 'ffg', (4, 9))),
            (u'(e) Banana has the same meaning as bonono',
             u'(e) <E T="03">Banana</E> has the same meaning as bonono',
             Ref('banana', 'fgf', (4, 10))),
            (u'(f) Huge billowy clouds means I want to take a nap',
             u'(f) <E T="03">Huge billowy clouds</E> means I want to take a '
             u'nap', Ref('huge billowy clouds', 'ggg', (4, 23))),
            (u'(v) Lawyers, in relation to coders, means something very '
             u'different',
             u'(v) <E T="03">Lawyers</E>, in relation to coders, means '
             u'something very different', Ref(u'lawyers', '', (4, 11))),
        ]

        # (text, tagged_text) pairs with two emphasized terms; no definition
        # is expected for these in either phase.
        xml_no_defs = [
            (u'(d) Term1 or term2 means stuff',
             u'(d) <E T="03">Term1</E> or <E T="03">term2></E> means stuff')
        ]

        # (text, expected Ref) pairs where the text itself states a scope.
        # The Ref label, split on '-', is reused below as the node label.
        scope_term_defs = [
            ('For purposes of this section, the term blue means the color',
             Ref('blue', '11-11', (39, 43))),
            ('For purposes of paragraph (a)(1) of this section, the term ' +
             'cool bro means hip cat', Ref('cool bro', '11-22', (59, 67))),
            ('For purposes of this paragraph, po jo means "poor Joe"',
             Ref('po jo', '11-33', (32, 37)))
        ]

        # Phase 1: only a root node (label 999) is on the parent stack.
        stack = ParentStack()
        stack.add(0, Node(label=['999']))
        for txt in no_defs:
            defs, exc = t.node_definitions(Node(txt), stack)
            self.assertEqual([], defs)
            self.assertEqual([], exc)
        # Smart-quoted terms are expected to yield nothing in this phase.
        for txt, refs in smart_quotes:
            defs, exc = t.node_definitions(Node(txt), stack)
            self.assertEqual([], defs)
            self.assertEqual([], exc)
        for txt, xml in xml_no_defs:
            node = Node(txt)
            node.tagged_text = xml
            defs, exc = t.node_definitions(node, stack)
            self.assertEqual([], defs)
            self.assertEqual([], exc)
        # Emphasized terms are expected to be found even in this phase.
        for txt, xml, ref in xml_defs:
            node = Node(txt, label=[ref.label])
            node.tagged_text = xml
            defs, exc = t.node_definitions(node, stack)
            self.assertEqual([ref], defs)
            self.assertEqual([], exc)
        for txt, ref in scope_term_defs:
            defs, exc = t.node_definitions(
                Node(txt, label=ref.label.split('-')), stack)
            self.assertEqual([ref], defs)
            self.assertEqual([], exc)

        # Phase 2: with a node whose text is 'Definitions' on the stack, the
        # smart-quoted terms are now expected to be reported; the other
        # fixtures are expected to behave as in phase 1.
        stack.add(1, Node('Definitions', label=['999', '1']))
        for txt in no_defs:
            defs, exc = t.node_definitions(Node(txt), stack)
            self.assertEqual([], defs)
            self.assertEqual([], exc)
        for txt, refs in smart_quotes:
            # The first expected Ref's label is used as the node's label.
            defs, exc = t.node_definitions(Node(txt, label=[refs[0].label]),
                                           stack)
            self.assertEqual(refs, defs)
            self.assertEqual([], exc)
        for txt, xml in xml_no_defs:
            node = Node(txt)
            node.tagged_text = xml
            defs, exc = t.node_definitions(node, stack)
            self.assertEqual([], defs)
            self.assertEqual([], exc)
        for txt, xml, ref in xml_defs:
            node = Node(txt, label=[ref.label])
            node.tagged_text = xml
            defs, exc = t.node_definitions(node, stack)
            self.assertEqual([ref], defs)
            self.assertEqual([], exc)
    def test_determine_scope(self):
        """Walk Terms.determine_scope through a series of parent stacks and
        check the list of scope tuples expected for each one.

        The stack is mutated cumulatively, so every assertion depends on the
        stack.add calls that precede it."""
        stack = ParentStack()
        t = Terms(None)

        stack.add(0, Node(label=['1000']))
        stack.add(1, Node(label=['1000', '1']))

        # Defaults to the entire reg
        self.assertEqual([('1000',)], t.determine_scope(stack))

        # "this part": expected scope is the part taken from the node's own
        # label (1001), plus the matching interpretation scope.
        stack.add(1, Node('For the purposes of this part, blah blah',
                          label=['1001', '2']))
        self.assertEqual([('1001',), ('1001', Node.INTERP_MARK)],
                         t.determine_scope(stack))

        # "this subpart": subpart_map appears to map a subpart name to the
        # identifiers it contains (TODO confirm). The node is in section 3,
        # so every entry of 'SubPart 1' is expected, each with an
        # interpretation counterpart.
        t.subpart_map = {
            'SubPart 1': ['A', '3'],
            'Other': []
        }
        stack.add(1, Node(label=['1000', '3']))
        stack.add(2, Node('For the purposes of this subpart, yada yada',
                          label=['1000', '3', 'c']))
        self.assertEqual([('1000', 'A'), ('1000', '3'),
                          ('1000', 'A', Node.INTERP_MARK),
                          ('1000', '3', Node.INTERP_MARK)],
                         t.determine_scope(stack))

        # "this section": expected scope is part + section.
        stack.add(2, Node('For the purposes of this section, blah blah',
                          label=['1000', '3', 'd']))
        self.assertEqual([('1000', '3'), ('1000', '3', Node.INTERP_MARK)],
                         t.determine_scope(stack))

        # "this paragraph": expected scope is the node's full label.
        stack.add(3, Node('For the purposes of this paragraph, blah blah',
                          label=['1000', '3', 'd', '5']))
        self.assertEqual([('1000', '3', 'd', '5'),
                          ('1000', '3', 'd', '5', Node.INTERP_MARK)],
                         t.determine_scope(stack))

        # A text-less node: the expected scope matches the "this section"
        # node still on the stack at depth 2, not this node's own label.
        stack.add(3, Node(label=['1002', '3', 'd', '6']))
        self.assertEqual([('1000', '3'), ('1000', '3', Node.INTERP_MARK)],
                         t.determine_scope(stack))

        # "as used in this paragraph" is expected to behave like
        # "for the purposes of this paragraph".
        stack.add(3, Node('Blah as used in this paragraph, blah blah',
                          label=['1000', '3', 'd', '7']))
        self.assertEqual([('1000', '3', 'd', '7'),
                          ('1000', '3', 'd', '7', Node.INTERP_MARK)],
                         t.determine_scope(stack))

        # An explicit citation down to paragraph level: the expected scope
        # is the cited paragraph.
        stack.add(4, Node(u'For the purposes of this § 1000.3(d)(6)(i), blah',
                          label=['1000', '3', 'd', '6', 'i']))
        self.assertEqual([('1000', '3', 'd', '6', 'i'),
                          ('1000', '3', 'd', '6', 'i', Node.INTERP_MARK)],
                         t.determine_scope(stack))

        # An explicit citation of a whole section: the expected scope is
        # that section.
        stack.add(4, Node(u'For the purposes of § 1000.3, blah',
                          label=['1000', '3', 'd', '6', 'ii']))
        self.assertEqual([('1000', '3'),
                          ('1000', '3', Node.INTERP_MARK)],
                         t.determine_scope(stack))

        # "as used in this section" is expected to behave like
        # "for the purposes of this section".
        stack.add(4, Node('As used in this section, blah blah',
                          label=['1000', '3', 'd', '6', 'iii']))
        self.assertEqual(
            [('1000', '3'), ('1000', '3', Node.INTERP_MARK)],
            t.determine_scope(stack))
    def test_pre_process_subpart(self):
        """pre_process should file each definition under its own scope.

        The tree holds two subparts, each with one section containing one
        smart-quoted definition of "totes". The first section states no
        scope, so its definition is expected under ('1212',). The second
        section's text reads "For the purposes of this subpart", and its
        definition is expected under ('1212', '22').
        """
        root = Node("", label=['1212'])
        subpartA = Node("", label=['1212', 'Subpart', 'A'], title='Subpart A')
        section2 = Node("", label=['1212', '2'], title='1212.2')
        def1 = Node(u"“totes” means in total", label=['1212', '2', 'a'])
        subpartB = Node("", label=['1212', 'Subpart', 'B'], title='Subpart B')
        section22 = Node("\nFor the purposes of this subpart",
                         label=['1212', '22'],
                         title='1212.22')
        def2 = Node(u"“totes” means in extremely", label=['1212', '22', 'a'])

        root.children = [subpartA, subpartB]
        subpartA.children, subpartB.children = [section2], [section22]
        section2.children, section22.children = [def1], [def2]

        t = Terms(root)
        t.pre_process()

        # assertIn reports the missing key and the container on failure,
        # unlike assertTrue(x in y), which only says "False is not true".
        self.assertIn(('1212', ), t.scoped_terms)
        self.assertEqual(len(t.scoped_terms[('1212', )]), 1)
        self.assertEqual('1212-2-a', t.scoped_terms[('1212', )][0].label)

        self.assertIn(('1212', '22'), t.scoped_terms)
        self.assertEqual(len(t.scoped_terms[('1212', '22')]), 1)
        self.assertEqual('1212-22-a', t.scoped_terms[('1212', '22')][0].label)
Ejemplo n.º 41
0
    def test_write(self):
        """Integration test.

        Writes a small regulation tree through GitWriteContent twice (the
        second time with paragraph (b) relabelled to (c)) and checks the
        directories, index.md files and commit chain of the resulting
        repository.
        """
        p3a = Node('(a) Par a', label=['1111', '3', 'a'])
        p3b = Node('(b) Par b', label=['1111', '3', 'b'])
        p3 = Node('Things like: ',
                  label=['1111', '3'],
                  title='Section 3',
                  children=[p3a, p3b])
        sub = Node('',
                   label=['1111', 'Subpart', 'E'],
                   title='Subpart E',
                   node_type=Node.SUBPART,
                   children=[p3])
        a3a = Node('Appendix A-3(a)',
                   label=['1111', 'A', '3(a)'],
                   title='A-3(a) - Some Title',
                   node_type=Node.APPENDIX)
        app = Node('',
                   label=['1111', 'A'],
                   title='Appendix A',
                   node_type=Node.APPENDIX,
                   children=[a3a])
        i3a1 = Node('1. P1',
                    label=['1111', '3', 'a', 'Interp', '1'],
                    node_type=Node.INTERP)
        i3a = Node('',
                   label=['1111', '3', 'a', 'Interp'],
                   node_type=Node.INTERP,
                   children=[i3a1],
                   title='Paragraph 3(a)')
        i31 = Node('1. Section 3',
                   label=['1111', '3', 'Interp', '1'],
                   node_type=Node.INTERP)
        i3 = Node('',
                  label=['1111', '3', 'Interp'],
                  node_type=Node.INTERP,
                  title='Section 1111.3',
                  children=[i3a, i31])
        i = Node('',
                 label=['1111', 'Interp'],
                 node_type=Node.INTERP,
                 title='Supplement I',
                 children=[i3])
        tree = Node('Root text',
                    label=['1111'],
                    title='Regulation Joe',
                    children=[sub, app, i])

        def check_written(dir_path, expected):
            """Assert that every path in `expected` exists under `dir_path`
            as a directory holding an index.md; return the directories
            found (anything whose path mentions .git is ignored)."""
            self.assertTrue(os.path.exists(dir_path + '.git'))
            dirs, files = [], []
            for dirname, child_dirs, filenames in os.walk(dir_path):
                if ".git" not in dirname:
                    dirs.extend(
                        os.path.join(dirname, c) for c in child_dirs
                        if c != '.git')
                    files.extend(os.path.join(dirname, f) for f in filenames)
            for parts in expected:
                path = dir_path + os.path.join(*parts)
                self.assertIn(path, dirs)
                self.assertIn(path + os.path.sep + 'index.md', files)
            return dirs

        # Paths expected after both writes; only paragraph (b)/(c) differs.
        common = (('Subpart-E', ), ('Subpart-E', '3'),
                  ('Subpart-E', '3', 'a'), ('A', ),
                  ('A', '3(a)'), ('Interp', ), ('Interp', '3-Interp'),
                  ('Interp', '3-Interp', '1'),
                  ('Interp', '3-Interp', 'a-Interp'),
                  ('Interp', '3-Interp', 'a-Interp', '1'))

        writer = GitWriteContent("/regulation/1111/v1v1")
        writer.write(tree)

        dir_path = settings.GIT_OUTPUT_DIR + "regulation" + os.path.sep
        dir_path += '1111' + os.path.sep

        check_written(dir_path, common + (('Subpart-E', '3', 'b'), ))

        # p3c is the same object as p3b, so this edits the tree in place.
        p3c = p3b
        p3c.text = '(c) Moved!'
        p3c.label = ['1111', '3', 'c']

        writer = GitWriteContent("/regulation/1111/v2v2")
        writer.write(tree)

        dirs = check_written(dir_path, common + (('Subpart-E', '3', 'c'), ))
        # The old paragraph directory must be gone after the second write.
        self.assertNotIn(dir_path + os.path.join('Subpart-E', '3', 'b'),
                         dirs)

        # History is expected to be v2v2 -> v1v1 -> an initial commit that
        # mentions '1111' and has no parents.
        commit = Repo(dir_path).head.commit
        self.assertIn('v2v2', commit.message)
        self.assertEqual(1, len(commit.parents))
        commit = commit.parents[0]
        self.assertIn('v1v1', commit.message)
        self.assertEqual(1, len(commit.parents))
        commit = commit.parents[0]
        self.assertIn('1111', commit.message)
        self.assertEqual(0, len(commit.parents))
Ejemplo n.º 42
0
 def derive_nodes(self, xml, processor=None):
     """Return a one-element list holding a markerless node for `xml`.

     The node text comes from table_xml_to_plaintext; the original element
     is kept as source_xml and its stripped serialization as tagged_text.
     `processor` is accepted but not used here."""
     plaintext = table_xml_to_plaintext(xml)
     table_node = Node(plaintext, label=[mtypes.MARKERLESS], source_xml=xml)
     serialized = etree.tounicode(xml)
     table_node.tagged_text = serialized.strip()
     return [table_node]
Ejemplo n.º 43
0
 def test_pre_process(self):
     """pre_process on an ExampleLayer is expected to return None."""
     layer = ExampleLayer(Node('some text'))
     outcome = layer.pre_process()
     self.assertEqual(None, outcome)
Ejemplo n.º 44
0
 def derive_nodes(self, xml, processor=None):
     """Return a one-element list holding a markerless node whose text is
     one '![](...)' image reference per direct GID child of `xml`,
     concatenated without separators. `processor` is not used here."""
     image_refs = ['![]({0})'.format(gid.text) for gid in xml.xpath('./GID')]
     return [Node(''.join(image_refs), label=[mtypes.MARKERLESS])]
Ejemplo n.º 45
0
 def derive_nodes(self, xml, processor=None):
     """Return a one-element list holding a markerless node for `xml`,
     carrying both its stripped plain text and its stripped
     tags-preserved text. `processor` is not used here."""
     with_tags = tree_utils.get_node_text_tags_preserved(xml)
     with_tags = with_tags.strip()
     plain = tree_utils.get_node_text(xml).strip()
     node = Node(text=plain, tagged_text=with_tags,
                 label=[mtypes.MARKERLESS])
     return [node]
Ejemplo n.º 46
0
 def derive_nodes(self, xml, processor=None):
     """Return a one-element list with the result of running a fresh
     FlatParagraphProcessor over `xml`.

     The incoming `processor` argument is ignored; a new
     FlatParagraphProcessor is always used. The seed node is markerless,
     typed with self.node_type, and holds the element's stripped leading
     text (empty when xml.text is None or empty)."""
     flat_processor = FlatParagraphProcessor()
     leading_text = xml.text if xml.text else ''
     seed = Node(text=leading_text.strip(), node_type=self.node_type,
                 label=[mtypes.MARKERLESS])
     processed = flat_processor.process(xml, seed)
     return [processed]
Ejemplo n.º 47
0
 def derive_nodes(self, xml, processor=None):
     """Return a one-element list holding a node labelled STARS_TAG.
     Neither `xml` nor `processor` is inspected."""
     stars_node = Node(label=[mtypes.STARS_TAG])
     return [stars_node]
Ejemplo n.º 48
0
 def test_process_method(self):
     """A single paragraph reference in the node text is expected to make
     the parser return exactly one item."""
     paragraph = Node("The requirements in paragraph (a)(4)(iii) of",
                      label=['1005', '6'])
     found = self.parser.process(paragraph)
     self.assertEqual(len(found), 1)
Ejemplo n.º 49
0
 def derive_nodes(self, xml, processor=None):
     """Return a one-element list with the result of running a fresh
     SimpleHierarchyProcessor over `xml`.

     The incoming `processor` argument is ignored. The seed node is
     markerless, typed with self.node_type, and keeps `xml` as its
     source_xml."""
     hierarchy_processor = SimpleHierarchyProcessor()
     seed = Node(label=[mtypes.MARKERLESS], source_xml=xml,
                 node_type=self.node_type)
     processed = hierarchy_processor.process(xml, seed)
     return [processed]
def process_inner_children(inner_stack, xml_node):
    """Process the following nodes as children of this interpretation. This
    is very similar to reg_text.py:build_from_section()

    Walks the siblings yielded by ``xml_node.itersiblings()`` until one is
    recognised by ``is_title``, turns the P and STARS elements among them
    into Node objects, derives a depth for each, and adds them to
    ``inner_stack``.

    :param inner_stack: stack object that receives the resulting nodes via
        ``push_last`` / ``add``; it is mutated in place.
    :param xml_node: element whose siblings are processed.
    :return: None. If no depth assignment can be derived, nothing is added
        to ``inner_stack``.
    """
    children = itertools.takewhile(
        lambda x: not is_title(x), xml_node.itersiblings())
    nodes = []
    # NOTE: the loop variable reuses (shadows) the xml_node parameter; the
    # sibling iterator above was already created from the original value.
    for xml_node in filter(lambda c: c.tag in ('P', 'STARS'), children):
        node_text = tree_utils.get_node_text(xml_node, add_spaces=True)
        text_with_tags = tree_utils.get_node_text_tags_preserved(xml_node)
        first_marker = get_first_interp_marker(text_with_tags)
        if xml_node.tag == 'STARS':
            # STARS elements become placeholder nodes
            nodes.append(Node(label=[mtypes.STARS_TAG]))
        elif not first_marker and nodes:
            # No marker found, but there is an earlier node: fold this
            # paragraph's text into that node instead of creating a new one
            logger.warning("Couldn't determine interp marker. Appending to "
                           "previous paragraph: %s", node_text)
            previous = nodes[-1]
            previous.text += "\n\n" + node_text
            if hasattr(previous, 'tagged_text'):
                previous.tagged_text += "\n\n" + text_with_tags
            else:
                previous.tagged_text = text_with_tags
        else:
            # Reached with a marker, or with no marker when this is the
            # very first node (first_marker is then used as the label as-is)
            collapsed = collapsed_markers_matches(node_text, text_with_tags)

            #   -2 throughout to account for matching the character + period
            ends = [m.end() - 2 for m in collapsed[1:]] + [len(node_text)]
            starts = [m.end() - 2 for m in collapsed] + [len(node_text)]

            #   Node for this paragraph
            #   Its text stops where the first collapsed marker begins (or
            #   at the end of the text when there is none); tagged_text
            #   keeps the whole paragraph
            n = Node(node_text[0:starts[0]], label=[first_marker],
                     node_type=Node.INTERP)
            n.tagged_text = text_with_tags
            nodes.append(n)
            if n.text.endswith('* * *'):
                nodes.append(Node(label=[mtypes.INLINE_STARS]))

            #   Collapsed-marker children
            #   Each one's text runs from its own marker up to the next
            #   collapsed marker (or the end of the paragraph)
            for match, end in zip(collapsed, ends):
                marker = match.group(1)
                if marker == '1':
                    # Re-wrap a bare '1' in emphasis tags; the tags are
                    # stripped from labels again after depth derivation.
                    # NOTE(review): presumably so it is typed as an
                    # emphasized integer marker -- confirm.
                    marker = '<E T="03">1</E>'
                n = Node(node_text[match.end() - 2:end], label=[marker],
                         node_type=Node.INTERP)
                nodes.append(n)
                if n.text.endswith('* * *'):
                    nodes.append(Node(label=[mtypes.INLINE_STARS]))

    # Trailing stars don't matter; slightly more efficient to ignore them
    while nodes and nodes[-1].label[0] in mtypes.stars:
        nodes = nodes[:-1]

    # Use constraint programming to figure out possible depth assignments
    depths = derive_depths(
        [node.label[0] for node in nodes],
        [rules.depth_type_order([(mtypes.ints, mtypes.em_ints),
                                 (mtypes.roman, mtypes.upper),
                                 mtypes.upper, mtypes.em_ints,
                                 mtypes.em_roman])])
    if depths:
        # Find the assignment which violates the least of our heuristics
        depths = heuristics.prefer_multiple_children(depths, 0.5)
        depths = sorted(depths, key=lambda d: d.weight, reverse=True)
        # Keep only the highest-weighted assignment
        depths = depths[0]
        for node, par in zip(nodes, depths):
            # Star placeholders are not added to the stack
            if par.typ != mtypes.stars:
                last = inner_stack.peek()
                # Drop the emphasis tags from the final label
                node.label = [l.replace('<E T="03">', '').replace('</E>', '')
                              for l in node.label]
                # Nodes are placed 3 levels below their derived depth
                if len(last) == 0:
                    inner_stack.push_last((3 + par.depth, node))
                else:
                    inner_stack.add(3 + par.depth, node)
Ejemplo n.º 51
0
    def test_process(self):
        """Interpretations.process is expected to return one reference for
        each regulation node that has a matching interpretation node
        anywhere in the tree, and None for a node that has none."""
        mark = Node.INTERP_MARK
        deeply_nested = Node("Interp9c1",
                             label=['102', '9', 'c', '1', mark],
                             node_type=Node.INTERP)
        top_level = [
            Node("Interp11a", [Node("child1"), Node("child2")],
                 ['102', '11', 'a', mark],
                 node_type=Node.INTERP),
            Node("Interp11c5v",
                 label=['102', '11', 'c', '5', 'v', mark],
                 node_type=Node.INTERP),
            Node("InterpB5ii",
                 label=['102', 'B', '5', 'ii', mark],
                 node_type=Node.INTERP),
            # This interpretation sits two levels below the root
            Node(children=[Node(children=[deeply_nested], label=['102'])]),
        ]
        root = Node(children=top_level)

        interp = Interpretations(root)
        interp.pre_process()

        targets = (
            ['102', '11', 'a'],
            ['102', '11', 'c', '5', 'v'],
            ['102', 'B', '5', 'ii'],
            ['102', '9', 'c', '1'],
        )
        expected_refs = (
            '102-11-a-Interp',
            '102-11-c-5-v-Interp',
            '102-B-5-ii-Interp',
            '102-9-c-1-Interp',
        )
        results = [interp.process(Node(label=label)) for label in targets]

        for result in results:
            self.assertEqual(1, len(result))
        for reference, result in zip(expected_refs, results):
            self.assertEqual(reference, result[0]['reference'])

        unmatched = Node(label=["102", "10", "a"])
        self.assertEqual(None, interp.process(unmatched))
Ejemplo n.º 52
0
    def test_add_child_interp(self):
        """RegulationTree.add_child is expected to insert an interpretation
        node at the right position among its siblings, for several label
        shapes (numeric sections, roman numerals before and after the
        'Interp' marker, and mixed-depth interp labels)."""
        reg_tree = compiler.RegulationTree(None)
        first, fifth, ninth, tenth = [
            Node('n' + num, label=['205', num, 'Interp'])
            for num in ('1', '5', '9', '10')]
        everyone = (first, fifth, ninth, tenth)
        romans = ('i', 'v', 'ix', 'x')

        # Section numbers: 9 belongs between 5 and 10
        merged = reg_tree.add_child([first, fifth, tenth], ninth)
        self.assertEqual(merged, [first, fifth, ninth, tenth])

        # Roman numerals ahead of the Interp marker
        for node, roman in zip(everyone, romans):
            node.label = ['205', '1', 'a', '1', roman, 'Interp']
        merged = reg_tree.add_child([first, fifth, tenth], ninth)
        self.assertEqual(merged, [first, fifth, ninth, tenth])

        # Roman numerals after the Interp marker
        for node, roman in zip(everyone, romans):
            node.label = ['205', '1', 'a', 'Interp', '1', roman]
        merged = reg_tree.add_child([first, fifth, tenth], ninth)
        self.assertEqual(merged, [first, fifth, ninth, tenth])

        # Mixed shapes: the same order is expected whichever node is added
        first.label = ['205', '1', 'Interp', '1']
        fifth.label = ['205', '1', 'a', 'Interp']
        merged = reg_tree.add_child([first], fifth)
        self.assertEqual(merged, [first, fifth])
        merged = reg_tree.add_child([fifth], first)
        self.assertEqual(merged, [first, fifth])
Ejemplo n.º 53
0
 def test_cfr_format(self):
     """Citations written as "12 CFR ..." are not processed yet, so the
     parser is expected to return None for them."""
     node = Node("12 CFR 1026.3(d)", label=['1111'])
     outcome = self.parser.process(node)
     self.assertEqual(None, outcome)
Ejemplo n.º 54
0
 def test_process(self):
     """process on an ExampleLayer is expected to return NotImplemented."""
     layer = ExampleLayer(Node("other text"))
     outcome = layer.process(Node("oo"))
     self.assertEqual(NotImplemented, outcome)
Ejemplo n.º 55
0
    def test_to_xml_interp(self):
        """ Test that interpretations get formatted correctly """
        # Build the interpretation tree bottom-up: two numbered paragraphs
        # under the interp of 1111.1(a), under the section interp, under
        # the interpretations root.
        first_para = Node(text=u'A Keyterm. Interp sp.',
                          children=[],
                          label=[u'1111', u'1', 'a', u'Interp', u'1'],
                          title=None,
                          node_type=u'interp')
        second_para = Node(text=u'Lone Keyterm. Or not.',
                           children=[],
                           label=[u'1111', u'1', 'a', u'Interp', u'2'],
                           title=None,
                           node_type=u'interp')
        paragraph_interp = Node(text=u'Interp targetting reg paragraph',
                                children=[first_para, second_para],
                                label=[u'1111', u'1', 'a', u'Interp'],
                                title=u'1111.1 (a) Interp',
                                node_type=u'interp')
        section_interp = Node(text=u'Interp for section',
                              children=[paragraph_interp],
                              label=[u'1111', u'1', u'Interp'],
                              title=u'1111.1 Interp',
                              node_type=u'interp')
        interp_nodes = Node(text=u'',
                            children=[section_interp],
                            label=[u'1111', u'Interp'],
                            title=u'Interpretations',
                            node_type=u'interp')

        terms_layer = {
            "1111-1-a-Interp-2": [{
                "offsets": [[0, 12]], "ref": "lone keyterm:1111-1-a"
            }],
            'referenced': {},
        }
        keyterms_layer = {
            u'1111-1-a-Interp-1': [{'locations': [0],
                                    'key_term': u'A Keyterm.'}],
            u'1111-1-a-Interp-2': [{'locations': [0],
                                    'key_term': u'Lone Keyterm.'}],
        }
        markers_layer = {
            u'1111-1-a-Interp-1': [{"text": "1.", "locations": [0]}],
            u'1111-1-a-Interp-2': [{"text": "2.", "locations": [0]}],
        }
        layers = {
            'terms': terms_layer,
            'graphics': {},
            'keyterms': keyterms_layer,
            'interpretations': {
                u'1111-1-a': [{'reference': u'1111-1-a-Interp'}],
            },
            'paragraph-markers': markers_layer,
        }
        notices = [{'document_number': '2015-12345'}]

        writer = XMLWriteContent("a/path", '2015-12345',
                                 layers=layers, notices=notices)
        elm = writer.to_xml(interp_nodes)

        interp_para = elm.find(
            './/interpParagraph[@label="1111-1-a-Interp"]')
        interp_sub_paras = interp_para.findall('interpParagraph')
        first_sub = interp_sub_paras[0]

        # Only the paragraph-level interp carries a target
        self.assertEqual(interp_para.get('target'), '1111-1-a')
        self.assertEqual(first_sub.get('target'), None)

        # The keyterm moves into a typed <title> and out of <content>
        self.assertNotEqual(interp_para.find('title'), None)
        self.assertEqual(first_sub.find('title').get('type'),
                         'keyterm')
        self.assertTrue('A Keyterm.' not in
                        first_sub.find('content').text)

        # For the second sub para there should be a <ref> in <title> and
        # nothing in content
        self.assertEqual(interp_sub_paras[1].find('title').get('type'),
                         'keyterm')
        self.assertTrue(interp_sub_paras[1].find('content').text is None)

        # Markers appear on the numbered paragraphs only
        self.assertEqual(interp_para.get('marker'), None)
        self.assertEqual(first_sub.get('marker'), '1.')
        self.assertEqual(interp_sub_paras[1].get('marker'), '2.')
Ejemplo n.º 56
0
    def test_replace_node_and_subtree(self):
        """replace_node_and_subtree should swap a node and all of its
        descendants for the replacement.

        Section 205-2 (children a, b) is replaced by a new 205-2 (children
        e, f). Afterwards the tree is expected to equal root -> [n1, a2,
        n4], and the old child 205-2-a must no longer be findable.
        """
        n1 = Node('n1', label=['205', '1'])
        n2 = Node('n2', label=['205', '2'])
        n4 = Node('n4', label=['205', '4'])

        n2a = Node('n2a', label=['205', '2', 'a'])
        n2b = Node('n2b', label=['205', '2', 'b'])
        n2.children = [n2a, n2b]

        root = Node('', label=['205'])
        root.children = [n1, n2, n4]

        reg_tree = compiler.RegulationTree(root)

        a2 = Node('a2', label=['205', '2'])
        a2e = Node('a2e', label=['205', '2', 'e'])
        a2f = Node('a2f', label=['205', '2', 'f'])
        a2.children = [a2e, a2f]

        reg_tree.replace_node_and_subtree(a2)

        # The expected root uses a string label, like the real root above,
        # so the comparison does not depend on any int -> str coercion.
        new_tree = Node('', label=['205'])
        new_tree.children = [n1, a2, n4]

        self.assertEqual(new_tree, reg_tree.tree)
        self.assertIsNone(find(reg_tree.tree, '205-2-a'))
Ejemplo n.º 57
0
    def test_look_for_defs(self, node_definitions):
        """look_for_defs should visit regtext nodes only.

        node_definitions is replaced by a stub that records each visited
        node's label id as an "excluded" entry, so scoped_terms['EXCLUDED']
        lists exactly the nodes that were walked. Appendix nodes and
        anything underneath an EXTRACT node are expected to be skipped."""
        node_definitions.side_effect = lambda n, _: ([], [n.label_id()])
        terms = Terms(None)

        section_one = Node(label=['111', '1'], children=[
            Node(label=['111', '1', letter]) for letter in ('a', 'b', 'c')])
        extract = Node(label=['111', '2', 'p1'], node_type=Node.EXTRACT,
                       children=[Node(label=['111', '2', 'p1', 'p1'])])
        section_two = Node(label=['111', '2'], children=[extract])
        empty_part = Node(label=['111', 'Subpart'],
                          node_type=Node.EMPTYPART,
                          children=[section_one, section_two])
        appendix = Node(label=['111', 'A'], node_type=Node.APPENDIX,
                        children=[Node(label=['111', 'A', '1'],
                                       node_type=Node.APPENDIX)])
        root = Node(label=['111'], children=[empty_part, appendix])

        terms.look_for_defs(root)

        # note the absence of APPENDIX, and anything below an EXTRACT
        visited = ['111', '111-Subpart', '111-1', '111-1-a', '111-1-b',
                   '111-1-c', '111-2']
        self.assertItemsEqual(terms.scoped_terms['EXCLUDED'], visited)
Ejemplo n.º 58
0
def _strip_emphasis_tags(label):
    """Return a copy of `label` in which every component has the
    <E T="03"> ... </E> emphasis markup removed."""
    return [part.replace('<E T="03">', '').replace('</E>', '')
            for part in label]


def _add_interp_node(inner_stack, depth, node):
    """Strip emphasis markup from the node's label, then place the node on
    `inner_stack` at `depth`: push_last() if the top of the stack is empty,
    add() otherwise."""
    last = inner_stack.peek()
    node.label = _strip_emphasis_tags(node.label)
    if len(last) == 0:
        inner_stack.push_last((depth, node))
    else:
        inner_stack.add(depth, node)


def process_inner_children(inner_stack, xml_node):
    """Process the following nodes as children of this interpretation. This
    is very similar to reg_text.py:build_from_section()

    The siblings which follow `xml_node`, up to (but excluding) the first
    one for which is_title() is true, are converted into Nodes. Only P and
    STARS elements are considered. The resulting nodes are then placed on
    `inner_stack` at a depth of 3 plus their paragraph depth. That depth
    comes from one of:
      * the manual PARAGRAPH_HIERARCHY entry for "<part>.<section>-Interp",
        when one exists;
      * derive_depths(), when it finds at least one assignment;
      * otherwise, every node is added at the same depth (3).

    Nothing is returned; `inner_stack` is modified in place."""
    # manual hierarchy should work here too
    manual_hierarchy_flag = False
    try:
        # Raw string so that "\." is a regex escape rather than an (invalid)
        # string escape
        part_and_section = re.search(
            r'[0-9]+\.[0-9]+', xml_node.text).group(0)
        # The pattern guarantees exactly one '.', so this is the part number
        part = part_and_section.split('.')[0]
        part_and_section += '-Interp'

        if (part in PARAGRAPH_HIERARCHY
                and part_and_section in PARAGRAPH_HIERARCHY[part]):
            manual_hierarchy_flag = True
    except Exception:
        # Deliberately best-effort: if the title has no text or no
        # "part.section" in it (or the lookup fails for any other reason),
        # fall back to deriving the hierarchy automatically
        pass

    children = itertools.takewhile(lambda x: not is_title(x),
                                   xml_node.itersiblings())
    paragraphs = (c for c in children if c.tag in ('P', 'STARS'))
    nodes = []
    for i, child in enumerate(paragraphs):
        node_text = tree_utils.get_node_text(child, add_spaces=True)
        text_with_tags = tree_utils.get_node_text_tags_preserved(child)
        first_marker = get_first_interp_marker(text_with_tags)
        if child.tag == 'STARS':
            nodes.append(Node(label=[mtypes.STARS_TAG]))
        elif not first_marker and nodes:
            logging.warning(
                "Couldn't determine interp marker. "
                "Appending node and hoping that manual hierarchy is specified")

            # No marker to use as a label, so fall back to the paragraph's
            # position
            n = Node(node_text, label=[str(i)], node_type=Node.INTERP)
            n.tagged_text = text_with_tags
            nodes.append(n)

        else:
            # NOTE(review): a marker-less *first* paragraph also lands here
            # (nodes is still empty), so its falsy first_marker becomes the
            # label -- confirm that this is intended
            collapsed = collapsed_markers_matches(node_text, text_with_tags)

            #   -2 throughout to account for matching the character + period
            ends = [m.end() - 2 for m in collapsed[1:]] + [len(node_text)]
            starts = [m.end() - 2 for m in collapsed] + [len(node_text)]

            #   Node for this paragraph
            n = Node(node_text[0:starts[0]],
                     label=[first_marker],
                     node_type=Node.INTERP)
            n.tagged_text = text_with_tags
            nodes.append(n)
            if n.text.endswith('* * *'):
                nodes.append(Node(label=[mtypes.INLINE_STARS]))

            #   Collapsed-marker children
            for match, end in zip(collapsed, ends):
                marker = match.group(1)
                if marker == '1':
                    marker = '<E T="03">1</E>'
                n = Node(node_text[match.end() - 2:end],
                         label=[marker],
                         node_type=Node.INTERP)
                nodes.append(n)
                if n.text.endswith('* * *'):
                    nodes.append(Node(label=[mtypes.INLINE_STARS]))

    # Trailing stars don't matter; slightly more efficient to ignore them
    while nodes and nodes[-1].label[0] in mtypes.stars:
        nodes.pop()

    if manual_hierarchy_flag:
        # use manual hierarchy if it's specified
        if nodes:
            logging.warning('Using manual depth hierarchy.')
            depths = PARAGRAPH_HIERARCHY[part][part_and_section]
            if len(nodes) == len(depths):
                for node, depth in zip(nodes, depths):
                    _add_interp_node(inner_stack, 3 + depth, node)
            else:
                logging.error(
                    'Manual hierarchy length does not match node list length!')
    else:
        # Use constraint programming to figure out possible depth assignments
        depths = derive_depths([n.label[0] for n in nodes], [
            rules.depth_type_order([(mtypes.ints, mtypes.em_ints),
                                    (mtypes.roman, mtypes.upper), mtypes.upper,
                                    mtypes.em_ints, mtypes.em_roman])
        ])

        if depths:
            # Find the assignment which violates the least of our heuristics
            depths = heuristics.prefer_multiple_children(depths, 0.5)
            depths = sorted(depths, key=lambda d: d.weight, reverse=True)
            depths = depths[0]
            for node, par in zip(nodes, depths):
                # Stars only act as placeholders; they are not added
                if par.typ != mtypes.stars:
                    _add_interp_node(inner_stack, 3 + par.depth, node)
        elif nodes:
            logging.warning('Could not derive depth (interp):\n {}'.format(
                [n.label[0] for n in nodes]))
            # just add nodes in sequential order then
            for node in nodes:
                _add_interp_node(inner_stack, 3, node)
Ejemplo n.º 59
0
 def paragraph_no_marker(self, text):
     """Handle a paragraph which lacks a marker such as (a) or a. It is
     recorded as an appendix node labelled 'p' followed by the running
     paragraph count."""
     self.paragraph_counter += 1
     label = ['p' + str(self.paragraph_counter)]
     self.nodes.append(Node(text, node_type=Node.APPENDIX, label=label))
Ejemplo n.º 60
0
 def derive_nodes(self, xml, processor=None):
     """Produce a single text-less, markerless node whose title is the
     stripped text of `xml`. `processor` is accepted but not used."""
     # This should match HD elements only at lower levels, and for now we'll
     # just put them into the titles
     heading = tree_utils.get_node_text(xml).strip()
     return [Node(text='', title=heading, label=[mtypes.MARKERLESS])]