def test_get_markers_and_text_emph(self):
        """A marker wrapped in an emphasis tag is returned with its tag, next
        to the plain and the tagged text of each paragraph piece."""
        raw = '(A) aaaa. (<E T="03">1</E>) 1111'
        paragraph = etree.fromstring('<P>%s</P>' % raw)
        found = reg_text.get_markers(raw)
        first, second = reg_text.get_markers_and_text(paragraph, found)

        expected_first = ('A', ('(A) aaaa. ', '(A) aaaa. '))
        expected_second = ('<E T="03">1</E>',
                           ('(1) 1111', '(<E T="03">1</E>) 1111'))
        self.assertEqual(expected_first, first)
        self.assertEqual(expected_second, second)
Exemple #2
0
 def test_get_markers_collapsed(self):
     """Collapsed markers should be found only when a marker in sequence
     follows them."""
     sample = u'(a) <E T="03">aaa</E>—(1) 111. (i) iii'
     everything = ['a', '1', 'i']
     # Each case: extra positional arguments for get_markers -> expected list.
     cases = [
         ((), ['a']),
         (('b',), ['a']),
         (('A',), everything),
         (('ii',), everything),
         ((mtypes.STARS_TAG,), everything),
         (('2',), ['a', '1']),
     ]
     for extra_args, expected in cases:
         self.assertEqual(reg_text.get_markers(sample, *extra_args), expected)
 def test_get_markers_collapsed(self):
     """Collapsed markers should be found only when a marker in sequence
     follows them."""
     sample = u'(a) <E T="03">aaa</E>—(1) 111. (i) iii'
     find = reg_text.get_markers
     all_three = ['a', '1', 'i']

     # Without a following marker, or with one that does not fit, only
     # the leading marker is reported.
     self.assertEqual(find(sample), ['a'])
     self.assertEqual(find(sample, 'b'), ['a'])

     # These following markers let every collapsed marker through.
     self.assertEqual(find(sample, 'A'), all_three)
     self.assertEqual(find(sample, 'ii'), all_three)
     self.assertEqual(find(sample, mtypes.STARS_TAG), all_three)

     # A following '2' only admits the '1'.
     self.assertEqual(find(sample, '2'), ['a', '1'])
Exemple #4
0
 def get_subsections_for_paragraph(self, paragraph, next_paragraph):
     """Break ``paragraph`` into ``(marker, text)`` pairs.

     The first marker of ``next_paragraph`` is passed to ``get_markers`` as
     context; ``"MARKERLESS"`` is passed when the next paragraph has no
     initial markers, and ``None`` when there is no next paragraph.

     Returns a list of tuples. If no markers are found the list holds the
     single entry ``('MARKERLESS', paragraph)``. Otherwise there is one
     entry per marker; its text starts at that marker's parenthesised form
     and stops where the next marker's text starts (the last one runs to
     the end of the paragraph).

     Raises ``AttributeError`` if the markers cannot be located, in order,
     inside ``paragraph`` (``re.match`` returns ``None``).
     """
     if next_paragraph:
         next_markers = initial_markers(next_paragraph)
         next_marker = next_markers[0] if next_markers else "MARKERLESS"
     else:
         next_marker = None

     markers = get_markers(paragraph, next_marker)
     if not markers:
         return [('MARKERLESS', paragraph)]

     # One capture group per marker. The pattern pieces are raw strings so
     # that ``\(`` reaches the regex engine untouched, and each marker is
     # escaped so it is always matched literally.
     marker_regex = ".*" + "".join(
         r"(\( ?%s ?\).*)" % re.escape(marker) for marker in markers)
     # re.S lets ``.`` cross newlines inside the paragraph.
     match = re.match(marker_regex, paragraph, re.S)
     return list(zip(markers, match.groups()))
Exemple #5
0
    def test_get_markers_and_text(self):
        """Each result entry carries the marker, the text without emphasis
        tags and the text with them."""
        raw = u'(a) <E T="03">Transfer </E>—(1) <E T="03">Notice.</E> follow'
        root = etree.fromstring('<P>%s</P>' % raw)
        pairs = reg_text.get_markers_and_text(root, reg_text.get_markers(raw))

        self.assertEqual([pair[0] for pair in pairs], [u'a', u'1'])

        self.assertEqual(
            [pair[1][0] for pair in pairs],
            [u'(a) Transfer —', u'(1) Notice. follow'])

        self.assertEqual(
            [pair[1][1] for pair in pairs],
            [u'(a) <E T="03">Transfer </E>—',
             u'(1) <E T="03">Notice.</E> follow'])
    def test_get_markers_and_text(self):
        """Each result entry carries the marker, the text without emphasis
        tags and the text with them."""
        source = u'(a) <E T="03">Transfer </E>—(1) <E T="03">Notice.</E> follow'
        tree = etree.fromstring('<P>%s</P>' % source)
        found = reg_text.get_markers(source)
        entries = reg_text.get_markers_and_text(tree, found)

        expected_markers = [u'a', u'1']
        expected_plain = [u'(a) Transfer —', u'(1) Notice. follow']
        expected_tagged = [
            u'(a) <E T="03">Transfer </E>—',
            u'(1) <E T="03">Notice.</E> follow',
        ]

        self.assertEqual([entry[0] for entry in entries], expected_markers)
        self.assertEqual([entry[1][0] for entry in entries], expected_plain)
        self.assertEqual([entry[1][1] for entry in entries], expected_tagged)
 def get_subsections_for_paragraph(self, paragraph, next_paragraph):
     """Return ``(marker, text)`` pairs for the markers found in ``paragraph``.

     The first initial marker of ``next_paragraph`` is given to
     ``get_markers`` as context (``"MARKERLESS"`` if it has none, ``None``
     if there is no next paragraph). With no markers found, the result is
     the single pair ``('MARKERLESS', paragraph)``; otherwise each marker
     is paired with the second value ``split_text_by_marker`` returns for
     it, and that value is also the text the next marker is split from.
     """
     next_marker = None
     if next_paragraph:
         following = initial_markers(next_paragraph)
         next_marker = following[0] if len(following) > 0 else "MARKERLESS"

     found = get_markers(paragraph, next_marker)
     if not found:
         return [('MARKERLESS', paragraph)]

     pieces = []
     remainder = paragraph
     for marker in found:
         # Only the second part of the split is used.
         _, remainder = self.split_text_by_marker(marker, remainder)
         pieces.append((marker, remainder))
     return pieces
Exemple #8
0
 def test_get_markers(self):
     """Both the leading marker and the one right after the dash are found."""
     sample = u'(a) <E T="03">Transfer </E>—(1) <E T="03">Notice.</E> follow'
     self.assertEqual(reg_text.get_markers(sample), [u'a', u'1'])
 def test_get_markers_bad_citation(self):
     """Parenthesised citations later in the text are not reported as
     markers; only the leading ``(vi)`` is."""
     sample = (
         '(vi)<E T="03">Keyterm.</E>The information required by '
         'paragraphs (a)(2), (a)(4)(iii), (a)(5), (b) through (d), '
         '(f), and (g) with respect to something, (i), (j), (l) '
         'through (p), (q)(1), and (r) with respect to something.'
     )
     found = reg_text.get_markers(sample)
     self.assertEqual(['vi'], found)
 def test_get_markers(self):
     """The marker list for the sample paragraph is ``a`` followed by ``1``."""
     paragraph = u'(a) <E T="03">Transfer </E>—(1) <E T="03">Notice.</E> follow'
     expected = [u'a', u'1']
     found = reg_text.get_markers(paragraph)
     self.assertEqual(found, expected)
# Scratch samples for trying the marker helpers by hand. `text` is
# reassigned twice below, so only the last sample reaches the print calls.
text = """(b)(1) Pursuant to 5 U.S.C. 552a(j)(2), records contained in FEC 12,"""
# NOTE(review): the literal above is already closed on its own line, so the
# triple-quoted string that follows is a bare expression statement and is
# not part of `text` -- confirm whether a single literal was intended.
"""
Office of Inspector General Investigative Files, are exempt from the
provisions of 5 U.S.C. 552a, except subsections (b), (c) (1) and (2),
(e)(4) (A) through (F), (e) (6), (7), (9), (10), and (11) and (f) , and
the corresponding provisions of 11 CFR part 1, to the extent this system
of records relates in any way to the enforcement of criminal laws."""

# Second sample: a "(d) ... (1)" paragraph with an embedded blank line
# (the "\n\n" escape inside the literal).
text = """(d) Meeting. (1) Meeting means the deliberation of at least
four voting members of the Commission in collegia where such deliberations
determine or result in the joint conduct or disposition of official Commission
business. For the purpose of this section, joint conduct does not include,
for example, situations where the requisite number of members is physically
present in one place but not conducting agency business as a body
(e.g., at a meeting at which one member is giving a speech while a
number of other members are present in the audience).
A deliberation conducted through telephone or similar
communications equipment by means\n\nof which all persons
participating can hear each other will be considered a
meeting under this section."""

# Third sample (the one actually printed): shorter text with four leading
# spaces before the first marker.
text = """    (d) Meeting. (1) Meeting means the deliberation of at least four
voting members of the Commission in collegia where such deliberations
determine or result in the joint conduct or disposition of official
Commission business."""

# Print what each helper returns for the final sample.
# NOTE(review): `any_depth_p.parseString` looks like a pyparsing grammar --
# confirm against its definition.
print(get_markers(text))
print(any_depth_p.parseString(text))
print(collapsed_markers(text))