예제 #1
0
    def test_make_html(self):
        """Check that a plain-text full citation is rendered as cited HTML.

        The canonical "U.S." reporter spelling and the spaced "U. S." variant
        (Issue #409) are both compared against the same expected markup.
        """
        # Expected markup, split on tag boundaries for readability.
        expected = (
            '<pre class="inline">asdf </pre>'
            '<span class="citation no-link">'
            '<span class="volume">22</span> '
            '<span class="reporter">U.S.</span> '
            '<span class="page">33</span>'
            '</span>'
            '<pre class="inline"> asdf</pre>'
        )

        samples = (
            # Simple example
            'asdf 22 U.S. 33 asdf',
            # Using a variant format for U.S. (Issue #409)
            'asdf 22 U. S. 33 asdf',
        )

        for text in samples:
            rendered = create_cited_html(
                Opinion(plain_text=text),
                get_citations(text),
            )
            self.assertEqual(expected, rendered)
예제 #2
0
    def test_make_html(self):
        """Verify basic plain-text to cited-HTML conversion.

        Two spellings of the same citation ("U.S." and "U. S.") are rendered
        and each result is compared against one shared expected string.
        """

        def render(text):
            # Build the opinion first, then extract citations from the same
            # text, and finally produce the annotated HTML.
            opinion = Opinion(plain_text=text)
            found = get_citations(text)
            return create_cited_html(opinion, found)

        wanted = (
            '<pre class="inline">asdf </pre>'
            '<span class="citation no-link">'
            '<span class="volume">22</span> '
            '<span class="reporter">U.S.</span> '
            '<span class="page">33</span></span>'
            '<pre class="inline"> asdf</pre>'
        )

        # Simple example
        self.assertEqual(wanted, render('asdf 22 U.S. 33 asdf'))

        # Using a variant format for U.S. (Issue #409)
        self.assertEqual(wanted, render('asdf 22 U. S. 33 asdf'))
예제 #3
0
    def test_make_html_from_matched_citation_objects(self) -> None:
        """Can we render matched citation objects as HTML?

        Each case supplies a plain-text opinion, a hand-built ``IdCitation``
        and the HTML expected back from ``create_cited_html``. Before
        rendering, the placeholder values "MATCH_URL" and "MATCH_ID" are
        assigned to the citation; the expected HTML contains those same
        placeholders.
        """
        # The citation objects are constructed by hand here (get_citations()
        # is not called), so this exercises rendering of citations that we
        # assert are already correctly matched.
        # fmt: off

        test_triples = [
            # Id. citation with page number ("Id., at 123, 124")
            ('asdf, Id., at 123, 124. Lorem ipsum dolor sit amet',
             IdCitation(id_token='Id.,',
                        after_tokens=['at', '123', '124'],
                        has_page=True),
             '<pre class="inline">asdf</pre><span class="citation" data-id="'
             'MATCH_ID">, <a href="MATCH_URL"><span class="id_token">Id.,'
             '</span> at 123, 124</a></span><pre class="inline">. Lorem ipsum'
             ' dolor sit amet</pre>'),

            # Id. citation with complex page number ("Id. @ 123:1, ¶¶ 124")
            ('asdf, Id. @ 123:1, ¶¶ 124. Lorem ipsum dolor sit amet',
             IdCitation(id_token='Id.',
                        after_tokens=['@', '123:1', '¶¶', '124'],
                        has_page=True),
             '<pre class="inline">asdf</pre><span class="citation" data-id="'
             'MATCH_ID">, <a href="MATCH_URL"><span class="id_token">Id.'
             '</span> @ 123:1, ¶¶ 124</a></span><pre class="inline">. Lorem '
             'ipsum dolor sit amet</pre>'),

            # Id. citation without page number ("Id. Something else")
            ('asdf, Id. Lorem ipsum dolor sit amet',
             IdCitation(id_token='Id.',
                        after_tokens=['Lorem', 'ipsum'],
                        has_page=False),
             '<pre class="inline">asdf</pre><span class="citation" data-id='
             '"MATCH_ID">, <a href="MATCH_URL"><span class="id_token">Id.'
             '</span></a> Lorem ipsum </span><pre class="inline">dolor sit '
             'amet</pre>'),
        ]

        # fmt: on
        for plain_text, citation, expected_html in test_triples:
            # subTest keeps one failing case from hiding the remaining ones:
            # every triple is run and reported independently.
            with self.subTest(plain_text=plain_text):
                print(
                    "Testing object to HTML rendering for %s..." % plain_text,
                    end=" ",
                )
                citation.match_url = "MATCH_URL"
                citation.match_id = "MATCH_ID"
                opinion = Opinion(plain_text=plain_text)
                created_html = create_cited_html(opinion, [citation])
                self.assertEqual(
                    created_html,
                    expected_html,
                    msg="\n%s\n\n    !=\n\n%s" % (created_html, expected_html),
                )
                # Only reached when the assertion above passed.
                print("✓")
예제 #4
0
    def test_make_html_from_html(self) -> None:
        """Can we convert the HTML of an opinion into modified HTML?

        Each pair is (input HTML, expected output HTML). Citations are
        extracted from the input with the "html" and "whitespace" cleaners
        and then rendered back into the opinion's HTML.
        """
        # fmt: off

        test_pairs = [
            # Id. citation with HTML tags
            ('<div><p>the improper views of the Legislature.\" 2 <i>id.,</i> '
             'at 73.</p>\n<p>Nathaniel Gorham of Massachusetts</p></div>',
             '<div><p>the improper views of the Legislature." 2<span class="'
             'citation no-link"> <i><span class="id_token">id.,</span></i> at '
             '73.</span></p>\n<p>Nathaniel Gorham of Massachusetts</p></div>'),

            # Id. citation with an intervening HTML tag
            #  (We expect the HTML to be unchanged, since it's too risky to
            #   modify with another tag in the way)
            ('<div><p>the improper views of the Legislature.\" 2 <i>id.,</i> '
             'at <b>73, bolded</b>.</p>\n<p>Nathaniel Gorham of Massachusetts'
             '</p></div>',
             '<div><p>the improper views of the Legislature.\" 2 <i>id.,</i> '
             'at <b>73, bolded</b>.</p>\n<p>Nathaniel Gorham of Massachusetts'
             '</p></div>'),

            # Ibid. citation with HTML tags
            ('<div><p>possess any peculiar knowledge of the mere policy of '
             'public measures.\" <i>Ibid.</i> Gerry of Massachusetts '
             'like</p></div>',
             '<div><p>possess any peculiar knowledge of the mere policy of '
             'public measures."<span class="citation no-link"> <i><span class='
             '"id_token">Ibid.</span></i> Gerry of Massachusetts </span>like'
             '</p></div>'),
        ]

        # fmt: on
        for s, expected_html in test_pairs:
            # subTest keeps one failing case from hiding the remaining ones:
            # every pair is run and reported independently.
            with self.subTest(html=s):
                print("Testing html to html conversion for %s..." % s, end=" ")
                opinion = Opinion(html=s)
                citations = get_citations(s, clean=("html", "whitespace"))
                created_html = create_cited_html(opinion, citations)
                self.assertEqual(
                    created_html,
                    expected_html,
                    msg="\n%s\n\n    !=\n\n%s" % (created_html, expected_html),
                )
                # Only reached when the assertion above passed.
                print("✓")
예제 #5
0
    def test_make_html_from_plain_text(self) -> None:
        """Can we convert the plain text of an opinion into HTML?

        Each pair is (plain text, expected HTML). Citations are extracted
        from the plain text with ``get_citations`` and rendered with
        ``create_cited_html``; the cases cover full, short-form, supra, Id.,
        Ibid. and non-opinion citations.
        """
        # fmt: off

        full_citation_html = ('<pre class="inline">asdf </pre><span class="'
                              'citation no-link"><span class="volume">22'
                              '</span> <span class="reporter">U.S.</span> '
                              '<span class="page">33</span> </span><pre class='
                              '"inline">asdf</pre>')
        test_pairs = [
            # Simple example for full citations
            ('asdf 22 U.S. 33 asdf', full_citation_html),

            # Using a variant format for U.S. (Issue #409)
            ('asdf 22 U. S. 33 asdf', full_citation_html),

            # Full citation across line break
            ('asdf John v. Doe, 123\nU.S. 456, upholding foo bar',
             '<pre class="inline">asdf John v. Doe, </pre><span class="'
             'citation no-link"><span class="volume">123</span>\n<span class='
             '"reporter">U.S.</span> <span class="page">456</span></span><pre'
             ' class="inline">, upholding foo bar</pre>'),

            # Basic short form citation
            ('existing text asdf, 515 U.S., at 240. foobar',
             '<pre class="inline">existing text </pre><span class="citation '
             'no-link"><span class="antecedent_guess">asdf,</span> <span '
             'class="volume">515</span> <span class="reporter">U.S.</span>, '
             'at <span class="page">240</span></span><pre class="inline">. '
             'foobar</pre>'),

            # Short form citation with no comma after reporter in original
            ('existing text asdf, 1 U. S. at 2. foobar',
             '<pre class="inline">existing text </pre><span class="citation '
             'no-link"><span class="antecedent_guess">asdf,</span> <span class'
             '="volume">1</span> <span class="reporter">U.S.</span> at <span '
             'class="page">2</span></span><pre class="inline">. foobar</pre>'),

            # Short form citation across line break
            ('asdf.’ ” 123 \n U.S., at 456. Foo bar foobar',
             '<pre class="inline">asdf.’ </pre><span class="'
             'citation no-link"><span class="antecedent_guess">”'
             '</span> <span class="volume">123</span> \n <span class='
             '"reporter">U.S.</span>, at <span class="page">456</span></span>'
             '<pre class="inline">. Foo bar foobar</pre>'),

            # First kind of supra citation (standard kind)
            ('existing text asdf, supra, at 2. foobar',
             '<pre class="inline">existing text </pre><span class="citation '
             'no-link"><span class="antecedent_guess">asdf,</span> supra, at '
             '<span class="page">2</span></span><pre class="inline">. foobar'
             '</pre>'),

            # Second kind of supra citation (with volume)
            ('existing text asdf, 123 supra, at 2. foo bar',
             '<pre class="inline">existing text </pre><span class="citation '
             'no-link"><span class="antecedent_guess">asdf,</span> <span '
             'class="volume">123</span> supra, at <span class="page">2</span>'
             '</span><pre class="inline">. foo bar</pre>'),

            # Third kind of supra citation (sans page)
            ('existing text asdf, supra, foo bar',
             '<pre class="inline">existing text </pre><span class="citation '
             'no-link"><span class="antecedent_guess">asdf,</span> supra'
             '</span><pre class="inline">, foo bar</pre>'),

            # Fourth kind of supra citation (with period)
            ('existing text asdf, supra. foo bar',
             '<pre class="inline">existing text </pre><span class="citation '
             'no-link"><span class="antecedent_guess">asdf,</span> supra'
             '</span><pre class="inline">. foo bar</pre>'),

            # Supra citation across line break
            ('existing text asdf, supra, at\n99 (quoting foo)',
             '<pre class="inline">existing text </pre><span class="citation '
             'no-link"><span class="antecedent_guess">asdf,</span> supra, '
             'at\n<span class="page">99</span> </span><pre class="inline">'
             '(quoting foo)</pre>'),

            # Id. citation ("Id., at 123")
            ('asdf, id., at 123. Lorem ipsum dolor sit amet',
             '<pre class="inline">asdf</pre><span class="citation no-link">, '
             '<span class="id_token">id.,</span> at 123. </span><pre class="'
             'inline">Lorem ipsum dolor sit amet</pre>'),

            # Duplicate Id. citation
            ('asd, id., at 123. Lo rem ip sum. asdf, id., at 123. Lo rem ip.',
             '<pre class="inline">asd</pre><span class="citation no-link">, '
             '<span class="id_token">id.,</span> at 123. </span><pre class="'
             'inline">Lo rem ip sum. asdf</pre><span class="citation '
             'no-link">, <span class="id_token">id.,</span> at 123. </span>'
             '<pre class="inline">Lo rem ip.</pre>'),

            # Id. citation across line break
            ('asdf." Id., at 315.\n       Lorem ipsum dolor sit amet',
             '<pre class="inline">asdf."</pre><span class="citation no-link"> '
             '<span class="id_token">Id.,</span> at 315.\n</span><pre class="'
             'inline">       Lorem ipsum dolor sit amet</pre>'),

            # Ibid. citation ("... Ibid.")
            ('asdf, Ibid. Lorem ipsum dolor sit amet',
             '<pre class="inline">asdf</pre><span class="citation no-link">, '
             '<span class="id_token">Ibid.</span> Lorem ipsum dolor </span>'
             '<pre class="inline">sit amet</pre>'),

            # NonopinionCitation (currently nothing should happen here)
            ('Lorem ipsum dolor sit amet. U.S. Code §3617. Foo bar.',
             '<pre class="inline">Lorem ipsum dolor sit amet. U.S. Code '
             '§3617. Foo bar.</pre>'),
        ]

        # fmt: on
        for s, expected_html in test_pairs:
            # subTest keeps one failing case from hiding the remaining ones:
            # every pair is run and reported independently.
            with self.subTest(plain_text=s):
                print("Testing plain text to html conversion for %s..." % s,
                      end=" ")
                opinion = Opinion(plain_text=s)
                citations = get_citations(s)
                created_html = create_cited_html(opinion, citations)
                self.assertEqual(
                    created_html,
                    expected_html,
                    msg="\n%s\n\n    !=\n\n%s" % (created_html, expected_html),
                )
                # Only reached when the assertion above passed.
                print("✓")