def test_make_html(self) -> None:
    """Can we make basic HTML conversions properly?

    Each plain-text input is run through ``get_citations`` and
    ``create_cited_html`` and the result is compared against one shared
    expected string, i.e. both spellings of the reporter ("U.S." and
    "U. S.") must yield exactly the same markup.
    """
    good_html = (
        '<pre class="inline">asdf </pre><span class="citation '
        'no-link"><span class="volume">22</span> <span '
        'class="reporter">U.S.</span> <span class="page">33</span>'
        '</span><pre class="inline"> asdf</pre>'
    )
    plain_texts = (
        # Simple example
        'asdf 22 U.S. 33 asdf',
        # Using a variant format for U.S. (Issue #409)
        'asdf 22 U. S. 33 asdf',
    )
    for s in plain_texts:
        # subTest keeps going after a failure, so a regression in one
        # spelling does not hide the result for the other.
        with self.subTest(plain_text=s):
            opinion = Opinion(plain_text=s)
            citations = get_citations(s)
            new_html = create_cited_html(opinion, citations)
            self.assertEqual(
                good_html,
                new_html,
            )
def test_make_html_from_matched_citation_objects(self) -> None:
    """Render hand-built, already-matched citation objects to HTML.

    Every case supplies the opinion's plain text, a citation object
    constructed by hand, and the HTML that ``create_cited_html`` must
    return once the citation carries a match URL and a match ID.
    """
    # The citation objects are built directly here instead of being
    # extracted from the text, and the match attributes are set by hand,
    # so this exercises the rendering of citations we assert are matched.
    # fmt: off
    cases = [
        # Id. citation with page number ("Id., at 123, 124")
        (
            'asdf, Id., at 123, 124. Lorem ipsum dolor sit amet',
            IdCitation(
                id_token='Id.,',
                after_tokens=['at', '123', '124'],
                has_page=True,
            ),
            '<pre class="inline">asdf</pre><span class="citation" data-id="'
            'MATCH_ID">, <a href="MATCH_URL"><span class="id_token">Id.,'
            '</span> at 123, 124</a></span><pre class="inline">. Lorem ipsum'
            ' dolor sit amet</pre>',
        ),
        # Id. citation with complex page number ("Id. @ 123:1, ¶¶ 124")
        (
            'asdf, Id. @ 123:1, ¶¶ 124. Lorem ipsum dolor sit amet',
            IdCitation(
                id_token='Id.',
                after_tokens=['@', '123:1', '¶¶', '124'],
                has_page=True,
            ),
            '<pre class="inline">asdf</pre><span class="citation" data-id="'
            'MATCH_ID">, <a href="MATCH_URL"><span class="id_token">Id.'
            '</span> @ 123:1, ¶¶ 124</a></span><pre class="inline">. Lorem '
            'ipsum dolor sit amet</pre>',
        ),
        # Id. citation without page number ("Id. Something else")
        (
            'asdf, Id. Lorem ipsum dolor sit amet',
            IdCitation(
                id_token='Id.',
                after_tokens=['Lorem', 'ipsum'],
                has_page=False,
            ),
            '<pre class="inline">asdf</pre><span class="citation" data-id='
            '"MATCH_ID">, <a href="MATCH_URL"><span class="id_token">Id.'
            '</span></a> Lorem ipsum </span><pre class="inline">dolor sit '
            'amet</pre>',
        ),
    ]
    # fmt: on
    for text, cite, wanted in cases:
        print(
            "Testing object to HTML rendering for %s..." % text,
            end=" ",
        )
        opinion = Opinion(plain_text=text)
        # Mark the citation as matched before rendering.
        cite.match_url = "MATCH_URL"
        cite.match_id = "MATCH_ID"
        rendered = create_cited_html(opinion, [cite])
        failure_msg = "\n%s\n\n !=\n\n%s" % (rendered, wanted)
        self.assertEqual(rendered, wanted, msg=failure_msg)
        print("✓")
def test_make_html_from_html(self) -> None:
    """Annotate citations inside an opinion that is already HTML.

    Each case pairs an HTML input with the HTML expected back from
    ``create_cited_html`` after citations are extracted with the
    "html" and "whitespace" cleaners.
    """
    # fmt: off
    cases = [
        # Id. citation with HTML tags
        (
            '<div><p>the improper views of the Legislature.\" 2 <i>id.,</i> '
            'at 73.</p>\n<p>Nathaniel Gorham of Massachusetts</p></div>',
            '<div><p>the improper views of the Legislature." 2<span class="'
            'citation no-link"> <i><span class="id_token">id.,</span></i> at '
            '73.</span></p>\n<p>Nathaniel Gorham of Massachusetts</p></div>',
        ),
        # Id. citation with an intervening HTML tag
        # (We expect the HTML to be unchanged, since it's too risky to
        # modify with another tag in the way)
        (
            '<div><p>the improper views of the Legislature.\" 2 <i>id.,</i> '
            'at <b>73, bolded</b>.</p>\n<p>Nathaniel Gorham of Massachusetts'
            '</p></div>',
            '<div><p>the improper views of the Legislature.\" 2 <i>id.,</i> '
            'at <b>73, bolded</b>.</p>\n<p>Nathaniel Gorham of Massachusetts'
            '</p></div>',
        ),
        # Ibid. citation with HTML tags
        (
            '<div><p>possess any peculiar knowledge of the mere policy of '
            'public measures.\" <i>Ibid.</i> Gerry of Massachusetts '
            'like</p></div>',
            '<div><p>possess any peculiar knowledge of the mere policy of '
            'public measures."<span class="citation no-link"> <i><span class='
            '"id_token">Ibid.</span></i> Gerry of Massachusetts </span>like'
            '</p></div>',
        ),
    ]
    # fmt: on
    for source_html, wanted in cases:
        print("Testing html to html conversion for %s..." % source_html, end=" ")
        opinion = Opinion(html=source_html)
        found = get_citations(source_html, clean=("html", "whitespace"))
        rendered = create_cited_html(opinion, found)
        failure_msg = "\n%s\n\n !=\n\n%s" % (rendered, wanted)
        self.assertEqual(rendered, wanted, msg=failure_msg)
        print("✓")
def test_make_html_from_plain_text(self) -> None:
    """Turn an opinion's plain text into citation-annotated HTML.

    Each case pairs a plain-text input with the exact HTML expected from
    ``create_cited_html`` for the citations ``get_citations`` finds in it.
    """
    # fmt: off
    full_citation_html = (
        '<pre class="inline">asdf </pre><span class="'
        'citation no-link"><span class="volume">22'
        '</span> <span class="reporter">U.S.</span> '
        '<span class="page">33</span> </span><pre class='
        '"inline">asdf</pre>'
    )
    cases = [
        # Simple example for full citations
        ('asdf 22 U.S. 33 asdf', full_citation_html),

        # Using a variant format for U.S. (Issue #409)
        ('asdf 22 U. S. 33 asdf', full_citation_html),

        # Full citation across line break
        (
            'asdf John v. Doe, 123\nU.S. 456, upholding foo bar',
            '<pre class="inline">asdf John v. Doe, </pre><span class="'
            'citation no-link"><span class="volume">123</span>\n<span class='
            '"reporter">U.S.</span> <span class="page">456</span></span><pre'
            ' class="inline">, upholding foo bar</pre>',
        ),

        # Basic short form citation
        (
            'existing text asdf, 515 U.S., at 240. foobar',
            '<pre class="inline">existing text </pre><span class="citation '
            'no-link"><span class="antecedent_guess">asdf,</span> <span '
            'class="volume">515</span> <span class="reporter">U.S.</span>, '
            'at <span class="page">240</span></span><pre class="inline">. '
            'foobar</pre>',
        ),

        # Short form citation with no comma after reporter in original
        (
            'existing text asdf, 1 U. S. at 2. foobar',
            '<pre class="inline">existing text </pre><span class="citation '
            'no-link"><span class="antecedent_guess">asdf,</span> <span class'
            '="volume">1</span> <span class="reporter">U.S.</span> at <span '
            'class="page">2</span></span><pre class="inline">. foobar</pre>',
        ),

        # Short form citation across line break
        (
            'asdf.’ ” 123 \n U.S., at 456. Foo bar foobar',
            '<pre class="inline">asdf.’ </pre><span class="'
            'citation no-link"><span class="antecedent_guess">”'
            '</span> <span class="volume">123</span> \n <span class='
            '"reporter">U.S.</span>, at <span class="page">456</span></span>'
            '<pre class="inline">. Foo bar foobar</pre>',
        ),

        # First kind of supra citation (standard kind)
        (
            'existing text asdf, supra, at 2. foobar',
            '<pre class="inline">existing text </pre><span class="citation '
            'no-link"><span class="antecedent_guess">asdf,</span> supra, at '
            '<span class="page">2</span></span><pre class="inline">. foobar'
            '</pre>',
        ),

        # Second kind of supra citation (with volume)
        (
            'existing text asdf, 123 supra, at 2. foo bar',
            '<pre class="inline">existing text </pre><span class="citation '
            'no-link"><span class="antecedent_guess">asdf,</span> <span '
            'class="volume">123</span> supra, at <span class="page">2</span>'
            '</span><pre class="inline">. foo bar</pre>',
        ),

        # Third kind of supra citation (sans page)
        (
            'existing text asdf, supra, foo bar',
            '<pre class="inline">existing text </pre><span class="citation '
            'no-link"><span class="antecedent_guess">asdf,</span> supra'
            '</span><pre class="inline">, foo bar</pre>',
        ),

        # Fourth kind of supra citation (with period)
        (
            'existing text asdf, supra. foo bar',
            '<pre class="inline">existing text </pre><span class="citation '
            'no-link"><span class="antecedent_guess">asdf,</span> supra'
            '</span><pre class="inline">. foo bar</pre>',
        ),

        # Supra citation across line break
        (
            'existing text asdf, supra, at\n99 (quoting foo)',
            '<pre class="inline">existing text </pre><span class="citation '
            'no-link"><span class="antecedent_guess">asdf,</span> supra, '
            'at\n<span class="page">99</span> </span><pre class="inline">'
            '(quoting foo)</pre>',
        ),

        # Id. citation ("Id., at 123")
        (
            'asdf, id., at 123. Lorem ipsum dolor sit amet',
            '<pre class="inline">asdf</pre><span class="citation no-link">, '
            '<span class="id_token">id.,</span> at 123. </span><pre class="'
            'inline">Lorem ipsum dolor sit amet</pre>',
        ),

        # Duplicate Id. citation
        (
            'asd, id., at 123. Lo rem ip sum. asdf, id., at 123. Lo rem ip.',
            '<pre class="inline">asd</pre><span class="citation no-link">, '
            '<span class="id_token">id.,</span> at 123. </span><pre class="'
            'inline">Lo rem ip sum. asdf</pre><span class="citation '
            'no-link">, <span class="id_token">id.,</span> at 123. </span>'
            '<pre class="inline">Lo rem ip.</pre>',
        ),

        # Id. citation across line break
        (
            'asdf." Id., at 315.\n Lorem ipsum dolor sit amet',
            '<pre class="inline">asdf."</pre><span class="citation no-link"> '
            '<span class="id_token">Id.,</span> at 315.\n</span><pre class="'
            'inline"> Lorem ipsum dolor sit amet</pre>',
        ),

        # Ibid. citation ("... Ibid.")
        (
            'asdf, Ibid. Lorem ipsum dolor sit amet',
            '<pre class="inline">asdf</pre><span class="citation no-link">, '
            '<span class="id_token">Ibid.</span> Lorem ipsum dolor </span>'
            '<pre class="inline">sit amet</pre>',
        ),

        # NonopinionCitation (currently nothing should happen here)
        (
            'Lorem ipsum dolor sit amet. U.S. Code §3617. Foo bar.',
            '<pre class="inline">Lorem ipsum dolor sit amet. U.S. Code '
            '§3617. Foo bar.</pre>',
        ),
    ]
    # fmt: on
    for text, wanted in cases:
        print("Testing plain text to html conversion for %s..." % text, end=" ")
        opinion = Opinion(plain_text=text)
        found = get_citations(text)
        rendered = create_cited_html(opinion, found)
        failure_msg = "\n%s\n\n !=\n\n%s" % (rendered, wanted)
        self.assertEqual(rendered, wanted, msg=failure_msg)
        print("✓")