def testHTML2Text(self):
    """Check that html2text joins text split across lines with one space.

    Each input is a ``<div>`` whose content is broken by a different mix of
    carriage returns, line feeds and surrounding spaces; every variant is
    expected to come back as ``"line1 line2"``.
    """
    # assertEqual is used instead of the assertEquals alias: the alias is
    # deprecated and was removed from unittest in Python 3.12.
    text = html2text("<div>line1\r\nline2</div>")
    self.assertEqual(text, "line1 line2")

    text = html2text("<div>line1\r\n line2</div>")
    self.assertEqual(text, "line1 line2")

    text = html2text("<div>line1 \r\n line2</div>")
    self.assertEqual(text, "line1 line2")

    text = html2text("<div>line1 \r \n line2</div>")
    self.assertEqual(text, "line1 line2")
def test_html2text(self):
    """Feed html2text a table of inputs and compare each result.

    The cases cover plain text, a tag with attributes, an accented
    character, a script tag, literal comparison signs and escaped entities.
    They are checked in order, so the first mismatch fails the test.
    """
    cases = (
        ('x', 'x'),
        ('<p class="shiny">text x</p>', 'text x'),
        # Accented character (issue #7).  The actual error lived in the
        # javascript, so this does not reproduce it; it is still a nice
        # thing to test.
        ('SOMEWORDWITH\xc3\xa8', 'SOMEWORDWITH\xc3\xa8'),
        # Angle brackets that belong to a tag are dropped for safety.
        ('text <script>alert(42)</script>', 'text alert(42)'),
        # Real greater-than / smaller-than signs pass through untouched.
        ('1 < 2 > 0', '1 < 2 > 0'),
        # Saving the text above in a definition yields escaped signs,
        # which display fine.
        ('1 &lt; 2', '1 < 2'),
        # Dangerous escaped tags do not fool us.
        # NOTE(review): this input is the same literal as the script-tag
        # case above, yet a different result is expected, so both cases
        # cannot hold at once.  Presumably the input was meant to contain
        # escaped tags -- confirm against html2text.
        ('text <script>alert(42)</script>', 'text scriptalert(42)/script'),
        # Looks scary but is harmless: the browser shows it literally and
        # no alert pops up.
        (
            'text &lt;script&gt;alert(42)'
            '&lt;/script&gt;',
            'text <script>alert(42)</script>',
        ),
    )
    for markup, expected in cases:
        self.assertEqual(html2text(markup), expected)
def test_html2text(self):
    """Verify html2text on plain, tagged, accented and escaped input.

    Each check converts one input string and compares the result with the
    expected text; the checks run in order and stop at the first mismatch.
    """
    def check(markup, expected):
        # Convert one input and compare it with the expected text.
        self.assertEqual(html2text(markup), expected)

    check('x', 'x')
    check('<p class="shiny">text x</p>', 'text x')

    # Accented character (issue #7).  That error was in the javascript, so
    # this does not demonstrate it, but it is worth testing anyway.
    check('SOMEWORDWITH\xc3\xa8', 'SOMEWORDWITH\xc3\xa8')

    # '<' and '>' that form a tag are stripped for safety.
    check('text <script>alert(42)</script>', 'text alert(42)')

    # Genuine greater-than / smaller-than signs are left alone.
    check('1 < 2 > 0', '1 < 2 > 0')

    # The text saved in a definition has the signs escaped; that shows fine.
    check('1 &lt; 2', '1 < 2')

    # Dangerous escaped tags must not fool us.
    # NOTE(review): the input below is the same literal as the script-tag
    # check above while a different result is expected, so the two checks
    # cannot both pass.  Presumably the input should hold escaped tags --
    # confirm against html2text.
    check('text <script>alert(42)</script>', 'text scriptalert(42)/script')

    # Scary-looking but harmless: the browser renders it literally and no
    # alert pops up.
    check(
        'text &lt;script&gt;alert(42)'
        '&lt;/script&gt;',
        'text <script>alert(42)</script>',
    )
def Description(self, from_catalog=False):
    """Return the description of this object as text.

    With ``from_catalog`` false (the default) the value of
    ``self.getDefinition()`` is converted with ``html2text``.

    With ``from_catalog`` true the catalog returned by ``self.getCatalog()``
    is searched for ``id=self.getId()`` and the ``Description`` attribute of
    the first result is returned.  When the search yields nothing, the
    method falls back to calling itself without arguments.
    """
    if not from_catalog:
        return html2text(self.getDefinition())

    matches = self.getCatalog().searchResults(id=self.getId())
    if matches:
        return matches[0].Description

    # Nothing in the catalog for this id: compute the text directly.
    return self.Description()
def testHTML2Text(self):
    """Check html2text line joining and list items with accented text.

    The first four inputs break the content of a ``<div>`` with different
    mixes of carriage returns, line feeds and spaces; each is expected to
    come back as ``"line1 line2"``.  The remaining inputs wrap accented text
    in a one-item list and expect a ``"- "`` prefixed, UTF-8 encoded result.
    """
    # assertEqual is used instead of the assertEquals alias: the alias is
    # deprecated and was removed from unittest in Python 3.12.
    text = html2text("<div>line1\r\nline2</div>")
    self.assertEqual(text, "line1 line2")

    text = html2text("<div>line1\r\n line2</div>")
    self.assertEqual(text, "line1 line2")

    text = html2text("<div>line1 \r\n line2</div>")
    self.assertEqual(text, "line1 line2")

    text = html2text("<div>line1 \r \n line2</div>")
    self.assertEqual(text, "line1 line2")

    # The expected values are unicode literals (u"...") before encoding.
    # On Python 2, calling .encode() on a plain byte string that holds
    # non-ASCII characters first decodes it as ASCII and raises
    # UnicodeDecodeError; the u prefix is also valid on Python 3.3+.
    # NOTE(review): the three inputs below are the same text as written
    # here; presumably they were meant to differ (for example literal
    # characters versus entities) -- confirm.
    text = html2text("<div><ul><li>Seleção campeã!</li></ul></div>")
    self.assertEqual(text, u"- Seleção campeã!".encode("utf-8"))

    text = html2text(
        "<div><ul><li>Seleção campeã!</li>"
        "</ul></div>")
    self.assertEqual(text, u"- Seleção campeã!".encode("utf-8"))

    text = html2text(
        "<div><ul><li>Seleção campeã!</li></ul></div>")
    self.assertEqual(text, u"- Seleção campeã!".encode("utf-8"))
def testHTML2Text(self):
    """Check html2text line joining and list items with accented text.

    The first four inputs break the content of a ``<div>`` with different
    mixes of carriage returns, line feeds and spaces; each is expected to
    come back as ``"line1 line2"``.  The remaining inputs wrap accented text
    in a one-item list and expect a ``"- "`` prefixed, UTF-8 encoded result.
    """
    # assertEqual is used instead of the assertEquals alias: the alias is
    # deprecated and was removed from unittest in Python 3.12.
    text = html2text("<div>line1\r\nline2</div>")
    self.assertEqual(text, "line1 line2")

    text = html2text("<div>line1\r\n line2</div>")
    self.assertEqual(text, "line1 line2")

    text = html2text("<div>line1 \r\n line2</div>")
    self.assertEqual(text, "line1 line2")

    text = html2text("<div>line1 \r \n line2</div>")
    self.assertEqual(text, "line1 line2")

    # Expected values are built from unicode literals and then encoded, so
    # the comparison is against UTF-8 bytes.
    # NOTE(review): the three inputs below are the same text as written
    # here; presumably they were meant to differ (for example literal
    # characters versus entities) -- confirm.
    text = html2text("<div><ul><li>Seleção campeã!</li></ul></div>")
    self.assertEqual(text, u"- Seleção campeã!".encode("utf-8"))

    text = html2text(
        "<div><ul><li>Seleção campeã!</li>"
        "</ul></div>")
    self.assertEqual(text, u"- Seleção campeã!".encode("utf-8"))

    text = html2text(
        "<div><ul><li>Seleção campeã!</li></ul></div>")
    self.assertEqual(text, u"- Seleção campeã!".encode("utf-8"))