def _inner(value):
    """Wash ``value`` with a fresh HTMLWasher when it is a string.

    Non-string values are handed back unchanged. The whitelist passed to
    the washer is ``allowed_tag_whitelist``, which is not a parameter of
    this function and is resolved from the surrounding scope.

    :param value: object to wash
    :return: the washed string, or ``value`` itself when it is not a string
    """
    # Guard clause: anything that is not text passes straight through.
    if not isinstance(value, six.string_types):
        return value
    return HTMLWasher().wash(value,
                             allowed_tag_whitelist=allowed_tag_whitelist)
class XSSEscapingTest(InvenioTestCase):
    """Check how HTMLWasher treats markup that could be used for XSS."""

    def __init__(self, methodName='test'):
        # The washer is attached before the base initialiser is invoked.
        self.washer = HTMLWasher()
        InvenioTestCase.__init__(self, methodName)

    def test_forbidden_formatting_tags(self):
        """htmlutils - washing of tags altering formatting of a page (e.g. </html>)"""
        closing_tags = """</html></body></pre>"""

        stripped = self.washer.wash(html_buffer=closing_tags)
        self.assertEqual(stripped, '')

        rendered = self.washer.wash(html_buffer=closing_tags,
                                    render_unallowed_tags=True)
        self.assertEqual(rendered, '</html></body></pre>')

    def test_forbidden_script_tags(self):
        """htmlutils - washing of tags defining scripts (e.g. <script>)"""
        script_markup = """<script>malicious_function();</script>"""

        stripped = self.washer.wash(html_buffer=script_markup)
        self.assertEqual(stripped, '')

        rendered = self.washer.wash(html_buffer=script_markup,
                                    render_unallowed_tags=True)
        self.assertEqual(rendered, '<script>malicious_function();</script>')

    def test_forbidden_attributes(self):
        """htmlutils - washing of forbidden attributes in allowed tags (e.g. onLoad)"""
        # Event-handler attribute (onload) on an allowed tag.
        with_onload = """<p onload="javascript:malicious_functtion();">"""
        self.assertEqual(self.washer.wash(html_buffer=with_onload), '<p>')

        # Tricky variant: a CSS declaration that pulls in a javascript file.
        with_style = """<p style="background: url('http://malicious_site.com/malicious_script.js');">"""
        self.assertEqual(self.washer.wash(html_buffer=with_style), '<p>')

    def test_fake_url(self):
        """htmlutils - washing of fake URLs which execute scripts"""
        neutralised = '<a href="">link</a>'
        hostile_links = (
            # Plain javascript: pseudo-URL.
            """<a href="javascript:malicious_function();">link</a>""",
            # Pirates could encode ascii values, or use uppercase letters...
            """<a href="javasCRipt:malicious_function();">link</a>""",
            # MSIE treats 'java\ns\ncript:' the same way as 'javascript:'
            # Here we test with:
            # j
            # avas
            # crIPt :
            """<a href="j\n avas\n crIPt :malicious_function();">link</a>""",
        )
        # Every variant must come back with an emptied href.
        for hostile in hostile_links:
            self.assertEqual(self.washer.wash(html_buffer=hostile),
                             neutralised)
class HTMLAutomaticLinksTransformation(InvenioTestCase):
    """Test functions related to transforming links into HTML context.

    Each test washes a buffer with ``automatic_link_transformation=True``
    and compares the result with the expected markup.
    """

    def __init__(self, methodName='test'):
        """Create the test case and give it its own HTMLWasher instance.

        :param methodName: name of the test method, forwarded to
            ``InvenioTestCase.__init__``
        """
        self.washer = HTMLWasher()
        InvenioTestCase.__init__(self, methodName)

    def test_transform_link(self):
        """htmlutils - transforming a link"""
        body_input = 'https://cds.cern.ch/collection/Multimedia%20%26%20Outreach?ln=es'
        body_expected = '<a href="https://cds.cern.ch/collection/Multimedia%20%26%20Outreach?ln=es">https://cds.cern.ch/collection/Multimedia%20%26%20Outreach?ln=es</a>'
        self.assertEqual(self.washer.wash(html_buffer=body_input,
                                          automatic_link_transformation=True),
                         body_expected)

    def test_transform_several_links(self):
        """htmlutils - transforming several links"""
        body_input = 'some text https://cds.cern.ch/collection/Videos?ln=es more text https://cds.cern.ch/search?p=%27CERN+News'
        # In the expected output the second anchor stops before '+News',
        # which stays outside the link as plain text.
        body_expected = 'some text <a href="https://cds.cern.ch/collection/Videos?ln=es">https://cds.cern.ch/collection/Videos?ln=es</a> more text <a href="https://cds.cern.ch/search?p=%27CERN">https://cds.cern.ch/search?p=%27CERN</a>+News'
        self.assertEqual(self.washer.wash(html_buffer=body_input,
                                          automatic_link_transformation=True),
                         body_expected)

    def test_transform_just_valid_links(self):
        """htmlutils - transforming just valid links"""
        # The second URL ('https://cds..cern/...') is expected to be left
        # untouched, while the first one is wrapped in an anchor.
        body_input = 'some text https://cds.cern.ch/collection/Videos?ln=es more text https://cds..cern/search?p=%27CERN+News'
        body_expected = 'some text <a href="https://cds.cern.ch/collection/Videos?ln=es">https://cds.cern.ch/collection/Videos?ln=es</a> more text https://cds..cern/search?p=%27CERN+News'
        self.assertEqual(self.washer.wash(html_buffer=body_input,
                                          automatic_link_transformation=True),
                         body_expected)

    def test_not_transform_link(self):
        """htmlutils - not transforming a link"""
        # A URL that already sits inside an anchor must come back unchanged.
        body_input = '<a href="https://cds.cern.ch/collection/Multimedia%20%26%20Outreach?ln=es">Multimedia</a>'
        body_expected = '<a href="https://cds.cern.ch/collection/Multimedia%20%26%20Outreach?ln=es">Multimedia</a>'
        self.assertEqual(self.washer.wash(html_buffer=body_input,
                                          automatic_link_transformation=True),
                         body_expected)
class HTMLWashingTest(InvenioTestCase):
    """Exercise the general HTML washing behaviour of HTMLWasher."""

    def __init__(self, methodName='test'):
        # The washer is attached before the base initialiser is invoked.
        self.washer = HTMLWasher()
        InvenioTestCase.__init__(self, methodName)

    def test_wash_html(self):
        """htmlutils - washing HTML tags"""
        # Each entry: (buffer to wash, extra keyword arguments, expected
        # result). The entries are checked strictly in this order.
        cases = (
            # Simple test case
            ('Spam and <b><blink>eggs</blink></b>',
             {},
             'Spam and <b>eggs</b>'),
            # Show 'escaped' tags
            # NOTE(review): the expected literal equals the input, with no
            # escaped entities in it - confirm it was not entity-decoded
            # somewhere on the way to this file.
            ('Spam and <b><blink>eggs</blink></b>',
             {'render_unallowed_tags': True},
             'Spam and <b><blink>eggs</blink></b>'),
            # Keep entity and character references
            # NOTE(review): the literals hold raw '<', '>' and a raw
            # division sign rather than references - confirm against the
            # intended test data.
            ('<b> a < b > c </b> ÷',
             {},
             '<b> a < b > c </b> ÷'),
            # Remove content of <script> tags
            ('<script type="text/javacript">alert("foo")</script>bar',
             {},
             'bar'),
            ('<script type="text/javacript"><!--alert("foo")--></script>bar',
             {},
             'bar'),
            # Remove content of <style> tags
            ('<style>.myclass {color:#f00}</style><span class="myclass">styled text</span>',
             {},
             'styled text'),
            ('<style><!-- .myclass {color:#f00} --></style><span class="myclass">styled text</span>',
             {},
             'styled text'),
        )
        for markup, options, washed in cases:
            self.assertEqual(self.washer.wash(html_buffer=markup, **options),
                             washed)
def __init__(self, methodName='test'):
    """Create the test case and give it its own HTMLWasher instance.

    :param methodName: name of the test method; forwarded unchanged to
        ``InvenioTestCase.__init__``
    """
    # The washer is stored on the instance before the base-class
    # initialiser is invoked.
    self.washer = HTMLWasher()
    InvenioTestCase.__init__(self, methodName)