Example #1
0
 def _inner(value):
     if isinstance(value, six.string_types):
         washer = HTMLWasher()
         return washer.wash(value,
                            allowed_tag_whitelist=allowed_tag_whitelist)
     else:
         return value
 def _inner(value):
     if isinstance(value, six.string_types):
         washer = HTMLWasher()
         return washer.wash(value,
                            allowed_tag_whitelist=allowed_tag_whitelist)
     else:
         return value
class XSSEscapingTest(InvenioTestCase):
    """Test functions related to the prevention of XSS attacks."""

    def __init__(self, methodName='test'):
        self.washer = HTMLWasher()
        InvenioTestCase.__init__(self, methodName)

    def test_forbidden_formatting_tags(self):
        """htmlutils - washing of tags altering formatting of a page (e.g. </html>)"""
        test_str = """</html></body></pre>"""
        self.assertEqual(self.washer.wash(html_buffer=test_str),
                         '')
        self.assertEqual(self.washer.wash(html_buffer=test_str,
                                          render_unallowed_tags=True),
                         '&lt;/html&gt;&lt;/body&gt;&lt;/pre&gt;')

    def test_forbidden_script_tags(self):
        """htmlutils - washing of tags defining scripts (e.g. <script>)"""
        test_str = """<script>malicious_function();</script>"""
        self.assertEqual(self.washer.wash(html_buffer=test_str),
                         '')
        self.assertEqual(self.washer.wash(html_buffer=test_str,
                                          render_unallowed_tags=True),
                         '&lt;script&gt;malicious_function();&lt;/script&gt;')

    def test_forbidden_attributes(self):
        """htmlutils - washing of forbidden attributes in allowed tags (e.g. onLoad)"""
        # onload
        test_str = """<p onload="javascript:malicious_functtion();">"""
        self.assertEqual(self.washer.wash(html_buffer=test_str), '<p>')
        # tricky: css calling a javascript
        test_str = """<p style="background: url('http://malicious_site.com/malicious_script.js');">"""
        self.assertEqual(self.washer.wash(html_buffer=test_str), '<p>')

    def test_fake_url(self):
        """htmlutils - washing of fake URLs which execute scripts"""
        test_str = """<a href="javascript:malicious_function();">link</a>"""
        self.assertEqual(self.washer.wash(html_buffer=test_str),
                         '<a href="">link</a>')
        # Pirates could encode ascii values, or use uppercase letters...
        test_str = """<a href="&#106;a&#118;asCRi&#112;t:malicious_function();">link</a>"""
        self.assertEqual(self.washer.wash(html_buffer=test_str),
                         '<a href="">link</a>')
        # MSIE treats 'java\ns\ncript:' the same way as 'javascript:'
        # Here we test with:
        # j
        #     avas
        #   crIPt :
        test_str = """<a href="&#106;\n    a&#118;as\n  crI&#80;t :malicious_function();">link</a>"""
        self.assertEqual(self.washer.wash(html_buffer=test_str),
                         '<a href="">link</a>')
class HTMLAutomaticLinksTransformation(InvenioTestCase):
    """Test functions related to transforming links into HTML context"""

    def __init__(self, methodName='test'):
        self.washer = HTMLWasher()
        InvenioTestCase.__init__(self, methodName)

    def test_transform_link(self):
        """htmlutils - transforming a link"""
        body_input = 'https://cds.cern.ch/collection/Multimedia%20%26%20Outreach?ln=es'
        body_expected = '<a href="https://cds.cern.ch/collection/Multimedia%20%26%20Outreach?ln=es">https://cds.cern.ch/collection/Multimedia%20%26%20Outreach?ln=es</a>'
        self.assertEqual(self.washer.wash(html_buffer=body_input,
                                          automatic_link_transformation=True),
                         body_expected)

    def test_transform_several_links(self):
        """htmlutils - transforming several links"""
        body_input = 'some text https://cds.cern.ch/collection/Videos?ln=es more text https://cds.cern.ch/search?p=%27CERN+News'
        body_expected = 'some text <a href="https://cds.cern.ch/collection/Videos?ln=es">https://cds.cern.ch/collection/Videos?ln=es</a> more text <a href="https://cds.cern.ch/search?p=%27CERN">https://cds.cern.ch/search?p=%27CERN</a>+News'
        self.assertEqual(self.washer.wash(html_buffer=body_input,
                                          automatic_link_transformation=True),
                         body_expected)

    def test_transform_just_valid_links(self):
        """htmlutils - transforming just valid links"""
        body_input = body_input = 'some text https://cds.cern.ch/collection/Videos?ln=es more text https://cds..cern/search?p=%27CERN+News'
        body_expected = 'some text <a href="https://cds.cern.ch/collection/Videos?ln=es">https://cds.cern.ch/collection/Videos?ln=es</a> more text https://cds..cern/search?p=%27CERN+News'
        self.assertEqual(self.washer.wash(html_buffer=body_input,
                                          automatic_link_transformation=True),
                         body_expected)

    def test_not_transform_link(self):
        """htmlutils - not transforming a link"""
        body_input = '<a href="https://cds.cern.ch/collection/Multimedia%20%26%20Outreach?ln=es">Multimedia</a>'
        body_expected = '<a href="https://cds.cern.ch/collection/Multimedia%20%26%20Outreach?ln=es">Multimedia</a>'
        self.assertEqual(self.washer.wash(html_buffer=body_input,
                                          automatic_link_transformation=True),
                         body_expected)
class HTMLWashingTest(InvenioTestCase):
    """Test functions related to general washing of HTML source"""

    def __init__(self, methodName='test'):
        self.washer = HTMLWasher()
        InvenioTestCase.__init__(self, methodName)

    def test_wash_html(self):
        """htmlutils - washing HTML tags"""

        # Simple test case
        test_str = 'Spam and <b><blink>eggs</blink></b>'
        self.assertEqual(self.washer.wash(html_buffer=test_str),
                         'Spam and <b>eggs</b>')

        # Show 'escaped' tags
        test_str = 'Spam and <b><blink>eggs</blink></b>'
        self.assertEqual(self.washer.wash(html_buffer=test_str,
                                          render_unallowed_tags=True),
                         'Spam and <b>&lt;blink&gt;eggs&lt;/blink&gt;</b>')

        # Keep entity and character references
        test_str = '<b> a &lt; b &gt; c </b> &#247;'
        self.assertEqual(self.washer.wash(html_buffer=test_str),
                         '<b> a &lt; b &gt; c </b> &#247;')

        # Remove content of <script> tags
        test_str = '<script type="text/javacript">alert("foo")</script>bar'
        self.assertEqual(self.washer.wash(html_buffer=test_str),
                         'bar')
        test_str = '<script type="text/javacript"><!--alert("foo")--></script>bar'
        self.assertEqual(self.washer.wash(html_buffer=test_str),
                         'bar')

        # Remove content of <style> tags
        test_str = '<style>.myclass {color:#f00}</style><span class="myclass">styled text</span>'
        self.assertEqual(self.washer.wash(html_buffer=test_str),
                         'styled text')
        test_str = '<style><!-- .myclass {color:#f00} --></style><span class="myclass">styled text</span>'
        self.assertEqual(self.washer.wash(html_buffer=test_str),
                         'styled text')
 def __init__(self, methodName='test'):
     self.washer = HTMLWasher()
     InvenioTestCase.__init__(self, methodName)