Ejemplo n.º 1
0
def test_Links():
    """Check link and image URL handling of a UGC policy from several threads.

    The policy is a ``UGCPolicy`` with ``RequireParseableURLs(True)``.  The
    expected outputs below pin the following behaviour:

    * accepted ``href`` values gain ``rel="nofollow"``;
    * ``javascript:`` links and a bare ``#`` link are removed entirely;
    * ``&`` inside query strings is escaped to ``&amp;``;
    * a ``data:`` URI in ``<img src>`` is dropped while ``alt`` is kept.

    The cases are run through a four-worker thread pool so the same policy
    object is exercised concurrently.
    """
    cases = [
        Case(
            """<a href="http://www.google.com">""",
            """<a href="http://www.google.com" rel="nofollow">""",
        ),
        Case(
            """<a href="//www.google.com">""",
            """<a href="//www.google.com" rel="nofollow">""",
        ),
        Case(
            """<a href="/www.google.com">""",
            """<a href="/www.google.com" rel="nofollow">""",
        ),
        Case(
            """<a href="www.google.com">""",
            """<a href="www.google.com" rel="nofollow">""",
        ),
        Case("""<a href="javascript:alert(1)">""", ""),
        Case("""<a href="#">""", ""),
        Case("""<a href="#top">""", """<a href="#top" rel="nofollow">"""),
        Case("""<a href="?q=1">""", """<a href="?q=1" rel="nofollow">"""),
        Case(
            """<a href="?q=1&r=2">""",
            """<a href="?q=1&amp;r=2" rel="nofollow">""",
        ),
        Case(
            """<a href="?q=1&q=2">""",
            """<a href="?q=1&amp;q=2" rel="nofollow">""",
        ),
        Case(
            """<a href="?q=%7B%22value%22%3A%22a%22%7D">""",
            """<a href="?q=%7B%22value%22%3A%22a%22%7D" rel="nofollow">""",
        ),
        Case(
            """<a href="?q=1&r=2&s=:foo@">""",
            """<a href="?q=1&amp;r=2&amp;s=:foo@" rel="nofollow">""",
        ),
        Case(
            """<img src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAUAAAAFCAYAAACNbyblAAAAHElEQVQI12P4//8/w38GIAXDIBKE0DHxgljNBAAO9TXL0Y4OHwAAAABJRU5ErkJggg==" alt="Red dot" />""",
            """<img alt="Red dot"/>""",
        ),
        Case("""<img src="giraffe.gif" />""", """<img src="giraffe.gif"/>"""),
        Case(
            """<img src="giraffe.gif?height=500&width=500" />""",
            """<img src="giraffe.gif?height=500&amp;width=500"/>""",
        ),
    ]

    p = UGCPolicy()
    p.RequireParseableURLs(True)

    # Named so it does not look like a collectable test of its own.
    def check(case):
        assert p.sanitize(case.input) == case.output

    # map() blocks until every case has been processed, so leaving the
    # ``with`` block afterwards only releases the worker threads, which the
    # previous version left running for the remainder of the test session.
    with ThreadPool(4) as pool:
        pool.map(check, cases)
Ejemplo n.º 2
0
def test_SignatureBehaviour():
    """Plain text and whitespace-only input must pass through unchanged."""
    policy = UGCPolicy()

    cases = [
        Case("Hi.\n", "Hi.\n"),
        Case("\t\n \n\t", "\t\n \n\t"),
    ]

    for sample in cases:
        sanitized = policy.sanitize(sample.input)
        assert sanitized == sample.output
Ejemplo n.º 3
0
def test_LinkTargets():
    """Check ``target``/``rel`` rewriting for fully qualified links.

    The policy disables the blanket nofollow rule and instead enables
    ``RequireNoFollowOnFullyQualifiedLinks`` and
    ``AddTargetBlankToFullyQualifiedLinks``.  The expected outputs pin that:

    * ``http://`` and protocol-relative ``//`` links gain
      ``rel="nofollow noopener" target="_blank"``;
    * relative, fragment and query-only links are returned unmodified;
    * ``javascript:`` links and a bare ``#`` link are removed entirely;
    * a ``data:`` URI in ``<img src>`` is dropped while ``alt`` is kept.

    The cases are run through a four-worker thread pool so the same policy
    object is exercised concurrently.
    """
    cases = [
        Case(
            """<a href="http://www.google.com">""",
            """<a href="http://www.google.com" rel="nofollow noopener" target="_blank">""",
        ),
        Case(
            """<a href="//www.google.com">""",
            """<a href="//www.google.com" rel="nofollow noopener" target="_blank">""",
        ),
        Case(
            """<a href="/www.google.com">""",
            """<a href="/www.google.com">""",
        ),
        Case("""<a href="www.google.com">""", """<a href="www.google.com">"""),
        Case("""<a href="javascript:alert(1)">""", ""),
        Case("""<a href="#">""", ""),
        Case("""<a href="#top">""", """<a href="#top">"""),
        Case("""<a href="?q=1">""", """<a href="?q=1">"""),
        Case(
            """<img src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAUAAAAFCAYAAACNbyblAAAAHElEQVQI12P4//8/w38GIAXDIBKE0DHxgljNBAAO9TXL0Y4OHwAAAABJRU5ErkJggg==" alt="Red dot" />""",
            """<img alt="Red dot"/>""",
        ),
        Case("""<img src="giraffe.gif" />""", """<img src="giraffe.gif"/>"""),
    ]

    p = UGCPolicy()
    p.RequireParseableURLs(True)
    p.RequireNoFollowOnLinks(False)
    p.RequireNoFollowOnFullyQualifiedLinks(True)
    p.AddTargetBlankToFullyQualifiedLinks(True)

    # Named so it does not look like a collectable test of its own.
    def check(case):
        assert p.sanitize(case.input) == case.output

    # map() blocks until every case has been processed, so leaving the
    # ``with`` block afterwards only releases the worker threads, which the
    # previous version left running for the remainder of the test session.
    with ThreadPool(4) as pool:
        pool.map(check, cases)
Ejemplo n.º 4
0
def test_memory_leak_in_sanitization():
    """Sanitize one hostile document 300000 times and bound the RSS growth.

    The resident set size of the current process is sampled before and after
    the loop; the test fails if it has at least doubled.
    """
    rss_before = psutil.Process().memory_info().rss

    payload = """
<html>
    <head>
    <script type="text/javascript" src="evil-site"></script>
    <link rel="alternate" type="text/rss" src="evil-rss">
    <style>
        body {background-image: url(javascript:do_evil)};
        div {color: expression(evil)};
    </style>
    </head>
    <body onload="evil_function()">
    <!-- I am interpreted for EVIL! -->
    <a href="javascript:evil_function()">a link</a>
    <a href="#" onclick="evil_function()">another link</a>
    <p onclick="evil_function()">a paragraph</p>
    <div style="display: none">secret EVIL!</div>
    <object> of EVIL! </object>
    <iframe src="evil-site"></iframe>
    <form action="evil-site">
        Password: <input type="password" name="password">
    </form>
    <blink>annoying EVIL!</blink>
    <a href="evil-site">spam spam SPAM!</a>
    <image src="evil!">
    </body>
</html>"""

    policy = UGCPolicy()
    iterations = 300000
    for _ in range(iterations):
        # Result is intentionally discarded; only memory behaviour matters.
        policy.sanitize(payload)

    rss_after = psutil.Process().memory_info().rss
    assert rss_after < (rss_before * 2)
Ejemplo n.º 5
0
def test_HrefSanitization():
    """An ``href`` carrying entity-encoded markup must come out escaped.

    Both inputs try to smuggle a ``<script>`` element through the attribute
    value; the expected output keeps the link, escapes the payload and adds
    ``rel="nofollow"``.
    """
    policy = UGCPolicy()

    cases = [
        Case(
            """abc<a href="https://abc&quot;&gt;<script&gt;alert(1)<&#x2f;script/">CLICK""",
            """abc<a href="https://abc&amp;quot;&gt;&lt;script&gt;alert(1)&lt;/script/" rel="nofollow">CLICK""",
        ),
        Case(
            """<a href="https://abc&quot;&gt;<script&gt;alert(1)<&#x2f;script/">""",
            """<a href="https://abc&amp;quot;&gt;&lt;script&gt;alert(1)&lt;/script/" rel="nofollow">""",
        ),
    ]

    for sample in cases:
        sanitized = policy.sanitize(sample.input)
        assert sanitized == sample.output
Ejemplo n.º 6
0
def test_02_a_tag():
    """Check which ``<a>`` attributes and URL schemes survive sanitization.

    Each entry is ``(input, expected)``; an expected value of ``None`` means
    the input must be returned unchanged.  The policy turns off the nofollow
    rule, additionally allows ``name`` and ``target`` on ``<a>``, and allows
    the ``tel`` URL scheme.
    """
    policy = UGCPolicy()
    policy.RequireNoFollowOnLinks(False)
    policy.AllowAttrs("name", "target").OnElements("a")
    policy.AllowURLSchemes("tel")

    entries = (
        ('<a href="/foo">foo</a>', None),
        (
            '<a href="/foo" name="bar" target="some" title="baz" cookies="yesplease">foo</a>',
            '<a href="/foo" name="bar" target="some" title="baz">foo</a>',
        ),
        ('<a href="http://somewhere.else">foo</a>', None),
        ('<a href="https://somewhere.else">foo</a>', None),
        # These test cases don't pass because bluemonday strips the link entirely
        # ('<a href="javascript:alert()">foo</a>', '<a href="#">foo</a>'),
        # ('<a href="javascript%3Aalert()">foo</a>', '<a href="#">foo</a>'),
        ('<a href="mailto:[email protected]">foo</a>', None),
        ('<a href="tel:1-234-567-890">foo</a>', None),
    )

    for markup, expected in entries:
        if expected is None:
            # No explicit expectation: the markup must round-trip untouched.
            expected = markup
        assert policy.sanitize(markup) == expected
Ejemplo n.º 7
0
        "autoplay",
        "buffered",
        "controls",
        "crossorigin",
        "loop",
        "muted",
        "playsinline",
        "poster",
        "preload",
        "src",
    ],
    "source": ["src", "type"],
    "iframe": ["width", "height", "src", "frameborder", "allow", "allowfullscreen"],
}

# Module-level sanitizer policy: starts from UGCPolicy() and is then widened
# by the allow-list calls below.
SANITIZER = UGCPolicy()

# Each table is indexed by element name and yields that element's attribute
# names: the element itself is allowed, then its attributes are allowed on
# that element only.
for TAGS in (PAGE_STRUCTURE_TAGS, META_TAGS, FORM_TAGS, ANNOYING_TAGS, MEDIA_TAGS):
    for element in TAGS:
        SANITIZER.AllowElements(element)
        SANITIZER.AllowAttrs(*TAGS[element]).OnElements(element)

# Allow safe attrs copied from lxml
SANITIZER.AllowAttrs(*SAFE_ATTRS).Globally()

# Allow styling globally
SANITIZER.AllowAttrs("class", "style").Globally()

# Allow styling via bluemonday
SANITIZER.AllowStyling()
Ejemplo n.º 8
0
def test_AllowComments():
    """HTML comments are stripped until ``AllowComments()`` is called."""
    markup = "1 <!-- 2 --> 3"
    policy = UGCPolicy()

    # Before opting in, the comment is removed but the spaces around it stay.
    assert policy.sanitize(markup) == "1  3"

    policy.AllowComments()
    assert policy.sanitize(markup) == "1 <!-- 2 --> 3"
Ejemplo n.º 9
0
def test_UGCPolicy():
    """Run the unmodified ``UGCPolicy`` over a table of input/output pairs.

    The expectations cover: text and simple formatting passing through,
    event-handler attributes being removed, ``javascript:`` links being
    unwrapped to their text, ``rel="nofollow"`` being added to links and
    image-map areas, invalid or disallowed attributes (``border``, a
    non-numeric ``width``, ``name`` on ``<a>``, ``style``) being dropped,
    ``<font>`` and ``<script>`` being removed, and ``\\r`` being normalised
    to ``\\n``.
    """
    policy = UGCPolicy()

    cases = [
        Case("Hello, World!", "Hello, World!"),
        Case("Hello, <b>World</b>!", "Hello, <b>World</b>!"),
        Case(
            "<p>Hello, <b onclick=alert(1337)>World</b>!</p>",
            "<p>Hello, <b>World</b>!</p>",
        ),
        Case(
            "<p onclick=alert(1337)>Hello, <b>World</b>!</p>",
            "<p>Hello, <b>World</b>!</p>",
        ),
        Case("""<a href="javascript:alert(1337)">foo</a>""", "foo"),
        Case(
            """<img src="http://example.org/foo.gif">""",
            """<img src="http://example.org/foo.gif">""",
        ),
        Case(
            """<img src="http://example.org/x.gif" alt="y" width=96 height=64 border=0>""",
            """<img src="http://example.org/x.gif" alt="y" width="96" height="64">""",
        ),
        Case(
            """<img src="http://example.org/x.png" alt="y" width="widgy" height=64 border=0>""",
            """<img src="http://example.org/x.png" alt="y" height="64">""",
        ),
        Case(
            """<a href="foo.html">Link text</a>""",
            """<a href="foo.html" rel="nofollow">Link text</a>""",
        ),
        Case(
            """<a href="foo.html" onclick="alert(1337)">Link text</a>""",
            """<a href="foo.html" rel="nofollow">Link text</a>""",
        ),
        Case(
            """<a href="http://example.org/x.html" onclick="alert(1337)">Link text</a>""",
            """<a href="http://example.org/x.html" rel="nofollow">Link text</a>""",
        ),
        Case(
            """<a href="https://example.org/x.html" onclick="alert(1337)">Link text</a>""",
            """<a href="https://example.org/x.html" rel="nofollow">Link text</a>""",
        ),
        Case(
            """<a href="//example.org/x.html" onclick="alert(1337)">Link text</a>""",
            """<a href="//example.org/x.html" rel="nofollow">Link text</a>""",
        ),
        Case(
            """<a href="javascript:alert(1337).html" onclick="alert(1337)">Link text</a>""",
            """Link text""",
        ),
        Case(
            """<a name="header" id="header">Header text</a>""",
            """<a id="header">Header text</a>""",
        ),
        Case(
            """<img src="planets.gif" width="145" height="126" alt="" usemap="#demomap"><map name="demomap"><area shape="rect" coords="0,0,82,126" href="demo.htm" alt="1"><area shape="circle" coords="90,58,3" href="demo.htm" alt="2"><area shape="circle" coords="124,58,8" href="demo.htm" alt="3"></map>""",
            """<img src="planets.gif" width="145" height="126" alt="" usemap="#demomap"><map name="demomap"><area shape="rect" coords="0,0,82,126" href="demo.htm" alt="1" rel="nofollow"><area shape="circle" coords="90,58,3" href="demo.htm" alt="2" rel="nofollow"><area shape="circle" coords="124,58,8" href="demo.htm" alt="3" rel="nofollow"></map>""",
        ),
        Case(
            """<table style="color: rgb(0, 0, 0);"><tbody><tr><th>Column One</th><th>Column Two</th></tr><tr><td align="center" style="background-color: rgb(255, 255, 254);"><font size="2">Size 2</font></td><td align="center" style="background-color: rgb(255, 255, 254);"><font size="7">Size 7</font></td></tr></tbody></table>""",
            """<table><tbody><tr><th>Column One</th><th>Column Two</th></tr><tr><td align="center">Size 2</td><td align="center">Size 7</td></tr></tbody></table>""",
        ),
        Case(
            """xss<a href="http://www.google.de" style="color:red;" onmouseover=alert(1) onmousemove="alert(2)" onclick=alert(3)>g<img src="http://example.org"/>oogle</a>""",
            """xss<a href="http://www.google.de" rel="nofollow">g<img src="http://example.org"/>oogle</a>""",
        ),
        Case(
            "<table>Hallo\r\n<script>SCRIPT</script>\nEnde\n\r",
            "<table>Hallo\n\nEnde\n\n",
        ),
    ]

    for sample in cases:
        sanitized = policy.sanitize(sample.input)
        assert sanitized == sample.output