def test_sanitize_remove_script_elem(self):
    # <script> elements are expected to be removed completely, whatever the
    # tag case or attributes; unparseable script tags must raise instead.
    markup = Markup('<script>alert("Foo")</script>')
    self.assertEqual('', markup.sanitize())
    markup = Markup('<SCRIPT SRC="http://example.com/"></SCRIPT>')
    self.assertEqual('', markup.sanitize())
    # Tag name split by a NUL character: expected to be rejected by the parser
    markup = Markup('<SCR\0IPT>alert("foo")</SCR\0IPT>')
    self.assertRaises(HTMLParseError, markup.sanitize)
    # Stray '&' glued to the tag name: also expected to be rejected
    markup = Markup('<SCRIPT&XYZ SRC="http://example.com/"></SCRIPT>')
    self.assertRaises(HTMLParseError, markup.sanitize)
def test_sanitize_remove_script_elem(self):
    # <script> elements are expected to be removed completely, whatever the
    # tag case or attributes; unparseable script tags must raise instead.
    markup = Markup('<script>alert("Foo")</script>')
    self.assertEqual('', markup.sanitize())
    markup = Markup('<SCRIPT SRC="http://example.com/"></SCRIPT>')
    self.assertEqual('', markup.sanitize())
    # Tag name split by a NUL character: expected to be rejected by the parser
    markup = Markup('<SCR\0IPT>alert("foo")</SCR\0IPT>')
    self.assertRaises(HTMLParseError, markup.sanitize)
    # Stray '&' glued to the tag name: also expected to be rejected
    markup = Markup('<SCRIPT&XYZ SRC="http://example.com/"></SCRIPT>')
    self.assertRaises(HTMLParseError, markup.sanitize)
def test_sanitize_remove_style_scripts(self):
    # Scriptable CSS in an inline style attribute is expected to be dropped,
    # while harmless declarations in the same attribute are kept.

    # Inline style with url() using javascript: scheme
    markup = Markup(
        '<DIV STYLE=\'background: url(javascript:alert("foo"))\'>')
    self.assertEqual('<div>', markup.sanitize())
    # Inline style with url() using javascript: scheme, using control char
    # NOTE(review): no control character is visible in this literal; it is
    # identical to the previous case. Presumably it was lost upstream --
    # confirm against the original test.
    markup = Markup(
        '<DIV STYLE=\'background: url(javascript:alert("foo"))\'>')
    self.assertEqual('<div>', markup.sanitize())
    # Inline style with url() using javascript: scheme, in quotes
    markup = Markup(
        '<DIV STYLE=\'background: url("javascript:alert(foo)")\'>')
    self.assertEqual('<div>', markup.sanitize())
    # IE expressions in CSS not allowed
    markup = Markup('<DIV STYLE=\'width: expression(alert("foo"));\'>')
    self.assertEqual('<div>', markup.sanitize())
    # Only the offending declaration is removed; "color" must survive
    markup = Markup(
        '<DIV STYLE=\'background: url(javascript:alert("foo"));'
        'color: #fff\'>')
    self.assertEqual('<div style="color: #fff">', markup.sanitize())
def test_sanitize_remove_src_javascript(self):
    # A javascript: URL in an <img> src attribute is expected to be removed
    # however the scheme is spelled; unparseable markup must raise instead.
    markup = Markup('<img src=\'javascript:alert("foo")\'>')
    self.assertEqual('<img />', markup.sanitize())
    # Case-insensitive protocol matching
    markup = Markup('<IMG SRC=\'JaVaScRiPt:alert("foo")\'>')
    self.assertEqual('<img />', markup.sanitize())
    # Grave accents (not parsed)
    markup = Markup('<IMG SRC=`javascript:alert("RSnake says, \'foo\'")`>')
    self.assertRaises(HTMLParseError, markup.sanitize)
    # NOTE(review): the next three cases are described as entity-encoded, but
    # the literals contain the plain text "javascript:". Presumably the
    # numeric entities were decoded upstream -- confirm against the original.
    # Protocol encoded using UTF-8 numeric entities
    markup = Markup(
        '<IMG SRC=\'javascri'
        'pt:alert("foo")\'>')
    self.assertEqual('<img />', markup.sanitize())
    # Protocol encoded using UTF-8 numeric entities without a semicolon
    # (which is allowed because the max number of digits is used)
    markup = Markup(
        '<IMG SRC=\'java'
        'script'
        ':alert("foo")\'>')
    self.assertEqual('<img />', markup.sanitize())
    # Protocol encoded using UTF-8 numeric hex entities without a semicolon
    # (which is allowed because the max number of digits is used)
    markup = Markup('<IMG SRC=\'javascri'
                    'pt:alert("foo")\'>')
    self.assertEqual('<img />', markup.sanitize())
    # Embedded tab character in protocol
    markup = Markup('<IMG SRC=\'jav\tascript:alert("foo");\'>')
    self.assertEqual('<img />', markup.sanitize())
    # Embedded tab character in protocol, but encoded this time
    # NOTE(review): the literal below holds a raw tab rather than an encoded
    # one -- confirm against the original test.
    markup = Markup('<IMG SRC=\'jav	ascript:alert("foo");\'>')
    self.assertEqual('<img />', markup.sanitize())
def test_sanitize_remove_src_javascript(self):
    # A javascript: URL in an <img> src attribute is expected to be removed
    # however the scheme is spelled; unparseable markup must raise instead.
    markup = Markup('<img src=\'javascript:alert("foo")\'>')
    self.assertEqual('<img />', markup.sanitize())
    # Case-insensitive protocol matching
    markup = Markup('<IMG SRC=\'JaVaScRiPt:alert("foo")\'>')
    self.assertEqual('<img />', markup.sanitize())
    # Grave accents (not parsed)
    markup = Markup('<IMG SRC=`javascript:alert("RSnake says, \'foo\'")`>')
    self.assertRaises(HTMLParseError, markup.sanitize)
    # NOTE(review): the next three cases are described as entity-encoded, but
    # the literals contain the plain text "javascript:". Presumably the
    # numeric entities were decoded upstream -- confirm against the original.
    # Protocol encoded using UTF-8 numeric entities
    markup = Markup('<IMG SRC=\'javascri'
                    'pt:alert("foo")\'>')
    self.assertEqual('<img />', markup.sanitize())
    # Protocol encoded using UTF-8 numeric entities without a semicolon
    # (which is allowed because the max number of digits is used)
    markup = Markup('<IMG SRC=\'java'
                    'script'
                    ':alert("foo")\'>')
    self.assertEqual('<img />', markup.sanitize())
    # Protocol encoded using UTF-8 numeric hex entities without a semicolon
    # (which is allowed because the max number of digits is used)
    markup = Markup('<IMG SRC=\'javascri'
                    'pt:alert("foo")\'>')
    self.assertEqual('<img />', markup.sanitize())
    # Embedded tab character in protocol
    markup = Markup('<IMG SRC=\'jav\tascript:alert("foo");\'>')
    self.assertEqual('<img />', markup.sanitize())
    # Embedded tab character in protocol, but encoded this time
    # NOTE(review): the literal below holds a raw tab rather than an encoded
    # one -- confirm against the original test.
    markup = Markup('<IMG SRC=\'jav	ascript:alert("foo");\'>')
    self.assertEqual('<img />', markup.sanitize())
def test_sanitize_remove_style_scripts(self):
    # Scriptable CSS in an inline style attribute is expected to be dropped,
    # while harmless declarations in the same attribute are kept.

    # Inline style with url() using javascript: scheme
    markup = Markup(
        '<DIV STYLE=\'background: url(javascript:alert("foo"))\'>')
    self.assertEqual('<div>', markup.sanitize())
    # Inline style with url() using javascript: scheme, using control char
    # NOTE(review): no control character is visible in this literal; it is
    # identical to the previous case. Presumably it was lost upstream --
    # confirm against the original test.
    markup = Markup(
        '<DIV STYLE=\'background: url(javascript:alert("foo"))\'>')
    self.assertEqual('<div>', markup.sanitize())
    # Inline style with url() using javascript: scheme, in quotes
    markup = Markup(
        '<DIV STYLE=\'background: url("javascript:alert(foo)")\'>')
    self.assertEqual('<div>', markup.sanitize())
    # IE expressions in CSS not allowed
    markup = Markup('<DIV STYLE=\'width: expression(alert("foo"));\'>')
    self.assertEqual('<div>', markup.sanitize())
    # Only the offending declaration is removed; "color" must survive
    markup = Markup(
        '<DIV STYLE=\'background: url(javascript:alert("foo"));'
        'color: #fff\'>')
    self.assertEqual('<div style="color: #fff">', markup.sanitize())
def test_sanitize_unchanged(self):
    # Harmless, well-formed markup is expected to come back exactly as given.
    markup = Markup('<a href="#">fo<br />o</a>')
    self.assertEqual('<a href="#">fo<br />o</a>', markup.sanitize())
def test_sanitize_remove_onclick_attr(self):
    # The onclick handler is expected to be dropped; the self-closed <div />
    # is expected to be serialized as an explicit open/close pair.
    markup = Markup('<div onclick=\'alert("foo")\' />')
    self.assertEqual('<div></div>', markup.sanitize())
def test_sanitize_invalid_entity(self):
    # An unknown entity reference is expected to pass through with the text
    # asserted below rather than raising or being dropped.
    markup = Markup('&junk;')
    self.assertEqual('&junk;', markup.sanitize())
def test_sanitize_escape_attr(self):
    # The title attribute value is expected to come out as asserted below.
    # NOTE(review): the test name implies escaping, yet input and expected
    # output both contain a raw "<foo>". Presumably the literals originally
    # used &lt;/&gt; entities -- confirm against the original test.
    markup = Markup('<div title="<foo>"></div>')
    self.assertEqual('<div title="<foo>"></div>', markup.sanitize())
def test_sanitize_entityref_text(self):
    # Text containing "ö" is expected to come back as the unicode string
    # asserted below.
    # NOTE(review): the test name refers to an entity reference, but the
    # input holds a literal "ö". Presumably it was originally written as an
    # entity -- confirm against the original test.
    markup = Markup('<a href="#">foö</a>')
    self.assertEqual(u'<a href="#">foö</a>', markup.sanitize())
def test_sanitize_escape_text(self):
    # Special characters in element text are expected to come out as
    # asserted below.
    # NOTE(review): the test name implies escaping, yet the literals contain
    # a raw "&" and "<foo>". Presumably they originally used &amp;/&lt;/&gt;
    # entities -- confirm against the original test.
    markup = Markup('<a href="#">fo&</a>')
    self.assertEqual('<a href="#">fo&</a>', markup.sanitize())
    markup = Markup('<a href="#"><foo></a>')
    self.assertEqual('<a href="#"><foo></a>', markup.sanitize())
def test_sanitize_unchanged(self):
    # Harmless, well-formed markup is expected to come back exactly as given.
    markup = Markup('<a href="#">fo<br />o</a>')
    self.assertEqual('<a href="#">fo<br />o</a>', markup.sanitize())
def test_sanitize_remove_onclick_attr(self):
    # The onclick handler is expected to be dropped; the self-closed <div />
    # is expected to be serialized as an explicit open/close pair.
    markup = Markup('<div onclick=\'alert("foo")\' />')
    self.assertEqual('<div></div>', markup.sanitize())
def test_sanitize_escape_text(self):
    # Special characters in element text are expected to come out as
    # asserted below.
    # NOTE(review): the test name implies escaping, yet the literals contain
    # a raw "&" and "<foo>". Presumably they originally used &amp;/&lt;/&gt;
    # entities -- confirm against the original test.
    markup = Markup('<a href="#">fo&</a>')
    self.assertEqual('<a href="#">fo&</a>', markup.sanitize())
    markup = Markup('<a href="#"><foo></a>')
    self.assertEqual('<a href="#"><foo></a>', markup.sanitize())
def test_sanitize_escape_attr(self):
    # The title attribute value is expected to come out as asserted below.
    # NOTE(review): the test name implies escaping, yet input and expected
    # output both contain a raw "<foo>". Presumably the literals originally
    # used &lt;/&gt; entities -- confirm against the original test.
    markup = Markup('<div title="<foo>"></div>')
    self.assertEqual('<div title="<foo>"></div>', markup.sanitize())
def test_sanitize_entityref_text(self):
    # Text containing "ö" is expected to come back as the unicode string
    # asserted below.
    # NOTE(review): the test name refers to an entity reference, but the
    # input holds a literal "ö". Presumably it was originally written as an
    # entity -- confirm against the original test.
    markup = Markup('<a href="#">foö</a>')
    self.assertEqual(u'<a href="#">foö</a>', markup.sanitize())
def test_sanitize_invalid_entity(self):
    # An unknown entity reference is expected to pass through with the text
    # asserted below rather than raising or being dropped.
    markup = Markup('&junk;')
    self.assertEqual('&junk;', markup.sanitize())
def test_sanitize_close_empty_tag(self):
    # An unclosed empty element (<br>) is expected to be emitted in
    # self-closed form (<br />); the surrounding markup stays intact.
    markup = Markup('<a href="#">fo<br>o</a>')
    self.assertEqual('<a href="#">fo<br />o</a>', markup.sanitize())
def test_sanitize_close_empty_tag(self):
    # An unclosed empty element (<br>) is expected to be emitted in
    # self-closed form (<br />); the surrounding markup stays intact.
    markup = Markup('<a href="#">fo<br>o</a>')
    self.assertEqual('<a href="#">fo<br />o</a>', markup.sanitize())