def test_sanitize_unchanged(self):
    # Markup that is already safe must come out of the sanitizer with the
    # same content; only the serialization of <br /> changes to <br/>.
    cases = [
        ('<a href="#">fo<br />o</a>', '<a href="#">fo<br/>o</a>'),
        ('<a href="#with:colon">foo</a>', '<a href="#with:colon">foo</a>'),
    ]
    for markup, expected in cases:
        rendered = (HTML(markup) | HTMLSanitizer()).render()
        self.assertEqual(expected, rendered)
def test_sanitize_escape_text(self):
    # Escaped character data must stay escaped after sanitizing.  The
    # inputs therefore carry real entity references: a bare '&' or a
    # literal '<foo>' element would not exercise text escaping at all.
    html = HTML('<a href="#">fo&amp;</a>')
    self.assertEqual('<a href="#">fo&amp;</a>',
                     (html | HTMLSanitizer()).render())
    html = HTML('<a href="#">&lt;foo&gt;</a>')
    self.assertEqual('<a href="#">&lt;foo&gt;</a>',
                     (html | HTMLSanitizer()).render())
def test_sanitize_remove_script_elem(self):
    # <script> elements are removed together with their content,
    # regardless of the case the tag name is written in.
    # assertEqual is used instead of the deprecated assertEquals alias.
    html = HTML(u'<script>alert("Foo")</script>')
    self.assertEqual('', (html | HTMLSanitizer()).render())
    html = HTML(u'<SCRIPT SRC="http://example.com/"></SCRIPT>')
    self.assertEqual('', (html | HTMLSanitizer()).render())
    # Tag names containing a NUL byte or a stray '&' must be rejected by
    # the parser before the sanitizer ever sees them.
    self.assertRaises(ParseError, HTML,
                      u'<SCR\0IPT>alert("foo")</SCR\0IPT>')
    self.assertRaises(ParseError, HTML,
                      u'<SCRIPT&XYZ SRC="http://example.com/"></SCRIPT>')
def test_sanitize_remove_script_elem(self):
    # <script> elements are removed together with their content,
    # regardless of the case the tag name is written in.
    # assertEqual is used instead of the deprecated assertEquals alias.
    html = HTML(u'<script>alert("Foo")</script>')
    self.assertEqual('', (html | HTMLSanitizer()).render())
    html = HTML(u'<SCRIPT SRC="http://example.com/"></SCRIPT>')
    self.assertEqual('', (html | HTMLSanitizer()).render())
    # Malformed tag names either make the parser raise, or are treated as
    # plain text.  In the latter case the rendered text is escaped, so the
    # expected strings must use entity references rather than raw '<'/'&'.
    src = u'<SCR\0IPT>alert("foo")</SCR\0IPT>'
    self.assert_parse_error_or_equal('&lt;SCR\x00IPT&gt;alert("foo")', src)
    src = u'<SCRIPT&XYZ SRC="http://example.com/"></SCRIPT>'
    self.assert_parse_error_or_equal('&lt;SCRIPT&amp;XYZ; '
                                     'SRC="http://example.com/"&gt;', src)
def test_sanitize_remove_style_scripts(self):
    # The sanitizer is configured to let the style attribute through, so
    # these cases check the filtering of the style *value*.
    # assertEqual is used instead of the deprecated assertEquals alias.
    sanitizer = HTMLSanitizer(
        safe_attrs=HTMLSanitizer.SAFE_ATTRS | set(['style']))
    # Inline style with url() using javascript: scheme
    html = HTML(u'<DIV STYLE=\'background: url(javascript:alert("foo"))\'>')
    self.assertEqual('<div/>', (html | sanitizer).render())
    # Inline style with url() using javascript: scheme, using control char.
    # The control character is written as a character reference so that
    # this case actually differs from the previous one.
    html = HTML(u'<DIV STYLE=\'background: '
                u'url(&#1;javascript:alert("foo"))\'>')
    self.assertEqual('<div/>', (html | sanitizer).render())
    # Inline style with url() using javascript: scheme, in quotes
    html = HTML(u'<DIV STYLE=\'background: url("javascript:alert(foo)")\'>')
    self.assertEqual('<div/>', (html | sanitizer).render())
    # IE expressions in CSS not allowed
    html = HTML(u'<DIV STYLE=\'width: expression(alert("foo"));\'>')
    self.assertEqual('<div/>', (html | sanitizer).render())
    html = HTML(u'<DIV STYLE=\'width: e/**/xpression(alert("foo"));\'>')
    self.assertEqual('<div/>', (html | sanitizer).render())
    # Only the offending declaration is dropped; the harmless one stays.
    html = HTML(u'<DIV STYLE=\'background: url(javascript:alert("foo"));'
                'color: #fff\'>')
    self.assertEqual('<div style="color: #fff"/>',
                     (html | sanitizer).render())
    # Inline style with url() using javascript: scheme, using unicode
    # escapes
    html = HTML(u'<DIV STYLE=\'background: '
                u'\\75rl(javascript:alert("foo"))\'>')
    self.assertEqual('<div/>', (html | sanitizer).render())
    html = HTML(u'<DIV STYLE=\'background: '
                u'\\000075rl(javascript:alert("foo"))\'>')
    self.assertEqual('<div/>', (html | sanitizer).render())
    html = HTML(u'<DIV STYLE=\'background: '
                u'\\75 rl(javascript:alert("foo"))\'>')
    self.assertEqual('<div/>', (html | sanitizer).render())
    html = HTML(u'<DIV STYLE=\'background: '
                u'\\000075 rl(javascript:alert("foo"))\'>')
    self.assertEqual('<div/>', (html | sanitizer).render())
    html = HTML(u'<DIV STYLE=\'background: '
                u'\\000075\r\nrl(javascript:alert("foo"))\'>')
    self.assertEqual('<div/>', (html | sanitizer).render())
def test_sanitize_remove_src_javascript(self):
    # Every spelling of a javascript: URL below must cause the src
    # attribute to be dropped, leaving a bare <img/>.
    html = HTML('<img src=\'javascript:alert("foo")\'>')
    self.assertEqual('<img/>', (html | HTMLSanitizer()).render())
    # Case-insensitive protocol matching
    html = HTML('<IMG SRC=\'JaVaScRiPt:alert("foo")\'>')
    self.assertEqual('<img/>', (html | HTMLSanitizer()).render())
    # Grave accents (not parsed)
    src = '<IMG SRC=`javascript:alert("RSnake says, \'foo\'")`>'
    self.assert_parse_error_or_equal('<img/>', src)
    # Protocol encoded using UTF-8 numeric entities.  The references are
    # spelled out so the parser's entity decoding is really exercised.
    html = HTML('<IMG SRC=\'&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;'
                '&#112;&#116;&#58;alert("foo")\'>')
    self.assertEqual('<img/>', (html | HTMLSanitizer()).render())
    # Protocol encoded using UTF-8 numeric entities without a semicolon
    # (which is allowed because the max number of digits is used)
    html = HTML('<IMG SRC=\'&#0000106&#0000097&#0000118&#0000097'
                '&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116'
                '&#0000058alert("foo")\'>')
    self.assertEqual('<img/>', (html | HTMLSanitizer()).render())
    # Protocol encoded using UTF-8 numeric hex entities without a semicolon
    # (which is allowed because the max number of digits is used).  Only
    # the last reference keeps its semicolon, because the following "a" of
    # "alert" is itself a hex digit.
    html = HTML('<IMG SRC=\'&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69'
                '&#x70&#x74&#x3A;alert("foo")\'>')
    self.assertEqual('<img/>', (html | HTMLSanitizer()).render())
    # Embedded tab character in protocol
    html = HTML('<IMG SRC=\'jav\tascript:alert("foo");\'>')
    self.assertEqual('<img/>', (html | HTMLSanitizer()).render())
    # Embedded tab character in protocol, but encoded this time
    html = HTML('<IMG SRC=\'jav&#x09;ascript:alert("foo");\'>')
    self.assertEqual('<img/>', (html | HTMLSanitizer()).render())
def assert_parse_error_or_equal(self, expected, exploit, allow_strip=False):
    """Check that ``exploit`` is either unparsable or sanitized as expected.

    The check passes when ``HTML(exploit)`` raises ``ParseError``.  It also
    passes when ``allow_strip`` is true and the sanitized rendering is
    empty.  Otherwise the sanitized rendering must equal ``expected``.
    """
    try:
        stream = HTML(exploit)
    except ParseError:
        # A parser rejection is an acceptable outcome.
        return
    rendered = (stream | HTMLSanitizer()).render()
    if allow_strip and not rendered:
        # Complete removal is accepted when the caller opted in.
        return
    self.assertEqual(expected, rendered)
def _gen_ticket_entry(self, t, a_class=''):
    """Build the two markup fragments that represent one ticket.

    ``t`` is a mapping read through ``.get()`` for the keys ``id``,
    ``status``, ``summary``, ``owner``, ``description`` and ``href``.
    ``a_class`` is a CSS class prefix; ``'closed'`` or ``'open'`` is
    appended to it depending on the ticket status.

    Returns a ``(ticket, ticket_short)`` tuple:

    * ``ticket`` -- a ``div`` holding a link to the ticket, with the
      rendered description attached as a ``tip`` span, followed by the
      summary and owner;
    * ``ticket_short`` -- a ``span`` holding a link to the ticket, whose
      title is the summary and owner.
    """
    # Not called ``id``, to avoid shadowing the builtin.
    ticket_id = str(t.get('id'))
    status = t.get('status')
    summary = to_unicode(t.get('summary'))
    owner = to_unicode(t.get('owner'))
    # A missing description must not break slicing; only the first 1024
    # characters are used for the tooltip.
    description = to_unicode((t.get('description') or '')[:1024])
    url = t.get('href')
    if status == 'closed':
        a_class = a_class + 'closed'
    else:
        a_class = a_class + 'open'
    markup = format_to_html(self.env, self.ref.context, description)
    # Escape, if requested
    if self.sanitize is True:
        try:
            description = HTMLParser(StringIO(markup)
                                     ).parse() | HTMLSanitizer()
        except ParseError:
            # Unparsable markup is shown escaped instead of being dropped.
            description = escape(markup)
    else:
        description = markup

    # Replace tags that destruct tooltips too much
    desc = self.end_RE.sub(']', Markup(description))
    desc = self.del_RE.sub('', desc)
    # need 2nd run after purging newline in table cells in 1st run
    desc = self.del_RE.sub('', desc)
    desc = self.item_RE.sub('X', desc)
    desc = self.tab_RE.sub('[|||]', desc)
    description = self.open_RE.sub('[', desc)

    tip = tag.span(Markup(description))
    ticket = '#' + ticket_id
    ticket = tag.a(ticket, href=url)
    ticket(tip, class_='tip', target='_blank')
    ticket = tag.div(ticket)
    ticket(class_=a_class, align='left')
    # fix stripping of regular leading space in IE: a non-breaking space
    # entity is emitted as markup, since a plain space would be stripped
    blank = '&nbsp;'
    ticket(Markup(blank), summary, ' (', owner, ')')

    summary = tag(summary, ' (', owner, ')')
    ticket_short = '#' + ticket_id
    ticket_short = tag.a(ticket_short, href=url)
    ticket_short(target='_blank', title_=summary)
    ticket_short = tag.span(ticket_short)
    ticket_short(class_=a_class)

    return ticket, ticket_short
def test_sanitize_remove_style_phishing(self):
    # The sanitizer is configured to let the style attribute through, so
    # these cases check which declarations survive inside it.
    # assertEqual is used instead of the deprecated assertEquals alias.
    sanitizer = HTMLSanitizer(
        safe_attrs=HTMLSanitizer.SAFE_ATTRS | set(['style']))
    # The position property is not allowed
    html = HTML(u'<div style="position:absolute;top:0"></div>')
    self.assertEqual('<div style="top:0"/>', (html | sanitizer).render())
    # Normal margins get passed through
    html = HTML(u'<div style="margin:10px 20px"></div>')
    self.assertEqual('<div style="margin:10px 20px"/>',
                     (html | sanitizer).render())
    # But not negative margins
    html = HTML(u'<div style="margin:-1000px 0 0"></div>')
    self.assertEqual('<div/>', (html | sanitizer).render())
    html = HTML(u'<div style="margin-left:-2000px 0 0"></div>')
    self.assertEqual('<div/>', (html | sanitizer).render())
    html = HTML(u'<div style="margin-left:1em 1em 1em -4000px"></div>')
    self.assertEqual('<div/>', (html | sanitizer).render())
def test_sanitize_remove_onclick_attr(self):
    # An onclick handler must not survive sanitizing.
    markup = '<div onclick=\'alert("foo")\' />'
    sanitized = HTML(markup) | HTMLSanitizer()
    self.assertEqual('<div/>', sanitized.render())
def test_sanitize_close_empty_tag(self):
    # An unclosed <br> in the input is rendered as a self-closed <br/>.
    markup = '<a href="#">fo<br>o</a>'
    sanitized = HTML(markup) | HTMLSanitizer()
    self.assertEqual('<a href="#">fo<br/>o</a>', sanitized.render())
def test_sanitize_invalid_entity(self):
    # An unknown entity reference is kept as literal text, so its
    # ampersand has to come out escaped in the rendered markup.
    html = HTML('&junk;')
    self.assertEqual('&amp;junk;', (html | HTMLSanitizer()).render())
def test_sanitize_entityref_text(self):
    # A named entity reference in text is resolved to its character.  The
    # input uses the reference itself (&ouml;) and the expectation uses an
    # escape sequence, which keeps this source ASCII-only.
    html = HTML('<a href="#">fo&ouml;</a>')
    self.assertEqual(u'<a href="#">fo\xf6</a>',
                     (html | HTMLSanitizer()).render(encoding=None))
def test_sanitize_escape_attr(self):
    # Escaped characters inside an attribute value must stay escaped after
    # sanitizing, so both input and expectation use entity references.
    html = HTML('<div title="&lt;foo&gt;"></div>')
    self.assertEqual('<div title="&lt;foo&gt;"/>',
                     (html | HTMLSanitizer()).render())
def test_sanitize_entityref_text(self):
    # A named entity reference in text is resolved to its character.  The
    # input uses the reference itself (&ouml;) and the expectation uses an
    # escape sequence, which keeps this source ASCII-only.
    # render(encoding=None) replaces the Python-2-only unicode() call, and
    # assertEqual replaces the deprecated assertEquals alias.
    html = HTML('<a href="#">fo&ouml;</a>')
    self.assertEqual(u'<a href="#">fo\xf6</a>',
                     (html | HTMLSanitizer()).render(encoding=None))
def test_sanitize_close_empty_tag(self):
    # An unclosed <br> in the input is rendered as a self-closed <br/>.
    # render(encoding=None) replaces the Python-2-only unicode() call, and
    # assertEqual replaces the deprecated assertEquals alias.
    html = HTML('<a href="#">fo<br>o</a>')
    self.assertEqual(u'<a href="#">fo<br/>o</a>',
                     (html | HTMLSanitizer()).render(encoding=None))
def test_sanitize_unchanged(self):
    # Markup that is already safe keeps its content; only the
    # serialization of <br /> changes to <br/>.
    # render(encoding=None) replaces the Python-2-only unicode() call, and
    # assertEqual replaces the deprecated assertEquals alias.
    html = HTML('<a href="#">fo<br />o</a>')
    self.assertEqual(u'<a href="#">fo<br/>o</a>',
                     (html | HTMLSanitizer()).render(encoding=None))
def test_sanitize_invalid_entity(self):
    # An unknown entity reference is kept as literal text, so its
    # ampersand has to come out escaped in the rendered markup.
    # render(encoding=None) replaces the Python-2-only unicode() call, and
    # assertEqual replaces the deprecated assertEquals alias.
    html = HTML('&junk;')
    self.assertEqual(u'&amp;junk;',
                     (html | HTMLSanitizer()).render(encoding=None))
def test_sanitize_remove_input_password(self):
    # A password input is removed, leaving its form empty.
    markup = '<form><input type="password" /></form>'
    sanitized = HTML(markup) | HTMLSanitizer()
    self.assertEqual('<form/>', sanitized.render())
def test_sanitize_remove_comments(self):
    # Comments are dropped, leaving the enclosing element empty.
    markup = '''<div><!-- conditional comment crap --></div>'''
    sanitized = HTML(markup) | HTMLSanitizer()
    self.assertEqual('<div/>', sanitized.render())
def StyleSanitizer():
    """Return an ``HTMLSanitizer`` whose safe attributes include ``style``.

    The safe attribute set is ``HTMLSanitizer.SAFE_ATTRS`` extended with
    ``'style'``.
    """
    return HTMLSanitizer(
        safe_attrs=HTMLSanitizer.SAFE_ATTRS | frozenset(['style']))
res = re.search(r'FRAGMENT\(([^)]*)\)', line) if res: current_fragment_name = res.groups()[0] else: if current_fragment_name == fragment_name: fragment.append(line) out = '\n'.join(fragment) # If we have a preview format, use it if dest_format: # We can trust the output and do not need to call the HTML sanitizer # below. The HTML sanitization leads to whitespace being stripped. safe_content = True out = Mimeview(self.env).render(ctxt, dest_format, out, force_source=True) # Escape if needed if not safe_content and not self.config.getbool('wiki', 'render_unsafe_content', False): try: out = HTMLParser(StringIO(out)).parse() | HTMLSanitizer() except ParseError: out = escape(out) return out # IPermissionRequestor methods def get_permission_actions(self): yield 'INCLUDE_URL'
def assert_parse_error_or_equal(self, expected, exploit):
    """Check that ``exploit`` is either unparsable or sanitized as expected.

    The check passes when ``HTML(exploit)`` raises ``ParseError``;
    otherwise the sanitized rendering must equal ``expected``.
    """
    try:
        stream = HTML(exploit)
    except ParseError:
        # A parser rejection is an acceptable outcome.
        return
    else:
        sanitized = stream | HTMLSanitizer()
        self.assertEqual(expected, sanitized.render())