def test_sanitize_remove_script_elem(self):
    """Check that ``HTMLSanitizer`` removes ``<script>`` elements.

    Well-formed script elements (inline code or an external ``SRC``) must
    render to the empty string.  Obfuscated tag names are checked through
    ``assert_parse_error_or_equal``, a helper defined elsewhere on the test
    case (presumably it accepts either a parse error or the given output --
    confirm against its definition).
    """
    for markup in (u'<script>alert("Foo")</script>',
                   u'<SCRIPT SRC="http://example.com/"></SCRIPT>'):
        self.assertEqual('', (HTML(markup) | HTMLSanitizer()).render())

    # The expected values are serialized markup: a mangled tag that ends up
    # as character data must show up escaped (&lt; / &gt; / &amp;), never as
    # a raw "<SCRIPT", which would mean the sanitizer let the tag through.
    src = u'<SCR\0IPT>alert("foo")</SCR\0IPT>'
    self.assert_parse_error_or_equal('&lt;SCR\x00IPT&gt;alert("foo")', src,
                                     allow_strip=True)
    src = u'<SCRIPT&XYZ SRC="http://example.com/"></SCRIPT>'
    self.assert_parse_error_or_equal('&lt;SCRIPT&amp;XYZ; '
                                     'SRC="http://example.com/"&gt;', src,
                                     allow_strip=True)
def gettags(self):
    """Return ``self.tags`` converted via ``md.convert`` and sanitized.

    Returns ``None`` when no tags are set.  If the converted markup cannot
    be parsed (``ParseError``), the raw ``self.tags`` value is returned
    unchanged instead.
    """
    if self.tags is not None:
        try:
            # Parse the converted markup and chain the sanitizer filter.
            return HTML(md.convert(self.tags)) | HTMLSanitizer()
        except ParseError:
            return self.tags
    return None
def getplanned(self):
    """Return ``self.planned`` converted via ``md.convert`` and sanitized.

    Returns ``None`` when nothing is planned.  If the converted markup
    cannot be parsed (``ParseError``), the raw ``self.planned`` value is
    returned unchanged instead.
    """
    if self.planned is not None:
        try:
            # Parse the converted markup and chain the sanitizer filter.
            return HTML(md.convert(self.planned)) | HTMLSanitizer()
        except ParseError:
            return self.planned
    return None
def test_fill_input_checkbox_no_value(self):
    """A checkbox gains no attributes when the filler has no data."""
    filled = HTML("""<form><p> <input type="checkbox" name="foo" /> </p></form>""") | HTMLFormFiller()
    # assertEqual, not the deprecated assertEquals alias (removed in 3.12).
    self.assertEqual("""<form><p> <input type="checkbox" name="foo"/> </p></form>""",
                     unicode(filled))
def test_fill_textarea_single_value(self):
    """A scalar data value becomes the textarea's text content."""
    filled = HTML("""<form><p> <textarea name="foo"></textarea> </p></form>""") | HTMLFormFiller(data={'foo': 'bar'})
    # assertEqual, not the deprecated assertEquals alias (removed in 3.12).
    self.assertEqual("""<form><p> <textarea name="foo">bar</textarea> </p></form>""",
                     unicode(filled))
def test_fill_input_hidden_single_value(self):
    """A scalar data value is written to a hidden input's ``value``."""
    filled = HTML("""<form><p> <input type="hidden" name="foo" /> </p></form>""") | HTMLFormFiller(data={'foo': 'bar'})
    # assertEqual, not the deprecated assertEquals alias (removed in 3.12).
    self.assertEqual("""<form><p> <input type="hidden" name="foo" value="bar"/> </p></form>""",
                     unicode(filled))
def test_fill_input_text_multi_value(self):
    """A one-element list value is written to a text input's ``value``."""
    filled = HTML("""<form><p> <input type="text" name="foo" /> </p></form>""") | HTMLFormFiller(data={'foo': ['bar']})
    # assertEqual, not the deprecated assertEquals alias (removed in 3.12).
    self.assertEqual("""<form><p> <input type="text" name="foo" value="bar"/> </p></form>""",
                     unicode(filled))
def test_fill_input_password_disabled(self):
    """Password inputs stay empty when ``passwords`` is not enabled."""
    filled = HTML(u"""<form><p> <input type="password" name="pass" /> </p></form>""") | HTMLFormFiller(data={'pass': '******'})
    # Data for "pass" is supplied, yet no value attribute may be rendered.
    # assertEqual, not the deprecated assertEquals alias (removed in 3.12).
    self.assertEqual("""<form><p> <input type="password" name="pass"/> </p></form>""",
                     filled.render())
def comment(self, id, cancel=False, **data):
    """Show or process the comment form for the link stored under ``id``.

    On POST the submitted fields are validated with ``CommentForm``, the
    ``content`` field is sanitized and re-rendered as XHTML, and the comment
    is added to the link.  A successful non-XHR submission (or a cancelled
    one) redirects to the link's info page; a successful XHR submission
    returns the rendered ``_comment.html`` fragment.  For non-POST requests
    and for failed validation the form is rendered, pre-filled with
    ``data``.

    Raises ``cherrypy.NotFound`` when no link exists for ``id``.
    """
    link = self.data.get(id)
    if not link:
        raise cherrypy.NotFound()

    # Stays empty unless a POSTed form fails validation below.
    errors = {}
    if cherrypy.request.method == 'POST':
        if cancel:
            raise cherrypy.HTTPRedirect('/info/%s' % link.id)
        validator = CommentForm()
        try:
            data = validator.to_python(data)
            # Strip unsafe markup before the content is stored.
            sanitized = HTML(data['content']) | HTMLSanitizer()
            data['content'] = sanitized.render('xhtml')
            new_comment = link.add_comment(**data)
            if ajax.is_xhr():
                return template.render('_comment.html', comment=new_comment,
                                       num=len(link.comments))
            raise cherrypy.HTTPRedirect('/info/%s' % link.id)
        except Invalid as exc:
            errors = exc.unpack_errors()

    if ajax.is_xhr():
        page = template.render('_form.html', link=link, errors=errors)
    else:
        page = template.render(link=link, comment=None, errors=errors)
    # Re-populate the form fields with whatever the user submitted.
    return page | HTMLFormFiller(data=data)
def test_fill_textarea_multi_value(self):
    """A one-element list value becomes the textarea's text content."""
    filled = HTML(u"""<form><p> <textarea name="foo"></textarea> </p></form>""") | HTMLFormFiller(data={'foo': ['bar']})
    # assertEqual, not the deprecated assertEquals alias (removed in 3.12).
    self.assertEqual("""<form><p> <textarea name="foo">bar</textarea> </p></form>""",
                     filled.render())
def html(macro, environ, data, *args, **kwargs):
    """Displays raw HTML content.

    This macro allows you to display raw HTML with any **safe** content.

    **NB:** Any elements considered unsafe are automatically stripped.

    **Arguments:** //No Arguments//

    **Example(s):**
    {{{
    <<html>>
    <h1>Hello World!</h1>
    <</html>>
    }}}

    <<html>>
    <h1>Hello World!</h1>
    <</html>>
    """
    # NOTE(review): the docstring wording is kept as-is because it looks like
    # wiki markup that may be displayed to users -- confirm before rewording.
    body = macro.body
    if not body:
        # Nothing between the macro tags: there is nothing to render.
        return None

    # Parse the body, strip unsafe markup, then serialize back to a string.
    cleaned = sanitizer(HTML(body))
    return Markup("".join(serializer(cleaned)))
def test_fill_input_hidden_multi_value(self):
    """A one-element list value is written to a hidden input's ``value``."""
    filled = HTML(u"""<form><p> <input type="hidden" name="foo" /> </p></form>""") | HTMLFormFiller(data={'foo': ['bar']})
    # assertEqual, not the deprecated assertEquals alias (removed in 3.12).
    self.assertEqual("""<form><p> <input type="hidden" name="foo" value="bar"/> </p></form>""",
                     filled.render())
def check_report_permission():
    """Rebuild the report listing from the rows the user may access.

    ``buffer``, ``user`` and ``self`` are not defined here; they are taken
    from the enclosing scope.  The stringified buffer is split on ``</tr>``;
    a chunk is kept only if it contains a ``/report/<id>"`` link and
    ``self._has_permission(user, <id>)`` is true.  The kept chunks are
    concatenated and parsed with ``HTML``.
    """
    delimiter = '</tr>'
    marker = '/report/'
    permitted = []
    for report in str(buffer).split(delimiter):
        if not report:
            continue
        # Locate the report id between '/report/' and the closing quote.
        start = report.find(marker)
        if start == -1:
            continue
        end = report.find('"', start)
        if end == -1:
            continue
        report_id = report[start + len(marker):end]
        if self._has_permission(user, report_id):
            permitted.append(report)
    # NOTE(review): the '</tr>' delimiter is not re-inserted, so kept rows
    # are handed to HTML() without their closing tag -- confirm intended.
    return HTML(''.join(permitted))
def test_fill_input_password_enabled(self):
    """With ``passwords=True`` the filler populates password inputs."""
    # The supplied data must be the value asserted below ("1234"); a
    # different fill value could never yield the expected markup.
    filled = HTML(u"""<form><p> <input type="password" name="pass" /> </p></form>""") | HTMLFormFiller(data={'pass': '1234'}, passwords=True)
    # assertEqual, not the deprecated assertEquals alias (removed in 3.12).
    self.assertEqual("""<form><p> <input type="password" name="pass" value="1234"/> </p></form>""",
                     filled.render())
def test_fill_input_hidden_no_value(self):
    """A hidden input gains no ``value`` when the filler has no data."""
    filled = HTML(u"""<form><p> <input type="hidden" name="foo" /> </p></form>""") | HTMLFormFiller()
    # assertEqual, not the deprecated assertEquals alias (removed in 3.12).
    self.assertEqual("""<form><p> <input type="hidden" name="foo"/> </p></form>""",
                     filled.render())
def test_fill_input_text_single_value(self):
    """A scalar data value is written to a text input's ``value``."""
    filled = HTML(u"""<form><p> <input type="text" name="foo" /> </p></form>""") | HTMLFormFiller(data={'foo': 'bar'})
    # assertEqual, not the deprecated assertEquals alias (removed in 3.12).
    self.assertEqual("""<form><p> <input type="text" name="foo" value="bar"/> </p></form>""",
                     filled.render())
def analytic_info_html(category: str, book: ez_epub.Book, url):
    """Fetch the page at ``url`` and copy its metadata onto ``book``.

    The title, subtitle, authors, cover image and synopsis are pulled out of
    the page with XPath queries.  The synopsis becomes the book description,
    ``category`` is stored as the ``tag`` meta entry, and the cover image is
    downloaded and attached.  Returns the same ``book`` object.

    An ``IndexError`` propagates if the title, subtitle, image or synopsis
    query matches nothing, because the first match is taken unconditionally.
    """
    print('Getting info of ' + url)
    page = session.get(url=url)
    tree = html.fromstring(page.content)

    title = tree.xpath("//h1[@class='book__header__title']/text()")[0].strip()
    subtitle = tree.xpath(
        "//h2[@class='book__header__subtitle']/text()")[0].strip()

    # Author text nodes carry a "By " prefix that is removed.
    authors = []
    for author in tree.xpath("//div[@class='book__header__author']/text()"):
        authors.append(author.strip().replace('By ', ''))

    # Not extracted yet: the category links
    # (//div[@class='book__header__info__category']//a/text()).
    cover_url = tree.xpath("//div[@class='book__header__image']/img/@src")[0]
    synopsis_node = tree.xpath("//div[@ref='synopsis']")[0]
    # Not extracted yet: the FAQ block (//div[@class='book__faq']).

    synopsis_markup = html.tostring(synopsis_node)
    book.impl.description = HTML(synopsis_markup, encoding='utf-8')

    for meta_name, meta_value in (('publisher', 'Blinkist'),
                                  ('tag', category),
                                  ('subtitle', subtitle)):
        book.impl.addMeta(meta_name, meta_value)
    # TODO: who is it for?, about the author

    cover_bytes = session.get(cover_url).content
    book.impl.addCover(fileobj=io.BytesIO(cover_bytes))

    book.title = title
    book.authors = authors
    book.impl.url = url
    return book
def test_fill_input_radio_multi_empty_string(self):
    """A radio with ``value=""`` is checked when the data list holds ``''``."""
    stream = HTML(u"""<form><p> <input type="radio" name="foo" value="" /> </p></form>""")
    # assertEqual, not the deprecated assertEquals alias (removed in 3.12).
    self.assertEqual("""<form><p> <input type="radio" name="foo" value="" checked="checked"/> </p></form>""",
                     (stream | HTMLFormFiller(data={'foo': ['']})).render())
def test_fill_input_checkbox_single_value_auto_no_value(self):
    """A value-less checkbox gains no attributes when the filler has no data."""
    filled = HTML(u"""<form><p> <input type="checkbox" name="foo" /> </p></form>""") | HTMLFormFiller()
    # assertEqual, not the deprecated assertEquals alias (removed in 3.12).
    self.assertEqual("""<form><p> <input type="checkbox" name="foo"/> </p></form>""",
                     filled.render())
def test_property_name(self):
    """Only some style properties survive ``TracHTMLSanitizer``.

    ``display`` and ``border-left-color`` are expected to be kept, while
    ``user_defined`` and ``-moz-user-selct`` are expected to be dropped.
    """
    source = ('<div style="display:none;border-left-color:red;'
              'user_defined:1;-moz-user-selct:-moz-all">prop</div>')
    expected = ('<div style="display:none; border-left-color:red'
                '">prop</div>')
    stream = HTML(source, encoding='utf-8')
    self.assertEqual(expected, unicode(stream | TracHTMLSanitizer()))
def test_sanitize_remove_style_phishing(self):
    """``StyleSanitizer`` drops declarations usable for overlay phishing.

    Each case pairs input markup with the markup expected after filtering
    through one shared ``StyleSanitizer`` instance.
    """
    sanitizer = StyleSanitizer()
    cases = (
        # The position property is not allowed
        ('<div style="position:absolute;top:0"></div>',
         '<div style="top:0"/>'),
        # Normal margins get passed through
        ('<div style="margin:10px 20px"></div>',
         '<div style="margin:10px 20px"/>'),
        # But not negative margins
        ('<div style="margin:-1000px 0 0"></div>', '<div/>'),
        ('<div style="margin-left:-2000px 0 0"></div>', '<div/>'),
        ('<div style="margin-left:1em 1em 1em -4000px"></div>', '<div/>'),
    )
    for markup, expected in cases:
        self.assertEqual(expected, (HTML(markup) | sanitizer).render())
def test_fill_textarea_no_value(self):
    """An empty textarea stays empty when the filler has no data."""
    filled = HTML(u"""<form><p> <textarea name="foo"></textarea> </p></form>""") | HTMLFormFiller()
    # The empty element is expected to be rendered in self-closing form.
    # assertEqual, not the deprecated assertEquals alias (removed in 3.12).
    self.assertEqual("""<form><p> <textarea name="foo"/> </p></form>""",
                     filled.render())
def test_fill_input_text_no_value(self):
    """A text input gains no ``value`` when the filler has no data."""
    markup = """<form><p> <input type="text" name="foo" /> </p></form>"""
    expected = """<form><p> <input type="text" name="foo"/> </p></form>"""
    filled = HTML(markup) | HTMLFormFiller()
    self.assertEqual(expected, filled.render())
def test(expected, content):
    """Assert that sanitizing ``content`` yields ``expected``.

    Uses a ``TracHTMLSanitizer`` restricted to the ``http`` and ``data``
    schemes with a fixed list of safe origins.  ``self`` is not a parameter;
    it comes from the enclosing scope.
    """
    stream = HTML(content)
    allowed_origins = ['data:', 'http://example.net', 'https://example.org/']
    sanitizer = TracHTMLSanitizer(safe_schemes=['http', 'data'],
                                  safe_origins=allowed_origins)
    self.assertEqual(expected, unicode(stream | sanitizer))
def test_sanitize_unsafe_props(self):
    """``StyleSanitizer`` strips style attributes made of unsafe properties.

    Every input below carries only disallowed declarations, so the whole
    ``style`` attribute is expected to disappear from the ``<div>``.
    """
    unsafe_markup = (
        '<div style="POSITION:RELATIVE">XSS</div>',
        '<div style="behavior:url(test.htc)">XSS</div>',
        ('<div style="-ms-behavior:url(test.htc) url(#obj)">'
         'XSS</div>'),
        ("""<div style="-o-link:'javascript:alert(1)';"""
         """-o-link-source:current">XSS</div>"""),
        """<div style="-moz-binding:url(xss.xbl)">XSS</div>""",
    )
    for markup in unsafe_markup:
        # A fresh sanitizer is used for each case.
        self.assertEqual('<div>XSS</div>',
                         str(HTML(markup) | StyleSanitizer()))
def test_sanitize_remove_src_javascript(self):
    """``HTMLSanitizer`` removes ``javascript:`` URLs from ``<img src>``.

    Covers the plain protocol, mixed case, grave-accent quoting, the
    protocol spelled with numeric character references (decimal with
    semicolons, zero-padded decimal without, hexadecimal without), and a tab
    embedded in the protocol both literally and as a character reference.
    In every case only a bare ``<img/>`` may remain.
    """
    def assert_src_removed(markup):
        # Each check uses a fresh sanitizer.
        self.assertEqual('<img/>', (HTML(markup) | HTMLSanitizer()).render())

    assert_src_removed('<img src=\'javascript:alert("foo")\'>')
    # Case-insensitive protocol matching
    assert_src_removed('<IMG SRC=\'JaVaScRiPt:alert("foo")\'>')
    # Grave accents (not parsed)
    src = '<IMG SRC=`javascript:alert("RSnake says, \'foo\'")`>'
    self.assert_parse_error_or_equal('<img/>', src)

    # The inputs below must contain real character references: written as
    # plain "javascript:" they would only repeat the first check and leave
    # the encoded bypasses untested.

    # Protocol encoded using UTF-8 numeric entities
    assert_src_removed('<IMG SRC=\'&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;'
                       '&#112;&#116;&#58;alert("foo")\'>')
    # Protocol encoded using UTF-8 numeric entities without a semicolon
    # (which is allowed because the max number of digits is used)
    assert_src_removed('<IMG SRC=\'&#0000106&#0000097&#0000118&#0000097'
                       '&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116'
                       '&#0000058alert("foo")\'>')
    # Protocol encoded using UTF-8 numeric hex entities without a semicolon
    # (which is allowed because the max number of digits is used)
    assert_src_removed('<IMG SRC=\'&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69'
                       '&#x70&#x74&#x3A;alert("foo")\'>')
    # Embedded tab character in protocol
    assert_src_removed('<IMG SRC=\'jav\tascript:alert("foo");\'>')
    # Embedded tab character in protocol, but encoded this time
    assert_src_removed('<IMG SRC=\'jav&#x09;ascript:alert("foo");\'>')
def test_basic(self):
    """``archive_filter`` adds an archive link and keeps the original one.

    Before filtering, the page must link to the package URL but not to its
    web-archive counterpart; after filtering both links must be present.
    The leading ``!`` in a pattern passed to ``check_named_element``
    presumably negates the match -- confirm against that helper.
    """
    self.pkg_url = 'http://site.com/data.csv'
    self.archive_url = 'http://webarchive.nationalarchives.gov.uk/tna/+/' + self.pkg_url

    # before filter
    # <a href="http://www.annakarenina.com/download/x=1&y=2" target="_blank">Full text. Needs escaping: " Umlaut: u</a>
    self.pkg_page = HTML(self.pkg_page).render()
    self.check_named_element(self.pkg_page, 'a', 'href="%s"' % self.pkg_url)
    self.check_named_element(self.pkg_page, 'a', '!href="%s"' % self.archive_url)

    res = archive_filter(HTML(self.pkg_page))
    res = res.render('html').decode('utf8')
    # Parenthesized so the line also parses on Python 3; with a single
    # argument the output on Python 2 is unchanged.
    print(res)

    # after filter
    self.check_named_element(res, 'a', 'href="%s"' % self.pkg_url)
    self.check_named_element(res, 'a', 'href="%s"' % self.archive_url)
def test_unicode_escapes(self):
    """``TracHTMLSanitizer`` handles backslash escapes inside ``style``.

    Each case pairs raw input markup with the expected sanitized output:
    an escape-obfuscated value is removed, escaped backslashes are kept as
    ``\\\\``, and escaped control characters come out as spaces.
    """
    cases = (
        (r'<div style="top:exp\72 ess\000069 on(alert())">'
         r'XSS</div>',
         '<div>XSS</div>'),
        # escaped backslash
        (r'<div style="top:exp\5c ression(alert())">XSS</div>',
         r'<div style="top:exp\\ression(alert())">XSS</div>'),
        (r'<div style="top:exp\5c 72 ession(alert())">XSS</div>',
         r'<div style="top:exp\\72 ession(alert())">XSS</div>'),
        # escaped control characters
        (r'<div style="top:exp\000000res\1f sion(alert())">'
         r'XSS</div>',
         '<div style="top:exp res sion(alert())">XSS</div>'),
    )
    for markup, expected in cases:
        stream = HTML(markup, encoding='utf-8')
        self.assertEqual(expected, unicode(stream | TracHTMLSanitizer()))
def test_fill_textarea_preserve_original(self):
    """A textarea without matching data keeps its original content.

    Only ``foo`` has data, so ``bar`` must still read "Original value".
    """
    filled = HTML(u"""<form><p> <textarea name="foo"></textarea> <textarea name="bar">Original value</textarea> </p></form>""") | HTMLFormFiller(data={'foo': 'Some text'})
    # assertEqual, not the deprecated assertEquals alias (removed in 3.12).
    self.assertEqual("""<form><p> <textarea name="foo">Some text</textarea> <textarea name="bar">Original value</textarea> </p></form>""",
                     filled.render())
def filter(self, stream):
    """Inject the spatial search form into the package search page.

    When the current route is controller ``package`` / action ``search``,
    three HTML snippets from the ``html`` module are appended: the form
    inside ``div#dataset-search-ext``, extra markup in ``<head>``, and extra
    markup at the end of ``<body>``.  Each snippet is interpolated with the
    requested ``ext_bbox`` and the configured default map extent.  For any
    other route the stream is returned untouched.
    """
    from pylons import request

    # The routes dict may be absent from the environ; treat that as "not
    # the search page" instead of failing on None.get().
    routes = request.environ.get('pylons.routes_dict') or {}
    if routes.get('controller') != 'package' or \
            routes.get('action') != 'search':
        return stream

    data = {
        'bbox': request.params.get('ext_bbox', ''),
        'default_extent': config.get('ckan.spatial.default_map_extent', ''),
    }
    stream = stream | Transformer('body//div[@id="dataset-search-ext"]')\
        .append(HTML(html.SPATIAL_SEARCH_FORM % data))
    stream = stream | Transformer('head')\
        .append(HTML(html.SPATIAL_SEARCH_FORM_EXTRA_HEADER % data))
    stream = stream | Transformer('body')\
        .append(HTML(html.SPATIAL_SEARCH_FORM_EXTRA_FOOTER % data))
    return stream