def test_fill_input_password_enabled(self):
    # With ``passwords=True`` the filler is expected to populate password
    # inputs as well.  The value handed to the filler must be the very value
    # the expected markup contains; previously the two literals disagreed
    # ('******' vs. "1234"), so the assertion could never hold.
    form = HTML("""<form><p> <input type="password" name="pass" /> </p></form>""")
    filled = form | HTMLFormFiller(data={'pass': '1234'}, passwords=True)
    expected = """<form><p> <input type="password" name="pass" value="1234"/> </p></form>"""
    self.assertEquals(expected, filled.render())
def test_fill_input_text_single_value(self):
    # A single string supplied for 'foo' must show up as the text input's
    # ``value`` attribute in the rendered form.
    source = HTML("""<form><p> <input type="text" name="foo" /> </p></form>""")
    filler = HTMLFormFiller(data={'foo': 'bar'})
    filled = source | filler
    expected = """<form><p> <input type="text" name="foo" value="bar"/> </p></form>"""
    self.assertEquals(expected, filled.render())
def test_fill_input_password_disabled(self):
    # Without the ``passwords`` flag the password input must be rendered
    # without a ``value`` attribute even though data for it was supplied.
    source = HTML("""<form><p> <input type="password" name="pass" /> </p></form>""")
    filler = HTMLFormFiller(data={'pass': '******'})
    filled = source | filler
    expected = """<form><p> <input type="password" name="pass"/> </p></form>"""
    self.assertEquals(expected, filled.render())
def test_fill_input_hidden_multi_value(self):
    # Data given as a one-element list must still fill the hidden input
    # with that element.
    source = HTML("""<form><p> <input type="hidden" name="foo" /> </p></form>""")
    filler = HTMLFormFiller(data={'foo': ['bar']})
    filled = source | filler
    expected = """<form><p> <input type="hidden" name="foo" value="bar"/> </p></form>"""
    self.assertEquals(expected, filled.render())
def test_fill_input_hidden_no_value(self):
    # A filler constructed without data must leave the hidden input without
    # a ``value`` attribute.
    source = HTML("""<form><p> <input type="hidden" name="foo" /> </p></form>""")
    filled = source | HTMLFormFiller()
    expected = """<form><p> <input type="hidden" name="foo"/> </p></form>"""
    self.assertEquals(expected, filled.render())
def test_fill_textarea_no_value(self):
    # With no data the empty textarea stays empty (and is serialized as a
    # self-closing element).
    source = HTML("""<form><p> <textarea name="foo"></textarea> </p></form>""")
    filled = source | HTMLFormFiller()
    expected = """<form><p> <textarea name="foo"/> </p></form>"""
    self.assertEquals(expected, filled.render())
def test_fill_textarea_multi_value(self):
    # Data given as a one-element list must end up as the textarea's text.
    source = HTML("""<form><p> <textarea name="foo"></textarea> </p></form>""")
    filler = HTMLFormFiller(data={'foo': ['bar']})
    filled = source | filler
    expected = """<form><p> <textarea name="foo">bar</textarea> </p></form>"""
    self.assertEquals(expected, filled.render())
def test_fill_input_checkbox_single_value_auto_no_value(self):
    # A checkbox without a ``value`` attribute and without any data must be
    # rendered unchanged (apart from serialization of the empty tag).
    source = HTML("""<form><p> <input type="checkbox" name="foo" /> </p></form>""")
    filled = source | HTMLFormFiller()
    expected = """<form><p> <input type="checkbox" name="foo"/> </p></form>"""
    self.assertEquals(expected, filled.render())
def test_fill_textarea_preserve_original(self):
    # Only 'foo' has data: it must be filled, while the text already present
    # in the 'bar' textarea must survive untouched.
    source = HTML("""<form><p> <textarea name="foo"></textarea> <textarea name="bar">Original value</textarea> </p></form>""")
    filler = HTMLFormFiller(data={'foo': 'Some text'})
    filled = source | filler
    expected = """<form><p> <textarea name="foo">Some text</textarea> <textarea name="bar">Original value</textarea> </p></form>"""
    self.assertEquals(expected, filled.render())
def test_fill_textarea_multiple(self):
    # Ensure that the subsequent textarea doesn't get the data from the
    # first
    source = HTML("""<form><p> <textarea name="foo"></textarea> <textarea name="bar"></textarea> </p></form>""")
    filler = HTMLFormFiller(data={'foo': 'Some text'})
    filled = source | filler
    expected = """<form><p> <textarea name="foo">Some text</textarea> <textarea name="bar"/> </p></form>"""
    self.assertEquals(expected, filled.render())
def test_fill_option_unicode_value(self):
    # An option whose value is a non-ASCII character must be marked as
    # selected when the data for the select carries that same character.
    source = HTML("""<form> <select name="foo"> <option value="ö">foo</option> </select> </form>""")
    filler = HTMLFormFiller(data={'foo': 'ö'})
    filled = source | filler
    expected = """<form> <select name="foo"> <option value="ö" selected="selected">foo</option> </select> </form>"""
    self.assertEquals(expected, filled.render(encoding=None))
def test_translate_included_attribute_text(self):
    """ Verify that translated attributes end up in a proper `Attrs` instance. """
    markup = HTML("""<html> <span title="Foo"></span> </html>""")
    # The translation callable ignores its input and always answers u"Voh".
    translator = Translator(lambda s: u"Voh")
    events = list(markup.filter(translator))
    # Inspect the third event of the translated stream; its data is
    # indexed at [1], which is where the attributes are expected.
    _kind, payload, _pos = events[2]
    assert isinstance(payload[1], Attrs)
def save(self, encoding=None):
    """Save this wikitext, refusing HTML that the sanitizer would modify.

    For blobs whose markup language is 'ductus-html5' the text is run
    through genshi's HTMLSanitizer (with a few extra whitelisted
    attributes).  If the sanitized rendering differs from the plain
    rendering, the input contained something disallowed and a
    ValidationError is raised.  Any other markup language is saved
    without this check.
    """
    # let creole content go through unverified, the parser will clean it up anyway
    if self.blob.markup_language == 'ductus-html5':
        original = HTML(self.text)
        #TODO: define our own set of acceptable tags/attributes in settings.py
        extra_attrs = set([
            'data-gentics-aloha-repository',
            'data-gentics-aloha-object-id',
            'data-macro-name',
            'data-tags',
            'contenteditable',
        ])
        allowed_attrs = HTMLSanitizer.SAFE_ATTRS | extra_attrs
        cleaned = original | HTMLSanitizer(safe_attrs=allowed_attrs)
        # Any difference means the sanitizer had to strip or change something.
        if original.render() != cleaned.render():
            raise ValidationError(u'invalid html content')

    return super(Wikitext, self).save(encoding)
def test_fill_select_no_value_auto(self):
    # Options without ``value`` attributes and no data: nothing may be
    # selected and the markup must come back unchanged.
    source = HTML(u"""<form><p> <select name="foo"> <option>1</option> <option>2</option> <option>3</option> </select> </p></form>""")
    filled = source | HTMLFormFiller()
    expected = """<form><p> <select name="foo"> <option>1</option> <option>2</option> <option>3</option> </select> </p></form>"""
    self.assertEquals(expected, filled.render())
def test_fill_select_multi_value_defined(self):
    # A multiple select with data ['1', '3'] must get exactly those two
    # options marked as selected.
    source = HTML("""<form><p> <select name="foo" multiple> <option value="1">1</option> <option value="2">2</option> <option value="3">3</option> </select> </p></form>""")
    filler = HTMLFormFiller(data={'foo': ['1', '3']})
    filled = source | filler
    expected = """<form><p> <select name="foo" multiple="multiple"> <option value="1" selected="selected">1</option> <option value="2">2</option> <option value="3" selected="selected">3</option> </select> </p></form>"""
    self.assertEquals(expected, filled.render())
def test_fill_select_no_value_defined(self):
    # Options carry explicit values but no data is supplied: the select
    # must be rendered without any selected option.
    source = HTML("""<form><p> <select name="foo"> <option value="1">1</option> <option value="2">2</option> <option value="3">3</option> </select> </p></form>""")
    filled = source | HTMLFormFiller()
    expected = """<form><p> <select name="foo"> <option value="1">1</option> <option value="2">2</option> <option value="3">3</option> </select> </p></form>"""
    self.assertEquals(expected, filled.render())
def test_fill_select_single_value_auto(self):
    # Options have no ``value`` attribute; data '1' must select the option
    # whose text is "1".
    source = HTML("""<form><p> <select name="foo"> <option>1</option> <option>2</option> <option>3</option> </select> </p></form>""")
    filler = HTMLFormFiller(data={'foo': '1'})
    filled = source | filler
    expected = """<form><p> <select name="foo"> <option selected="selected">1</option> <option>2</option> <option>3</option> </select> </p></form>"""
    self.assertEquals(expected, filled.render())
def comment(self, id, cancel=False, **data):
    """Handle posting of a comment on the link identified by `id`.

    Looks the link up in ``self.data`` and raises ``cherrypy.NotFound`` when
    it does not exist.  On a POST request the submitted fields are validated
    through ``CommentForm``, the 'content' field is sanitized and re-rendered
    as XHTML, and the comment is added to the link.

    NOTE(review): the visible body ends right after the ``except`` clause and
    `errors` is never used afterwards -- the remainder of this function
    (non-POST handling and the error path) is presumably missing from this
    view; confirm against the full file.
    """
    link = self.data.get(id)
    if not link:
        raise cherrypy.NotFound()
    if cherrypy.request.method == 'POST':
        # A cancelled form just bounces back to the link's info page.
        if cancel:
            raise cherrypy.HTTPRedirect('/info/%s' % link.id)
        form = CommentForm()
        try:
            data = form.to_python(data)
            # Strip unsafe markup from the submitted content before storing it.
            markup = HTML(data['content']) | HTMLSanitizer()
            data['content'] = markup.render('xhtml')
            comment = link.add_comment(**data)
            # Non-XHR clients are redirected; XHR clients get only the
            # rendered comment fragment back.
            if not ajax.is_xhr():
                raise cherrypy.HTTPRedirect('/info/%s' % link.id)
            return template.render('_comment.html', comment=comment, num=len(link.comments))
        except Invalid, e:
            errors = e.unpack_errors()
def filter_stream(self, req, method, filename, stream, data):
    """Replace the milestones section of roadmap templates.

    Templates whose filename does not start with "roadmap" are passed
    through untouched.  For roadmap templates the milestones <div> is
    extracted, regrouped by the private helpers of this class using the
    milestone (and, when present, version) names found in `data`, and the
    result is substituted for the original <div> content.
    """
    if not filename.startswith("roadmap"):
        return stream

    milestones_xpath = '//div[@class="roadmap"]/div[@class="milestones"]'
    roadmap_stream = HTML(to_unicode(stream))
    milestones_stream = HTML(to_unicode(roadmap_stream.select(milestones_xpath)))

    # Collect milestone names, followed by version names when there are any.
    names = [entry.name for entry in data.get('milestones')]
    versions = data.get('versions')
    if versions:
        names.extend(entry.name for entry in versions)

    milestone_divs = self.__extract_div_milestones_array('<div class="milestone">', milestones_stream)
    replacement = self.__process_div_projects_milestones(names, milestone_divs, req)
    return roadmap_stream | Transformer(milestones_xpath).replace(replacement)
def send_html_response(self, handler, html_file, code=200, html_form_data=None, **kwargs):
    """
    Generates and sends an HTML response.

    Sends the response headers, renders `html_file` with the Genshi
    template engine, optionally auto-fills its forms, and embeds the result
    into the default template (assets/html/index.html next to this module)
    before writing it to the client.

    Args:
        handler: References the handler of the current http request.
        html_file: Name of the HTML document that is loaded through
                   ``self.template`` and rendered using Genshi.
        code: Response code that is sent within the http headers; by
              default response code 200 (success) is sent.
        html_form_data: Optional mapping with form data used to auto-fill
                        html forms via genshi.filters.HTMLFormFiller.
                        ``None`` (the default) means no form data.
        **kwargs: Any additional parameter will be forwarded to the Genshi
                  templates.
    """
    handler.send_response(code=code)
    handler.send_header("Content-type", 'text/html')
    handler.end_headers()

    # Add additional template parameters
    kwargs["plugin"] = self.__module__

    # os.path.join (instead of gluing with os.sep) keeps the path relative
    # when dirname(__file__) is empty rather than anchoring it at the root.
    template_path = os.path.join(os.path.dirname(__file__),
                                 "assets", "html", "index.html")
    # The context manager closes the file even if parsing the template fails.
    with open(template_path) as fd:
        template = MarkupTemplate(fd, template_path)

    # A fresh dict per call instead of a shared mutable default argument.
    if html_form_data is None:
        html_form_data = {}
    filler = HTMLFormFiller(data=html_form_data)

    # See http://stackoverflow.com/questions/1555644/can-one-prevent-genshi-from-parsing-html-entities
    # because of "us-ascii" encoding.
    html = HTML(self.template.load(html_file).generate(**kwargs).render(encoding='us-ascii'))
    page = template.generate(Context(input=html.filter(filler), **kwargs))
    handler.wfile.write(page.render('xhtml', doctype='html', encoding='us-ascii'))
def test_sanitize_property_name(self):
    # Only the first two declarations are expected to survive; the
    # 'user_defined' and '-moz-...' properties must be dropped.
    markup = HTML(u'<div style="display:none;border-left-color:red;'
                  u'user_defined:1;-moz-user-selct:-moz-all">prop</div>')
    expected = ('<div style="display:none; border-left-color:red'
                '">prop</div>')
    sanitized = markup | StyleSanitizer()
    self.assertEqual(expected, unicode(sanitized))
def test_unicode_url(self):
    # IPA extensions
    markup = HTML(u'<div style="background-image:uʀʟ(javascript:alert())">'
                  u'XSS</div>')
    # The whole style attribute must be removed.
    sanitized = markup | TracHTMLSanitizer()
    self.assertEqual('<div>XSS</div>', unicode(sanitized))
def test_capital_url_with_javascript(self):
    # An upper-case URL(...) with a javascript: target must cause the style
    # attribute to be removed.
    source = ('<div style="background-image:URL(javascript:alert())">'
              'XSS</div>')
    markup = HTML(source, encoding='utf-8')
    sanitized = markup | TracHTMLSanitizer()
    self.assertEqual('<div>XSS</div>', unicode(sanitized))
def test_unicode_escapes(self):
    # The style value spells its function name using CSS backslash escapes;
    # the sanitizer must still remove the style attribute.
    source = (r'<div style="top:exp\72 ess\000069 on(alert())">'
              r'XSS</div>')
    markup = HTML(source, encoding='utf-8')
    sanitized = markup | TracHTMLSanitizer()
    self.assertEqual('<div>XSS</div>', unicode(sanitized))
def test_html5_doctype(self):
    # Rendering with DocType.HTML5 must emit the short HTML5 doctype on its
    # own line in front of the document.
    document = HTML(u'<html></html>')
    rendered = document.render(HTMLSerializer, doctype=DocType.HTML5,
                               encoding=None)
    self.assertEqual('<!DOCTYPE html>\n<html></html>', rendered)
def assert_parse_error_or_equal(self, expected, exploit):
    """Accept either outcome for `exploit`: a ParseError or safe output.

    If parsing `exploit` raises ParseError the check passes immediately;
    otherwise the sanitized rendering must equal `expected`.
    """
    try:
        parsed = HTML(exploit)
    except ParseError:
        # Markup the parser rejects outright is fine.
        return
    else:
        sanitized = parsed | HTMLSanitizer()
        self.assertEquals(expected, sanitized.render())
def test_sanitize_capital_url_with_javascript(self):
    # An upper-case URL(...) with a javascript: target must cause the style
    # attribute to be removed.
    markup = HTML(u'<div style="background-image:URL(javascript:alert())">'
                  u'XSS</div>')
    sanitized = markup | StyleSanitizer()
    self.assertEqual('<div>XSS</div>', unicode(sanitized))
def test_sanitize_close_empty_tag(self):
    # The unclosed <br> inside the link must come out as a closed empty tag.
    markup = HTML(u'<a href="#">fo<br>o</a>')
    sanitized = markup | HTMLSanitizer()
    self.assertEquals('<a href="#">fo<br/>o</a>', sanitized.render())
def test_sanitize_invalid_entity(self):
    # An unknown entity reference is kept as literal text.  Literal text is
    # escaped on output, so the ampersand has to come back as '&amp;'; the
    # previous expectation of a raw '&junk;' did not account for that.
    # NOTE(review): relies on Genshi escaping '&' in text events when
    # serializing -- confirm against the Genshi version in use.
    markup = HTML(u'&junk;')
    sanitized = markup | HTMLSanitizer()
    self.assertEquals('&amp;junk;', sanitized.render())
def test_sanitize_remove_onclick_attr(self):
    # The onclick handler must be stripped, leaving a bare <div/>.
    markup = HTML(u'<div onclick=\'alert("foo")\' />')
    sanitized = markup | HTMLSanitizer()
    self.assertEquals('<div/>', sanitized.render())
def test_sanitize_remove_input_password(self):
    # The password input must be removed entirely, leaving an empty form.
    markup = HTML(u'<form><input type="password" /></form>')
    sanitized = markup | HTMLSanitizer()
    self.assertEquals('<form/>', sanitized.render())
def test_sanitize_remove_comments(self):
    # The comment inside the <div> must not survive sanitizing.
    markup = HTML(u'''<div><!-- conditional comment crap --></div>''')
    sanitized = markup | HTMLSanitizer()
    self.assertEquals('<div/>', sanitized.render())
def test_sanitize_css_hack(self):
    # Property names prefixed with '*' or '_' must be rejected, which
    # removes the style attribute.
    hacks = (
        u'<div style="*position:static">XSS</div>',
        u'<div style="_margin:-10px">XSS</div>',
    )
    for source in hacks:
        sanitized = HTML(source) | StyleSanitizer()
        self.assertEqual('<div>XSS</div>', unicode(sanitized))
def test_sanitize_escape_attr(self):
    # The attribute value contains '<' and '>'.  The sanitizer keeps the
    # title attribute, and the serializer must emit its value escaped.  The
    # previous expectation contained the raw '<foo>', which is exactly what
    # a test for escaping must not accept.
    # NOTE(review): relies on Genshi escaping '<' and '>' inside attribute
    # values when serializing -- confirm against the Genshi version in use.
    markup = HTML(u'<div title="<foo>"></div>')
    sanitized = markup | HTMLSanitizer()
    self.assertEquals('<div title="&lt;foo&gt;"/>', sanitized.render())
def test_sanitize_negative_margin(self):
    # Negative margins, in the long-hand and in the short-hand form, must
    # both lead to the style attribute being removed.
    offenders = (
        u'<div style="margin-top:-9999px">XSS</div>',
        u'<div style="margin:0 -9999px">XSS</div>',
    )
    for source in offenders:
        sanitized = HTML(source) | StyleSanitizer()
        self.assertEqual('<div>XSS</div>', unicode(sanitized))
def test_sanitize_entityref_text(self):
    # Non-ASCII link text must pass through the sanitizer unchanged.
    markup = HTML(u'<a href="#">foö</a>')
    sanitized = markup | HTMLSanitizer()
    self.assertEquals(u'<a href="#">foö</a>',
                      sanitized.render(encoding=None))
def test_sanitize_remove_style_scripts(self):
    # Every entry pairs a piece of hostile markup with the rendering that
    # must remain once the shared sanitizer has processed it.  The entries
    # are checked in order, one after the other.
    sanitizer = StyleSanitizer()
    cases = [
        # Inline style with url() using javascript: scheme
        (u'<DIV STYLE=\'background: url(javascript:alert("foo"))\'>',
         '<div/>'),
        # Inline style with url() using javascript: scheme, using control char
        (u'<DIV STYLE=\'background: url(javascript:alert("foo"))\'>',
         '<div/>'),
        # Inline style with url() using javascript: scheme, in quotes
        (u'<DIV STYLE=\'background: url("javascript:alert(foo)")\'>',
         '<div/>'),
        # IE expressions in CSS not allowed
        (u'<DIV STYLE=\'width: expression(alert("foo"));\'>',
         '<div/>'),
        (u'<DIV STYLE=\'width: e/**/xpression(alert("foo"));\'>',
         '<div/>'),
        (u'<DIV STYLE=\'background: url(javascript:alert("foo"));'
         'color: #fff\'>',
         '<div style="color: #fff"/>'),
        # Inline style with url() using javascript: scheme, using unicode
        # escapes
        (u'<DIV STYLE=\'background: \\75rl(javascript:alert("foo"))\'>',
         '<div/>'),
        (u'<DIV STYLE=\'background: \\000075rl(javascript:alert("foo"))\'>',
         '<div/>'),
        (u'<DIV STYLE=\'background: \\75 rl(javascript:alert("foo"))\'>',
         '<div/>'),
        (u'<DIV STYLE=\'background: \\000075 rl(javascript:alert("foo"))\'>',
         '<div/>'),
        (u'<DIV STYLE=\'background: \\000075\r\nrl(javascript:alert("foo"))\'>',
         '<div/>'),
    ]
    for source, expected in cases:
        sanitized = HTML(source) | sanitizer
        self.assertEquals(expected, sanitized.render())
def test_expression(self):
    # A CSS expression(...) value must cause the style attribute to be
    # removed.
    source = '<div style="top:expression(alert())">XSS</div>'
    markup = HTML(source, encoding='utf-8')
    sanitized = markup | TracHTMLSanitizer()
    self.assertEqual('<div>XSS</div>', unicode(sanitized))
def process_request(self, req):
    """Serve a document from the Mailman archive.

    The request path must look like
    ``/tracmailman/browser/(public|private)/<list>/<doc>.(html|txt|txt.gz)``.
    HTML documents are sanitized and embedded into the
    'tracmailmanbrowser.html' template; text and gzip documents are written
    to the response as they are.

    Returns:
        The ``(template, data, content_type)`` triple whenever a page has
        to be rendered (not logged in, bad URL, private list, missing
        document, HTML document).  Nothing is returned after a text/gzip
        document has been written directly to `req`.
    """
    # This is a workaround for bug: http://trac.edgewall.org/ticket/5628
    reload(sys)
    if sys.getdefaultencoding() == 'ascii':
        sys.setdefaultencoding("latin1")
    # End: workaround

    # The full path to where the mailman archives are stored
    mail_archive_path = self.env.config.get('tracmailman', 'mail_archive_path')
    if mail_archive_path[-1] != '/':
        mail_archive_path += "/"

    data = {}
    data['title'] = 'Mailing List Archive Browser'

    # Check user is logged in
    if not authenticated(req):
        return 'tracmailmanbrowser.html', data, 'text/html'
    data['authenticated'] = True

    # We won't respond to just anything. Let's use regexps to pull
    # out relevant tokens, and verify the tokens.
    result = re.search(r'^/tracmailman/browser/(public|private)/([^/]+)/([^.]+)\.(html|txt|txt\.gz)$',
                       req.path_info)
    if result is None:
        chrome.add_warning(req, 'The URL you requested is does not refer to a valid document')
        return 'tracmailmanbrowser.html', data, 'text/html'

    priv, listname, docID, extension = result.groups()

    # Check if user is trying to access a private list
    if listname in self.env.config.getlist('tracmailman', 'private_lists'):
        chrome.add_warning(req, 'This list is private and not browsable. Please go through the standard Mailman interface.')
        return 'tracmailmanbrowser.html', data, 'text/html'

    path = mail_archive_path + priv + '/' + listname + '/' + docID + '.' + extension

    if not os.path.isfile(path):
        if docID in ['thread', 'subject', 'author', 'date']:
            chrome.add_warning(req, """You requested a mail index page that could not be found. It is possible that there are currently no mail messages archived, so no index has been created.""")
        else:
            chrome.add_warning(req, 'The mail message that you requested cannot be found')
        return 'tracmailmanbrowser.html', data, 'text/html'

    # Read in binary mode (the .txt.gz archives are not text) and make sure
    # the handle is closed again instead of leaving that to the garbage
    # collector.
    with open(path, 'rb') as archive_file:
        archivedMail = archive_file.read()

    if extension == 'html':
        html = HTML(archivedMail, encoding='utf-8')
        # At this point, the HTML document is turned into a Genshi
        # object.  For more info on how to transform the HTML
        # object using Genshi:
        # http://genshi.edgewall.org/wiki/ApiDocs
        #
        sanitized = html.select('body/*') | HTMLSanitizer()
        contents = sanitized.render('html')
        # Drop the named anchors from the archived page.
        contents = re.sub(r'<a name=.+?a>', "", contents)
        data['contents'] = Markup(contents)
        data['title'] += " - " + listname
        return 'tracmailmanbrowser.html', data, 'text/html'

    # Plain text or gzip: send the raw bytes ourselves.
    req.send_response(200)
    if extension == 'txt':
        req.send_header('Content-Type', 'text/plain')
    else:
        req.send_header('Content-Type', 'application/x-gzip')
    req.send_header('Content-Length', len(archivedMail))
    req.end_headers()
    req.write(archivedMail)
    # NOTE(review): nothing is returned here on purpose, since the response
    # has already been written -- presumably Trac treats a None result as
    # "request handled"; verify against the Trac version in use.
    return None
def strip_message(message):
    """Return `message` sanitized by HTMLSanitizer and rendered as XHTML."""
    parsed = HTML(message)
    sanitized = parsed | HTMLSanitizer()
    return sanitized.render('xhtml')
def get_series_info(mnemonic):
    """
    Gets the metadata for a datastream series

    NOTE(review): the function currently returns an empty dict right after
    this docstring, so the scraping code below is never executed.

    >>> get_series_info('BRIPTOT.H')
    """
    # Unconditional early return: everything below is unreachable.
    # NOTE(review): presumably the lookup was disabled on purpose -- confirm
    # before removing either this return or the dead code.
    return {}
    # Genshi (for get_series_data)
    # from genshi.input import HTML
    addr = 'http://product.datastream.com/navigator/EconomicsMetadata.aspx?category=Economics&mnemonic=%s' % mnemonic
    html = urlopen(addr)
    LOGGER.info('------------------------------------------------------------' )
    LOGGER.info("GET %s" % addr)
    LOGGER.info('' )
    # Narrow the page down step by step to the <td> cells of the form table.
    HH = HTML( html.read() )
    HHH = HTML( HH.select('body/form/table') )
    TD = HTML( HHH.select('tr/td') )
    el = {}          # result: lower-cased key -> stripped value
    xin = 0          # cell state: 0 outside a td, 1 td opened, 2 text seen
    ky = 0           # 0: next text is a key, 1: next text is its value
    key = None
    Value = None
    lastkey = None
    if len(TD.events)==0:
        LOGGER.info('NO DATA for %s' % mnemonic)
        return {}
    # Walk the parse events; cells are consumed as alternating key / value
    # texts.
    for e in TD.events:
        #LOGGER.info("EVT %s-%s" % (e[0],e[1]))
        if e[0]=='START':
            if e[1][0].localname=='td':
                xin = 1
        if e[0]=='END':
            # A cell that contained text and completed a key/value pair:
            # store the pair and reset.
            if e[1].localname=='td' and xin==2 and ky == 0:
                xin = 0
                if key:
                    LOGGER.info("[%s]=%s" % (key,Value))
                    if Value is None:
                        Value='None'
                    el[key.lower()]=Value.strip()
                key = None
                Value = None
            # A cell that closed without any text: flush whatever key is
            # pending (its value defaults to the string 'None').
            if e[1].localname=='td' and xin==1:
                ky = 0
                xin = 0
                if key:
                    LOGGER.info("[%s]=%s" % (key,Value))
                    if Value is None:
                        Value='None'
                    el[key.lower()]=Value.strip()
                key = None
                Value = None
        if e[0]=='TEXT':
            # Only the first text inside a cell is considered.
            if xin==1:
                xin = 2
                if ky==0:
                    # Texts of length <= 1 are not used as a key; the
                    # remembered `lastkey` is used instead.
                    if len(e[1])>1:
                        lastkey=key
                        key=unicode(e[1])
                    else:
                        key=unicode(lastkey)
                    ky = 1
                else:
                    Value=unicode(e[1])
                    ky = 0
    return el