def test_quote_text(self):
    """Sanitize the TEXT_1 and TEXT_2 samples and check kept/quoted fragments.

    For each sample, every ``*_IN`` fragment must appear verbatim in the
    sanitized output, and every ``*_OUT`` fragment must appear, HTML-escaped,
    wrapped in a ``data-o-mail-quote`` span.
    """
    quote_span = u'<span data-o-mail-quote="1">%s</span>'

    def check(source, kept, quoted):
        sanitized = html_sanitize(source)
        for fragment in kept:
            self.assertIn(fragment, sanitized)
        for fragment in quoted:
            self.assertIn(quote_span % misc.html_escape(fragment), sanitized)

    check(
        test_mail_examples.TEXT_1,
        test_mail_examples.TEXT_1_IN,
        test_mail_examples.TEXT_1_OUT,
    )
    check(
        test_mail_examples.TEXT_2,
        test_mail_examples.TEXT_2_IN,
        test_mail_examples.TEXT_2_OUT,
    )
def _do_request(self, template, data): xml_transaction = self.env.ref(template).render(data).decode() if not data['merchant_id'] or not data['merchant_pwd']: return "not setup" soap_header = '<soapenv:Envelope xmlns:soapenv="http://schemas.xmlsoap.org/soap/envelope/" xmlns:mer="http://www.mercurypay.com"><soapenv:Header/><soapenv:Body><mer:CreditTransaction><mer:tran>' soap_footer = '</mer:tran><mer:pw>' + data[ 'merchant_pwd'] + '</mer:pw></mer:CreditTransaction></soapenv:Body></soapenv:Envelope>' xml_transaction = soap_header + misc.html_escape( xml_transaction) + soap_footer response = '' headers = { 'Content-Type': 'text/xml', 'SOAPAction': 'http://www.mercurypay.com/CreditTransaction', } try: r = requests.post('https://w1.mercurypay.com/ws/ws.asmx', data=xml_transaction, headers=headers, timeout=65) r.raise_for_status() response = werkzeug.utils.unescape(r.content.decode()) except: response = "timeout" return response
def plaintext2html(text, container_tag=False):
    """Turn plain text into an HTML fragment.

    Steps, in order:

    - the text is coerced with ``ustr`` and escaped with ``misc.html_escape``
    - every newline and every carriage return becomes ``<br/>``
    - the result is passed through ``html_keep_url``
    - the whole text is wrapped in ``<p>...</p>``, and each run of two or
      more consecutive ``<br>`` tags is turned into a paragraph boundary
    - when ``container_tag`` is truthy the result is wrapped in that tag

    :param text: plain text to convert
    :param container_tag: name of the tag to wrap the result in; falsy
        (the default) means no wrapping
    :return: the HTML, coerced with ``ustr``
    """
    escaped = misc.html_escape(ustr(text))

    # Line breaks first, then links.
    with_breaks = escaped.replace('\n', '<br/>').replace('\r', '<br/>')
    linked = html_keep_url(with_breaks)

    # Runs of two or more <br> tags separate paragraphs.
    paragraph_break = re.compile(r'(([<]\s*[bB][rR]\s*\/?[>]\s*){2,})')
    final = '<p>' + paragraph_break.sub('</p><p>', linked) + '</p>'

    if container_tag:
        final = '<%s>%s</%s>' % (container_tag, final, container_tag)
    return ustr(final)
def to_html(self):
    """Return the HTML tag for this stylesheet.

    A ``<link>`` pointing at ``self.html_url`` is produced when ``self.url``
    is truthy, otherwise an inline ``<style>`` holding ``self.with_header()``.
    A ``media`` attribute (escaped) is added when ``self.media`` is truthy.
    """
    media_attr = ''
    if self.media:
        media_attr = ' media="%s"' % misc.html_escape(self.media)

    if not self.url:
        return '<style type="text/css"%s>%s</style>' % (media_attr, self.with_header())

    return '<link rel="stylesheet" href="%s" type="text/css"%s/>' % (self.html_url, media_attr)
def test_quote_basic_text(self):
    """Sanitize small inline samples and check which parts get quoted.

    Each case is ``(source, kept, quoted)``: every ``kept`` fragment must be
    present in the sanitized output, and every ``quoted`` fragment must be
    present, HTML-escaped, inside a ``data-o-mail-quote`` span.
    """
    quote_span = u'<span data-o-mail-quote="1">%s</span>'
    cases = [
        (
            "This is Sparta!\n--\nAdministrator\n+9988776655",
            ['This is Sparta!'],
            ['\n--\nAdministrator\n+9988776655'],
        ),
        (
            "<p>This is Sparta!\n--\nAdministrator</p>",
            [],
            ['\n--\nAdministrator'],
        ),
        (
            "<p>This is Sparta!<br/>--<br>Administrator</p>",
            ['This is Sparta!'],
            [],
        ),
        (
            "This is Sparta!\n>Ah bon ?\nCertes\n> Chouette !\nClair",
            ['This is Sparta!', 'Certes', 'Clair'],
            ['\n>Ah bon ?', '\n> Chouette !'],
        ),
    ]
    for source, kept, quoted in cases:
        sanitized = html_sanitize(source)
        for fragment in kept:
            self.assertIn(fragment, sanitized)
        for fragment in quoted:
            self.assertIn(quote_span % misc.html_escape(fragment), sanitized)
def test_quote_thunderbird(self):
    """Sanitize QUOTE_THUNDERBIRD_1 and check its kept and quoted fragments.

    ``*_IN`` fragments must be present as-is; ``*_OUT`` fragments must be
    present, HTML-escaped, inside a complete ``data-o-mail-quote`` span.
    """
    sanitized = html_sanitize(test_mail_examples.QUOTE_THUNDERBIRD_1)
    quote_span = u'<span data-o-mail-quote="1">%s</span>'

    for fragment in test_mail_examples.QUOTE_THUNDERBIRD_1_IN:
        self.assertIn(fragment, sanitized)

    for fragment in test_mail_examples.QUOTE_THUNDERBIRD_1_OUT:
        expected = quote_span % misc.html_escape(fragment)
        self.assertIn(expected, sanitized)
def test_quote_blockquote(self):
    """Sanitize QUOTE_BLOCKQUOTE and check its kept and quoted fragments.

    ``*_IN`` fragments must be present as-is; ``*_OUT`` fragments must be
    present, HTML-escaped, right after an opening ``data-o-mail-quote`` span
    (the closing tag is deliberately not part of the expected text).
    """
    sanitized = html_sanitize(test_mail_examples.QUOTE_BLOCKQUOTE)
    quote_opening = u'<span data-o-mail-quote="1">%s'

    for fragment in test_mail_examples.QUOTE_BLOCKQUOTE_IN:
        self.assertIn(fragment, sanitized)

    for fragment in test_mail_examples.QUOTE_BLOCKQUOTE_OUT:
        expected = quote_opening % misc.html_escape(fragment)
        self.assertIn(expected, sanitized)
def test_sanitize_escape_emails(self):
    """Check that ``Name <address>`` strings survive sanitization, escaped.

    For each sample, the HTML-escaped form of the whole string must be found
    in the sanitized output.
    """
    # NOTE(review): some of these addresses look redacted in this copy of the
    # file -- confirm against the upstream fixtures.
    samples = [
        "Charles <*****@*****.**>",
        "Dupuis <'tr/-: ${dupuis#$'@truc.baz.fr>",
        "Technical <service/[email protected]>",
        "Div nico <*****@*****.**>",
    ]
    failure_message = 'html_sanitize stripped emails of original html'
    for address in samples:
        escaped = misc.html_escape(address)
        self.assertIn(escaped, html_sanitize(address), failure_message)
def test_sanitize_unescape_emails(self):
    """Check that tags merely containing an ``@`` are not escaped as emails.

    For each sample, the fully escaped markup must NOT be in the sanitized
    output, while the text up to the first ``>`` must still be there.
    """
    samples = [
        '<blockquote cite="mid:CAEJSRZvWvud8c6Qp=wfNG6O1+wK3i_jb33qVrF7XyrgPNjnyUA@mail.gmail.com" type="cite">cat</blockquote>',
        '<img alt="@github-login" class="avatar" src="/web/image/pi" height="36" width="36">',
    ]
    failure_message = 'html_sanitize stripped emails of original html'
    for markup in samples:
        cleaned = html_sanitize(markup)
        # Compare only what precedes the first '>': the sanitizer could add
        # data information on the node.
        opening = markup.partition('>')[0]
        self.assertNotIn(misc.html_escape(markup), cleaned, failure_message)
        self.assertIn(opening, cleaned)
def set_file(self, file, import_id, jsonp='callback'):
    """Store an uploaded file on an import record and return a JS callback call.

    :param file: uploaded file object; its ``read()``, ``filename`` and
        ``content_type`` are written on the record
    :param import_id: id of the ``base_import.import`` record (coerced to int)
    :param jsonp: name of the ``window.top`` function to call; escaped before
        being put in the response
    :return: ``window.top.<jsonp>(<json>)`` where the JSON payload is
        ``{"result": <value returned by write>}``
    """
    record = request.env['base_import.import'].browse(int(import_id))
    values = {
        'file': file.read(),
        'file_name': file.filename,
        'file_type': file.content_type,
    }
    written = record.write(values)

    callback = misc.html_escape(jsonp)
    payload = json.dumps({'result': written})
    return 'window.top.%s(%s)' % (callback, payload)
def test_i18n(self):
    """Render a ``t-field`` on a company whose name mixes non-ASCII text and
    HTML-special characters, and compare with the expected ``<span>``."""
    s = u"Testing «ταБЬℓσ»: 1<2 & 4+1>3, now 20% off!"
    expected_template = (
        u'<span data-oe-model="res.company" data-oe-id="%d" '
        u'data-oe-field="name" data-oe-type="char" '
        u'data-oe-expression="company.name">%s</span>'
    )

    field = etree.Element('span', {'t-field': u'company.name'})
    company = self.env['res.company'].create({'name': s})
    result = self.engine.render(field, {'company': company})

    self.assertEqual(
        etree.fromstring(result),
        etree.fromstring(expected_template % (company.id, misc.html_escape(s))),
    )
def append_content_to_html(html, content, plaintext=True, preserve=False, container_tag=False):
    """Insert ``content`` at the end of an HTML snippet.

    The content is placed just before ``</body>`` if present, else just
    before ``</html>``, else at the very end.

    How ``content`` is prepared:

    - ``plaintext`` and ``preserve``: escaped and wrapped in ``<pre>``
    - ``plaintext`` only: converted with ``plaintext2html`` (which receives
      ``container_tag``)
    - otherwise: html/body/head/doctype tags are stripped from it and it is
      used as HTML

    Side effect on ``html``: tag names followed by a space or ``>`` are
    lower-cased.

    :param str html: html tagsoup (does not have to be XHTML)
    :param str content: extra content to append
    :param bool plaintext: whether ``content`` is plain text that must be
        converted before insertion
    :param bool preserve: for plain text, wrap it in ``<pre>`` instead of
        converting it
    :param container_tag: forwarded to ``plaintext2html``
    :return: the combined markup
    """
    html = ustr(html)

    if not plaintext:
        stripped = re.sub(r'(?i)(</?(?:html|body|head|!\s*DOCTYPE)[^>]*>)', '', content)
        content = u'\n%s\n' % ustr(stripped)
    elif preserve:
        content = u'\n<pre>%s</pre>\n' % misc.html_escape(ustr(content))
    else:
        content = '\n%s\n' % plaintext2html(content, container_tag)

    def lower_tag_name(match):
        return '%s%s%s' % (match.group(1), match.group(2).lower(), match.group(3))

    # Lower-case tag names so the closing-tag lookups below can match.
    html = re.sub(r'(</?)(\w+)([ >])', lower_tag_name, html)

    for closing_tag in ('</body>', '</html>'):
        position = html.find(closing_tag)
        if position != -1:
            return '%s%s%s' % (html[:position], content, html[position:])
    return '%s%s' % (html, content)
def wifi(self):
    """Build the wifi configuration page.

    The page embeds ``index_style`` in its head and offers one ``<option>``
    per line of ``/tmp/scanned_networks.txt`` (trailing whitespace stripped,
    value HTML-escaped). When that file cannot be opened or read, a warning
    is logged and the options gathered so far (possibly none) are used.

    :return: the full HTML page as a string
    """
    page_head = (
        ' <!DOCTYPE HTML> <html> <head> <title>Wifi configuration</title> '
        + index_style +
        ' </head> <body> <h1>Configure wifi</h1> <p> '
        'Here you can configure how the posbox should connect to wireless networks. '
        'Currently only Open and WPA networks are supported. '
        'When enabling the persistent checkbox, the chosen network will be saved '
        'and the posbox will attempt to connect to it every time it boots. '
        '</p> '
        "<form action='/wifi_connect' method='POST'> "
        '<table> <tr> <td> ESSID: </td> <td> <select name="essid"> '
    )
    # NOTE(review): the page ends with a stray, unclosed <form> tag; it is
    # reproduced unchanged here -- confirm whether it can be dropped.
    page_tail = (
        ' </select> </td> </tr> '
        '<tr> <td> Password: </td> <td> '
        '<input type="password" name="password" placeholder="optional"/> '
        '</td> </tr> '
        '<tr> <td> Persistent: </td> <td> '
        '<input type="checkbox" name="persistent"/> '
        '</td> </tr> '
        '<tr> <td/> <td> <input type="submit" value="connect"/> </td> </tr> '
        '</table> </form> '
        '<p> You can clear the persistent configuration by clicking below: '
        "<form action='/wifi_clear'> "
        '<input type="submit" value="Clear persistent network configuration"/> '
        '</form> </p> <form> </body> </html> '
    )

    options = []
    try:
        # ``with`` guarantees the file is closed even if escaping or reading
        # raises something other than IOError.
        with open('/tmp/scanned_networks.txt', 'r') as networks:
            for line in networks:
                essid = misc.html_escape(line.rstrip())
                options.append('<option value="' + essid + '">' + essid + '</option>\n')
    except IOError:
        _logger.warning("No /tmp/scanned_networks.txt")

    return page_head + ''.join(options) + page_tail
def html_sanitize(src, silent=True, sanitize_tags=True, sanitize_attributes=False,
                  sanitize_style=False, strip_style=False, strip_classes=False):
    """Clean an HTML fragment with ``_Cleaner`` and return the result.

    Falsy input is returned unchanged. Before cleaning, tags carrying an
    ``encoding=`` attribute are dropped, ``<...@...>`` pseudo-tags (email
    addresses) are HTML-escaped unless they contain ``cite=`` or ``alt=``,
    and mako delimiters ``<%`` / ``%>`` are escaped; the delimiters are
    restored after cleaning.

    :param src: markup to sanitize
    :param bool silent: when False, errors raised while cleaning are
        re-raised; when True they are logged and replaced by a short
        ``<p>`` error message
    :param bool sanitize_tags: restrict tags using ``allowed_tags``,
        ``tags_to_kill`` and ``tags_to_remove``
    :param bool sanitize_attributes: keep only ``safe_attrs`` (needs
        lxml >= 3.1.0, otherwise all attributes are kept)
    :param bool sanitize_style: forwarded as the cleaner's ``sanitize_style``
    :param bool strip_style: forwarded as the cleaner's ``style`` option
    :param bool strip_classes: drop ``class`` attributes
    :return: the cleaned markup, or ``u""`` when the parser reports an
        'empty' document
    """
    if not src:
        return src
    src = ustr(src, errors='replace')

    # The whole match (i.e. the complete tag holding an encoding= attribute)
    # is replaced by nothing.
    doctype = re.compile(
        r'(<[^>]*\s)(encoding=(["\'][^"\']*?["\']|[^\s\n\r>]+)(\s[^>]*|/)?>)',
        re.IGNORECASE | re.DOTALL)
    src = doctype.sub(u"", src)

    logger = logging.getLogger(__name__ + '.html_sanitize')

    # html encode email tags, except results containing cite="mid:email_like@address"
    # (ex: blockquote cite) or an alt= attribute
    part = re.compile(r"(<(([^a<>]|a[^<>\s])[^<>]*)@[^<>]+>)", re.IGNORECASE | re.DOTALL)

    def _escape_email_tag(match):
        tag = match.group(1)
        if u'cite=' in tag or u'alt=' in tag:
            return tag
        return misc.html_escape(tag)

    src = part.sub(_escape_email_tag, src)

    # html encode mako tags <% ... %> to decode them later and keep them
    # alive, otherwise they are stripped by the cleaner
    mako_open = misc.html_escape(u'<%')
    mako_close = misc.html_escape(u'%>')
    src = src.replace(u'<%', mako_open)
    src = src.replace(u'%>', mako_close)

    kwargs = {
        'page_structure': True,
        'style': strip_style,              # True = remove style tags/attrs
        'sanitize_style': sanitize_style,  # True = sanitize styling
        'forms': True,                     # True = remove form tags
        'remove_unknown_tags': False,
        'comments': False,
        'processing_instructions': False
    }
    if sanitize_tags:
        kwargs['allow_tags'] = allowed_tags
        if etree.LXML_VERSION >= (2, 3, 1):
            # kill_tags attribute has been added in version 2.3.1
            kwargs.update({
                'kill_tags': tags_to_kill,
                'remove_tags': tags_to_remove,
            })
        else:
            kwargs['remove_tags'] = tags_to_kill + tags_to_remove

    # lxml < 3.1.0 does not allow to specify safe_attrs. We keep all
    # attributes in order to keep "style"
    if sanitize_attributes and etree.LXML_VERSION >= (3, 1, 0):
        if strip_classes:
            current_safe_attrs = safe_attrs - frozenset(['class'])
        else:
            current_safe_attrs = safe_attrs
        kwargs.update({
            'safe_attrs_only': True,
            'safe_attrs': current_safe_attrs,
        })
    else:
        kwargs.update({
            'safe_attrs_only': False,  # keep oe-data attributes + style
            'strip_classes': strip_classes,  # remove classes, even when keeping other attributes
        })

    try:
        # some corner cases make the parser crash (such as
        # <SCRIPT/XSS SRC=\"http://ha.ckers.org/xss.js\"></SCRIPT> in test_mail)
        cleaner = _Cleaner(**kwargs)
        cleaned = cleaner.clean_html(src)
        assert isinstance(cleaned, pycompat.text_type)
        # MAKO compatibility: $, { and } inside quotes are escaped, preventing
        # correct mako execution
        cleaned = cleaned.replace(u'%24', u'$')
        cleaned = cleaned.replace(u'%7B', u'{')
        cleaned = cleaned.replace(u'%7D', u'}')
        cleaned = cleaned.replace(u'%20', u' ')
        cleaned = cleaned.replace(u'%5B', u'[')
        cleaned = cleaned.replace(u'%5D', u']')
        cleaned = cleaned.replace(u'%7C', u'|')
        # Undo the mako escaping done before cleaning; searching for the very
        # strings produced above keeps both steps symmetric.
        cleaned = cleaned.replace(mako_open, u'<%')
        cleaned = cleaned.replace(mako_close, u'%>')
        # html considerations so real html content match database value
        # NOTE(review): the result of this call is discarded, so it has no
        # effect; the intended replacement is unclear from here -- confirm
        # before assigning it back.
        cleaned.replace(u'\xa0', u' ')
    except etree.ParserError as e:
        if u'empty' in pycompat.text_type(e):
            return u""
        if not silent:
            raise
        logger.warning(u'ParserError obtained when sanitizing %r', src, exc_info=True)
        cleaned = u'<p>ParserError when sanitizing</p>'
    except Exception:
        if not silent:
            raise
        logger.warning(u'unknown error obtained when sanitizing %r', src, exc_info=True)
        cleaned = u'<p>Unknown error when sanitizing</p>'

    # this is ugly, but lxml/etree tostring want to put everything in a 'div'
    # that breaks the editor -> remove that
    if cleaned.startswith(u'<div>') and cleaned.endswith(u'</div>'):
        cleaned = cleaned[5:-6]

    return cleaned
def write_config_formula(self, attachment_id, spreadsheet_key, model, domain, groupbys, view_id):
    """Write the data and settings formulas into a Google spreadsheet.

    Cell R1C1 receives an ``oe_read_group`` (when ``groupbys`` is truthy) or
    ``oe_browse`` formula built from the visible fields of the tree view;
    cell R60C15 receives an ``oe_settings`` formula. The data formula is also
    stored as the description of the attachment, if any.

    :param attachment_id: id of the ``ir.attachment`` to annotate, or falsy
    :param spreadsheet_key: key of the target spreadsheet (used in the URLs)
    :param model: model name whose tree view is inspected
    :param domain: domain as a string; quotes are rewritten for the formula
    :param groupbys: group-by specification, or falsy for a plain browse
    :param view_id: id of the tree view to read the fields from
    :return: always True; transport and HTTP errors are only logged
    """
    access_token = self.get_access_token(scope='https://spreadsheets.google.com/feeds')

    fields = self.env[model].fields_view_get(view_id=view_id, view_type='tree')
    doc = etree.XML(fields.get('arch'))
    display_fields = []
    for node in doc.xpath("//field"):
        raw_modifiers = node.get('modifiers')
        if not raw_modifiers:
            # Fields without modifiers are not listed.
            continue
        modifiers = json.loads(raw_modifiers)
        if not modifiers.get('invisible') and not modifiers.get('column_invisible'):
            display_fields.append(node.get('name'))
    fields = " ".join(display_fields)

    domain = domain.replace("'", r"\'").replace('"', "'")
    if groupbys:
        fields = "%s %s" % (groupbys, fields)
        formula = '=oe_read_group("%s";"%s";"%s";"%s")' % (model, fields, groupbys, domain)
    else:
        formula = '=oe_browse("%s";"%s";"%s")' % (model, fields, domain)

    url = self.env['ir.config_parameter'].sudo().get_param('web.base.url')
    dbname = self._cr.dbname
    user = self.env['res.users'].browse(self.env.user.id).read(['login', 'password'])[0]
    username = user['login']
    password = user['password']
    # NOTE(review): when a password value is read, it is written as-is into
    # the spreadsheet cell below -- confirm this exposure is intended.
    if not password:
        config_formula = '=oe_settings("%s";"%s")' % (url, dbname)
    else:
        config_formula = '=oe_settings("%s";"%s";"%s";"%s")' % (url, dbname, username, password)

    request = (
        '<feed xmlns="http://www.w3.org/2005/Atom" '
        'xmlns:batch="http://schemas.google.com/gdata/batch" '
        'xmlns:gs="http://schemas.google.com/spreadsheets/2006"> '
        '<id>https://spreadsheets.google.com/feeds/cells/{key}/od6/private/full</id> '
        '<entry> '
        '<batch:id>A1</batch:id> '
        '<batch:operation type="update"/> '
        '<id>https://spreadsheets.google.com/feeds/cells/{key}/od6/private/full/R1C1</id> '
        '<link rel="edit" type="application/atom+xml" '
        'href="https://spreadsheets.google.com/feeds/cells/{key}/od6/private/full/R1C1"/> '
        '<gs:cell row="1" col="1" inputValue="{formula}"/> '
        '</entry> '
        '<entry> '
        '<batch:id>A2</batch:id> '
        '<batch:operation type="update"/> '
        '<id>https://spreadsheets.google.com/feeds/cells/{key}/od6/private/full/R60C15</id> '
        '<link rel="edit" \n'
        'type="application/atom+xml" '
        'href="https://spreadsheets.google.com/feeds/cells/{key}/od6/private/full/R60C15"/> '
        '<gs:cell row="60" col="15" inputValue="{config}"/> '
        '</entry> '
        '</feed>'
    ).format(key=spreadsheet_key,
             formula=misc.html_escape(formula),
             config=misc.html_escape(config_formula))

    try:
        response = requests.post(
            'https://spreadsheets.google.com/feeds/cells/%s/od6/private/full/batch?%s' % (
                spreadsheet_key,
                werkzeug.url_encode({'v': 3, 'access_token': access_token}),
            ),
            data=request,
            headers={'content-type': 'application/atom+xml', 'If-Match': '*'},
            timeout=TIMEOUT,
        )
        # An HTTP error status is reported through the same warning as a
        # transport failure (requests' HTTPError derives from IOError).
        response.raise_for_status()
    except IOError:
        _logger.warning("An error occured while writting the formula on the Google Spreadsheet.")

    description = ' formula: %s ' % formula
    if attachment_id:
        self.env['ir.attachment'].browse(attachment_id).write({'description': description})
    return True