def plaintext2html(text, container_tag=False):
    """Convert plaintext into html.

    The text is first escaped with ``misc.html_escape()`` to manage html
    entities, then:

    - every line break (CRLF, CR or LF) is replaced by ``<br/>``
    - urls are converted into clickable links
    - the content is enclosed into ``<p>``; 2 or more consecutive ``<br/>``
      are considered as a paragraph break

    :param text: plaintext to convert; coerced with ``ustr()``
    :param string container_tag: optional tag name wrapped around the whole
        result; when falsy (the default) no container is added
    :return: the resulting html, coerced with ``ustr()``
    """
    text = misc.html_escape(ustr(text))

    # 1. replace line breaks. CRLF is matched first so that a Windows line
    # ending yields a single <br/> instead of two, which step 3-4 would
    # otherwise mistake for a paragraph break.
    text = re.sub(r'\r\n|\r|\n', '<br/>', text)

    # 2. clickable links
    text = html_keep_url(text)

    # 3-4: form paragraphs by cutting the text on every run of 2+ <br/>
    br_tags = re.compile(r'(([<]\s*[bB][rR]\s*\/?[>]\s*){2,})')
    paragraphs = []
    idx = 0
    for item in br_tags.finditer(text):
        paragraphs.append(text[idx:item.start()])
        idx = item.end()
    paragraphs.append(text[idx:])
    final = '<p>%s</p>' % '</p><p>'.join(paragraphs)

    # 5. container
    if container_tag:
        final = '<%s>%s</%s>' % (container_tag, final, container_tag)
    return ustr(final)
def test_quote_text(self):
    """Sanitize the TEXT_1 and TEXT_2 samples and check kept / quoted parts.

    Every ``*_IN`` fragment must appear verbatim in the sanitized html and
    every ``*_OUT`` fragment must appear, escaped, inside a mail-quote span.
    """
    quote_pattern = u'<span data-o-mail-quote="1">%s</span>'
    samples = (
        (test_mail_examples.TEXT_1,
         test_mail_examples.TEXT_1_IN,
         test_mail_examples.TEXT_1_OUT),
        (test_mail_examples.TEXT_2,
         test_mail_examples.TEXT_2_IN,
         test_mail_examples.TEXT_2_OUT),
    )
    for source, kept_parts, quoted_parts in samples:
        html = html_sanitize(source)
        for kept in kept_parts:
            self.assertIn(kept, html)
        for quoted in quoted_parts:
            expected = quote_pattern % misc.html_escape(quoted)
            self.assertIn(expected, html)
def _do_request(self, template, data):
    """Render ``template`` with ``data``, wrap it in a SOAP envelope and send it.

    :param template: reference passed to ``self.env.ref()`` to get the
        template that is rendered into the transaction XML
    :param data: mapping used to render the template; ``merchant_id`` and
        ``merchant_pwd`` are also read from it directly
    :return: ``"not setup"`` when ``merchant_id`` or ``merchant_pwd`` is
        falsy, ``"timeout"`` when anything in the request step raises,
        otherwise the unescaped, decoded response body
    """
    # The template is rendered before the credentials are checked.
    xml_transaction = self.env.ref(template).render(data).decode()
    if not data['merchant_id'] or not data['merchant_pwd']:
        return "not setup"
    # NOTE(review): the xmlns values are empty strings in this view —
    # confirm the namespace URIs against the original file.
    soap_header = '<soapenv:Envelope xmlns:soapenv="" xmlns:mer=""><soapenv:Header/><soapenv:Body><mer:CreditTransaction><mer:tran>'
    # The merchant password is concatenated into the envelope as-is (it is
    # not passed through html_escape, unlike the transaction below).
    soap_footer = '</mer:tran><mer:pw>' + data[
        'merchant_pwd'] + '</mer:pw></mer:CreditTransaction></soapenv:Body></soapenv:Envelope>'
    # The rendered transaction is escaped so it travels as text inside
    # <mer:tran>.
    xml_transaction = soap_header + misc.html_escape(
        xml_transaction) + soap_footer
    response = ''
    headers = {
        'Content-Type': 'text/xml',
        'SOAPAction': '',
    }
    # NOTE(review): both the default url and the test-environment url are
    # empty strings in this view — confirm the endpoints.
    url = ''
    if self.env['ir.config_parameter'].sudo().get_param(
            'pos_mercury.enable_test_env'):
        url = ''
    try:
        # NOTE(review): the call expression on the next line is incomplete
        # in this view (nothing between `=` and `, data=`); `url` is never
        # used as shown. Presumably an HTTP POST to `url` — confirm.
        r =, data=xml_transaction, headers=headers, timeout=65)
        r.raise_for_status()
        response = werkzeug.utils.unescape(r.content.decode())
    except Exception:
        # Any failure (not only an actual timeout) is reported as "timeout".
        response = "timeout"
    return response
def test_quote_thunderbird(self):
    """Sanitize the QUOTE_THUNDERBIRD_1 sample and check kept / quoted parts."""
    sanitized = html_sanitize(test_mail_examples.QUOTE_THUNDERBIRD_1)

    # fragments that must survive untouched
    for kept in test_mail_examples.QUOTE_THUNDERBIRD_1_IN:
        self.assertIn(kept, sanitized)

    # fragments that must end up, escaped, inside a mail-quote span
    quote_pattern = u'<span data-o-mail-quote="1">%s</span>'
    for quoted in test_mail_examples.QUOTE_THUNDERBIRD_1_OUT:
        expected = quote_pattern % misc.html_escape(quoted)
        self.assertIn(expected, sanitized)
def test_quote_blockquote(self):
    """Sanitize the QUOTE_BLOCKQUOTE sample and check kept / quoted parts."""
    sanitized = html_sanitize(test_mail_examples.QUOTE_BLOCKQUOTE)

    # fragments that must survive untouched
    for kept in test_mail_examples.QUOTE_BLOCKQUOTE_IN:
        self.assertIn(kept, sanitized)

    # only the opening of the quote span is checked here: the expected
    # pattern has no closing </span>
    quote_prefix = u'<span data-o-mail-quote="1">%s'
    for quoted in test_mail_examples.QUOTE_BLOCKQUOTE_OUT:
        expected = quote_prefix % misc.html_escape(quoted)
        self.assertIn(expected, sanitized)
def test_sanitize_unescape_emails(self):
    """Check that these html snippets are not html-escaped by html_sanitize.

    For each snippet, the escaped form of the whole snippet must be absent
    from the sanitized output, and the text before its first ``>`` must be
    present.
    """
    not_emails = [
        '<blockquote cite="" type="cite">cat</blockquote>',
        '<img alt="@github-login" class="avatar" src="/web/image/pi" height="36" width="36">',
    ]
    failure_message = 'html_sanitize stripped emails of original html'
    for snippet in not_emails:
        sanitized = html_sanitize(snippet)
        # take only the left part, as the sanitizer could add data
        # information on the node
        opening, _sep, _rest = snippet.partition('>')
        escaped_snippet = misc.html_escape(snippet)
        self.assertNotIn(escaped_snippet, sanitized, failure_message)
        self.assertIn(opening, sanitized)
def set_file(self, file, import_id, jsonp='callback'):
    """Write an uploaded file on a ``base_import.import`` record.

    :param file: uploaded file object; ``filename`` and ``content_type`` are
        read from it
    :param import_id: id of the ``base_import.import`` record, coerced to int
    :param jsonp: value escaped with ``misc.html_escape`` and used as the
        first argument of the response format string
    :return: the formatted response string
    """
    import_id = int(import_id)
    written = request.env['base_import.import'].browse(import_id).write({
        # NOTE(review): the value of 'file' is missing in this view —
        # presumably the content of `file`; confirm against the original.
        'file':,
        'file_name': file.filename,
        'file_type': file.content_type,
    })
    # NOTE(review): the format string is empty as shown, so formatting it
    # with two arguments would raise TypeError. Presumably a JSONP-style
    # wrapper calling `jsonp` with the JSON result belongs here — confirm.
    return '' % (misc.html_escape(jsonp), json.dumps({'result': written}))
def test_i18n(self):
    """Render a ``t-field`` span for a record whose name mixes non-ASCII
    text and html-special characters, and compare it to the expected markup.
    """
    # NOTE(review): the t-field expression is an empty string in this view —
    # presumably it targets the name of `company`; confirm.
    field = etree.Element('span', {'t-field': u''})
    # Non-ASCII letters plus '<', '&', '>' and '%' in the same value.
    s = u"Testing «ταБЬℓσ»: 1<2 & 4+1>3, now 20% off!"
    # NOTE(review): the model name is an empty string in this view — confirm.
    company = self.env[''].create({'name': s})
    result = self.engine.render(field, {'company': company})
    # Both sides are parsed so the comparison is made on elements, not on
    # the raw strings.
    # NOTE(review): the first format argument (for data-oe-id="%d") is
    # missing in this view, as are the data-oe-model and data-oe-expression
    # values — confirm against the original file.
    self.assertEqual(
        etree.fromstring(result),
        etree.fromstring(u'<span data-oe-model="" data-oe-id="%d" '
                         u'data-oe-field="name" data-oe-type="char" '
                         u'data-oe-expression="">%s</span>' % (, misc.html_escape(s), )),
    )
def test_quote_basic_text(self):
    """Check signature / quote detection on small text and html samples.

    Each case is ``(source, kept parts, quoted parts)``: kept parts must be
    found verbatim in the sanitized html, quoted parts must be found,
    escaped, inside a mail-quote span.
    """
    quote_pattern = u'<span data-o-mail-quote="1">%s</span>'
    cases = [
        ("""This is Sparta!\n--\nAdministrator\n+9988776655""",
         ['This is Sparta!'],
         ['\n--\nAdministrator\n+9988776655']),
        ("""<p>This is Sparta!\n--\nAdministrator</p>""",
         [],
         ['\n--\nAdministrator']),
        ("""<p>This is Sparta!<br/>--<br>Administrator</p>""",
         ['This is Sparta!'],
         []),
        ("""This is Sparta!\n>Ah bon ?\nCertes\n> Chouette !\nClair""",
         ['This is Sparta!', 'Certes', 'Clair'],
         ['\n>Ah bon ?', '\n> Chouette !']),
    ]
    for source, kept_parts, quoted_parts in cases:
        sanitized = html_sanitize(source)
        for kept in kept_parts:
            self.assertIn(kept, sanitized)
        for quoted in quoted_parts:
            expected = quote_pattern % misc.html_escape(quoted)
            self.assertIn(expected, sanitized)
def append_content_to_html(html, content, plaintext=True, preserve=False, container_tag=False):
    """Append extra content at the end of an HTML snippet.

    The content is inserted right before ``</body>``, else right before
    ``</html>``, else at the very end of ``html``. How ``content`` is
    prepared depends on the flags:

    - ``plaintext`` and ``preserve``: escaped and wrapped into a ``<pre>``
    - ``plaintext`` only: converted with ``plaintext2html`` (using
      ``container_tag`` to wrap the whole content)
    - otherwise: treated as html; its ``<html>``, ``<body>``, ``<head>`` and
      doctype tags are stripped

    A side-effect of this method is to coerce all HTML tag names to
    lowercase in ``html``.

    :param str html: html tagsoup (doesn't have to be XHTML)
    :param str content: extra content to append
    :param bool plaintext: whether content is plaintext and must be
        converted to html before being appended
    :param bool preserve: if content is plaintext, wrap it into a <pre>
        instead of converting it into html
    :param container_tag: forwarded to ``plaintext2html`` when content is
        plaintext and ``preserve`` is False
    :return: ``html`` with the prepared content inserted
    """
    html = ustr(html)
    if plaintext and preserve:
        content = u'\n<pre>%s</pre>\n' % misc.html_escape(ustr(content))
    elif plaintext:
        content = '\n%s\n' % plaintext2html(content, container_tag)
    else:
        content = re.sub(r'(?i)(</?(?:html|body|head|!\s*DOCTYPE)[^>]*>)', '', content)
        content = u'\n%s\n' % ustr(content)

    # Force all tags to lowercase: group 1 is '<' or '</', group 2 the tag
    # name, group 3 the character ending the name. Only the name is changed,
    # so that the find() calls below can locate '</body>' / '</html>'.
    html = re.sub(
        r'(</?)(\w+)([ >])',
        lambda m: '%s%s%s' % (m.group(1), m.group(2).lower(), m.group(3)),
        html)

    insert_location = html.find('</body>')
    if insert_location == -1:
        insert_location = html.find('</html>')
    if insert_location == -1:
        # no closing tag to insert before: append at the end
        return '%s%s' % (html, content)
    return '%s%s%s' % (html[:insert_location], content, html[insert_location:])
def write_config_formula(self, attachment_id, spreadsheet_key, model, domain, groupbys, view_id):
    """Build the data and settings formulas for a spreadsheet and store them.

    A data formula (``oe_read_group`` when ``groupbys`` is set, else
    ``oe_browse``) is built from the fields of the tree view, and a settings
    formula (``oe_settings``) from the base url, database name and, when a
    password is available, the login and password. Both are escaped and
    placed in an atom feed targeting cells R1C1 and R60C15. The data formula
    is finally written in the description of the attachment, if any.

    :param attachment_id: id of the ``ir.attachment`` whose description is
        updated; nothing is written when falsy
    :param spreadsheet_key: key formatted into the feed (and the request)
    :param model: model name used to read the tree view and in the formula
    :param domain: domain as a string; its quoting and booleans are rewritten
    :param groupbys: group-by value; selects the formula kind when truthy
    :param view_id: id of the tree view to read the fields from
    :return: always ``True``
    """
    # NOTE(review): the scope is an empty string in this view — confirm.
    access_token = self.get_access_token(
        scope='')
    fields = self.env[model].fields_view_get(view_id=view_id,
                                             view_type='tree')
    doc = etree.XML(fields.get('arch'))
    display_fields = []
    # Keep the fields carrying a 'modifiers' attribute that marks them
    # neither 'invisible' nor 'column_invisible'.
    for node in doc.xpath("//field"):
        if node.get('modifiers'):
            modifiers = json.loads(node.get('modifiers'))
            if not modifiers.get('invisible') and not modifiers.get(
                    'column_invisible'):
                display_fields.append(node.get('name'))
    fields = " ".join(display_fields)
    # Escape single quotes, turn double quotes into single ones and
    # lowercase the booleans. Replacement is purely textual, so 'True' /
    # 'False' inside a value are rewritten too.
    domain = domain.replace("'", r"\'").replace('"', "'").replace(
        'True', 'true').replace('False', 'false')
    if groupbys:
        fields = "%s %s" % (groupbys, fields)
        formula = '=oe_read_group("%s";"%s";"%s";"%s")' % (
            model, fields, groupbys, domain)
    else:
        formula = '=oe_browse("%s";"%s";"%s")' % (model, fields, domain)
    url = self.env['ir.config_parameter'].sudo().get_param('web.base.url')
    dbname = self._cr.dbname
    # NOTE(review): as shown, the field names are passed to browse(); part
    # of this expression looks missing in this view (presumably a read of
    # the current user) — confirm against the original file.
    user = self.env['res.users'].browse(
        ['login', 'password'])[0]
    username = user['login']
    password = user['password']
    # When a password is available, login and password are embedded in the
    # settings formula that is written to the spreadsheet.
    if not password:
        config_formula = '=oe_settings("%s";"%s")' % (url, dbname)
    else:
        config_formula = '=oe_settings("%s";"%s";"%s";"%s")' % (
            url, dbname, username, password)
    # Batch feed with two cell updates: the data formula in R1C1 and the
    # settings formula in R60C15; both are escaped as they land in an
    # attribute value.
    # NOTE(review): the xmlns values and the url prefixes before {key} are
    # empty in this view — confirm.
    request = '''<feed xmlns="" xmlns:batch="" xmlns:gs=""> <id>{key}/od6/private/full</id> <entry> <batch:id>A1</batch:id> <batch:operation type="update"/> <id>{key}/od6/private/full/R1C1</id> <link rel="edit" type="application/atom+xml" href="{key}/od6/private/full/R1C1"/> <gs:cell row="1" col="1" inputValue="{formula}"/> </entry> <entry> <batch:id>A2</batch:id> <batch:operation type="update"/> <id>{key}/od6/private/full/R60C15</id> <link rel="edit" type="application/atom+xml" href="{key}/od6/private/full/R60C15"/> <gs:cell row="60" col="15" inputValue="{config}"/> </entry> </feed>'''.format(key=spreadsheet_key,
                               formula=misc.html_escape(formula),
                               config=misc.html_escape(config_formula))
    try:
        # NOTE(review): the call expression and the url format string are
        # incomplete in this view (empty string followed by keyword
        # arguments). Presumably an HTTP request sending `request` —
        # confirm. `req` is not used afterwards in the visible code.
        req = '' % (spreadsheet_key, werkzeug.url_encode({
            'v': 3,
            'access_token': access_token
        })), data=request, headers={
            'content-type': 'application/atom+xml',
            'If-Match': '*'
        }, timeout=TIMEOUT, )
    except IOError:
        # Best effort: a failed write is only logged, the attachment
        # description below is still updated.
        _logger.warning(
            "An error occured while writing the formula on the Google Spreadsheet."
        )
    description = ''' formula: %s ''' % formula
    if attachment_id:
        self.env['ir.attachment'].browse(attachment_id).write(
            {'description': description})
    return True
def html_sanitize(src, silent=True, sanitize_tags=True, sanitize_attributes=False, sanitize_style=False, strip_style=False, strip_classes=False):
    """Clean an html source with ``_Cleaner`` while keeping mako tags alive.

    :param src: html to sanitize; a falsy value is returned untouched
    :param bool silent: when True (default), errors raised while cleaning
        are logged and a placeholder paragraph is returned; when False they
        are re-raised. A parser error mentioning 'empty' always returns an
        empty string.
    :param bool sanitize_tags: give ``allowed_tags``, ``tags_to_kill`` and
        ``tags_to_remove`` to the cleaner
    :param bool sanitize_attributes: only keep ``safe_attrs`` (requires
        lxml >= 3.1.0, otherwise all attributes are kept)
    :param bool sanitize_style: forwarded to the cleaner as ``sanitize_style``
    :param bool strip_style: forwarded to the cleaner as ``style``
    :param bool strip_classes: remove classes, either by dropping ``class``
        from the safe attributes or through the cleaner's ``strip_classes``
    :return: the cleaned html, without its enclosing ``<div>`` if any
    """
    if not src:
        return src
    src = ustr(src, errors='replace')
    # html: remove encoding attribute inside tags
    doctype = re.compile(r'(<[^>]*\s)(encoding=(["\'][^"\']*?["\']|[^\s\n\r>]+)(\s[^>]*|/)?>)', re.IGNORECASE | re.DOTALL)
    src = doctype.sub(u"", src)

    logger = logging.getLogger(__name__ + '.html_sanitize')

    # html encode mako tags <% ... %> to decode them later and keep them
    # alive, otherwise they are stripped by the cleaner. The escaped forms
    # are kept so that the decoding below is the exact inverse.
    mako_open, mako_close = u'<%', u'%>'
    escaped_open = misc.html_escape(mako_open)
    escaped_close = misc.html_escape(mako_close)
    src = src.replace(mako_open, escaped_open)
    src = src.replace(mako_close, escaped_close)

    kwargs = {
        'page_structure': True,
        'style': strip_style,              # True = remove style tags/attrs
        'sanitize_style': sanitize_style,  # True = sanitize styling
        'forms': True,                     # True = remove form tags
        'remove_unknown_tags': False,
        'comments': False,
        'processing_instructions': False
    }
    if sanitize_tags:
        kwargs['allow_tags'] = allowed_tags
        if etree.LXML_VERSION >= (2, 3, 1):
            # kill_tags attribute has been added in version 2.3.1
            kwargs.update({
                'kill_tags': tags_to_kill,
                'remove_tags': tags_to_remove,
            })
        else:
            kwargs['remove_tags'] = tags_to_kill + tags_to_remove

    # lxml < 3.1.0 does not allow to specify safe_attrs. We keep all
    # attributes in order to keep "style"
    if sanitize_attributes and etree.LXML_VERSION >= (3, 1, 0):
        if strip_classes:
            current_safe_attrs = safe_attrs - frozenset(['class'])
        else:
            current_safe_attrs = safe_attrs
        kwargs.update({
            'safe_attrs_only': True,
            'safe_attrs': current_safe_attrs,
        })
    else:
        kwargs.update({
            'safe_attrs_only': False,  # keep oe-data attributes + style
            'strip_classes': strip_classes,  # remove classes, even when keeping other attributes
        })

    try:
        # some corner cases make the parser crash (such as <SCRIPT/XSS SRC=\"\"></SCRIPT> in test_mail)
        cleaner = _Cleaner(**kwargs)
        cleaned = cleaner.clean_html(src)
        assert isinstance(cleaned, str)
        # MAKO compatibility: $, { and } inside quotes are escaped, preventing correct mako execution
        for quoted, plain in (
                (u'%24', u'$'),
                (u'%7B', u'{'),
                (u'%7D', u'}'),
                (u'%20', u' '),
                (u'%5B', u'['),
                (u'%5D', u']'),
                (u'%7C', u'|'),
        ):
            cleaned = cleaned.replace(quoted, plain)
        # decode the mako tags that were html encoded before cleaning
        cleaned = cleaned.replace(escaped_open, mako_open)
        cleaned = cleaned.replace(escaped_close, mako_close)
        # html considerations so real html content match database value
        # NOTE(review): the result of this replace is discarded, so the
        # statement has no effect. Left untouched because assigning it would
        # change the output — confirm the intended replacement first.
        cleaned.replace(u'\xa0', u' ')
    except etree.ParserError as e:
        if 'empty' in str(e):
            return u""
        if not silent:
            raise
        logger.warning(u'ParserError obtained when sanitizing %r', src, exc_info=True)
        cleaned = u'<p>ParserError when sanitizing</p>'
    except Exception:
        if not silent:
            raise
        logger.warning(u'unknown error obtained when sanitizing %r', src, exc_info=True)
        cleaned = u'<p>Unknown error when sanitizing</p>'

    # this is ugly, but lxml/etree tostring want to put everything in a 'div' that breaks the editor -> remove that
    if cleaned.startswith(u'<div>') and cleaned.endswith(u'</div>'):
        cleaned = cleaned[5:-6]

    return cleaned