Python html_escape Examples, harpiya.tools.misc.html_escape Python Examples

Example #1

0

Show file

File: mail.py Project: marionumza/saas

def plaintext2html(text, container_tag=False):
    """ Convert plaintext into html. Content of the text is escaped to manage
        html entities, using misc.html_escape().
        - all \n,\r are replaced by <br />
        - enclose content into <p>
        - convert url into clickable link
        - 2 or more consecutive <br /> are considered as paragraph breaks

        :param string container_tag: container of the html; by default the
            content is embedded into a <div>
    """
    text = misc.html_escape(ustr(text))

    # 1. replace \n and \r
    text = text.replace('\n', '<br/>')
    text = text.replace('\r', '<br/>')

    # 2. clickable links
    text = html_keep_url(text)

    # 3-4: form paragraphs
    idx = 0
    final = '<p>'
    br_tags = re.compile(r'(([<]\s*[bB][rR]\s*\/?[>]\s*){2,})')
    for item in re.finditer(br_tags, text):
        final += text[idx:item.start()] + '</p><p>'
        idx = item.end()
    final += text[idx:] + '</p>'

    # 5. container
    if container_tag:
        final = '<%s>%s</%s>' % (container_tag, final, container_tag)
    return ustr(final)

Example #2

0

Show file

    def test_quote_text(self):
        html = html_sanitize(test_mail_examples.TEXT_1)
        for ext in test_mail_examples.TEXT_1_IN:
            self.assertIn(ext, html)
        for ext in test_mail_examples.TEXT_1_OUT:
            self.assertIn(
                u'<span data-o-mail-quote="1">%s</span>' %
                misc.html_escape(ext), html)

        html = html_sanitize(test_mail_examples.TEXT_2)
        for ext in test_mail_examples.TEXT_2_IN:
            self.assertIn(ext, html)
        for ext in test_mail_examples.TEXT_2_OUT:
            self.assertIn(
                u'<span data-o-mail-quote="1">%s</span>' %
                misc.html_escape(ext), html)

Example #3

0

Show file

File: pos_mercury_transaction.py Project: marionumza/saas

    def _do_request(self, template, data):
        xml_transaction = self.env.ref(template).render(data).decode()

        if not data['merchant_id'] or not data['merchant_pwd']:
            return "not setup"

        soap_header = '<soapenv:Envelope xmlns:soapenv="http://schemas.xmlsoap.org/soap/envelope/" xmlns:mer="http://www.mercurypay.com"><soapenv:Header/><soapenv:Body><mer:CreditTransaction><mer:tran>'
        soap_footer = '</mer:tran><mer:pw>' + data[
            'merchant_pwd'] + '</mer:pw></mer:CreditTransaction></soapenv:Body></soapenv:Envelope>'
        xml_transaction = soap_header + misc.html_escape(
            xml_transaction) + soap_footer

        response = ''

        headers = {
            'Content-Type': 'text/xml',
            'SOAPAction': 'http://www.mercurypay.com/CreditTransaction',
        }

        url = 'https://w1.mercurypay.com/ws/ws.asmx'
        if self.env['ir.config_parameter'].sudo().get_param(
                'pos_mercury.enable_test_env'):
            url = 'https://w1.mercurycert.net/ws/ws.asmx'

        try:
            r = requests.post(url,
                              data=xml_transaction,
                              headers=headers,
                              timeout=65)
            r.raise_for_status()
            response = werkzeug.utils.unescape(r.content.decode())
        except Exception:
            response = "timeout"

        return response

Example #4

0

Show file

 def test_quote_thunderbird(self):
     html = html_sanitize(test_mail_examples.QUOTE_THUNDERBIRD_1)
     for ext in test_mail_examples.QUOTE_THUNDERBIRD_1_IN:
         self.assertIn(ext, html)
     for ext in test_mail_examples.QUOTE_THUNDERBIRD_1_OUT:
         self.assertIn(
             u'<span data-o-mail-quote="1">%s</span>' %
             misc.html_escape(ext), html)

Example #5

0

Show file

 def test_quote_blockquote(self):
     html = html_sanitize(test_mail_examples.QUOTE_BLOCKQUOTE)
     for ext in test_mail_examples.QUOTE_BLOCKQUOTE_IN:
         self.assertIn(ext, html)
     for ext in test_mail_examples.QUOTE_BLOCKQUOTE_OUT:
         self.assertIn(
             u'<span data-o-mail-quote="1">%s' % misc.html_escape(ext),
             html)

Example #6

0

Show file

 def test_sanitize_unescape_emails(self):
     not_emails = [
         '<blockquote cite="mid:CAEJSRZvWvud8c6Qp=wfNG6O1+wK3i_jb33qVrF7XyrgPNjnyUA@mail.gmail.com" type="cite">cat</blockquote>',
         '<img alt="@github-login" class="avatar" src="/web/image/pi" height="36" width="36">'
     ]
     for email in not_emails:
         sanitized = html_sanitize(email)
         left_part = email.split(
             '>'
         )[0]  # take only left part, as the sanitizer could add data information on node
         self.assertNotIn(misc.html_escape(email), sanitized,
                          'html_sanitize stripped emails of original html')
         self.assertIn(left_part, sanitized)

Example #7

0

Show file

    def set_file(self, file, import_id, jsonp='callback'):
        import_id = int(import_id)

        written = request.env['base_import.import'].browse(import_id).write({
            'file':
            file.read(),
            'file_name':
            file.filename,
            'file_type':
            file.content_type,
        })

        return 'window.top.%s(%s)' % (misc.html_escape(jsonp),
                                      json.dumps({'result': written}))

Example #8

0

Show file

    def test_i18n(self):
        field = etree.Element('span', {'t-field': u'company.name'})
        s = u"Testing «ταБЬℓσ»: 1<2 & 4+1>3, now 20% off!"
        company = self.env['res.company'].create({'name': s})

        result = self.engine.render(field, {'company': company})
        self.assertEqual(
            etree.fromstring(result),
            etree.fromstring(u'<span data-oe-model="res.company" data-oe-id="%d" '
                  u'data-oe-field="name" data-oe-type="char" '
                  u'data-oe-expression="company.name">%s</span>' % (
                company.id,
                misc.html_escape(s),
            )),
        )

Example #9

0

Show file

 def test_quote_basic_text(self):
     test_data = [
         ("""This is Sparta!\n--\nAdministrator\n+9988776655""",
          ['This is Sparta!'], ['\n--\nAdministrator\n+9988776655']),
         ("""<p>This is Sparta!\n--\nAdministrator</p>""", [],
          ['\n--\nAdministrator']),
         ("""<p>This is Sparta!<br/>--<br>Administrator</p>""",
          ['This is Sparta!'], []),
         ("""This is Sparta!\n>Ah bon ?\nCertes\n> Chouette !\nClair""",
          ['This is Sparta!', 'Certes',
           'Clair'], ['\n>Ah bon ?', '\n> Chouette !'])
     ]
     for test, in_lst, out_lst in test_data:
         new_html = html_sanitize(test)
         for text in in_lst:
             self.assertIn(text, new_html)
         for text in out_lst:
             self.assertIn(
                 u'<span data-o-mail-quote="1">%s</span>' %
                 misc.html_escape(text), new_html)

Example #10

0

Show file

File: mail.py Project: marionumza/saas

def append_content_to_html(html, content, plaintext=True, preserve=False, container_tag=False):
    """ Append extra content at the end of an HTML snippet, trying
        to locate the end of the HTML document (</body>, </html>, or
        EOF), and converting the provided content in html unless ``plaintext``
        is False.
        Content conversion can be done in two ways:
        - wrapping it into a pre (preserve=True)
        - use plaintext2html (preserve=False, using container_tag to wrap the
            whole content)
        A side-effect of this method is to coerce all HTML tags to
        lowercase in ``html``, and strip enclosing <html> or <body> tags in
        content if ``plaintext`` is False.

        :param str html: html tagsoup (doesn't have to be XHTML)
        :param str content: extra content to append
        :param bool plaintext: whether content is plaintext and should
            be wrapped in a <pre/> tag.
        :param bool preserve: if content is plaintext, wrap it into a <pre>
            instead of converting it into html
    """
    html = ustr(html)
    if plaintext and preserve:
        content = u'\n<pre>%s</pre>\n' % misc.html_escape(ustr(content))
    elif plaintext:
        content = '\n%s\n' % plaintext2html(content, container_tag)
    else:
        content = re.sub(r'(?i)(</?(?:html|body|head|!\s*DOCTYPE)[^>]*>)', '', content)
        content = u'\n%s\n' % ustr(content)
    # Force all tags to lowercase
    html = re.sub(r'(</?)(\w+)([ >])',
        lambda m: '%s%s%s' % (m.group(1), m.group(2).lower(), m.group(3)), html)
    insert_location = html.find('</body>')
    if insert_location == -1:
        insert_location = html.find('</html>')
    if insert_location == -1:
        return '%s%s' % (html, content)
    return '%s%s%s' % (html[:insert_location], content, html[insert_location:])

Example #11

0

Show file

File: google_drive.py Project: marionumza/saas

    def write_config_formula(self, attachment_id, spreadsheet_key, model,
                             domain, groupbys, view_id):
        access_token = self.get_access_token(
            scope='https://spreadsheets.google.com/feeds')

        fields = self.env[model].fields_view_get(view_id=view_id,
                                                 view_type='tree')
        doc = etree.XML(fields.get('arch'))
        display_fields = []
        for node in doc.xpath("//field"):
            if node.get('modifiers'):
                modifiers = json.loads(node.get('modifiers'))
                if not modifiers.get('invisible') and not modifiers.get(
                        'column_invisible'):
                    display_fields.append(node.get('name'))
        fields = " ".join(display_fields)
        domain = domain.replace("'", r"\'").replace('"', "'").replace(
            'True', 'true').replace('False', 'false')
        if groupbys:
            fields = "%s %s" % (groupbys, fields)
            formula = '=oe_read_group("%s";"%s";"%s";"%s")' % (
                model, fields, groupbys, domain)
        else:
            formula = '=oe_browse("%s";"%s";"%s")' % (model, fields, domain)
        url = self.env['ir.config_parameter'].sudo().get_param('web.base.url')
        dbname = self._cr.dbname
        user = self.env['res.users'].browse(self.env.user.id).read(
            ['login', 'password'])[0]
        username = user['login']
        password = user['password']
        if not password:
            config_formula = '=oe_settings("%s";"%s")' % (url, dbname)
        else:
            config_formula = '=oe_settings("%s";"%s";"%s";"%s")' % (
                url, dbname, username, password)
        request = '''<feed xmlns="http://www.w3.org/2005/Atom"
      xmlns:batch="http://schemas.google.com/gdata/batch"
      xmlns:gs="http://schemas.google.com/spreadsheets/2006">
  <id>https://spreadsheets.google.com/feeds/cells/{key}/od6/private/full</id>
  <entry>
    <batch:id>A1</batch:id>
    <batch:operation type="update"/>
    <id>https://spreadsheets.google.com/feeds/cells/{key}/od6/private/full/R1C1</id>
    <link rel="edit" type="application/atom+xml"
      href="https://spreadsheets.google.com/feeds/cells/{key}/od6/private/full/R1C1"/>
    <gs:cell row="1" col="1" inputValue="{formula}"/>
  </entry>
  <entry>
    <batch:id>A2</batch:id>
    <batch:operation type="update"/>
    <id>https://spreadsheets.google.com/feeds/cells/{key}/od6/private/full/R60C15</id>
    <link rel="edit" type="application/atom+xml"
      href="https://spreadsheets.google.com/feeds/cells/{key}/od6/private/full/R60C15"/>
    <gs:cell row="60" col="15" inputValue="{config}"/>
  </entry>
</feed>'''.format(key=spreadsheet_key,
                  formula=misc.html_escape(formula),
                  config=misc.html_escape(config_formula))

        try:
            req = requests.post(
                'https://spreadsheets.google.com/feeds/cells/%s/od6/private/full/batch?%s'
                % (spreadsheet_key,
                   werkzeug.url_encode({
                       'v': 3,
                       'access_token': access_token
                   })),
                data=request,
                headers={
                    'content-type': 'application/atom+xml',
                    'If-Match': '*'
                },
                timeout=TIMEOUT,
            )
        except IOError:
            _logger.warning(
                "An error occured while writing the formula on the Google Spreadsheet."
            )

        description = '''
        formula: %s
        ''' % formula
        if attachment_id:
            self.env['ir.attachment'].browse(attachment_id).write(
                {'description': description})
        return True

Example #12

0

Show file

File: mail.py Project: marionumza/saas

def html_sanitize(src, silent=True, sanitize_tags=True, sanitize_attributes=False, sanitize_style=False, strip_style=False, strip_classes=False):
    if not src:
        return src
    src = ustr(src, errors='replace')
    # html: remove encoding attribute inside tags
    doctype = re.compile(r'(<[^>]*\s)(encoding=(["\'][^"\']*?["\']|[^\s\n\r>]+)(\s[^>]*|/)?>)', re.IGNORECASE | re.DOTALL)
    src = doctype.sub(u"", src)

    logger = logging.getLogger(__name__ + '.html_sanitize')

    # html encode mako tags <% ... %> to decode them later and keep them alive, otherwise they are stripped by the cleaner
    src = src.replace(u'<%', misc.html_escape(u'<%'))
    src = src.replace(u'%>', misc.html_escape(u'%>'))

    kwargs = {
        'page_structure': True,
        'style': strip_style,              # True = remove style tags/attrs
        'sanitize_style': sanitize_style,  # True = sanitize styling
        'forms': True,                     # True = remove form tags
        'remove_unknown_tags': False,
        'comments': False,
        'processing_instructions': False
    }
    if sanitize_tags:
        kwargs['allow_tags'] = allowed_tags
        if etree.LXML_VERSION >= (2, 3, 1):
            # kill_tags attribute has been added in version 2.3.1
            kwargs.update({
                'kill_tags': tags_to_kill,
                'remove_tags': tags_to_remove,
            })
        else:
            kwargs['remove_tags'] = tags_to_kill + tags_to_remove

    if sanitize_attributes and etree.LXML_VERSION >= (3, 1, 0):  # lxml < 3.1.0 does not allow to specify safe_attrs. We keep all attributes in order to keep "style"
        if strip_classes:
            current_safe_attrs = safe_attrs - frozenset(['class'])
        else:
            current_safe_attrs = safe_attrs
        kwargs.update({
            'safe_attrs_only': True,
            'safe_attrs': current_safe_attrs,
        })
    else:
        kwargs.update({
            'safe_attrs_only': False,  # keep oe-data attributes + style
            'strip_classes': strip_classes,  # remove classes, even when keeping other attributes
        })

    try:
        # some corner cases make the parser crash (such as <SCRIPT/XSS SRC=\"http://ha.ckers.org/xss.js\"></SCRIPT> in test_mail)
        cleaner = _Cleaner(**kwargs)
        cleaned = cleaner.clean_html(src)
        assert isinstance(cleaned, str)
        # MAKO compatibility: $, { and } inside quotes are escaped, preventing correct mako execution
        cleaned = cleaned.replace(u'%24', u'$')
        cleaned = cleaned.replace(u'%7B', u'{')
        cleaned = cleaned.replace(u'%7D', u'}')
        cleaned = cleaned.replace(u'%20', u' ')
        cleaned = cleaned.replace(u'%5B', u'[')
        cleaned = cleaned.replace(u'%5D', u']')
        cleaned = cleaned.replace(u'%7C', u'|')
        cleaned = cleaned.replace(u'&lt;%', u'<%')
        cleaned = cleaned.replace(u'%&gt;', u'%>')
        # html considerations so real html content match database value
        cleaned.replace(u'\xa0', u'&nbsp;')
    except etree.ParserError as e:
        if 'empty' in str(e):
            return u""
        if not silent:
            raise
        logger.warning(u'ParserError obtained when sanitizing %r', src, exc_info=True)
        cleaned = u'<p>ParserError when sanitizing</p>'
    except Exception:
        if not silent:
            raise
        logger.warning(u'unknown error obtained when sanitizing %r', src, exc_info=True)
        cleaned = u'<p>Unknown error when sanitizing</p>'

    # this is ugly, but lxml/etree tostring want to put everything in a 'div' that breaks the editor -> remove that
    if cleaned.startswith(u'<div>') and cleaned.endswith(u'</div>'):
        cleaned = cleaned[5:-6]

    return cleaned