def test_unwrap_html_body(self):
    # ``utils.unwrap_html_body`` is expected to replace the <html>/<body>
    # wrapper of a mail body with a single <div>, optionally tagged with
    # a css class.

    # A complete document: only the body content survives, wrapped in a
    # plain <div>.
    html = """ <html> <head></head> <body>Body</body> </html> """
    body = '<div>Body</div>'
    self.assertEqual(body, utils.unwrap_html_body(html))

    # Attributes of the <body> tag are carried over to the <div>, and the
    # requested css class is added.
    html = """ <html> <body style="color: #666; font-size: 12px;">Body</body> </html> """
    body = ('<div class="mailBody" style="color: #666; font-size: 12px;">'
            'Body</div>')
    self.assertEqual(body, utils.unwrap_html_body(html, 'mailBody'))

    # A fragment without <html>/<body> is wrapped as a whole.
    html = '<p>Body</p>'
    body = '<div class="mailBody"><p>Body</p></div>'
    self.assertEqual(body, utils.unwrap_html_body(html, 'mailBody'))
def test_unwrap_html_body_encoding(self):
    # The html body may contain a charset header.
    # We always get an utf8-encoded body, thus we must ignore the charset.
    html = """ <html> <head> <meta http-equiv=Content-Type content="text/html; charset=iso-8859-1"> </head> <body>Äöü</body> """
    self.assertEqual('<div>Äöü</div>', utils.unwrap_html_body(html))

    # BeautifulSoup does some weird encoding guessing.
    # For the snippet above it guesses utf-8, but if the body
    # only contains a single ä Umlaut, it seems to guess latin1.
    # Check we still get utf-8 back.
    html = """ <html> <body>ä</body> """
    self.assertEqual('<div>ä</div>', utils.unwrap_html_body(html))
def html_safe_body(self):
    """Return the mail body converted to a html safe variant.

    Every part returned by ``utils.get_body`` is run through the
    following steps, in this order:

    1. ``rewrite_css_styles`` (the premailer css parser),
    2. ``utils.fix_broken_meta_tags``,
    3. ``utils.unwrap_html_body`` with the css class ``mailBody-part``,
    4. ``transfrom_safe_html`` (the `safe_html` PortalTransforms).

    The converted parts are joined with newlines. An empty string is
    returned when there are no body parts.
    """
    mail_context = aq_inner(self.context)
    message = self.msg()
    bodies = utils.get_body(message, mail_context.absolute_url())
    if not bodies:
        return ''

    def make_safe(part):
        # Apply the conversion steps to a single body part, one after
        # the other; each step works on the output of the previous one.
        styled = self.rewrite_css_styles(part)
        repaired = utils.fix_broken_meta_tags(styled)
        wrapped = utils.unwrap_html_body(repaired, 'mailBody-part')
        return self.transfrom_safe_html(wrapped)

    return '\n'.join([make_safe(part) for part in bodies])