def test_proxy_url_http_proto(self):
    # An absolute http:// URL is percent-encoded into the /click/ proxy URL,
    # and requesting that proxy URL yields one 302 hop back to the original.
    target = "http://localhost/?bizz=iss"
    click_url = redirect.proxy_url(target)
    self.assertEqual("/click/?url=http%3A//localhost/%3Fbizz%3Diss", click_url)

    resp = self.client.get(click_url, follow=True)
    chain = resp.redirect_chain
    # check the length first so an empty chain fails here, not on indexing
    self.assertEqual(len(chain), 1)
    first_hop = chain[0]
    self.assertEqual(first_hop[0], target)
    self.assertEqual(first_hop[1], 302)
def test_proxy_url_no_proto_and_port(self):
    # A bare "host:port" string (no scheme) is still wrapped in the /click/
    # proxy URL, and following it produces a single 302 to the same string.
    target = "localhost:8080"
    click_url = redirect.proxy_url(target)
    self.assertEqual("/click/?url=localhost%3A8080", click_url)

    resp = self.client.get(click_url, follow=True)
    chain = resp.redirect_chain
    # check the length first so an empty chain fails here, not on indexing
    self.assertEqual(len(chain), 1)
    first_hop = chain[0]
    self.assertEqual(first_hop[0], target)
    self.assertEqual(first_hop[1], 302)
def test_utils(self):
    # A site-relative URL with a query string is wrapped in the /click/ proxy
    # URL, and following it produces a single 302 to the relative URL.
    target = "/?bizz=iss"
    click_url = redirect.proxy_url(target)
    self.assertEqual("/click/?url=/%3Fbizz%3Diss", click_url)

    resp = self.client.get(click_url, follow=True)
    chain = resp.redirect_chain
    # check the length first so an empty chain fails here, not on indexing
    self.assertEqual(len(chain), 1)
    first_hop = chain[0]
    self.assertEqual(first_hop[0], target)
    self.assertEqual(first_hop[1], 302)
def _clean_html_body(request, email, body, charset):
    """Sanitise an HTML message part for display.

    The part is parsed with lxml, its CSS is inlined via ``InboxenPremailer``
    (best effort), it is run through ``Cleaner``, images are swapped for a
    placeholder when ``email["display_images"]`` is falsy, and every link is
    rewritten through ``proxy_url`` and made to open in a new tab.

    ``charset`` is the fallback encoding; a ``<meta>`` tag under
    ``/html/head`` that declares a charset takes precedence.

    Side effects: may add a message to ``request`` and may set
    ``email["has_images"]``.

    Errors raised by lxml itself are not caught here.
    """
    tree = lxml_html.fromstring(body)

    # Let the document override the charset we were handed.
    for meta in tree.xpath("/html/head/meta"):
        if meta.get("http-equiv") == "Content-Type":
            try:
                # KeyError: no content attribute or no charset parameter;
                # IndexError: content has no ";" separated parameters
                params = meta.attrib["content"].split(";", 1)[1]
                charset = dict(HEADER_PARAMS.findall(params))["charset"]
            except (KeyError, IndexError):
                continue
            break
        elif "charset" in meta.attrib:
            charset = meta.attrib["charset"]
            break

    try:
        # Premailer is only run when the document actually has a body element
        has_body = tree.find("body") is not None
        if has_body:
            tree = InboxenPremailer(tree).transform()
    except Exception as err:
        # Deliberately broad: Premailer can fail in many different ways
        messages.info(request, _("Part of this message could not be parsed - it may not display correctly"))
        _log.warning("Failed to render CSS for %s: %s", email["eid"], err)

    # Same approach as Mail Pile; replace if something better turns up
    sanitiser = Cleaner(
        allow_tags=HTML_ALLOW_TAGS,
        kill_tags=["style"],  # drop <style> elements, not style attributes
        remove_unknown_tags=False,
        safe_attrs=HTML_SAFE_ATTRS,
        safe_attrs_only=True,
        style=False,  # style attributes are kept
    )
    tree = sanitiser.clean_html(tree)

    # Swap out image sources unless the caller asked for images to be shown
    if not email["display_images"]:
        for image in tree.xpath("//img"):
            try:
                # deleting first raises KeyError for images without a src,
                # so we never add a src where there was none
                del image.attrib["src"]
                image.set("src", staticfiles_storage.url("imgs/placeholder.svg"))
                email["has_images"] = True
            except KeyError:
                pass

    for anchor in tree.xpath("//a"):
        try:
            # send the link through the click proxy
            anchor.set("href", proxy_url(anchor.attrib["href"]))
        except KeyError:
            pass

        # open in a new tab ...
        anchor.set("target", "_blank")
        # ... and guard against the window.opener problem (noopener is only
        # supported in newer browsers, and noreferrer is already set in the head)
        anchor.set("rel", "noreferrer")

    # Serialise as HTML and decode to text
    serialised = etree.tostring(tree, method="html")
    body = unicode_damnit(serialised, charset)
    return safestring.mark_safe(body)
def test_proxy_url_wrong_proto(self):
    # A mailto: URL must come back from proxy_url exactly as it went in.
    address = "mailto:[email protected]"
    result = redirect.proxy_url(address)
    self.assertEqual(address, result)
def _clean_html_body(request, email, body, charset):
    """Clean up a html part as best we can

    The part is parsed with lxml, CSS is inlined with ``InboxenPremailer``
    (best effort), the tree is passed through ``Cleaner``, images are replaced
    with a placeholder when ``email["display_images"]`` is falsy, and link
    targets are rewritten with ``proxy_url``.

    Args:
        request: passed to ``messages.info`` and attached to the log record
            when CSS rendering fails.
        email: mapping; ``"display_images"`` is read and ``"has_images"`` is
            set to True when at least one image source was replaced.
        body: the HTML part, handed to ``lxml_html.fromstring``.
        charset: fallback charset used to decode the serialised output unless
            a ``<meta>`` tag under ``/html/head`` declares another one.

    Returns:
        The cleaned HTML, wrapped with ``safestring.mark_safe``.

    Doesn't catch LXML errors
    """
    html_tree = lxml_html.fromstring(body)

    # if the HTML doc says its a different encoding, use that
    for meta_tag in html_tree.xpath("/html/head/meta"):
        if meta_tag.get("http-equiv", None) == "Content-Type":
            try:
                # Index attrib directly so a missing content attribute raises
                # KeyError (handled below) rather than calling .split on None.
                content = meta_tag.attrib["content"]
                content = content.split(";", 1)[1]
                # presumably HEADER_PARAMS yields (name, value) pairs - confirm
                charset = dict(HEADER_PARAMS.findall(content))["charset"]
                break
            except (KeyError, IndexError):
                pass
        elif meta_tag.get("charset", None):
            charset = meta_tag.get("charset")
            break

    try:
        # check there's a body for premailer
        # NB: compare against None - an lxml element with no child elements is
        # falsy, so a plain truth test would skip bodies that contain only text
        if html_tree.find("body") is not None:
            html_tree = InboxenPremailer(html_tree).transform()
    except Exception as exc:
        # Yeah, a pretty wide catch, but Premailer likes to throw up everything and anything
        messages.info(request, _("Part of this message could not be parsed - it may not display correctly"))
        _log.exception("Failed to render CSS: %s", exc, extra={"request": request})

    # Mail Pile uses this, give back if you come up with something better
    cleaner = Cleaner(
        allow_tags=HTML_ALLOW_TAGS,
        kill_tags=["style"],  # remove style tags, not attrs
        remove_unknown_tags=False,
        safe_attrs=HTML_SAFE_ATTRS,
        safe_attrs_only=True,
        style=False,  # keep style attrs
    )

    html_tree = cleaner.clean_html(html_tree)

    # filter images if we need to
    if not email["display_images"]:
        for img in html_tree.xpath("//img"):
            try:
                # try to delete src first - we don't want to add a src where there wasn't one already
                del img.attrib["src"]
                # replace image with the placeholder
                img.attrib["src"] = staticfiles_storage.url("imgs/placeholder.svg")
                email["has_images"] = True
            except KeyError:
                pass

    for link in html_tree.xpath("//a"):
        try:
            # proxy link
            url = link.attrib["href"]
            link.attrib["href"] = proxy_url(url)
        except KeyError:
            pass

    # finally, export to unicode
    # Serialise as HTML, not XML: the XML serialiser emits empty elements as
    # self-closing tags (e.g. <a/>), which browsers do not treat as closed.
    body = _unicode_damnit(etree.tostring(html_tree, method="html"), charset)
    return safestring.mark_safe(body)