Beispiel #1
0
    def test_proxy_url_no_proto_and_port(self):
        """A scheme-less host:port URL is wrapped by the click proxy."""
        target = "localhost:8080"
        proxied_url = redirect.proxy_url(target)

        self.assertEqual(proxied_url, "/click/?url=localhost%3A8080")

        response = self.client.get(proxied_url, follow=True)

        # exactly one redirect hop, straight back to the original target
        self.assertEqual(len(response.redirect_chain), 1)
        final_url, status_code = response.redirect_chain[0]
        self.assertEqual(final_url, "localhost:8080")
        self.assertEqual(status_code, 302)
Beispiel #2
0
    def test_proxy_url_no_proto(self):
        """A relative URL with a query string is routed through the proxy."""
        target = "/?bizz=iss"
        proxied_url = redirect.proxy_url(target)

        self.assertEqual(proxied_url, "/click/?url=/%3Fbizz%3Diss")

        response = self.client.get(proxied_url, follow=True)

        # exactly one redirect hop, back to the original relative URL
        self.assertEqual(len(response.redirect_chain), 1)
        final_url, status_code = response.redirect_chain[0]
        self.assertEqual(final_url, "/?bizz=iss")
        self.assertEqual(status_code, 302)
Beispiel #3
0
    def test_proxy_url_http_proto(self):
        """An absolute http:// URL is wrapped by the click proxy."""
        target = "http://localhost/?bizz=iss"
        proxied_url = redirect.proxy_url(target)

        self.assertEqual(proxied_url, "/click/?url=http%3A//localhost/%3Fbizz%3Diss")

        response = self.client.get(proxied_url, follow=True)

        # exactly one redirect hop, back to the original absolute URL
        self.assertEqual(len(response.redirect_chain), 1)
        final_url, status_code = response.redirect_chain[0]
        self.assertEqual(final_url, "http://localhost/?bizz=iss")
        self.assertEqual(status_code, 302)
Beispiel #4
0
    def test_proxy_url_no_proto_and_port(self):
        """A scheme-less host:port URL is wrapped by the click proxy."""
        target = "localhost:8080"
        proxied_url = redirect.proxy_url(target)

        self.assertEqual(proxied_url, "/click/?url=localhost%3A8080")

        response = self.client.get(proxied_url, follow=True)

        # exactly one redirect hop, straight back to the original target
        self.assertEqual(len(response.redirect_chain), 1)
        final_url, status_code = response.redirect_chain[0]
        self.assertEqual(final_url, "localhost:8080")
        self.assertEqual(status_code, 302)
Beispiel #5
0
def _clean_html_body(request, email, body, charset):
    """Clean up a html part as best we can

    Doesn't catch LXML errors - callers must handle those.

    :param request: current request; used only to attach a user-facing warning
        message when CSS inlining fails
    :param email: mapping with at least "eid" and "display_images" keys;
        "has_images" is set to True as a side effect when an image is stripped
    :param body: raw HTML of the email part
    :param charset: initial charset guess; may be overridden by <meta> tags
        found in the document itself
    :returns: cleaned HTML string marked safe for template rendering
    """
    html_tree = lxml_html.fromstring(body)

    # if the HTML doc says its a different encoding, use that - check both
    # the legacy <meta http-equiv="Content-Type" content="...;charset=...">
    # form and the HTML5 <meta charset="..."> form
    for meta_tag in html_tree.xpath("/html/head/meta"):
        if meta_tag.get("http-equiv", None) == "Content-Type":
            try:
                content = meta_tag.attrib["content"]
                # the parameters live after the first ";" in the content value
                content = content.split(";", 1)[1]
                charset = dict(HEADER_PARAMS.findall(content))["charset"]
                break
            except (KeyError, IndexError):
                # malformed or parameter-less Content-Type - keep looking
                pass
        elif "charset" in meta_tag.attrib:
            charset = meta_tag.attrib["charset"]
            break

    try:
        # check there's a body for premailer - it needs one to inline CSS
        if html_tree.find("body") is not None:
            html_tree = InboxenPremailer(html_tree).transform()
    except Exception as exc:
        # Yeah, a pretty wide catch, but Premailer likes to throw up everything and anything
        messages.info(
            request,
            _("Part of this message could not be parsed - it may not display correctly"
              ))
        _log.warning("Failed to render CSS for %s: %s", email["eid"], exc)

    # Mail Pile uses this, give back if you come up with something better.
    # Whitelists tags/attrs and strips everything else.
    cleaner = Cleaner(
        allow_tags=HTML_ALLOW_TAGS,
        kill_tags=["style"],  # remove style tags, not attrs
        remove_unknown_tags=False,
        safe_attrs=HTML_SAFE_ATTRS,
        safe_attrs_only=True,
        style=False,  # keep style attrs
    )

    html_tree = cleaner.clean_html(html_tree)

    # filter images if we need to
    if not email["display_images"]:
        for img in html_tree.xpath("//img"):
            try:
                # try to delete src first - we don't want to add a src where there wasn't one already
                del img.attrib["src"]
                # replace the remote image with a local placeholder SVG
                img.attrib["src"] = staticfiles_storage.url(
                    "imgs/placeholder.svg")
                email["has_images"] = True
            except KeyError:
                # img had no src attribute - leave it alone
                pass

    for link in html_tree.xpath("//a"):
        try:
            # route the link through the click-tracking proxy
            url = link.attrib["href"]
            link.attrib["href"] = proxy_url(url)
        except KeyError:
            # anchor without an href (e.g. a named anchor) - nothing to proxy
            pass

        # open link in a new tab
        link.attrib["target"] = "_blank"
        # and prevent window.opener bug (noopener is only supported in newer
        # browsers, plus we already set noreferrer in the head)
        link.attrib["rel"] = "noreferrer"

    # finally, export to unicode using whatever charset we settled on above
    body = unicode_damnit(etree.tostring(html_tree, method="html"), charset)
    return safestring.mark_safe(body)
Beispiel #6
0
    def test_proxy_url_wrong_proto(self):
        """Non-http(s) schemes such as mailto: must pass through untouched."""
        target = "mailto:[email protected]"
        proxied_url = redirect.proxy_url(target)

        # url should have not changed
        self.assertEqual(proxied_url, "mailto:[email protected]")
Beispiel #7
0
def _clean_html_body(request, email, body, charset):
    """Clean up a html part as best we can

    Doesn't catch LXML errors - callers must handle those.

    :param request: current request; used only to attach a user-facing warning
        message when CSS inlining fails
    :param email: mapping with at least "eid" and "display_images" keys;
        "has_images" is set to True as a side effect when an image is stripped
    :param body: raw HTML of the email part
    :param charset: initial charset guess; may be overridden by <meta> tags
        found in the document itself
    :returns: cleaned HTML string marked safe for template rendering
    """
    html_tree = lxml_html.fromstring(body)

    # if the HTML doc says its a different encoding, use that - check both
    # the legacy <meta http-equiv="Content-Type" content="...;charset=...">
    # form and the HTML5 <meta charset="..."> form
    for meta_tag in html_tree.xpath("/html/head/meta"):
        if meta_tag.get("http-equiv", None) == "Content-Type":
            try:
                content = meta_tag.attrib["content"]
                # the parameters live after the first ";" in the content value
                content = content.split(";", 1)[1]
                charset = dict(HEADER_PARAMS.findall(content))["charset"]
                break
            except (KeyError, IndexError):
                # malformed or parameter-less Content-Type - keep looking
                pass
        elif "charset" in meta_tag.attrib:
            charset = meta_tag.attrib["charset"]
            break

    try:
        # check there's a body for premailer - it needs one to inline CSS
        if html_tree.find("body") is not None:
            html_tree = InboxenPremailer(html_tree).transform()
    except Exception as exc:
        # Yeah, a pretty wide catch, but Premailer likes to throw up everything and anything
        messages.info(request, _("Part of this message could not be parsed - it may not display correctly"))
        _log.warning("Failed to render CSS for %s: %s", email["eid"], exc)

    # Mail Pile uses this, give back if you come up with something better.
    # Whitelists tags/attrs and strips everything else.
    cleaner = Cleaner(
        allow_tags=HTML_ALLOW_TAGS,
        kill_tags=["style"],  # remove style tags, not attrs
        remove_unknown_tags=False,
        safe_attrs=HTML_SAFE_ATTRS,
        safe_attrs_only=True,
        style=False,  # keep style attrs
    )

    html_tree = cleaner.clean_html(html_tree)

    # filter images if we need to
    if not email["display_images"]:
        for img in html_tree.xpath("//img"):
            try:
                # try to delete src first - we don't want to add a src where there wasn't one already
                del img.attrib["src"]
                # replace the remote image with a local placeholder SVG
                img.attrib["src"] = staticfiles_storage.url("imgs/placeholder.svg")
                email["has_images"] = True
            except KeyError:
                # img had no src attribute - leave it alone
                pass

    for link in html_tree.xpath("//a"):
        try:
            # route the link through the click-tracking proxy
            url = link.attrib["href"]
            link.attrib["href"] = proxy_url(url)
        except KeyError:
            # anchor without an href (e.g. a named anchor) - nothing to proxy
            pass

        # open link in a new tab
        link.attrib["target"] = "_blank"
        # and prevent window.opener bug (noopener is only supported in newer
        # browsers, plus we already set noreferrer in the head)
        link.attrib["rel"] = "noreferrer"

    # finally, export to unicode using whatever charset we settled on above
    body = unicode_damnit(etree.tostring(html_tree, method="html"), charset)
    return safestring.mark_safe(body)
Beispiel #8
0
    def test_proxy_url_wrong_proto(self):
        """Non-http(s) schemes such as mailto: must pass through untouched."""
        target = "mailto:[email protected]"
        proxied_url = redirect.proxy_url(target)

        # url should have not changed
        self.assertEqual(proxied_url, "mailto:[email protected]")