def test_unescape_works(self):
     # Round-trip check: html_unescape must turn the escaped form of each
     # of the 5 entities produced by html_escape back into the literal
     # character.
     escaped = '&amp;&lt;&gt;&quot;&#x27;'
     expected = '&<>"\''
     self.assertEqual(expected, html_unescape(escaped))
    def _linkify_substitution(match):
        """Return the replacement text for one linkification regex match.

        Dispatches on which named group of ``match`` is set, checked in the
        order ``bug``, ``url``, ``faq``, ``oops``, ``lpbranchurl``, ``clbug``,
        and builds the replacement for that kind of target.

        :param match: a regex match object exposing the named groups above,
            plus the sub-groups the individual branches read (``bugnum``,
            ``faqnum``, ``oopscode``, ``branch``, ``bugnumbers``,
            ``leader``).
        :return: the text to substitute for the matched span.
        :raises AssertionError: if none of the known groups matched.
        """
        if match.group("bug") is not None:
            return FormattersAPI._linkify_bug_number(match.group("bug"), match.group("bugnum"))
        elif match.group("url") is not None:
            # The text will already have been cgi escaped.  We temporarily
            # unescape it so that we can strip common trailing characters
            # that aren't part of the URL.
            full_url = match.group("url")
            url, trailers = FormattersAPI._split_url_and_trailers(html_unescape(full_url))
            if FormattersAPI._linkify_url_should_be_ignored(url):
                # Leave ignored URLs exactly as they appeared in the text.
                return full_url
            # We use nofollow for these links to reduce the value of
            # adding spam URLs to our comments; it's a way of moderately
            # devaluing the return on effort for spammers that consider
            # using Launchpad.
            # The use of structured() in the argument here is a bit
            # evil. Ideally add_word_breaks would return one itself.
            return structured(
                '<a rel="nofollow" href="%(url)s">%(linked_text)s</a>%(trailers)s',
                url=url,
                linked_text=structured(add_word_breaks(html_escape(url))),
                trailers=trailers,
            ).escapedtext
        elif match.group("faq") is not None:
            # This is *BAD*.  We shouldn't be doing database lookups to
            # linkify text.
            text = match.group("faq")
            faq = getUtility(IFAQSet).getFAQ(match.group("faqnum"))
            if not faq:
                # No FAQ found for that number: leave the text unlinked.
                return text
            # NOTE(review): url is interpolated without HTML escaping here;
            # presumably canonical_url() yields an attribute-safe value --
            # confirm.
            url = canonical_url(faq)
            return '<a href="%s">%s</a>' % (url, text)
        elif match.group("oops") is not None:
            text = match.group("oops")

            # OOPS references are only linked when the launch bag reports a
            # developer; otherwise the text is returned untouched.
            if not getUtility(ILaunchBag).developer:
                return text

            root_url = config.launchpad.oops_root_url
            url = root_url + "OOPS-" + match.group("oopscode")
            return '<a href="%s">%s</a>' % (url, text)
        elif match.group("lpbranchurl") is not None:
            lp_url = match.group("lpbranchurl")
            path = match.group("branch")
            # Both values are split; the trailers kept are the ones from
            # the second call (on the branch path).
            lp_url, trailers = FormattersAPI._split_url_and_trailers(html_unescape(lp_url))
            path, trailers = FormattersAPI._split_url_and_trailers(html_unescape(path))
            if path.isdigit():
                # An all-digit path is linked as a bug number instead of a
                # branch.
                return FormattersAPI._linkify_bug_number(lp_url, path, trailers)
            url = "/+branch/%s" % path
            # Mark the links with a 'branch-short-link' class so they can be
            # harvested and validated when the page is rendered.
            return structured('<a href="%s" class="branch-short-link">%s</a>%s', url, lp_url, trailers).escapedtext
        elif match.group("clbug") is not None:
            # 'clbug' matches Ubuntu changelog format bugs. 'bugnumbers' is
            # all of the bug numbers, that look something like "#1234, #434".
            # 'leader' is the 'LP: ' bit at the beginning.
            bug_parts = []
            # Split the bug numbers into multiple bugs.  The pattern is a raw
            # string because "\s" is a regex escape, not a valid Python
            # string escape.  The capturing group keeps each separator in
            # the result, so items alternate bug, separator, bug, ...; the
            # appended "" pairs the final bug with an empty separator.
            pieces = re.split(r"(,(?:\s|<br\s*/>)+)", match.group("bugnumbers")) + [""]
            for bug_id, spacer in zip(pieces[::2], pieces[1::2]):
                bug_parts.append(FormattersAPI._linkify_bug_number(bug_id, bug_id.lstrip("#")))
                bug_parts.append(spacer)
            return match.group("leader") + "".join(bug_parts)
        else:
            raise AssertionError("Unknown pattern matched.")
Exemple #3
0
    def _linkify_substitution(match):
        """Return the replacement text for one linkification regex match.

        Dispatches on which named group of ``match`` is set, checked in the
        order ``bug``, ``url``, ``faq``, ``oops``, ``lpbranchurl``, ``clbug``,
        and builds the replacement for that kind of target.

        :param match: a regex match object exposing the named groups above,
            plus the sub-groups the individual branches read (``bugnum``,
            ``faqnum``, ``oopscode``, ``branch``, ``bugnumbers``,
            ``leader``).
        :return: the text to substitute for the matched span.
        :raises AssertionError: if none of the known groups matched.
        """
        if match.group('bug') is not None:
            return FormattersAPI._linkify_bug_number(match.group('bug'),
                                                     match.group('bugnum'))
        elif match.group('url') is not None:
            # The text will already have been cgi escaped.  We temporarily
            # unescape it so that we can strip common trailing characters
            # that aren't part of the URL.
            full_url = match.group('url')
            url, trailers = FormattersAPI._split_url_and_trailers(
                html_unescape(full_url))
            if FormattersAPI._linkify_url_should_be_ignored(url):
                # Leave ignored URLs exactly as they appeared in the text.
                return full_url
            # We use nofollow for these links to reduce the value of
            # adding spam URLs to our comments; it's a way of moderately
            # devaluing the return on effort for spammers that consider
            # using Launchpad.
            # The use of structured() in the argument here is a bit
            # evil. Ideally add_word_breaks would return one itself.
            return structured(
                '<a rel="nofollow" '
                'href="%(url)s">%(linked_text)s</a>%(trailers)s',
                url=url,
                linked_text=structured(add_word_breaks(html_escape(url))),
                trailers=trailers).escapedtext
        elif match.group('faq') is not None:
            # This is *BAD*.  We shouldn't be doing database lookups to
            # linkify text.
            text = match.group('faq')
            faq = getUtility(IFAQSet).getFAQ(match.group('faqnum'))
            if not faq:
                # No FAQ found for that number: leave the text unlinked.
                return text
            # NOTE(review): url is interpolated without HTML escaping here;
            # presumably canonical_url() yields an attribute-safe value --
            # confirm.
            url = canonical_url(faq)
            return '<a href="%s">%s</a>' % (url, text)
        elif match.group('oops') is not None:
            text = match.group('oops')

            # OOPS references are only linked when the launch bag reports a
            # developer; otherwise the text is returned untouched.
            if not getUtility(ILaunchBag).developer:
                return text

            root_url = config.launchpad.oops_root_url
            url = root_url + "OOPS-" + match.group('oopscode')
            return '<a href="%s">%s</a>' % (url, text)
        elif match.group('lpbranchurl') is not None:
            lp_url = match.group('lpbranchurl')
            path = match.group('branch')
            # Both values are split; the trailers kept are the ones from
            # the second call (on the branch path).
            lp_url, trailers = FormattersAPI._split_url_and_trailers(
                html_unescape(lp_url))
            path, trailers = FormattersAPI._split_url_and_trailers(
                html_unescape(path))
            if path.isdigit():
                # An all-digit path is linked as a bug number instead of a
                # branch.
                return FormattersAPI._linkify_bug_number(
                    lp_url, path, trailers)
            url = '/+branch/%s' % path
            # Mark the links with a 'branch-short-link' class so they can be
            # harvested and validated when the page is rendered.
            return structured(
                '<a href="%s" class="branch-short-link">%s</a>%s', url, lp_url,
                trailers).escapedtext
        elif match.group("clbug") is not None:
            # 'clbug' matches Ubuntu changelog format bugs. 'bugnumbers' is
            # all of the bug numbers, that look something like "#1234, #434".
            # 'leader' is the 'LP: ' bit at the beginning.
            bug_parts = []
            # Split the bug numbers into multiple bugs.  The pattern is a raw
            # string because "\s" is a regex escape, not a valid Python
            # string escape.  The capturing group keeps each separator in
            # the result, so items alternate bug, separator, bug, ...; the
            # appended "" pairs the final bug with an empty separator.
            pieces = re.split(r"(,(?:\s|<br\s*/>)+)",
                              match.group("bugnumbers")) + [""]
            for bug_id, spacer in zip(pieces[::2], pieces[1::2]):
                bug_parts.append(
                    FormattersAPI._linkify_bug_number(bug_id,
                                                      bug_id.lstrip("#")))
                bug_parts.append(spacer)
            return match.group("leader") + "".join(bug_parts)
        else:
            raise AssertionError("Unknown pattern matched.")
 def test_unescape_works(self):
     # Round-trip check: html_unescape must turn the escaped form of each
     # of the 5 entities produced by html_escape back into the literal
     # character.
     escaped = '&amp;&lt;&gt;&quot;&#x27;'
     expected = '&<>"\''
     self.assertEqual(expected, html_unescape(escaped))