def test_unescape_works(self):
    # html_unescape undoes the 5 entity transformations performed by
    # html_escape.
    #
    # The argument has to be the entity-encoded text: passing the already
    # decoded characters would leave nothing for html_unescape to undo
    # (and an unescaped apostrophe inside the single-quoted literal is a
    # syntax error).
    # NOTE(review): "&#x27;" is assumed to be the apostrophe entity that
    # html_escape emits -- confirm against its implementation.
    self.assertEqual(
        '&<>"\'', html_unescape("&amp;&lt;&gt;&quot;&#x27;")
    )
def _linkify_substitution(match):
    """Return the replacement markup for one linkification match.

    The named group that matched selects the kind of link produced:

    * ``bug`` -- delegated to ``FormattersAPI._linkify_bug_number``.
    * ``url`` -- a ``rel="nofollow"`` anchor, unless
      ``FormattersAPI._linkify_url_should_be_ignored`` rejects the URL,
      in which case the matched text is returned unchanged.
    * ``faq`` -- a link to the FAQ numbered ``faqnum``; the matched text
      is returned unchanged when no such FAQ is found.
    * ``oops`` -- a link below ``config.launchpad.oops_root_url``, only
      when ``ILaunchBag.developer`` is true; otherwise the matched text.
    * ``lpbranchurl`` -- a ``branch-short-link`` anchor to
      ``/+branch/<path>``, or a bug link when the path is all digits.
    * ``clbug`` -- every bug number in ``bugnumbers`` is linked
      individually and the separators between them are preserved.

    :param match: a regex match object exposing the named groups above.
    :return: the text to substitute for the match.
    :raises AssertionError: if none of the groups above matched.
    """
    if match.group("bug") is not None:
        return FormattersAPI._linkify_bug_number(
            match.group("bug"), match.group("bugnum")
        )
    elif match.group("url") is not None:
        # The text will already have been cgi escaped.  We temporarily
        # unescape it so that we can strip common trailing characters
        # that aren't part of the URL.
        full_url = match.group("url")
        url, trailers = FormattersAPI._split_url_and_trailers(
            html_unescape(full_url)
        )
        # We use nofollow for these links to reduce the value of
        # adding spam URLs to our comments; it's a way of moderately
        # devaluing the return on effort for spammers that consider
        # using Launchpad.
        # The use of structured() in the argument here is a bit
        # evil.  Ideally add_word_breaks would return one itself.
        if not FormattersAPI._linkify_url_should_be_ignored(url):
            return structured(
                '<a rel="nofollow" '
                'href="%(url)s">%(linked_text)s</a>%(trailers)s',
                url=url,
                linked_text=structured(add_word_breaks(html_escape(url))),
                trailers=trailers,
            ).escapedtext
        else:
            return full_url
    elif match.group("faq") is not None:
        # This is *BAD*.  We shouldn't be doing database lookups to
        # linkify text.
        text = match.group("faq")
        faqnum = match.group("faqnum")
        faqset = getUtility(IFAQSet)
        faq = faqset.getFAQ(faqnum)
        if not faq:
            return text
        url = canonical_url(faq)
        return '<a href="%s">%s</a>' % (url, text)
    elif match.group("oops") is not None:
        text = match.group("oops")
        # Only developers get OOPS references turned into links.
        if not getUtility(ILaunchBag).developer:
            return text
        root_url = config.launchpad.oops_root_url
        url = root_url + "OOPS-" + match.group("oopscode")
        return '<a href="%s">%s</a>' % (url, text)
    elif match.group("lpbranchurl") is not None:
        lp_url = match.group("lpbranchurl")
        path = match.group("branch")
        lp_url, trailers = FormattersAPI._split_url_and_trailers(
            html_unescape(lp_url)
        )
        # The trailers split off the path replace those split off lp_url.
        path, trailers = FormattersAPI._split_url_and_trailers(
            html_unescape(path)
        )
        if path.isdigit():
            return FormattersAPI._linkify_bug_number(lp_url, path, trailers)
        url = "/+branch/%s" % path
        # Mark the links with a 'branch-short-link' class so they can be
        # harvested and validated when the page is rendered.
        return structured(
            '<a href="%s" class="branch-short-link">%s</a>%s',
            url,
            lp_url,
            trailers,
        ).escapedtext
    elif match.group("clbug") is not None:
        # 'clbug' matches Ubuntu changelog format bugs.  'bugnumbers' is
        # all of the bug numbers, that look something like "#1234, #434".
        # 'leader' is the 'LP: ' bit at the beginning.
        bug_parts = []
        # Split the bug numbers into multiple bugs.  The pattern is a raw
        # string so that "\s" reaches the regex engine instead of being
        # parsed as an (invalid) string escape.  The capturing group keeps
        # the separators in the result, and the trailing "" pairs the last
        # bug number with an empty spacer.
        splitted = re.split(
            r"(,(?:\s|<br\s*/>)+)", match.group("bugnumbers")
        ) + [""]
        for bug_id, spacer in zip(splitted[::2], splitted[1::2]):
            bug_parts.append(
                FormattersAPI._linkify_bug_number(bug_id, bug_id.lstrip("#"))
            )
            bug_parts.append(spacer)
        return match.group("leader") + "".join(bug_parts)
    else:
        raise AssertionError("Unknown pattern matched.")
def _linkify_substitution(match):
    """Build the replacement text for a single linkification match.

    Exactly one of the named groups ``bug``, ``url``, ``faq``, ``oops``,
    ``lpbranchurl`` or ``clbug`` is expected to have matched; they are
    checked in that order and the first one that is not None decides
    what is returned (a link, or the matched text unchanged when the
    branch decides not to link it).

    :param match: a regex match object exposing the named groups above,
        plus the auxiliary groups read by each branch (``bugnum``,
        ``faqnum``, ``oopscode``, ``branch``, ``bugnumbers``, ``leader``).
    :return: the text to substitute for the match.
    :raises AssertionError: if none of the six groups matched.
    """
    if match.group('bug') is not None:
        return FormattersAPI._linkify_bug_number(
            match.group('bug'), match.group('bugnum'))

    if match.group('url') is not None:
        # The text will already have been cgi escaped.  We temporarily
        # unescape it so that we can strip common trailing characters
        # that aren't part of the URL.
        full_url = match.group('url')
        url, trailers = FormattersAPI._split_url_and_trailers(
            html_unescape(full_url))
        if FormattersAPI._linkify_url_should_be_ignored(url):
            # Ignored URLs are handed back exactly as they were matched.
            return full_url
        # We use nofollow for these links to reduce the value of
        # adding spam URLs to our comments; it's a way of moderately
        # devaluing the return on effort for spammers that consider
        # using Launchpad.
        # The use of structured() in the argument here is a bit
        # evil.  Ideally add_word_breaks would return one itself.
        return structured(
            '<a rel="nofollow" '
            'href="%(url)s">%(linked_text)s</a>%(trailers)s',
            url=url,
            linked_text=structured(add_word_breaks(html_escape(url))),
            trailers=trailers).escapedtext

    if match.group('faq') is not None:
        # This is *BAD*.  We shouldn't be doing database lookups to
        # linkify text.
        text = match.group('faq')
        faqnum = match.group('faqnum')
        faqset = getUtility(IFAQSet)
        faq = faqset.getFAQ(faqnum)
        if not faq:
            return text
        url = canonical_url(faq)
        return '<a href="%s">%s</a>' % (url, text)

    if match.group('oops') is not None:
        text = match.group('oops')
        # Only developers get OOPS references turned into links.
        if not getUtility(ILaunchBag).developer:
            return text
        root_url = config.launchpad.oops_root_url
        url = root_url + "OOPS-" + match.group('oopscode')
        return '<a href="%s">%s</a>' % (url, text)

    if match.group('lpbranchurl') is not None:
        lp_url = match.group('lpbranchurl')
        path = match.group('branch')
        lp_url, trailers = FormattersAPI._split_url_and_trailers(
            html_unescape(lp_url))
        # The trailers split off the path replace those split off lp_url.
        path, trailers = FormattersAPI._split_url_and_trailers(
            html_unescape(path))
        if path.isdigit():
            return FormattersAPI._linkify_bug_number(
                lp_url, path, trailers)
        url = '/+branch/%s' % path
        # Mark the links with a 'branch-short-link' class so they can be
        # harvested and validated when the page is rendered.
        return structured(
            '<a href="%s" class="branch-short-link">%s</a>%s',
            url, lp_url, trailers).escapedtext

    if match.group("clbug") is not None:
        # 'clbug' matches Ubuntu changelog format bugs.  'bugnumbers' is
        # all of the bug numbers, that look something like "#1234, #434".
        # 'leader' is the 'LP: ' bit at the beginning.
        bug_parts = []
        # Split the bug numbers into multiple bugs.  A raw string keeps
        # "\s" as a regex escape rather than an invalid string escape.
        # Because the separator pattern is a capturing group, re.split
        # alternates bug numbers and separators; the appended "" gives the
        # final bug number an empty spacer to pair with.
        splitted = re.split(
            r"(,(?:\s|<br\s*/>)+)", match.group("bugnumbers")) + [""]
        for bug_id, spacer in zip(splitted[::2], splitted[1::2]):
            bug_parts.append(
                FormattersAPI._linkify_bug_number(bug_id, bug_id.lstrip("#")))
            bug_parts.append(spacer)
        return match.group("leader") + "".join(bug_parts)

    raise AssertionError("Unknown pattern matched.")