def parse_links(markup):
    """Linkify bare URLs in ``markup`` and mark external links.

    URLs are detected with a regex built from bleach's TLD list plus a few
    TLDs it is missing. Every link whose host is not ``blender.community``
    gets ``target="_blank"``, ``rel="nofollow"`` and ``class="is-external"``.
    Text inside ``<code>`` and ``<pre>`` is not linkified.

    Args:
        markup: HTML/text to process.

    Returns:
        The linkified markup after being passed through ``clean_url_path``.
    """
    # Add missing top-level domains to linkify.
    # Workaround from https://github.com/mozilla/bleach/issues/519
    # Work on a copy: appending to ``linkifier.TLDS`` itself would mutate the
    # shared module-level list and grow it with duplicates on every call.
    tlds = list(linkifier.TLDS) + [
        'chat',
        'cloud',
        'community',
        'fund',
        'to',
        'today',
    ]
    improved_url_re = build_url_re(tlds=tlds)

    def set_target(attrs, new=False):
        """Linkify callback: flag links that point outside the site."""
        href = attrs.get((None, 'href'))
        if href is None:
            # Anchors without an href (e.g. <a name="...">) have nothing to
            # classify; leave them untouched instead of raising KeyError.
            return attrs

        parsed = urllib.parse.urlparse(href)
        # TODO: get URL from request to figure out if it's an internal link.
        if parsed.netloc not in ['blender.community']:
            attrs[(None, 'target')] = '_blank'
            attrs[(None, 'rel')] = 'nofollow'
            attrs[(None, 'class')] = 'is-external'
        return attrs

    skip_tags = ['code', 'pre']
    linker = linkifier.Linker(
        url_re=improved_url_re,
        callbacks=[set_target],
        skip_tags=skip_tags,
    )
    markup = linker.linkify(markup)
    # markup = parse_phabricator_tasks(markup)
    markup = clean_url_path(markup)
    return markup
def test_custom_url_re(self):
    """A custom ``url_re`` passed via ``linker_options`` is used for linkifying.

    The regex is built for the single TLD ``example``; the bare URL is
    expected to be rendered as a ``rel="nofollow"`` anchor inside a paragraph.
    """
    source = "https://domain.example"
    extension = LinkifyExtension(
        linker_options={"url_re": build_url_re(["example"])},
    )

    rendered = markdown(source, extensions=[extension])

    self.assertEqual(
        '<p><a href="https://domain.example" rel="nofollow">https://domain.example</a></p>',
        rendered,
    )
# Per-tag whitelist of HTML attributes (presumably handed to the HTML
# sanitizer elsewhere in this module -- confirm against the caller).
ALLOWED_ATTRIBUTES = {
    'a': ['href', 'title', 'class'],
    'abbr': ['title'],
    'acronym': ['title'],
    'table': ['width'],
    'td': ['width', 'align'],
    'div': ['class'],
    'p': ['class'],
    'span': ['class', 'title'],
    # Update doc/user/markdown.rst if you change this!
}

# URL schemes that are accepted in links.
ALLOWED_PROTOCOLS = ['http', 'https', 'mailto', 'tel']

# TLDs are passed longest-first, presumably so that a TLD is tried before any
# shorter TLD that is a prefix of it in the generated regex alternation.
URL_RE = build_url_re(tlds=sorted(tld_set, key=len, reverse=True))
EMAIL_RE = build_email_re(tlds=sorted(tld_set, key=len, reverse=True))


def safelink_callback(attrs, new=False):
    """
    Makes sure that all links to a different domain are passed through a redirection handler
    to ensure there's no passing of referers with secrets inside them.

    Links for which ``url_has_allowed_host_and_scheme`` fails (and which are
    not ``mailto:``/``tel:`` links) get their ``href`` replaced by the
    ``redirect`` view URL carrying the signed original URL, plus
    ``target="_blank"`` and ``rel="noopener"``.

    Args:
        attrs: Link attributes keyed by ``(namespace, name)`` tuples; modified
            in place.
        new: Unused here; part of the callback signature.

    Returns:
        The (possibly modified) ``attrs`` mapping. Returning it is required:
        a linkify callback that returns ``None`` causes the link to be dropped.
    """
    url = attrs.get((None, 'href'), '/')
    is_external = not url_has_allowed_host_and_scheme(url, allowed_hosts=None)
    if is_external and not url.startswith(('mailto:', 'tel:')):
        signer = signing.Signer(salt='safe-redirect')
        attrs[None, 'href'] = reverse('redirect') + '?url=' + urllib.parse.quote(signer.sign(url))
        attrs[None, 'target'] = '_blank'
        attrs[None, 'rel'] = 'noopener'
    return attrs
# Per-tag whitelist of HTML attributes (presumably handed to the HTML
# sanitizer elsewhere in this module -- confirm against the caller).
ALLOWED_ATTRIBUTES = {
    'a': ['href', 'title', 'class'],
    'abbr': ['title'],
    'acronym': ['title'],
    'table': ['width'],
    'td': ['width', 'align'],
    'div': ['class'],
    'p': ['class'],
    'span': ['class', 'title'],
    # Update doc/user/markdown.rst if you change this!
}

# URL schemes that are accepted in links.
ALLOWED_PROTOCOLS = ['http', 'https', 'mailto', 'tel']

# Both regexes are wrapped in SimpleLazyObject around a lambda, so the
# (presumably expensive) regex construction is not performed by this
# assignment itself. TLDs are sorted longest-first before being passed in.
URL_RE = SimpleLazyObject(
    lambda: build_url_re(tlds=sorted(tld_set, key=len, reverse=True)))
EMAIL_RE = SimpleLazyObject(
    lambda: build_email_re(tlds=sorted(tld_set, key=len, reverse=True)))


def safelink_callback(attrs, new=False):
    """
    Makes sure that all links to a different domain are passed through a redirection handler
    to ensure there's no passing of referers with secrets inside them.

    ``attrs`` is keyed by ``(namespace, name)`` tuples; ``new`` is not used in
    the visible part of the body.
    """
    # Links without an href are treated as pointing to '/'.
    url = attrs.get((None, 'href'), '/')
    # Only links that fail the host/scheme check and are not mailto:/tel:
    # links enter the branch below.
    if not url_has_allowed_host_and_scheme(
            url, allowed_hosts=None) and not url.startswith(
                'mailto:') and not url.startswith('tel:'):
        signer = signing.Signer(salt='safe-redirect')
        # NOTE(review): the visible body stops here -- no href rewrite using
        # ``signer`` and no ``return attrs`` are shown in this excerpt.
        # Confirm against the complete source; a linkify callback is expected
        # to return the attrs mapping.
# Uses the ``linker`` defined before this point in the script.
print(linker.linkify('abc models.py def'))
# abc models.py def

print(f"\n{'-' * 100}\n")

# skip_tags: URLs inside the listed tags are left as plain text.
linker = Linker(skip_tags=['pre'])
for sample in (
    'a b c http://example.com d e f',
    # a b c <a href="http://example.com" rel="nofollow">http://example.com</a> d e f
    '<pre>http://example.com</pre>',
    # <pre>http://example.com</pre>
):
    print(linker.linkify(sample))

print(f"\n{'-' * 100}\n")

# Custom url_re restricted to a single TLD.
only_fish_tld_url_re = build_url_re(tlds=['fish'])
linker = Linker(url_re=only_fish_tld_url_re)
for sample in (
    'com TLD does not link https://example.com',
    # com TLD does not link https://example.com
    'fish TLD links https://example.fish',
    # fish TLD links <a href="https://example.fish" rel="nofollow">https://example.fish</a>
):
    print(linker.linkify(sample))

print(f"\n{'-' * 100}\n")

# Custom url_re restricted to a single protocol.
only_https_url_re = build_url_re(protocols=['https'])
linker = Linker(url_re=only_https_url_re)
print(linker.linkify('gopher does not link gopher://example.link'))
# gopher does not link gopher://example.link
# Per-tag whitelist of HTML attributes (presumably handed to the HTML
# sanitizer elsewhere in this module -- confirm against the caller).
ALLOWED_ATTRIBUTES = {
    'a': ['href', 'title', 'class'],
    'abbr': ['title'],
    'acronym': ['title'],
    'table': ['width'],
    'td': ['width', 'align'],
    'div': ['class'],
    'p': ['class'],
    'span': ['class', 'title'],
    # Update doc/user/markdown.rst if you change this!
}

# URL schemes that are accepted in links.
ALLOWED_PROTOCOLS = ['http', 'https', 'mailto', 'tel']

# Pass the TLDs longest-first instead of in arbitrary iteration order: the
# generated pattern is a regex alternation tried in the given order, so an
# unordered collection could let a short TLD (e.g. "co") match where a longer
# one ("com") was meant, and would make the pattern differ between runs.
URL_RE = build_url_re(tlds=sorted(tld_set, key=len, reverse=True))
EMAIL_RE = build_email_re(tlds=sorted(tld_set, key=len, reverse=True))


def safelink_callback(attrs, new=False):
    """Route links that fail the host/scheme check through the redirect view.

    Links for which ``url_has_allowed_host_and_scheme`` fails (and which are
    not ``mailto:``/``tel:`` links) get their ``href`` replaced by the
    ``redirect`` view URL carrying the signed original URL, plus
    ``target="_blank"`` and ``rel="noopener"``.

    Args:
        attrs: Link attributes keyed by ``(namespace, name)`` tuples; modified
            in place.
        new: Unused here; part of the callback signature.

    Returns:
        The (possibly modified) ``attrs`` mapping. Returning it is required:
        a linkify callback that returns ``None`` causes the link to be dropped.
    """
    url = attrs.get((None, 'href'), '/')
    is_external = not url_has_allowed_host_and_scheme(url, allowed_hosts=None)
    if is_external and not url.startswith(('mailto:', 'tel:')):
        signer = signing.Signer(salt='safe-redirect')
        attrs[None, 'href'] = reverse('redirect') + '?url=' + urllib.parse.quote(
            signer.sign(url))
        attrs[None, 'target'] = '_blank'
        attrs[None, 'rel'] = 'noopener'
    return attrs