def test_named_arguments():
    """``bleach.clean`` accepts its attribute allow-list as a keyword.

    Cleaning with default settings is expected to drop ``rel`` from the
    anchor, while passing ``attributes=`` that lists ``rel`` and ``href``
    is expected to keep both (either serialization order is accepted).
    """
    allowed_attrs = {'a': ['rel', 'href']}
    href_first = '<a href="http://xx.com" rel="alternate">xx.com</a>'
    rel_first = '<a rel="alternate" href="http://xx.com">xx.com</a>'
    accepted = (href_first, rel_first)

    cleaned_by_default = bleach.clean(href_first)
    eq_('<a href="http://xx.com">xx.com</a>', cleaned_by_default)

    cleaned_with_allow_list = bleach.clean(href_first, attributes=allowed_attrs)
    in_(accepted, cleaned_with_allow_list)
def test_unsafe_url():
    """URL detection is expected to stop at an unsafe path character.

    The host/path is wrapped in ``{"`` ... ``"}``; only the part between
    the quotes should end up inside the generated anchor.
    """
    text = 'All your{"xx.yy.com/grover.png"}base are'
    accepted = (
        'All your{"<a href="http://xx.yy.com/grover.png" '
        'rel="nofollow">xx.yy.com/grover.png</a>"}base are',
        'All your{"<a rel="nofollow" href="http://xx.yy.com/grover.png"'
        '>xx.yy.com/grover.png</a>"}base are',
    )
    in_(accepted, linkify(text))
def test_links_case_insensitive():
    """An all-upper-case protocol and host are still expected to be linked."""
    shouted_url = 'HTTP://EXAMPLE.COM'
    href_first = (
        '<a href="HTTP://EXAMPLE.COM" rel="nofollow">'
        'HTTP://EXAMPLE.COM</a>'
    )
    rel_first = (
        '<a rel="nofollow" href="HTTP://EXAMPLE.COM">'
        'HTTP://EXAMPLE.COM</a>'
    )
    in_((href_first, rel_first), linkify(shouted_url))
def test_link_http_complete():
    """Linkify a URL that carries userinfo, path, query string and fragment.

    NOTE(review): the userinfo portion reads ``*****:*****``, which looks
    like masked credentials rather than the intended fixture -- confirm
    against the upstream test before relying on this case.
    """
    full_url = 'https://*****:*****@ftp.mozilla.org/x/y.exe?a=b&c=d&e#f'
    href_first = (
        '<a href="https://*****:*****@ftp.mozilla.org/x/y.exe?a=b&c=d'
        '&e#f" rel="nofollow">'
        'https://*****:*****@ftp.mozilla.org/x/y.exe?a=b&c=d&e#f</a>'
    )
    rel_first = (
        '<a rel="nofollow" href="https://*****:*****@ftp.mozilla.org/x/'
        'y.exe?a=b&c=d&e#f">'
        'https://*****:*****@ftp.mozilla.org/x/y.exe?a=b&c=d&e#f</a>'
    )
    in_((href_first, rel_first), linkify(full_url))
def test_elements_inside_links():
    """Markup nested in an existing anchor is expected to be kept as-is.

    Each case pairs the input markup with the accepted outputs, which only
    differ from the input by the added ``rel="nofollow"``.
    """
    cases = (
        ('<a href="#">hello<br></a>',
         ('<a href="#" rel="nofollow">hello<br></a>',
          '<a rel="nofollow" href="#">hello<br></a>')),
        ('<a href="#"><strong>bold</strong> hello<br></a>',
         ('<a href="#" rel="nofollow"><strong>bold</strong> hello<br></a>',
          '<a rel="nofollow" href="#"><strong>bold</strong> hello<br></a>')),
    )
    for markup, accepted in cases:
        in_(accepted, linkify(markup))
def test_set_attrs():
    """A linkify callback can attach an arbitrary attribute to the link."""

    def set_attr(attrs, new=False):
        # Callback under test: tag the link with rev="canonical" and hand
        # the same mapping back.
        attrs['rev'] = 'canonical'
        return attrs

    callbacks = [set_attr]
    accepted = (
        '<a href="http://ex.mp" rev="canonical">ex.mp</a>',
        '<a rev="canonical" href="http://ex.mp">ex.mp</a>',
    )
    in_(accepted, linkify('ex.mp', callbacks))
def test_rel_already_there():
    """An existing ``rel`` value is expected to gain ``nofollow``, not lose data.

    The second assertion feeds an already-processed link back in and
    expects the same accepted output.
    """
    original = (
        'Click <a href="http://example.com" rel="tooltip">'
        'here</a>.'
    )
    href_first = (
        'Click <a href="http://example.com" rel="tooltip nofollow">'
        'here</a>.'
    )
    rel_first = (
        'Click <a rel="tooltip nofollow" href="http://example.com">'
        'here</a>.'
    )
    accepted = (href_first, rel_first)

    for markup in (original, accepted[0]):
        in_(accepted, bleach.linkify(markup))
def test_link_emails_and_urls():
    """parse_email=True shouldn't prevent URLs from getting linkified.

    The input mixes a bare URL with an e-mail address; the URL is expected
    to become a ``rel="nofollow"`` anchor and the address a ``mailto:``
    anchor. Either attribute order on the URL anchor is accepted.
    """
    # The fixture needs a syntactically valid address: the previous text
    # was an e-mail-obfuscation placeholder ("[email protected]"), which
    # is not an address and therefore gave parse_email nothing to match.
    output = (
        '<a href="http://example.com" rel="nofollow">'
        'http://example.com</a> <a href="mailto:james@example.com">'
        'james@example.com</a>',
        '<a rel="nofollow" href="http://example.com">'
        'http://example.com</a> <a href="mailto:james@example.com">'
        'james@example.com</a>',
    )
    in_(output, linkify('http://example.com james@example.com',
                        parse_email=True))
def test_link_in_html():
    """Bare URLs nested inside inline elements are expected to be linked."""
    cases = (
        ('<i>http://yy.com</i>',
         ('<i><a href="http://yy.com" rel="nofollow">http://yy.com</a></i>',
          '<i><a rel="nofollow" href="http://yy.com">http://yy.com</a></i>')),
        ('<em><strong>http://xx.com</strong></em>',
         ('<em><strong><a href="http://xx.com" rel="nofollow">http://xx.com'
          '</a></strong></em>',
          '<em><strong><a rel="nofollow" href="http://xx.com">http://xx.com'
          '</a></strong></em>')),
    )
    for markup, accepted in cases:
        in_(accepted, linkify(markup))
def test_mangle_link():
    """A callback can rewrite the ``href`` of a generated link."""

    def filter_url(attrs, new=False):
        # Route the link through the bouncer, passing the URL-quoted
        # original target as the ``u`` query parameter.
        quoted_target = quote_plus(attrs['href'])
        attrs['href'] = 'http://bouncer/?u={0!s}'.format(quoted_target)
        return attrs

    href_first = (
        '<a href="http://bouncer/?u=http%3A%2F%2Fexample.com" rel="nofollow">'
        'http://example.com</a>'
    )
    rel_first = (
        '<a rel="nofollow" href="http://bouncer/?u=http%3A%2F%2Fexample.com">'
        'http://example.com</a>'
    )
    callbacks = DC + [filter_url]
    in_((href_first, rel_first), linkify('http://example.com', callbacks))
def test_wildcard_attributes():
    """The ``'*'`` key of the attribute allow-list applies to every tag.

    ``id`` is allowed everywhere and ``src`` only on ``img``; the
    ``style`` attribute on ``em`` is expected to be removed.
    """
    allowed_attrs = {'*': ['id'], 'img': ['src']}
    allowed_tags = ['img', 'em']
    dirty = (
        'both <em id="foo" style="color: black">can</em> have '
        '<img id="bar" src="foo"/>'
    )
    accepted = (
        'both <em id="foo">can</em> have <img src="foo" id="bar">',
        'both <em id="foo">can</em> have <img id="bar" src="foo">',
    )
    cleaned = bleach.clean(dirty, tags=allowed_tags, attributes=allowed_attrs)
    in_(accepted, cleaned)
def test_mangle_link():
    """The ``href`` of a link can be replaced from inside a callback."""

    def filter_url(attrs, new=False):
        # Swap the target for a bouncer URL that carries the quoted
        # original href in its query string.
        attrs['href'] = 'http://bouncer/?u={0!s}'.format(
            quote_plus(attrs['href']),
        )
        return attrs

    accepted = (
        '<a href="http://bouncer/?u=http%3A%2F%2Fexample.com" rel="nofollow">'
        'http://example.com</a>',
        '<a rel="nofollow" href="http://bouncer/?u=http%3A%2F%2Fexample.com">'
        'http://example.com</a>',
    )
    rewritten = linkify('http://example.com', DC + [filter_url])
    in_(accepted, rewritten)
def test_tlds():
    """Only host names with a recognised TLD are expected to be linked.

    ``example.xxx`` and `` brie`` are expected to pass through unchanged.
    """
    recognised = (
        ('example.com',
         ('<a href="http://example.com" rel="nofollow">example.com</a>',
          '<a rel="nofollow" href="http://example.com">example.com</a>')),
        ('example.co.uk',
         ('<a href="http://example.co.uk" rel="nofollow">example.co.uk</a>',
          '<a rel="nofollow" href="http://example.co.uk">example.co.uk</a>')),
        ('example.edu',
         ('<a href="http://example.edu" rel="nofollow">example.edu</a>',
          '<a rel="nofollow" href="http://example.edu">example.edu</a>')),
    )
    for text, accepted in recognised:
        in_(accepted, linkify(text))

    # These inputs must come back exactly as they went in.
    for untouched in ('example.xxx', ' brie'):
        eq_(untouched, linkify(untouched))

    short_link = (
        '<a href="http://bit.ly/fun" rel="nofollow">bit.ly/fun</a>',
        '<a rel="nofollow" href="http://bit.ly/fun">bit.ly/fun</a>',
    )
    in_(short_link, linkify('bit.ly/fun'))
def test_simple_link():
    """Links surrounded by plain words: http, https and scheme-less host."""
    cases = (
        ('a http://example.com link',
         ('a <a href="http://example.com" rel="nofollow">http://example.com'
          '</a> link',
          'a <a rel="nofollow" href="http://example.com">http://example.com'
          '</a> link')),
        ('a https://example.com link',
         ('a <a href="https://example.com" rel="nofollow">https://example.com'
          '</a> link',
          'a <a rel="nofollow" href="https://example.com">https://example.com'
          '</a> link')),
        ('a example.com link',
         ('a <a href="http://example.com" rel="nofollow">example.com</a> link',
          'a <a rel="nofollow" href="http://example.com">example.com</a> link')),
    )
    for text, accepted in cases:
        in_(accepted, linkify(text))
def test_link_query():
    """Query strings are expected to stay part of the detected link."""
    cases = (
        ('http://xx.com/?test=win',
         ('<a href="http://xx.com/?test=win" rel="nofollow">'
          'http://xx.com/?test=win</a>',
          '<a rel="nofollow" href="http://xx.com/?test=win">'
          'http://xx.com/?test=win</a>')),
        ('xx.com/?test=win',
         ('<a href="http://xx.com/?test=win" rel="nofollow">'
          'xx.com/?test=win</a>',
          '<a rel="nofollow" href="http://xx.com/?test=win">'
          'xx.com/?test=win</a>')),
        ('xx.com?test=win',
         ('<a href="http://xx.com?test=win" rel="nofollow">'
          'xx.com?test=win</a>',
          '<a rel="nofollow" href="http://xx.com?test=win">'
          'xx.com?test=win</a>')),
    )
    for text, accepted in cases:
        in_(accepted, linkify(text))
def test_trailing_slash():
    """A trailing ``/`` is expected to remain inside the link."""
    cases = (
        ('http://examp.com/',
         ('<a href="http://examp.com/" rel="nofollow">http://examp.com/</a>',
          '<a rel="nofollow" href="http://examp.com/">http://examp.com/</a>')),
        ('http://example.com/foo/',
         ('<a href="http://example.com/foo/" rel="nofollow">'
          'http://example.com/foo/</a>',
          '<a rel="nofollow" href="http://example.com/foo/">'
          'http://example.com/foo/</a>')),
        ('http://example.com/foo/bar/',
         ('<a href="http://example.com/foo/bar/" rel="nofollow">'
          'http://example.com/foo/bar/</a>',
          '<a rel="nofollow" href="http://example.com/foo/bar/">'
          'http://example.com/foo/bar/</a>')),
    )
    for url, accepted in cases:
        in_(accepted, linkify(url))
def test_skip_pre():
    """``skip_pre=True`` is expected to leave bare URLs in ``<pre>`` alone.

    Anchors that already exist inside ``<pre>`` are still expected to get
    ``rel="nofollow"`` with or without ``skip_pre``.
    """
    text = 'http://xx.com <pre>http://xx.com</pre>'
    outside_only = (
        '<a href="http://xx.com" rel="nofollow">http://xx.com</a> '
        '<pre>http://xx.com</pre>',
        '<a rel="nofollow" href="http://xx.com">http://xx.com</a> '
        '<pre>http://xx.com</pre>',
    )
    everywhere = (
        '<a href="http://xx.com" rel="nofollow">http://xx.com</a> '
        '<pre><a href="http://xx.com" rel="nofollow">http://xx.com'
        '</a></pre>',
        '<a rel="nofollow" href="http://xx.com">http://xx.com</a> '
        '<pre><a rel="nofollow" href="http://xx.com">http://xx.com'
        '</a></pre>',
    )
    in_(outside_only, linkify(text, skip_pre=True))
    in_(everywhere, linkify(text))

    existing_anchor = '<pre><a href="http://xx.com">xx</a></pre>'
    with_nofollow = (
        '<pre><a href="http://xx.com" rel="nofollow">xx</a></pre>',
        '<pre><a rel="nofollow" href="http://xx.com">xx</a></pre>',
    )
    # Same expectation for the default call and for skip_pre=True.
    for options in ({}, {'skip_pre': True}):
        in_(with_nofollow, linkify(existing_anchor, **options))
def test_link_ftp():
    """An ``ftp://`` URL is expected to be turned into a link."""
    ftp_url = 'ftp://ftp.mozilla.org/some/file'
    accepted = (
        '<a href="ftp://ftp.mozilla.org/some/file" rel="nofollow">'
        'ftp://ftp.mozilla.org/some/file</a>',
        '<a rel="nofollow" href="ftp://ftp.mozilla.org/some/file">'
        'ftp://ftp.mozilla.org/some/file</a>',
    )
    in_(accepted, linkify(ftp_url))
def test_mixed_linkify():
    """A URL between Cyrillic and Japanese text is expected to be linked.

    The surrounding non-ASCII text must appear unchanged in the output.
    """
    text = 'Домашняя http://example.com ヘルプとチュートリアル'
    href_first = (
        'Домашняя <a href="http://example.com" rel="nofollow">'
        'http://example.com</a> ヘルプとチュートリアル'
    )
    rel_first = (
        'Домашняя <a rel="nofollow" href="http://example.com">'
        'http://example.com</a> ヘルプとチュートリアル'
    )
    in_((href_first, rel_first), linkify(text))
def test_url_with_path():
    """A multi-segment path is expected to be kept in the link."""
    url = 'http://example.com/path/to/file'
    accepted = (
        '<a href="http://example.com/path/to/file" rel="nofollow">'
        'http://example.com/path/to/file</a>',
        '<a rel="nofollow" href="http://example.com/path/to/file">'
        'http://example.com/path/to/file</a>',
    )
    linked = linkify(url)
    in_(accepted, linked)
def check(test, expected_output):
    """Assert that ``linkify(test)`` matches one of the filled-in templates.

    Each template in ``outs`` is formatted with the items of
    ``expected_output`` as positional arguments. ``outs`` is not defined in
    this block; presumably it comes from an enclosing scope -- verify
    against the caller.
    """
    accepted = [template.format(*expected_output) for template in outs]
    linked = linkify(test)
    in_(accepted, linked)
def test_end_of_clause():
    """A comma right after a URL is expected to stay outside the link."""
    text = 'ex.com/foo, bar'
    accepted = (
        '<a href="http://ex.com/foo" rel="nofollow">ex.com/foo</a>, bar',
        '<a rel="nofollow" href="http://ex.com/foo">ex.com/foo</a>, bar',
    )
    in_(accepted, linkify(text))
def check(test, expected_output):
    """Assert that ``linkify(test)`` is among ``expected_output``."""
    linked = linkify(test)
    in_(expected_output, linked)
def test_link_fragment():
    """A ``#fragment`` is expected to remain part of the link."""
    url = 'http://xx.com/path#frag'
    href_first = (
        '<a href="http://xx.com/path#frag" rel="nofollow">'
        'http://xx.com/path#frag</a>'
    )
    rel_first = (
        '<a rel="nofollow" href="http://xx.com/path#frag">'
        'http://xx.com/path#frag</a>'
    )
    in_((href_first, rel_first), linkify(url))
def check(test, output):
    """Assert ``linkify(test)`` equals an anchor template filled from ``output``.

    ``output`` is unpacked into the templates: item 0 is used as both the
    href and the link text, item 1 is appended after the closing tag.
    """
    outs = (
        '<a href="{0}" rel="nofollow">{0}</a>{1}',
        '<a rel="nofollow" href="{0}">{0}</a>{1}',
    )
    accepted = [template.format(*output) for template in outs]
    linked = linkify(test)
    in_(accepted, linked)
def test_link_entities():
    """Linkify a URL whose query string contains an ampersand.

    NOTE(review): the expected strings contain a raw ``&`` although the
    test name refers to entities -- confirm whether ``&amp;`` was intended
    in the expected markup.
    """
    url = 'http://xx.com/?a=1&b=2'
    accepted = (
        '<a href="http://xx.com/?a=1&b=2" rel="nofollow">'
        'http://xx.com/?a=1&b=2</a>',
        '<a rel="nofollow" href="http://xx.com/?a=1&b=2">'
        'http://xx.com/?a=1&b=2</a>',
    )
    in_(accepted, linkify(url))
def _check(o, p, i):
    """Compare ``linkify(i, parse_email=p)`` against the expectation ``o``.

    A list or tuple ``o`` is checked with ``in_``; any other value is
    checked with ``eq_``.
    """
    assertion = in_ if isinstance(o, (list, tuple)) else eq_
    assertion(o, linkify(i, parse_email=p))
def test_ignore_bad_protocols():
    """Text that merely ends in ``http://`` is not expected to form a scheme.

    ``foohttp://bar`` must come back unchanged, while in
    ``fohttp://exampl.com`` only the host part is expected to be linked.
    """
    untouched = 'foohttp://bar'
    eq_(untouched, linkify(untouched))

    accepted = (
        'fohttp://<a href="http://exampl.com" rel="nofollow">exampl.com</a>',
        'fohttp://<a rel="nofollow" href="http://exampl.com">exampl.com</a>',
    )
    in_(accepted, linkify('fohttp://exampl.com'))
def test_links_https():
    """An ``https://`` URL is expected to be turned into a link."""
    url = 'https://yy.com'
    accepted = (
        '<a href="https://yy.com" rel="nofollow">https://yy.com</a>',
        '<a rel="nofollow" href="https://yy.com">https://yy.com</a>',
    )
    linked = linkify(url)
    in_(accepted, linked)
def check(u, p):
    """Assert that linkifying ``intxt`` filled with ``u``/``p`` matches ``outs``.

    Both the input template ``intxt`` and every output template in ``outs``
    are formatted with ``(u, p)``. Neither name is defined in this block;
    presumably they come from an enclosing scope -- verify against the
    caller.
    """
    accepted = [template.format(u, p) for template in outs]
    source_text = intxt.format(u, p)
    in_(accepted, linkify(source_text))
def test_ignore_bad_protocols():
    """A word glued in front of ``http://`` is not expected to form a scheme.

    ``foohttp://bar`` must come back unchanged, while in
    ``foohttp://exampl.com`` only the host part is expected to be linked.
    """
    no_link = 'foohttp://bar'
    eq_(no_link, linkify(no_link))

    href_first = (
        'foohttp://<a href="http://exampl.com" rel="nofollow">exampl.com</a>'
    )
    rel_first = (
        'foohttp://<a rel="nofollow" href="http://exampl.com">exampl.com</a>'
    )
    in_((href_first, rel_first), linkify('foohttp://exampl.com'))
def test_add_rel_nofollow():
    """A pre-existing anchor is expected to gain ``rel="nofollow"``."""
    existing_link = '<a href="http://yy.com">http://yy.com</a>'
    accepted = (
        '<a href="http://yy.com" rel="nofollow">http://yy.com</a>',
        '<a rel="nofollow" href="http://yy.com">http://yy.com</a>',
    )
    in_(accepted, linkify(existing_link))