def test_arxiv_id_urls_punct(self):
    """Test cases of urlize for arXiv identifiers with punctuation.

    Each case checks that punctuation adjacent to an identifier (trailing
    period, separating comma, surrounding parentheses) stays outside the
    generated anchor element.
    """
    h = 'sosmooth.org'
    app.config['SERVER_NAME'] = h

    with app.app_context():
        # Trailing period after an old-style identifier is left outside the link.
        self.assertEqual(
            urlize('hep-th/9901002.'),
            '<a class="link-https" data-arxiv-id="hep-th/9901002" href="https://arxiv.org/abs/hep-th/9901002">hep-th/9901002</a>.',
            'followed by period')

        # Trailing period after a new-style identifier is left outside the link.
        self.assertEqual(
            urlize('0702.0003.'),
            '<a class="link-https" data-arxiv-id="0702.0003" href="https://arxiv.org/abs/0702.0003">0702.0003</a>.',
            'followed by period')

        # Two identifiers separated only by a comma become two separate links.
        self.assertEqual(
            urlize('hep-th/9901001,hep-th/9901002'),
            '<a class="link-https" data-arxiv-id="hep-th/9901001" href="https://arxiv.org/abs/hep-th/9901001">hep-th/9901001</a>'
            ',<a class="link-https" data-arxiv-id="hep-th/9901002" href="https://arxiv.org/abs/hep-th/9901002">hep-th/9901002</a>',
            'filter_urls_ids_escape (ID linking) 3/7')

        # Comma and following text are preserved after the link.
        self.assertEqual(
            urlize('0702.0003, something'),
            '<a class="link-https" data-arxiv-id="0702.0003" href="https://arxiv.org/abs/0702.0003">0702.0003</a>, something',
            'followed by comma')

        # Parentheses around the identifier are kept outside the link.
        self.assertEqual(
            urlize('(0702.0003) something'),
            '(<a class="link-https" data-arxiv-id="0702.0003" href="https://arxiv.org/abs/0702.0003">0702.0003</a>) something',
            'in parens')
def test_vixra(self):
    """Check that viXra-prefixed identifiers come back from urlize unchanged."""
    app.config['SERVER_NAME'] = 'sosmooth.org'
    vixra_text = 'viXra:0704.0001 viXra:1003.0123'

    with app.app_context():
        # The expected output is the input itself: no anchor is generated.
        rendered = urlize(vixra_text)
        self.assertEqual(rendered, 'viXra:0704.0001 viXra:1003.0123')
def test_arxiv_id_v(self):
    """Test urlize for arXiv identifiers with version affix.

    Covers an ``arXiv:``-prefixed old-style id, a bare old-style id and a
    new-style id, each carrying a ``vN`` suffix. The version is expected to
    be kept in ``data-arxiv-id``, in the ``href`` and in the link text.
    """
    h = 'sosmooth.org'
    app.config['SERVER_NAME'] = h

    with app.app_context():
        self.assertEqual(
            urlize('arXiv:dg-ga/9401001v12 hep-th/9901001v2 0704.0001v1'),
            '<a class="link-https" data-arxiv-id="dg-ga/9401001v12" href="https://arxiv.org/abs/dg-ga/9401001v12">arXiv:dg-ga/9401001v12</a>'
            ' <a class="link-https" data-arxiv-id="hep-th/9901001v2" href="https://arxiv.org/abs/hep-th/9901001v2">hep-th/9901001v2</a>'
            ' <a class="link-https" data-arxiv-id="0704.0001v1" href="https://arxiv.org/abs/0704.0001v1">0704.0001v1</a>',
            'arxiv ids with version numbers')
def test_arxiv_id_urls_3(self):
    """Test more complex cases of urlize for arXiv identifiers.

    Checks a lone identifier, an identifier followed by a newline, and a
    space-separated run of three identifiers (the first one carrying an
    ``arXiv:`` prefix, which stays in the link text but not in the id/href).
    """
    h = 'sosmooth.org'
    app.config['SERVER_NAME'] = h

    with app.app_context():
        self.assertEqual(
            urlize('hep-th/9901002'),
            '<a class="link-https" data-arxiv-id="hep-th/9901002" href="https://arxiv.org/abs/hep-th/9901002">hep-th/9901002</a>'
        )

        # The trailing newline is preserved after the anchor.
        self.assertEqual(
            urlize('hep-th/9901002\n'),
            '<a class="link-https" data-arxiv-id="hep-th/9901002" href="https://arxiv.org/abs/hep-th/9901002">hep-th/9901002</a>\n'
        )

        self.assertEqual(
            urlize('arXiv:dg-ga/9401001 hep-th/9901001 hep-th/9901002'),
            '<a class="link-https" data-arxiv-id="dg-ga/9401001" href="https://arxiv.org/abs/dg-ga/9401001">arXiv:dg-ga/9401001</a>'
            ' <a class="link-https" data-arxiv-id="hep-th/9901001" href="https://arxiv.org/abs/hep-th/9901001">hep-th/9901001</a>'
            ' <a class="link-https" data-arxiv-id="hep-th/9901002" href="https://arxiv.org/abs/hep-th/9901002">hep-th/9901002</a>'
        )
def test_arxiv_id_urls_basic(self):
    """Test basic urlize for arXiv identifiers.

    Covers empty input, text containing no identifier (which must come back
    HTML-escaped), a single identifier, and two space-separated identifiers.
    """
    # a server name is needed for url_for to return something
    h = 'arxiv.org'
    app.config['SERVER_NAME'] = h

    with app.app_context():
        # Empty input with an explicit ['arxiv_id'] argument yields empty output.
        self.assertEqual(urlize('', ['arxiv_id']), '')

        # Text without identifiers or URLs must equal its escaped form.
        s = 'some text 134#%$$%&^^%*^&(()*_)_<>?:;[}}'
        self.assertEqual(urlize(s), str(escape(s)),
                         'filters should return escaped text')

        self.assertEqual(
            urlize('hep-th/9901001'),
            '<a class="link-https" data-arxiv-id="hep-th/9901001" href="https://arxiv.org/abs/hep-th/9901001">hep-th/9901001</a>'
        )

        self.assertEqual(
            urlize('hep-th/9901001 hep-th/9901002'),
            '<a class="link-https" data-arxiv-id="hep-th/9901001" href="https://arxiv.org/abs/hep-th/9901001">hep-th/9901001</a>'
            ' <a class="link-https" data-arxiv-id="hep-th/9901002" href="https://arxiv.org/abs/hep-th/9901002">hep-th/9901002</a>'
        )
def test_arxiv_id_urls_more(self):
    """Test urlize for arXiv identifiers that have mixed formatting.

    A single input mixes an ``arXiv:``-prefixed old-style id, a bare
    old-style id and a new-style id; each must become its own link.
    """
    h = 'sosmooth.org'
    app.config['SERVER_NAME'] = h

    with app.app_context():
        self.assertEqual(
            urlize('arXiv:dg-ga/9401001 hep-th/9901001 0704.0001'),
            '<a class="link-https" data-arxiv-id="dg-ga/9401001" href="https://arxiv.org/abs/dg-ga/9401001">arXiv:dg-ga/9401001</a>'
            ' <a class="link-https" data-arxiv-id="hep-th/9901001" href="https://arxiv.org/abs/hep-th/9901001">hep-th/9901001</a>'
            ' <a class="link-https" data-arxiv-id="0704.0001" href="https://arxiv.org/abs/0704.0001">0704.0001</a>',
            'urlize (ID linking) 5/7')
def test_arxiv_id_urls_escaping(self):
    """Test proper escaping when urlize applied.

    Two situations are covered:

    * plain ``str`` input containing HTML: the markup must come out
      escaped, so the expected value is built with ``escape``;
    * input already wrapped in ``Markup``: the text is expected to come
      back as-is after the generated link, without a second escaping pass.
    """
    h = 'sosmooth.org'
    app.config['SERVER_NAME'] = h

    with app.app_context():
        ax_id = 'hep-th/9901002'

        user_entered_txt = ' <div>div should be escaped</div>'
        # The expectation must be the *escaped* text. Wrapping the raw string
        # in Markup() would mark the <div> as safe and make this test demand
        # unescaped user HTML in the output.
        ex_txt = str(escape(user_entered_txt))
        self.assertEqual(
            urlize(ax_id + user_entered_txt),
            f'<a class="link-https" data-arxiv-id="hep-th/9901002" href="https://arxiv.org/abs/hep-th/9901002">hep-th/9901002</a>{ex_txt}',
            'Dealing with user entered text with html that should be escaped for safety'
        )

        # Markup input stands in for text that a template already treated as safe.
        jinja_escaped_txt = Markup(
            ' <div>div should already be escaped by jinja2</div>')
        self.assertEqual(
            urlize(ax_id + jinja_escaped_txt),
            f'<a class="link-https" data-arxiv-id="hep-th/9901002" href="https://arxiv.org/abs/hep-th/9901002">hep-th/9901002</a>{jinja_escaped_txt}',
            'Dealing with text that has been escaped by Jinja2 already')
def test_arxiv_urlize(self):
    """Multiple basic urlize tests.

    Three groups of cases:

    * external http/https/ftp URLs, which become links whose text names
      the scheme ("this http URL", ...);
    * numeric strings that look similar to identifiers but must be left
      untouched (plus one case where a valid identifier prefix is linked);
    * URLs wrapped in brackets or quotes, or followed by a space.
    """
    h = 'sosmooth.org'
    app.config['SERVER_NAME'] = h

    with app.app_context():
        self.assertEqual(
            urlize('http://example.com/'),
            '<a class="link-external link-http" href="http://example.com/" rel="external noopener nofollow">this http URL</a>',
            'urlize (URL linking) 1/6')
        self.assertEqual(
            urlize('https://example.com/'),
            '<a class="link-external link-https" href="https://example.com/" rel="external noopener nofollow">this https URL</a>',
            'urlize (URL linking) 2/6')
        self.assertEqual(
            urlize('ftp://example.com/'),
            '<a class="link-external link-ftp" href="ftp://example.com/" rel="external noopener nofollow">this ftp URL</a>',
            'urlize (URL linking) 3/6')

        # An identifier-like path segment inside a URL must not be linked
        # separately; the whole URL is one external link.
        self.assertEqual(
            urlize('http://example.com/.hep-th/9901001'),
            '<a class="link-external link-http" href="http://example.com/.hep-th/9901001" rel="external noopener nofollow">this http URL</a>',
            'urlize (URL linking) 4/6')
        self.assertEqual(
            urlize('http://projecteuclid.org/euclid.bj/1151525136'),
            '<a class="link-external link-http" href="http://projecteuclid.org/euclid.bj/1151525136" rel="external noopener nofollow">this http URL</a>',
            'urlize (URL linking) 5/6')
        self.assertEqual(
            urlize(
                ' Correction to Bernoulli (2006), 12, 551--570 http://projecteuclid.org/euclid.bj/1151525136'
            ),
            Markup(
                ' Correction to Bernoulli (2006), 12, 551--570 <a class="link-external link-http" href="http://projecteuclid.org/euclid.bj/1151525136" rel="external noopener nofollow">this http URL</a>'
            ),
            'urlize (URL linking) 6/6')

        # shouldn't match
        self.assertEqual(urlize('2448446.4710(5)'), '2448446.4710(5)',
                         'urlize (should not match) 1/9')
        self.assertEqual(urlize('HJD=2450274.4156+/-0.0009'),
                         'HJD=2450274.4156+/-0.0009',
                         'urlize (should not match) 2/9')
        self.assertEqual(
            urlize('T_min[HJD]=49238.83662(14)+0.146352739(11)E.'),
            'T_min[HJD]=49238.83662(14)+0.146352739(11)E.',
            'urlize (should not match) 3/9')
        self.assertEqual(urlize('Pspin=1008.3408s'), 'Pspin=1008.3408s',
                         'urlize (should not match) 4/9')
        self.assertEqual(urlize('2453527.87455^{+0.00085}_{-0.00091}'),
                         '2453527.87455^{+0.00085}_{-0.00091}',
                         'urlize (should not match) 5/9')
        self.assertEqual(urlize('2451435.4353'), '2451435.4353',
                         'urlize (should not match) 6/9')

        # Exception within this group: the leading 'cond-mat/9706300' is
        # linked and the extra trailing digit is left as plain text.
        self.assertEqual(
            urlize('cond-mat/97063007'),
            '<a class="link-https" data-arxiv-id="cond-mat/9706300" href="https://arxiv.org/abs/cond-mat/9706300">cond-mat/9706300</a>7',
            'urlize (should match) 7/9')

        # Surrounding brackets stay outside the link.
        self.assertEqual(
            urlize('[http://onion.com/something-funny-about-arxiv-1234]'),
            '[<a class="link-external link-http" href="http://onion.com/something-funny-about-arxiv-1234" rel="external noopener nofollow">this http URL</a>]'
        )
        self.assertEqual(
            urlize(
                '[http://onion.com/?q=something-funny-about-arxiv.1234]'),
            '[<a class="link-external link-http" href="http://onion.com/?q=something-funny-about-arxiv.1234" rel="external noopener nofollow">this http URL</a>]'
        )

        # The URL ends at the first space; the remainder is plain text.
        self.assertEqual(
            urlize('http://onion.com/?q=something funny'),
            '<a class="link-external link-http" href="http://onion.com/?q=something" rel="external noopener nofollow">this http URL</a> funny',
            'Spaces CANNOT be expected to be part of URLs')

        # Surrounding double quotes stay outside the link.
        self.assertEqual(
            urlize('"http://onion.com/something-funny-about-arxiv-1234"'),
            '"<a class="link-external link-http" href="http://onion.com/something-funny-about-arxiv-1234" rel="external noopener nofollow">this http URL</a>"',
            'Should handle URL surrounded by double quotes')