def test_link_escaping():
    """Check Link on a local path containing '#' and on a percent-escaped URL."""
    local_target = '/foo/bar#baz.pex'
    local_link = Link(local_target)
    assert local_link.scheme == 'file'
    assert local_link.local
    # The '#' must stay part of the path rather than being split off.
    assert local_link.local_path == os.path.realpath(local_target)

    escaped_link = Link('http://www.google.com/%20/%3Afile+%2B2.tar.gz')
    # '%3A' and '%2B' are expected to decode to ':' and '+'; the literal '+' stays.
    assert escaped_link.filename == ':file++2.tar.gz'
def test_crawler_remote_redirect():
    """Crawl a single remote link whose context resolves it to a different host."""
    Crawler.reset_cache()

    redirect_target = 'http://url2.test.com'
    context = mock.create_autospec(Context, spec_set=True)
    # Whatever link is passed in, resolve() answers with the redirect target.
    context.resolve = lambda link: Link(redirect_target)
    context.content.side_effect = [MOCK_INDEX_A]

    wanted = set([Link('http://url2.test.com/3to2-1.0.tar.gz')])

    crawler = Crawler(context)
    seeds = [Link('http://url1.test.com')]
    found = crawler.crawl(seeds)

    # The discovered link is expected under the redirected host, not the seed.
    assert found == wanted
def test_link_schemes():
    """Check scheme detection for http, https and bare filesystem paths."""
    remote_cases = (
        ('http://www.google.com', 'http'),
        ('https://www.google.com', 'https'),
    )
    for url, expected_scheme in remote_cases:
        remote_link = Link(url)
        assert remote_link.scheme == expected_scheme
        assert remote_link.remote

    # A path with no scheme is expected to be treated as a local file.
    fs_path = '/foo/bar'
    local_link = Link(fs_path)
    assert local_link.scheme == 'file'
    assert local_link.local
    assert local_link.path == os.path.realpath(fs_path)
def test_crawler_remote():
    """Crawl two remote index pages, then crawl again to exercise memoization."""
    Crawler.reset_cache()

    context = mock.create_autospec(Context, spec_set=True)
    context.resolve = lambda link: link
    # Two pages are served; the third side effect is an Exception, so a third
    # content() call raises instead of returning a page.
    context.content.side_effect = [
        MOCK_INDEX_A,
        MOCK_INDEX_B,
        Exception('shouldnt get here'),
    ]

    wanted = set([
        Link('http://url1.test.com/3to2-1.0.tar.gz'),
        Link('http://url2.test.com/APScheduler-2.1.0.tar.gz'),
    ])

    crawler = Crawler(context)
    seeds = [Link('http://url1.test.com'), Link('http://url2.test.com')]

    first_pass = crawler.crawl(seeds)
    assert first_pass == wanted

    # Test memoization of Crawler.crawl().
    second_pass = crawler.crawl(seeds)
    assert second_pass == wanted
def test_link_wrapping():
    """Check Link.wrap and Link.wrap_iterable on strings, Links and bad input."""
    https_url = 'https://www.google.com'
    http_url = 'http://www.google.com'

    wrapped = Link.wrap(https_url)
    assert wrapped.url == https_url

    # Wrapping an already-wrapped Link must leave the URL unchanged.
    rewrapped = Link.wrap(Link.wrap(https_url))
    assert rewrapped.url == https_url

    # Both wrappers reject a non-string, non-Link value.
    for wrapper in (Link.wrap, Link.wrap_iterable):
        with pytest.raises(ValueError):
            wrapper(1234)

    # A bare string is one link, not an iterable of characters.
    single = Link.wrap_iterable(https_url)
    assert len(single) == 1
    assert single[0].url == https_url

    mixed = Link.wrap_iterable([https_url, Link(http_url)])
    assert set(mixed) == set([
        Link(http_url),
        Link(https_url),
    ])
def test_link_equality():
    """Check that Links compare equal for the same URL and unequal otherwise."""
    google_url = 'http://www.google.com'
    twitter_url = 'http://www.twitter.com'

    # Two separately constructed Links for the same URL.
    assert Link(google_url) == Link(google_url)
    assert Link(google_url) != Link(twitter_url)
def test_link_join():
    """Check Link.join with an absolute path, a fragment and a relative path."""
    base = Link('https://www.google.com/bar/')
    cases = (
        ('/foo', 'https://www.google.com/foo'),
        ('#foo', 'https://www.google.com/bar/#foo'),
        ('foo', 'https://www.google.com/bar/foo'),
    )
    for reference, expected_url in cases:
        assert base.join(reference).url == expected_url