def test_link_wrapping():
    """Check Link.wrap / Link.wrap_iterable on URLs, Links and an invalid int."""
    secure_url = "https://www.google.com"

    # A URL string is wrapped into a Link exposing that same URL.
    wrapped = Link.wrap(secure_url)
    assert wrapped.url == secure_url

    # Wrapping an already-wrapped Link still yields the same URL.
    rewrapped = Link.wrap(Link.wrap(secure_url))
    assert rewrapped.url == secure_url

    # An integer is rejected by both entry points.
    with pytest.raises(ValueError):
        Link.wrap(1234)
    with pytest.raises(ValueError):
        Link.wrap_iterable(1234)

    # A lone URL string becomes a one-element collection.
    single = Link.wrap_iterable(secure_url)
    assert len(single) == 1
    assert single[0].url == secure_url

    # A list mixing a URL string and a Link is wrapped element by element.
    mixed = Link.wrap_iterable([secure_url, Link("http://www.google.com")])
    actual = set(mixed)
    expected = {Link("http://www.google.com"), Link("https://www.google.com")}
    assert actual == expected
def crawl(self, link_or_links, follow_links=False):
    """Crawl the given link(s), reusing a previously cached result if present.

    The input is normalized with ``Link.wrap_iterable`` and, together with
    ``follow_links``, turned into a cache key. On a cache miss the work is
    delegated to ``self._crawl`` and the result is stored before returning.

    :param link_or_links: Anything accepted by ``Link.wrap_iterable``.
    :param follow_links: Forwarded to the cache key and to ``self._crawl``.
    :returns: The cached or freshly computed crawl result.
    """
    wrapped = list(Link.wrap_iterable(link_or_links))
    key = self._make_cache_key(wrapped, follow_links)

    # Crawl results are memoized in self._CRAWL_CACHE (described by the
    # original comment as a global Memoizer, Crawler._CRAWL_CACHE).
    cached = self._CRAWL_CACHE.get(key)
    if cached is not None:
        return cached

    # A lookup that yields None is treated as a miss, so compute and store.
    fresh = self._crawl(wrapped, follow_links)
    self._CRAWL_CACHE.store(key, fresh)
    return fresh
def test_link_wrapping():
    """Verify how Link.wrap and Link.wrap_iterable treat strings, Links and ints."""
    https_url = 'https://www.google.com'

    # String input: the resulting Link carries the original URL.
    from_string = Link.wrap(https_url)
    assert from_string.url == https_url

    # Link input: wrapping twice does not change the URL.
    inner = Link.wrap(https_url)
    from_link = Link.wrap(inner)
    assert from_link.url == https_url

    # Integer input: both wrappers raise ValueError.
    with pytest.raises(ValueError):
        Link.wrap(1234)

    with pytest.raises(ValueError):
        Link.wrap_iterable(1234)

    # A bare string is treated as a single link.
    one_link = Link.wrap_iterable(https_url)
    assert len(one_link) == 1
    assert one_link[0].url == https_url

    # Strings and Link objects may be mixed within one list.
    several = Link.wrap_iterable([https_url, Link('http://www.google.com')])
    assert set(several) == {
        Link('http://www.google.com'),
        Link('https://www.google.com'),
    }
def test_link_wrapping():
    """Cover Link.wrap and Link.wrap_iterable with valid and invalid inputs."""
    # Wrapping a URL string.
    result = Link.wrap('https://www.google.com')
    assert result.url == 'https://www.google.com'

    # Wrapping something that is already a Link.
    already_wrapped = Link.wrap('https://www.google.com')
    result = Link.wrap(already_wrapped)
    assert result.url == 'https://www.google.com'

    # An int is not a valid link for either wrapper.
    with pytest.raises(ValueError):
        Link.wrap(1234)
    with pytest.raises(ValueError):
        Link.wrap_iterable(1234)

    # wrap_iterable on a single string gives exactly one Link.
    results = Link.wrap_iterable('https://www.google.com')
    assert len(results) == 1
    first = results[0]
    assert first.url == 'https://www.google.com'

    # wrap_iterable on a heterogeneous list of a string and a Link.
    candidates = ['https://www.google.com', Link('http://www.google.com')]
    results = Link.wrap_iterable(candidates)
    wanted = {
        Link('http://www.google.com'),
        Link('https://www.google.com'),
    }
    assert set(results) == wanted