def test_async_urlopen_returnsValidRequests(self):
    """Fetching several URLs returns each body in request order."""
    responses = async_urlopen(
        ['http://testserver/test1.txt', 'http://testserver/test2.txt'])
    expected_responses = ['test 1', 'test 2']
    # Guard the lengths explicitly, then pair responses with expectations
    # instead of indexing via range(len(...)).
    self.assertEqual(len(responses), len(expected_responses))
    for response, expected in zip(responses, expected_responses):
        self.assertEqual(response.strip(), expected)
def getMany(self, urls):
    """Return parsed BeautifulSoup documents for *urls*, in order.

    URLs already present in ``self.cache`` are served from it; the rest
    are fetched in parallel via ``async_urlopen`` and their parsed bodies
    stored in the cache before the results are assembled.
    """
    not_cached_urls = [x for x in urls if x not in self.cache]
    count_cache_hits = len(urls) - len(not_cached_urls)
    if count_cache_hits:  # truthiness instead of "> 0"
        debug('{}/{} cache hits'.format(count_cache_hits, len(urls)))
    if not_cached_urls:
        responses = async_urlopen(not_cached_urls, NUM_PARALLEL_REQUESTS)
        # async_urlopen preserves request order, so zip pairs each URL
        # with its own response body.
        for req, res in zip(not_cached_urls, responses):
            self.cache[req] = BeautifulSoup(res, "html.parser")
    return [self.cache[x] for x in urls]
def test_async_urlopen_returns404(self):
    """A request for a missing resource yields a body with the 404 status text."""
    responses = async_urlopen(['http://testserver/test404.txt'])
    self.assertIn("404 Not Found", responses[0])
def getOne(self, url=EVENTS_URL):
    """Return the parsed document for *url*, fetching it on a cache miss."""
    try:
        # EAFP: serve straight from the cache when we already parsed this URL.
        return self.cache[url]
    except KeyError:
        body = async_urlopen([url])[0]
        parsed = BeautifulSoup(body, "html.parser")
        self.cache[url] = parsed
        return parsed