Example #1
0
    def test_async_urlopen_returnsValidRequests(self):
        """Check that async_urlopen returns the body of each requested URL.

        Two test-server files are requested in one call; the responses,
        with surrounding whitespace stripped, must equal the expected
        bodies in request order.
        """
        responses = async_urlopen(
            ['http://testserver/test1.txt', 'http://testserver/test2.txt'])

        expected_responses = ['test 1', 'test 2']
        # Comparing whole lists also verifies the number of responses: a
        # missing or surplus response is reported as an assertion failure
        # instead of an IndexError or a silent pass.
        self.assertEqual(
            [response.strip() for response in responses],
            expected_responses)
Example #2
0
    def getMany(self, urls):
        """Return the parsed document for every URL in ``urls``, in order.

        URLs already present in ``self.cache`` are served from it. The
        remaining ones are downloaded with a single ``async_urlopen`` call,
        parsed with ``BeautifulSoup`` (``html.parser``) and stored in the
        cache before the result list is built.

        Args:
            urls: Sequence of URLs. It is iterated more than once and passed
                to ``len``, so a one-shot iterator is not suitable.

        Returns:
            A list holding ``self.cache[url]`` for each entry of ``urls``,
            repeated entries included.
        """
        uncached = [url for url in urls if url not in self.cache]
        cache_hits = len(urls) - len(uncached)
        if cache_hits:
            debug('{}/{} cache hits'.format(cache_hits, len(urls)))

        # A URL that appears several times in ``urls`` must be downloaded
        # and parsed only once; drop the repeats before hitting the network.
        to_fetch = list(dict.fromkeys(uncached))
        if to_fetch:
            responses = async_urlopen(to_fetch, NUM_PARALLEL_REQUESTS)
            # NOTE(review): assumes async_urlopen yields one response per
            # URL, in request order -- zip() would silently drop any URL
            # without a matching response; confirm against async_urlopen.
            for url, body in zip(to_fetch, responses):
                self.cache[url] = BeautifulSoup(body, "html.parser")

        return [self.cache[url] for url in urls]
Example #3
0
 def test_async_urlopen_returns404(self):
     """Check the response for a URL the test server reports as missing.

     The first (and only requested) response must contain the text
     "404 Not Found".
     """
     missing_url = 'http://testserver/test404.txt'
     bodies = async_urlopen([missing_url])
     first_body = bodies[0]
     self.assertIn("404 Not Found", first_body)
Example #4
0
 def getOne(self, url=EVENTS_URL):
     """Return the parsed document for ``url``, fetching it on a cache miss.

     Args:
         url: Address to load; defaults to ``EVENTS_URL``.

     Returns:
         The entry stored in ``self.cache`` under ``url``. On a miss the
         page is downloaded through ``async_urlopen``, parsed with
         ``BeautifulSoup`` (``html.parser``) and cached first.
     """
     # Fast path: the document has been fetched and parsed before.
     if url in self.cache:
         return self.cache[url]

     # async_urlopen takes a list of URLs; only the first result is used.
     markup = async_urlopen([url])[0]
     self.cache[url] = BeautifulSoup(markup, "html.parser")
     return self.cache[url]