def wait_all_requests_completed(self):
    if len(self.requests_to_send) == 0:
        return
    try:
        # Send every queued request and attach each response to its
        # result placeholder; the two lists are paired by position.
        for resp, result in zip(erequests.map(self.requests_to_send), self.results):
            result.response = resp
    finally:
        self.requests_to_send = []
        self.results = []
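# A minimal sketch of the queueing pattern wait_all_requests_completed() assumes:
# unsent async requests and result placeholders are kept in two parallel lists,
# paired by index. The PendingResult/RequestBatcher names and the use of
# erequests.get() to build an unsent request are hypothetical, not from the source.
import erequests


class PendingResult(object):
    def __init__(self):
        self.response = None


class RequestBatcher(object):
    def __init__(self):
        self.requests_to_send = []
        self.results = []

    def queue_get(self, url, **kwargs):
        # Queue a request without sending it; the caller keeps the placeholder
        # and reads .response after wait_all_requests_completed() has run.
        result = PendingResult()
        self.requests_to_send.append(erequests.get(url, **kwargs))
        self.results.append(result)
        return result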
def test_hooks(self):
    result = {}

    def hook(r, **kwargs):
        result[r.url] = True
        return r

    reqs = [erequests.get_async(url, hooks={'response': [hook]}) for url in URLS]
    resp = list(erequests.map(reqs, size=N))
    self.assertEqual(sorted(result.keys()), sorted(URLS))
def test_hooks(self):
    result = {}

    def hook(r, **kwargs):
        result[r.url] = True
        return r

    reqs = [erequests.get(url, hooks={'response': [hook]}) for url in URLS]
    resp = erequests.map(reqs, size=N)
    self.assertEqual(sorted(result.keys()), sorted(URLS))
def test_timeout(self):
    n = 5
    reqs = [erequests.get(httpbin('delay/3'), timeout=(i + 1),
                          exception_handler=mark_exception)
            for i in range(n)]
    resp = erequests.map(reqs, size=n)
    self.assertListEqual(resp[0:3],
                         [erequests.Timeout, erequests.Timeout, erequests.Timeout],
                         "First three requests should have timed out")
def api_async(params, token=None):
    """Takes the uris of the API to call."""
    headers = {}
    keys = []
    urls = []
    for key, value in params.items():
        method, uri, get_vars = value
        keys.append(key)
        urls.append((method, build_url(uri, get_vars)))

    if token is not None:
        headers['Authorization'] = 'Bearer %s' % token
    else:
        encoded_consumer = b64encode('%s:%s' % (settings.MASTER_OAUTH_KEY,
                                                settings.MASTER_OAUTH_SECRET))
        headers['Authorization'] = 'Basic %s' % encoded_consumer

    reqs = (erequests.get(url, headers=headers) for method, url in urls)
    reqs_map = erequests.map(reqs)
    return dict(zip(keys, reqs_map))
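# Hypothetical usage sketch for api_async(): the keys, endpoints, query
# parameters and user_token below are illustrative, not from the source.
results = api_async({
    'profile': ('GET', '/users/me', {}),
    'friends': ('GET', '/users/me/friends', {'limit': 20}),
}, token=user_token)
profile_response = results['profile']  # mapped response for the 'profile' request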
def test_concurrency_with_delayed_url(self):
    t = time.time()
    n = 10
    reqs = [erequests.get(httpbin('delay/1')) for _ in range(n)]
    resp = erequests.map(reqs, size=n)
    self.assertLess((time.time() - t), n)
def get(self, url, **kwargs):
    return list(erequests.map([erequests.get_async(url, **kwargs)]))[0]
def test_concurrency_with_delayed_url(self):
    t = time.time()
    n = 10
    reqs = [erequests.get_async(httpbin('delay/1')) for _ in range(n)]
    resp = list(erequests.map(reqs, size=n))
    self.assertLess((time.time() - t), n)
import os
import unittest

import erequests

HTTPBIN_URL = os.environ.get('HTTPBIN_URL', 'http://httpbin.org/')


def httpbin(*suffix):
    """Returns url for HTTPBIN resource."""
    return HTTPBIN_URL + '/'.join(suffix)


N = 5
URLS = [httpbin('get?p=%s' % i) for i in range(N)]


class GrequestsCase(unittest.TestCase):

    def test_map(self):
        reqs = [erequests.async.get(url) for url in URLS]
        resp = erequests.map(reqs, size=N)
        self.assertEqual([r.url for r in resp], URLS)

    def test_imap(self):
        reqs = (erequests.async.get(url) for url in URLS)
        i = 0
        for i, r in enumerate(erequests.imap(reqs, size=N)):
            self.assertTrue(r.url in URLS)
        self.assertEqual(i, N - 1)

    def test_hooks(self):
        result = {}

        def hook(r, **kwargs):
            result[r.url] = True
            return r
    genotypes.append(individual_genotype)
    return genotypes


def get_list_of_rs(list_of_rs):
    """Return a list with all the Ensembl responses for a list of rs."""
    ensembl_uri = \
        "http://rest.ensembl.org/variation/human/{}" + \
        "?content-type=application/json;genotypes=1"

    # Create all the URL request strings
    list_of_urls = [ensembl_uri.format(_) for _ in list_of_rs]

    # Put every request in a pool (unsent)
    responses = (erequests.async.get(_) for _ in list_of_urls)

    # Send every request and get the JSON from each one.
    return [_.json() for _ in erequests.map(responses)]


def get_rs(rs_id):
    """Return the JSON for a given rs_id."""
    ensembl_uri = \
        "http://rest.ensembl.org/variation/human/{}".format(rs_id) + \
        "?content-type=application/json;genotypes=1"

    json_response = requests.get(ensembl_uri).json()
    return json_response
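# Hypothetical usage sketch: fetch several variants in one parallel batch, then a
# single one on its own (the rs identifiers below are illustrative examples).
batch = get_list_of_rs(['rs699', 'rs429358'])  # parallel requests via erequests
single = get_rs('rs699')                       # single blocking request via requests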
def test_map(self):
    reqs = [erequests.get_async(url) for url in URLS]
    resp = erequests.map(reqs, size=N)
    self.assertEqual([r.url for r in resp], URLS)
db = Database()

# General logging stuff so it doesn't break
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
# fh.setFormatter(formatter)
ch = logging.StreamHandler()
ch.setLevel(logging.INFO)
ch.setFormatter(formatter)
logger.addHandler(ch)
logger.info("Logger created")

url = 'http://www.bbc.co.uk/1/hi/england/hampshire/6245861.stm'
referer = 'http://www.bbc.co.uk/news/health-22855670'
urls = [url]

rs = (erequests.async.get(u) for u in urls)
for u in erequests.map(rs):
    print '\n\n\n\n%s' % u.url
    # print db.singleValueSelectQuery('SELECT rowid FROM BBCPages WHERE Url = ?', [u.url])
    id = BBC.getIdByUrl(u.url)
    print "id: %s" % str(id)
    bbc = BBC(u, id, logger, referer)
        randy = random.randint(4, 8)
        logger.info("Processing links from %s. About to sleep for %ds...." % (referer, randy))
        time.sleep(randy)
    else:
        logger.info("Processing links from %s. Not sleeping because the last list was small/empty" % referer)
        # time.sleep(1)

    header = {'Accept-Encoding': 'gzip, deflate', 'User-Agent': 'Scraper', 'Referer': referer}
    print url_list[0]
    # print url_list[0]["%d|%s" % (id, referer)]
    # list_not_empty = bool(url_list[0]["%d|%s" % (id, referer)])
    length = len(url_list[0]["%d|%s" % (id, referer)])
    print "List length: %d" % len(url_list[0]["%d|%s" % (id, referer)])

    rs = (erequests.async.get(u.split('|')[1], headers=header)
          for u in url_list[0]["%d|%s" % (id, referer)])
    for u in erequests.map(rs):
        try:
            logger.info('Starting to process %s' % u.url)
            id = BBC.getIdByUrl(u.url)
            bbc = BBC(u, id, logger, referer)
            url_list.append({"%d|%s" % (id, bbc.url): bbc.urls})
        except Exception, e:
            # Just die and we can deal with it later
            logger.error(str(e))

    logger.info("Finished processing remaining URLs for %s" % referer)
    del url_list[0]
    save_todo_list(url_list)
def get(self, url, **kwargs):
    return erequests.map([erequests.get(url, **kwargs)])[0]
def test_stream_enabled(self):
    r = erequests.map([erequests.get(httpbin('stream/10'))],
                      size=2, stream=True)[0]
    self.assertFalse(r._content_consumed)
def test_map(self):
    reqs = [erequests.get(url) for url in URLS]
    resp = erequests.map(reqs, size=N)
    self.assertEqual([r.url for r in resp], URLS)
def get(self, url, **kwargs):
    return list(erequests.map([erequests.async.get(url, **kwargs)]))[0]
def test_calling_request(self):
    reqs = [erequests.request('POST', httpbin('post'), data={'p': i})
            for i in range(N)]
    resp = erequests.map(reqs, size=N)
    self.assertEqual([int(r.json()['form']['p']) for r in resp], list(range(N)))
def test_calling_request(self):
    reqs = [erequests.request_async('POST', httpbin('post'), data={'p': i})
            for i in range(N)]
    resp = erequests.map(reqs, size=N)
    self.assertEqual([int(r.json()['form']['p']) for r in resp], list(range(N)))
def test_stream_enabled(self):
    r = list(erequests.map([erequests.get_async(httpbin('stream/10'), stream=True)],
                           size=2))[0]
    self.assertFalse(r._content_consumed)