def send_requests(reqs):
    """
    Send all the requests in :reqs: and read the response data to extract the
    deputies' data. It checks whether a deputy has more than one page of
    advisors and sends new requests if so.
    """
    buffer = list()
    print("Sending!")
    kwargs = dict(size=8, exception_handler=http_exception_handler)
    for response in grequests.imap(reqs, **kwargs):
        page_data = extract_data_from_page(response)
        yield page_data
        print('.', end="", flush=True)
        if page_data["has_next_page"]:
            current = page_data["current_page"]
            total = page_data["number_of_pages"]
            for page in range(current + 1, total + 1):
                buffer.append(get_page(page_data['data'], page))

    pending = len(buffer)
    print("\nFound {} more pages to fetch. Starting now…".format(pending))
    for req in grequests.imap(buffer, **kwargs):
        page_data = extract_data_from_page(req)
        yield page_data
        print('.', end="", flush=True)
def send_requests(reqs):
    """
    Send all the requests in :reqs: and read the response data to extract the
    deputies' data. It checks whether a deputy has more than one page of
    advisors and sends new requests if so.
    """
    request_buffer = list()
    print("Sending!")
    for response in grequests.imap(reqs, size=8,
                                   exception_handler=http_exception_handler):
        page_data = extract_data_from_page(response)
        yield page_data
        print('.', end="", flush=True)
        if page_data["has_next_page"]:
            for rq in [
                get_request_to_page_of_advisors_from_deputy(
                    page_data['data'], page_number)
                for page_number in range(page_data["current_page"],
                                         page_data["number_of_pages"])
            ]:
                request_buffer.append(rq)

    print("\nFound {} more pages to fetch. Starting now...".format(
        len(request_buffer)))
    for req in grequests.imap(request_buffer, size=8,
                              exception_handler=http_exception_handler):
        page_data = extract_data_from_page(req)
        yield page_data
        print(':', end="", flush=True)
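# The two send_requests() variants above rely on an `http_exception_handler`
# that is not shown here. A minimal sketch, assuming the standard grequests
# exception handler signature of (request, exception):
def http_exception_handler(request, exception):
    # Log the failed request; grequests.imap() will simply skip it.
    print("\nRequest to {} failed: {}".format(request.url, exception))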
def get_route_info_from_infotraffic(known_lines_csv: str,
                                    known_stations_csv: str) -> Dict[int, Tuple[Route, Route]]:
    root = 'http://86.122.170.105:61978/html/timpi/'
    urls = [grequests.get(root + 'tram.php', stream=False),
            grequests.get(root + 'trol.php', stream=False),
            grequests.get(root + 'auto.php', stream=False)]

    known_lines = {
        line.line_id: line
        for line in importer.parse_lines_from_csv(known_lines_csv)
    }
    known_lines = known_lines  # type: Dict[int, Line]
    known_stations = {
        station.raw_name: station
        for station in importer.parse_stations_from_csv(known_stations_csv)
    }
    known_stations = known_stations  # type: Dict[str, Station]

    line_id_re = re.compile("param1=(\d+)")
    line_id_to_routes = {}  # type: Dict[int, Tuple[Route, Route]]

    for page in grequests.imap(urls, size=len(urls),
                               exception_handler=exception_handler):
        page.raise_for_status()
        if page.status_code == requests.codes.ok:
            soup = bs4.BeautifulSoup(page.text, "html.parser")

            unknown_lines = {}  # type: Dict[int, str]
            line_requests = []
            for a in soup.select("div p a"):
                line_id = int(line_id_re.search(a['href']).group(1))
                line = known_lines.get(line_id, None)
                if not line:
                    line_name = a['title'] if a.has_attr('title') else None
                    if line_name is None:
                        img = a.select("img")[0]
                        line_name = img['alt'] if img and img.has_attr('alt') else 'unknown'
                    unknown_lines[line_id] = line_name
                    print("WARNING: unknown line '{line_name}' (line ID: {line_id}) encountered at {url}"
                          .format(line_name=line_name, line_id=line_id, url=page.url))
                line_requests.append(grequests.get(root + a['href'], stream=False))

            for line_response in grequests.imap(line_requests, size=6,
                                                exception_handler=exception_handler):
                line_id = int(line_id_re.search(line_response.url).group(1))
                routes = parse_arrivals_from_infotrafic(line_id,
                                                        known_stations,
                                                        line_response,
                                                        include_unknown_stations=True)
                line = known_lines.get(line_id, None)
                line_name = line.line_name if line is not None else unknown_lines.get(line_id, "unknown")
                route1 = route2 = None
                for route_id, route in enumerate(routes):
                    valid_stations = []
                    for station, arrival in route:
                        if not isinstance(station, Station):
                            print("WARNING: unknown station '{raw_station_name}' encountered in route {route_id} of line {line_name} (line ID: {line_id})"
                                  .format(line_name=line_name, line_id=line_id,
                                          route_id=route_id, raw_station_name=station))
                        else:
                            if not station.lng or not station.lat:
                                print("WARNING: station '{station_name}' (station ID: {station_id}) has no GPS coordinates defined"
                                      .format(station_name=station.friendly_name,
                                              station_id=station.station_id))
                            valid_stations.append(station)
                    if valid_stations and line is not None:
                        if route_id == 0:
                            route1 = Route(route_id, line.route_name_1, line.line_id, valid_stations)
                        elif route_id == 1:
                            route2 = Route(route_id, line.route_name_2, line.line_id, valid_stations)
                if route1 is not None and route2 is not None:
                    line_id_to_routes[line.line_id] = (route1, route2)
    return line_id_to_routes
def getDatainfo(start, stop):
    lireques_list = []  # per-item detail requests
    allData = []  # final results
    rs = (grequests.get('https://www.ciu1.com/shipin/list-偷拍自拍-' + format(u) + '.html',
                        timeout=3,
                        proxies="proxies")  # NOTE: `proxies` should be a dict like {"http": ..., "https": ...}; a bare string here looks like a bug
          for u in range(start, stop))
    res_list = grequests.imap(rs, size=200)
    for iteminfo in res_list:
        iteminfo.encoding = iteminfo.apparent_encoding
        soup = BeautifulSoup(iteminfo.text, 'lxml')
        data = soup.select('#tpl-img-content > li')
        for item in data:
            print('Fetching...')
            childrenText = grequests.get('https://www.ciu1.com' + item.a.get('href'))
            lireques_list.append(childrenText)
            print(childrenText)

    lireques_arr = grequests.imap(lireques_list)
    for item_li in lireques_arr:
        print(1)
        item_li.encoding = item_li.apparent_encoding
        item_info = BeautifulSoup(item_li.text, 'lxml')
        item_info_chil = item_info.select('#shipin-detail-content-pull')
        item_info = item_info.select('#lin1k0')
        result = {
            "title": item_info_chil[0].img.get('alt'),
            "url": item_info[0].get('data-clipboard-text'),
            "pic": item_info_chil[0].img.get('data-original')
        }
        allData.append(result)

    db = pymysql.connect(host='216.24.255.15',
                         port=3306,
                         user='******',
                         password='******',
                         database='m_vue_ac_cn',
                         charset='utf8')
    cursor = db.cursor()

    def insertInfo(a, b, c):
        # NOTE: string concatenation is vulnerable to SQL injection; a
        # parameterized query (cursor.execute(sql, (a, b, c))) would be safer.
        sql = "INSERT INTO video_data(title, url, pic) VALUES ('" + a + "', '" + b + "', '" + c + "')"
        # print(sql)
        cursor.execute(sql)

    try:
        for item in allData:
            insertInfo(item.get('title'), item.get('url'), item.get('pic'))
        # commit to the database
        print(allData)
        print('Success!')
        db.commit()
    except:
        # roll back on error
        db.rollback()
    # close the database connection
    db.close()
def picture_spider():
    srequest = Srequests()
    if srequest.check_cookies():
        pass
    else:
        print('update cookies !')
        loginurl = 'https://anime-pictures.net/login/submit'
        logindata = {'login': '******', 'password': '******', 'time_zone': 'Asia/Shanghai'}
        srequest.update_cookies(loginurl, logindata)

    # search for pictures
    taglist = ['girl', 'long hair', 'breasts', 'blush', 'light erotic']
    search_tag = '||'.join(taglist)
    # update_date  0: any time  1: last week  2: last month  3: last day
    if get_pictures_count() < 200:
        update_date = 0
    else:
        update_date = 2
    # search_url = "https://anime-pictures.net/pictures/view_posts/0?search_tag=%s&aspect=16:9&order_by=date&ldate=%d" \
    #              "&ext_jpg=jpg&ext_png=png&lang=en" % (search_tag, update_date)
    search_url = "https://anime-pictures.net/pictures/view_posts/0?search_tag=%s&res_x=1024&res_y=768&res_x_n=1&res_y_n=1&aspect=16:9&order_by=date&ldate=%d&small_prev=1&ext_jpg=jpg&ext_png=png&lang=en" % (
        search_tag, update_date)

    resp = srequest.session.get(search_url, headers=Srequests.headers).text
    # print(Srequests.headers)
    details_urls = []
    details_urls.extend(get_details_urls(resp))

    page_count = get_page_count(resp)
    search_urls = [
        "https://anime-pictures.net/pictures/view_posts/%d?search_tag=%s&res_x=1024&res_y=768&res_x_n=1&res_y_n=1&aspect=16:9&order_by=date&ldate=%d&small_prev=1&ext_jpg=jpg&ext_png=png&lang=en" % (
            x, search_tag, update_date) for x in range(1, int(page_count) + 1)]

    reqs = (grequests.get(url, headers=Srequests.headers, session=srequest.session)
            for url in search_urls)
    for r_data in grequests.imap(reqs, size=Wallpaper.REQUEST_THREAD_NUMBER):
        if r_data.status_code == 200:
            print('search page OK: ' + r_data.url)
            details_urls.extend(get_details_urls(r_data.text))
        else:
            print('search page failed: ' + r_data.url)

    # picture detail pages
    reqs = (grequests.get(url, headers=Srequests.headers, session=srequest.session)
            for url in details_urls)
    for r_data in grequests.imap(reqs, size=Wallpaper.REQUEST_THREAD_NUMBER):
        if r_data.status_code == 200:
            print('detail page OK: ' + r_data.url)
            save_picture_info(Picture(*get_picture_info(r_data.text)))
        else:
            print('detail page failed: ' + r_data.url)

    srequest.close()
def _get_person_id(query: str, max_people: int) -> List[Iterator[str]]:
    """
    Returns list of people IDs
    """
    request_objs = []
    for i in range(int(max_people / 10) + 1):
        payload = {
            "search": "Search",
            "filter": query,
            "_kgoui_region": "kgoui_Rcontent_I0_Rcontent_I0_Ritems",
            "_object_include_html": 1,
            "_object_js_config": 1,
            "_kgoui_page_state": "439a1a9b6fb81b480ade61813e20e049",
            "_region_index_offset": i * 10,
            "feed": "directory",
            "start": i * 10
        }
        request_objs.append(grequests.get(PEOPLE_URL, params=payload))
    responses = grequests.imap(request_objs)
    url_list = []
    for response_obj in responses:
        response = response_obj.json()['response']['contents']
        local_url_list = (x['fields']['url']['formatted'] for x in response)
        local_url_list = (dict(param.split("=") for param in x.split("&"))
                          for x in local_url_list)
        local_url_list = (x['id'] for x in local_url_list if 'id' in x)
        url_list.append(local_url_list)
    return url_list
def check_proxies(proxy_list, threads=8):
    IFCONFIG_CANDIDATES = [
        "https://ifconfig.co/ip",
        "https://api.ipify.org/?format=text",
        "https://myexternalip.com/raw",
        "https://wtfismyip.com/text"
    ]

    # de-dupe
    proxy_list = list(set(proxy_list))

    # create a set of unsent requests
    rs = []
    for proxy in proxy_list:
        rs.append(
            grequests.get(random.choice(IFCONFIG_CANDIDATES),
                          proxies={
                              "http": proxy,
                              "https": proxy
                          },
                          timeout=1))

    print("[II] [proxy_checker] Checking health of proxies")
    working_proxies = []
    # send a few at a time in sets of size "threads"
    for response in grequests.imap(rs, size=threads):
        # raw_text = str(response.content, 'utf-8')
        if response.status_code == 200:
            this_proxy = next(iter(response.connection.proxy_manager))
            parsed = urlsplit(this_proxy).netloc
            working_proxies.append(parsed)
            yield parsed
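# check_proxies() is a generator, so it has to be consumed to do any work.
# Hypothetical usage; the proxy addresses below are illustrative only:
if __name__ == "__main__":
    candidates = ["203.0.113.10:8080", "198.51.100.7:3128"]
    for proxy in check_proxies(candidates, threads=4):
        print("[II] usable proxy:", proxy)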
def dl(url_template, url_arguments, desc=''):
    "Download in parallel {url_template} for each {url_arguments}, with a progress bar describing {desc}"
    all_requests = (grequests.get(url_template % arg, headers=HEADERS)
                    for arg in url_arguments)
    yield from tqdm(grequests.imap(all_requests), desc=desc, total=len(url_arguments))
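# Hypothetical usage of dl(); the URL template, argument range, and the HEADERS
# constant it expects are assumptions, not part of the original snippet.
HEADERS = {"User-Agent": "example-downloader/0.1"}

def fetch_pages():
    for response in dl("https://example.com/page/%d", range(1, 11), desc="pages"):
        print(response.status_code, len(response.content), response.url)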
def send_requests(url, data_file, expected_results):
    results = defaultdict(int)
    for res in grequests.imap(get_requests(url, data_file), size=20):
        results[res.status_code] += 1
        if res.status_code != 204:
            error(res)
    return [results == expected_results, results]
def download_iamges(image_data, n_images, output_dir):
    """
    Download a specified number of images to out_dir.

    :param _elementtree._element_iterator image_data: information to download images
    :param int n_images: number of images to download
    :param str output_dir: directory to store the images
    """
    urls = (make_thumb_url(image) for image in image_data)
    reqs = (grequests.get(url) for url in urls)
    responses = grequests.imap(reqs)
    responses = frogress.bar(responses, steps=n_images)

    print('\nDownloading {} images'.format(n_images))
    os.makedirs(output_dir, exist_ok=True)
    for r in responses:
        try:
            url = urllib.parse.urlparse(r.url)
            filename, _ = os.path.splitext(os.path.basename(url.path))
            output_file_path = os.path.join(output_dir, filename + '.jpg')
            with open(output_file_path, 'wb') as output_file:
                output_file.write(r.content)
        finally:
            r.close()
def _get_description(self, rs):
    """
    Creates asynchronous requests using the grequests library,
    if request was successful - gets vacancy description from vacancy page,
    if not - appends url in list of urls, which will be used again
    :param rs: list of urls
    :return: list of urls with error in response
    """
    error_rs = []
    for r in grq.imap(rs, size=self.WORKERS_NUM,
                      exception_handler=self.exception_handler):
        if r.status_code == 200:
            try:
                index = self._get_job_id(r.url)
                self.vacancy_dict[index]["description"] = \
                    self._get_vacancy_description(pq(r.text))
                if self.vacancy_dict[index]["description"] == "":
                    error_rs.append(r.url)
                    logging.info('Empty description in {}'.format(index))
            except Exception as e:
                logging.info('Error in response {}, exception:{}'.format(
                    r.url, str(e)))
        else:
            error_rs.append(r.url)
    return error_rs
def test_evt_link_for_trx_id4(self):
    symbol = base.Symbol(
        sym_name=sym_name, sym_id=sym_id, precision=sym_prec)
    asset = base.new_asset(symbol)
    pay_link = evt_link.EvtLink()
    pay_link.set_max_pay(999999999)
    pay_link.set_header(evt_link.HeaderType.version1.value |
                        evt_link.HeaderType.everiPay.value)
    pay_link.set_symbol_id(sym_id)
    pay_link.set_link_id_rand()
    pay_link.sign(user.priv_key)

    req = {
        'link_id': 'd1680fea21a3c3d8ef555afd8fd8c903'
    }
    url = 'http://127.0.0.1:8888/v1/evt_link/get_trx_id_for_link_id'

    tasks = []
    for i in range(10240):
        pay_link.set_link_id_rand()
        req['link_id'] = pay_link.get_link_id().hex()
        tasks.append(grequests.post(url, data=json.dumps(req)))

    i = 0
    for resp in grequests.imap(tasks, size=900):
        self.assertEqual(resp.status_code, 500, msg=resp.content)
        i += 1
        if i % 100 == 0:
            print('Received {} responses'.format(i))
def __get_matches_from_live_ids(self, matches_ids):
    urls = (self.GAME_URL_TEMPLATE.format(match_id=I) for I in matches_ids)
    rs = (grequests.get(u, timeout=3, verify=True) for u in urls)
    matches_data = []
    for match_request in grequests.imap(rs, size=7):
        if not match_request or match_request is None:
            continue
        try:
            match_data = match_request.json()
        except ValueError:
            continue
        # Probably match_id changed or match has ended, so remove it from live_ids  # noqa
        if not match_data.get("Success", False):
            _, url_params = match_request.url.split("?")
            match_id = int((url_params.split("&")[0]).lstrip("?id="))
            self.live_matches_ids.discard(match_id)
            continue
        res_data = match_data["Value"]
        match_name = res_data["O1"] + " - " + res_data["O2"]
        if self.extra_info.get(match_name) is None:
            date = datetime.datetime.today()  # + datetime.timedelta(hours=0, minutes=35)  # noqa
            date_str = date.strftime("%d-%m-%Y")
            self.extra_info[match_name] = date_str
        matches_data.append(res_data)
    return matches_data
def getSucursales(stemUrl):
    sucursales = []
    mainUrl = stemUrl + 'sucursales'
    timeoutSecs = 20  # seconds to launch timeout exception
    concurrents = 20  # max concurrent requests

    print 'Collecting information about stores...'

    data = getJsonData(mainUrl)
    cantSucursales = data['total']
    maxLimit = data['maxLimitPermitido']
    cantPages = int(math.ceil(cantSucursales / maxLimit))

    urls = []
    print('Downloading stores...')
    for x in xrange(1, cantPages + 1):
        urls.append(mainUrl + '?offset=' + str((x - 1) * maxLimit) + '&limit=' + str(maxLimit))

    rs = (grequests.get(u, stream=False, timeout=timeoutSecs,
                        headers={'User-Agent': 'Mozilla/5.0'}) for u in urls)
    responses = grequests.imap(rs, size=concurrents)

    for response in responses:
        data = ujson.loads(response.content)
        sucursales = sucursales + data['sucursales']
        response.close()

    return sucursales
def fetch_all_projects():
    """
    Fetch the names of all projects using the v1 project API.

    Grossly inefficient - we need an API that can return a list of all
    project names quickly.
    """
    url_format = (
        'http://readthedocs.org/api/v1/project/?format=json'
        '&limit=100&offset={0}')

    project_names = []

    # Make initial request to see how many total requests we need to set up.
    resp = requests.get(url_format.format(0))
    project_results = resp.json()
    project_names.extend(parse_project_objects(project_results['objects']))
    total_count = project_results['meta']['total_count']

    # Determine the largest offset needed to fetch all projects
    max_offset = (total_count/100) * 100
    print max_offset

    urls = [url_format.format(offset)
            for offset in xrange(100, max_offset + 1, 100)]
    rs = (grequests.get(u) for u in urls)
    for resp in grequests.imap(rs, size=5):
        project_results = resp.json()
        project_names.extend(parse_project_objects(project_results['objects']))

    with open('project_list.json', 'w') as f:
        f.write(json.dumps({
            'project_names': project_names,
        }))
def process_requests(ctx, rs, count, process_fun, ordered=False):
    errors = 0
    index = -1
    if ordered:
        request_iterator = grequests.map(rs, size=ctx['connections'])
    else:
        request_iterator = grequests.imap(rs, size=ctx['connections'])
    with click.progressbar(request_iterator, length=count) as bar:
        for r in bar:
            index = index + 1
            if r.status_code == requests.codes.ok:
                process_fun(index, r)
            elif r.status_code == 404:
                # indicates database was deleted before we queried it
                continue
            elif r.status_code == 500:
                errors = errors + 1
                click.echo('500 error processing {0}. Continuing...'.format(r.url),
                           err=True)
            else:
                click.echo(r.status_code)
                r.raise_for_status()
    if errors > 0:
        click.echo(
            'Failed to get data for {0} requests due to server errors'.format(
                errors))
def __init__(self, manifest_file, target_dir=os.getcwd()):
    self.target_dir = target_dir
    self.manifest_file = manifest_file
    self.cdn_url = "http://cdn.urbanterror.info/urt/{0}/{1}/q3ut4/{2}"
    self.mver = ""
    self.relnum = ""
    self.files = grequests.imap(self._parse_manifest())
def _get_person_url(query: str, max_people: int) -> List[Iterator[str]]:
    """ """
    request_objs = []
    for i in range(int(max_people / 10) + 1):
        payload = {
            "search": "Search",
            "filter": query,
            "_region": "kgoui_Rcontent_I0_Rcontent_I0_Ritems",
            "_object_include_html": 1,
            "_object_js_config": 1,
            "_kgoui_page_state": "8c6ef035807a2a969576d6d78d211c78",
            "_region_index_offset": i * 10,
            "feed": "directory",
            "start": i * 10
        }
        request_objs.append(grequests.get(PEOPLE_URL, params=payload))
    responses = grequests.imap(request_objs)
    url_list = []
    for response_obj in responses:
        response = response_obj.json()['response']['contents']
        local_url_list = (x['fields']['url']['formatted'] for x in response)
        local_url_list = (x.replace('\\', '').split('&')[-1].replace('id=', '')
                          for x in local_url_list if '&start' not in x)
        local_url_list = (urllib.parse.unquote(x) for x in local_url_list)
        url_list.append(local_url_list)
    return url_list
def sync_db():
    header = {
        "Authorization": "Basic: a2RsYW5ub3lAZ21haWwuY29tOmZlM2Y2ZDI5OGJlMWI2ODljNmUwZjlkNjFiYjNjY2YzYTNkYWIwMDdmYjYzZWU0MDcxMTFhMTgzMjNjYWQwNzAyNjM5OTY1OTZhOTAwZTM4MzgwNDhhMThjODdkZDUyOWZiZWM3YTA2YTEwZjA0ZDM0NjJjYmRmNjkwNGJlMjEz"
    }
    urls = [
        'http://tw06v033.ugent.be/Chronic/rest/DrugService/drugs',
        'http://tw06v033.ugent.be/Chronic/rest/SymptomService/symptoms',
        'http://tw06v033.ugent.be/Chronic/rest/TriggerService/triggers',
        'http://tw06v033.ugent.be/Chronic/rest/HeadacheService/headaches?patientID=6',
        'http://tw06v033.ugent.be/Chronic/rest/MedicineService/medicines?patientID=6',
    ]
    session = requests.Session()
    # session.mount('http://', HTTPAdapter(pool_connections=250, pool_maxsize=50))
    rs = (grequests.get(u, headers=header, session=session) for u in urls)
    # responses = requests.async.imap(rs, size=250)
    times = []
    for response in grequests.imap(rs, size=1):
        if response.status_code == 200:
            times.append(response.elapsed.total_seconds())
        else:
            times.append(1000)
        response.close()
    q.put(sum(times))
def request(method, iterable, key=None, ignore_errors=True, **kwargs):
    """Convenient http request iterator.

    Returns a generator of :class:`requests.Response <requests.Response>`.
    See ``requests.request`` and ``grequests``.

    :param iterable: Iterable of URLs or context objects with the ``key``
                     argument. The item can be accessed via ``response.context``.
    :param key: (optional) URL getter function like the ``key`` argument of
                ``list.sort``.
    :param ignore_errors: (optional) If ``True``, ignore non 20x codes and
                          transport errors.
    """
    # https://github.com/kennethreitz/requests
    # https://github.com/kennethreitz/grequests
    assert 'return_response' not in kwargs, 'not supported'
    kwargs.setdefault('prefetch', True)
    size = kwargs.pop('size', 2)
    hooks = kwargs.pop('hooks', {})

    def gen_hook_response(item):
        def result(response):
            response.context = item
            if 'response' in hooks:
                return hooks['response'](response)
        return result

    reqs = (grequests.request(
        method,
        key(item) if key else item,
        hooks=dict((i for i in hooks.items() if i[0] in requests.hooks.HOOKS),
                   response=gen_hook_response(item)),
        **kwargs) for item in iterable)

    for response in grequests.imap(reqs, kwargs['prefetch'], size):
        # can't get socket.timeout, requests.packages.urllib3.exceptions.TimeoutError here
        # response.status_code == None if not connectable for some reasons
        if ignore_errors \
                and (not response.status_code
                     or math.floor(response.status_code / 100) != 2):
            logger.error('%s %s', response.url, response.status_code)
            response = requests.hooks.dispatch_hook('error', hooks, response)
            continue

        # read and decode response body
        if kwargs['prefetch']:
            try:
                response.content
            except http.client.HTTPException as e:  # e.g. IncompleteRead
                logger.exception('%s', response.url)
                response.error = e
                if ignore_errors:
                    response = requests.hooks.dispatch_hook(
                        'error', hooks, response)
                    continue
            except Exception as e:
                logger.exception('%s', response.url)
                continue

        yield response
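# A hedged usage sketch for the request() helper above: iterate over context
# dicts and pull the URL out of each one with `key`. The items and URLs are
# illustrative only, and this helper targets the old requests/grequests API
# (prefetch, hook dispatch) it was written against.
def fetch_examples():
    items = [{"url": "http://example.com/a"}, {"url": "http://example.com/b"}]
    for response in request("GET", items, key=lambda item: item["url"], size=4):
        # each response carries its originating item on response.context
        print(response.context["url"], response.status_code)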
def send_heartbeat(node_state: NodeState, election_timeout, time_unit=TimeUnit.SECOND):
    client = Client()
    timeout = int(election_timeout)
    if time_unit == TimeUnit.SECOND:
        timeout = timeout * 1000
    state = {
        "id": node_state.id,
        "term": node_state.current_term,
        "state": type(node_state).__name__.lower(),
        "timeout": timeout
    }
    try:
        with client as session:
            logging.info(f'send heartbeat to monitor: {state}')
            posts = [
                grequests.post(MONITOR_URL_HEARTBEAT, json=state, session=session)
            ]
            for response in grequests.imap(posts):
                result = response.json()
                logging.info(f'get response from monitor: {result}')
    except:
        logging.info(f'cannot connect to monitor: {MONITOR_URL_STATE_UPDATE}')
def start_get_ss():
    urls_dict = {
        # 'https://doub.io/sszhfx/': get_ss_doubi,
        # 'https://xsjs.yhyhd.org/free-ss/': get_ss_yhyhd,
        # 'https://www.vbox.co/': get_ss_vbox,
        # 'http://ishadow.info/': get_ss_ishadow,
        # 'http://ss.vpsml.site/': get_ss_vpsml
        # 'http://get.shadowsocks8.cc/': get_ss_shadowsocks8,
        # 'http://www.shadowsocks.asia/mianfei/10.html': get_ss_sspw,
        # 'http://ss.ishadow.world/': get_ss_sishadow,
        r'https://github.com/Alvin9999/new-pac/wiki/ss%E5%85%8D%E8%B4%B9%E8%B4%A6%E5%8F%B7': get_ss_Alvin9999
    }
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36'
    }
    pool = ThreadPoolExecutor(len(urls_dict.keys()) + 1)
    rs = (grequests.get(u, timeout=80, proxies=proxies, headers=headers)
          for u in urls_dict.keys())
    for r in grequests.imap(rs, size=3):
        try:
            print("{:-^72}".format(r.url))
            func = urls_dict.get(r.url, u"No matching handler!!!")
            pool.submit(func(r))
        except Exception as e:
            print(u"Error: " + str(e))
            continue
def scrape_ratings(self, episodes):
    # alternative: http://www.omdbapi.com/?i=tt3697842&Season=1
    def make_req(episode):
        return grequests.get(episode['url'],
                             callback=set_meta({"episode": episode}))

    rows = []
    reqs = (make_req(episode) for episode in episodes)
    for response in grequests.imap(reqs, exception_handler=handler):
        doc = lxml.html.fromstring(response.content)

        rating, rating_count = "N/A", "N/A"
        if not doc.cssselect('div.notEnoughRatings'):
            rating = float(doc.cssselect("span[itemprop='ratingValue']")[0].text)
            rating_count = int(doc.cssselect("span[itemprop='ratingCount']")[0].text)

        row = {
            'season': int(response.meta['episode']['season']),
            'episode_number': int(response.meta['episode']['episode_number']),
            'rating': rating,
            'rating_count': rating_count
        }
        msg = " ".join([str(row['season']), str(row['episode_number']),
                        str(row['rating']), str(row['rating_count'])])
        logging.debug(msg)
        rows.append(row)

    rows.sort(key=lambda x: (x['season'], x['episode_number']))
    return rows
def trainingCost():
    start_time = time.time()
    priceOrderArray = []
    priceArray = []
    reqs = (grequests.get(link) for link in trainingLinks)
    resp = grequests.imap(reqs, grequests.Pool(20))
    for r in resp:
        soup = BeautifulSoup(r.text, 'lxml')
        ovr = textCleaner(
            soup.find("div", class_="list-info-player__ovr").span.text)
        ratingPrice = round(
            priceCleaner(
                soup.find("div", class_="player-listing__price-value").text))
        trainingCostValue = ratingPrice / qsCheck(float(ovr))
        trainingCostValue = round(trainingCostValue, 2)
        priceOrderArray.append(str(trainingCostValue))
        priceArray.append(
            ("[Rated: " + str(ovr) + "]" + "[Buying at: " + str(ratingPrice) +
             "]" + "[C/T: " + str(trainingCostValue) + "]"))
    CheapestPriceIndex = priceOrderArray.index(min(priceOrderArray))
    print("....Here you are: ")
    print("\n".join(priceArray))
    print("The cheapest option is this: \n")
    print(priceArray[(20 - (priceOrderArray.index(min(priceOrderArray))))])
    totalTime = time.time() - start_time
    print("--- %s seconds ---" % (round(totalTime, 2)))
def run(self):
    total = 0
    totals = {}
    bad_results = 0

    with open(self.hjson_path) as f:
        sites = json.load(f)

    rs = (grequests.head(s.get('url'),
                         hooks={'response': [self.hook_factory(s)]})
          for s in sites.get('base_urls'))
    for r in grequests.imap(rs, size=20):
        total += 1
        if totals.get(r.status_code):
            totals[r.status_code] += 1
        else:
            totals[r.status_code] = 1
        if r.status_code >= 400:
            bad_results += 1

    print('========================================================')
    print('Summary')
    print('========================================================')
    print('Total requests: %d' % total)
    print('Bad responses: %d' % bad_results)
    for sc in totals:
        print('Status Code %d: %d' % (sc, totals[sc]))
    self.dispatcher.command_complete.emit(0)
def __iter__(self):
    """
    Yield all matching books for the supplied books & branch.
    """
    search = "Searching library catalog for books"
    if self.branch:
        search += f" at {self.branch}"
    logger.info(search)

    full_record_requests = []

    # First, yield all books with full metadata from the RSS channel
    for book in self.catalog_results():
        if book.call_number:
            yield book
        elif not book.full_record_link:
            logger.warning("No link given for %s, can't get call #", book.title)
        else:
            # Some metadata found, but we need more for the call #
            logger.debug("No call # found for %s, fetching record.", book.title)
            full_record_requests.append(self.async_record(book))

    # Then yield books that need additional lookups to fetch call numbers
    for response in grequests.imap(full_record_requests):
        book = response._book
        book.call_number = self.get_call_number(response)
        yield book
def update_files(metadata, hash_fs):
    urls_to_get = []

    for ext in metadata:
        for ext_file in ext["current_version"]["files"]:
            if not ext_file["is_webextension"]:
                continue
            ext_file_hash_type, ext_file_hash = ext_file["hash"].split(":")
            assert ext_file_hash_type == "sha256"
            if hash_fs.get(ext_file_hash) is None:
                if ext_file["url"] in urls_to_get:
                    logger.warning("Duplicate URL in metadata: %s" % ext_file["url"])
                urls_to_get.append(ext_file["url"])
            else:
                logger.debug("`%s` is already cached locally" % ext_file_hash)

    logger.info("Fetching %d uncached web extensions from AMO" % len(urls_to_get))

    session = create_request_session()

    while True:
        fatal_errors = 0
        unsent_requests = [
            grequests.get(url, verify=True, session=session)
            for url in urls_to_get
        ]
        for response in grequests.imap(unsent_requests, size=MAX_CONCURRENT_REQUESTS):
            if response.status_code == 200:
                logger.debug("Downloaded %d bytes from `%s`"
                             % (len(response.content), response.url))
                try:
                    hash_fs.put(BytesIO(response.content), ".zip")
                except ValueError as err:
                    # probably the mysterious ValueError: embedded null byte
                    logger.error("Unable to store `%s` in local cache: %s"
                                 % (response.url, str(err)))
                    continue
                try:
                    original_url = response.history[0].url
                except IndexError:
                    # There was no redirect
                    original_url = response.url
                urls_to_get.remove(original_url)
            else:
                logger.error("Unable to download `%s`, status code %d"
                             % (response.url, response.status_code))
                if 400 <= response.status_code < 500:
                    fatal_errors += 1
            if len(urls_to_get) % 100 == 0:
                logger.info("%d extensions to go" % len(urls_to_get))
        if len(urls_to_get) == fatal_errors:
            break

    if len(urls_to_get) > 0:
        logger.warning(
            "Unable to fetch %d extensions, likely deleted add-ons"
            % len(urls_to_get))
def quote_extract():
    prog_count = 0
    ticker = 0

    # initialise grequests to use
    reqs = (grequests.get(link) for link in links)
    resp = grequests.imap(reqs, grequests.Pool(1))

    def SQL_commit():
        nonlocal quotes
        cur.execute(
            '''INSERT or REPLACE INTO Quote_link (quote_link) VALUES ( ? )''',
            (quotes, ))
        cur.execute('SELECT id FROM Quote_link WHERE quote_link = ?',
                    (quotes, ))
        quote_link_id = cur.fetchone()[0]
        conn.commit()

    for i in resp:
        soups = BeautifulSoup(i.text, 'lxml')
        for j in soups.find_all('a', class_='actionLink',
                                attrs={'href': re.compile("^/work/quotes")}):
            quotes = (j.get('href'))
            prog_count += 1
            progress = (str(round((prog_count / book_n) * 100, 1)))
            ticker += 1
            if ticker == 3:
                print("Currently at %", progress, "completion.")
                ticker = 0
            SQL_commit()
def quote_extract(links):
    prog_count = 0
    ticker = 0

    # initialise grequests to use
    reqs = (grequests.get(link) for link in links)
    resp = grequests.imap(reqs, grequests.Pool(1))

    # opens up grequests and finds tags within each consecutive webpage for the quote hyperlink
    # pulls the quote hyperlink to produce
    # for j in range(len)
    for i in resp:
        soups = BeautifulSoup(i.text, 'lxml')
        for j in soups.find_all('a', class_='actionLink',
                                attrs={'href': re.compile("^/work/quotes")}):
            quotes = (j.get('href'))
            prog_count += 1
            progress = (str(round((prog_count / book_n) * 100, 1)))
            ticker += 1
            if ticker == 1:
                print("Currently at %", progress, "completion.")
                ticker = 0

            def commit():
                cur.execute(
                    '''INSERT or REPLACE INTO Quote_link (quote_link) VALUES ( ? )''',
                    (quotes, ))
                cur.execute('SELECT id FROM Quote_link WHERE quote_link = ?',
                            (quotes, ))
                quote_link_id = cur.fetchone()[0]
                conn.commit()

            commit()
def run(self):
    if self._concurrent:
        rs = (grequests.get(u) for u in self._urls)
        self._results = list(grequests.imap(rs))
    else:
        for u in tqdm(self._urls):
            self._results.append(self._get(u))
def test_imap_timeout(self):
    reqs = [
        grequests.get(httpbin('delay/1'), timeout=0.001),
        grequests.get(httpbin('/'))
    ]
    responses = list(grequests.imap(reqs))
    self.assertEqual(len(responses), 1)
def grequests_insert_test():
    rs = (grequests_insert(x) for x in range(1, N))
    rs2 = grequests.imap(rs, stream=True, size=1000)
    r = 0
    for _ in rs2:
        r += 1
    print(r)
def getCantArticulos(stemUrl, comercios):
    timeoutSecs = 30
    mainUrl = stemUrl + 'productos' + '?id_sucursal='
    concurrents = 5
    urls = []
    reqCounter = 0
    result = []

    print "Getting the number of items per store..."

    for comercio in comercios:
        urls.append(mainUrl + comercio['id'])

    rs = (grequests.get(u, stream=False, timeout=timeoutSecs,
                        headers={'User-Agent': 'Mozilla/5.0'}) for u in urls)
    responses = grequests.imap(rs, size=concurrents)

    for response in responses:
        data = ujson.loads(response.text)
        idComercio = response.url[response.url.rfind('=', 0, len(response.url)) + 1:]
        result.append({
            "id": idComercio,
            "total": data['total'],
            "maxLimitPermitido": data['maxLimitPermitido'],
        })
        response.close()  # Close open connections

    return result
def fetch_names():
    """
    fetches names from the url_List
    """
    names_links = [grequests.get(link) for link in url_list()]
    resp = grequests.imap(names_links, exception_handler=exception_handler)
    names_lists = []
    for idx, r in enumerate(resp):
        soup = BeautifulSoup(r.text, "html.parser")
        post = soup.find("section", {"class": "entry"})
        try:
            names = [name.text for name in post.find_next("ol").find_all("li")]
        except AttributeError:
            print(
                f"there are no names that begin with {ascii_lowercase[idx].upper()}"
            )
            names = []
        names_lists.append(names)
    return names_lists
def grequests随机无序集(self):  # grequests.imap(任务列
    页数网址 = 'http://news.paidai.com/?page={}'  # {}
    网址总列表 = []
    任务列表 = []
    内容总列表 = []
    for 倒页数 in range(18, 0, -1):
        各帖子链接 = 页数网址.format(str(倒页数))  # no newline end="" request("GET" pool=1, ,size=2 pool=1,timeout=len(任务列表)//2,
        网址总列表.append(各帖子链接)

    网址分列表 = []
    倒数 = len(网址总列表)
    for 各帖子链接 in 网址总列表:
        网址分列表.append(各帖子链接)
        倒数 = 倒数 - 1
        if len(网址分列表) == 20 or 倒数 == 0:
            print('pages awaiting response, countdown', 倒数)
            for 各帖子链接 in 网址分列表:
                print('post link', 各帖子链接)
                任务 = grequests.get(各帖子链接, headers=头部信息)  # timeout=len(任务列表)//2,
                任务列表.append(任务)
            条件循环 = 1
            次数循环 = 0
            网址分列表 = []
            while 条件循环 == 1:
                此时数 = int(time.time())
                if 此时数 > 换IP时间计数 + 60:
                    self.模具一一换ip连接()
                    self.模具一一换头部信息()
                try:
                    # exception handling around an error-prone call
                    返回网页内容集 = grequests.imap(任务列表, size=5)  # size=3 concurrency 3; gtimeout is the overall timeout
                except (grequests.exceptions.ConnectTimeout,
                        grequests.exceptions.ReadTimeout,
                        grequests.exceptions.ConnectionError,
                        grequests.exceptions.ConnectTimeout,
                        grequests.exceptions.ChunkedEncodingError,
                        grequests.exceptions.InvalidSchema) as 异常:
                    print('network error, waiting', 异常)
                    print('waiting 9 seconds before reconnecting, attempt', 次数循环, 'times')
                    # time.sleep(3)
                else:
                    print('=========================')
                    返回网页内容列表 = []
                    for 返回网页内容 in 返回网页内容集:
                        返回网页内容文本 = str(返回网页内容)
                        if '200' in 返回网页内容文本 and 'None' not in 返回网页内容文本 and '40' not in 返回网页内容文本:
                            print('response', 返回网页内容)
                            返回网页内容.encoding = "UTF-8"
                            返回网页内容列表.append(返回网页内容)
                    if len(任务列表) == len(返回网页内容列表):
                        内容总列表 = 内容总列表 + 返回网页内容列表
                        条件循环 = 520
                        print('done')
def download_reqs_to_files(reqs):
    for response in grequests.imap(reqs, exception_handler=handler):
        if response.status_code != 200:
            print("error downloading %s with code %s" %
                  (response.url, response.status_code))
            continue

        filename = response.url.split("/")[-1]
        with open("data/" + filename, "wb") as f:
            f.write(response.content)
        print("downloaded %s" % filename)
def download_reqs_to_files(reqs):
    for response in grequests.imap(reqs, exception_handler=handler):
        if response.status_code != 200:
            print("error downloading %s with code %s" %
                  (response.url, response.status_code))
            continue

        filepath = RAW_FILEPATH.format(kind=response.meta['kind'],
                                       episode=response.meta['i'])
        with open(filepath, "wb") as f:
            f.write(response.content)
        print("downloaded %s" % filepath)
def make_requests(reqs):
    successful_responses = 0
    failed_responses = 0
    for resp in grequests.imap(reqs, stream=False, size=100):
        if resp.status_code == 200:
            successful_responses += 1
        else:
            failed_responses += 1
        resp.close()
    return successful_responses, failed_responses
def test_imap_timeout_exception(self):
    class ExceptionHandler:
        def __init__(self):
            self.counter = 0

        def callback(self, request, exception):
            self.counter += 1

    eh = ExceptionHandler()
    reqs = [grequests.get(httpbin('delay/1'), timeout=0.001)]

    list(grequests.imap(reqs, exception_handler=eh.callback))
    self.assertEqual(eh.counter, 1)
def test_imap_timeout_exception_handler_returns_value(self):
    """
    ensure behaviour for a handler that returns a value
    """
    def exception_handler(request, exception):
        return request

    reqs = [grequests.get(httpbin('delay/1'), timeout=0.001)]
    out = []
    for r in grequests.imap(reqs, exception_handler=exception_handler):
        out.append(r)
    self.assertEquals(out, [])
def grequests_retry():
    s = requests.Session()
    retries = Retry(total=5,
                    backoff_factor=0.2,
                    status_forcelist=[500, 502, 503, 504],
                    raise_on_redirect=True,
                    raise_on_status=True)
    s.mount('http://', HTTPAdapter(max_retries=retries))
    s.mount('https://', HTTPAdapter(max_retries=retries))

    tasks = (grequests.get(url, session=s) for url in urls)
    resp = grequests.imap(tasks, size=10)
    for r in resp:
        print(r.status_code)
        print(r.text)
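# grequests_retry() above assumes Retry and HTTPAdapter are importable and that
# a `urls` iterable exists at module level. A minimal sketch of those
# assumptions (Retry lives in urllib3; older code imports it via
# requests.packages.urllib3.util.retry), with illustrative URLs:
import grequests
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

urls = ['https://httpbin.org/status/200', 'https://httpbin.org/status/503']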
def test_imap_timeout_no_exception_handler(self):
    """
    compliance with existing 0.2.0 behaviour
    """
    reqs = [grequests.get(httpbin('delay/1'), timeout=0.001)]
    out = []
    try:
        for r in grequests.imap(reqs):
            out.append(r)
    except Timeout:
        pass
    self.assertEquals(out, [])
def getIMDBPages(ListIDs):
    '''
    Returns image counts by querying the relevant IMDB pages.
    '''
    requests = (grequests.get(imagePageTemplate.format(ID)) for ID in ListIDs)

    # send out requests 30 at a time, get them back in generator expression.
    responses = grequests.imap(requests, size=30)

    # get image counts and other information
    data = [getImageCounts(response) for response in responses]

    # serialise result as JSON, and return
    return json.dumps(data)
def query_gerrit(template, change_ids, repo_name):
    """query gerrit."""
    queries = []
    template = "https://review.openstack.org" + template
    for change_id in change_ids:
        # ChangeIDs can be used in multiple branches/repos
        patch_id = urllib.quote_plus("%s~master~" % repo_name) + change_id
        queries.append(template % patch_id)

    unsent = (grequests.get(query) for query in queries)
    for r in grequests.imap(unsent, size=10):
        try:
            yield json.loads(r.text[4:])
        except AttributeError:
            # request must have failed, ignore it and move on
            logger.debug("failed to parse gerrit response")
            pass
def startRetrieve(params, session=Session(), callback=None):
    page_1 = session.get(SEARCH_URL, params=params)
    first_json = page_1.json()
    total_items = first_json["totalItems"]
    pages = int(math.ceil(float(total_items) / ITEMS_PER_PAGE))
    if pages > MAX_PAGES:
        pages = MAX_PAGES

    reqs = []
    resps = []
    resps.extend(first_json["items"])

    for page in range(2, pages + 1):
        # params["page"] = str(page_num)
        reqs.append(grequests.request('GET', SEARCH_URL + "page={}".format(page),
                                      timeout=TIMEOUT, params=params, session=session))

    for resp in grequests.imap(reqs, False, REQ_THREADS,
                               exception_handler=exception_handler):
        current_app.logger.debug("Requesting data from %s", resp.request.path_url)
        resps.extend(resp.json()["items"])

    return resps
def scrape_archives(self):
    pages = self.get_archive_urls()
    rs = (grequests.get(u, session=self.session) for u in pages)
    all_show_notes = {}
    for result in grequests.imap(rs):
        print("On URL ->", result.url)
        doc = html.fromstring(result.content)
        all_show_notes[result.url.split("/")[-1]] = {
            "title": doc.cssselect("header h2 a")[0].text_content(),
            "notes": doc.cssselect(".post-contents.cf p")[1].text_content().strip(),
            "date": "{} {}".format(
                doc.cssselect(".date .day")[0].text_content(),
                doc.cssselect(".date .month")[0].text_content()
            )
        }
    with open("show_notes.json", "w") as show_notes_file:
        show_notes_file.write(json.dumps(all_show_notes))
    print("Done and saved!")
def get_all(task, view, filter_dict, page_size=2500, request_limit=20):
    result = []
    c = count(task, view, filter_dict)
    total_pages = math.ceil(c / page_size)
    url = 'https://proteomics2.ucsd.edu/ProteoSAFe/QueryResult'
    params = [
        ('task', task),
        ('file', find_file_name(task, view)),
        ('query', encode_all_filters(filter_dict))
    ]
    rs = (
        grequests.get(url,
                      params=OrderedDict(params +
                                         [('pageSize', page_size),
                                          ('offset', page_offset * page_size)]))
        for page_offset in range(0, total_pages)
    )
    all_responses = []
    for l in grequests.imap(rs, size=request_limit):
        all_responses += l.json()['row_data']
    return all_responses
def sweap(mp, urldict, urllist):
    rs = (grequests.get(u) for u in urllist)
    for r in grequests.imap(rs):
        fd = get_info(r)
        if not fd:
            continue
        finfo = urldict.get(r.url, None)
        if not finfo:
            continue
        print finfo.msg
        logging.info(finfo.msg)
        fd['country'] = finfo.country
        fd['mediatype'] = finfo.mediatype
        fd['fieldtype'] = finfo.fieldtype
        fd['fieldinfo'] = finfo.fieldinfo
        fd['is_meta_stored'] = False
        sv = SaveFeed(mp, fd)
    msg = 'end sweap dictsize=%d listsize=%d' % (len(urldict), len(urllist))
    print msg
    logging.info(msg)
def do_requests(data, results, limit=None):
    if limit:
        total = limit
    else:
        total = len(data)

    i = 0
    for resp in grequests.imap(RequestsGenerator(data), stream=True):
        i += 1
        sys.stdout.write("\rCompleted %4d/%-4d [%-20s] %0d%% " % (
            i, total, "=" * (int((i / total) * 20)), i * 100 / total))
        sys.stdout.flush()

        add_result(results, data, resp)

        if limit and limit == i:
            return

    sys.stdout.write("\n")
    sys.stdout.flush()
def get_threds():
    # get the threads of a particular board
    print('------4chan Word Frequency Experiment------\nNOTE: These posts are from an online forum, and as such\nare NOT censored. Use at your own risk!\n---What This Is---\nThis script counts the number of occurrences of any particular\nword in a board on 4chan, and returns a descending list\nof those word frequencies. It currently ignores some\n(but not all!) common words.')
    which_thred = input("Please input the thread symbol (e.g., sci, g, or vg): ")
    thred_nums = json.loads(requests.get('https://a.4cdn.org/' + which_thred + '/threads.json').text)
    num_th = 0
    all_threads = []
    for q in thred_nums:
        num_th += 1
        for r in q['threads']:
            all_threads.append(r['no'])
    thred_base = 'https://a.4cdn.org/' + which_thred + '/thread/'
    print(str(all_threads))

    # This has something to do with a concept called 'deferred' ('promises' in JS).
    # Put simply, it has to wait for ALL the approx. 150 or so responses to
    # return before it can continue. We basically create an array of http reqs
    # with the line below, and then say "wait till they all get back" with
    # grequests.map(reqs)
    reqs = (grequests.get(thred_base + str(url) + '.json', timeout=10) for url in all_threads)
    rez = grequests.imap(reqs, exception_handler=exception_handler)

    txt = ''
    thred_count = 0
    print('Beginning thread concatenation')
    for r in rez:
        thred_count += 1
        try:
            coms = json.loads(r.text)['posts']
            for n in coms:
                try:
                    txt += n['com']
                except:
                    txt += ''
        except:
            txt += ''
        print('Done thread #' + str(thred_count))

    # got all txt. Now clean it!
    clean_txt = clean(txt)  # clean the text to remove unprintable chars
    no_html_txt = strip_tags(clean_txt)  # remove HTML tags, since those are not part of the posted data
    no_link_txt = reg.sub(r'^https?:\/\/.*[\r\n]*', '', no_html_txt)  # remove links (mostly)
    no_quote_txt = reg.sub('>>\d{4,}|>+|>>\d{4,}', ' ', no_link_txt)  # remove 4chan 'quotes', such as >>blahblah
    unwanted_symbs = [">", ">", "[^a-zA-Z0-9']"]
    for q in range(0, len(unwanted_symbs)):
        no_quote_txt = reg.sub(unwanted_symbs[q], ' ', no_quote_txt)

    count_words(no_quote_txt.lower())
def btc_e(assets):
    r = requests.get('https://btc-e.com/api/3/info').json()
    urls = []
    pairs = []
    for k, v in r['pairs'].items():
        k1, k2 = k.upper().split("_")
        if k1 in assets and k2 in assets:
            pairs.append(k)
            urls.append('https://btc-e.com/api/3/ticker/' + k)

    def item(r):
        k, v = r.popitem()
        k1, k2 = k.upper().split("_")
        return {'from': k1, 'to': k2, 'bid': v['buy'], 'ask': v['sell'], 'last': v['last']}

    return [item(x.json()) for x in
            grequests.imap([grequests.get(u) for u in urls])]
def anx(assets):
    retval = []
    urls = []
    pairs = []
    resp = requests.get('https://anxpro.com/api/3/currencyStatic').json()
    for k, v in resp['currencyStatic']['currencyPairs'].items():
        k1 = v['tradedCcy']
        k2 = v['settlementCcy']
        if k1 in assets and k2 in assets:
            pairs.append([k1, k2])
            urls.append('https://anxpro.com/api/2/%s/money/ticker' % k)

    def item(r):
        return {'from': r['vol']['currency'],
                'to': r['last']['currency'],
                'bid': float(r['buy']['value']),
                'ask': float(r['sell']['value']),
                'last': float(r['last']['value'])}

    return [item(i.json()['data'])
            for i in grequests.imap([grequests.get(u) for u in urls])]
def download_images(wnidfile, folder, n_images):
    def make_name(wnid, url):
        filename = url.encode("ascii", "ignore").replace("/", "_")
        return os.path.join(folder, wnid, filename)

    URL = "http://www.image-net.org/api/text/imagenet.synset.geturls?wnid={}"
    wnids = [l.strip().split()[0] for l in open(wnidfile)]
    random.shuffle(wnids)
    session = requests.Session()
    for wnid in wnids:
        try:
            os.makedirs(os.path.join(folder, wnid))
        except os.error:
            pass
        res = requests.get(URL.format(wnid))
        urls = [_.strip() for _ in res.text.split("\n")]
        urls = [u for u in urls if u]
        jobs = [grequests.get(url, session=session, timeout=5)
                for url in urls
                if not os.path.exists(make_name(wnid, url))]
        n_already_have = (len(urls) - len(jobs))
        N = max(min(n_images, len(urls)) - n_already_have, 0)
        print("getting %s, (have %d, need %d) (%d/%d)" %
              (wnid, n_already_have, N, wnids.index(wnid) + 1, len(wnids)))
        if N == 0:
            continue
        curr = 0
        pbar = tqdm(total=len(jobs))
        for res in grequests.imap(jobs, size=50):
            if curr >= N:
                print("got %d" % curr)
                break
            pbar.update()
            if "unavailable" in res.url:
                continue
            try:
                im = Image.open(StringIO(res.content))
                if im.width < 128 or im.height < 128:
                    continue
                im.save(make_name(wnid, res.url))
                curr += 1
            except IOError:
                continue
            except Exception as e:
                # print("caught exception: %s" % e)
                continue
def async_check_urls(url_list, request_size=128):
    d = {'err': []}
    greq = grequests.imap(
        (grequests.get('http://' + url, timeout=(10, 10)) for url in url_list),
        size=request_size)
    while True:
        try:
            res = next(greq)
        except StopIteration:
            break
        except:
            d['err'].append(res.url)
        else:
            try:
                d[res.status_code].append(res.url)
            except KeyError:
                d[res.status_code] = [res.url]
    return d
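# Hypothetical usage of async_check_urls(): bucket hostnames by the HTTP status
# code they returned, with failed lookups collected under 'err'. The hostnames
# are illustrative only.
if __name__ == "__main__":
    hosts = ["example.com", "httpbin.org", "nonexistent.invalid"]
    buckets = async_check_urls(hosts, request_size=16)
    for status, matched in buckets.items():
        print(status, matched)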
def run_command(url='', org='', account='', key='', command='', agent_list='', **kwargs):
    plugin_content = '#!/usr/bin/env bash \n' + command
    requests = (grequests.post(
        utils.build_api_url(url, org, account, endpoint='rpc' + '/run'),
        data={
            'name': 'temp.sh',
            'agent': agent_id,
            'content': base64.b64encode(plugin_content),
            'encoding': 'base64',
            'params': '',
            'type': 'SCRIPT'
        },
        callback=set_meta(agent_id),
        headers={'Authorization': "Bearer " + key}) for agent_id in agent_list)
    data = []
    for resp in grequests.imap(requests, size=10):
        data.append([resp.meta, resp.json()])
    return data
def main(*args):
    urls = [
        'http://localhost:8000/test?timeout=10&name=req1',
        'http://localhost:8001/test?timeout=9&name=req2',
        'http://localhost:8002/test?timeout=8&name=req3',
        'http://localhost:8003/test?timeout=7&name=req4',
        'http://localhost:8004/test?timeout=6&name=req5',
        'http://localhost:8004/test?timeout=5&name=req6',
        'http://localhost:8003/test?timeout=4&name=req7',
        'http://localhost:8002/test?timeout=3&name=req8',
        'http://localhost:8001/test?timeout=2&name=req9',
        'http://localhost:8000/test?timeout=1&name=req0',
    ]

    print datetime.datetime.now()
    rs = (grequests.get(u) for u in urls)
    for res in grequests.imap(rs, size=10):
        print datetime.datetime.now()
        print res.text
    print datetime.datetime.now()