def test_as_completed_invalid_args(self):
    """as_completed() must reject arguments that are not a collection of futures."""
    lone_future = asyncio.Future(loop=self.loop)

    # A single future instance is not a list of futures.
    self.assertRaises(
        TypeError,
        self.loop.run_until_complete,
        asyncio.as_completed(lone_future, loop=self.loop),
    )
    # Neither is a single coroutine object.
    self.assertRaises(
        TypeError,
        self.loop.run_until_complete,
        asyncio.as_completed(coroutine_function(), loop=self.loop),
    )
async def do_loop():
    """Chain two stages of work and print the second-stage results.

    Stage A is run for the numbers 0..4. Each stage-A result, in completion
    order, is handed to a stage-B call; once all stage-A results have been
    collected, the stage-B results are printed in completion order.
    """
    stage_a = [do_limitted_task_a(n) for n in range(5)]

    stage_b = []
    for finished in asyncio.as_completed(stage_a):
        a_result = await finished
        stage_b.append(do_limitted_task_b(a_result))

    for finished in asyncio.as_completed(stage_b):
        print(await finished)
def process_search(client, queries, with_progress_bar=False, description=None):
    """Run one search per query and return the results in completion order.

    Generator-based coroutine. When ``with_progress_bar`` is true the
    completion iterator is wrapped in a tqdm bar labelled ``description``.
    """
    pending = [get_search_result(client, query) for query in queries]

    completed = asyncio.as_completed(pending)
    if with_progress_bar:
        completed = tqdm.tqdm(completed, desc=description, total=len(pending))

    collected = []
    for next_done in completed:
        content = yield from next_done
        collected.append(content)
    return collected
async def find_node(self, remoteId):
    """Iteratively look up the node whose id is ``remoteId``.

    Starts from up to ``alpha`` neighbours taken from the local route table
    and repeatedly sends ``findNode`` calls, feeding newly learned nodes
    into the next round.

    Returns the matching node when found, or ``None`` once there are no
    candidates left to query.
    """
    # Check if node is already in Route
    alpha = self.config["query"]["alpha"]
    queryNode = Node(remoteId)

    def get_findNode_future(node, id):
        # NOTE(review): the ``id`` parameter is never used (and shadows the
        # builtin); the enclosing ``remoteId`` is sent instead.
        return self.tcpService.call.findNode(
            node.remote, remoteId
        )
    longest_distance_list = utils.DelayList([2 ** 160])
    nodes_to_ping = {}
    for distance, node in self.route.findNeighbors(queryNode)[:alpha]:
        if distance == 0:
            # The target itself is already a known neighbour.
            return node
        nodes_to_ping[node.id] = node
    nodes_queried = []
    while True:
        if not(nodes_to_ping):
            # No candidates left: the lookup failed.
            return None
        longest_distance = longest_distance_list.__next__()
        node_to_query = list(nodes_to_ping.items())[:alpha]
        commands = [
            get_findNode_future(node, remoteId)
            for (key, node) in node_to_query
        ]
        nodes_queried.extend([key for key, node in node_to_query])
        nodes_to_ping.clear()
        futures = []
        # First pass: each call result is itself collected as something
        # awaitable for the second pass below.
        # NOTE(review): presumably the call returns a future for the reply - confirm.
        for f in asyncio.as_completed(commands):
            futures.append(await f)
        __longest_distance = 2 ** 160
        for f in asyncio.as_completed(futures):
            _remoteId, count, remoteNodes = (await f)["data"]["data"]
            for remoteNode in remoteNodes:
                if remoteNode.id == remoteId:
                    return remoteNode
                # Only keep nodes within the current distance bound that
                # have not been queried yet.
                if remoteNode.distance(queryNode.hash) <= longest_distance and not(remoteNode.id in nodes_queried):
                    nodes_to_ping[remoteNode.id] = remoteNode
                    # NOTE(review): this measures the node's distance to its
                    # own hash; ``queryNode.hash`` may have been intended - confirm.
                    __longest_distance = min(
                        __longest_distance,
                        remoteNode.distance(remoteNode.hash)
                    )
        longest_distance_list.data.append(__longest_distance)
async def fetch_cluster_info(self):
    """Load cluster info from the first configured node that answers.

    All nodes are queried concurrently. The first response that can be
    turned into a ``ClusterNodesManager`` is stored in
    ``self._cluster_manager`` and the method returns. Whatever the outcome,
    every query task is cancelled and awaited before leaving.

    Raises:
        RedisClusterError: if no node produced usable cluster info.
    """
    logger.info('Loading cluster info from {}...'.format(self._nodes))

    tasks = []
    for node in self._nodes:
        query = self._get_raw_cluster_info_from_node(node)
        tasks.append(asyncio.ensure_future(query, loop=self._loop))

    try:
        for finished in asyncio.as_completed(tasks, loop=self._loop):
            try:
                nodes_raw_response = list(await finished)
                self._cluster_manager = ClusterNodesManager.create(
                    nodes_raw_response
                )
                logger.info('Cluster info loaded successfully: %s',
                            nodes_raw_response)
                return
            except (ReplyError, ProtocolError,
                    ConnectionError, OSError) as exc:
                logger.warning(
                    "Loading cluster info from a node failed with {}"
                    .format(repr(exc))
                )
    finally:
        for leftover in tasks:
            leftover.cancel()
        # Wait until all tasks have closed their connection
        await asyncio.gather(
            *tasks, loop=self._loop, return_exceptions=True)

    raise RedisClusterError(
        "No cluster info could be loaded from any host")
def downloader_coro(cc_list, base_url, verbose, concur_req):
    """Download one item per country code and tally the resulting statuses.

    Generator-based coroutine. At most ``concur_req`` downloads hold the
    shared semaphore at once. A tqdm progress bar is shown when ``verbose``
    is false; when it is true, each failure is printed instead.

    Returns:
        collections.Counter mapping each status to its number of occurrences.
    """
    counter = collections.Counter()
    semaphore = asyncio.Semaphore(concur_req)
    to_do = [download_one(cc, base_url, semaphore, verbose)
             for cc in sorted(cc_list)]

    to_do_iter = asyncio.as_completed(to_do)
    if not verbose:
        to_do_iter = tqdm.tqdm(to_do_iter, total=len(cc_list))

    for future in to_do_iter:
        try:
            res = yield from future
        except FetchError as exc:
            # Any FetchError counts as an error, even when its cause carries
            # an empty/falsy message (previously the status was then left
            # stale or unbound).
            status = HTTPStatus.error
            country_code = exc.country_code
            try:
                error_msg = exc.__cause__.args[0]
            except IndexError:
                error_msg = exc.__cause__.__class__.__name__
            if verbose and error_msg:
                msg = '*** Error for {}: {}'
                print(msg.format(country_code, error_msg))
        else:
            status = res.status

        counter[status] += 1

    return counter
def downloader_coro(cc_list, base_url, verbose, concur_req):
    """Download one item per country code and tally the resulting statuses.

    Generator-based coroutine. A semaphore sized ``concur_req`` is shared by
    all downloads. A tqdm progress bar is shown when ``verbose`` is false;
    when it is true, failures are printed as they occur.

    Returns:
        collections.Counter mapping each status to its number of occurrences.
    """
    tally = collections.Counter()
    limiter = asyncio.Semaphore(concur_req)
    jobs = [download_one(cc, base_url, limiter, verbose)
            for cc in sorted(cc_list)]

    completed = asyncio.as_completed(jobs)
    if not verbose:
        completed = tqdm.tqdm(completed, total=len(cc_list))

    for job in completed:
        try:
            outcome = yield from job
        except FetchError as exc:
            failed_cc = exc.country_code
            try:
                reason = exc.__cause__.args[0]
            except IndexError:
                # The cause has no args: fall back to its class name.
                reason = exc.__cause__.__class__.__name__
            if verbose and reason:
                template = '*** Error for {}: {}'
                print(template.format(failed_cc, reason))
            status = HTTPStatus.error
        else:
            status = outcome.status
        tally[status] += 1

    return tally
def download_coro(names, url, verbose, concur_req):
    """Download every name concurrently and return how many were requested.

    Generator-based coroutine. A semaphore sized ``concur_req`` is shared by
    all downloads. When ``verbose`` is true a tqdm bar wraps the completion
    iterator and failures are printed.

    Returns:
        ``len(names)``, regardless of how many downloads failed.
    """
    limiter = asyncio.Semaphore(concur_req)
    jobs = [download_one(name, url, limiter, verbose)
            for name in sorted(names)]

    completed = asyncio.as_completed(jobs)
    if verbose:
        completed = tqdm.tqdm(completed, total=len(names))

    for job in completed:
        try:
            yield from job
        except CustomError as exc:
            country = exc.country
            try:
                reason = exc.__cause__.args[0]
            except IndexError:
                reason = exc.__cause__.__class__.__name__
            if verbose and reason:
                template = '*** Error for {}: {}'
                print(template.format(country, reason))

    return len(names)
async def run(self, tree):
    """Generate x11-proto ebuilds from the xorg-proto meta-package ebuilds.

    Every ``xorg-proto-*.ebuild`` under ``x11-base/xorg-proto`` is handed to
    ``worker_async`` through ``self.cpu_bound_executor``. The returned
    mappings are merged, and for each (package, version) key an ebuild is
    created from the template and recorded with the collector's logger.

    :return: None
    """
    pattern = tree.root + "/x11-base/xorg-proto/xorg-proto-*.ebuild"
    meta_ebuild_paths = list(glob(pattern))

    futures = []
    for ebuild_path in meta_ebuild_paths:
        futures.append(self.loop.run_in_executor(
            self.cpu_bound_executor,
            self.run_async_in_executor,
            self.worker_async,
            ebuild_path,
        ))

    # Merge the per-ebuild mappings as they complete.
    meta_mappings = defaultdict(set)
    for finished in asyncio.as_completed(futures):
        partial = await finished
        for key, atoms in partial.items():
            meta_mappings[key] |= atoms

    for (pkg, ver), atoms in meta_mappings.items():
        subpath = "x11-proto/%s/%s-%s.ebuild" % (pkg, pkg, ver)
        output_ebuild = tree.root + "/" + subpath
        output_dir = os.path.dirname(output_ebuild)
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        step = CreateEbuildFromTemplate(
            template_text=self.template_text,
            template_params={"all_meta_atoms": sorted(atoms)},
            file_subpath=subpath,
        )
        await step.run(tree)
        self.collector.cpm_logger.record(
            tree.name,
            get_catpkg_from_ebuild_path(output_ebuild),
            is_fixup=True,
        )
def _single_election(self):
    """Run one election round for the current term.

    Generator-based coroutine. Votes for itself, requests a vote from every
    peer, and processes the replies in completion order through
    ``self._election_counting`` until that method signals to stop.
    If every reply was consumed without a stop signal, the coroutine may
    sleep before returning so the caller can retry.
    """
    # Setup
    vote_count = 0
    term = self.current_term
    self.voted_for = self.candidate_id

    # Send requests to all peers
    futures = [
        peer.request_vote(term, self.candidate_id,
                          self.log.last_index, self.log.term)
        for peer in self.peers
    ]

    # Get results as they come in
    for f in asyncio.as_completed(futures):
        vote_msg = yield from f
        count, stop = self._election_counting(vote_msg, term, vote_count)
        vote_count += count
        if stop:
            break
    else:
        # Got all results and didn't become leader.  Sleep until
        # the end of the current term, then loop around and try
        # again
        # NOTE(review): ``now - current_term_end`` is only positive once the
        # term end has passed; ``current_term_end - now`` may have been
        # intended - confirm against the caller.
        diff = self.now() - self.current_term_end
        if diff > timedelta(seconds=0):
            # ``yield from`` (not a bare ``yield``) is what actually
            # suspends on the sleep.
            yield from asyncio.sleep(diff.total_seconds())
async def main(urls, *, url_finders=None):
    """Fetch the title of every URL concurrently and log each outcome.

    ``url_finders`` defaults to the finders from ``.extrafinders``. Failures
    of individual fetches are logged and do not stop the remaining ones.
    """
    fixup()

    from .extrafinders import GithubFinder, GithubUserFinder, ZhihuZhuanlan, NeteaseMusic
    if not url_finders:
        url_finders = (GithubFinder, GithubUserFinder, ZhihuZhuanlan, NeteaseMusic)

    # Pretty logging is optional.
    try:
        from nicelogger import enable_pretty_logging
        enable_pretty_logging()
    except ImportError:
        pass

    fetches = []
    for target in urls:
        fetches.append(TitleFetcher(target, url_finders=url_finders).run())

    for finished in asyncio.as_completed(fetches):
        try:
            result = await finished
        except Exception:
            logger.exception('an error occurred')
            continue

        info = result.info
        visited = result.url_visited
        status_code = result.status_code

        chain = ' <- '.join(reversed(visited))
        logger.info('done: [%d] %s <- %s', status_code, info, chain)
def as_completed(self):
    """Return a new instance of this class wrapping ``asyncio.as_completed``.

    The current contents are materialised into a list first, because
    ``asyncio.as_completed`` needs a concrete collection of futures rather
    than a generator. Wrapping the resulting iterator in the same class
    lets callers keep chaining (``.gather``, ``.wait`` and such).
    """
    wrapper_cls = self.__class__
    # The iterator hands out futures that will only resolve once.
    return wrapper_cls(asyncio.as_completed(list(self)))
async def main():
    """Find the smallest cookie sets that still work and print them.

    Reads the request from stdin, runs the two search strategies
    concurrently, and stops early as soon as one strategy returns nothing
    or returns a combination shorter than two cookies. Then prints every
    combination of minimal length.
    """
    url, headers = parse_headers_from_stdin()
    cookies = parse_cookies(headers.get('cookie', ''))

    # Create the tasks explicitly so that the objects cancelled below are
    # the ones actually running. Calling gather() on the raw coroutines a
    # second time would wrap them in new tasks and leave these untouched.
    tasks = [
        asyncio.ensure_future(try_cookie_combinations(url, headers, cookies)),
        asyncio.ensure_future(try_cookie_removal(url, headers, cookies)),
    ]

    all_results = []
    try:
        for result_co in asyncio.as_completed(tasks):
            results = await result_co
            all_results += results
            if not results:
                break
            if min(len(res) for res in results) < 2:
                break
    finally:
        # Stop whatever is still running and let the cancellation settle.
        for task in tasks:
            task.cancel()
        await asyncio.gather(*tasks, return_exceptions=True)

    if not all_results:
        print("No combinations found")
        return

    min_len = min(len(res) for res in all_results)
    for combo in all_results:
        if len(combo) == min_len:
            print('Found valid combination with cookie names:',
                  ', '.join([ck.partition('=')[0] for ck in combo]))
async def _run(coros):
    """Await every coroutine in completion order, logging failures.

    An exception from one coroutine is logged as a warning together with
    its traceback and does not stop the remaining ones. Results are
    discarded.
    """
    for pending in asyncio.as_completed(coros):
        try:
            await pending
        except Exception as err:
            log.warning('{}: {}', err.__class__.__name__, str(err))
            traceback.print_exc()
def as_completed(self, *, loop, **kwargs):
    """Proxy ``asyncio.as_completed`` over ``self.all_tasks``.

    Yields whatever the underlying iterator yields, and always calls
    ``self.check_tasks()`` when iteration ends, whether normally, through
    an exception, or because the generator was closed.
    """
    try:
        # Creating the iterator stays inside the try block so that
        # check_tasks() also runs if the call itself fails.
        completion_iter = asyncio.as_completed(
            self.all_tasks, loop=loop, **kwargs)
        yield from completion_iter
    finally:
        self.check_tasks()
def process_sources_for_status(client, sources,):
    """Retrieve the status of every source and return them sorted by nick.

    Generator-based coroutine. Each status is indexed with ``[0]`` to reach
    the object whose ``nick`` attribute is the sort key.
    """
    lookups = [retrieve_status(client, source) for source in sources]

    statuses = []
    for finished in asyncio.as_completed(lookups):
        statuses.append((yield from finished))

    statuses.sort(key=lambda entry: entry[0].nick)
    return statuses
def downloader_coro(star_id, verbose, concur_req, counter):
    """Download every photo URL listed for ``star_id`` and tally statuses.

    Generator-based coroutine. URLs are read, one per non-blank line, from
    the file named ``star_id`` under ``STAR_PHOTO_LIST_DIR``. When there is
    at least one URL, the per-star directory under ``STAR_PHOTO_DIR`` is
    created if needed and all URLs are downloaded with at most
    ``concur_req`` holding the semaphore at once.

    Args:
        star_id: name of the URL list file and of the output directory.
        verbose: accepted for interface compatibility; downloads are always
            started with verbosity off.
        concur_req: size of the shared semaphore.
        counter: mapping whose per-status counts are incremented in place.

    Returns:
        The same ``counter`` object.
    """
    semaphore = asyncio.Semaphore(concur_req)

    with open(os.path.join(STAR_PHOTO_LIST_DIR, star_id), 'r') as f:
        urls = [line.strip() for line in f if line.strip()]

    if urls:
        # exist_ok avoids the check-then-create race between concurrent runs.
        os.makedirs(os.path.join(STAR_PHOTO_DIR, star_id), exist_ok=True)

        to_download = [download_one(star_id, url, semaphore, False)
                       for url in sorted(urls)]
        for future in asyncio.as_completed(to_download):
            try:
                res = yield from future
            except FetchError:
                # The error details were never used, and reading them could
                # itself fail when the exception had no __cause__.
                status = HTTPStatus.error
            else:
                status = res.status
            counter[status] += 1

    return counter
def process_sources_for_file(client, sources, limit, cache=None):
    """Collect the tweets of every source and return the top ``limit``.

    Generator-based coroutine. Tweets from all sources are pooled, sorted
    in descending order, and truncated to ``limit`` entries.
    """
    fetches = [retrieve_file(client, source, limit, cache)
               for source in sources]

    pooled = []
    for finished in asyncio.as_completed(fetches):
        batch = yield from finished
        pooled.extend(batch)

    ranked = sorted(pooled, reverse=True)
    return ranked[:limit]
async def _grab(self, types=None, check=False):
    """Collect proxies from the providers and pass each one to ``_handle``.

    Providers are queried in batches of ``MAX_CONCURRENT_PROVIDERS``. When
    ``self._server`` is set, the cycle repeats forever with a pause of
    ``GRAB_PAUSE`` seconds between rounds; otherwise a single cycle runs,
    after which the check queue is joined and ``_done`` is called.
    """
    def _get_tasks(by=MAX_CONCURRENT_PROVIDERS):
        def _wanted(provider):
            # No filter, a provider without a declared protocol, or an
            # overlap between the provider's protocols and the requested ones.
            return (not types or not provider.proto
                    or bool(provider.proto & types.keys()))

        remaining = [pr for pr in self._providers if _wanted(pr)]
        while remaining:
            batch, remaining = remaining[:by], remaining[by:]
            tasks = [asyncio.ensure_future(pr.get_proxies()) for pr in batch]
            self._all_tasks.extend(tasks)
            yield tasks

    log.debug('Start grabbing proxies')
    while True:
        for tasks in _get_tasks():
            for finished in asyncio.as_completed(tasks):
                proxies = await finished
                for proxy in proxies:
                    await self._handle(proxy, check=check)
        log.debug('Grab cycle is complete')

        if not self._server:
            break

        log.debug('fall asleep for %d seconds' % GRAB_PAUSE)
        await asyncio.sleep(GRAB_PAUSE)
        log.debug('awaked')

    await self._on_check.join()
    self._done()
async def url2urls(self, url):
    """Collect dated article links from the front page of every domain.

    Fetches ``http://<domain>/`` for each entry of ``self.domains`` and
    extracts the ``href`` of anchors whose target contains a
    ``.<tld>/YYYY/MM/`` path segment.

    Args:
        url: unused; kept for interface compatibility.

    Returns:
        A list of the distinct URLs found (order unspecified).
    """
    exp = r'''<a href\s*=\s*['"]([^'"]*\.\w+/\d{4}/\d{2}/[^'"#]*)['"]>'''
    urls = set()
    fetches = [self.get("http://%s/" % d) for d in self.domains]
    for task in asyncio.as_completed(fetches):
        page = await task
        # update() mutates the set in place; union() returned a new set
        # that was thrown away, so the result was always empty.
        urls.update(re.findall(exp, page))
    return list(urls)
async def quote_many(num_quotes=1, conn_limit=20, progress=None, step=10):
    """Fetch ``num_quotes`` quotes concurrently and return them.

    Args:
        num_quotes: number of quote requests to issue.
        conn_limit: size of the semaphore shared by all requests.
        progress: progress bar to drive; a new ``ProgressBar`` when None.
        step: the bar advances once per ``step`` completions.

    Returns:
        The quotes, in the order the requests were created.
    """
    if progress is None:
        progress = ProgressBar()
    progress.max = num_quotes // step

    logger.info('Process total %d quotes with max %d concurrent connections'
                % (num_quotes, conn_limit))
    logger.debug('... progress bar increment step size: %d coroutines' % step)

    semaphore = asyncio.Semaphore(conn_limit)
    futures = []
    for _ in range(num_quotes):
        futures.append(asyncio.ensure_future(quote_with_lock(semaphore)))

    t_start = datetime.today()
    seen = 0
    for fut in asyncio.as_completed(futures):
        seen += 1
        if seen % step == 0:
            progress.next()
        await fut
    t_end = datetime.today()
    progress.finish()

    elapsed = (t_end - t_start).total_seconds()
    logger.info('All coroutines complete in {:.2f} seconds'.format(elapsed))

    return [fut.result() for fut in futures]
async def downloader_coro(cc_list, base_url, verbose, concur_req):
    """Download one item per country code and tally the resulting statuses.

    A semaphore sized ``concur_req`` is shared by all downloads. A tqdm
    progress bar is shown when ``verbose`` is false; when it is true,
    failures are printed as they occur.

    Returns:
        collections.Counter mapping each status to its number of occurrences.
    """
    tally = collections.Counter()
    limiter = asyncio.Semaphore(concur_req)
    jobs = [download_one(cc, base_url, limiter, verbose)
            for cc in sorted(cc_list)]

    completed = asyncio.as_completed(jobs)
    if not verbose:
        completed = tqdm.tqdm(completed, total=len(cc_list))

    for job in completed:
        try:
            outcome = await job
        except FetchError as exc:
            failed_cc = exc.country_code
            try:
                reason = exc.__cause__.args[0]
            except IndexError:
                # The cause has no args: fall back to its class name.
                reason = exc.__cause__.__class__.__name__
            if verbose and reason:
                template = '*** Error for {}: {}'
                print(template.format(failed_cc, reason))
            status = HTTPStatus.error
        else:
            status = outcome.status
        tally[status] += 1

    return tally
def downloader_coro(cc_list):
    """Download every country code and return results in completion order.

    Generator-based coroutine. Each awaitable handed out by
    ``asyncio.as_completed`` is printed before it is awaited.
    """
    jobs = [download_one(cc) for cc in cc_list]

    collected = []
    for next_done in asyncio.as_completed(jobs):
        print(next_done)
        collected.append((yield from next_done))
    return collected
async def store(self, key, value, cached=True):
    """Send a ``store`` call for ``key``/``value`` to every neighbour of the key.

    Each call is awaited, and whatever it returns is awaited again in a
    second pass. When ``cached`` is true the pair is also written to the
    local storage.

    Returns:
        Always ``True``.
    """
    target = Node(key)

    commands = []
    for _distance, node in self.route.findNeighbors(target):
        commands.append(self.tcpService.call.store(node.remote, key, value))

    replies = []
    for pending in asyncio.as_completed(commands):
        replies.append(await pending)

    for pending in asyncio.as_completed(replies):
        await pending

    if cached:
        await self.storage.store(key, value)
    return True
async def main():
    """Run ``MAX_CLIENTS`` fetches and print each result as it completes.

    Every line is prefixed with one more ``>>`` than the previous one; the
    total elapsed time is printed at the end.
    """
    started_at = time.time()
    fetches = [fetch_async(client_no)
               for client_no in range(1, MAX_CLIENTS + 1)]

    for rank, finished in enumerate(asyncio.as_completed(fetches), start=1):
        result = await finished
        print('{} {}'.format(">>" * rank, result))

    print("Process took: {:.2f} seconds".format(time.time() - started_at))
async def check(self):
    """Check every configured entry for a new version.

    Loads the old versions (when ``self.oldver`` is set), starts one worker
    per config section other than ``__config__``, and handles each result
    as it completes. Finally writes ``self.curvers`` to ``self.newver``
    when that is set.
    """
    if self.oldver:
        self.oldvers = read_verfile(self.oldver)
    else:
        self.oldvers = {}
    self.curvers = self.oldvers.copy()

    tries = self.tries
    # The queue is pre-filled with tokens; a worker must take one before
    # running, which caps concurrency at ``max_concurrent``.
    token_q = asyncio.Queue(maxsize=self.max_concurrent)

    for _ in range(self.max_concurrent):
        await token_q.put(True)

    async def worker(name, conf):
        # Returns (name, version) on success, or (name, exception) once the
        # last attempt has failed.
        # NOTE(review): with ``tries == 0`` the loop body never runs and the
        # worker returns None, which the unpacking below cannot handle.
        await token_q.get()
        try:
            for i in range(tries):
                try:
                    ret = await get_version(
                        name, conf, keyman=self.keymanager)
                    return name, ret
                except Exception as e:
                    if i + 1 < tries:
                        logger.warning('failed, retrying',
                                       name=name, exc_info=e)
                        # Wait longer after each failed attempt.
                        await asyncio.sleep(i)
                    else:
                        return name, e
        finally:
            # Always hand the token back, even on failure.
            await token_q.put(True)

    config = self.config
    futures = []
    for name in config.sections():
        if name == '__config__':
            continue

        conf = config[name]
        conf['oldver'] = self.oldvers.get(name, None)
        fu = asyncio.ensure_future(worker(name, conf))
        futures.append(fu)

    for fu in asyncio.as_completed(futures):
        name, result = await fu
        if isinstance(result, Exception):
            logger.error('unexpected error happened',
                         name=name, exc_info=result)
            self.on_exception(name, result)
        elif result is not None:
            self.print_version_update(name, result)
        else:
            # No version found: only report it unless the entry opted out.
            conf = config[name]
            if not conf.getboolean('missing_ok', False):
                logger.warn('no-result', name=name)
                self.on_no_result(name)

    if self.newver:
        write_verfile(self.newver, self.curvers)
async def get_results_as_completed(tasks):
    """Await ``tasks`` concurrently and return their results.

    Each result is printed and collected as soon as it is ready, so the
    returned list is in completion order rather than submission order.
    """
    collected = []
    for finished in asyncio.as_completed(tasks):
        outcome = await finished
        print('Usable:', outcome)
        collected.append(outcome)
    return collected
def trigger(self, event):
    """Invoke every listener matching ``event`` and dispatch the results.

    Generator-based coroutine. The listeners registered under ``event``
    are filtered with ``self.matches``; each matching callback is called
    with the event, and its result is passed to ``self.dispatch`` as soon
    as it completes.
    """
    log.debug("Received event: %s", event)
    listeners = self._listeners[event]
    # The callbacks receive ``event``; the previous spelling ``evant``
    # raised NameError as soon as one listener matched.
    pending = [callback(event)
               for callback, event_type in listeners
               if self.matches(event, event_type)]
    for future in asyncio.as_completed(pending):
        result = yield from future
        self.dispatch(result)
def foo():
    """Collect the outcome of ``a`` and ``b`` under a 0.12 s timeout.

    Generator-based coroutine. Returns a list of ``(1, value)`` for each
    awaitable that finished and ``(2, exc)`` for each timeout.
    """
    outcomes = []
    for pending in asyncio.as_completed([a, b], timeout=0.12, loop=loop):
        try:
            value = yield from pending
        except asyncio.TimeoutError as exc:
            outcomes.append((2, exc))
        else:
            outcomes.append((1, value))
    return outcomes
def run_experiment(base_url, num_iter=500):
    """Fetch ``num_iter`` generated URLs and return the total response size.

    Generator-based coroutine. Requests go through a client created with
    ``chunked_http_client(100)``; the lengths of all responses are summed.
    """
    urls = generate_urls(base_url, num_iter)
    http_client = chunked_http_client(100)
    requests = [http_client(target) for target in urls]

    total_length = 0
    for finished in asyncio.as_completed(requests):
        body = yield from finished
        total_length += len(body)
    return total_length
async def run(self):
    """
    DockerFirstStage.

    Downloads the tag list of the remote repository, then the manifest of
    every (optionally whitelisted) tag, and emits the resulting tag,
    manifest-list, manifest and blob objects through ``self.put``.
    """
    future_manifests = []
    tag_list = []
    to_download = []
    man_dcs = {}
    total_blobs = []

    with ProgressBar(message='Downloading tag list', total=1) as pb:
        repo_name = self.remote.namespaced_upstream_name
        relative_url = '/v2/{name}/tags/list'.format(name=repo_name)
        tag_list_url = urljoin(self.remote.url, relative_url)
        list_downloader = self.remote.get_downloader(url=tag_list_url)
        await list_downloader.run(extra_data={'repo_name': repo_name})

        with open(list_downloader.path) as tags_raw:
            tags_dict = json.loads(tags_raw.read())
            tag_list = tags_dict['tags']

        # check for the presence of the pagination link header
        link = list_downloader.response_headers.get('Link')
        await self.handle_pagination(link, repo_name, tag_list)
        whitelist_tags = self.remote.whitelist_tags
        if whitelist_tags:
            # Keep only the tags named in the comma-separated whitelist.
            tag_list = list(set(tag_list) & set(whitelist_tags.split(',')))
        pb.increment()

    # Queue one manifest download per tag.
    for tag_name in tag_list:
        relative_url = '/v2/{name}/manifests/{tag}'.format(
            name=self.remote.namespaced_upstream_name,
            tag=tag_name,
        )
        url = urljoin(self.remote.url, relative_url)
        downloader = self.remote.get_downloader(url=url)
        to_download.append(
            downloader.run(extra_data={'headers': V2_ACCEPT_HEADERS}))

    pb_parsed_tags = ProgressBar(message='Processing Tags', state='running')

    for download_tag in asyncio.as_completed(to_download):
        tag = await download_tag
        with open(tag.path, 'rb') as content_file:
            raw_data = content_file.read()
        content_data = json.loads(raw_data)
        media_type = content_data.get('mediaType')
        tag.artifact_attributes['file'] = tag.path
        saved_artifact = Artifact(**tag.artifact_attributes)
        try:
            saved_artifact.save()
        except IntegrityError:
            # Saving failed on an integrity constraint: look the existing
            # artifact up by the remaining attributes instead.
            del tag.artifact_attributes['file']
            saved_artifact = Artifact.objects.get(
                **tag.artifact_attributes)
        tag_dc = self.create_tag(saved_artifact, tag.url)

        if media_type == MEDIA_TYPE.MANIFEST_LIST:
            list_dc = self.create_tagged_manifest_list(
                tag_dc, content_data)
            await self.put(list_dc)
            tag_dc.extra_data['man_relation'] = list_dc
            for manifest_data in content_data.get('manifests'):
                man_dc = self.create_manifest(list_dc, manifest_data)
                # The listed manifests are processed in the second loop
                # below, once their futures resolve.
                future_manifests.append(man_dc.get_or_create_future())
                man_dcs[man_dc.content.digest] = man_dc
                await self.put(man_dc)
        else:
            man_dc = self.create_tagged_manifest(tag_dc, content_data,
                                                 raw_data)
            await self.put(man_dc)
            tag_dc.extra_data['man_relation'] = man_dc
            self.handle_blobs(man_dc, content_data, total_blobs)
        await self.put(tag_dc)
        pb_parsed_tags.increment()

    pb_parsed_tags.state = 'completed'
    pb_parsed_tags.total = pb_parsed_tags.done
    pb_parsed_tags.save()

    for manifest_future in asyncio.as_completed(future_manifests):
        man = await manifest_future
        with man._artifacts.get().file.open() as content_file:
            raw = content_file.read()
        content_data = json.loads(raw)
        man_dc = man_dcs[man.digest]
        self.handle_blobs(man_dc, content_data, total_blobs)
    # Blobs are emitted last, after all manifests have been handled.
    for blob in total_blobs:
        await self.put(blob)
async def main(task_list):
    """Await every entry of ``task_list`` in completion order.

    Results are discarded; the first exception raised by a task propagates.
    """
    for finished in asyncio.as_completed(task_list):
        await finished
async def _wait_for_first(futures: Sequence[Awaitable[Any]]) -> None:
    """Return as soon as the first of ``futures`` has completed.

    Returns immediately when ``futures`` is empty. An exception raised by
    the first future to finish propagates to the caller.
    """
    earliest = next(iter(asyncio.as_completed(futures)), None)
    if earliest is None:
        return
    # We don't need to catch CancelledError here (and cancel not done futures)
    # because our callback (above) takes care of that.
    await cast(Awaitable[Any], earliest)
async def query_arm(ctx: Context, args: argparse.Namespace) -> None:
    """Enumerate ARM tenants and subscriptions and write them to sqlite.

    For every tenant, the subscriptions are filtered through ``args.subs``
    (allow list) and ``args.nosubs`` (deny list). Management certificates
    (only when ``ctx.cloud["MGMT"]`` is set), RBAC roles and subscription
    details are then queried per subscription and written to their
    respective sqlite outputs under ``OUTPUT_FOLDER``.
    """
    logger.info(f"Starting enumeration for ARM - {ctx.cloud['ARM']}")

    async with SubscriptionClient(ctx.cred_async,
                                  base_url=ctx.cloud["ARM"]) as sub_client:
        async for tenant in sub_client.tenants.list():
            tenant_dict = tenant.as_dict()
            tenant_dict["subscriptions"] = []

            logger.info(
                f"Enumerating subscription and resource groups for tenant {tenant.tenant_id}"
            )

            # GET LIST OF SUBS.
            sub_list = []
            async for subscription in sub_client.subscriptions.list():
                if args.subs and subscription.subscription_id not in args.subs:
                    continue
                if args.nosubs and subscription.subscription_id in args.nosubs:
                    continue
                sub_list.append(subscription)

            if not sub_list:
                logger.error(f"No subscriptions found for {tenant.tenant_id}")
                continue

            if ctx.cloud["MGMT"]:
                certs_tasks = [
                    asyncio.create_task(_query_management_certs(ctx, sub))
                    for sub in sub_list
                ]
                certs_output = OUTPUT_FOLDER / "certs.sqlite"
                for cert_future in asyncio.as_completed(certs_tasks):
                    # Write the awaited result; the awaitable itself was
                    # being passed to the writer before.
                    cert = await cert_future
                    if cert:
                        await sqlite_writer(certs_output, cert)

            rbac_output = OUTPUT_FOLDER / "rbac.sqlite"
            # The context manager shuts the pool down; previously a new
            # executor was leaked for every tenant.
            # NOTE(review): concurrent.futures.as_completed blocks the event
            # loop while waiting; kept as in the original.
            with concurrent.futures.ThreadPoolExecutor(
                    max_workers=len(sub_list)) as executor:
                rbac_tasks = {
                    executor.submit(_query_rbac, ctx, sub)
                    for sub in sub_list
                }
                for rbac in concurrent.futures.as_completed(rbac_tasks):
                    roles = rbac.result()
                    if roles:
                        for role in roles:
                            await sqlite_writer(rbac_output, role)

            sub_tasks = [
                asyncio.create_task(_query_subscription(ctx, sub))
                for sub in sub_list
            ]
            for result in asyncio.as_completed(sub_tasks):
                tenant_dict["subscriptions"].append(await result)

            tenant_output = OUTPUT_FOLDER / "tenant.sqlite"
            await sqlite_writer(tenant_output, tenant_dict)
async def _wait_for_first(futures: Sequence[Awaitable[Any]]) -> None:
    """Return as soon as the first of ``futures`` has completed.

    Returns immediately when ``futures`` is empty. An exception raised by
    the first future to finish propagates to the caller.
    """
    earliest = next(iter(asyncio.as_completed(futures)), None)
    if earliest is None:
        return
    await cast(Awaitable[Any], earliest)
async def task_wait_n(n: int, max_delay: int) -> List[float]:
    """Run ``task_wait_random(max_delay)`` ``n`` times concurrently.

    Returns the results in the order in which the waits completed.
    """
    waits = [task_wait_random(max_delay) for _ in range(n)]

    delays = []
    for finished in asyncio.as_completed(waits):
        delays.append(await finished)
    return delays
async def cli_fetch(args):
    """
    *** fetch ***

    Usage
    -----
    f[etch] refno[...]

    Description
    -----------
    Attempts to find the URL, and download, the full text PDF for the
    specified refnos. For more information on how to specify refnos, type
    'h list'.

    The heuristics used are hardcoded, so are not guaranteed to work on every
    DOI, and indeed even those that work now may break later. But the major
    publishers all work (for now). Supported publishers are: ACS, Wiley,
    Elsevier, Nature, Science, Springer, Taylor and Francis, and Annual
    Reviews (as of 27 May 2020).

    Note that in order to download the full-text PDF, institutional access
    must be enabled, e.g. via VPN. (Or, of course, the PDF must be
    open-access.)
    """
    # Argument parsing
    if _g.articleList == []:
        return _error("fetch: no articles have been loaded")
    if args == []:
        return _error("fetch: no references selected")
    try:
        refnos = parse_refnos(args)
    except ArgumentError as e:
        return _error(f"fetch: {str(e)}")
    if len(refnos) == 0:
        return _error("fetch: no references selected")

    # Check which ones need downloading
    articles_to_fetch = []
    for refno in refnos:
        # refnos are 1-based; the article list is 0-based.
        article = _g.articleList[refno - 1]
        if not article.to_fname("pdf").exists():
            articles_to_fetch.append(article)
        else:
            print(f"fetch: PDF for ref {refno} already in library")

    # Counters for successful and failed downloads.
    yes, no = 0, 0
    if articles_to_fetch == []:
        return _ret.SUCCESS
    else:
        # Construct DOI objects.
        dois = [DOI(article.doi) for article in articles_to_fetch]
        async with Spinner(message="Obtaining URLs...",
                           total=len(dois)) as spinner:
            tasks = [
                asyncio.create_task(
                    doi.to_full_pdf_url(client_session=_g.ahSession))
                for doi in dois
            ]
            # We're just using as_completed() to update the spinner. We aren't
            # actually retrieving the results from here, because they are not
            # returned in order.
            for coro in asyncio.as_completed(tasks):
                await coro
                spinner.increment(1)
        # Now they should all be done, so we can retrieve the results.
        urls = [task.result() for task in tasks]

        for article, url in zip(articles_to_fetch, urls):
            if url == _ret.FAILURE:
                no += 1
            else:
                x = await article.register_pdf(url, "pdf", _g.ahSession)
                if x == _ret.FAILURE:
                    no += 1
                else:
                    yes += 1

        print("fetch: {} PDFs successfully fetched, {} failed".format(yes, no))
        return _ret.SUCCESS
async def load_channels(self, load_message=True):
    """Load the feed of every known channel, printing progress on request.

    Runs under ``self.load_channels_lock``. Channel ids are read in an
    executor via ``get_channel_ids_with_feeds``; when that returns None,
    all text channels are used instead. Errors while building the task
    list or while loading a single feed are printed and do not abort the
    load.

    Args:
        load_message: when true, print start, quarter-progress and summary
            messages.
    """
    async with self.load_channels_lock:
        if load_message:
            print(
                'Loading feed services... This could take some time depending on the number of feeds.'
            )
        start = datetime.datetime.utcnow()
        existing_ids = await self._discord_client.loop.run_in_executor(
            None, self.get_channel_ids_with_feeds)
        get_channel_tasks = []
        try:
            if existing_ids is not None:
                for channel_id_with_feed in existing_ids:
                    if self.service.cli_args.startup_debug:
                        # mock a channel and change it to a valid id to test load
                        c_data = {
                            "id": channel_id_with_feed,
                            "type": None,
                            "name": "Startup DEBUG Feed",
                            "position": 1
                        }
                        first_guild = self._discord_client.guilds[
                            0]  # pick out the first guild in the list for state population
                        channel_obj = discord.TextChannel(
                            state=None, data=c_data, guild=first_guild)
                        get_channel_tasks.append(
                            self.get_channel_feed(channel_obj))
                    else:
                        channel_obj = self._discord_client.get_channel(
                            channel_id_with_feed)
                        # Ids for which the client returns no channel are skipped.
                        if channel_obj is not None:
                            get_channel_tasks.append(
                                self.get_channel_feed(channel_obj))
            else:
                async for i in self.__get_text_channels():
                    get_channel_tasks.append(self.get_channel_feed(i))
        except Exception as ex:
            traceback.print_exc()
            print(ex)
        # Next progress threshold, as a fraction of the total.
        ratio_print = .25
        loaded_count = 0
        total_load = len(get_channel_tasks)
        if len(get_channel_tasks) > 0:
            if load_message:
                print("Started loading {} feeds.".format(total_load))
            for f in asyncio.as_completed(get_channel_tasks):
                # Failed loads are counted as processed too.
                loaded_count += 1
                try:
                    await f
                except Exception as ex:
                    traceback.print_exc()
                    print(
                        "Error when loading a channel feed: Ex: {}".format(
                            ex))
                if load_message:
                    if loaded_count > (total_load * ratio_print):
                        print("Loaded {} feeds of {}. {}% done".format(
                            loaded_count, total_load,
                            int(ratio_print * 100)))
                        ratio_print += .25
        if load_message:
            print("Loaded {} feeds in {:.1f} seconds".format(
                len(get_channel_tasks),
                (datetime.datetime.utcnow() - start).total_seconds()))
async def run_tasks(tasks):
    """Print the result of each task as soon as it completes."""
    for finished in asyncio.as_completed(tasks):
        outcome = await finished
        print('completed:', outcome)
async def create_service(action, redis, stack_instance, to_be_deleted,
                         force_delete, service_name, service):
    """Run the automation jobs for every functional requirement of a service.

    The list of functional requirements comes from the service document,
    or from the OPA decision when the service has service policies. For a
    ``"delete"`` action the list is processed in reverse. Each requirement
    is turned into an ``invoke_automation`` job on the service's agent
    queue and its result is used to update the stack instance status.

    Returns:
        True when no job reported ``"FAILED"``, otherwise False.
    """
    opa_broker_factory = OPABrokerFactory()
    opa_broker = opa_broker_factory.get_opa_broker()
    success = True
    document_manager = get_document_manager()
    service_doc = document_manager.get_service(service_name)

    if service_doc.service_policies:
        logger.debug(
            f"Evaluating service policies: {service_doc.service_policies}")
        for svc_policy in service_doc.service_policies:
            logger.debug(f"Evaluating service policy: {svc_policy}")
            opa_data = {}
            opa_data['inputs'] = svc_policy['inputs']
            opa_data[
                'functional_requirements'] = service_doc.functional_requirements
            opa_data['infrastructure_target'] = service.infrastructure_target
            opa_data['service'] = service_name
            opa_data['stack_instance'] = stack_instance.dict()

            policy = document_manager.get_policy_template(svc_policy['name'])
            # Make sure the policy is in OPA
            opa_broker.add_policy(policy.name, policy.policy)
            opa_result = opa_broker.ask_opa_policy_decision(
                svc_policy['name'], "filter", opa_data)
            logger.debug(
                f"OPA result for service policy {svc_policy}: {opa_result}")
            # NOTE(review): each policy overwrites the previous result, so
            # only the last policy's decision is used - confirm.
            functional_requirements = opa_result['result']
    else:
        functional_requirements = service_doc.functional_requirements

    if action == "delete":
        functional_requirements = reversed(functional_requirements)

    for fr in functional_requirements:
        fr_doc = document_manager.get_functional_requirement(fr)
        fr_jobs = []
        infrastructure_target = service.infrastructure_target
        cloud_provider = service.cloud_provider

        logger.debug(
            f"Retrieved fr '{fr_doc}' from service_doc '{service_doc}'")
        # Build the invocation payload for the agent.
        invoc = {}
        invoc['action'] = action
        invoc['functional_requirement'] = fr
        invoc['image'] = fr_doc.invocation[cloud_provider].image
        invoc['before_command'] = fr_doc.invocation[
            cloud_provider].before_command
        invoc['infrastructure_target'] = infrastructure_target
        invoc['stack_instance'] = stack_instance.name
        tool = fr_doc.invocation[cloud_provider].tool
        invoc['tool'] = tool
        if tool.lower() == "ansible":
            if fr_doc.invocation[cloud_provider].playbook_path is not None:
                invoc['playbook_path'] = fr_doc.invocation[
                    cloud_provider].playbook_path
            if fr_doc.invocation[cloud_provider].serial:
                invoc['serial'] = fr_doc.invocation[cloud_provider].serial
        invoc['service'] = service_name
        invoc["hosts"] = service.hosts

        logger.debug("Appending job")
        job = await redis.enqueue_job("invoke_automation",
                                      invoc,
                                      _queue_name=service.agent)
        fr_jobs.append(asyncio.create_task(job.result(timeout=7200)))
        # NOTE(review): the original nesting of this ``break`` cannot be
        # recovered from the collapsed source. As laid out here it leaves
        # the requirement loop before the job result is awaited - confirm.
        if fr_doc.as_group:
            logger.debug("running as group")
            break

        for fr_job in asyncio.as_completed(fr_jobs):
            automation_result = await fr_job
            await update_status(automation_result, stack_instance, action,
                                to_be_deleted)
            if automation_result["status"] == "FAILED":
                success = False

        # A failure stops the remaining requirements unless deletion is forced.
        if not force_delete and not success:
            logger.debug("Not all fr's succeeded, stopping execution")
            break

        logger.debug("tasks executed")

    return success
async def _runner(self):
    """Issue one request per job and accumulate the results.

    Each request's results are appended to ``self.results`` as the request
    completes, so the accumulated order follows completion order.
    """
    requests = [self._request(job) for job in self.jobs]
    for finished in asyncio.as_completed(requests):
        batch = await finished
        self.results += batch
async def walk_step_tasks(dirs):
    """Yield the awaitables for the tasks of ``dirs`` in completion order.

    Async generator. Control is handed to the event loop once before
    iteration starts. The yielded items are the awaitables produced by
    ``asyncio.as_completed``; the consumer must await them itself.
    """
    pending = task_list(dirs)
    await asyncio.sleep(0)
    for next_done in asyncio.as_completed(pending):
        yield next_done
def process_as_results_come_in():
    """Fetch every URL from ``p.make_url_list()`` and print each response text.

    Generator-based coroutine. Responses are printed in completion order.
    """
    fetches = [get_url(target) for target in p.make_url_list()]
    for finished in asyncio.as_completed(fetches):
        response = yield from finished
        print(response.text)
async def _wait_for_first(futures):
    """Return as soon as the first of ``futures`` has completed.

    Returns immediately when ``futures`` is empty. An exception raised by
    the first future to finish propagates to the caller.
    """
    earliest = next(iter(asyncio.as_completed(futures)), None)
    if earliest is None:
        return
    await cast(asyncio.Future, earliest)
async def as_completed():
    """Demonstrate ``asyncio.as_completed`` with three ``http_call`` requests.

    Prints a start marker, each result as it completes, and an end marker.
    """
    print("asyncio.as_completed start")
    calls = [http_call(x) for x in range(1, 4)]
    for finished in asyncio.as_completed(calls):
        print(await finished)
    print("asyncio.as_completed end")
async def main(coros):
    """Print the result of every coroutine in completion order."""
    for finished in asyncio.as_completed(coros):
        outcome = await finished
        print(outcome)
async def wait_with_progress(coros: list, desc: str = None, unit: str = None):
    """Yield the result of each coroutine as it completes, with a tqdm bar.

    Async generator. ``desc`` and ``unit`` are passed through to tqdm; the
    bar's total is ``len(coros)``.
    """
    progress = tqdm.tqdm(
        asyncio.as_completed(coros),
        total=len(coros),
        desc=desc,
        unit=unit,
    )
    for finished in progress:
        yield await finished
async def main() -> None:
    """Run the CP2K regression test suite and exit with the failure count.

    Steps, in order: parse the command line into a ``Config``, query the cp2k
    binary for its feature flags, copy the ``tests`` tree into the work
    directory, discover unit tests and regression tests, launch one asyncio
    task per selected batch, print each batch result as it completes, and
    finally print the error, timing and summary sections.

    The process always terminates through ``sys.exit``: with status 1 when the
    feature flags cannot be parsed or no batch was selected, otherwise with
    ``num_tests - num_ok``.
    """
    parser = argparse.ArgumentParser(
        description="Runs CP2K regression test suite.")
    parser.add_argument("--mpiranks", type=int, default=2)
    parser.add_argument("--ompthreads", type=int, default=2)
    parser.add_argument("--maxtasks", type=int, default=os.cpu_count())
    parser.add_argument("--timeout", type=int, default=400)
    parser.add_argument("--maxerrors", type=int, default=50)
    parser.add_argument("--no-keep-alive",
                        dest="keep_alive",
                        action="store_false")
    parser.add_argument("--debug", action="store_true")
    parser.add_argument("--restrictdir", action="append")
    parser.add_argument("arch")
    parser.add_argument("version")
    cfg = Config(parser.parse_args())

    print(
        "************************** testing started *****************************"
    )
    start_time = time.perf_counter()

    # Query CP2K binary for feature flags.
    version_output, _ = await (await cfg.launch_exe("cp2k",
                                                    "--version")).communicate()
    flags_line = re.search(r" cp2kflags:(.*)\n", version_output.decode("utf8"))
    if not flags_line:
        # Show the raw output so the user can see why parsing failed.
        print(
            version_output.decode("utf8") + "\nCould not parse feature flags.")
        sys.exit(1)
    else:
        flags = flags_line.group(1).split()

    print(
        "\n-------------------------- Settings ------------------------------------"
    )
    print(f"MPI ranks: {cfg.mpiranks}")
    print(f"OpenMP threads: {cfg.ompthreads}")
    print(f"GPU devices: {cfg.num_gpus}")
    print(f"Workers: {cfg.num_workers}")
    print(f"Timeout [s]: {cfg.timeout}")
    print(f"Work base dir: {cfg.work_base_dir}")
    print(f"Keep alive: {cfg.keep_alive}")
    print(f"Debug: {cfg.debug}")
    print(f"ARCH: {cfg.arch}")
    print(f"VERSION: {cfg.version}")
    print(f"Flags: " + ",".join(flags))

    # Have to copy everything upfront because the test dirs are not self-contained.
    print(
        "\n------------------------------------------------------------------------"
    )
    print("Copying test files...", end="")
    shutil.copytree(cfg.cp2k_root / "tests", cfg.work_base_dir)
    print("done")

    # Discover unit tests.
    unittest_batch = Batch("UNIT", cfg)
    unittest_batch.workdir.mkdir()
    unittest_glob = (cfg.cp2k_root / "exe" /
                     cfg.arch).glob(f"*_unittest.{cfg.version}")
    for exe in unittest_glob:
        unittest_batch.unittests.append(
            Unittest(exe.stem, unittest_batch.workdir))

    # Read TEST_TYPES.
    # The first line holds the number of type definitions that follow it.
    test_types_fn = cfg.cp2k_root / "tests" / "TEST_TYPES"
    test_types: List[Optional[TestType]] = [None]  # test type zero
    lines = test_types_fn.read_text(encoding="utf8").split("\n")
    test_types += [TestType(l) for l in lines[1:int(lines[0]) + 1]]

    # Read TEST_DIRS.
    # Everything after a '#' is dropped and blank lines are skipped.
    batches: List[Batch] = [unittest_batch]
    test_dirs_fn = cfg.cp2k_root / "tests" / "TEST_DIRS"
    for line in test_dirs_fn.read_text(encoding="utf8").split("\n"):
        line = line.split("#", 1)[0].strip()
        if not line:
            continue
        batch = Batch(line, cfg)

        # Read TEST_FILES.
        # Note: the inner loop rebinds the outer loop variable `line`.
        test_files_fn = Path(batch.src_dir / "TEST_FILES")
        for line in test_files_fn.read_text(encoding="utf8").split("\n"):
            line = line.split("#", 1)[0].strip()
            if not line:
                continue
            batch.regtests.append(Regtest(line, test_types, batch.workdir))
        batches.append(batch)

    # Create async tasks.
    tasks = []
    num_restrictdirs = 0
    for batch in batches:
        if not batch.requirements_satisfied(flags, cfg.mpiranks):
            print(
                f"Skipping {batch.name} because its requirements are not satisfied."
            )
        elif not any(re.match(p, batch.name) for p in cfg.restrictdirs):
            # Filtered out by --restrictdir; only counted, reported once below.
            num_restrictdirs += 1
        elif batch.name in ALLWAYS_SKIP_DIRS:
            print(
                f"Skipping {batch.name} because it doesn't work with the cp2k shell."
            )
        else:
            tasks.append(asyncio.create_task(run_batch(batch, cfg)))  # launch

    if num_restrictdirs:
        print(
            f"Skipping {num_restrictdirs} test directories because of --restrictdir."
        )
    if not tasks:
        print("\nNo test directories selected, check --restrictdir filter.")
        sys.exit(1)

    # Wait for tasks to finish and print their results.
    print(
        f"Launched {len(tasks)} test directories and {cfg.num_workers} worker...\n"
    )
    all_results: List[TestResult] = []
    with open(cfg.error_summary, "wt", encoding="utf8",
              errors="replace") as err_fh:
        for num_done, task in enumerate(asyncio.as_completed(tasks)):
            batch_result = await task
            all_results += batch_result.results
            print(f">>> {batch_result.batch.workdir}")
            print("\n".join(str(r) for r in batch_result.results))
            print(f"<<< {batch_result.batch.workdir} ({num_done + 1}", end="")
            print(f" of {len(tasks)}) done in {batch_result.duration:.2f} sec")
            sys.stdout.flush()
            # Error texts are flushed per batch so the summary file is usable
            # while the run is still in progress.
            err_fh.write("\n".join(r.error for r in batch_result.results
                                   if r.error))
            err_fh.flush()
            # Stop collecting once the number of non-OK results exceeds the
            # configured limit; the reports below cover what was gathered.
            if sum(r.status != "OK" for r in all_results) > cfg.max_errors:
                print(f"\nGot more than {cfg.max_errors} errors, aborting...")
                break

    print(
        "\n--------------------------------- Errors -------------------------------"
    )
    print("\n".join(r.error for r in all_results if r.error))

    print(
        "\n-------------------------------- Timings -------------------------------"
    )
    timings = sorted(r.duration for r in all_results)
    print(
        'Plot: name="timings", title="Timing Distribution", ylabel="time [s]"')
    for p in (100, 99, 98, 95, 90, 80):
        v = percentile(timings, p / 100.0)
        print(f'PlotPoint: name="{p}th_percentile", plot="timings", ', end="")
        print(f'label="{p}th %ile", y={v:.2f}, yerr=0.0')

    print(
        "\n-------------------------------- Summary -------------------------------"
    )
    total_duration = time.perf_counter() - start_time
    num_tests = len(all_results)
    num_failed = sum(r.status in ("TIMEOUT", "RUNTIME FAIL")
                     for r in all_results)
    num_wrong = sum(r.status == "WRONG RESULT" for r in all_results)
    num_ok = sum(r.status == "OK" for r in all_results)
    print(f"Number of FAILED tests {num_failed}")
    print(f"Number of WRONG tests {num_wrong}")
    print(f"Number of CORRECT tests {num_ok}")
    print(f"Total number of tests {num_tests}")

    # The "wrong" and "failed" parts only appear when their count is non-zero.
    summary = f"\nSummary: correct: {num_ok} / {num_tests}"
    summary += f"; wrong: {num_wrong}" if num_wrong > 0 else ""
    summary += f"; failed: {num_failed}" if num_failed > 0 else ""
    summary += f"; {total_duration/60.0:.0f}min"
    print(summary)

    print("Status: " + ("OK" if num_ok == num_tests else "FAILED") + "\n")

    print(
        "*************************** testing ended ******************************"
    )
    # Exit status is the number of tests that did not end with status "OK".
    sys.exit(num_tests - num_ok)
async def get_pages(urls, proxy_url):
    """Fetch every URL through the proxy and print a preview of each page.

    A separate ``aiohttp.ProxyConnector`` is created for each URL. Pages are
    printed in completion order, with the content limited to 100 characters by
    the format string.
    """
    tasks = []
    for url in urls:
        tasks.append(fetch_page(url, aiohttp.ProxyConnector(proxy_url)))

    for finished in asyncio.as_completed(tasks):
        url, content = await finished
        print('url: %s; content: %.100s' % (url, content))
async def crawler():
    """Request every entry of ``urls`` in one session and print each response.

    Each ``session.get`` call is scheduled with ``asyncio.ensure_future``;
    the awaited results are printed in completion order.
    """
    async with aiohttp.ClientSession() as session:
        futures = [asyncio.ensure_future(session.get(url)) for url in urls]
        for finished in asyncio.as_completed(futures):
            response = await finished
            print(response)
async def load_startup_scripts_with_metadata(
    *md_items,
    standin_directories=None,
    processes: int = 8,
    use_gdb: bool = True,
) -> ScriptContainer:
    """
    Load all given startup scripts in a process pool, yielding each result.

    This is an async generator: one ``_load_ioc`` job per metadata item is
    submitted to a ``ProcessPoolExecutor`` and results are handled in
    completion order.

    NOTE(review): the return annotation says ``ScriptContainer`` but the body
    only yields ``(md, loaded_ioc)`` tuples - confirm the intended annotation.

    Parameters
    ----------
    *md_items : list of IocMetadata
        List of IOC metadata.
    standin_directories : dict
        Stand-in/substitute directory mapping.
    processes : int
        The number of processes to use when loading.
    use_gdb : bool
        Forwarded unchanged to ``_load_ioc``.

    Yields
    ------
    md
        The metadata item the result belongs to.
    loaded_ioc
        ``LoadedIoc.from_errored_load(md, loaded)`` when the child reported an
        ``IocLoadFailure``, otherwise the deserialized ``LoadedIoc``.
    """
    total_files = len(md_items)
    total_child_load_time = 0.0

    with time_context() as total_time, ProcessPoolExecutor(
            max_workers=processes, initializer=_process_init) as executor:
        # `identifier` is the index into md_items, used below to map a
        # completed result back to its metadata.
        coros = [
            asyncio.wrap_future(
                executor.submit(_load_ioc,
                                identifier=idx,
                                md=md,
                                standin_directories=standin_directories,
                                use_gdb=use_gdb))
            for idx, md in enumerate(md_items)
        ]

        for coro in asyncio.as_completed(coros):
            try:
                load_result = await coro
                md = md_items[load_result.identifier]
            except Exception as ex:
                # A failed job is logged and skipped; nothing is yielded for it.
                logger.exception(
                    "Internal error while loading: %s: %s [server %.1f s]",
                    type(ex).__name__,
                    ex,
                    total_time(),
                )
                continue

            # The literal result "use_cache" means the payload has to be read
            # from the cache here instead of coming from the child.
            use_cache = load_result.result == "use_cache"
            if not use_cache:
                loaded = load_result.result
            else:
                try:
                    loaded = load_cached_ioc(md, allow_failed_load=True)
                    if loaded is None:
                        raise ValueError("Cache entry is empty?")
                except Exception as ex:
                    logger.exception(
                        "Internal error while loading cached IOC from disk: "
                        "%s: %s [server %.1f s]",
                        type(ex).__name__,
                        ex,
                        total_time(),
                    )
                    continue

            total_child_load_time += load_result.load_time

            if isinstance(loaded, IocLoadFailure):
                failure_result: IocLoadFailure = loaded
                # The traceback is omitted from the log for FileNotFoundError.
                logger.error(
                    "Failed to load %s in subprocess: %s "
                    "[%.1f s; server %.1f]: %s\n%s",
                    md.name or md.script,
                    failure_result.ex_class,
                    load_result.load_time,
                    total_time(),
                    failure_result.ex_message,
                    (failure_result.traceback
                     if failure_result.ex_class != "FileNotFoundError" else ""),
                )
                # A still-default base version is replaced by "unknown" before
                # the errored result is yielded.
                if md.base_version == settings.DEFAULT_BASE_VERSION:
                    md.base_version = "unknown"
                yield md, LoadedIoc.from_errored_load(md, loaded)
                continue

            # Time only the deserialization so it can be logged separately
            # from the child's own load time.
            with time_context() as ctx:
                loaded_ioc = apischema.deserialize(LoadedIoc, loaded)
            logger.info(
                "Child loaded %s%s in %.1f s, server deserialized in %.1f s",
                md.name or md.script,
                " from cache" if load_result.cache_hit else "",
                load_result.load_time,
                ctx(),
            )
            yield md, loaded_ioc

    logger.info(
        "Loaded %d startup scripts in %.1f s (wall time) with %d process(es)",
        total_files,
        total_time(),
        processes,
    )
    logger.info(
        "Child processes reported taking a total of %.1f "
        "sec, the total time on %d process(es)",
        total_child_load_time,
        processes,
    )
async def get_url():
    """Print the result of every entry of the module-level ``tasks``.

    Results are printed in completion order.
    """
    for finished in asyncio.as_completed(tasks):
        result = await finished
        print(result)
async def update(self, ctx):
    """Updates cogs"""
    # Overview: every repo in self.repos is updated in the executor, a status
    # message is edited as repos finish, updated cogs that are installed get
    # reinstalled, and the user is finally offered a reload of those cogs.

    tasknum = 0
    num_repos = len(self.repos)

    # State used by regulate() to decide when the status message is edited.
    min_dt = 0.5
    burst_inc = 0.1 / (NUM_THREADS)
    touch_n = tasknum
    touch_t = time()

    def regulate(touch_t, touch_n):
        # Returns (edit, touch_t, touch_n). `edit` is True once the time since
        # the last edit plus burst_inc per skipped update exceeds min_dt; the
        # timestamp and counter are then reset. Otherwise the counter grows.
        dt = time() - touch_t
        if dt + burst_inc * (touch_n) > min_dt:
            touch_n = 0
            touch_t = time()
            return True, touch_t, touch_n
        return False, touch_t, touch_n + 1

    # Run self.update_repo for every repo in the executor.
    tasks = []
    for r in self.repos:
        task = partial(self.update_repo, r)
        task = self.bot.loop.run_in_executor(self.executor, task)
        tasks.append(task)

    base_msg = "Downloading updated cogs, please wait... "
    status = ' %d/%d repos updated' % (tasknum, num_repos)
    msg = await self.bot.say(base_msg + status)

    # Entries of the three cog lists are (repo name, cog, old hash) tuples.
    updated_cogs = []
    new_cogs = []
    deleted_cogs = []
    failed_cogs = []
    error_repos = {}
    installed_updated_cogs = []

    for f in as_completed(tasks):
        tasknum += 1
        try:
            name, updates, oldhash = await f
            if updates:
                if type(updates) is dict:
                    # Keys 'A', 'D' and 'M' feed the new, deleted and updated
                    # lists respectively; other keys are ignored.
                    for k, l in updates.items():
                        tl = [(name, c, oldhash) for c in l]
                        if k == 'A':
                            new_cogs.extend(tl)
                        elif k == 'D':
                            deleted_cogs.extend(tl)
                        elif k == 'M':
                            updated_cogs.extend(tl)
        except UpdateError as e:
            # The error carries (repo name, description) in its args.
            name, what = e.args
            error_repos[name] = what
        edit, touch_t, touch_n = regulate(touch_t, touch_n)
        if edit:
            status = ' %d/%d repos updated' % (tasknum, num_repos)
            msg = await self._robust_edit(msg, base_msg + status)
    status = 'done. '

    # Reinstall every updated cog that is currently installed.
    for t in updated_cogs:
        repo, cog, _ = t
        if self.repos[repo][cog]['INSTALLED']:
            try:
                await self.install(repo, cog, no_install_on_reqs_fail=False)
            except RequirementFail:
                failed_cogs.append(t)
            else:
                installed_updated_cogs.append(t)

    # Iterate over a copy because entries are removed from the list itself.
    for t in updated_cogs.copy():
        if t in failed_cogs:
            updated_cogs.remove(t)

    if not any(self.repos[repo][cog]['INSTALLED']
               for repo, cog, _ in updated_cogs):
        status += ' No updates to apply. '

    if new_cogs:
        status += '\nNew cogs: ' \
            + ', '.join('%s/%s' % c[:2] for c in new_cogs) + '.'
    if deleted_cogs:
        status += '\nDeleted cogs: ' \
            + ', '.join('%s/%s' % c[:2] for c in deleted_cogs) + '.'
    if updated_cogs:
        status += '\nUpdated cogs: ' \
            + ', '.join('%s/%s' % c[:2] for c in updated_cogs) + '.'
    if failed_cogs:
        status += '\nCogs that got new requirements which have ' + \
            'failed to install: ' + \
            ', '.join('%s/%s' % c[:2] for c in failed_cogs) + '.'
    if error_repos:
        status += '\nThe following repos failed to update: '
        for n, what in error_repos.items():
            status += '\n%s: %s' % (n, what)

    msg = await self._robust_edit(msg, base_msg + status)

    # Nothing was reinstalled, so there are no patch notes and nothing to reload.
    if not installed_updated_cogs:
        return

    patchnote_lang = 'Prolog'
    # Room reserved per page for the code-box wrapper.
    # NOTE(review): the constant 8 presumably covers the fence characters - confirm.
    shorten_by = 8 + len(patchnote_lang)
    for note in self.patch_notes_handler(installed_updated_cogs):
        if note is None:
            continue
        for page in pagify(note, delims=['\n'], shorten_by=shorten_by):
            await self.bot.say(box(page, patchnote_lang))

    await self.bot.say("Cogs updated. Reload updated cogs? (yes/no)")
    answer = await self.bot.wait_for_message(timeout=15,
                                             author=ctx.message.author)
    if answer is None:
        # No reply within the 15 second timeout.
        await self.bot.say("Ok then, you can reload cogs with"
                           " `{}reload <cog_name>`".format(ctx.prefix))
    elif answer.content.lower().strip() == "yes":
        registry = dataIO.load_json(os.path.join("data", "red", "cogs.json"))
        update_list = []
        fail_list = []
        for repo, cog, _ in installed_updated_cogs:
            # Only cogs with a truthy 'cogs.<name>' registry entry are reloaded.
            if not registry.get('cogs.' + cog, False):
                continue
            try:
                self.bot.unload_extension("cogs." + cog)
                self.bot.load_extension("cogs." + cog)
                update_list.append(cog)
            # NOTE(review): the bare except also catches KeyboardInterrupt and
            # SystemExit - consider narrowing it.
            except:
                fail_list.append(cog)
        msg = 'Done.'
        if update_list:
            msg += " The following cogs were reloaded: " \
                + ', '.join(update_list) + "\n"
        if fail_list:
            msg += " The following cogs failed to reload: " \
                + ', '.join(fail_list)
        await self.bot.say(msg)
    else:
        await self.bot.say("Ok then, you can reload cogs with"
                           " `{}reload <cog_name>`".format(ctx.prefix))
async def new_signage_point_harvester(
        self, new_challenge: harvester_protocol.NewSignagePointHarvester,
        peer: WSChiaConnection):
    """
    Handle a new signage point sent by the farmer.

    The harvester receives a new signage point from the farmer, this happens at the start of each slot.
    The harvester does a few things:
    1. The harvester applies the plot filter for each of the plots, to select the proportion which are eligible
    for this signage point and challenge.
    2. The harvester gets the qualities for each plot. This is approximately 7 reads per plot which qualifies.
    Note that each plot may have 0, 1, 2, etc qualities for that challenge: but on average it will have 1.
    3. Checks the required_iters for each quality and the given signage point, to see which are eligible for
    inclusion (required_iters < sp_interval_iters).
    4. Looks up the full proof of space in the plot for each quality, approximately 64 reads per quality
    5. Returns the proof of space to the farmer

    Every proof found is sent to ``peer`` as a ``new_proof_of_space`` message,
    followed by one ``farming_info`` message with the counters for this
    challenge. The method itself returns ``None``.
    """
    if len(self.harvester.pool_public_keys) == 0 or len(
            self.harvester.farmer_public_keys) == 0:
        # This means that we have not received the handshake yet
        return

    start = time.time()
    assert len(new_challenge.challenge_hash) == 32

    # Refresh plots to see if there are any new ones
    # (only when the last load was more than 120 seconds ago)
    if start - self.harvester.last_load_time > 120:
        await self.harvester.refresh_plots()
        self.harvester.last_load_time = time.time()

    loop = asyncio.get_running_loop()

    def blocking_lookup(
            filename: Path,
            plot_info: PlotInfo) -> List[Tuple[bytes32, ProofOfSpace]]:
        # Uses the DiskProver object to lookup qualities. This is a blocking call,
        # so it should be run in a thread pool.
        # Returns (quality string, proof of space) pairs; any error is logged
        # and results in an empty list instead of an exception.
        try:
            sp_challenge_hash = ProofOfSpace.calculate_pos_challenge(
                plot_info.prover.get_id(),
                new_challenge.challenge_hash,
                new_challenge.sp_hash,
            )
            try:
                quality_strings = plot_info.prover.get_qualities_for_challenge(
                    sp_challenge_hash)
            except Exception as e:
                self.harvester.log.error(f"Error using prover object {e}")
                return []

            responses: List[Tuple[bytes32, ProofOfSpace]] = []
            if quality_strings is not None:
                # Found proofs of space (on average 1 is expected per plot)
                for index, quality_str in enumerate(quality_strings):
                    required_iters: uint64 = calculate_iterations_quality(
                        self.harvester.constants.
                        DIFFICULTY_CONSTANT_FACTOR,
                        quality_str,
                        plot_info.prover.get_size(),
                        new_challenge.difficulty,
                        new_challenge.sp_hash,
                    )
                    sp_interval_iters = calculate_sp_interval_iters(
                        self.harvester.constants,
                        new_challenge.sub_slot_iters)
                    if required_iters < sp_interval_iters:
                        # Found a very good proof of space! will fetch the whole proof from disk,
                        # then send to farmer
                        try:
                            proof_xs = plot_info.prover.get_full_proof(
                                sp_challenge_hash, index)
                        except RuntimeError:
                            # Skip this quality only; the others are still tried.
                            self.harvester.log.error(
                                f"Exception fetching full proof for {filename}"
                            )
                            continue

                        plot_public_key = ProofOfSpace.generate_plot_public_key(
                            plot_info.local_sk.get_g1(),
                            plot_info.farmer_public_key)
                        responses.append((
                            quality_str,
                            ProofOfSpace(
                                sp_challenge_hash,
                                plot_info.pool_public_key,
                                plot_info.pool_contract_puzzle_hash,
                                plot_public_key,
                                uint8(plot_info.prover.get_size()),
                                proof_xs,
                            ),
                        ))
            return responses
        except Exception as e:
            self.harvester.log.error(f"Unknown error: {e}")
            return []

    async def lookup_challenge(
            filename: Path, plot_info: PlotInfo
    ) -> List[harvester_protocol.NewProofOfSpace]:
        # Executes a DiskProverLookup in a thread pool, and returns responses
        all_responses: List[harvester_protocol.NewProofOfSpace] = []
        if self.harvester._is_shutdown:
            return []
        proofs_of_space_and_q: List[Tuple[
            bytes32, ProofOfSpace]] = await loop.run_in_executor(
                self.harvester.executor, blocking_lookup, filename, plot_info)
        for quality_str, proof_of_space in proofs_of_space_and_q:
            # The third field is the quality string in hex followed by the
            # resolved plot path.
            all_responses.append(
                harvester_protocol.NewProofOfSpace(
                    new_challenge.challenge_hash,
                    new_challenge.sp_hash,
                    quality_str.hex() + str(filename.resolve()),
                    proof_of_space,
                    new_challenge.signage_point_index,
                ))
        return all_responses

    awaitables = []
    passed = 0  # plots that passed the plot filter
    total = 0  # plots whose file exists on disk
    for try_plot_filename, try_plot_info in self.harvester.provers.items():
        if try_plot_filename.exists():
            # Passes the plot filter (does not check sp filter yet though, since we have not reached sp)
            # This is being executed at the beginning of the slot
            total += 1
            if ProofOfSpace.passes_plot_filter(
                    self.harvester.constants,
                    try_plot_info.prover.get_id(),
                    new_challenge.challenge_hash,
                    new_challenge.sp_hash,
            ):
                passed += 1
                awaitables.append(
                    lookup_challenge(try_plot_filename, try_plot_info))

    # Concurrently executes all lookups on disk, to take advantage of multiple disk parallelism
    total_proofs_found = 0
    for sublist_awaitable in asyncio.as_completed(awaitables):
        for response in await sublist_awaitable:
            total_proofs_found += 1
            msg = make_msg(ProtocolMessageTypes.new_proof_of_space, response)
            await peer.send_message(msg)

    # Report the counters for this challenge back to the farmer.
    now = uint64(int(time.time()))
    farming_info = FarmingInfo(
        new_challenge.challenge_hash,
        new_challenge.sp_hash,
        now,
        uint32(passed),
        uint32(total_proofs_found),
        uint32(total),
    )
    pass_msg = make_msg(ProtocolMessageTypes.farming_info, farming_info)
    await peer.send_message(pass_msg)
    self.harvester.log.info(
        f"{len(awaitables)} plots were eligible for farming {new_challenge.challenge_hash.hex()[:10]}..."
        f" Found {total_proofs_found} proofs. Time: {time.time() - start:.5f} s. "
        f"Total {len(self.harvester.provers)} plots")
async def cli_add(args): """ *** add *** Usage ----- a[dd] DOI[...] Description ----------- Adds one or more DOIs to the reference list. Separate DOIs must be separated by spaces. After the reference is added, the list is sorted again using the currently active sorting method. Uses the Crossref API to obtain metadata about an article. """ # TODO automatically try to fetch pdf??? # Argument parsing if args == []: return _error("addRef: no DOIs provided") yes = 0 no = 0 # Check if any are already in the library dois = [] for doi in args: found = False for refno, article in enumerate(_g.articleList, start=1): if doi == article.doi: found = True break if found: _error(f"add: DOI '{doi}' already in library.\n" f" Use 'u[pdate] {refno}' to refresh metadata.") no += 1 else: dois.append(doi) if dois == []: return articles = [] coroutines = [DOI(doi).to_article_cr(_g.ahSession) for doi in dois] async with Spinner(message="Fetching metadata...", total=len(dois)) as spinner: for crt in asyncio.as_completed(coroutines): articles.append(await crt) spinner.increment(1) for article in articles: # Check for failure if article.title is None: _error(f"add: invalid DOI '{article.doi}'") no += 1 continue else: article.time_added = datetime.now(timezone.utc) article.time_opened = datetime.now(timezone.utc) # Prompt user whether to accept the article empty_article = Article() empty_article.diff(article) msg = "add: accept new data (y/n)? ".format() style = pt.styles.Style.from_dict({ "prompt": _g.ptBlue, "": _g.ptGreen }) try: ans = await pt.PromptSession().prompt_async(msg, style=style) except (EOFError, KeyboardInterrupt): ans = "no" if ans.strip().lower() in ["", "y", "yes"]: _g.articleList.append(article) print(f"add: added DOI {article.doi}") yes += 1 else: print(f"add: DOI {article.doi} not added") no += 1 print(f"add: {yes} DOIs added, {no} failed") _g.changes += ["add"] * yes _sort.sort() # Sort according to the currently active mode return yes, no
def wait_with_progress(coros):
    """Drive every awaitable in *coros* to completion behind a tqdm bar.

    This is a generator-based coroutine: it delegates to each awaitable with
    ``yield from`` in completion order. The bar's total is ``len(coros)``.
    """
    progress = tqdm.tqdm(asyncio.as_completed(coros), total=len(coros))
    for finished in progress:
        yield from finished
async def download_novel(url: str):
    """Download the chapters of the novel at *url* that are not on disk yet.

    A parser is chosen from what ``BaseParser.identify(url)`` returns;
    unsupported sites are reported and the function returns early. Chapter
    files live under ``DOWNLOADS_DIR/<TITLE>/contents`` and are named after
    the last URL segment with ``.html`` removed and ``.chapter`` appended.
    """
    # check website source
    nparse = None
    if BaseParser.identify(url) == Website.WUXIAWORLDCO:
        nparse = WuxiaWorldCo()
    elif BaseParser.identify(url) == Website.BOXNOVELCOM:
        nparse = BoxNovelCom()
    else:
        print(f'[UNSUPPORTED WEBSITE] {url}')
        return

    # get markup
    async with aiohttp.ClientSession() as session:
        novel, status = await get_novel(session, url, nparse)
        plog([status], url)

        if novel is not None:
            plog(['chapter count'], len(novel.chapters))

            # get filepath to save novel
            # (directory is the upper-cased title, JSON file the lower-cased
            # title, both with spaces replaced by underscores)
            download_path = Path(DOWNLOADS_DIR,
                                 novel.meta.title.replace(' ', '_').upper())
            novel_path = Path(
                download_path,
                ''.join([novel.meta.title.replace(' ', '_').lower(),
                         '.json'])).absolute()

            # ensure download directory exists
            if not download_path.exists():
                download_path.mkdir()

            # load json file if exists and has data
            # NOTE(review): the extent of this `if` body is reconstructed from
            # a flattened source; as written the novel is only saved when the
            # JSON file already exists and is non-empty - confirm.
            if novel_path.exists() and novel_path.lstat().st_size > 0:
                # load previous novel information
                # novel = await load_novel(novel_path)

                # save novel information
                converted_chapters = await save_novel(novel, novel_path)
                plog(['chapter -> dict'], converted_chapters)

            # check number of undownloaded content
            amount_to_download = 0
            for chapter in novel.chapters:
                cpath = Path(
                    download_path, 'contents', ''.join([
                        chapter.url.split('/')[::-1][0].replace(
                            '.html', ''), '.chapter'
                    ]))
                if not cpath.exists():
                    amount_to_download += 1
            plog(['# downloads'], amount_to_download)

            # create tasks to download content
            tasks = []
            for chapter in novel.chapters:
                chapter_path = Path(
                    download_path, 'contents', ''.join([
                        chapter.url.split('/')[::-1][0].replace(
                            '.html', ''), '.chapter'
                    ]))
                if not chapter_path.exists():
                    # Prefix the base URL unless the chapter URL already
                    # contains it.
                    chapter.url = ''.join(
                        [novel.base_url, chapter.url]
                    ) if novel.base_url not in chapter.url else chapter.url
                    task = asyncio.create_task(
                        get_chapter_content(session, chapter, nparse))
                    tasks.append(task)

            # process completed content download
            bytes_downloaded = 0
            downloaded = 0
            for future in asyncio.as_completed(tasks):
                try:
                    content, status, chapter = await future
                    chapter_path = Path(
                        download_path, 'contents', ''.join([
                            chapter.url.split('/')[::-1][0].replace(
                                '.html', ''), '.chapter'
                        ]))
                    if content is not None:
                        downloaded += 1
                        await save(content, chapter_path)
                        bytes_downloaded += len(content)
                        plog([status, chapter.id],
                             f'{len(content)} b - {chapter.url}')
                except aiohttp.ServerConnectionError as e:
                    # NOTE(review): `chapter` here is whatever an earlier loop
                    # left bound, not necessarily the chapter that failed, and
                    # the task appended below is presumably never awaited
                    # because as_completed() was already called on `tasks` -
                    # verify the retry actually happens.
                    plog(['retry'])
                    chapter.url = ''.join(
                        [novel.base_url, chapter.url]
                    ) if novel.base_url not in chapter.url else chapter.url
                    task = asyncio.create_task(
                        get_chapter_content(session, chapter, nparse))
                    tasks.append(task)
            plog(['downloaded'], downloaded)
async def cli_update(args): """ *** update *** Usage ----- u[pdate] refno[...] Description ----------- Update one or more references using the Crossref API. If any differences in the metadata are detected, then the user is prompted to accept or reject the changes before applying them to the database. At least one refno must be specified. For more details about the format in which refnos are specified, type 'h list'. """ # Argument processing if _g.articleList == []: return _error("update: no articles have been loaded") try: refnos = parse_refnos(args) except ArgumentError as e: return _error(f"update: {str(e)}") if len(refnos) == 0: return _error("update: no references selected") # Lists containing old and new Articles. Since data is being pulled # asynchronously, we need to be careful with the sorting. Throughout this # section we sort every list by the DOIs. old_articles = [_g.articleList[r - 1] for r in refnos] old_articles, refnos = zip( *sorted(zip(old_articles, refnos), key=(lambda t: t[0].doi))) crts = [article.to_newarticle_cr(_g.ahSession) for article in old_articles] new_articles = [] # Perform asynchronous HTTP requests async with Spinner(message="Fetching metadata...", total=len(refnos)) as spinner: for crt in asyncio.as_completed(crts): new_articles.append(await crt) spinner.increment(1) # After we finish pulling the new Articles, they are out of order. We can # sort the new Articles by DOI to get the same ordering as the old Articles # and refnos. new_articles.sort(key=attrgetter("doi")) # Now we sort everything by refnos so that we can present them nicely to # the user. 
refnos, old_articles, new_articles = zip( *sorted(zip(refnos, old_articles, new_articles))) # Present them one by one to the user yes = 0 for refno, old_article, new_article in zip(refnos, old_articles, new_articles): if new_article.title is None: _error(f"update: ref {refno} has invalid DOI '{old_article.doi}'") continue # copy over timeAdded, timeOpened data from old reference new_article.time_added = old_article.time_added new_article.time_opened = old_article.time_opened # calculate and report differences ndiffs = old_article.diff(new_article) if ndiffs == 0: print(f"update: ref {refno}: no new data found") else: msg = f"update: ref {refno}: accept new data? (y/n) " style = pt.styles.Style.from_dict({ "prompt": _g.ptBlue, "": _g.ptGreen }) try: ans = await pt.PromptSession().prompt_async(msg, style=style) except (EOFError, KeyboardInterrupt): ans = "no" if ans.strip().lower() in ["", "y", "yes"]: _g.articleList[refno - 1] = new_article print(f"update: ref {refno}: successfully updated") yes += 1 else: # ok, it isn't really (y/n), it's (y/not y) print(f"update: ref {refno}: changes rejected") print(f"update: {yes} article{_p(yes)} updated") _g.changes += ["update"] * yes return _ret.SUCCESS
async def main():
    """Try every password against every target through the mapless API.

    Command-line options are parsed with ``OptionParser``. The API base URL
    and key come from the ``MAPLESS_BASE_URL`` / ``MAPLESS_API_KEY``
    environment variables, with the ``--url`` / ``--api-key`` options as
    fallback. One request task is created per (target, password) pair and the
    tasks are awaited behind a tqdm progress bar.
    """
    parser = OptionParser()
    parser.add_option(
        "-U",
        "--url",
        dest="base_url",
        help=
        "the mapless api base url (eg.: https://0000000000.execute-api.us-east-1.amazonaws.com)"
    )
    parser.add_option(
        "-K",
        "--api-key",
        dest="api_key",
        help=
        "the mapless api key (eg.: 5ViTxWEDxR3Rk9T5tTquV5VktPKlBP8Z9QjHcqe1)")
    parser.add_option("-H", "--host", dest="host", help="hostname/IP")
    parser.add_option("-p",
                      "--port",
                      type=int,
                      dest="port",
                      default=80,
                      help="port (default: 80)")
    parser.add_option("--path", dest="path", default='', help="HTTP path")
    parser.add_option("-u",
                      "--user",
                      dest="username",
                      default='root',
                      help="username")
    parser.add_option("-P",
                      "--password",
                      dest="password_file",
                      help="file with passwords")
    parser.add_option(
        "",
        "--proto",
        dest="proto",
        default="http",
        help="supported protocol [http, ssh, ami] (default: http)")
    parser.add_option("-f",
                      "--file",
                      dest="targets_file",
                      help="csv file with targets (format: ip, port)")
    parser.add_option(
        "--rl",
        "--rate-limit",
        type=int,
        dest="rate_limit",
        default=INT_MAX,
        help="limit the number of requests/period (default: no limit)")
    parser.add_option("--rlp",
                      "--rate-limit-period",
                      type=float,
                      dest="rate_limit_period",
                      default=1.0,
                      help="rate limit period (default: 1.0)")
    parser.add_option(
        "--cl",
        "--connection-limit",
        type=int,
        dest="connection_limit",
        default=16,
        help=
        "number of simultaneous connections to the API backend (default: 16)")
    parser.add_option("-v",
                      "--verbose",
                      action="store_true",
                      dest="verbose",
                      default=False,
                      help="print INFO level messages to stdout")
    parser.add_option("-d",
                      "--debug",
                      action="store_true",
                      dest="debug",
                      default=False,
                      help="print DEBUG level messages to stdout")

    (options, _) = parser.parse_args()

    # --verbose is checked first, so it wins when both -v and -d are given.
    if options.verbose:
        logging.basicConfig(level=logging.INFO)
    elif options.debug:
        logging.basicConfig(level=logging.DEBUG)
    else:
        logging.basicConfig(level=logging.WARNING)

    # Environment variables take precedence over the command-line options.
    BASE_URL = os.environ.get('MAPLESS_BASE_URL', options.base_url)
    ENDPOINT = f'/dev/login/{options.proto}'
    URL = f'{BASE_URL}{ENDPOINT}'
    APIKEY = os.environ.get('MAPLESS_API_KEY', options.api_key)
    HEADERS = {'X-API-KEY': APIKEY, 'Content-Type': 'application/json'}
    # Note: at DEBUG level this writes the API key to the log.
    logging.debug(f'HEADERS: {HEADERS}')
    CONNECTOR = aiohttp.TCPConnector(limit_per_host=options.connection_limit,
                                     ttl_dns_cache=100)
    THROTTLER = Throttler(rate_limit=options.rate_limit,
                          period=options.rate_limit_period)

    async def test_auth(session,
                        host,
                        port,
                        username,
                        password,
                        throttler,
                        path=None):
        # Sends one throttled GET to URL with the credentials as query
        # parameters. `path` is accepted but not used in this body.
        params = {
            'host': host,
            'port': port,
            'username': username,
            'password': password
        }
        async with throttler, session.get(URL, params=params) as resp:
            logging.info(f"{username}:{password}@{host}:{port}")
            if resp.status == 200:
                # Status 200 is reported as a valid password.
                data = await resp.json()
                logging.debug(data)
                print(
                    f"Found valid password '{colorit.color_front(password,0,255,0)}' for {username}@{host}:{port}"
                )
            elif resp.status > 401:
                # Statuses above 401 are only logged at DEBUG level; 401 and
                # everything below (other than 200) is ignored.
                data = await resp.json()
                logging.debug(data)

    with open(options.password_file, 'r') as password_file:
        async with aiohttp.ClientSession(headers=HEADERS,
                                         connector=CONNECTOR) as session:
            # Build one keyword-argument dict per (target, password) pair.
            scan_args = []
            for password in password_file.readlines():
                if options.targets_file:
                    import csv
                    # The targets file is re-read for every password.
                    with open(options.targets_file, 'r') as csvfile:
                        reader = csv.DictReader(csvfile)
                        scan_args += list(
                            map(
                                lambda row: {
                                    'host': row['ip'],
                                    'port': row['port'],
                                    'username': options.username,
                                    'password': password.strip(),
                                    'session': session,
                                    'path': options.path,
                                    'throttler': THROTTLER
                                }, reader))
                else:
                    scan_args.append({
                        'host': options.host,
                        'port': options.port,
                        'username': options.username,
                        'password': password.strip(),
                        'session': session,
                        'path': options.path,
                        'throttler': THROTTLER
                    })
            tasks = []
            logging.debug(f'# OF TASKS: {len(scan_args)}')
            for args in scan_args:
                task = asyncio.create_task(test_auth(**args))
                tasks.append(task)
            # This task is started but neither stored nor awaited here.
            asyncio.create_task(increment_rate_limit(THROTTLER))
            import tqdm
            # test_auth has no return statement, so `responses` collects None
            # values; the loop exists to await every task behind the bar.
            responses = []
            for f in tqdm.tqdm(asyncio.as_completed(tasks), total=len(tasks)):
                responses.append(await f)
async def test_images(root):
    """Await ``get_image`` for every entry of the directory *root*.

    Each result is unpacked into a ``(path, shape)`` pair, so a result of any
    other arity raises; nothing else is done with the values.
    """
    queue = []
    for entry in root.iterdir():
        queue.append(get_image(entry))

    for finished in asyncio.as_completed(queue):
        path, shape = await finished