def store_runtime(a_task: asyncio.Task):
    """Done-callback that records a finished benchmark's runtime in ``result_file``.

    The benchmark belonging to ``a_task`` is looked up in ``task_map`` and its
    runtime is stored under ``runtime -> <identifier>`` in the JSON document
    held by ``result_file``. Other keys already present in the document are
    kept. A missing file is created and a file that does not contain valid
    JSON is overwritten. Finally the callback unregisters itself from the task.

    Args:
        a_task: The finished task; must be a key of ``task_map``.
    """
    a_bench = task_map[a_task]

    if result_file.exists():
        with result_file.open('r+') as fp:
            try:
                original: Dict[str, Any] = json.load(fp)
            except json.decoder.JSONDecodeError:
                # Unreadable content: start over with a fresh document.
                original = {}
            original.setdefault('runtime', {})[a_bench.identifier] = a_bench.runtime
            fp.seek(0)
            json.dump(original, fp, indent=4)
            # The new document may be shorter than the old one; cut off the
            # leftover tail so the file stays valid JSON.
            fp.truncate()
    else:
        with result_file.open('w') as fp:
            json.dump({'runtime': {a_bench.identifier: a_bench.runtime}}, fp, indent=4)

    a_task.remove_done_callback(store_runtime)
async def _async_run_long_action(self, long_task: asyncio.Task) -> None:
    """Run a long task while monitoring for stop request.

    Waits until either ``long_task`` finishes or ``self._stop`` is set.

    Args:
        long_task: The task to supervise.

    Raises:
        asyncio.CancelledError: If this coroutine or ``long_task`` was cancelled.
        Exception: Whatever ``long_task`` itself raised.
    """

    async def async_cancel_long_task() -> None:
        # Stop long task and wait for it to finish.
        long_task.cancel()
        # CancelledError derives from BaseException since Python 3.8, so it
        # must be listed explicitly; otherwise the cancellation requested
        # just above would escape from here.
        with suppress(asyncio.CancelledError, Exception):
            await long_task

    # Wait for long task while monitoring for a stop request.
    stop_task = self._hass.async_create_task(self._stop.wait())
    try:
        await asyncio.wait({long_task, stop_task}, return_when=asyncio.FIRST_COMPLETED)
    # If our task is cancelled, then cancel long task, too. Note that if long task
    # is cancelled otherwise the CancelledError exception will not be raised to
    # here due to the call to asyncio.wait(). Rather we'll check for that below.
    except asyncio.CancelledError:
        await async_cancel_long_task()
        raise
    finally:
        stop_task.cancel()

    if long_task.cancelled():
        raise asyncio.CancelledError
    if long_task.done():
        # Propagate any exceptions that occurred.
        long_task.result()
    else:
        # Stopped before long task completed, so cancel it.
        await async_cancel_long_task()
def typed_asyncoro(*args, **kwargs):
    """Start ``func(*args, **kwargs)`` as a coroutine and return a placeholder.

    ``func``, ``rettype``, ``pc`` and ``runtime`` come from the enclosing
    scope. The placeholder ("declaration") is built from ``rettype`` when it
    is set; otherwise it is the first value the coroutine yields.

    Returns:
        The coroutine's own return value if it finishes before yielding a
        declaration; the declaration itself in ``no_async`` mode; otherwise
        ``_ncopy(decl)`` while a Task completes the coroutine in the
        background.
    """
    # Every exit path that does not hand the coroutine to a Task undoes this
    # increment again.
    # NOTE(review): the Task path below leaves the level raised here -
    # presumably it is lowered elsewhere; confirm.
    runtime._pc_level += 1
    coro = func(*args, **kwargs)
    if rettype:
        decl = returnType(rettype, wrap=False)
    else:
        try:
            # Advance to the first suspension point; the yielded value serves
            # as the declaration of the result.
            decl = coro.send(None)
        except StopIteration as exc:
            # The coroutine finished without ever suspending.
            runtime._pc_level -= 1
            return exc.value
        except Exception:
            runtime._pc_level -= 1
            raise
    if runtime.options.no_async:
        # Synchronous mode: drive the coroutine to completion right here.
        while True:
            try:
                coro.send(None)
            except StopIteration as exc:
                runtime._pc_level -= 1
                if decl is not None:
                    __reconcile(decl, exc.value)
                return decl
            except Exception:
                runtime._pc_level -= 1
                raise
    if pc:
        coro = _wrap_in_coro(_ProgramCounterWrapper(runtime, coro))
    task = Task(coro, loop=runtime._loop)
    # Once the task is done, reconcile the declaration with the task.
    task.add_done_callback(lambda t: _reconcile(decl, t))
    return _ncopy(decl)
def task_complete(self, task: asyncio.Task):
    """Done-callback of the keep-alive task: start a new one after a crash.

    A cancelled task, or one that finished without an exception, is left
    alone. Otherwise the task's stack is printed and a fresh ``rebooter()``
    task is created with this method as its done-callback.
    """
    if task.cancelled():
        return
    if not task.exception():
        return

    task.print_stack()
    self.keep_alive = self.loop.create_task(self.rebooter())
    self.keep_alive.add_done_callback(self.task_complete)
async def _cancel_task_if_exists(task: asyncio.Task):
    """Cancel ``task`` and wait for it to wind down; a falsy ``task`` is ignored."""
    if not task:
        return

    task.cancel()
    try:
        await task
    except asyncio.CancelledError:
        # Expected outcome of the cancellation requested above.
        return
def test_get_contract(self):
    """A connected client returns a non-None result for ``TEST_CONTRACT``."""
    client = Client(self.loop)
    self.loop.run_until_complete(client.connect())
    # Schedule on the test's own loop. A bare ``Task(...)`` binds to whatever
    # loop asyncio treats as the current default, which need not be
    # ``self.loop``.
    task = self.loop.create_task(client.get_contract(TEST_CONTRACT))
    self.loop.run_until_complete(task)
    self.assertIsNotNone(task.result())
    self.loop.run_until_complete(client.disconnect())
def _handle_task_result(task: asyncio.Task) -> None:
    """Done-callback that logs any exception raised by the finished task.

    A cancelled task is ignored and a normal result is discarded.
    """
    if task.cancelled():
        # Task cancellation should not be logged as an error.
        return

    try:
        task.result()
    except Exception:  # pylint: disable=broad-except
        logging.exception('Exception raised by task = %r', task)
def _task_done_callback(self, task_id: t.Hashable, done_task: asyncio.Task) -> None:
    """
    Forget a finished task and log the exception it ended with, if any.

    The entry for `task_id` is only deleted when it still refers to `done_task`.
    If it refers to a different task, a new task was likely rescheduled under the
    same ID and is left in place. A missing entry is reported with a warning
    unless `done_task` was cancelled.
    """
    self._log.info(f"Performing done callback for task #{task_id} {id(done_task)}.")

    scheduled_task = self._scheduled_tasks.get(task_id)

    if not scheduled_task:
        if not done_task.cancelled():
            self._log.warning(
                f"Task #{task_id} not found while handling task {id(done_task)}! "
                f"A task somehow got unscheduled improperly (i.e. deleted but not cancelled)."
            )
    elif done_task is scheduled_task:
        # Same task as the one on record. It is already done, so there is
        # nothing to cancel - just drop the entry.
        self._log.info(f"Deleting task #{task_id} {id(done_task)}.")
        del self._scheduled_tasks[task_id]
    else:
        # A new task was likely rescheduled with the same ID.
        self._log.debug(
            f"The scheduled task #{task_id} {id(scheduled_task)} "
            f"and the done task {id(done_task)} differ."
        )

    # Asking a cancelled task for its exception raises CancelledError.
    with contextlib.suppress(asyncio.CancelledError):
        failure = done_task.exception()
        if failure:
            self._log.error(f"Error in task #{task_id} {id(done_task)}!", exc_info=failure)
def task_done_callback(self, task: asyncio.Task):
    """Done-callback that logs the exception of a failed task.

    Cancelled tasks and successful results are ignored.
    """
    if task.cancelled():
        return

    try:
        task.result()
    except Exception as error:
        log.exception("Task failed.", exc_info=error)
def task_callback(task: asyncio.Task) -> None:
    """Log the exception of ``task``, if it has one.

    Nothing is logged for a task that was cancelled or is not finished yet.
    """
    with contextlib.suppress(asyncio.CancelledError, asyncio.InvalidStateError):
        failure = task.exception()
        if failure:
            log.exception("%s raised an Exception", task.get_name(), exc_info=failure)
async def cancel(task: asyncio.Task):
    """Cancel ``task`` and wait until the cancellation has been processed.

    A task that is already finished is left untouched.
    """
    if task.done() or task.cancelled():
        return

    try:
        task.cancel()
        await task
    except asyncio.CancelledError:
        # Expected: the task acknowledged the cancellation.
        pass
def _worker_done(self, task: asyncio.Task) -> None:
    """Done-callback of the worker task: restart it after a crash, within limits.

    * Cancelled worker: the connection task is cancelled as well.
    * Worker finished without an exception: only a debug message is logged.
    * Worker raised: the stack is logged and the failure time recorded. Once
      five failures are on record, the oldest is dropped; if it lies within
      the last 10 seconds the connection is closed instead of restarting.
      Otherwise a new worker task is started with this callback attached.

    Args:
        task: The finished task; must be ``self._worker_task``.
    """
    assert task is self._worker_task

    if task.cancelled():
        self._connection_task.cancel()
        return
    if not task.exception():
        self.logger.debug("Worker task exited gracefully")
        return

    f = io.StringIO()
    task.print_stack(file=f)
    self.logger.error(f.getvalue())

    # Sample the clock once so the stored timestamp and the threshold
    # reference are the same instant.
    now = time.time()
    self._worker_task_failure_timestamps.append(now)
    if len(self._worker_task_failure_timestamps) == 5:
        if self._worker_task_failure_timestamps.pop(0) >= now - 10:
            self.logger.error(
                "Worker task exceeded exception threshold; terminating"
            )
            self._close("Exception threshold exceeded")
            return

    self.logger.warning("Restarting worker task")
    self._worker_task = self.loop.create_task(self._worker())
    self._worker_task.add_done_callback(self._worker_done)
def add_to_map(self, url: str, action: Method, fut: asyncio.Task, **kwargs) -> None:
    """Record the outcome of a finished request in the bookkeeping maps.

    Args:
        url: Key of the server entry in the maps that are updated.
        action: ``Method.ADD_SUB`` or ``Method.ADD_MI``; selects which maps
            are touched.
        fut: The finished request; its exception or result is inspected.
        **kwargs: Must contain ``sub_name``. For ``ADD_SUB`` the remaining
            entries are passed to ``VirtualSubscription``; for ``ADD_MI``
            the keys ``nodes`` and ``node_attr`` are read.
    """
    if fut.exception():
        _logger.warning(f"Can't {action.value} on {url}: {fut.exception()}")
        self.hook_add_to_map_error(url=url, action=action, fut=fut, **kwargs)
        return
    # sub_name is removed from kwargs here, so it is not forwarded to
    # VirtualSubscription or to the hook at the end.
    sub_name = kwargs.pop("sub_name")
    if action == Method.ADD_SUB:
        vs = VirtualSubscription(**kwargs)
        self.real_map[url][sub_name] = vs
        self.name_to_subscription[url][sub_name] = fut.result()
    if action == Method.ADD_MI:
        nodes = kwargs["nodes"]
        vs = self.real_map[url][sub_name]
        # Register all nodes first; the ones whose request failed are
        # removed again in the loop below.
        vs.subscribe_data_change(nodes, *astuple(kwargs["node_attr"]))
        for node, handle in zip(nodes, fut.result()):
            if isinstance(handle, ua.StatusCode):
                # a StatusCode is returned, the request has failed.
                vs.unsubscribe([node])
                _logger.info(f"Node {node} subscription failed: {handle}")
                # The node is invalid, remove it from both maps
                if handle.name == "BadNodeIdUnknown":
                    _logger.warning(
                        f"WARNING: Abandoning {node} because it returned {handle} from {url}"
                    )
                    # NOTE(review): despite its name, real_vs is taken from
                    # ha_client.ideal_map - confirm this is intended.
                    real_vs = self.ha_client.ideal_map[url][sub_name]
                    real_vs.unsubscribe([node])
                continue
            self.node_to_handle[url][node] = handle
    self.hook_add_to_map(fut=fut, url=url, action=action, **kwargs)
def _task_done(self, task: asyncio.Task) -> None:
    """Done-callback: untrack ``task`` and re-raise the exception it ended with.

    Nothing is raised for a cancelled task or a task that finished normally.
    """
    self._tasks.remove(task)

    if not task.cancelled():
        failure = task.exception()
        if failure is not None:
            raise failure
async def _stop(self, kill_task: asyncio.Task, grace_time: int) -> None:
    """Stop the docker container and cancel the pending kill task.

    Args:
        kill_task: Task that is cancelled once the container has stopped.
        grace_time: Seconds to wait; ``stop()`` is given ``grace_time + 1``.
    """
    time_start = time.time()
    # I have no idea why this happens but without a sleep the event loop
    # gets blocked and the container takes an extra 10 seconds to stop
    await asyncio.sleep(1)
    LOGGER.info(f"stopping docker container {self._name}")
    # For better logging we handle our own SIGKILL, so add 1 to the timeout
    # so that docker's own kill will not be triggered first.
    try:
        self._process.stop(timeout=grace_time + 1)
    except docker.errors.NotFound:
        # The container is already gone.
        pass
    # NOTE(review): the trailing ``False`` is passed as a logging argument,
    # yet the message has no placeholder for it. With a standard-library
    # logger this triggers a formatting error at emit time - confirm what
    # LOGGER expects here.
    LOGGER.debug(
        f"container {self._name} stopped in {int(time.time() - time_start)} seconds", False)
    kill_task.cancel()
    # fix for gitlab ci failing to stop containers
    if self._unique_name in [
        cont.name for cont in client.containers.list(all=True)
    ]:
        await self._kill(0)
async def finalize_task(task: Task) -> Any:
    """Await ``task`` and hand back its result.

    Whatever happens while waiting, ``cancel()`` is called on the task
    afterwards unless it is already cancelled.
    """
    try:
        outcome = await task
        return outcome
    finally:
        already_cancelled = task.cancelled()
        if not already_cancelled:
            task.cancel()
def _on_done(self, task: asyncio.Task) -> None:
    """Done-callback: log the error of a task that ended with an exception.

    Cancelled tasks are ignored.
    """
    if task.cancelled():
        return

    try:
        task.result()
    except Exception:
        log.exception("Async-iterator task ended with error")
def background_task_callback(task: asyncio.Task) -> None:
    """Report how a finished background task ended.

    A cancellation is logged at info level, an exception at error level, and
    a normal completion is not logged at all.
    """
    if task.cancelled():
        log.info(f"Background task `{task.get_name()}` was cancelled.")
        return

    exception = task.exception()
    if exception:
        log.error(f"Background task `{task.get_name()}` failed:", exc_info=exception)
def _send_then_recv(self, send, recv):
    """Send a RADIUS request and wait for the response, retrying on failure.

    This is a generator-based coroutine. Up to ``self.max_tries`` attempts
    are made; each attempt waits at most ``self.timeout`` for the receive
    task.

    Args:
        send: Callable returning the awaitable that transmits the request.
        recv: Callable returning the awaitable that yields the response. It
            is used for the first receive task and for every replacement.

    Returns:
        The received response, or ``None`` when no valid response arrived.
    """
    fut_recv = Task(recv())
    result = None
    for _ in range(self.max_tries):
        try:
            yield from send()
        except ConnectionError as e:
            logging.warning("Failed to send RADIUS request: %s", e)
            yield from sleep(TIMEOUT, loop=self.loop)
            continue

        try:
            result = yield from wait_for(shield(fut_recv), self.timeout)
            break
        except TimeoutError:
            # No need to restart task, since it is protected by shield().
            logging.warning("Timeout, re-send RADIUS request.")
        except ValueError as e:
            logging.warning("Malformed RADIUS packet received: %s", e)
            logging.info("Please check the shared secret.")
            # The receive task has finished with an error; start a new one
            # from the factory the caller supplied.
            fut_recv = Task(recv())
        except ConnectionError as e:
            logging.warning("Failed to receive RADIUS response: %s", e)
            yield from sleep(TIMEOUT, loop=self.loop)
            fut_recv = Task(recv())

    if result is None:
        logging.warning("Timeout. No valid RADIUS response.")
        fut_recv.cancel()
    return result
async def main():
    """Time message insertion, retrieval and deletion on the 'aiotest' queue.

    Creates the queue, puts ``OPERATION_COUNT`` messages concurrently, reads
    them back in batches of 32, deletes every retrieved message and prints
    the throughput of each phase before closing the client.
    """
    q = QueueClient(STORAGE_ACCOUNT, STORAGE_KEY)
    #print("Table Deletion", end=" ")
    #print((await tq.deleteTable('aiotest')).status)
    print("Queue Creation", end=" ")
    creation = await q.createQueue('aiotest')
    print(creation.status)

    print("\nInsertion:")
    tasks = [Task(q.putMessage('aiotest', 'hello world')) for _ in range(OPERATION_COUNT)]
    start = time()
    res = await gather(*tasks)
    print("{} operations/s".format(OPERATION_COUNT/(time()-start)))
    #print([r.status for r in res])

    print("Retrieval:")
    receipts = []
    start = time()
    batch_count = int(OPERATION_COUNT/32)+1
    for _ in range(batch_count):
        async for msg in q.getMessages('aiotest', numofmessages=32):
            receipts.append((msg['MessageId'], msg['PopReceipt']))
    print("{} operations/s".format(OPERATION_COUNT/(time()-start)))

    print("Deletion:")
    tasks = [Task(q.deleteMessage('aiotest', *receipt)) for receipt in receipts]
    start = time()
    res = await gather(*tasks)
    print("{} operations/s".format(OPERATION_COUNT/(time()-start)))
    print()
    await q.close()
def on_timeout(task: asyncio.Task, loop: asyncio.AbstractEventLoop): nonlocal cancelled if task.done(): return task.cancel() cancelled = True
def _task_done(self, task: Task) -> None:
    """Done-callback: untrack ``task`` and print its stack if it failed.

    The stack is printed to ``self.stack_file`` only when the task raised and
    ``self.stack_limit`` is truthy; ``stack_limit is True`` means no frame
    limit. For a cancelled task the coroutine stored in ``self._task_map`` is
    closed instead.
    """
    with self._lock:
        coroutine = self._task_map.pop(task)

    try:
        failed = task.exception()
        if failed and self.stack_limit:
            frame_limit = None if self.stack_limit is True else self.stack_limit
            task.print_stack(limit=frame_limit, file=self.stack_file)
    except CancelledError:
        coroutine.close()
async def test_dispatch_event_cancel(self, source):
    """Cancel the future returned by ``dispatch()`` and await the remaining task.

    A listener is connected, the event is dispatched and the returned future
    is cancelled right away. The first task of the loop that is not the test
    itself is then awaited.
    """
    # Task.all_tasks() / Task.current_task() were removed in Python 3.9; the
    # module-level functions are their replacements.
    from asyncio import all_tasks, current_task

    source.event_a.connect(lambda event: None)
    future = source.event_a.dispatch()
    future.cancel()
    this_task = current_task()
    task = next(t for t in all_tasks() if t is not this_task)
    await task
def _done(_task: asyncio.Task):
    """Done-callback: pass the first result of ``_task`` on to ``callback``.

    The task's name is used as the host name. When the result is empty, a
    "not found" message is printed and ``callback`` is not invoked.
    """
    host = _task.get_name()
    found = _task.result()

    if len(found):
        callback(host, found[0])
        return

    print(f"IPF device not found: {host}")
async def ensureTaskCanceled(asyncio_task: asyncio.Task) -> None:
    """Keep cancelling ``asyncio_task`` until it has finished.

    Cancellation is requested again on every loop iteration, so a task that
    swallows one ``CancelledError`` and carries on is cancelled again. A task
    that is already done is left untouched.

    Args:
        asyncio_task: The task to bring to an end.
    """
    while not asyncio_task.done():
        # cancel() only schedules the cancellation and does not raise, so no
        # exception handling is needed around it.
        asyncio_task.cancel()
        # Yield to the event loop so the task can process the request.
        await asyncio.sleep(0)
def done_callback(task: asyncio.Task) -> None:
    """Log a notice when the consume task was cancelled.

    Any other exception stored in the task is re-raised by ``result()`` and
    is not handled here.
    """
    if not task.cancelled():
        task.result()
        return

    self.log.info(
        f"The consume task of {type(self).__name__} was canceled. Messages may be lost."
    )
def ui_closed(self, task: asyncio.Task) -> None:
    """Done-callback of the UI task: store the return code and signal stop.

    ``self.returncode`` becomes 1 when the task raised, otherwise the task's
    result. ``self.stop_event`` is set in both cases.
    """
    failure = task.exception()
    if failure:
        self.logger.error("UI failed with an exception: %s", failure)
        code = 1
    else:
        code = task.result()

    self.returncode = code
    self.stop_event.set()
async def cancel_await(task: Task, callback: Optional[Callable[[], None]] = None) -> None:
    """Cancel ``task``, wait for it, and run ``callback`` once it is cancelled.

    ``callback`` is only invoked when awaiting the task raises
    ``CancelledError``. It is skipped when the task had already finished.
    """
    task.cancel()
    try:
        await task
    except asyncio.CancelledError:
        if callback is None:
            return
        callback()
def callback(task: Task):
    """Done-callback of the connect task for a newly discovered controller.

    On failure a warning is logged and nothing is registered. On success the
    controller is stored under ``device_uid`` and announced through
    ``controller_discovered``.
    """
    failure = task.exception()
    if not failure:
        self._controllers[device_uid] = controller
        self.controller_discovered(controller)
        return

    _LOG.warning(
        "Unable to connect to newly discovered controller",
        exc_info=failure)
def task_done_callback(task: asyncio.Task):
    """Log an error raised by the startup task; ignore cancellation."""
    if task.cancelled():
        return

    try:
        task.result()
    except Exception as error:
        log.exception("Failed to initialize the cog.", exc_info=error)
def check_task_exception(self, fut: asyncio.Task):
    """Log the exception of a finished task via ``self.log``.

    Unfinished tasks are skipped. For a cancelled task the ``CancelledError``
    itself is treated as the exception.
    """
    if not fut.done():
        return

    try:
        exc = fut.exception()
    except asyncio.CancelledError as cancelled:
        exc = cancelled

    if exc:
        self.log(f"Task raised exception: {str(exc)}")
def test_get_account_name(self):
    """A connected client reports a non-empty account name."""
    client = Client(self.loop)
    self.loop.run_until_complete(client.connect())
    # Schedule on the test's own loop. A bare ``Task(...)`` binds to whatever
    # loop asyncio treats as the current default, which need not be
    # ``self.loop``.
    task = self.loop.create_task(client.get_account_name())
    self.loop.run_until_complete(task)
    self.assertIsNotNone(task.result())
    self.loop.run_until_complete(client.disconnect())
    account_name = task.result()
    self.assertTrue(len(account_name) > 0)
def test_get_account(self):
    """A connected client returns a non-None account result."""
    # This can be a time-consuming test (sometimes IB takes a minute or so
    # to return all of the account data)
    client = Client(self.loop)
    self.loop.run_until_complete(client.connect())
    # Schedule on the test's own loop. A bare ``Task(...)`` binds to whatever
    # loop asyncio treats as the current default, which need not be
    # ``self.loop``.
    task = self.loop.create_task(client.get_account())
    self.loop.run_until_complete(task)
    self.assertIsNotNone(task.result())
    self.loop.run_until_complete(client.disconnect())
def _accept_client(self, client_reader, client_writer):
    """Start a handler task for a new connection and track it in ``self.clients``.

    The task is the key and the ``(reader, writer)`` pair the value. The
    entry is removed again as soon as the handler task is done.
    """

    def forget_client(finished):
        del self.clients[finished]

    handler = Task(self._handle_client(client_reader, client_writer))
    self.clients[handler] = (client_reader, client_writer)
    handler.add_done_callback(forget_client)
def signal_handler(self):
    """Signal handler for asynchronous event loop

    Stops it gracefully: the thread executor is shut down without waiting,
    every task of the event loop except the current one is cancelled and the
    loop is asked to stop.
    """
    # Task.all_tasks() / Task.current_task() were removed in Python 3.9; the
    # module-level functions are their replacements.
    from asyncio import all_tasks, current_task

    # NOTE(review): this rebinds sys.stderr to a read-only handle that is
    # closed once the block ends, and stderr is never restored. Presumably
    # meant to silence shutdown noise - confirm before changing.
    with open("/dev/null") as sys.stderr:
        self._thread_executor.shutdown(wait=False)
        running = current_task(self._event_loop)
        for task in all_tasks(self._event_loop):
            if task is not running:
                task.cancel()
        self._event_loop.stop()
async def cancel(task: Task, loop: Optional[AbstractEventLoop]=None) -> None:
    """Cancel a task and wait until it's done.

    **Note**: this function is a coroutine.

    Canceling a child task and returning without waiting for the child task to
    complete is a common cause of "event loop closed" ``RuntimeError``
    exceptions, especially during program shutdown. Therefore, this becomes a
    common pattern:

    .. code-block:: python

       task.cancel()
       await asyncio.wait({task})

    However, if the parent task itself is also canceled, then the
    ``asyncio.wait()`` call will be interrupted and the child task will still
    not complete. To solve this, we must also manage to trap the
    ``asyncio.CancelledError`` exception and call ``asyncio.wait({task})``
    again and properly re-raise the ``asyncio.CancelledError`` exception. For
    example:

    .. code-block:: python

       task.cancel()
       try:
           await asyncio.wait({task})
       except asyncio.CancelledError:
           await asyncio.wait({task})
           raise

    This is not trivial and must be done so many times in a program that
    cancels tasks that it merits a replacement API for ``task.cancel()``.

    :param task: The ``asyncio.Task`` object to cancel.
    :param loop: Accepted for backward compatibility and ignored; waiting
        always happens on the running event loop.

    .. versionadded:: 0.3

    """
    # ``asyncio.wait()`` lost its ``loop`` argument in Python 3.10, so it is
    # no longer forwarded.
    task.cancel()
    try:
        await asyncio.wait({task})
    except asyncio.CancelledError:
        # We were cancelled ourselves: still see the child through, then
        # propagate our own cancellation.
        await asyncio.wait({task})
        raise
def test_get_orders(self):
    """Two freshly placed orders show up among the open orders.

    Places one buy and one sell stop order for ``TEST_CONTRACT``, checks that
    both are returned by ``get_orders()``, then cancels them and disconnects.
    """

    def run(coro):
        # Schedule on the test's own loop. A bare ``Task(...)`` binds to
        # whatever loop asyncio treats as the current default.
        task = self.loop.create_task(coro)
        self.loop.run_until_complete(task)
        return task

    client = Client(self.loop)
    self.loop.run_until_complete(client.connect())

    # Place two test orders
    buy_order = ibo.Order('buy', 110000, 'stp', aux_price=2)
    buy_order_id = run(client.place_order(TEST_CONTRACT, buy_order)).result()
    self.assertTrue(buy_order_id > 0)

    sell_order = ibo.Order('sell', 110000, 'stp', aux_price=0.2)
    sell_order_id = run(client.place_order(TEST_CONTRACT, sell_order)).result()
    self.assertTrue(sell_order_id > 0)

    # Retrieve the open orders matching our test ids
    task = run(client.get_orders())
    self.assertIsNotNone(task.result())
    orders = [x for x in task.result()
              if x.order_id in (buy_order_id, sell_order_id)]
    self.assertEqual(2, len(orders))

    # Cancel the orders and disconnect
    run(client.cancel_order(buy_order_id))
    run(client.cancel_order(sell_order_id))
    self.loop.run_until_complete(client.disconnect())
def set_timeout(task: asyncio.Task, timeout: [float, int], loop: asyncio.AbstractEventLoop = None, timeout_cancel=True):
    """Attach a deadline of ``timeout`` seconds from now to ``task``.

    With ``timeout_cancel`` set, a non-positive timeout cancels the task at
    once and returns. Otherwise any previous timeout is removed and a
    ``task.cancel`` call is scheduled at the deadline, with the timer handle
    stored on the task. The deadline itself is stored on the task as well.

    :param task: task to put the deadline on
    :param timeout: number of seconds until the deadline
    :param loop: event loop to schedule on; defaults to the running loop
    :param timeout_cancel: whether the task is cancelled at the deadline
    """
    assert isinstance(timeout, (float, int))
    if loop is None:
        loop = get_running_loop()

    out_time = loop.time() + timeout

    if timeout_cancel:
        if timeout <= 0:
            task.cancel()
            return
        unset_timeout(task)
        setattr(task, _MODULE_TIMEOUT_HANDLE, loop.call_at(out_time, task.cancel))

    setattr(task, _MODULE_TIMEOUT, out_time)
def update_coroutines_count(self, simple=True, loop=LOOP):
    """Store the number of tasks of ``loop`` in ``self.coroutines_count``.

    Args:
        simple: When true, count every task that is reported; otherwise
            count only those that are not done. ``asyncio.all_tasks()``
            reports unfinished tasks only, so both modes normally agree.
        loop: The event loop whose tasks are counted.

    On a ``RuntimeError`` while collecting the tasks, the count is set to the
    string ``'-1'``.
    """
    # Task.all_tasks() was removed in Python 3.9; asyncio.all_tasks() is the
    # replacement.
    from asyncio import all_tasks

    try:
        tasks = all_tasks(loop)
        self.coroutines_count = len(tasks) if simple else sum(not t.done() for t in tasks)
    except RuntimeError:
        # Set changed size during iteration
        self.coroutines_count = '-1'
def __enter__(self):
    """Enter the timeout block and arm the timer.

    Without a timeout nothing is scheduled. Otherwise the current task is
    remembered in ``self._task`` and ``self._cancel_task`` is scheduled to
    run ``self._timeout`` seconds from now; the timer handle is kept in
    ``self._cancel_handler``.

    Returns:
        ``self``.
    """
    if self._timeout is None:
        return self

    # Task.current_task() was removed in Python 3.9; asyncio.current_task()
    # is the replacement.
    from asyncio import current_task

    self._task = current_task(self._loop)
    tm = self._loop.time() + self._timeout
    self._cancel_handler = self._loop.call_at(tm, self._cancel_task)
    return self
def progress_bar(requests, loop, freq=0.01):
    """Draw a one-line progress bar and reschedule itself until all is done.

    Args:
        requests: Total number of requests; the bar is full once that many
            tasks are done.
        loop: Event loop used to schedule the next redraw.
        freq: Delay in seconds between two redraws.
    """
    width, _ = get_terminal_size()
    # NOTE(review): Task.all_tasks() was removed in Python 3.9. Its
    # replacement asyncio.all_tasks() reports unfinished tasks only, so the
    # done-count below cannot simply be ported - it needs a different
    # source, e.g. a counter updated by the requests themselves.
    done_count = sum(1 for t in Task.all_tasks() if t.done())
    tasks_left_count = requests - done_count
    progress = int(done_count / requests * width)
    print("\r" + "*" * progress + "." * (width - progress), end="")
    if tasks_left_count > 0:
        # Pass freq along; otherwise every redraw after the first falls back
        # to the default interval.
        loop.call_later(freq, progress_bar, requests, loop, freq)
def __curtask__(self):
    """Return the namespace of the current task, creating it on first use.

    The namespace is kept on the task object as ``_locals``.

    Raises:
        RuntimeError: If no task is currently running.
    """
    # Task.current_task() was removed in Python 3.9; asyncio.current_task()
    # is the replacement.
    from asyncio import current_task

    try:
        task = current_task(loop=self._loop)
    except RuntimeError:
        # No running event loop either; report it like a missing task.
        task = None
    if not task:
        raise RuntimeError('No task is currently running')

    if not hasattr(task, '_locals'):
        task._locals = local_storage()
    return task._locals
def display_loop(term, es_servers):
    """Wait on the input and search jobs, then render whatever they produce.

    This corresponds loosely to an event loop in game programming. It is a
    generator-based coroutine (``yield from``) that never returns.

    Args:
        term: Terminal object handed to the render and input helpers.
        es_servers: Passed to the ``Elasticsearch`` constructor.
    """
    query = ""
    # avoid recreating this all the time due to implicit dns lookups in creation
    es = Elasticsearch(es_servers)
    loop = asyncio.get_event_loop()
    # NOTE(review): ``input`` shadows the builtin of the same name inside
    # this function.
    input = Task(input_loop(loop, term, query))
    search = Task(search_loop(loop, es, query))
    jobs = [input, search]
    while True:
        done, pending = yield from asyncio.wait(jobs)
        jobs = []
        # This needs to come before input handling, as that may try to
        # cancel a completed job before it is processed. As results take a
        # bit to come back, it is better to just briefly display them.
        if search in done:
            results = search.result()
            render_search_results(term, results)
        if input in done:
            query = input.result()
            render_query_field(term, query)
            # We wrap this in a Task for the membership check above. If we
            # just passed in a coroutine, asyncio.wait would wrap it in a
            # Task itself and the comparison would always fail.
            input = Task(input_loop(loop, term, query))
            jobs.append(input)
            # ES does not return results to us for short queries, so just short
            # circuit them and avoid network traffic
            if len(query) > 1:
                # We won't be using the results anymore, so just discard the worker.
                search.cancel()
                search = Task(search_loop(loop, es, query))
                jobs.append(search)
            else:
                # force a clear of the results
                render_search_results(term, [])
        sys.stdout.flush()
        # Make sure we resubmit any jobs that have not yet been completed,
        # otherwise we leak them and things go into limbo.
        jobs += pending
def issue_req_token(self, reply_fut, timeout):
    """Register a pending reply under a fresh token and return the token.

    A six-character token not yet present in ``self.reply_inbox`` is drawn.
    ``self.timeout_reply(token)`` is scheduled to run after ``timeout``
    seconds, and the inbox entry records the reply future, the timer handle
    and the task that issued the request.

    Args:
        reply_fut: Future stored in the inbox entry.
        timeout: Delay in seconds before ``timeout_reply`` is called.

    Returns:
        The newly issued token.
    """
    # Task.current_task() was removed in Python 3.9; asyncio.current_task()
    # is the replacement.
    from asyncio import current_task

    token = random_alphanumeric(6)
    while token in self.reply_inbox:
        token = random_alphanumeric(6)
    timer_handle = self._loop.call_later(
        timeout, partial(self.timeout_reply, token))
    self.reply_inbox[token] = ReplyInboxItem(
        reply_fut, timer_handle, current_task(self._loop))
    return token
def _cleanup(disable_pending_task_warnings=False):
    """Shut down all instances and close the event loop.

    When ``disable_pending_task_warnings`` is set, the result of every task
    is fetched before the loop is closed.
    """
    # Close the server
    shutdowns = [x.shutdown() for x in instances]
    loop.run_until_complete(asyncio.gather(*shutdowns))
    loop.stop()

    if disable_pending_task_warnings:
        for t in Task.all_tasks():
            t.result()

    loop.close()
def identify_future(fut=None):
    """
    Function to identify a task or future.

    :param fut: Future to identify. Optional. Default value is None.
                When it is None the current task, as returned by
                :func:`asyncio.current_task`, is used. Outside of a running
                event loop there is no current task and ``None`` is
                identified instead.
    :type fut: asyncio.Future or None
    :return: int
    """
    if fut is None:
        # Task.current_task() was removed in Python 3.9;
        # asyncio.current_task() is the replacement.
        from asyncio import current_task

        try:
            fut = current_task()
        except RuntimeError:
            # No running event loop. The old API answered None here, so the
            # same value is kept.
            fut = None
    return id(fut)
async def start_get_updates(self):
    """
    Starts get updates loop.

    After an initial ``get_me()`` call, updates are fetched repeatedly. Every
    update whose id is at or above ``self.update_offset`` advances the offset
    past itself and is handed to ``process_update`` in a new task.
    """
    # Task.current_task() was removed in Python 3.9; asyncio.current_task()
    # is the replacement.
    from asyncio import current_task

    await self.get_me()

    while not current_task(self.loop).cancelled():
        updates = await self.get_updates()

        for update in updates:
            if update.update_id >= self.update_offset:
                self.update_offset = update.update_id + 1
                asyncio.ensure_future(self.process_update(update), loop=self.loop)
async def wrapper(*args, details: EventDetails, **kwargs):
    """Run the subscriber's handler inside a fresh ``EventContext``.

    The current task is tracked in ``self._request_tasks`` while the handler
    runs. An awaitable returned by the handler is awaited. Any exception is
    reported through ``report_exception`` and then re-raised.
    """
    # Task.current_task() was removed in Python 3.9; asyncio.current_task()
    # is the replacement.
    from asyncio import current_task as get_current_task

    current_task = get_current_task(loop=self._loop)
    self._request_tasks.add(current_task)
    async with EventContext(self._parent_context, self._session_details, details) as ctx:
        try:
            retval = subscriber.handler(ctx, *args, **kwargs)
            if isawaitable(retval):
                await retval
        except Exception:
            report_exception(
                ctx,
                'Error running subscription handler for topic {!r}'.format(
                    subscriber.topic),
                logger=False)
            raise
        finally:
            self._request_tasks.remove(current_task)
def cleanup(overseer, manager):
    """Wind the application down: finish tasks, persist state, close resources.

    Outstanding tasks of ``LOOP`` get up to 40 seconds to finish. Afterwards
    the database processor is stopped, the caches are pickled and the
    function waits until the database queue is empty. The manager, sessions
    and the event loop are closed in all cases.
    """
    # Task.all_tasks() was removed in Python 3.9; asyncio.all_tasks() is the
    # replacement.
    from asyncio import all_tasks

    try:
        overseer.print_handle.cancel()
        overseer.running = False
        print('Exiting, please wait until all tasks finish')

        log = get_logger('cleanup')
        print('Finishing tasks...')

        LOOP.create_task(overseer.exit_progress())
        pending = gather(*all_tasks(loop=LOOP), return_exceptions=True)
        try:
            LOOP.run_until_complete(wait_for(pending, 40))
        except TimeoutError:
            print('Coroutine completion timed out, moving on.')
        except Exception as e:
            log.exception('A wild {} appeared during exit!', e.__class__.__name__)

        db_proc.stop()
        overseer.refresh_dict()

        print('Dumping pickles...')
        dump_pickle('accounts', ACCOUNTS)
        FORT_CACHE.pickle()
        altitudes.pickle()
        if conf.CACHE_CELLS:
            dump_pickle('cells', Worker.cells)

        spawns.pickle()
        while not db_proc.queue.empty():
            pending = db_proc.queue.qsize()
            # Spaces at the end are important, as they clear previously printed
            # output - \r doesn't clean whole line
            print('{} DB items pending '.format(pending), end='\r')
            sleep(.5)
    finally:
        print('Closing pipes, sessions, and event loop...')
        manager.shutdown()
        SessionManager.close()
        close_sessions()
        LOOP.close()
        print('Done.')
async def follow_through(task: Task, loop: Optional[AbstractEventLoop]=None) -> Any:
    """Wait for a task to complete (even if canceled while waiting).

    **Note**: this function is a coroutine.

    Not propagating cancellation to a child task and returning without waiting
    for the child task to complete is a common cause of "event loop closed"
    ``RuntimeError`` exceptions, especially during program shutdown.
    Therefore, this becomes a common pattern:

    .. code-block:: python

       try:
           await asyncio.wait({task})
       except asyncio.CancelledError:
           task.cancel()
           await asyncio.wait({task})
           raise
       return task.result()

    This is not trivial and must be done so many times in a program that
    spawns child tasks that it merits a helper method.

    :param task: The ``asyncio.Task`` object to see through to completion.
    :param loop: Only forwarded to ``cancel()`` when this coroutine is
        cancelled while waiting; the wait itself always uses the running
        event loop.
    :return: The result of ``task``; an exception raised by the task
        propagates.

    .. versionadded:: 0.3

    """
    try:
        # ``asyncio.wait()`` lost its ``loop`` argument in Python 3.10, so it
        # is no longer forwarded.
        await asyncio.wait({task})
    except asyncio.CancelledError:
        await cancel(task, loop=loop)
        raise
    return task.result()
async def wrapper(*args, _call_details: CallDetails, **kwargs):
    """Run the procedure's handler inside a fresh ``CallContext``.

    The current task is tracked in ``self._request_tasks`` while the handler
    runs. An awaitable returned by the handler is awaited and its result
    becomes the return value. ``ApplicationError`` is re-raised silently.
    Other exceptions are re-raised too, and reported first unless their class
    is a key of ``self._session._ecls_to_uri_pat``.

    :return: the handler's (awaited) return value
    """
    # Task.current_task() was removed in Python 3.9; asyncio.current_task()
    # is the replacement.
    from asyncio import current_task as get_current_task

    current_task = get_current_task(loop=self._loop)
    self._request_tasks.add(current_task)
    async with CallContext(
            self._parent_context, self._session_details, _call_details) as ctx:
        try:
            retval = procedure.handler(ctx, *args, **kwargs)
            if isawaitable(retval):
                retval = await retval
        except ApplicationError:
            raise  # These are deliberately raised so no need to report them
        except Exception as exc:
            # Report the exception unless it's a mapped exception
            if exc.__class__ not in self._session._ecls_to_uri_pat:
                report_exception(
                    ctx,
                    'Error running handler for procedure {!r}'.format(procedure.name),
                    logger=False)
            raise
        finally:
            self._request_tasks.remove(current_task)

        return retval
def info(self):
    """Return the set of unfinished tasks of the running event loop.

    When no event loop is running in the calling thread, an empty set is
    returned.
    """
    # Task.all_tasks() was removed in Python 3.9; asyncio.all_tasks() is the
    # replacement. It reports unfinished tasks only.
    from asyncio import all_tasks

    try:
        return all_tasks()
    except RuntimeError:
        # No running event loop in this thread.
        return set()
# Set-up for a task-priority test script.
# NOTE(review): ``logging.getlogger``/``setGlobal``/``TRACE``, ``Scheduler``
# and the ``name``/``prio`` keywords of ``Task`` do not exist in the standard
# library, so this presumably targets a project-specific logging/asyncio
# implementation - confirm.
log = logging.getlogger("test_taskprio")
logging.setGlobal(logging.TRACE)

import time
import sys
from asyncio import Task
from asyncio import Scheduler


# dummy coroutine
def dummy():
    yield


# Two tasks share priority 10; the other two have priorities 5 and 15.
taskLate = Task(dummy(), name = "taskLate", prio = 10)
taskEarly = Task(dummy(), name = "taskEarly", prio = 10)
taskMedium5 = Task(dummy(), name = "taskMedium5", prio = 5)
taskMedium15 = Task(dummy(), name = "taskMedium15", prio = 15)

# Run times: taskEarly is earliest (110), both medium tasks share 500 and
# taskLate is latest (1001).
taskLate.time2run = 1001
taskEarly.time2run = 110
taskMedium5.time2run = 500
taskMedium15.time2run = 500

sched = Scheduler()


def pushonheap(task):
    """Print the task's name and hand the task to the scheduler."""
    print ("Pushing task %s " % task.name )
    sched.schedule(task)
def get_context_or_error():
    """Return the ``actor_ctx`` attribute of the current task.

    Raises:
        AttributeError: If there is no current task or it has no
            ``actor_ctx`` attribute.
    """
    # Task.current_task() was removed in Python 3.9; asyncio.current_task()
    # is the replacement.
    from asyncio import current_task

    try:
        task = current_task()
    except RuntimeError:
        # No running event loop. Fall through with None so the failure stays
        # the AttributeError that callers got before.
        task = None
    return task.actor_ctx
def get_context_or_none():
    """Return the ``actor_ctx`` attribute of the current task, or ``None``.

    ``None`` is returned when there is no current task or when it carries no
    ``actor_ctx`` attribute.
    """
    # Task.current_task() was removed in Python 3.9; asyncio.current_task()
    # is the replacement.
    from asyncio import current_task

    try:
        task = current_task()
    except RuntimeError:
        # No running event loop, hence no current task.
        task = None
    return getattr(task, 'actor_ctx', None)
async def get_urls(site_url, depth=0, loop=None):
    """Crawl ``site_url`` and schedule a crawl for every new link found on it.

    The page text is taken from the cache ``r`` when present; otherwise it is
    downloaded and cached for 24 hours. The event loop is stopped once this
    is the only unfinished task left.

    Args:
        site_url: URL of the page to crawl.
        depth: Link depth of this page; links are only followed while it is
            below ``MAX_DEPTH``.
        loop: Event loop used for follow-up tasks; defaults to the running
            loop.
    """
    # Task.all_tasks() was removed in Python 3.9; asyncio.all_tasks() is the
    # replacement.
    from asyncio import all_tasks, get_running_loop

    if loop is None:
        loop = get_running_loop()

    async def retrieve_site(url, timeout=1):
        """Download ``url``; return its text, or '' on any handled error."""
        logging.debug("%s: retrieving", url)
        # ``async with`` releases the semaphore even when an unexpected
        # exception escapes, so a connection slot can never leak.
        async with connection_sem:
            try:
                response = await wait_for(get(url), timeout)
                # Potential Errors:
                # ValueError: Host could not be detected
                # ...
            except (ValueError, TimeoutError, ClientOSError):
                logging.debug("%s: Connection error", url)
                return ''
            logging.debug("%s: Connected, retrieving text", url)

            try:
                text = await wait_for(response.text(), timeout)
                # Potential Errors:
                # ...
            except (UnicodeDecodeError, TimeoutError):
                logging.debug("%s: Could not retrieve text", url)
                return ''
            logging.debug("%s: Retrieved", url)
            return text

    async def process_anchors(text):
        """Collect the links in ``text`` and enqueue the unseen ones."""
        href_targets = set()
        for href in URL_RE.findall(text):
            if not href.startswith('http'):
                href = urljoin(site_url, href)
            # Remove potential query parameter from URL
            href, *_ = href.split('?')
            if not URL_IGNORE_RE.match(href) and depth < MAX_DEPTH:
                href_targets.add(href)

        for href_target in href_targets:
            if r.sismember('seen', href_target):
                continue
            # Log the link that is enqueued, not the page it was found on.
            logging.debug("Enqueueing %s", href_target)
            loop.create_task(get_urls(href_target, depth + 1, loop=loop))

    r.sadd('seen', site_url)
    logging.info("Crawling %s", site_url)

    if not r.exists(site_url):
        logging.debug("%s not in cache", site_url)
        text = await retrieve_site(site_url, timeout=5)
        # Only cache if text has been retrieved successfully
        if text:
            r.set(site_url, text)
            r.expire(site_url, 60 * 60 * 24)
    else:
        logging.debug("Retrieved %s from cache", site_url)
        text = r.get(site_url)

    # NOTE(review): if ``r.get`` returns bytes, ``str(text)`` yields the
    # ``b'...'`` representation rather than decoded text - confirm how the
    # cache client is configured.
    await process_anchors(str(text))

    tasks = all_tasks(loop)
    logging.info("Semaphore locked? %s", connection_sem.locked())
    logging.info("Remaining tasks: %d", len(tasks) - 1)
    if len(tasks) == 1:
        loop.stop()