def _chain_to_another_future(self, base_future: Future):
    """Chain a Future instance directly to another Future instance."""
    if base_future in self._chained_future_log:
        raise Exception(f"""Circular chain error. Future { base_future } is already in resolved chain { set(self._chained_future_log) }.""")
    self._chained_future_log.add(base_future)

    def _transfer(src: Future):
        """Convert return of previous Future to results of new Future."""
        if not src.done():
            # Defensive guard: done callbacks should only fire on completed
            # futures.  Cancel rather than wait forever.
            self.cancel()
            return
        if src.cancelled():
            self.cancel()
            return
        try:
            outcome = src.result()
            if isinstance(outcome, Future):
                # The chained future resolved to yet another future: follow it.
                self._chain_to_another_future(outcome)
            else:
                self.set_result(outcome)
        except BaseException:
            ex, trace_back = sys.exc_info()[1:]
            self.set_exception_info(ex, trace_back)

    base_future.add_done_callback(_transfer)
async def _call_func(self, func: Callable, args: tuple,
                     kwargs: Dict[str, Any], future: Future) -> None:
    """Run ``func(*args, **kwargs)`` (awaiting it if it returns a coroutine)
    and mirror the outcome — result, exception or cancellation — onto
    ``future``.
    """
    def callback(f: Future) -> None:
        # Caller cancelled the concurrent future: cancel the coroutine's
        # scope from inside the event loop.
        # NOTE(review): ``scope`` is set to None in the finally block below;
        # if this callback fires after the function exits, scope.cancel
        # would fail — confirm the surrounding portal guarantees ordering.
        if f.cancelled():
            self.call(scope.cancel)

    try:
        retval = func(*args, **kwargs)
        if iscoroutine(retval):
            with CancelScope() as scope:
                if future.cancelled():
                    scope.cancel()
                else:
                    future.add_done_callback(callback)

                retval = await retval
    except self._cancelled_exc_class:
        future.cancel()
    except BaseException as exc:
        # Only report the error if the caller hasn't cancelled already.
        if not future.cancelled():
            future.set_exception(exc)

        # Let base exceptions fall through
        if not isinstance(exc, Exception):
            raise
    else:
        if not future.cancelled():
            future.set_result(retval)
    finally:
        # Break the reference cycle through the closure above.
        scope = None  # type: ignore[assignment]
def asyncRun(self, cb, *args, **kwargs):
    """Helper call to run a callback `cb` within the task's main loop.

    Returns an instance of Future() that can be waited for obtain the result
    of computation.  The callback will be run only once.
    """
    future = Future()

    def _execute(fut, fn, *fn_args, **fn_kwargs):
        try:
            # Skip execution when the future was cancelled in the meantime.
            if fut.set_running_or_notify_cancel():
                fut.set_result(fn(*fn_args, **fn_kwargs))
        except Exception as err:
            fut.set_exception(err)
        # Returning False makes glib remove the idle source automatically.
        return False

    source_id = glib.idle_add(partial(_execute, future, cb, *args, **kwargs))

    def _on_done(fut, handle=source_id):
        # Cancellation before the idle source fired: remove it.
        if fut.cancelled():
            glib.source_remove(handle)

    future.add_done_callback(_on_done)
    return future
def await_submit(self, awaitable: typing.Awaitable) -> ThreadFuture:
    """Schedule an awaitable on the loop and return the corresponding future."""
    self._ensure_running()
    thread_future = ThreadFuture()

    def _schedule():
        # Runs on the comms (event-loop) thread.
        async def _guarded():
            # Skip the awaitable entirely if the caller already cancelled.
            if not thread_future.cancelled():
                return await awaitable

        coro_future = asyncio.ensure_future(_guarded(), loop=self._loop)
        aio_future_chain_thread(coro_future, thread_future)

    loop_handle = self._loop.call_soon_threadsafe(_schedule)

    def _forward_cancel(done_future: ThreadFuture):
        """Propagate cancellation of the concurrent future up to the loop
        callback."""
        if done_future.cancelled():
            self._loop.call_soon_threadsafe(loop_handle.cancel)

    thread_future.add_done_callback(_forward_cancel)
    return thread_future
def wrapper(*args, **kwargs):
    """Run the wrapped ``fn`` on a worker thread, returning a Future.

    The first positional argument is the instance (``self``) of the wrapped
    method; it is forwarded to ``fn`` unchanged.
    """
    self = args[0]
    future = Future()

    def run_thread(r_fn, r_future, *r_args, **r_kwargs):
        try:
            result = r_fn(*r_args, **r_kwargs)
            r_future.set_result(result)
        except Exception as e:
            print("Exception received")
            # Idiom fix: was `r_future.cancelled() == False`.  Only record
            # the error when the caller has not cancelled the future.
            if not r_future.cancelled():
                r_future.set_exception(e)

    with self.lock:
        thread = threading.Thread(target=run_thread,
                                  args=(fn, future) + args,
                                  kwargs=kwargs)
        thread.start()
        print("running stuff")

        def callback(c_future):
            print("Callback created: ", c_future)
            print("Callback thread: ", thread)
            if c_future.cancelled():
                print("Killing thread")
                # NOTE(review): raise_exception is not a standard
                # threading.Thread API — presumably a custom subclass that
                # injects an exception into the thread; verify.
                thread.raise_exception()

        future.add_done_callback(callback)
    return future
def asyncRun(self, cb, *args, **kwargs):
    """Helper call to run a callback `cb` within the task's main loop.

    Returns an instance of Future() that can be waited for obtain the result
    of computation.  The callback will be run only once.
    """
    result_future = Future()

    def _run_once():
        try:
            # Only execute `cb` if the future wasn't cancelled.
            if result_future.set_running_or_notify_cancel():
                result_future.set_result(cb(*args, **kwargs))
        except Exception as exc:
            result_future.set_exception(exc)
        # glib removes one-shot idle sources when the handler returns False.
        return False

    handle = glib.idle_add(_run_once)

    def _maybe_remove(fut):
        if fut.cancelled():
            glib.source_remove(handle)

    result_future.add_done_callback(_maybe_remove)
    return result_future
def as_future(func, args=(), kwargs=None, daemon=None, cb=None):
    """
    Executes the given function in a separate thread.  Returns a
    :class:`Future<concurrent.futures.Future>` object immediately.

    :param func: The function to execute
    :param args: Positional arguments for the function
    :param kwargs: Keyword arguments for the function
    :param bool daemon: Whether the :class:`Thread<threading.Thread>` that the
        function runs in should be a daemon or not (default: see
        :attr:`daemon<threading.Thread.daemon>`)
    :param cb: A callback function that accepts one positional argument to be
        called when the future is complete.  The function will be called with
        the future object that completed.
    :return: A :class:`Future<concurrent.futures.Future>` object that will
        hold the results of executing the given function
    """
    future = Future()
    if cb is not None:
        future.add_done_callback(cb)

    label = func.__name__
    worker = Thread(
        target=_run_func,
        args=(future, func, args, kwargs),
        name='future:{}#{}'.format(label, next(_names[label])),
        daemon=daemon,
    )
    worker.start()
    return future
def submit(self, func, *args, **kwargs) -> ThreadFuture:
    """Schedule a function on the loop and return the corresponding future."""
    self._ensure_running()
    result_future = ThreadFuture()

    def _run_on_loop():
        # Skip entirely if the caller already cancelled.
        if result_future.cancelled():
            return
        with futures.capture_exceptions(result_future):
            retval = func(*args, **kwargs)
            if asyncio.isfuture(retval):
                retval = aio_future_to_thread(retval)
            result_future.set_result(retval)

    loop_handle = self._loop.call_soon_threadsafe(_run_on_loop)

    def _forward_cancel(done_future: ThreadFuture):
        """Propagate a cancellation of the concurrent future up to the loop
        callback."""
        if done_future.cancelled():
            self._loop.call_soon_threadsafe(loop_handle.cancel)

    result_future.add_done_callback(_forward_cancel)
    return result_future
def submit(self, func, *args, **kwargs):
    """
    Reimplemented from :class:`concurrent.futures.Executor`

    Schedule the `func(*args, **kwargs)` to be executed and return an
    :class:`Future` instance representing the result of the computation.
    """
    with self._state_lock:
        if self._shutdown:
            raise RuntimeError("Cannot schedule new futures after " +
                               "shutdown.")

        if isinstance(func, Task):
            warnings.warn("Use `submit_task` to run `Task`s",
                          DeprecationWarning, stacklevel=2)
            future, runnable = self.__make_task_runnable(func)
        else:
            future = Future()
            runnable = FutureRunnable(future, func, args, kwargs)

        self._futures.append(future)
        future.add_done_callback(self._future_done)
        self._threadPool.start(runnable)
        return future
class CompositeResultFuture:
    """Aggregate several futures into one that resolves to a list of results."""

    def __init__(self, futures: Iterable[Future]):
        self._futures = tuple(futures)
        self._result_future = Future()
        self.__pending = set(range(len(self._futures)))
        # BUGFIX: iterate the stored tuple, not the ``futures`` parameter —
        # a generator argument would already be exhausted by tuple() above.
        for idx, future in enumerate(self._futures):
            if future.done():
                self.__pending.remove(idx)
            else:
                future.add_done_callback(partial(self.__cb, idx=idx))
        # BUGFIX: if every future was already done, no callback will ever
        # fire — resolve the composite immediately instead of hanging.
        if not self.__pending and not self._result_future.done():
            self._set_result()

    def __getitem__(self, item):
        # Blocks until all results are available.
        return self.result()[item]

    def __cb(self, _future: Future, idx):
        # discard() tolerates a duplicate callback invocation.
        self.__pending.discard(idx)
        if not self.__pending:
            self._set_result()

    def _set_result(self):
        # BUGFIX: propagate a child's exception/cancellation instead of
        # raising inside a done-callback, which would be swallowed and leave
        # this future pending forever.
        try:
            self._result_future.set_result([f.result() for f in self._futures])
        except BaseException as exc:  # includes CancelledError (3.8+)
            self._result_future.set_exception(exc)

    @property
    def futures(self) -> Tuple[Future, ...]:
        return self._futures

    def done(self) -> bool:
        return self._result_future.done()

    def result(self, timeout: Optional[int] = None):
        return self._result_future.result(timeout)

    def add_done_callback(self, fn):
        self._result_future.add_done_callback(fn)
def _then(future: Future, on_fulfilled, on_rejected):
    """Chain callbacks onto ``future`` promise-style, returning a new Promise."""
    chained = Promise()

    def _settle(prev: Future):
        # A cancelled (or somehow incomplete) source cancels the chain.
        if prev.cancelled() or not prev.done():
            chained.cancel()
            return

        error = prev.exception()
        if error is not None:
            if on_rejected is None:
                chained.set_exception(error)
            else:
                chained.set_result(on_rejected(error))
            return

        try:
            value = prev.result()
            if on_fulfilled is not None:
                value = on_fulfilled(value)
            if isinstance(value, Future):
                # Flatten a nested future by blocking on its result.
                value = value.result()
            chained.set_result(value)
        except BaseException as ex:
            if on_rejected is None:
                chained.set_exception(ex)
            else:
                chained.set_result(on_rejected(ex))

    future.add_done_callback(_settle)
    return chained
def map_future(future: Future, func: Callable[[T], S]) -> "MappableFuture[S]":
    """
    Apply function to underlying value preserving Future interface

    :param future:
    :param func:
    :return: new future which will be resolved once original future completes

    >>> fut = Future()
    >>> mapped = map_future(fut, lambda val: val + 10)
    >>> fut.set_result(32)
    >>> mapped.result()
    42
    """
    # Annotation fixed: the variable holds a MappableFuture[S], not an S.
    new_fut: "MappableFuture[S]" = MappableFuture()

    def _do_map(f):
        try:
            if f.cancelled():
                # Propagate cancellation to the mapped future.
                new_fut.cancel()
                return
            res = func(f.result())
            new_fut.set_result(res)
        except Exception as e:
            # Failures of the source future or of ``func`` itself.
            new_fut.set_exception(e)

    future.add_done_callback(_do_map)
    return new_fut
def send_request(self, method, params=None, callback=None):
    """Sends a JSON RPC request to the client.

    Args:
        method(str): The method name of the message to send
        params(any): The payload of the message

    Returns:
        Future that will be resolved once a response has been received
    """
    msg_id = str(uuid.uuid4())
    logger.debug('Sending request with id "%s": %s %s', msg_id, method, params)

    request = JsonRPCRequestMessage(
        id=msg_id,
        jsonrpc=JsonRPCProtocol.VERSION,
        method=method,
        params=params,
    )

    future = Future()
    if callback:
        # Hand the raw result to the user callback once it arrives.
        def _notify(done_future: Future):
            result = done_future.result()
            logger.info('Client response for %s received: %s', params, result)
            callback(result)

        future.add_done_callback(_notify)

    self._server_request_futures[msg_id] = future
    self._send_data(request)

    return future
def _send_request(self, request, callback=None, timeout=1000, message_type=ua.MessageType.SecureMessage):
    """
    send request to server, lower-level method
    timeout is the timeout written in ua header
    returns future
    """
    with self._lock:
        request.RequestHeader = self._create_request_header(timeout)
        self.logger.debug("Sending: %s", request)
        try:
            binreq = request.to_binary()
        except BaseException:
            # Fixed a bare `except:` — BaseException keeps identical
            # catch-everything-then-reraise semantics but is explicit.
            # Reset the request handle on any serialization error
            # (see self._create_request_header).
            self._request_handle -= 1
            raise
        self._request_id += 1
        future = Future()
        if callback:
            future.add_done_callback(callback)
        self._callbackmap[self._request_id] = future
        msg = self._connection.message_to_binary(
            binreq, message_type=message_type, request_id=self._request_id)
        self._socket.write(msg)
    return future
def _send_request(self, request, callback=None, timeout=1000, message_type=ua.MessageType.SecureMessage):
    """
    send request to server, lower-level method
    timeout is the timeout written in ua header
    returns future
    """
    with self._lock:
        request.RequestHeader = self._create_request_header(timeout)
        self.logger.debug("Sending: %s", request)
        try:
            payload = struct_to_binary(request)
        except Exception:
            # Serialization failed: roll back the request handle allocated
            # by self._create_request_header, then propagate.
            self._request_handle -= 1
            raise

        self._request_id += 1
        response_future = Future()
        if callback:
            response_future.add_done_callback(callback)
        self._callbackmap[self._request_id] = response_future

        # Change to the new security token if the connection has been renewed.
        if self._connection.next_security_token.TokenId != 0:
            self._connection.revolve_tokens()

        packet = self._connection.message_to_binary(
            payload, message_type=message_type, request_id=self._request_id)
        self._socket.write(packet)
    return response_future
def submit(self, func, *args, **kwargs):
    """Run ``func`` on the pool, tracking it until its Future completes."""
    from concurrent.futures import Future

    fut = Future()
    async_result = self.pool.apply_async(
        func, args, kwargs, fut.set_result, fut.set_exception)
    self.tasks[fut] = async_result
    # Drop the bookkeeping entry as soon as the future settles.
    fut.add_done_callback(self.tasks.pop)
    return fut
def submit(self, func, *args, **kwargs):
    """Dispatch ``func(*args, **kwargs)`` to the pool and return a Future."""
    from concurrent.futures import Future

    task_future = Future()
    task_future.add_done_callback(self.tasks.pop)  # registered below, fired on settle
    # The pool resolves the future via the success/error callbacks.
    self.tasks[task_future] = self.pool.apply_async(
        func,
        args,
        kwargs,
        task_future.set_result,
        task_future.set_exception,
    )
    return task_future
def __init__(self, f: Future):
    """Wrap ``f``, recording start time and subscribing to its completion."""
    # Public outcome slots, filled later by the done handler.
    self.data = None
    self.error = None
    # Timing bookkeeping.
    self.__start = time.perf_counter()
    self.__end = None
    self.__f = f
    f.add_done_callback(self.__on_done)
def get_raster_tile(self, path: str, *,
                    tile_bounds: Sequence[float] = None,
                    tile_size: Sequence[int] = None,
                    preserve_values: bool = False,
                    asynchronous: bool = False) -> Any:
    """Return a raster tile for ``path``, using a process-wide cache.

    Returns the masked array directly, or a Future resolving to it when
    ``asynchronous`` is True.
    """
    future: Future[np.ma.MaskedArray]
    result: np.ma.MaskedArray

    settings = get_settings()

    if tile_size is None:
        tile_size = settings.DEFAULT_TILE_SIZE

    # All retrieval parameters participate in the cache key below.
    kwargs = dict(
        path=path,
        tile_bounds=tile_bounds,
        tile_size=tuple(tile_size),
        preserve_values=preserve_values,
        reprojection_method=settings.REPROJECTION_METHOD,
        resampling_method=settings.RESAMPLING_METHOD,
        target_crs=self._TARGET_CRS,
        rio_env_options=self._RIO_ENV_OPTIONS,
    )

    cache_key = hash(ensure_hashable(kwargs))

    try:
        with self._cache_lock:
            result = self._raster_cache[cache_key]
    except KeyError:
        pass
    else:
        # Cache hit: honor the requested calling convention.
        if asynchronous:
            # wrap result in a future
            future = Future()
            future.set_result(result)
            return future
        else:
            return result

    # Cache miss: compute the tile on the executor.
    retrieve_tile = functools.partial(raster.get_raster_tile, **kwargs)

    future = submit_to_executor(retrieve_tile)

    def cache_callback(future: Future) -> None:
        # insert result into global cache if execution was successful
        if future.exception() is None:
            self._add_to_cache(cache_key, future.result())

    if asynchronous:
        future.add_done_callback(cache_callback)
        return future
    else:
        # Synchronous path: block, then cache explicitly.
        result = future.result()
        cache_callback(future)
        return result
def launch_if_ready(self, task_id):
    """
    launch_if_ready will launch the specified task, if it is ready
    to run (for example, without dependencies, and in pending state).

    This should be called by any piece of the DataFlowKernel that
    thinks a task may have become ready to run.

    It is not an error to call launch_if_ready on a task that is not
    ready to run - launch_if_ready will not incorrectly launch that
    task.

    launch_if_ready is thread safe, so may be called from any thread
    or callback.
    """
    if self._count_deps(self.tasks[task_id]['depends']) == 0:
        # We can now launch *task*
        new_args, kwargs, exceptions = self.sanitize_and_wrap(
            task_id, self.tasks[task_id]['args'], self.tasks[task_id]['kwargs'])
        self.tasks[task_id]['args'] = new_args
        self.tasks[task_id]['kwargs'] = kwargs
        if not exceptions:
            # There are no dependency errors
            exec_fu = None
            # Acquire a lock, retest the state, launch
            with self.tasks[task_id]['task_launch_lock']:
                if self.tasks[task_id]['status'] == States.pending:
                    exec_fu = self.launch_task(
                        task_id, self.tasks[task_id]['func'], *new_args, **kwargs)
        else:
            logger.info(
                "Task {} failed due to dependency failure".format(task_id))
            # Raise a dependency exception
            self.tasks[task_id]['status'] = States.dep_fail
            if self.monitoring is not None:
                task_log_info = self._create_task_log_info(task_id, 'lazy')
                self.monitoring.send(MessageType.TASK_INFO, task_log_info)

            # Synthesize a failed future carrying the dependency error.
            exec_fu = Future()
            exec_fu.retries_left = 0
            exec_fu.set_exception(
                DependencyError(exceptions, task_id, None))

        if exec_fu:
            try:
                exec_fu.add_done_callback(
                    partial(self.handle_exec_update, task_id))
            except Exception as e:
                # NOTE(review): swallowing is deliberate — a failure to
                # register the callback must not take down the kernel.
                logger.error(
                    "add_done_callback got an exception {} which will be ignored"
                    .format(e))

            self.tasks[task_id]['exec_fu'] = exec_fu
def _copy_result(self, source: Future, destination: Future):
    """Mirror the outcome of ``source`` onto ``destination`` once it settles."""
    def _done(_):
        if source.exception() is not None:
            destination.set_exception(source.exception())
        else:
            destination.set_result(source.result())
        # NOTE(review): task_done fires for both outcomes here; the original
        # formatting made its placement ambiguous — confirm against the
        # queue's accounting expectations.
        self.queue.task_done()

    source.add_done_callback(_done)
def __init__(self, url: str, webloc_filepath: str, options: str, future: Future, callback: callable):
    """Record one job's parameters and hook completion handling onto ``future``."""
    alphabet = string.ascii_lowercase + string.digits
    # Random lowercase/digit identifier of length ``self.id_length``.
    self.id = ''.join(random.choice(alphabet) for _ in range(self.id_length))
    self.url = url
    self.webloc_filepath = webloc_filepath
    self.options = options
    self._future = future
    self.callback = callback
    future.add_done_callback(self.done_callback)
def wait(future: Future):
    """Wait future object finished.

    :param future: Future object generated by run_in_executor*()
    """
    done_gate = BoundedSemaphore()
    done_gate.acquire()
    # Completion of the future (success, error or cancel) opens the gate.
    future.add_done_callback(lambda _f: done_gate.release())
    with done_gate:
        return
def add_request(self, request):
    """Queue ``request`` for execution; recursively enqueue generated requests."""
    def _callback(done_future, add_request=self.add_request):
        # A task may yield a generator of follow-up requests.
        outcome = done_future.result()
        if inspect.isgenerator(outcome):
            for follow_up in outcome:
                add_request(follow_up)

    f = Future()
    request.set_future(f)
    f.add_done_callback(_callback)
    self.queue.put(_WorkItem(f, request))
def _add_future_data( future: Future, return_type: Any, _from_tuple=True, viewer: Optional[viewer.Viewer] = None, source: dict = None, ): """Process a Future object. This function will be called to process function that has a return annotation of one of the `napari.types.<layer_name>Data` ... and will add the data in `result` to the current viewer as the corresponding layer type. Parameters ---------- future : Future An instance of `concurrent.futures.Future` (or any third-party) object with the same interface, that provides `add_done_callback` and `result` methods. When the future is `done()`, the `result()` will be added to the viewer. return_type : type The return annotation that was used in the decorated function. _from_tuple : bool, optional (only for internal use). True if the future returns `LayerDataTuple`, False if it returns one of the `LayerData` types. """ # when the future is done, add layer data to viewer, dispatching # to the appropriate method based on the Future data type. adder = (_add_layer_data_tuples_to_viewer if _from_tuple else _add_layer_data_to_viewer) def _on_future_ready(f: Future): adder( f.result(), return_type=return_type, viewer=viewer, source=source, ) _FUTURES.discard(future) # We need the callback to happen in the main thread... # This still works (no-op) in a headless environment, but # we could be even more granular with it, with a function # that checks if we're actually in a QApp before wrapping. # with suppress(ImportError): # from superqt.utils import ensure_main_thread # _on_future_ready = ensure_main_thread(_on_future_ready) future.add_done_callback(_on_future_ready) _FUTURES.add(future)
def _get(self):
    """Pop an item if available, else register and return a getter Future."""
    fut = item = None
    with self._mutex:  # Critical section never blocks.
        if self._queue and not self._getters:
            item = self._queue.popleft()
            self._get_complete()
        else:
            # Empty queue (or waiters ahead of us): hand back a future.
            fut = Future()
            fut.add_done_callback(
                lambda f: self._get_complete() if not f.cancelled() else None)
            self._getters.append(fut)
    return item, fut
def _map_future(fut: Future, func: Callable[[T], S]) -> "RPCFuture[S]":
    """Return a future resolving to ``func`` applied to ``fut``'s result.

    BUGFIX: cancellation of ``fut`` is now propagated.  Previously
    ``f.result()`` on a cancelled future raised CancelledError (a
    BaseException since Python 3.8), which escaped ``except Exception``
    and left the mapped future pending forever.
    """
    new_fut: "RPCFuture[S]" = RPCFuture()

    def _do_map(f):
        if f.cancelled():
            new_fut.cancel()
            return
        try:
            new_fut.set_result(func(f.result()))
        except Exception as e:
            # Source-future failure or a failure of ``func`` itself.
            new_fut.set_exception(e)

    fut.add_done_callback(_do_map)
    return new_fut
def _get(self):
    """Non-blocking get: returns (item, None) or (None, future-to-wait-on)."""
    pending_fut = None
    item = None
    with self._mutex:  # Critical section never blocks.
        must_wait = not self._queue or self._getters
        if must_wait:
            pending_fut = Future()

            def _complete_unless_cancelled(f):
                if not f.cancelled():
                    self._get_complete()

            pending_fut.add_done_callback(_complete_unless_cancelled)
            self._getters.append(pending_fut)
        else:
            item = self._queue.popleft()
            self._get_complete()
    return item, pending_fut
def heartbeat(self):
    """Send a ping to the server and schedule the next heartbeat."""
    if not self._running_evt.is_set():
        return
    tag = self._really._gen_tag()
    ping = {
        "tag": tag,
        "cmd": "ping",
    }
    # The registered future resolves when the matching Pong arrives.
    future = Future()
    future.add_done_callback(self._heartbeat_callback)
    self.register_future(tag, Pong, future)
    logging.debug("Sending out a heartbeat")
    self._really._raw_send(ping)
    self._set_heartbeat_timer()
def heartbeat(self):
    """Ping the server if the client is still running."""
    if self._running_evt.is_set():
        ping_tag = self._really._gen_tag()
        message = {
            "tag": ping_tag,
            "cmd": "ping",
        }
        # Resolved by the reply dispatcher when the Pong comes back.
        pong_future = Future()
        pong_future.add_done_callback(self._heartbeat_callback)
        self.register_future(ping_tag, Pong, pong_future)
        logging.debug("Sending out a heartbeat")
        self._really._raw_send(message)
        self._set_heartbeat_timer()
def put_task(self, dp, callback=None):
    """
    Args:
        dp (list): A datapoint as inputs. It could be either batched or not
            batched depending on the predictor implementation).
        callback: a thread-safe callback. When the results are ready, it will
            be called with the "future" object.

    Returns:
        concurrent.futures.Future: a Future of results.
    """
    result_future = Future()
    if callback is not None:
        result_future.add_done_callback(callback)
    self.input_queue.put((dp, result_future))
    return result_future
def task_queue(task, iterator, concurrency=10, on_fail=lambda exc, obj: None):
    """
    Concurrent execution of task in number of pools.

    :param task: callable applied to each object pulled from ``iterator``
    :param iterator: source of work items
    :param concurrency: number of worker threads / in-flight tasks
    :param on_fail: called as ``on_fail(exception, obj)`` for each failure
    :return: Future resolving to the stats dict once all work completes

    BUGFIX: the default ``on_fail`` previously accepted one argument but is
    invoked with two (exception, obj); any task failure therefore raised
    TypeError inside the done-callback and the result future never resolved.
    BUGFIX: an empty ``iterator`` now resolves immediately instead of
    hanging (no callback would ever fire).
    """
    def submit():
        try:
            obj = next(iterator)
        except StopIteration:
            return
        if result.cancelled():
            return
        stats['delayed'] += 1
        future = executor.submit(task, obj)
        future.obj = obj
        future.add_done_callback(upload_done)

    def upload_done(future):
        with io_lock:
            submit()
            stats['delayed'] -= 1
            stats['done'] += 1
            if future.exception():
                on_fail(future.exception(), future.obj)
            if stats['delayed'] == 0:
                result.set_result(stats)

    def cleanup(_):
        # io_lock is re-entrant, so resolving from upload_done is safe.
        with io_lock:
            executor.shutdown(wait=False)

    io_lock = threading.RLock()
    executor = ThreadPoolExecutor(concurrency)
    result = Future()
    result.stats = stats = {'done': 0, 'delayed': 0}
    result.add_done_callback(cleanup)
    with io_lock:
        for _ in range(concurrency):
            submit()
        # Empty iterator: resolve now, no callback will ever fire.
        if stats['delayed'] == 0 and not result.done():
            result.set_result(stats)
    return result
def submit(self, func, *args, **kwargs):
    """
    Reimplemented from :class:`concurrent.futures.Executor`

    Schedule the `func(*args, **kwargs)` to be executed and return an
    :class:`Future` instance representing the result of the computation.
    """
    with self._state_lock:
        if self._shutdown:
            raise RuntimeError("Cannot schedule new futures after " +
                               "shutdown.")

        future = Future()
        work_item = FutureRunnable(future, func, args, kwargs)
        self._futures.append(future)
        future.add_done_callback(self._future_done)
        self._threadPool.start(work_item)
        return future
def submit(self, fn, *futures: Tuple[Future], mapping=None) -> Future:
    """Submit ``fn``, deferring execution until all ``futures`` have completed."""
    with self._lock:
        self._total += 1
        if not futures:
            # No dependencies: hand straight to the executor.
            return self._executor.submit(fn)

        dependency = FuturesAggregator(*futures)
        proxy = Future()
        self._pending[proxy] = dependency
        proxy.add_done_callback(self._proxy_done)
        dependency.add_done_callback(
            partial(self._ready_callback, fn, mapping, proxy))
        return proxy
def _send_request(self, request, callback=None, timeout=1000):
    """Send ``request`` over the secure channel; blocks for the answer unless
    a callback is supplied."""
    # HACK to make sure we can convert our request to binary before
    # increasing request counter etc ...
    request.to_binary()
    # END HACK
    with self._lock:
        request.RequestHeader = self._create_request_header(timeout)
        hdr = ua.Header(ua.MessageType.SecureMessage, ua.ChunkType.Single,
                        self._security_token.ChannelId)
        symhdr = self._create_sym_algo_header()
        seqhdr = self._create_sequence_header()
        future = Future()
        if callback:
            future.add_done_callback(callback)
        self._callbackmap[seqhdr.RequestId] = future
        self._write_socket(hdr, symhdr, seqhdr, request)
        if not callback:
            # NOTE(review): blocking on the result while holding self._lock —
            # mirrors the original layout; confirm the reader thread does not
            # need this lock.
            data = future.result()
            self._check_answer(data, " in response to " + request.__class__.__name__)
            return data
def _on_message(self, from_addr, data): data = json.loads(data) if "request" in data: reply_future = Future() self.requests_received[reply_future] = (from_addr, data["request"], data.get("id")) self.ON_REQUEST.fire(reply_future, from_addr, data["request"], id) reply_future.add_done_callback(self.reply) #then(reply_future, self.reply) if "reply" in data: if "id" not in data: print "error: missing 'id' in reply" return if not (isinstance(data["id"], basestring) or type(data["id"]) is int): print "error: 'id' should be an int or a string" return if data["id"] not in self.requests_sent: print self.requests_sent print data["id"] print "error: unknown 'id'" return future, request = self.requests_sent[data["id"]] future.set_result(data["reply"])
def poll(self, timeout=-1):
    """Return a Future for a poll event"""
    future = Future()
    if timeout == 0:
        # Zero timeout: poll synchronously and resolve immediately.
        try:
            result = super(Poller, self).poll(0)
        except Exception as e:
            future.set_exception(e)
        else:
            future.set_result(result)
        return future

    loop = IOLoop.current()

    # register Future to be called as soon as any event is available on any socket
    # only support polling on zmq sockets, for now
    watcher = Future()
    for socket, mask in self.sockets:
        if mask & _zmq.POLLIN:
            socket._add_recv_event("poll", future=watcher)
        if mask & _zmq.POLLOUT:
            socket._add_send_event("poll", future=watcher)

    def on_poll_ready(f):
        # The watcher fired (event, timeout or cancel) — settle ``future``.
        if future.done():
            return
        if watcher.exception():
            future.set_exception(watcher.exception())
        else:
            try:
                # Re-poll non-blockingly to collect the actual event list.
                result = super(Poller, self).poll(0)
            except Exception as e:
                future.set_exception(e)
            else:
                future.set_result(result)

    watcher.add_done_callback(on_poll_ready)

    if timeout > 0:
        # schedule cancel to fire on poll timeout, if any
        def trigger_timeout():
            if not watcher.done():
                watcher.set_result(None)

        # timeout is in milliseconds; call_later takes seconds.
        timeout_handle = loop.call_later(1e-3 * timeout, trigger_timeout)

        def cancel_timeout(f):
            loop.remove_timeout(timeout_handle)

        future.add_done_callback(cancel_timeout)

    def cancel_watcher(f):
        # If the caller abandons the outer future, stop watching sockets.
        if not watcher.done():
            watcher.cancel()

    future.add_done_callback(cancel_watcher)

    return future
def _send_request(self, request, callback=None, timeout=1000, message_type=ua.MessageType.SecureMessage):
    """
    send request to server, lower-level method
    timeout is the timeout written in ua header
    returns future
    """
    with self._lock:
        request.RequestHeader = self._create_request_header(timeout)
        try:
            binreq = request.to_binary()
        except BaseException:
            # Was a bare `except:`; BaseException keeps identical
            # catch-everything-then-reraise semantics while being explicit.
            # Reset the request handle on any serialization failure
            # (see self._create_request_header).
            self._request_handle -= 1
            raise
        self._request_id += 1
        future = Future()
        if callback:
            future.add_done_callback(callback)
        self._callbackmap[self._request_id] = future
        msg = self._connection.message_to_binary(binreq, message_type, self._request_id)
        self._socket.write(msg)
    return future
def submit(self, func, *args, **kwargs):
    """
    Reimplemented from :class:`concurrent.futures.Executor`

    Schedule the `func(*args, **kwargs)` to be executed and return an
    :class:`Future` instance representing the result of the computation.
    """
    with self._state_lock:
        if self._shutdown:
            raise RuntimeError("Cannot schedule new futures after " +
                               "shutdown.")

        is_task = isinstance(func, Task)
        if is_task:
            warnings.warn("Use `submit_task` to run `Task`s",
                          DeprecationWarning, stacklevel=2)
            f, runnable = self.__make_task_runnable(func)
        else:
            f = Future()
            runnable = FutureRunnable(f, func, args, kwargs)

        self._futures.append(f)
        f.add_done_callback(self._future_done)
        self._threadPool.start(runnable)
        return f
print('*crash*') return try: it = iter(obj) except TypeError: yield obj return else: yield from it #g = tokenize() #for res in g: # print(res) from concurrent.futures import Future def f(): f = future() def foo(fut): print(fut, fut.result()) f = Future() f.add_done_callback(foo) f.set_result(42)
class TaskExecutor():
    """
    Runs concurrent tasks
    - initial parameter is number of concurrent threads
    - run method applies task, iterator, fail_function
    """
    # NOTE(review): these class-level defaults are shadowed by instance
    # attributes set in __init__/run(); the shared Future in particular is
    # always replaced before use by run().
    _executor = None
    _io_lock = threading.RLock()
    _concurrency = 10
    _task = lambda _: None
    _on_fail = lambda _: None
    _iterator = iter([])
    _available = False
    result = Future()

    def __init__(self, concurrency=10):
        self._concurrency = concurrency
        self._executor = ThreadPoolExecutor(concurrency)
        self._available = True

    def _submit(self):
        # Pull the next work item; stop silently when exhausted or cancelled.
        try:
            obj = next(self._iterator)
        except StopIteration:
            return
        if self.result.cancelled():
            return
        self.result.stats['delayed'] += 1
        future = self._executor.submit(self._task, obj)
        future.obj = obj
        future.add_done_callback(self._upload_done)

    def _upload_done(self, future):
        # _io_lock is re-entrant, so set_result -> _cleanup is safe here.
        with self._io_lock:
            self._submit()
            self.result.stats['delayed'] -= 1
            self.result.stats['done'] += 1
            if future.exception():
                self._on_fail(future.exception(), future.obj)
            if self.result.stats['delayed'] == 0:
                self.result.set_result(self.result.stats)

    def _cleanup(self, _):
        # One-shot executor: mark unusable and release the worker threads.
        self._available = False
        with self._io_lock:
            self._executor.shutdown(wait=False)

    def run(self, task, iterator, on_fail=lambda exc, obj: None):
        """Apply ``task`` to every item of ``iterator`` concurrently.

        BUGFIX: the default ``on_fail`` used to take one argument but is
        called with (exception, obj); any task failure therefore raised
        TypeError in the done-callback and the result never resolved.
        """
        if not self._available:
            raise Exception("Executor is not _available")
        self._iterator = iterator
        self._task = task
        self._on_fail = on_fail
        self.result = Future()
        self.result.stats = {'done': 0, 'delayed': 0}
        self.result.add_done_callback(self._cleanup)
        with self._io_lock:
            for _ in range(self._concurrency):
                self._submit()
            # BUGFIX: with an empty iterator no callback ever fires —
            # resolve immediately instead of hanging forever.
            if self.result.stats['delayed'] == 0 and not self.result.done():
                self.result.set_result(self.result.stats)
        return self.result
class Crawler(RabbitManager): def __init__(self, redis_url="localhost", **kwargs): self._redis_url = redis_url self._crawl_throttle = 2 # seconds # auto queue, the parent don't know what to do with it # but it won't raise errors kwargs["queue"] = "auto" super(Crawler, self).__init__(**kwargs) self.listener_promise = Future() self.listener_promise.add_done_callback(self.listener_is_done) self.listener = CrawlerRedisListener(self._redis_url, ['crawl:start', 'crawl:stop', 'hosts:add'], self.log, self.listener_promise) self.stopped = False self.publisher = CrawlerRedisPublisher(redis_url) self.running = Event() self.running.set() self._last_queue_host = None def listener_is_done(self, promise): if promise.cancelled(): # smth went wrong, shutdown crawler self.log.warning("Shutting down, lost contact with crawl listener") self.stop() else: # listener is stopped, are we stopping too? if not restart it if not self.stopped: self.listener.start() def stop(self): self.stopped = True super(Crawler, self).stop() def __enter__(self): super(Crawler, self).__enter__() self.listener.start() return self def __exit__(self, exc_type, exc_val, exc_tb): self.listener.stop() super(Crawler, self).__exit__(exc_type, exc_val, exc_tb) def should_i_crawl(self, url): host = crawlmanager.extract_hostname(url) if self.listener.host_is_being_crawled(url, host=host): return False elif (self.listener.host_last_crawl_time(url, host=host) + timedelta(seconds=self._crawl_throttle) <= datetime.now(timezone.utc)): return True else: return False def send_result(self, req, crawl_request): result = addict.Dict() result.body = req.text result.cookies = [(key, val) for key, val in req.cookies.get_dict().items()] result.url = crawl_request.url result.crawl_task = crawl_request result.actions = crawl_request.actions result.headers = req.headers result.status_code = req.status_code result.crawl_time = datetime.now(timezone.utc).isoformat() toddler.send_message_sync( self._rabbitmq_url, # this is the rabbit url 
ujson.dumps(result.to_dict()), exchange=self._exchange, routing_key=self._routing_key ) def send_empty_result(self, crawl_request, e): result = addict.Dict() result.body = str(e) result.status_code = 500 result.url = crawl_request.url result.crawl_task = crawl_request result.actions = crawl_request.actions result.crawal_time = datetime.now(timezone.utc).isoformat() toddler.send_message_sync( self._rabbitmq_url, ujson.dumps(result.to_dict()), exchange=self._exchange, routing_key=self._routing_key ) def another_queue(self, old_host): make_queue_name = lambda x: "CrawlRequest:{}".format(x) host_index = self.listener.hosts.index(old_host) if host_index+1 == len(self.listener.hosts): return make_queue_name(self.listener.hosts[0]) else: return make_queue_name(self.listener.hosts[1]) def on_message(self, channel, basic_deliver, properties, body): """Invoked when message received from rabbit :param pika.channel.Channel channel: :param pika.spec.Basic.Deliver basic_deliver: :param pika.spec.BasicProperties properties: :param str body: :return: """ self.log.info("Received messages # %s from %s", basic_deliver.delivery_tag, properties.app_id) try: if self._tasks_number >= self._max_tasks: raise RuntimeError("Max tasks limit reached") self._tasks_number += 1 ftr = self._executor.submit(self.process_task, body) def process_done(future: Future): nonlocal self self._tasks_number -= 1 if future.cancelled(): # process_task ended by cancel self.requeue_message(self.requeue_message( basic_deliver.delivery_tag) ) else: if future.exception(): exception = future.exception() if (not isinstance(exception, RequeueMessage) and not isinstance(exception, ChangeQueue)): self.log.exception(exception) self.requeue_message( basic_deliver.delivery_tag ) if isinstance(exception, ChangeQueue): if not self.running.is_set(): self.running.clear() self.log.info("Changing queues") self.stop_consuming() self._queue = self.another_queue( exception.host) self.running.set() else: 
self.acknowledge_message(basic_deliver.delivery_tag) ftr.add_done_callback(process_done) return ftr except RuntimeError: self.requeue_message(basic_deliver.delivery_tag) time.sleep(0.5) except Exception as e: self.log.exception(e) self.requeue_message(basic_deliver.delivery_tag) time.sleep(10) def run(self): """Run consumer""" self.log.info("Running consumer") connection = self.connect() """:type: pika.SelectConnection""" channel = connection.channel() self._channel = channel self._connection = connection while not self.stopped: self.running.wait(5) if self.stopped: break for method_frame, properties, body in channel.consume(self.queue): while self._tasks_number >= self._max_tasks: time.sleep(0.1) self.on_message(channel, method_frame, properties, body) time.sleep(0.1) @property def queue(self): if self._last_queue_host is None: host_index = 0 else: host_index = self.listener.hosts.index(self._last_queue_host) if host_index >= len(self.listener.hosts): host_index = 0 host = self.listener.hosts[host_index] self._last_queue_host = host return "CrawlRequestQueue_{}".format(host) def download_content(self, crawl_request): s = requests.Session() if len(crawl_request.cookies) > 0: [s.cookies.set(name, value) for name, value in crawl_request.cookies] try: if isinstance(crawl_request.referer, str): s.headers.update({'referer': crawl_request.referer}) except KeyError: pass try: method = str(crawl_request.method).upper() except KeyError: method = "GET" response = None try: if method == "POST": try: response = s.post(str(crawl_request.url), data=crawl_request.data) except KeyError: try: response = s.post(str(crawl_request.url), json=crawl_request.json) except KeyError: response = s.post(str(crawl_request.url)) else: response = s.get(str(crawl_request.url)) except TypeError as e: self.log.error("Got TypeError on url" + repr(crawl_request)) raise e except (exceptions.ConnectionError, exceptions.RequestException) as e: self.log.warning( "Connection error with {}: 
{}".format(crawl_request.url, str(e))) self.send_empty_result(crawl_request, e) except Exception as e: self.log.error("Exception on {} ".format(crawl_request.url)) self.log.error(repr(crawl_request)) self.log.exception(e) return method, response @json_task def process_task(self, crawl_request): """ Processes the task `We are in thread, no asyncio please, as we do not attach event loop here` :param crawl_request: :return: """ crawl_request = addict.Dict(crawl_request) wait_counter = 0 while not self.should_i_crawl(crawl_request.url): # self.log.debug("It's not the time to crawl {}".format( # crawl_request.url)) e = Event() e.wait(0.01) wait_counter += 1 if self.stopped or wait_counter > 5: # @TODO add get host raise ChangeQueue(crawlmanager.extract_hostname( crawl_request.url )) self.log.debug("Will crawl: {}".format(crawl_request.url)) self.publisher.send_start_crawl(crawl_request.url) try: if "timeout" not in crawl_request: raise KeyError timeout = dateutil.parser.parse(crawl_request.timeout) if timeout.tzinfo is None: timeout = timeout.replace(tzinfo=timezone.utc) if datetime.now(timezone.utc) <= timeout: raise RequeueMessage except (KeyError, ValueError, TypeError): # no timeout so do it asap pass method, response = self.download_content(crawl_request) self.publisher.send_end_crawl(crawl_request.url) if response is None: raise RequeueMessage self.log.info("{} - {} {}".format(method, response.status_code, crawl_request.url)) self.send_result(response, crawl_request)