def test_await_future(self):
    f = Future()

    def finish_later():
        time.sleep(0.1)
        f.set_result('future')

    Thread(target=finish_later).start()
    assert self.client.wait([f])
    assert f.done()
    assert f.result() == 'future'

def _on_task_done(self, future: Future):
    assert future.done()
    assert self.__task is not None
    assert self.__task.future is future
    assert self.__task.watcher.future() is future
    self.__task, task = None, self.__task
    task.deleteLater()

    ex = future.exception()
    if ex is not None:
        self.on_exception(ex)
    else:
        self.on_done(future.result())

class NamespacedResourceReflector(LoggingConfigurable):
    """
    Base class for keeping a local up-to-date copy of a set of
    kubernetes resources.

    Must be subclassed once per kind of resource that needs watching.
    """

    labels = Dict({}, config=True, help="""
        Labels to reflect onto local cache
        """)

    fields = Dict({}, config=True, help="""
        Fields to restrict the reflected objects
        """)

    namespace = Unicode(None, allow_none=True, help="""
        Namespace to watch for resources in
        """)

    resources = Dict({}, help="""
        Dictionary of resource names to the appropriate resource objects.

        This can be accessed across threads safely.
        """)

    kind = Unicode('resource', help="""
        Human readable name for kind of object we're watching for.

        Used for diagnostic messages.
        """)

    list_method_name = Unicode("", help="""
        Name of function (on apigroup represented by `api_group_name`) that is
        to be called to list resources.

        This will be passed a namespace & a label selector. You most likely
        want something of the form list_namespaced_<resource> - for example,
        `list_namespaced_pod` will give you a PodReflector.

        This must be set by a subclass.
        """)

    api_group_name = Unicode('CoreV1Api', help="""
        Name of class that represents the apigroup on which
        `list_method_name` is to be found.

        Defaults to CoreV1Api, which has everything in the 'core' API group.
        If you want to watch Ingresses, for example, you would have to use
        ExtensionsV1beta1Api.
        """)

    request_timeout = Int(60, config=True, help="""
        Network timeout for kubernetes watch.

        Trigger watch reconnect when a given request is taking too long,
        which can indicate network issues.
        """)

    timeout_seconds = Int(10, config=True, help="""
        Timeout for kubernetes watch.

        Trigger watch reconnect when no watch event has been received.
        This will cause a full reload of the currently existing resources
        from the API server.
        """)

    restart_seconds = Int(30, config=True, help="""
        Maximum time before restarting a watch.

        The watch will be restarted at least this often, even if events are
        still arriving. Avoids trusting kubernetes watch to yield all events,
        which seems to not be a safe assumption.
        """)

    on_failure = Any(help="""Function to be called when the reflector gives up.""")

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Load kubernetes config here, since this is a Singleton and
        # so this __init__ will be run way before anything else gets run.
        try:
            config.load_incluster_config()
        except config.ConfigException:
            config.load_kube_config()
        self.api = shared_client(self.api_group_name)

        # FIXME: Protect against malicious labels?
        self.label_selector = ','.join(
            ['{}={}'.format(k, v) for k, v in self.labels.items()])
        self.field_selector = ','.join(
            ['{}={}'.format(k, v) for k, v in self.fields.items()])

        self.first_load_future = Future()
        self._stop_event = threading.Event()

        self.start()

    def __del__(self):
        self.stop()

    def _list_and_update(self):
        """
        Update current list of resources by doing a full fetch.

        Overwrites all current resource info.
        """
        initial_resources = getattr(self.api, self.list_method_name)(
            self.namespace,
            label_selector=self.label_selector,
            field_selector=self.field_selector,
            _request_timeout=self.request_timeout,
            _preload_content=False,
        )
        initial_resources = json.loads(initial_resources.read())
        # This is an atomic operation on the dictionary!
        self.resources = {
            p["metadata"]["name"]: p for p in initial_resources["items"]
        }
        # return the resource version so we can hook up a watch
        return initial_resources["metadata"]["resourceVersion"]

    def _watch_and_update(self):
        """
        Keeps the current list of resources up-to-date

        This method is to be run not on the main thread!

        We first fetch the list of current resources, and store that. Then we
        register to be notified of changes to those resources, and keep our
        local store up-to-date based on these notifications.

        We also perform exponential backoff, giving up after we hit 32s
        wait time. This should protect against network connections dropping
        and intermittent unavailability of the api-server. Every time we
        recover from an exception we also do a full fetch, to pick up
        changes that might've been missed in the time we were not doing
        a watch.

        Note that we're playing a bit with fire here, by updating a dictionary
        in this thread while it is probably being read in another thread
        without using locks! However, dictionary access itself is atomic,
        and as long as we don't try to mutate them (do a
        'fetch / modify / update' cycle on them), we should be ok!
        """
        selectors = []
        log_name = ""
        if self.label_selector:
            selectors.append("label selector=%r" % self.label_selector)
        if self.field_selector:
            selectors.append("field selector=%r" % self.field_selector)
        log_selector = ', '.join(selectors)

        cur_delay = 0.1

        self.log.info(
            "watching for %s with %s in namespace %s",
            self.kind, log_selector, self.namespace,
        )
        while True:
            self.log.debug("Connecting %s watcher", self.kind)
            start = time.monotonic()
            w = watch.Watch()
            try:
                resource_version = self._list_and_update()
                if not self.first_load_future.done():
                    # signal that we've loaded our initial data
                    self.first_load_future.set_result(None)
                watch_args = {
                    'namespace': self.namespace,
                    'label_selector': self.label_selector,
                    'field_selector': self.field_selector,
                    'resource_version': resource_version,
                }
                if self.request_timeout:
                    # set network receive timeout
                    watch_args['_request_timeout'] = self.request_timeout
                if self.timeout_seconds:
                    # set watch timeout
                    watch_args['timeout_seconds'] = self.timeout_seconds
                method = partial(
                    getattr(self.api, self.list_method_name),
                    _preload_content=False)
                # in case of timeout_seconds, the w.stream just exits
                # (no exception thrown) -> we stop the watcher and start
                # a new one
                for watch_event in w.stream(method, **watch_args):
                    # Remember that these events are k8s api related
                    # WatchEvent objects, not k8s Event or Pod
                    # representations; those reside in the WatchEvent's
                    # object field, depending on what kind of resource
                    # is watched.
                    #
                    # ref: https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.16/#watchevent-v1-meta
                    # ref: https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.16/#event-v1-core
                    cur_delay = 0.1
                    resource = watch_event['object']
                    if watch_event['type'] == 'DELETED':
                        # This is an atomic delete operation on the dictionary!
                        self.resources.pop(resource["metadata"]["name"], None)
                    else:
                        # This is an atomic operation on the dictionary!
                        self.resources[resource["metadata"]["name"]] = resource
                    if self._stop_event.is_set():
                        self.log.info("%s watcher stopped", self.kind)
                        break
                    watch_duration = time.monotonic() - start
                    if watch_duration >= self.restart_seconds:
                        self.log.debug(
                            "Restarting %s watcher after %i seconds",
                            self.kind, watch_duration,
                        )
                        break
            except ReadTimeoutError:
                # network read time out, just continue and restart the watch
                # this could be due to a network problem or just low activity
                self.log.warning("Read timeout watching %s, reconnecting", self.kind)
                continue
            except Exception:
                cur_delay = cur_delay * 2
                if cur_delay > 30:
                    self.log.exception("Watching resources never recovered, giving up")
                    if self.on_failure:
                        self.on_failure()
                    return
                self.log.exception(
                    "Error when watching resources, retrying in %ss", cur_delay)
                time.sleep(cur_delay)
                continue
            else:
                # no events on watch, reconnect
                self.log.debug("%s watcher timeout", self.kind)
            finally:
                w.stop()
                if self._stop_event.is_set():
                    self.log.info("%s watcher stopped", self.kind)
                    break
        self.log.warning("%s watcher finished", self.kind)

    def start(self):
        """
        Start the reflection process!

        We'll do a blocking read of all resources first, so that we don't
        race with any operations that are checking the state of the pod
        store - such as polls. This should be called only once at the start
        of program initialization (when the singleton is being created),
        and not afterwards!
        """
        if hasattr(self, 'watch_thread'):
            raise ValueError('Thread watching for resources is already running')

        self._list_and_update()
        self.watch_thread = threading.Thread(target=self._watch_and_update)
        # If the watch_thread is the only thread left alive, exit app
        self.watch_thread.daemon = True
        self.watch_thread.start()

    def stop(self):
        self._stop_event.set()

    def stopped(self):
        return self._stop_event.is_set()

class KernelManager(ConnectionFileMixin):
    """Manages a single kernel in a subprocess on this host.

    This version starts kernels with Popen.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(**kwargs)
        self._shutdown_status = _ShutdownStatus.Unset
        try:
            self._ready = Future()
        except RuntimeError:
            # No event loop running, use concurrent future
            self._ready = CFuture()

    _created_context: Bool = Bool(False)

    # The PyZMQ Context to use for communication with the kernel.
    context: Instance = Instance(zmq.Context)

    @default("context")
    def _context_default(self) -> zmq.Context:
        self._created_context = True
        return zmq.Context()

    # the class to create with our `client` method
    client_class: DottedObjectName = DottedObjectName(
        "jupyter_client.blocking.BlockingKernelClient"
    )
    client_factory: Type = Type(klass="jupyter_client.KernelClient")

    @default("client_factory")
    def _client_factory_default(self) -> Type:
        return import_item(self.client_class)

    @observe("client_class")
    def _client_class_changed(self, change: t.Dict[str, DottedObjectName]) -> None:
        self.client_factory = import_item(str(change["new"]))

    kernel_id: str = Unicode(None, allow_none=True)

    # The kernel provisioner with which this KernelManager is communicating.
    # This will generally be a LocalProvisioner instance unless the kernelspec
    # indicates otherwise.
    provisioner: t.Optional[KernelProvisionerBase] = None

    kernel_spec_manager: Instance = Instance(kernelspec.KernelSpecManager)

    @default("kernel_spec_manager")
    def _kernel_spec_manager_default(self) -> kernelspec.KernelSpecManager:
        return kernelspec.KernelSpecManager(data_dir=self.data_dir)

    @observe("kernel_spec_manager")
    @observe_compat
    def _kernel_spec_manager_changed(self, change: t.Dict[str, Instance]) -> None:
        self._kernel_spec = None

    shutdown_wait_time: Float = Float(
        5.0,
        config=True,
        help="Time to wait for a kernel to terminate before killing it, "
        "in seconds. When a shutdown request is initiated, the kernel "
        "will be immediately sent an interrupt (SIGINT), followed "
        "by a shutdown_request message, after 1/2 of `shutdown_wait_time` "
        "it will be sent a terminate (SIGTERM) request, and finally at "
        "the end of `shutdown_wait_time` will be killed (SIGKILL). terminate "
        "and kill may be equivalent on windows. Note that this value can be "
        "overridden by the in-use kernel provisioner since shutdown times "
        "may vary by provisioned environment.",
    )

    kernel_name: Unicode = Unicode(kernelspec.NATIVE_KERNEL_NAME)

    @observe("kernel_name")
    def _kernel_name_changed(self, change: t.Dict[str, Unicode]) -> None:
        self._kernel_spec = None
        if change["new"] == "python":
            self.kernel_name = kernelspec.NATIVE_KERNEL_NAME

    _kernel_spec: t.Optional[kernelspec.KernelSpec] = None

    @property
    def kernel_spec(self) -> t.Optional[kernelspec.KernelSpec]:
        if self._kernel_spec is None and self.kernel_name != "":
            self._kernel_spec = self.kernel_spec_manager.get_kernel_spec(self.kernel_name)
        return self._kernel_spec

    kernel_cmd = List(Unicode(), help="""The Popen Command to launch the kernel.""")

    extra_env = Dict(help="""Extra environment variables to be set for the kernel.""")

    @property
    def ready(self) -> Future:
        """A future that resolves when the kernel process has started for the first time"""
        return self._ready

    @property
    def ipykernel(self) -> bool:
        return self.kernel_name in {"python", "python2", "python3"}

    # Protected traits
    _launch_args: Any = Any()
    _control_socket: Any = Any()

    _restarter: Any = Any()

    autorestart: Bool = Bool(
        True, config=True, help="""Should we autorestart the kernel if it dies."""
    )

    shutting_down: bool = False

    def __del__(self) -> None:
        self._close_control_socket()
        self.cleanup_connection_file()

    # --------------------------------------------------------------------------
    # Kernel restarter
    # --------------------------------------------------------------------------

    def start_restarter(self) -> None:
        pass

    def stop_restarter(self) -> None:
        pass

    def add_restart_callback(self, callback: t.Callable, event: str = "restart") -> None:
        """register a callback to be called when a kernel is restarted"""
        if self._restarter is None:
            return
        self._restarter.add_callback(callback, event)

    def remove_restart_callback(self, callback: t.Callable, event: str = "restart") -> None:
        """unregister a callback to be called when a kernel is restarted"""
        if self._restarter is None:
            return
        self._restarter.remove_callback(callback, event)

    # --------------------------------------------------------------------------
    # create a Client connected to our Kernel
    # --------------------------------------------------------------------------

    def client(self, **kwargs) -> KernelClient:
        """Create a client configured to connect to our kernel"""
        kw = {}
        kw.update(self.get_connection_info(session=True))
        kw.update(dict(
            connection_file=self.connection_file,
            parent=self,
        ))

        # add kwargs last, for manual overrides
        kw.update(kwargs)
        return self.client_factory(**kw)

    # --------------------------------------------------------------------------
    # Kernel management
    # --------------------------------------------------------------------------

    def format_kernel_cmd(self, extra_arguments: t.Optional[t.List[str]] = None) -> t.List[str]:
        """replace templated args (e.g. {connection_file})"""
        extra_arguments = extra_arguments or []
        assert self.kernel_spec is not None
        cmd = self.kernel_spec.argv + extra_arguments

        if cmd and cmd[0] in {
            "python",
            "python%i" % sys.version_info[0],
            "python%i.%i" % sys.version_info[:2],
        }:
            # executable is 'python' or 'python3', use sys.executable.
            # These will typically be the same,
            # but if the current process is in an env
            # and has been launched by abspath without
            # activating the env, python on PATH may not be sys.executable,
            # but it should be.
            cmd[0] = sys.executable

        # Make sure to use the realpath for the connection_file
        # On windows, when running with the store python, the connection_file path
        # is not usable by non python kernels because the path is being rerouted when
        # inside of a store app.
        # See this bug here: https://bugs.python.org/issue41196
        ns = dict(
            connection_file=os.path.realpath(self.connection_file),
            prefix=sys.prefix,
        )

        if self.kernel_spec:
            ns["resource_dir"] = self.kernel_spec.resource_dir

        ns.update(self._launch_args)

        pat = re.compile(r"\{([A-Za-z0-9_]+)\}")

        def from_ns(match):
            """Get the key out of ns if it's there, otherwise no change."""
            return ns.get(match.group(1), match.group())

        return [pat.sub(from_ns, arg) for arg in cmd]

    async def _async_launch_kernel(self, kernel_cmd: t.List[str], **kw) -> None:
        """actually launch the kernel

        override in a subclass to launch kernel subprocesses differently.
        Note that provisioners can now be used to customize kernel environments.
        """
        assert self.provisioner is not None
        connection_info = await self.provisioner.launch_kernel(kernel_cmd, **kw)
        assert self.provisioner.has_process
        # Provisioner provides the connection information.
        # Load into kernel manager and write the connection file.
        self._force_connection_info(connection_info)

    _launch_kernel = run_sync(_async_launch_kernel)

    # Control socket used for polite kernel shutdown

    def _connect_control_socket(self) -> None:
        if self._control_socket is None:
            self._control_socket = self._create_connected_socket("control")
            self._control_socket.linger = 100

    def _close_control_socket(self) -> None:
        if self._control_socket is None:
            return
        self._control_socket.close()
        self._control_socket = None

    async def _async_pre_start_kernel(self, **kw) -> t.Tuple[t.List[str], t.Dict[str, t.Any]]:
        """Prepares a kernel for startup in a separate process.

        If random ports (port=0) are being used, this method must be called
        before the channels are created.

        Parameters
        ----------
        `**kw` : optional
            keyword arguments that are passed down to build the kernel_cmd
            and launching the kernel (e.g. Popen kwargs).
        """
        self.shutting_down = False
        self.kernel_id = self.kernel_id or kw.pop('kernel_id', str(uuid.uuid4()))
        # save kwargs for use in restart
        self._launch_args = kw.copy()
        # build the Popen cmd
        extra_arguments = kw.pop('extra_arguments', [])
        kernel_cmd = self.format_kernel_cmd(extra_arguments=extra_arguments)
        env = kw.pop('env', os.environ).copy()
        # Don't allow PYTHONEXECUTABLE to be passed to kernel process.
        # If set, it can bork all the things.
        env.pop('PYTHONEXECUTABLE', None)
        if not self.kernel_cmd:
            # If kernel_cmd has been set manually, don't refer to a kernel spec.
            # Environment variables from kernel spec are added to os.environ.
            env.update(self.kernel_spec.env or {})
        elif self.extra_env:
            env.update(self.extra_env)
        # the kernel is launched by start_kernel(), which expects this
        # method to return the command and the launch kwargs
        kw['env'] = env
        return kernel_cmd, kw

    pre_start_kernel = run_sync(_async_pre_start_kernel)

    async def _async_post_start_kernel(self, **kw) -> None:
        """Performs any post-startup tasks for the kernel."""
        self.start_restarter()
        self._connect_control_socket()
        assert self.provisioner is not None
        await self.provisioner.post_launch(**kw)

    post_start_kernel = run_sync(_async_post_start_kernel)

    async def _async_start_kernel(self, **kw):
        """Starts a kernel on this host in a separate process.

        If random ports (port=0) are being used, this method must be called
        before the channels are created.

        Parameters
        ----------
        `**kw` : optional
            keyword arguments that are passed down to build the kernel_cmd
            and launching the kernel (e.g. Popen kwargs).
""" done = self._ready.done() try: kernel_cmd, kw = await ensure_async(self.pre_start_kernel(**kw)) # launch the kernel subprocess self.log.debug("Starting kernel: %s", kernel_cmd) await ensure_async(self._launch_kernel(kernel_cmd, **kw)) await ensure_async(self.post_start_kernel(**kw)) if not done: # Add a small sleep to ensure tests can capture the state before done await asyncio.sleep(0.01) self._ready.set_result(None) except Exception as e: if not done: self._ready.set_exception(e) self.log.exception(self._ready.exception()) raise e start_kernel = run_sync(_async_start_kernel) async def _async_request_shutdown(self, restart: bool = False) -> None: """Send a shutdown request via control channel""" content = dict(restart=restart) msg = self.session.msg("shutdown_request", content=content) # ensure control socket is connected self._connect_control_socket() self.session.send(self._control_socket, msg) assert self.provisioner is not None await self.provisioner.shutdown_requested(restart=restart) self._shutdown_status = _ShutdownStatus.ShutdownRequest request_shutdown = run_sync(_async_request_shutdown) async def _async_finish_shutdown( self, waittime: t.Optional[float] = None, pollinterval: float = 0.1, restart: t.Optional[bool] = False, ) -> None: """Wait for kernel shutdown, then kill process if it doesn't shutdown. This does not send shutdown requests - use :meth:`request_shutdown` first. """ if waittime is None: waittime = max(self.shutdown_wait_time, 0) if self.provisioner: # Allow provisioner to override waittime = self.provisioner.get_shutdown_wait_time( recommended=waittime) try: await asyncio.wait_for(self._async_wait(pollinterval=pollinterval), timeout=waittime / 2) except asyncio.TimeoutError: self.log.debug("Kernel is taking too long to finish, terminating") self._shutdown_status = _ShutdownStatus.SigtermRequest await ensure_async(self._send_kernel_sigterm()) try: await asyncio.wait_for(self._async_wait(pollinterval=pollinterval), timeout=waittime / 2) except asyncio.TimeoutError: self.log.debug("Kernel is taking too long to finish, killing") self._shutdown_status = _ShutdownStatus.SigkillRequest await ensure_async(self._kill_kernel(restart=restart)) else: # Process is no longer alive, wait and clear if self.has_kernel: assert self.provisioner is not None await self.provisioner.wait() finish_shutdown = run_sync(_async_finish_shutdown) async def _async_cleanup_resources(self, restart: bool = False) -> None: """Clean up resources when the kernel is shut down""" if not restart: self.cleanup_connection_file() self.cleanup_ipc_files() self._close_control_socket() self.session.parent = None if self._created_context and not restart: self.context.destroy(linger=100) if self.provisioner: await self.provisioner.cleanup(restart=restart) cleanup_resources = run_sync(_async_cleanup_resources) async def _async_shutdown_kernel(self, now: bool = False, restart: bool = False): """Attempts to stop the kernel process cleanly. This attempts to shutdown the kernels cleanly by: 1. Sending it a shutdown message over the control channel. 2. If that fails, the kernel is shutdown forcibly by sending it a signal. Parameters ---------- now : bool Should the kernel be forcible killed *now*. This skips the first, nice shutdown attempt. restart: bool Will this kernel be restarted after it is shutdown. When this is True, connection files will not be cleaned up. 
""" # Shutdown is a no-op for a kernel that had a failed startup if self._ready.exception(): return self.shutting_down = True # Used by restarter to prevent race condition # Stop monitoring for restarting while we shutdown. self.stop_restarter() await ensure_async(self.interrupt_kernel()) if now: await ensure_async(self._kill_kernel()) else: await ensure_async(self.request_shutdown(restart=restart)) # Don't send any additional kernel kill messages immediately, to give # the kernel a chance to properly execute shutdown actions. Wait for at # most 1s, checking every 0.1s. await ensure_async(self.finish_shutdown(restart=restart)) await ensure_async(self.cleanup_resources(restart=restart)) shutdown_kernel = run_sync(_async_shutdown_kernel) async def _async_restart_kernel(self, now: bool = False, newports: bool = False, **kw) -> None: """Restarts a kernel with the arguments that were used to launch it. Parameters ---------- now : bool, optional If True, the kernel is forcefully restarted *immediately*, without having a chance to do any cleanup action. Otherwise the kernel is given 1s to clean up before a forceful restart is issued. In all cases the kernel is restarted, the only difference is whether it is given a chance to perform a clean shutdown or not. newports : bool, optional If the old kernel was launched with random ports, this flag decides whether the same ports and connection file will be used again. If False, the same ports and connection file are used. This is the default. If True, new random port numbers are chosen and a new connection file is written. It is still possible that the newly chosen random port numbers happen to be the same as the old ones. `**kw` : optional Any options specified here will overwrite those used to launch the kernel. """ if self._launch_args is None: raise RuntimeError("Cannot restart the kernel. " "No previous call to 'start_kernel'.") if not self._ready.done(): raise RuntimeError("Cannot restart the kernel. " "Kernel has not fully started.") # Stop currently running kernel. await ensure_async(self.shutdown_kernel(now=now, restart=True)) if newports: self.cleanup_random_ports() # Start new kernel. self._launch_args.update(kw) await ensure_async(self.start_kernel(**self._launch_args)) restart_kernel = run_sync(_async_restart_kernel) @property def has_kernel(self) -> bool: """Has a kernel process been started that we are actively managing.""" return self.provisioner is not None and self.provisioner.has_process async def _async_send_kernel_sigterm(self, restart: bool = False) -> None: """similar to _kill_kernel, but with sigterm (not sigkill), but do not block""" if self.has_kernel: assert self.provisioner is not None await self.provisioner.terminate(restart=restart) _send_kernel_sigterm = run_sync(_async_send_kernel_sigterm) async def _async_kill_kernel(self, restart: bool = False) -> None: """Kill the running kernel. This is a private method, callers should use shutdown_kernel(now=True). """ if self.has_kernel: assert self.provisioner is not None await self.provisioner.kill(restart=restart) # Wait until the kernel terminates. try: await asyncio.wait_for(self._async_wait(), timeout=5.0) except asyncio.TimeoutError: # Wait timed out, just log warning but continue - not much more we can do. self.log.warning( "Wait for final termination of kernel timed out - continuing..." 
) pass else: # Process is no longer alive, wait and clear if self.has_kernel: await self.provisioner.wait() _kill_kernel = run_sync(_async_kill_kernel) async def _async_interrupt_kernel(self) -> None: """Interrupts the kernel by sending it a signal. Unlike ``signal_kernel``, this operation is well supported on all platforms. """ if self.has_kernel: interrupt_mode = self.kernel_spec.interrupt_mode if interrupt_mode == 'signal': if sys.platform == 'win32': from .win_interrupt import send_interrupt send_interrupt(self.kernel.win32_interrupt_event) else: self.signal_kernel(signal.SIGINT) elif interrupt_mode == 'message': msg = self.session.msg("interrupt_request", content={}) self._connect_control_socket() self.session.send(self._control_socket, msg) else: raise RuntimeError( "Cannot interrupt kernel. No kernel is running!") interrupt_kernel = run_sync(_async_interrupt_kernel) async def _async_signal_kernel(self, signum: int) -> None: """Sends a signal to the process group of the kernel (this usually includes the kernel and any subprocesses spawned by the kernel). Note that since only SIGTERM is supported on Windows, this function is only useful on Unix systems. """ if self.has_kernel: assert self.provisioner is not None await self.provisioner.send_signal(signum) else: raise RuntimeError("Cannot signal kernel. No kernel is running!") signal_kernel = run_sync(_async_signal_kernel) async def _async_is_alive(self) -> bool: """Is the kernel process still running?""" if self.has_kernel: assert self.provisioner is not None ret = await self.provisioner.poll() if ret is None: return True return False is_alive = run_sync(_async_is_alive) async def _async_wait(self, pollinterval: float = 0.1) -> None: # Use busy loop at 100ms intervals, polling until the process is # not alive. If we find the process is no longer alive, complete # its cleanup via the blocking wait(). Callers are responsible for # issuing calls to wait() using a timeout (see _kill_kernel()). while await ensure_async(self.is_alive()): await asyncio.sleep(pollinterval)
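
# --- Lifecycle sketch (an assumption, not part of the original source) ---
# The blocking wrappers created via run_sync() mirror the _async_* methods
# above; 'python3' assumes an installed kernelspec of that name.
def demo_kernel_lifecycle():
    km = KernelManager(kernel_name='python3')
    km.start_kernel()
    try:
        kc = km.client()  # BlockingKernelClient by default (see client_class)
        kc.start_channels()
        kc.wait_for_ready(timeout=30)
        kc.stop_channels()
    finally:
        km.shutdown_kernel(now=False)
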
class ClientOperation(Operation):
    """
    Base class for a client operation.

    Nearly all functions are private/protected. Child classes should
    rewrite public API to properly document the types they deal with.
    """

    def __init__(self, stream_handler: StreamResponseHandler,
                 shape_index: ShapeIndex, connection: Connection):
        # do not instantiate directly, created by ServiceClient.new_operation()

        # all callbacks that modify state fire on the same thread,
        # so don't need locks to protect members
        self._stream_handler = stream_handler
        self._shape_index = shape_index
        self._message_count = 0
        self._closed_future = Future()
        self._closed_future.set_running_or_notify_cancel()  # prevent cancel
        self._initial_response_future = Future()
        self._initial_response_future.set_running_or_notify_cancel()  # prevent cancel
        self._protocol_handler = _ProtocolContinuationHandler(self)
        self._continuation = connection._new_stream(self._protocol_handler)

    def _activate(self, request: Shape) -> Future:
        headers = [
            Header.from_string(CONTENT_TYPE_HEADER, CONTENT_TYPE_APPLICATION_JSON),
            Header.from_string(SERVICE_MODEL_TYPE_HEADER, request._model_name()),
        ]
        payload = self._json_payload_from_shape(request)
        logger.debug("%r sending request APPLICATION_MESSAGE %s %r",
                     self, headers, payload)
        return self._continuation.activate(
            operation=self._model_name(),
            headers=headers,
            payload=payload,
            message_type=protocol.MessageType.APPLICATION_MESSAGE)

    def _send_stream_event(self, event: Shape) -> Future:
        headers = [
            Header.from_string(CONTENT_TYPE_HEADER, CONTENT_TYPE_APPLICATION_JSON),
            Header.from_string(SERVICE_MODEL_TYPE_HEADER, event._model_name()),
        ]
        payload = self._json_payload_from_shape(event)
        logger.debug("%r sending event APPLICATION_MESSAGE %s %r",
                     self, headers, payload)
        return self._continuation.send_message(
            headers=headers,
            payload=payload,
            message_type=protocol.MessageType.APPLICATION_MESSAGE)

    def _get_response(self) -> Future:
        return self._initial_response_future

    def close(self) -> Future:
        try:
            # try to send an empty APPLICATION_MESSAGE with the
            # TERMINATE_STREAM flag. This fails if the stream is already
            # closed, so just ignore errors.
            self._continuation.send_message(
                message_type=protocol.MessageType.APPLICATION_MESSAGE,
                flags=protocol.MessageFlag.TERMINATE_STREAM)
        except Exception:
            pass
        return self._closed_future

    def _find_header(self, headers, name, header_type=HeaderType.STRING):
        """Return header value, or None"""
        name_lower = name.lower()
        for header in headers:
            if header.name.lower() == name_lower:
                if header.type == header_type:
                    return header.value
        return None

    def _shape_from_json_payload(self, payload_bytes, shape_type):
        try:
            payload_str = payload_bytes.decode()
            payload_obj = json.loads(payload_str)
            shape = shape_type._from_payload(payload_obj)
            return shape
        except Exception as e:
            raise DeserializeError(
                "Failed to deserialize %s" % shape_type._model_name(),
                e, payload_bytes)

    def _json_payload_from_shape(self, shape):
        try:
            payload_obj = shape._to_payload()
            payload_str = json.dumps(payload_obj)
            payload_bytes = payload_str.encode()
            return payload_bytes
        except Exception as e:
            raise SerializeError("Failed to serialize", shape, e)

    def _on_continuation_message(self, headers: Sequence[Header],
                                 payload: bytes,
                                 message_type: protocol.MessageType,
                                 flags: int, **kwargs):
        self._message_count += 1
        logger.debug("%r received #%d %s %s %r", self, self._message_count,
                     message_type.name, headers, payload)
        try:
            model_name = self._find_header(headers, SERVICE_MODEL_TYPE_HEADER)
            if model_name is None:
                if flags & protocol.MessageFlag.TERMINATE_STREAM:
                    # it's ok for a TERMINATE_STREAM message to be empty
                    return
                msg = "Missing header: " + SERVICE_MODEL_TYPE_HEADER
                raise UnmappedDataError(msg, headers, payload)

            content_type = self._find_header(headers, CONTENT_TYPE_HEADER)
            if content_type is None:
                msg = "Missing header: " + CONTENT_TYPE_HEADER
                raise UnmappedDataError(msg, headers, payload)
            if content_type != CONTENT_TYPE_APPLICATION_JSON:
                msg = "Unexpected {}: '{}', expected: '{}'".format(
                    CONTENT_TYPE_HEADER, content_type,
                    CONTENT_TYPE_APPLICATION_JSON)
                raise UnmappedDataError(msg, headers, payload)

            if message_type == protocol.MessageType.APPLICATION_MESSAGE:
                self._handle_data(model_name, payload)
                return

            # otherwise it's an APPLICATION_ERROR
            found_type = self._shape_index.find_shape_type(model_name)
            if found_type is None:
                msg = "Unknown error type: {}".format(model_name)
                raise UnmappedDataError(msg, payload)
            if not issubclass(found_type, Exception):
                msg = ("Unexpected type: {} sent as APPLICATION_ERROR,"
                       " expected subclass of Exception").format(model_name)
                raise UnmappedDataError(msg, payload)

            shape = self._shape_from_json_payload(payload, found_type)
            raise shape
        except Exception as e:
            self._handle_error(e, flags)

    def _handle_data(self, model_name, payload):
        """
        Pass APPLICATION_MESSAGE payload along as a 1st response,
        or subsequent stream-event. Any exceptions raised by this
        function will be passed to _handle_error().
        """
        if self._message_count == 1:
            # 1st message is "response"
            expected_type = self._response_type()
            expected_name = expected_type._model_name()
            if model_name != expected_name:
                msg = "Unexpected response type: {}, expected: {}".format(
                    model_name, expected_name)
                raise UnmappedDataError(msg, payload)
            shape = self._shape_from_json_payload(payload, expected_type)
            self._initial_response_future.set_result(shape)
        else:
            # messages after the 1st are "stream events"
            expected_type = self._response_stream_type()
            if expected_type is None:
                msg = ("Operation does not support response stream events,"
                       " received type: {}").format(model_name)
                raise UnmappedDataError(msg, payload)
            expected_name = expected_type._model_name()
            if model_name != expected_name:
                msg = ("Unexpected response stream event type: {},"
                       " expected: {}").format(model_name, expected_name)
                raise UnmappedDataError(msg, payload)
            shape = self._shape_from_json_payload(payload, expected_type)
            self._stream_handler.on_stream_event(shape)

    def _handle_error(self, error, message_flags):
        """
        Pass along an APPLICATION_ERROR payload, or an exception encountered
        while processing an APPLICATION_MESSAGE, as a failed 1st response
        or a stream-error.
        """
        stream_already_terminated = message_flags & protocol.MessageFlag.TERMINATE_STREAM
        try:
            if self._message_count == 1:
                # error from 1st message is "response" error.
                self._initial_response_future.set_exception(error)
                # errors on initial response must terminate the stream
                if not stream_already_terminated:
                    self.close()
            elif self._stream_handler is not None:
                # errors from subsequent messages are "stream errors".
                # If this callback returns True (or forgets to return a value)
                # then close the stream.
                return_val = self._stream_handler.on_stream_error(error)
                if return_val or return_val is None:
                    if not stream_already_terminated:
                        self.close()
            else:
                # this operation did not expect more than 1 message
                raise error
        except Exception:
            logger.exception("%r unhandled exception while receiving message", self)

    def _on_continuation_closed(self, **kwargs) -> None:
        logger.debug("%r closed", self)
        if not self._initial_response_future.done():
            self._initial_response_future.set_exception(StreamClosedError())
        self._closed_future.set_result(None)

        if self._stream_handler:
            try:
                self._stream_handler.on_stream_closed()
            except Exception:
                logger.exception("%r unhandled exception calling callback", self)

def test_future_result_now():
    fut = Future()
    assert not fut.done()
    corocc.start(result_now(), future=fut)
    assert fut.done()
    assert fut.result() == 42

class NamespacedResourceReflector(LoggingConfigurable):
    """
    Base class for keeping a local up-to-date copy of a set of
    kubernetes resources.

    Must be subclassed once per kind of resource that needs watching.
    """
    labels = Dict(
        {},
        config=True,
        help="""
        Labels to reflect onto local cache
        """
    )

    namespace = Unicode(
        None,
        allow_none=True,
        help="""
        Namespace to watch for resources in
        """
    )

    resources = Dict(
        {},
        help="""
        Dictionary of resource names to the appropriate resource objects.

        This can be accessed across threads safely.
        """
    )

    kind = Unicode(
        'resource',
        help="""
        Human readable name for kind of object we're watching for.

        Used for diagnostic messages.
        """
    )

    list_method_name = Unicode(
        "",
        help="""
        Name of function (on apigroup represented by `api_group_name`) that
        is to be called to list resources.

        This will be passed a namespace & a label selector. You most likely
        want something of the form list_namespaced_<resource> - for example,
        `list_namespaced_pod` will give you a PodReflector.

        This must be set by a subclass.
        """
    )

    api_group_name = Unicode(
        'CoreV1Api',
        help="""
        Name of class that represents the apigroup on which
        `list_method_name` is to be found.

        Defaults to CoreV1Api, which has everything in the 'core' API group.
        If you want to watch Ingresses, for example, you would have to use
        ExtensionsV1beta1Api.
        """
    )

    on_failure = Any(help="""Function to be called when the reflector gives up.""")

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Load kubernetes config here, since this is a Singleton and
        # so this __init__ will be run way before anything else gets run.
        try:
            config.load_incluster_config()
        except config.ConfigException:
            config.load_kube_config()
        self.api = shared_client(self.api_group_name)

        # FIXME: Protect against malicious labels?
        self.label_selector = ','.join(
            ['{}={}'.format(k, v) for k, v in self.labels.items()])

        self.first_load_future = Future()
        self.start()

    def _list_and_update(self):
        """
        Update current list of resources by doing a full fetch.

        Overwrites all current resource info.
        """
        initial_resources = getattr(self.api, self.list_method_name)(
            self.namespace,
            label_selector=self.label_selector
        )
        # This is an atomic operation on the dictionary!
        self.resources = {p.metadata.name: p for p in initial_resources.items}
        # return the resource version so we can hook up a watch
        return initial_resources.metadata.resource_version

    def _watch_and_update(self):
        """
        Keeps the current list of resources up-to-date

        This method is to be run not on the main thread!

        We first fetch the list of current resources, and store that. Then we
        register to be notified of changes to those resources, and keep our
        local store up-to-date based on these notifications.

        We also perform exponential backoff, giving up after we hit 32s
        wait time. This should protect against network connections dropping
        and intermittent unavailability of the api-server. Every time we
        recover from an exception we also do a full fetch, to pick up
        changes that might've been missed in the time we were not doing
        a watch.

        Note that we're playing a bit with fire here, by updating a dictionary
        in this thread while it is probably being read in another thread
        without using locks! However, dictionary access itself is atomic,
        and as long as we don't try to mutate them (do a
        'fetch / modify / update' cycle on them), we should be ok!
        """
        cur_delay = 0.1
        while True:
            self.log.info("watching for %s with label selector %s in namespace %s",
                          self.kind, self.label_selector, self.namespace)
            w = watch.Watch()
            try:
                resource_version = self._list_and_update()
                if not self.first_load_future.done():
                    # signal that we've loaded our initial data
                    self.first_load_future.set_result(None)
                for ev in w.stream(
                        getattr(self.api, self.list_method_name),
                        self.namespace,
                        label_selector=self.label_selector,
                        resource_version=resource_version,
                ):
                    cur_delay = 0.1
                    resource = ev['object']
                    if ev['type'] == 'DELETED':
                        # This is an atomic delete operation on the dictionary!
                        self.resources.pop(resource.metadata.name, None)
                    else:
                        # This is an atomic operation on the dictionary!
                        self.resources[resource.metadata.name] = resource
            except Exception:
                cur_delay = cur_delay * 2
                if cur_delay > 30:
                    self.log.exception("Watching resources never recovered, giving up")
                    if self.on_failure:
                        self.on_failure()
                    return
                self.log.exception("Error when watching resources, retrying in %ss", cur_delay)
                time.sleep(cur_delay)
                continue
            finally:
                w.stop()

    def start(self):
        """
        Start the reflection process!

        We'll do a blocking read of all resources first, so that we don't
        race with any operations that are checking the state of the pod
        store - such as polls. This should be called only once at the start
        of program initialization (when the singleton is being created),
        and not afterwards!
        """
        if hasattr(self, 'watch_thread'):
            raise ValueError('Thread watching for resources is already running')

        self._list_and_update()
        self.watch_thread = threading.Thread(target=self._watch_and_update)
        # If the watch_thread is the only thread left alive, exit app
        self.watch_thread.daemon = True
        self.watch_thread.start()

class SystemdJobHandler(object):
    """A utility for waiting for one or more systemd jobs.

    Usage:
        with SystemdJobHandler() as job_handler:
            job_path = job_handler.manager.$do_something_to_create_a_job
            job_handler.register_job(job_path)
            # Can register more parallel jobs like this

            job_results = yield job_handler.all_jobs_done_future()
        job_results will be a dictionary, in SYSTEMD_MANAGER_INTERFACE.JobRemoved
        terms job_results[unit] = result
    """

    def __init__(self):
        self.__future = Future()
        self.__pending_jobs = set()
        self.__job_results = {}
        self.__signal_match = None

        bus = slip.dbus.SystemBus()
        manager_object = bus.get_object(SYSTEMD_MANAGER_NAME, SYSTEMD_MANAGER_PATH)
        self.__manager = dbus.Interface(manager_object, SYSTEMD_MANAGER_INTERFACE)

    def __job_removed_handler(self, job_id, job_path, unit, result):
        """SYSTEMD_MANAGER_INTERFACE.JobRemoved signal handler"""
        log.debug1("systemd JobRemoved signal: %s"
                   % repr((job_id, job_path, unit, result)))
        if job_path in self.__pending_jobs:
            self.__job_results[unit] = result
            self.__pending_jobs.remove(job_path)
            if len(self.__pending_jobs) == 0:
                self.__future.set_result(self.__job_results)

    # We use the context manager protocol to ensure the signal registration is
    # correctly removed.
    def __enter__(self):
        assert self.__signal_match is None, "Recursive use of SystemdJobProcessor"
        assert not self.__future.done(), "Repeated use of SystemdJobProcessor"
        self.__signal_match = self.__manager.connect_to_signal(
            "JobRemoved", self.__job_removed_handler)
        return self  # To allow “with SystemdJobHandler as job_handler:”…

    def __exit__(self, *args):
        self.__signal_match.remove()
        self.__signal_match = None
        return False

    # This is not strictly speaking a necessary part of the API, but since we
    # need the interface object for ourselves and the caller needs it as well,
    # let’s make it available.
    @property
    def manager(self):
        """A dbus.Interface object for SYSTEMD_MANAGER_INTERFACE."""
        return self.__manager

    def register_job(self, job_path):
        """Register a job to be followed to completion.

        :param job_path: A path of the job object. Make sure to provide the
        path soon after receiving it (in particular before allowing any D-Bus
        signals to be processed).
        """
        assert self.__signal_match is not None, \
            "Registering for jobs when not watching for results"
        self.__pending_jobs.add(job_path)

    def all_jobs_done_future(self):
        """Return a future for results of registered jobs.

        :returns: a future. The value eventually set as a result is a
        dictionary of unit name -> job result string.
        """
        assert self.__signal_match is not None and len(self.__pending_jobs) != 0
        return self.__future

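
# --- Usage sketch (an assumption, not part of the original source) ---
# RestartUnit is the standard org.freedesktop.systemd1.Manager method that
# returns a job path; the generator/yield style follows the class docstring
# above and assumes the caller runs inside the same coroutine framework.
def restart_unit(unit_name):
    with SystemdJobHandler() as job_handler:
        job_path = job_handler.manager.RestartUnit(unit_name, "replace")
        job_handler.register_job(job_path)
        job_results = yield job_handler.all_jobs_done_future()
    if job_results.get(unit_name) != "done":
        raise RuntimeError("Restarting %s failed: %r" % (unit_name, job_results))
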
def done_callback(self, future: Future):
    # Only a cleanly finished future counts as success. Check cancelled()
    # first: calling exception() on a cancelled future raises CancelledError.
    success = (future.done() and not future.cancelled()
               and future.exception() is None)
    self.callback(success, self.url, self.webloc_filepath)

def test_futures(self):
    f = Future()
    self.assertEqual(f.done(), False)
    self.assertEqual(f.running(), False)
    self.assertTrue(f.cancel())
    self.assertTrue(f.cancelled())
    with self.assertRaises(CancelledError):
        f.result()
    with self.assertRaises(CancelledError):
        f.exception()

    f = Future()
    f.set_running_or_notify_cancel()
    with self.assertRaises(TimeoutError):
        f.result(0.1)
    with self.assertRaises(TimeoutError):
        f.exception(0.1)

    f = Future()
    f.set_running_or_notify_cancel()
    f.set_result("result")
    self.assertEqual(f.result(), "result")
    self.assertEqual(f.exception(), None)

    f = Future()
    f.set_running_or_notify_cancel()
    f.set_exception(Exception("foo"))
    with self.assertRaises(Exception):
        f.result()

    class Ref():
        def __init__(self, ref):
            self.ref = ref

        def set(self, ref):
            self.ref = ref

    # Test that done callbacks are called.
    called = Ref(False)
    f = Future()
    f.add_done_callback(lambda f: called.set(True))
    f.set_result(None)
    self.assertTrue(called.ref)

    # Test that callbacks are called when cancelled.
    called = Ref(False)
    f = Future()
    f.add_done_callback(lambda f: called.set(True))
    f.cancel()
    self.assertTrue(called.ref)

    # Test that callbacks are called immediately when the future is
    # already done.
    called = Ref(False)
    f = Future()
    f.set_result(None)
    f.add_done_callback(lambda f: called.set(True))
    self.assertTrue(called.ref)

    count = Ref(0)
    f = Future()
    f.add_done_callback(lambda f: count.set(count.ref + 1))
    f.add_done_callback(lambda f: count.set(count.ref + 1))
    f.set_result(None)
    self.assertEqual(count.ref, 2)

    # Test that the callbacks are called with the future as argument.
    done_future = Ref(None)
    f = Future()
    f.add_done_callback(lambda f: done_future.set(f))
    f.set_result(None)
    self.assertIs(f, done_future.ref)

from ..utils import get_third_party_modules_from_config
from .pool import create_supervisor_actor_pool, create_worker_actor_pool
from .service import (
    start_supervisor,
    start_worker,
    stop_supervisor,
    stop_worker,
    load_config,
)
from .session import AbstractSession, _new_session, ensure_isolation_created

logger = logging.getLogger(__name__)

_is_exiting_future = SyncFuture()
atexit.register(
    lambda: _is_exiting_future.set_result(0)
    if not _is_exiting_future.done()
    else None
)
atexit.register(stop_isolation)


async def new_cluster_in_isolation(
    address: str = "0.0.0.0",
    n_worker: int = 1,
    n_cpu: Union[int, str] = "auto",
    cuda_devices: Union[List[int], str] = "auto",
    subprocess_start_method: str = None,
    backend: str = None,
    config: Union[str, Dict] = None,
    web: bool = True,
    timeout: float = None,
) -> ClientType:
    if subprocess_start_method is None:

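
# --- Pattern sketch (an assumption, not part of the original source) ---
# A self-contained illustration of the shutdown-flag idiom used above: an
# atexit hook resolves a concurrent Future so that other threads can tell
# "interpreter is exiting" apart from ordinary completion.
import atexit as _atexit
from concurrent.futures import Future as _Future

_exiting: _Future = _Future()
_atexit.register(lambda: _exiting.set_result(0) if not _exiting.done() else None)


def is_exiting() -> bool:
    # non-blocking check usable from worker threads
    return _exiting.done()
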
class RemoteTask(TaskDefinition):
    def __init__(self, taskdef: TaskDefinition, cluster):
        kwargs = taskdef.serialize()
        super().__init__(**kwargs)
        self.conn = None
        self.nonce = 0
        self.cluster = cluster
        self.future = Future()
        self.awaitable = asyncio.wrap_future(self.future)
        self.status = WAIT
        self.error = None
        self.result = None

    def __await__(self):
        return self.awaitable.__await__()

    @property
    def done(self) -> bool:
        return self.future.done()

    def destroy(self) -> None:
        self.cluster.destroy(self.id)

    def set_status(self, status: str) -> None:
        # sanity checks
        if self.status == FAIL and status == DONE:
            raise RuntimeError('Cant complete a failed task')
        if self.status == DONE and status == FAIL:
            raise RuntimeError('Cant fail a completed task')

        # update status
        self.status = status

    def set_error(self, error: str) -> None:
        self.set_status(FAIL)
        self.error = error
        if not self.future.done():
            self.future.set_exception(TaskError(error))

    def set_result(self, result: any, result_type: any = 'any') -> None:
        # unpack type & deserialize result
        result_type = type_from_description(result_type)
        result = result_type.deserialize(result)

        self.set_status(DONE)
        self.result = result
        if not self.future.done():
            self.future.set_result(result)

    async def wait_for_init(self, timeout=30) -> None:
        if self.status != WAIT:
            raise RuntimeError(f'Cant await task with status {self.status}')

        slept = 0
        interval = 0.2
        while True:
            if self.status == WORK:
                return
            if self.status == FAIL:
                raise RuntimeError(f'Awaited task failed with error: {self.error}')
            if slept > timeout:
                raise TimeoutError('Task took too long to initialize')

            await asyncio.sleep(interval)
            slept += interval

    async def call(self, method, args={}):
        if self.status != WORK:
            await self.wait_for_init()
            # raise RuntimeError(
            #     f'RPC is only available when status = WORK, was {self.status}. '
            #     f'Attempted to call {method}')

        return await self.conn.rpc.call(method, args)

    async def stop(self):
        # special case RPC - it always causes a send exception
        await self.call('stop')

    def __getattr__(self, method):
        async def magic_rpc(**kwargs):
            return await self.call(method, kwargs)
        return magic_rpc

class ExpectationBase(object):
    __metaclass__ = ABCMeta

    def __init__(self):
        self._future = Future()
        self._awaited = False
        self._scheduler = None
        self._success = False
        self._timeout = None
        self._deadline = None
        self._timedout = False
        self._float_tol = DEFAULT_FLOAT_TOL

    def _schedule(self, scheduler):
        # This expectation is scheduled on the `scheduler`; subclasses of
        # ExpectationBase can perform some operations on this scheduler:
        # schedule another expectation later, or perform an operation on the
        # scheduler object when this expectation is scheduled (like sending
        # a message for which this expectation object expects some result).
        self._awaited = True
        self._scheduler = scheduler
        if self._timeout is not None:
            self._deadline = timestamp_now() + self._timeout

    def success(self):
        return self._success

    def wait(self, _timeout=None):
        if self._awaited:
            try:
                self._future.result(timeout=_timeout)
            except FutureTimeoutError:
                self.set_timedout()
            except FutureCancelledError:
                self.cancel()
        return self

    def set_result(self):
        self._success = True
        return self._future.set_result(self.received_events())

    def set_exception(self, exception):
        return self._future.set_exception(exception)

    def set_timeout(self, _timeout):
        self._timeout = _timeout

    def set_timedout(self):
        if not self._success:
            self._timedout = True
            self.cancel()

    def cancel(self):
        return self._future.cancel()

    def cancelled(self):
        return self._future.cancelled()

    def timedout(self):
        if self._timedout:
            return True
        if self._success:
            return False
        if self._deadline is not None:
            self._timedout = (timestamp_now() > self._deadline)
            if self._timedout:
                self.cancel()
        return self._timedout

    def set_float_tol(self, _float_tol):
        self._float_tol = _float_tol

    def base_copy(self, *args, **kwds):
        other = self.__class__(*args, **kwds)
        ExpectationBase.__init__(other)
        other._timeout = self._timeout
        other._float_tol = self._float_tol
        return other

    @abstractmethod
    def copy(self):
        """
        All expectation subclasses must implement a shallow copy.
        """
        pass

    def done(self):
        return (self._future.done() or not self._awaited) and self._success

    def __bool__(self):
        return self.done()

    def __or__(self, other):
        return ArsdkWhenAnyExpectation([self, other])

    def __and__(self, other):
        return ArsdkWhenAllExpectations([self, other])

    def __rshift__(self, other):
        return ArsdkWhenSequenceExpectations([self, other])

    __nonzero__ = __bool__

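
# --- Combinator sketch (an assumption, not part of the original source) ---
# The operators defined above compose expectations: `|` succeeds when any
# one does, `&` when all do, `>>` when they succeed in sequence. `TakeOff`
# and `Hovering` are hypothetical expectation subclasses used only here.
#
# expectation = (TakeOff() >> Hovering()) | MotorErrorChanged()
# expectation.set_timeout(10)
# if not expectation.wait(_timeout=10).success():
#     print("failed, timed out:", expectation.timedout())
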
class NamespacedResourceReflector(LoggingConfigurable):
    """
    Base class for keeping a local up-to-date copy of a set of
    kubernetes resources.

    Must be subclassed once per kind of resource that needs watching.
    """
    labels = Dict(
        {},
        config=True,
        help="""
        Labels to reflect onto local cache
        """
    )

    fields = Dict(
        {},
        config=True,
        help="""
        Fields to restrict the reflected objects
        """
    )

    namespace = Unicode(
        None,
        allow_none=True,
        help="""
        Namespace to watch for resources in
        """
    )

    resources = Dict(
        {},
        help="""
        Dictionary of resource names to the appropriate resource objects.

        This can be accessed across threads safely.
        """
    )

    kind = Unicode(
        'resource',
        help="""
        Human readable name for kind of object we're watching for.

        Used for diagnostic messages.
        """
    )

    list_method_name = Unicode(
        "",
        help="""
        Name of function (on apigroup represented by `api_group_name`) that
        is to be called to list resources.

        This will be passed a namespace & a label selector. You most likely
        want something of the form list_namespaced_<resource> - for example,
        `list_namespaced_pod` will give you a PodReflector.

        This must be set by a subclass.
        """
    )

    api_group_name = Unicode(
        'CoreV1Api',
        help="""
        Name of class that represents the apigroup on which
        `list_method_name` is to be found.

        Defaults to CoreV1Api, which has everything in the 'core' API group.
        If you want to watch Ingresses, for example, you would have to use
        ExtensionsV1beta1Api.
        """
    )

    request_timeout = Int(
        60,
        config=True,
        help="""
        Network timeout for kubernetes watch.

        Trigger watch reconnect when a given request is taking too long,
        which can indicate network issues.
        """
    )

    timeout_seconds = Int(
        10,
        config=True,
        help="""
        Timeout for kubernetes watch.

        Trigger watch reconnect when no watch event has been received.
        This will cause a full reload of the currently existing resources
        from the API server.
        """
    )

    restart_seconds = Int(
        30,
        config=True,
        help="""
        Maximum time before restarting a watch.

        The watch will be restarted at least this often, even if events are
        still arriving. Avoids trusting kubernetes watch to yield all events,
        which seems to not be a safe assumption.
        """)

    on_failure = Any(help="""Function to be called when the reflector gives up.""")

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Load kubernetes config here, since this is a Singleton and
        # so this __init__ will be run way before anything else gets run.
        try:
            config.load_incluster_config()
        except config.ConfigException:
            config.load_kube_config()
        self.api = shared_client(self.api_group_name)

        # FIXME: Protect against malicious labels?
        self.label_selector = ','.join(
            ['{}={}'.format(k, v) for k, v in self.labels.items()])
        self.field_selector = ','.join(
            ['{}={}'.format(k, v) for k, v in self.fields.items()])

        self.first_load_future = Future()
        self._stop_event = threading.Event()

        self.start()

    def __del__(self):
        self.stop()

    def _list_and_update(self):
        """
        Update current list of resources by doing a full fetch.

        Overwrites all current resource info.
        """
        initial_resources = getattr(self.api, self.list_method_name)(
            self.namespace,
            label_selector=self.label_selector,
            field_selector=self.field_selector,
            _request_timeout=self.request_timeout,
        )
        # This is an atomic operation on the dictionary!
        self.resources = {p.metadata.name: p for p in initial_resources.items}
        # return the resource version so we can hook up a watch
        return initial_resources.metadata.resource_version

    def _watch_and_update(self):
        """
        Keeps the current list of resources up-to-date

        This method is to be run not on the main thread!

        We first fetch the list of current resources, and store that. Then we
        register to be notified of changes to those resources, and keep our
        local store up-to-date based on these notifications.

        We also perform exponential backoff, giving up after we hit 32s
        wait time. This should protect against network connections dropping
        and intermittent unavailability of the api-server. Every time we
        recover from an exception we also do a full fetch, to pick up
        changes that might've been missed in the time we were not doing
        a watch.

        Note that we're playing a bit with fire here, by updating a dictionary
        in this thread while it is probably being read in another thread
        without using locks! However, dictionary access itself is atomic,
        and as long as we don't try to mutate them (do a
        'fetch / modify / update' cycle on them), we should be ok!
        """
        selectors = []
        log_name = ""
        if self.label_selector:
            selectors.append("label selector=%r" % self.label_selector)
        if self.field_selector:
            selectors.append("field selector=%r" % self.field_selector)
        log_selector = ', '.join(selectors)

        cur_delay = 0.1

        self.log.info(
            "watching for %s with %s in namespace %s",
            self.kind, log_selector, self.namespace,
        )
        while True:
            self.log.debug("Connecting %s watcher", self.kind)
            start = time.monotonic()
            w = watch.Watch()
            try:
                resource_version = self._list_and_update()
                if not self.first_load_future.done():
                    # signal that we've loaded our initial data
                    self.first_load_future.set_result(None)
                watch_args = {
                    'namespace': self.namespace,
                    'label_selector': self.label_selector,
                    'field_selector': self.field_selector,
                    'resource_version': resource_version,
                }
                if self.request_timeout:
                    # set network receive timeout
                    watch_args['_request_timeout'] = self.request_timeout
                if self.timeout_seconds:
                    # set watch timeout
                    watch_args['timeout_seconds'] = self.timeout_seconds
                # in case of timeout_seconds, the w.stream just exits
                # (no exception thrown) -> we stop the watcher and start
                # a new one
                for ev in w.stream(
                        getattr(self.api, self.list_method_name),
                        **watch_args
                ):
                    cur_delay = 0.1
                    resource = ev['object']
                    if ev['type'] == 'DELETED':
                        # This is an atomic delete operation on the dictionary!
                        self.resources.pop(resource.metadata.name, None)
                    else:
                        # This is an atomic operation on the dictionary!
                        self.resources[resource.metadata.name] = resource
                    if self._stop_event.is_set():
                        self.log.info("%s watcher stopped", self.kind)
                        break
                    watch_duration = time.monotonic() - start
                    if watch_duration >= self.restart_seconds:
                        self.log.debug(
                            "Restarting %s watcher after %i seconds",
                            self.kind, watch_duration,
                        )
                        break
            except ReadTimeoutError:
                # network read time out, just continue and restart the watch
                # this could be due to a network problem or just low activity
                self.log.warning("Read timeout watching %s, reconnecting", self.kind)
                continue
            except Exception:
                cur_delay = cur_delay * 2
                if cur_delay > 30:
                    self.log.exception("Watching resources never recovered, giving up")
                    if self.on_failure:
                        self.on_failure()
                    return
                self.log.exception("Error when watching resources, retrying in %ss", cur_delay)
                time.sleep(cur_delay)
                continue
            else:
                # no events on watch, reconnect
                self.log.debug("%s watcher timeout", self.kind)
            finally:
                w.stop()
                if self._stop_event.is_set():
                    self.log.info("%s watcher stopped", self.kind)
                    break
        self.log.warning("%s watcher finished", self.kind)

    def start(self):
        """
        Start the reflection process!

        We'll do a blocking read of all resources first, so that we don't
        race with any operations that are checking the state of the pod
        store - such as polls. This should be called only once at the start
        of program initialization (when the singleton is being created),
        and not afterwards!
        """
        if hasattr(self, 'watch_thread'):
            raise ValueError('Thread watching for resources is already running')

        self._list_and_update()
        self.watch_thread = threading.Thread(target=self._watch_and_update)
        # If the watch_thread is the only thread left alive, exit app
        self.watch_thread.daemon = True
        self.watch_thread.start()

    def stop(self):
        self._stop_event.set()

    def stopped(self):
        return self._stop_event.is_set()

def testing_nonfuture():
    fpath = '~/shuffled.txt'
    df = DataFuture(None, fpath)
    print(df)
    print("Result: ", df.filepath)
    assert df.filepath == os.path.abspath(os.path.expanduser(fpath))


if __name__ == "__main__":
    # logging.basicConfig(filename='futures.testing.log', level=logging.DEBUG)
    import sys
    import random
    logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)

    logger.debug("Begin Testing")

    with open('shuffled.txt', 'w') as testfile:
        nums = list(range(0, 10000))
        random.shuffle(nums)
        for item in nums:
            testfile.write("{0}\n".format(item))

    foo = Future()  # type: Future[str]

    df = DataFuture(foo, './shuffled.txt')
    dx = DataFuture(foo, '~/shuffled.txt')
    print(foo.done())
    print(df.done())

    testing_nonfuture()

class SystemdJobHandler(object):
    """A utility for waiting for one or more systemd jobs.

    Usage:

    with SystemdJobHandler() as job_handler:
        job_path = job_handler.manager.$do_something_to_create_a_job
        job_handler.register_job(job_path)
        # Can register more parallel jobs like this

        job_results = yield job_handler.all_jobs_done_future()

    job_results will be a dictionary, in SYSTEMD_MANAGER_INTERFACE.JobRemoved
    terms: job_results[unit] = result
    """

    def __init__(self):
        self.__future = Future()
        self.__pending_jobs = set()
        self.__job_results = {}
        self.__signal_match = None

        bus = slip.dbus.SystemBus()
        manager_object = bus.get_object(SYSTEMD_MANAGER_NAME, SYSTEMD_MANAGER_PATH)
        self.__manager = dbus.Interface(manager_object, SYSTEMD_MANAGER_INTERFACE)

    def __job_removed_handler(self, job_id, job_path, unit, result):
        """SYSTEMD_MANAGER_INTERFACE.JobRemoved signal handler"""
        log.debug1("systemd JobRemoved signal: %s"
                   % repr((job_id, job_path, unit, result)))
        if job_path in self.__pending_jobs:
            self.__job_results[unit] = result
            self.__pending_jobs.remove(job_path)
            if len(self.__pending_jobs) == 0:
                self.__future.set_result(self.__job_results)

    # We use the context manager protocol to ensure the signal registration is
    # correctly removed.
    def __enter__(self):
        assert self.__signal_match is None, "Recursive use of SystemdJobHandler"
        assert not self.__future.done(), "Repeated use of SystemdJobHandler"
        self.__signal_match = self.__manager.connect_to_signal(
            "JobRemoved", self.__job_removed_handler)
        return self  # To allow “with SystemdJobHandler() as job_handler:”…

    def __exit__(self, *args):
        self.__signal_match.remove()
        self.__signal_match = None
        return False

    # This is not strictly speaking a necessary part of the API, but since we
    # need the interface object for ourselves and the caller needs it as well,
    # let’s make it available.
    @property
    def manager(self):
        """A dbus.Interface object for SYSTEMD_MANAGER_INTERFACE."""
        return self.__manager

    def register_job(self, job_path):
        """Register a job to be followed to completion.

        :param job_path: A path of the job object. Make sure to provide the
        path soon after receiving it (in particular before allowing any D-Bus
        signals to be processed).
        """
        assert self.__signal_match is not None, \
            "Registering for jobs when not watching for results"
        self.__pending_jobs.add(job_path)

    def all_jobs_done_future(self):
        """Return a future for results of registered jobs.

        :returns: a future. The value eventually set as a result is a
        dictionary of unit name -> job result string.
        """
        assert self.__signal_match is not None and len(self.__pending_jobs) != 0
        return self.__future
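
# A concrete (hypothetical) instantiation of the docstring's usage skeleton,
# meant to run inside a coroutine driven by the surrounding event loop:
# restart one unit and wait for the resulting job. RestartUnit(name, mode) is
# a standard org.freedesktop.systemd1.Manager method that returns a job
# object path; the unit name here is illustrative.
def restart_example():
    with SystemdJobHandler() as job_handler:
        job_path = job_handler.manager.RestartUnit("example.service", "replace")
        job_handler.register_job(job_path)
        job_results = yield job_handler.all_jobs_done_future()
    if job_results.get("example.service") != "done":
        log.error("Restarting example.service failed: %r" % (job_results,))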
class RemoteTask(TaskInstance):
    def __init__(self, taskdef: TaskDefinition, cluster):
        kwargs = taskdef.serialize()
        super().__init__(**kwargs)
        self.conn = None
        self.nonce = 0
        self.cluster = cluster
        self.future = Future()
        self.awaitable = asyncio.wrap_future(self.future)

    def __await__(self):
        return self.awaitable.__await__()

    @property
    def done(self) -> bool:
        return self.future.done()

    def destroy(self) -> None:
        self.cluster.destroy(self.id)

    def set_status(self, status: str) -> None:
        # sanity checks
        if self.status == FAIL and status == DONE:
            raise RuntimeError("Can't complete a failed task")
        if self.status == DONE and status == FAIL:
            raise RuntimeError("Can't fail a completed task")

        if status == STOP and not self.future.done():
            self.future.set_exception(
                StoppedError(f'Remote task {self.id} was stopped'))

        # update status
        self.status = status

    def set_error(self, error: str) -> None:
        self.set_status(FAIL)
        self.error = error
        if not self.future.done():
            self.future.set_exception(TaskError(error))

    def set_result(self, result: any, result_type: any = 'any') -> None:
        # unpack type & deserialize result
        result_type = type_from_description(result_type)
        result = result_type.deserialize(result)

        self.set_status(DONE)
        self.result = result
        if not self.future.done():
            self.future.set_result(result)

    async def wait_for_scheduling(self) -> None:
        pass

    async def wait_for_init(self, timeout=30) -> None:
        if self.status != WAIT:
            raise RuntimeError(f"Can't await task with status {self.status}")

        await self.wait_for_scheduling()

        slept = 0
        interval = 0.2
        while True:
            if self.status == WORK:
                return
            if self.status == FAIL:
                raise RuntimeError(
                    f'Awaited task failed with error: {self.error}')
            if slept > timeout:
                raise TimeoutError('Task took too long to initialize')
            await asyncio.sleep(interval)
            slept += interval

    async def call(self, method, args=None) -> any:
        if self.status == WAIT:
            await self.wait_for_init()
        elif self.status != WORK:
            raise RuntimeError(
                f'RPC is only available when status = WORK, was {self.status}. '
                f'Attempted to call {method}')
        # avoid a shared mutable default argument; fall back to an empty payload
        return await self.conn.rpc.call(method, args or {})

    async def stop(self) -> None:
        if self.status == STOP:
            return
        await self.call('stop')

    def logs(self):
        return self.cluster.logs(self.id)

    def __getattr__(self, method):
        if method[0] == '_':
            return super().__getattr__(method)

        async def magic_rpc(*args, **kwargs):
            if len(args) > 0:
                raise TypeError(
                    'Positional arguments are not supported for RPC methods')
            return await self.call(method, kwargs)
        return magic_rpc

    def __str__(self):
        return f'RemoteTask({self.id}, {self.status}, {self.inputs})'

    def __repr__(self):
        return self.__str__()
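
# Illustrative only: how the __getattr__ fallback above turns attribute access
# into RPC. 'echo' is a hypothetical remote method, not part of RemoteTask.
async def rpc_example(task: RemoteTask):
    r1 = await task.call('echo', {'text': 'hi'})  # explicit call
    r2 = await task.echo(text='hi')  # same path, via __getattr__ -> magic_rpc
    # keyword arguments become the RPC payload; positional args raise TypeError
    return r1, r2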
class Preplanner(object):
    def __init__(self, ctrl, max_plan_time=60 * 60 * 24, max_loop_time=300):
        self.ctrl = ctrl
        self.log = ctrl.log.get('Preplanner')

        self.max_plan_time = max_plan_time
        self.max_loop_time = max_loop_time

        path = self.ctrl.get_plan()
        if not os.path.exists(path):
            os.mkdir(path)

        self.started = Future()
        self.plans = {}

    def start(self):
        if not self.started.done():
            self.log.info('Preplanner started')
            self.started.set_result(True)

    def invalidate(self, filename):
        if filename in self.plans:
            self.plans[filename].terminate()
            del self.plans[filename]

    def invalidate_all(self):
        for filename, plan in self.plans.items():
            plan.terminate()
        self.plans = {}

    def delete_all_plans(self):
        files = glob.glob(self.ctrl.get_plan('*'))
        for path in files:
            safe_remove(path)
        self.invalidate_all()

    def delete_plans(self, filename):
        if filename in self.plans:
            self.plans[filename].delete()
        self.invalidate(filename)

    @gen.coroutine
    def get_plan(self, filename):
        if filename is None:
            raise Exception('Filename cannot be None')

        # Wait until state is fully initialized
        yield self.started

        if filename in self.plans:
            plan = self.plans[filename]
        else:
            plan = Plan(self, self.ctrl, filename)
            self.plans[filename] = plan

        data = yield plan.future
        return data

    def get_plan_progress(self, filename):
        return self.plans[filename].progress if filename in self.plans else 0
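
# A sketch of how a caller might consume the preplanner from a Tornado
# coroutine; the filename and the already-constructed `preplanner` object
# are assumptions for illustration.
from tornado import gen

@gen.coroutine
def show_plan(preplanner, filename='job.gcode'):
    preplanner.start()  # resolves the 'started' future gating get_plan()
    data = yield preplanner.get_plan(filename)  # plans are cached per filename
    print(preplanner.get_plan_progress(filename))
    return data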