class BaseRuntime:
    """A Jina Runtime is a procedure that blocks the main process once running (i.e. :meth:`run_forever`),
    therefore should be put into a separated thread/process, or inside the main process of a docker container.
    Any program/library/package/module that blocks the main process, can be formulated into a
    :class:`BaseRuntime` class and then be started from a :class:`Pod`.

    In the sequel, we call the main process/thread as ``M``, the process/thread blocked :class:`Runtime` as ``S``.

    In Jina, a :class:`Pod` object is used to manage a :class:`Runtime` object's lifecycle. A :class:`Pod`
    acts as a :class:`multiprocessing.Process` or :class:`threading.Thread`, it starts from ``M`` and once the
    ``S`` is spawned, it uses :class:`Runtime` as a context manager:

        0. :meth:`__init__`

        1. :meth:`__enter__`

        2. :meth:`run_forever`. Note that this will block ``S``, step 3 won't be
           reached until it is unblocked by :meth:`cancel`.

        3. When an error occurs during `run_forever` or `cancel` signal is reached by the `runtime`.
           The `run_forever` method is cancelled and the managed context is closed. The `__exit__` of
           `Runtime` guarantees that the `Runtime` is properly shut by calling `teardown`.

    The :meth:`__init__` and :meth:`teardown` pair together, which defines instructions that will be executed
    before and after. In subclasses, `teardown` is optional.

    In order to cancel the `run_forever` method of a `Runtime`, you can use their `static` `cancel` method
    that will make sure that the runtime is properly cancelled.

        - Use :class:`threading.Event` or `multiprocessing.Event`, while :meth:`run_forever` polls for this event
        - Use GrpcConnectionPool to send a TERMINATE message, while :meth:`run_forever` polls for this message

    Note, another way to jump out from :meth:`run_forever` is raise exceptions from it. This will immediately
    move to :meth:`teardown`.

    .. note::
        Rule of thumb on exception handling: if you are not sure if you should handle exception inside
        :meth:`run_forever`, :meth:`cancel`, :meth:`teardown`, then DO NOT catch exception in them.
        Exception is MUCH better handled by :class:`Pod`.

    .. seealso::
        :class:`Pod` for managing a :class:`Runtime` object's lifecycle.
    """

    def __init__(
        self,
        args: 'argparse.Namespace',
        **kwargs,
    ):
        """Store the arguments, derive the runtime name and create its logger.

        :param args: parsed arguments; ``args.name`` (may be empty) prefixes the runtime name, and all
            attributes of ``args`` are forwarded to the logger as keyword arguments
        :param kwargs: extra keyword arguments, accepted but not used by this base class
        """
        super().__init__()
        self.args = args
        if args.name:
            self.name = f'{args.name}/{self.__class__.__name__}'
        else:
            self.name = self.__class__.__name__
        self.logger = JinaLogger(self.name, **vars(self.args))

    def run_forever(self):
        """Running the blocking procedure inside ``S``. Note, once this method is called,
        ``S`` is blocked.

        .. note::
            If this method raises any exception, :meth:`teardown` will be called.

        .. seealso::
            :meth:`cancel` for cancelling the forever loop.
        """
        raise NotImplementedError

    def teardown(self):
        """Method called immediately after :meth:`run_forever` is unblocked.
        You can tidy up things here. Optional in subclasses. The default implementation only closes
        the logger of this runtime.
        """
        self.logger.close()

    @staticmethod
    def _failure_message(ex, where, quiet_error) -> str:
        """Build the log message for an exception raised inside a lifecycle method.

        The description of the failure is always part of the message; only the hint about
        ``--quiet-error`` depends on the flag.

        :param ex: the exception that was raised
        :param where: the object (usually a bound method) in which the exception was raised
        :param quiet_error: when true, the hint about ``--quiet-error`` is left out
        :return: the message to log
        """
        message = f'{ex!r} during {where!r}'
        if not quiet_error:
            message += '\n add "--quiet-error" to suppress the exception details'
        return message

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Log why the managed context ended, run :meth:`teardown` and suppress the exception.

        :param exc_type: type of the exception that ended the context, ``None`` if there was none
        :param exc_val: the exception instance, ``None`` if there was none
        :param exc_tb: the traceback, ``None`` if there was none
        :return: always ``True``, so that the exception is not propagated
        """
        quiet_error = self.args.quiet_error
        if exc_type == RuntimeTerminated:
            self.logger.debug(f'{self!r} is ended')
        elif exc_type == KeyboardInterrupt:
            self.logger.debug(f'{self!r} is interrupted by user')
        elif exc_type and issubclass(exc_type, Exception):
            # the message is built by a helper: writing `a + b if cond else ''` inline makes the
            # conditional swallow the whole message, logging an empty line when errors are quiet
            self.logger.error(
                self._failure_message(exc_val, self.run_forever, quiet_error),
                exc_info=not quiet_error,
            )
        try:
            self.teardown()
        except OSError:
            # OSError(Stream is closed) already
            pass
        except Exception as ex:
            self.logger.error(
                self._failure_message(ex, self.teardown, quiet_error),
                exc_info=not quiet_error,
            )

        # https://stackoverflow.com/a/28158006
        # return True will silent all exception stack trace here, silence is desired here as otherwise it is too
        # noisy
        #
        # doc: If an exception is supplied, and the method wishes to suppress the exception (i.e., prevent it
        # from being propagated), it should return a true value. Otherwise, the exception will be processed normally
        # upon exit from this method.
        return True


class BasePod(ABC):
    """
    :class:`BasePod` is an interface from which all the classes managing the lifetime of a Runtime inside a local
    process, container or in a remote JinaD instance (to come) must inherit.

    It exposes the required APIs so that the `BasePod` can be handled by the `cli` api as a context manager or by
    a `Deployment`.

    What makes a BasePod a BasePod is that it manages the lifecycle of a Runtime (gateway or not gateway)
    """

    def __init__(self, args: 'argparse.Namespace'):
        """Store the arguments and create the logger, environment and synchronization events.

        :param args: parsed arguments describing the Pod. Note that ``args`` is modified in place:
            ``port_in`` is overwritten by ``port_expose`` when the latter exists, and ``parallel`` is set
            to ``shards``.
        """
        self.args = args

        if hasattr(self.args, 'port_expose'):
            self.args.port_in = self.args.port_expose
        self.args.parallel = self.args.shards
        self.name = self.args.name or self.__class__.__name__
        self.is_forked = False
        self.logger = JinaLogger(self.name, **vars(self.args))

        if self.args.runtime_backend == RuntimeBackendType.THREAD:
            self.logger.warning(
                f' Using Thread as runtime backend is not recommended for production purposes. It is '
                f'just supposed to be used for easier debugging. Besides the performance considerations, it is'
                f'specially dangerous to mix `Executors` running in different types of `RuntimeBackends`.'
            )

        self._envs = {'JINA_DEPLOYMENT_NAME': self.name}
        if self.args.quiet:
            self._envs['JINA_LOG_CONFIG'] = 'QUIET'
        if self.args.env:
            self._envs.update(self.args.env)

        # arguments needed to create `runtime` and communicate with it in the `run` in the stack of the new process
        # or thread.
        runtime_backend = getattr(args, 'runtime_backend', RuntimeBackendType.THREAD)
        # a throw-away worker of the configured backend type, only passed to `_get_event`
        test_worker = {
            RuntimeBackendType.THREAD: threading.Thread,
            RuntimeBackendType.PROCESS: multiprocessing.Process,
        }.get(runtime_backend)()
        self.is_ready = _get_event(test_worker)
        self.is_shutdown = _get_event(test_worker)
        self.cancel_event = _get_event(test_worker)
        self.is_started = _get_event(test_worker)
        self.ready_or_shutdown = ConditionalEvent(
            runtime_backend,
            events_list=[self.is_ready, self.is_shutdown],
        )
        self.daemon = self.args.daemon
        self.runtime_ctrl_address = self._get_control_address()
        self._timeout_ctrl = self.args.timeout_ctrl

    def _get_control_address(self):
        """Return the control address of the runtime as ``host:port_in``.

        .. # noqa: DAR201
        """
        return f'{self.args.host}:{self.args.port_in}'

    @staticmethod
    def _failure_message(ex, where, quiet_error) -> str:
        """Build the log message for an exception raised inside a lifecycle method.

        The description of the failure is always part of the message; only the hint about
        ``--quiet-error`` depends on the flag.

        :param ex: the exception that was raised
        :param where: the object (usually a bound method) in which the exception was raised
        :param quiet_error: when true, the hint about ``--quiet-error`` is left out
        :return: the message to log
        """
        message = f'{ex!r} during {where!r}'
        if not quiet_error:
            message += '\n add "--quiet-error" to suppress the exception details'
        return message

    def close(self) -> None:
        """Close the Pod

        This method makes sure that the `Process/thread` is properly finished and its resources properly released.
        Errors raised while terminating are logged and not propagated.
        """
        self.logger.debug('waiting for ready or shutdown signal from runtime')
        if not self.is_shutdown.is_set() and self.is_started.is_set():
            try:
                self.logger.debug('terminate')
                self._terminate()
                if not self.is_shutdown.wait(
                    timeout=self._timeout_ctrl if not __windows__ else 1.0
                ):
                    if not __windows__:
                        # raised only to be reported by the `except` clause below
                        raise Exception(
                            f'Shutdown signal was not received for {self._timeout_ctrl} seconds'
                        )
                    else:
                        self.logger.warning(
                            'Pod was forced to close after 1 second. Graceful closing is not available on Windows.'
                        )
            except Exception as ex:
                # the message is built by a helper: writing `a + b if cond else ''` inline makes the
                # conditional swallow the whole message, logging an empty line when errors are quiet
                self.logger.error(
                    self._failure_message(ex, self.close, self.args.quiet_error),
                    exc_info=not self.args.quiet_error,
                )
        else:
            # here shutdown has been set already, therefore `run` will gracefully finish
            self.logger.debug(
                f'{"shutdown is is already set" if self.is_shutdown.is_set() else "Runtime was never started"}. Runtime will end gracefully on its own'
            )

        self.is_shutdown.set()
        self.logger.debug(__stop_msg__)
        self.logger.close()

    def __enter__(self):
        return self.start()

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    def _wait_for_ready_or_shutdown(self, timeout: Optional[float]):
        """
        Waits for the process to be ready or to know it has failed.

        :param timeout: The time to wait before readiness or failure is determined

        .. # noqa: DAR201
        """
        return AsyncNewLoopRuntime.wait_for_ready_or_shutdown(
            timeout=timeout,
            ready_or_shutdown_event=self.ready_or_shutdown.event,
            ctrl_address=self.runtime_ctrl_address,
            timeout_ctrl=self._timeout_ctrl,
        )

    def _fail_start_timeout(self, timeout):
        """
        Closes the Pod and raises a TimeoutError with the corresponding warning messages

        :param timeout: The time to wait before readiness or failure is determined, in seconds; a falsy
            value is reported as ``-1``

        .. # noqa: DAR201
        .. # noqa: DAR401
        """
        _timeout = timeout or -1
        self.logger.warning(
            f'{self} timeout after waiting for {self.args.timeout_ready}ms, '
            f'if your executor takes time to load, you may increase --timeout-ready'
        )
        self.close()
        raise TimeoutError(
            f'{typename(self)}:{self.name} can not be initialized after {_timeout * 1e3}ms'
        )

    def _check_failed_to_start(self):
        """
        Raises a corresponding exception if failed to start

        .. # noqa: DAR401
        """
        if self.is_shutdown.is_set():
            # return too early and the shutdown is set, means something fails!!
            if not self.is_started.is_set():
                raise RuntimeFailToStart
            else:
                raise RuntimeRunForeverEarlyError

    def wait_start_success(self):
        """Block until the Pod is ready or has shut down, or until ``args.timeout_ready`` expires.

        ``args.timeout_ready`` is given in milliseconds; a value ``<= 0`` means waiting without a timeout.
        If the Pod did not start successfully, an error is raised for the caller to handle.
        """
        _timeout = self.args.timeout_ready
        if _timeout <= 0:
            _timeout = None
        else:
            _timeout /= 1e3
        if self._wait_for_ready_or_shutdown(_timeout):
            self._check_failed_to_start()
            self.logger.debug(__ready_msg__)
        else:
            self._fail_start_timeout(_timeout)

    async def async_wait_start_success(self):
        """
        Wait for the `Pod` to start successfully in a non-blocking manner

        The ready-or-shutdown event is checked every 0.1 seconds until it is set or until
        ``args.timeout_ready`` (milliseconds, ``<= 0`` meaning no timeout) expires.
        """
        import asyncio

        _timeout = self.args.timeout_ready
        if _timeout <= 0:
            _timeout = None
        else:
            _timeout /= 1e3

        timeout_ns = 1e9 * _timeout if _timeout else None
        now = time.time_ns()
        while timeout_ns is None or time.time_ns() - now < timeout_ns:

            if self.ready_or_shutdown.event.is_set():
                self._check_failed_to_start()
                self.logger.debug(__ready_msg__)
                return
            else:
                await asyncio.sleep(0.1)

        self._fail_start_timeout(_timeout)

    @property
    def role(self) -> 'PodRoleType':
        """Get the role of this pod in a deployment
        .. #noqa: DAR201"""
        return self.args.pod_role

    @abstractmethod
    def start(self):
        """Start the BasePod.
        This method calls :meth:`start` in :class:`threading.Thread` or :class:`multiprocessing.Process`.
        .. #noqa: DAR201
        """
        ...

    @abstractmethod
    def _terminate(self):
        ...

    @abstractmethod
    def join(self, *args, **kwargs):
        """Joins the BasePod. Wait for the BasePod to properly terminate

        :param args: extra positional arguments
        :param kwargs: extra keyword arguments
        """
        ...