Exemple #1
0
class PartialStore(ABC):
    """Abstract base of the stores that live inside a partial-daemon container.

    A store keeps a single runtime object in ``self.object`` and its
    bookkeeping record in ``self.item``.
    """
    def __init__(self):
        self._logger = JinaLogger(self.__class__.__name__, **vars(jinad_args))
        self.item = PartialStoreItem()
        self.object: Union['Pea', 'Pod', 'Flow'] = None

    @abstractmethod
    def add(self, *args, **kwargs) -> PartialStoreItem:
        """Register a new element in the store; concrete stores must implement this.


        .. #noqa: DAR101"""
        ...

    def delete(self) -> None:
        """Close the stored object, if it can be closed.

        Any exception raised while closing is logged and then re-raised.
        """
        try:
            if not hasattr(self.object, 'close'):
                # nothing was ever started (or the object has no `close`)
                self._logger.warning('nothing to close. exiting')
            else:
                self.object.close()
        except Exception as ex:
            self._logger.error(f'{ex!r}')
            raise
Exemple #2
0
class PartialStore(ABC):
    """Abstract base of the stores that live inside a partial-daemon container.

    A store keeps a single runtime object in ``self.object`` and its
    bookkeeping record in ``self.item``.
    """
    def __init__(self):
        self._logger = JinaLogger(self.__class__.__name__, **vars(jinad_args))
        self.item = PartialStoreItem()
        self.object: Union[Type['BasePod'], Type['BaseDeployment'],
                           'Flow'] = None

    @abstractmethod
    def add(self, *args, **kwargs) -> PartialStoreItem:
        """Register a new element in the store; concrete stores must implement this.


        .. #noqa: DAR101"""
        ...

    def delete(self) -> None:
        """Close the stored object and reset the bookkeeping record.

        Any exception raised while closing is logged and re-raised; in that
        case ``self.item`` is left untouched.
        """
        try:
            if not hasattr(self.object, 'close'):
                self._logger.warning('nothing to close. exiting')
            else:
                self.object.close()
                arguments = self.item.arguments
                self._logger.info(arguments)
                if not arguments.get('identity'):
                    self._logger.success('object is removed!')
                else:
                    self._logger.success(
                        f'{colored(arguments["identity"], "cyan")} is removed!'
                    )
        except Exception as ex:
            self._logger.error(f'{ex!r}')
            raise
        else:
            # only reached when nothing above raised: start from a clean record
            self.item = PartialStoreItem()
Exemple #3
0
class PartialStore:
    """Base class of the stores that live inside a mini-jinad container."""
    def __init__(self):
        self._logger = JinaLogger(self.__class__.__name__, **vars(jinad_args))
        self.item = PartialStoreItem()

    def add(self, *args, **kwargs) -> PartialStoreItem:
        """Register a new element in the store; subclasses must override this.


        .. #noqa: DAR101"""
        raise NotImplementedError

    def update(self, *args, **kwargs) -> PartialStoreItem:
        """Update the element held by the store; subclasses must override this.


        .. #noqa: DAR101"""
        raise NotImplementedError

    def delete(self) -> None:
        """Close the stored object if one was ever attached to this store.

        Any exception raised while closing is logged and then re-raised.
        """
        try:
            if not hasattr(self, 'object'):
                # `object` is only set by subclasses once something is added
                self._logger.warning('nothing to close. exiting')
            else:
                self.object.close()
        except Exception as ex:
            self._logger.error(f'{ex!r}')
            raise
Exemple #4
0
def log(logger: JinaLogger):
    """Emit one test message per log level, from ``debug`` up to ``critical``.

    :param logger: the logger whose level methods are exercised
    """
    levels = ('debug', 'info', 'success', 'warning', 'error', 'critical')
    for level in levels:
        emit = getattr(logger, level)
        emit(f'this is test {level} message')
Exemple #5
0
def _list(
        logger: JinaLogger,
        image_name: Optional[str] = None,
        image_kind: Optional[str] = None,
        image_type: Optional[str] = None,
        image_keywords: Sequence = (),
) -> Optional[List[Dict[str, Any]]]:
    """Query the Hub api for the images matching the given filters.

    Filters that are empty or ``None`` are not sent. When no filter remains,
    no request is made and ``None`` is returned.

    :param logger: logger to use
    :param image_name: name of hub image
    :param image_kind: kind of hub image (indexer/encoder/segmenter/crafter/evaluator/ranker etc)
    :param image_type: type of hub image (pod/app)
    :param image_keywords: keywords added in the manifest yml
    :return: the decoded JSON response of the Hub api, or ``None`` when no filter is
        given or the request fails with an HTTP error
    """
    api_config_path = os.path.join(__resources_path__, 'hubapi.yml')
    with open(api_config_path) as fp:
        hubapi_yml = JAML.load(fp)
    hubapi_url = hubapi_yml['hubapi']['url'] + hubapi_yml['hubapi']['list']

    candidates = (
        ('name', image_name),
        ('kind', image_kind),
        ('type', image_type),
        ('keywords', image_keywords),
    )
    params = {key: value for key, value in candidates if value}
    if not params:
        return None

    query = urlencode(params, doseq=True)
    request = Request(f'{hubapi_url}?{query}')
    with TimeContext('searching', logger):
        try:
            with urlopen(request) as resp:
                response = json.load(resp)
        except HTTPError as err:
            if err.code == 400:
                logger.warning(
                    'no matched executors found. please use different filters and retry.'
                )
            elif err.code == 500:
                logger.error(f'server is down: {err.reason}')
            else:
                logger.error(f'unknown error: {err.reason}')
            return None

    # render the result, enriched with the local manifest when there is one
    local_manifest = _load_local_hub_manifest()
    tb = (_make_hub_table_with_local(response, local_manifest)
          if local_manifest else _make_hub_table(response))
    logger.info('\n'.join(tb))
    return response
Exemple #6
0
class QueryExecutor(Executor):
    """Executor that answers ``/search`` requests against an index loaded from a dump."""

    def __init__(self, dump_path: Optional[str] = None, *args, **kwargs):
        """Load the index from ``dump_path`` or start with an empty one.

        :param dump_path: path of the dump to load; when not given, the
            ``dump_path`` entry of the ``runtime_args`` keyword argument is used
        :param args: positional arguments forwarded to the parent constructor
        :param kwargs: keyword arguments forwarded to the parent constructor
        """
        super().__init__(*args, **kwargs)
        self.logger = JinaLogger('QueryExecutor')
        # `runtime_args` may be passed explicitly as None, in which case the
        # default of `dict.get` does not apply
        runtime_args = kwargs.get('runtime_args') or {}
        self._dump_path = dump_path or runtime_args.get('dump_path', None)
        if self._dump_path is not None and os.path.exists(self._dump_path):
            self.logger.success(
                f'loading Executor from dump path: {self._dump_path}')
            self._docs = DocumentArray.load(self._dump_path)
        else:
            if self._dump_path is None:
                self.logger.warning(
                    'no dump path passed. Loading an empty index')
            else:
                # a path was given, so do not claim that none was passed
                self.logger.warning(
                    f'dump path {self._dump_path} does not exist. Loading an empty index'
                )
            self._docs = DocumentArray()

    @requests(on='/search')
    def search(self, docs: 'DocumentArray', parameters: Dict, **kwargs):
        """Attach to every query Document its closest matches from the index.

        Nothing is done when the index is empty.

        :param docs: the query Documents
        :param parameters: request parameters; ``top_k`` (default 5) limits the number of matches
        :param kwargs: additional keyword arguments, not used
        """
        if len(self._docs) > 0:
            top_k = int(parameters.get('top_k', 5))
            docs.match(self._docs, limit=top_k)
Exemple #7
0
def run(
    args: 'argparse.Namespace',
    name: str,
    container_name: str,
    net_mode: Optional[str],
    runtime_ctrl_address: str,
    envs: Dict,
    is_started: Union['multiprocessing.Event', 'threading.Event'],
    is_shutdown: Union['multiprocessing.Event', 'threading.Event'],
    is_ready: Union['multiprocessing.Event', 'threading.Event'],
):
    """Method to be run in a process that stream logs from a Container

    This method is the target for the Pod's `thread` or `process`

    .. note::
        :meth:`run` is running in subprocess/thread, the exception can not be propagated to the main process.
        Hence, please do not raise any exception here.

    .. note::
        Please note that env variables are process-specific. Subprocess inherits envs from
        the main process. But Subprocess's envs do NOT affect the main process. It does NOT
        mess up user local system envs.

    :param args: namespace args from the Pod
    :param name: name of the Pod to have proper logging
    :param container_name: name to set the Container to
    :param net_mode: The network mode where to run the container
    :param runtime_ctrl_address: The control address of the runtime in the container
    :param envs: Dictionary of environment variables to be set in the docker image
    :param is_started: concurrency event to communicate runtime is properly started. Used for better logging
    :param is_shutdown: concurrency event to communicate runtime is terminated
    :param is_ready: concurrency event to communicate runtime is ready to receive messages
    """
    import docker

    # work on a deep copy so that overriding `log_config` does not alter `args`
    log_kwargs = copy.deepcopy(vars(args))
    log_kwargs['log_config'] = 'docker'
    logger = JinaLogger(name, **log_kwargs)

    # `cancel` is set by the termination handlers installed below;
    # `fail_to_start` is set when the container disappears before it is ready
    cancel = threading.Event()
    fail_to_start = threading.Event()

    if not __windows__:
        try:
            for signame in {signal.SIGINT, signal.SIGTERM}:
                signal.signal(signame, lambda *args, **kwargs: cancel.set())
        except (ValueError, RuntimeError) as exc:
            # e.g. `signal.signal` raises ValueError when not called from the main thread;
            # the failure is only logged, the container is started anyway
            logger.warning(
                f' The process starting the container for {name} will not be able to handle termination signals. '
                f' {repr(exc)}')
    else:
        with ImportExtensions(
                required=True,
                logger=logger,
                help_text=
                '''If you see a 'DLL load failed' error, please reinstall `pywin32`.
                If you're using conda, please use the command `conda install -c anaconda pywin32`''',
        ):
            import win32api

        win32api.SetConsoleCtrlHandler(lambda *args, **kwargs: cancel.set(),
                                       True)

    client = docker.from_env()

    try:
        container = _docker_run(
            client=client,
            args=args,
            container_name=container_name,
            envs=envs,
            net_mode=net_mode,
            logger=logger,
        )
        # NOTE(review): the client is closed here and once more in `finally`;
        # presumably the second call is harmless - confirm
        client.close()

        def _is_ready():
            return AsyncNewLoopRuntime.is_ready(runtime_ctrl_address)

        def _is_container_alive(container) -> bool:
            import docker.errors

            # the container counts as alive as long as it can still be reloaded
            try:
                container.reload()
            except docker.errors.NotFound:
                return False
            return True

        async def _check_readiness(container):
            # poll every 0.1s until the container is gone, the runtime is ready or a cancel is requested
            while (_is_container_alive(container) and not _is_ready()
                   and not cancel.is_set()):
                await asyncio.sleep(0.1)
            # NOTE(review): when the loop ends because of `cancel` while the container is
            # still alive, `is_started` and `is_ready` are set as well - confirm this is intended
            if _is_container_alive(container):
                is_started.set()
                is_ready.set()
            else:
                fail_to_start.set()

        async def _stream_starting_logs(container):
            # NOTE(review): `container.logs(stream=True)` looks like a synchronous iterator, in which
            # case waiting for the next line would block the event loop - confirm
            for line in container.logs(stream=True):
                # while the outcome of the start is still undecided, yield to `_check_readiness`
                if (not is_started.is_set() and not fail_to_start.is_set()
                        and not cancel.is_set()):
                    await asyncio.sleep(0.01)
                msg = line.decode().rstrip()  # type: str
                # drop the ANSI escape sequences before forwarding the line to the logger
                logger.debug(re.sub(r'\u001b\[.*?[@-~]', '', msg))

        async def _run_async(container):
            # run the readiness check and the log streaming concurrently
            await asyncio.gather(*[
                _check_readiness(container),
                _stream_starting_logs(container)
            ])

        asyncio.run(_run_async(container))
    finally:
        client.close()
        # `is_started` is only set by `_check_readiness`, so it is unset on every failure path
        if not is_started.is_set():
            logger.error(
                f' Process terminated, the container fails to start, check the arguments or entrypoint'
            )
        is_shutdown.set()
        logger.debug(f'process terminated')
Exemple #8
0
class BaseExecutor(JAMLCompatible, metaclass=ExecutorType):
    """
    The base class of all Executors, can be used to build encoder, indexer, etc.

    :class:`jina.Executor` as an alias for this class.

    EXAMPLE USAGE

    .. code-block:: python

        from jina import Executor, requests, Flow


        class MyExecutor(Executor):
            @requests
            def foo(self, docs, **kwargs):
                print(docs)  # process docs here


        f = Flow().add(uses=MyExecutor)  # you can add your Executor to a Flow

    Any executor inherited from :class:`BaseExecutor` always has the **meta** defined in :mod:`jina.executors.metas.defaults`.

    All arguments in the :func:`__init__` can be specified with a ``with`` map in the YAML config. Example:

    .. highlight:: python
    .. code-block:: python

        class MyAwesomeExecutor(Executor):
            def __init__(self, awesomeness=5):
                pass

    is equal to

    .. highlight:: yaml
    .. code-block:: yaml

        jtype: MyAwesomeExecutor
        with:
            awesomeness: 5

    """
    def __init__(
        self,
        metas: Optional[Dict] = None,
        requests: Optional[Dict] = None,
        runtime_args: Optional[Dict] = None,
        **kwargs,
    ):
        """Set up metas, endpoint bindings, runtime args, monitoring and the logger.

        `metas` and `requests` are always auto-filled with values from YAML config.

        :param metas: a dict of metas fields
        :param requests: a dict of endpoint-function mapping
        :param runtime_args: a dict of arguments injected from :class:`Runtime` during runtime
        :param kwargs: additional extra keyword arguments to avoid failing when extra params are passed that are not expected
        """
        self._add_metas(metas)
        self._add_requests(requests)
        self._add_runtime_args(runtime_args)
        # monitoring reads `self.runtime_args`, so it has to come after `_add_runtime_args`
        self._init_monitoring()
        self.logger = JinaLogger(self.__class__.__name__)

        if __dry_run_endpoint__ in self.requests:
            # the user bound the reserved endpoint: keep the binding, only warn
            self.logger.warning(
                f' Endpoint {__dry_run_endpoint__} is defined by the Executor. Be aware that this endpoint is usually reserved to enable health checks from the Client through the gateway.'
                f' So it is recommended not to expose this endpoint. ')
        else:
            self.requests[__dry_run_endpoint__] = self._dry_run_func

    def _dry_run_func(self, *args, **kwargs):
        """Accept any arguments and do nothing."""
        return None

    def _add_runtime_args(self, _runtime_args: Optional[Dict]):
        """Expose the runtime arguments as attributes of ``self.runtime_args``.

        :param _runtime_args: the runtime arguments; ``None`` or an empty dict gives an empty namespace
        """
        self.runtime_args = SimpleNamespace(**(_runtime_args or {}))

    def _init_monitoring(self):
        """Create the request-timing summary when a metrics registry is available.

        Without a ``metrics_registry`` in ``self.runtime_args`` both
        ``self._summary_method`` and ``self._metrics_buffer`` are set to ``None``.
        """
        registry = getattr(self.runtime_args, 'metrics_registry', None)
        if not registry:
            self._summary_method = None
            self._metrics_buffer = None
            return

        with ImportExtensions(
                required=True,
                help_text=
                'You need to install the `prometheus_client` to use the montitoring functionality of jina',
        ):
            from prometheus_client import Summary

        request_timer = Summary(
            'process_request_seconds',
            'Time spent when calling the executor request method',
            registry=registry,
            namespace='jina',
            labelnames=('executor', 'executor_endpoint', 'runtime_name'),
        )
        self._summary_method = request_timer
        # the buffer holds every metric of this Executor, keyed by metric name
        self._metrics_buffer = {'process_request_seconds': request_timer}

    def _add_requests(self, _requests: Optional[Dict]):
        """Bind the endpoints given in ``_requests`` to methods of this class.

        :param _requests: mapping from endpoint to the name of the method serving it, may be ``None``
        :raises TypeError: if the named attribute is neither callable nor a ``FunctionMapper``
        """
        if not hasattr(self, 'requests'):
            self.requests = {}

        if _requests:
            # name of each function bound so far -> the endpoint it is bound to
            func_names = {f.__name__: e for e, f in self.requests.items()}
            for endpoint, func in _requests.items():
                # a function that gets a new endpoint loses its previous binding
                # NOTE(review): `func_names` is computed once before the loop; if two new endpoints
                # name the same function the second `del` looks like it raises KeyError - confirm
                if func in func_names:
                    del self.requests[func_names[func]]

                # the following line must be `getattr(self.__class__, func)` NOT `getattr(self, func)`
                # this to ensure we always have `_func` as unbound method
                _func = getattr(self.__class__, func)
                if callable(_func):
                    # the target function is not decorated with `@requests` yet
                    self.requests[endpoint] = _func
                elif typename(
                        _func) == 'jina.executors.decorators.FunctionMapper':
                    # the target function is already decorated with `@requests`, need unwrap with `.fn`
                    self.requests[endpoint] = _func.fn
                else:
                    raise TypeError(
                        f'expect {typename(self)}.{func} to be a function, but receiving {typename(_func)}'
                    )

    def _add_metas(self, _metas: Optional[Dict]):
        """Build ``self.metas`` from the default metas overridden by ``_metas``.

        String values matching ``env_var_regex`` are left out in a first pass and
        resolved afterwards with :meth:`JAML.expand_dict`.

        :param _metas: the metas overriding the defaults, may be ``None``
        :raises ValueError: if a string value still matches ``env_var_regex`` or
            ``internal_var_regex`` after the expansion
        """
        from jina.serve.executors.metas import get_default_metas

        tmp = get_default_metas()

        if _metas:
            tmp.update(_metas)

        unresolved_attr = False
        target = SimpleNamespace()
        # set self values filtered by those non-exist, and non-expandable
        for k, v in tmp.items():
            if k == 'workspace' and not (v is None or v == ''):
                warnings.warn(
                    'Setting `workspace` via `metas.workspace` is deprecated. '
                    'Instead, use `f.add(..., workspace=...)` when defining a a Flow in Python; '
                    'the `workspace` parameter when defining a Flow using YAML; '
                    'or `--workspace` when starting an Executor using the CLI.',
                    category=DeprecationWarning,
                )
            if not hasattr(target, k):
                if isinstance(v, str):
                    if not env_var_regex.findall(v):
                        setattr(target, k, v)
                    else:
                        # leave the value out for now, it is resolved in the second pass below
                        unresolved_attr = True
                else:
                    setattr(target, k, v)
            # NOTE(review): `target` starts empty and the keys of `tmp` are unique, so this
            # branch looks unreachable - confirm
            elif type(getattr(target, k)) == type(v):
                setattr(target, k, v)

        if unresolved_attr:
            # `vars(self)` is the instance `__dict__` itself, so the next line also sets
            # `self.metas` to the raw dict until it is replaced at the end of this method
            _tmp = vars(self)
            _tmp['metas'] = tmp
            new_metas = JAML.expand_dict(_tmp)['metas']

            for k, v in new_metas.items():
                # only the values that were left out in the first pass are still missing
                if not hasattr(target, k):
                    if isinstance(v, str):
                        if not (env_var_regex.findall(v)
                                or internal_var_regex.findall(v)):
                            setattr(target, k, v)
                        else:
                            raise ValueError(
                                f'{k}={v} is not substitutable or badly referred'
                            )
                    else:
                        setattr(target, k, v)
        # `name` is important as it serves as an identifier of the executor
        # if not given, then set a name by the rule
        if not getattr(target, 'name', None):
            setattr(target, 'name', self.__class__.__name__)

        self.metas = target

    def close(self) -> None:
        """
        Hook invoked when the executor is destroyed; does nothing by default.

        Override it to put destructor & saving logic here.
        """

    def __call__(self, req_endpoint: str, **kwargs):
        """
        Call the function bound to `req_endpoint`, else the one bound to the default endpoint.

        # noqa: DAR101
        # noqa: DAR102
        # noqa: DAR201
        """
        for endpoint in (req_endpoint, __default_endpoint__):
            if endpoint in self.requests:
                # the bindings are unbound methods, hence `self` is passed explicitly
                return self.requests[endpoint](self, **kwargs)

    async def __acall__(self, req_endpoint: str, **kwargs):
        """
        Async counterpart of `__call__`: serve `req_endpoint`, else the default endpoint.

        # noqa: DAR101
        # noqa: DAR102
        # noqa: DAR201
        """
        for endpoint in (req_endpoint, __default_endpoint__):
            if endpoint in self.requests:
                return await self.__acall_endpoint__(endpoint, **kwargs)

    async def __acall_endpoint__(self, req_endpoint, **kwargs):
        """Call the function bound to ``req_endpoint``, timing it when monitoring is enabled.

        :param req_endpoint: an endpoint present in ``self.requests``
        :param kwargs: keyword arguments forwarded to the bound function
        :return: the result of the bound function, awaited if it is a coroutine function
        """
        handler = self.requests[req_endpoint]
        runtime_name = getattr(self.runtime_args, 'name', None)

        if self._summary_method:
            timer = self._summary_method.labels(
                self.__class__.__name__, req_endpoint, runtime_name).time()
        else:
            # monitoring is disabled: use a context manager that does nothing
            timer = contextlib.nullcontext()

        with timer:
            if not iscoroutinefunction(handler):
                return handler(self, **kwargs)
            return await handler(self, **kwargs)

    @property
    def workspace(self) -> Optional[str]:
        """
        Get the workspace directory of the Executor.

        The base directory is taken from ``runtime_args.workspace``, then
        ``metas.workspace``, then the ``JINA_DEFAULT_WORKSPACE_BASE`` environment
        variable. The directory is created if it does not exist yet.

        :return: returns the workspace of the current shard of this Executor, or
            ``None`` when no base directory is configured.
        """
        workspace = (getattr(self.runtime_args, 'workspace', None)
                     or getattr(self.metas, 'workspace', None)
                     or os.environ.get('JINA_DEFAULT_WORKSPACE_BASE'))
        if not workspace:
            return None

        complete_workspace = os.path.join(workspace, self.metas.name)
        shard_id = getattr(self.runtime_args, 'shard_id', None)
        if shard_id is not None and shard_id != -1:
            complete_workspace = os.path.join(complete_workspace,
                                              str(shard_id))
        if not os.path.exists(complete_workspace):
            # `exist_ok` covers another process creating the directory
            # between the check above and this call
            os.makedirs(complete_workspace, exist_ok=True)
        return os.path.abspath(complete_workspace)

    def __enter__(self):
        """Enter the ``with`` block.

        :return: the Executor itself
        """
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Close the Executor when the ``with`` block is left; exceptions are not suppressed.

        :param exc_type: type of the exception raised in the block, if any
        :param exc_val: the exception raised in the block, if any
        :param exc_tb: traceback of the exception raised in the block, if any
        """
        self.close()
        return None

    @classmethod
    def from_hub(
        cls: Type[T],
        uri: str,
        context: Optional[Dict[str, Any]] = None,
        uses_with: Optional[Dict] = None,
        uses_metas: Optional[Dict] = None,
        uses_requests: Optional[Dict] = None,
        **kwargs,
    ) -> T:
        """Pull an Executor from Hub and build it as a native object.

        :param uri: a hub Executor scheme starts with `jinahub://`
        :param context: context replacement variables in a dict, the value of the dict is the replacement.
        :param uses_with: dictionary of parameters to overwrite from the default config's with field
        :param uses_metas: dictionary of parameters to overwrite from the default config's metas field
        :param uses_requests: dictionary of parameters to overwrite from the default config's requests field
        :param kwargs: other kwargs accepted by the CLI ``jina hub pull``
        :return: the Hub Executor object.

        .. highlight:: python
        .. code-block:: python

            from jina import Executor
            from docarray import Document, DocumentArray

            executor = Executor.from_hub(
                uri='jinahub://CLIPImageEncoder', install_requirements=True
            )

        """
        from jina.hubble.helper import is_valid_huburi

        source = None
        if is_valid_huburi(uri):
            from jina.hubble.hubio import HubIO
            from jina.parsers.hubble import set_hub_pull_parser

            pull_kwargs = {'no_usage': True, **kwargs}
            pull_args = ArgNamespace.kwargs2namespace(
                pull_kwargs,
                set_hub_pull_parser(),
                positional_args=(uri, ),
            )
            source = HubIO(args=pull_args).pull()

        # only a non-empty, non-docker source can be loaded as a native Executor
        loadable = bool(source) and not source.startswith('docker://')
        if not loadable:
            raise ValueError(
                f'Can not construct a native Executor from {uri}. Looks like you want to use it as a '
                f'Docker container, you may want to use it in the Flow via `.add(uses={uri})` instead.'
            )

        overrides = dict(
            context=context,
            uses_with=uses_with,
            uses_metas=uses_metas,
            uses_requests=uses_requests,
        )
        return cls.load_config(source, **overrides)

    @classmethod
    def serve(
        cls,
        uses_with: Optional[Dict] = None,
        uses_metas: Optional[Dict] = None,
        uses_requests: Optional[Dict] = None,
        stop_event: Optional[Union[threading.Event,
                                   multiprocessing.Event]] = None,
        **kwargs,
    ):
        """Wrap this Executor in a temporary Flow and block until stopped. Useful in testing an Executor in remote settings.

        :param uses_with: dictionary of parameters to overwrite from the default config's with field
        :param uses_metas: dictionary of parameters to overwrite from the default config's metas field
        :param uses_requests: dictionary of parameters to overwrite from the default config's requests field
        :param stop_event: a threading event or a multiprocessing event that once set will resume the control Flow
            to main thread.
        :param kwargs: other kwargs accepted by the Flow, full list can be found `here <https://docs.jina.ai/api/jina.orchestrate.flow.base/>`

        """
        from jina import Flow

        overrides = dict(
            uses_with=uses_with,
            uses_metas=uses_metas,
            uses_requests=uses_requests,
        )
        flow = Flow(**kwargs).add(uses=cls, **overrides)
        with flow:
            flow.block(stop_event)

    class StandaloneExecutorType(BetterEnum):
        """
        Type of standalone Executors

        The yaml export methods of the enclosing class include a gateway only
        for ``EXTERNAL``.
        """

        EXTERNAL = 0  # served by a gateway
        SHARED = 1  # not served by a gateway, served by head/worker

    @staticmethod
    def to_kubernetes_yaml(
        uses: str,
        output_base_path: str,
        k8s_namespace: Optional[str] = None,
        executor_type: Optional[
            StandaloneExecutorType] = StandaloneExecutorType.EXTERNAL,
        uses_with: Optional[Dict] = None,
        uses_metas: Optional[Dict] = None,
        uses_requests: Optional[Dict] = None,
        **kwargs,
    ):
        """
        Export the Executor as a set of yaml deployments for Kubernetes.

        The Executor is wrapped in a temporary Flow whose own export is used.

        If you don't want to rebuild image on Jina Hub,
        you can set `JINA_HUB_NO_IMAGE_REBUILD` environment variable.

        :param uses: the Executor to use. Has to be containerized and accessible from K8s
        :param output_base_path: The base path where to dump all the yaml files
        :param k8s_namespace: The name of the k8s namespace to set for the configurations. If None, the name of the Flow will be used.
        :param executor_type: The type of Executor. Can be external or shared. External Executors include the Gateway. Shared Executors don't. Defaults to External
        :param uses_with: dictionary of parameters to overwrite from the default config's with field
        :param uses_metas: dictionary of parameters to overwrite from the default config's metas field
        :param uses_requests: dictionary of parameters to overwrite from the default config's requests field
        :param kwargs: other kwargs accepted by the Flow, full list can be found `here <https://docs.jina.ai/api/jina.orchestrate.flow.base/>`
        """
        from jina import Flow

        flow = Flow(**kwargs).add(
            uses=uses,
            uses_with=uses_with,
            uses_metas=uses_metas,
            uses_requests=uses_requests,
        )
        # only external Executors are served by a gateway
        with_gateway = (
            executor_type == BaseExecutor.StandaloneExecutorType.EXTERNAL)
        flow.to_kubernetes_yaml(
            output_base_path=output_base_path,
            k8s_namespace=k8s_namespace,
            include_gateway=with_gateway,
        )

    to_k8s_yaml = to_kubernetes_yaml

    @staticmethod
    def to_docker_compose_yaml(
        uses: str,
        output_path: Optional[str] = None,
        network_name: Optional[str] = None,
        executor_type: Optional[
            StandaloneExecutorType] = StandaloneExecutorType.EXTERNAL,
        uses_with: Optional[Dict] = None,
        uses_metas: Optional[Dict] = None,
        uses_requests: Optional[Dict] = None,
        **kwargs,
    ):
        """
        Export the Executor as a yaml file to run with `docker-compose up`.

        The Executor is wrapped in a temporary Flow whose own export is used.

        :param uses: the Executor to use. Has to be containerized
        :param output_path: The output path for the yaml file
        :param network_name: The name of the network that will be used by the deployment name
        :param executor_type: The type of Executor. Can be external or shared. External Executors include the Gateway. Shared Executors don't. Defaults to External
        :param uses_with: dictionary of parameters to overwrite from the default config's with field
        :param uses_metas: dictionary of parameters to overwrite from the default config's metas field
        :param uses_requests: dictionary of parameters to overwrite from the default config's requests field
        :param kwargs: other kwargs accepted by the Flow, full list can be found `here <https://docs.jina.ai/api/jina.orchestrate.flow.base/>`
        """
        from jina import Flow

        Flow(**kwargs).add(
            uses=uses,
            uses_with=uses_with,
            uses_metas=uses_metas,
            uses_requests=uses_requests,
        ).to_docker_compose_yaml(
            output_path=output_path,
            network_name=network_name,
            # only external Executors are served by a gateway
            include_gateway=executor_type ==
            BaseExecutor.StandaloneExecutorType.EXTERNAL,
        )

    def monitor(self,
                name: Optional[str] = None,
                documentation: Optional[str] = None) -> Optional['Summary']:
        """
        Time a block of code under the prometheus metric `name`.

        The metric is created on first use and kept in a buffer for the next calls.
        :param name: the name of the metrics
        :param documentation:  the description of the metrics

        :return: the timer of the given prometheus metric, or a context manager doing nothing if monitoring is not enabled.
        """
        metrics = self._metrics_buffer
        if not metrics:
            return contextlib.nullcontext()

        if name not in metrics:
            from prometheus_client import Summary

            summary = Summary(
                name,
                documentation,
                registry=self.runtime_args.metrics_registry,
                namespace='jina',
                labelnames=('runtime_name', ),
            )
            metrics[name] = summary.labels(self.runtime_args.name)
        return metrics[name].time()
Exemple #9
0
class CrudIndexer(Executor):
    """Simple indexer class

    Documents are kept in memory, loaded from ``docs.json`` in the workspace at
    start-up and dumped back to that file on :meth:`close`.
    """
    def __init__(self, **kwargs):
        """Load the previously dumped Documents, if there are any.

        :param kwargs: keyword arguments forwarded to the parent constructor
        """
        super().__init__(**kwargs)
        self.logger = JinaLogger('CrudIndexer')
        self._docs = DocumentArray()
        self._dump_location = os.path.join(self.metas.workspace, 'docs.json')
        if os.path.exists(self._dump_location):
            self._docs = DocumentArray.load_json(self._dump_location)
            self.logger.debug(
                f'Loaded {len(self._docs)} from {self._dump_location}')
        else:
            self.logger.warning(f'No data found at {self._dump_location}')

    @requests(on='/index')
    def index(self, docs: 'DocumentArray', **kwargs):
        """Append the given Documents to the index.

        :param docs: the Documents to add
        :param kwargs: additional keyword arguments, not used
        """
        self._docs.extend(docs)

    @requests(on='/update')
    def update(self, docs: 'DocumentArray', **kwargs):
        """Replace the indexed Documents sharing an id with the given ones.

        :param docs: the new versions of the Documents
        :param kwargs: additional keyword arguments, not used
        """
        self.delete(docs)
        self.index(docs)

    def close(self) -> None:
        """Dump the index to the dump location."""
        self.logger.debug(
            f'Dumping {len(self._docs)} to {self._dump_location}')
        self._docs.save_json(self._dump_location)

    @requests(on='/delete')
    def delete(self, docs: 'DocumentArray', **kwargs):
        """Remove from the index every Document whose id is among those of ``docs``.

        :param docs: the Documents carrying the ids to remove
        :param kwargs: additional keyword arguments, not used
        """
        # TODO we can do del _docs[d.id] once
        # tests.unit.types.arrays.test_documentarray.test_delete_by_id is fixed
        # a set keeps the membership test constant-time per indexed Document
        ids_to_delete = {d.id for d in docs}
        idx_to_delete = [
            i for i, doc in enumerate(self._docs) if doc.id in ids_to_delete
        ]
        # delete from the back so that the remaining positions stay valid
        for i in reversed(idx_to_delete):
            del self._docs[i]

    @requests(on='/search')
    def search(self, docs: 'DocumentArray', parameters: Dict, **kwargs):
        """Attach to every query Document its closest indexed Documents.

        Each match carries a ``cosine`` score of ``1 - distance``. Nothing is
        done when there is no query or the index is empty.

        :param docs: the query Documents
        :param parameters: request parameters; ``top_k`` (default 1) limits the number of matches
        :param kwargs: additional keyword arguments, not used
        """
        if not docs or len(self._docs) == 0:
            # `np.stack` raises on an empty sequence, and there is nothing to match anyway
            return
        top_k = int(parameters.get('top_k', 1))
        a = np.stack(docs[:, 'embedding'])
        b = np.stack(self._docs[:, 'embedding'])
        q_emb = _ext_A(_norm(a))
        d_emb = _ext_B(_norm(b))
        dists = _cosine(q_emb, d_emb)
        idx, dist = self._get_sorted_top_k(dists, top_k)
        for _q, _ids, _dists in zip(docs, idx, dist):
            for _id, _dist in zip(_ids, _dists):
                d = Document(self._docs[int(_id)], copy=True)
                d.scores['cosine'].value = 1 - _dist
                _q.matches.append(d)

    @staticmethod
    def _get_sorted_top_k(dist: 'np.array',
                          top_k: int) -> Tuple['np.ndarray', 'np.ndarray']:
        """Select, for every row, the ``top_k`` smallest distances in ascending order.

        :param dist: the distances, indexed along both axes
        :param top_k: the number of entries to keep per row
        :return: the column indices of the selected entries and their distances
        """
        if top_k >= dist.shape[1]:
            # every column is kept: a full sort is all that is needed
            idx = dist.argsort(axis=1)[:, :top_k]
            dist = np.take_along_axis(dist, idx, axis=1)
        else:
            # partition first to isolate the candidates, then sort only those
            idx_ps = dist.argpartition(kth=top_k, axis=1)[:, :top_k]
            dist = np.take_along_axis(dist, idx_ps, axis=1)
            idx_fs = dist.argsort(axis=1)
            idx = np.take_along_axis(idx_ps, idx_fs, axis=1)
            dist = np.take_along_axis(dist, idx_fs, axis=1)

        return idx, dist
Exemple #10
0
class HubIO:
    """:class:`HubIO` provides the way to interact with Jina Hub registry.
    You can use it with CLI to package a directory into a Jina Hub and publish it to the world.
    Examples:
        - :command:`jina hub push my_executor/` to push the executor package to Jina Hub
        - :command:`jina hub pull UUID8` to download the executor identified by UUID8

    To create a :class:`HubIO` object, simply:

        .. highlight:: python
        .. code-block:: python
            hubio = HubIO(args)

    :param args: arguments
    """
    def __init__(self, args: Optional[argparse.Namespace] = None, **kwargs):
        """Set up the arguments, the logger and check the optional dependencies.

        :param args: parsed hub arguments; when missing (or not a namespace) the
            namespace is built from ``kwargs`` with the hub parser
        :param kwargs: keyword arguments used to build the namespace when ``args``
            is not given
        """
        if args and isinstance(args, argparse.Namespace):
            self.args = args
        else:
            self.args = ArgNamespace.kwargs2namespace(kwargs, set_hub_parser())
        # Configure the logger from the resolved namespace: `args` itself is None
        # on the kwargs path and `vars(None)` raises TypeError.
        self.logger = JinaLogger(self.__class__.__name__, **vars(self.args))

        with ImportExtensions(required=True):
            import cryptography
            import filelock
            import rich

            assert rich  #: prevent pycharm auto remove the above line
            assert cryptography
            assert filelock

    def new(self) -> None:
        """Create a new executor folder interactively."""

        from rich import box, print
        from rich.panel import Panel
        from rich.progress import track
        from rich.prompt import Confirm, Prompt
        from rich.syntax import Syntax
        from rich.table import Table

        console = get_rich_console()

        print(
            Panel.fit(
                '''
[bold green]Executor[/bold green] is how Jina processes [bold]Document[/bold].

This guide helps you to create your own Executor in 30 seconds.''',
                title='Create New Executor',
            ))

        exec_name = (self.args.name if self.args.name else Prompt.ask(
            ':grey_question: What is the [bold]name[/bold] of your executor?\n'
            '[dim]CamelCase is required[/dim]',
            default=f'MyExecutor{random.randint(0, 100)}',
        ))

        exec_path = (self.args.path if self.args.path else Prompt.ask(
            ':grey_question: [bold]Which folder[/bold] to store your executor?',
            default=os.path.join(os.getcwd(), exec_name),
        ))
        exec_description = '{{}}'
        exec_keywords = '{{}}'
        exec_url = '{{}}'

        is_dockerfile = False

        if self.args.advance_configuration or Confirm.ask(
                '[green]That\'s all we need to create an Executor![/green]\n'
                ':grey_question: Or do you want to proceed to advanced configuration',
                default=False,
        ):
            exec_description = (
                self.args.description if self.args.description else
                (Prompt.ask(
                    ':grey_question: Please give a [bold]short description[/bold] of your executor?\n'
                    f'[dim]Example: {exec_name} embeds images into 128-dim vectors using ResNet.[/dim]'
                )))

            exec_keywords = (self.args.keywords if self.args.keywords else (
                Prompt.ask(
                    ':grey_question: Please give some [bold]keywords[/bold] to help people search your executor [dim](separated by comma)[/dim]\n'
                    f'[dim]Example: image cv embedding encoding resnet[/dim]'))
                             )

            exec_url = (self.args.url if self.args.url else (Prompt.ask(
                ':grey_question: What is the [bold]URL[/bold] for GitHub repo?\n'
                f'[dim]Example: https://github.com/yourname/my-executor[/dim]')
                                                             ))

            print(
                Panel.fit(
                    '''
[bold]Dockerfile[/bold] describes how this executor will be built. It is useful when
your executor has non-trivial dependencies or must be run under certain environment.

- If the [bold]Dockerfile[/bold] is missing, Jina automatically generates one for you.
- If you provide one, then Jina will respect the given [bold]Dockerfile[/bold].''',
                    title='[Optional] [bold]Dockerfile[/bold]',
                    width=80,
                ))

            is_dockerfile = self.args.add_dockerfile or Confirm.ask(
                ':grey_question: Do you need to write your own [bold]Dockerfile[/bold] instead of the auto-generated one?',
                default=False,
            )
            print('[green]That\'s all we need to create an Executor![/green]')

        def mustache_repl(srcs):
            for src in track(srcs,
                             description=f'Creating {exec_name}...',
                             total=len(srcs)):
                with open(
                        os.path.join(__resources_path__, 'executor-template',
                                     src)) as fp, open(
                                         os.path.join(exec_path, src),
                                         'w') as fpw:
                    f = (fp.read().replace('{{exec_name}}', exec_name).replace(
                        '{{exec_description}}', exec_description).replace(
                            '{{exec_keywords}}',
                            str(exec_keywords.split(','))).replace(
                                '{{exec_url}}', exec_url))

                    f = [
                        v + '\n' for v in f.split('\n')
                        if not ('{{' in v or '}}' in v)
                    ]
                    fpw.writelines(f)

        Path(exec_path).mkdir(parents=True, exist_ok=True)
        pkg_files = [
            'executor.py',
            'manifest.yml',
            'README.md',
            'requirements.txt',
            'config.yml',
        ]

        if is_dockerfile:
            pkg_files.append('Dockerfile')

        mustache_repl(pkg_files)

        table = Table(box=box.SIMPLE)
        table.add_column('Filename', style='cyan', no_wrap=True)
        table.add_column('Description', no_wrap=True)

        # adding the columns in order of `ls` output
        table.add_row(
            'config.yml',
            'The YAML config file of the Executor. You can define [bold]__init__[/bold] arguments using [bold]with[/bold] keyword.',
        )

        table.add_row(
            '',
            Panel(
                Syntax(
                    f'''
jtype: {exec_name}
with:
    foo: 1
    bar: hello
metas:
    py_modules:
        - executor.py
                ''',
                    'yaml',
                    theme='monokai',
                    line_numbers=True,
                    word_wrap=True,
                ),
                title='config.yml',
                width=50,
                expand=False,
            ),
        )

        if is_dockerfile:
            table.add_row(
                'Dockerfile',
                'The Dockerfile describes how this executor will be built.',
            )

        table.add_row('executor.py', 'The main logic file of the Executor.')
        table.add_row(
            'manifest.yml',
            'Metadata for the Executor, for better appeal on Jina Hub.',
        )

        manifest_fields_table = Table(box=box.SIMPLE)
        manifest_fields_table.add_column('Field', style='cyan', no_wrap=True)
        manifest_fields_table.add_column('Description', no_wrap=True)
        manifest_fields_table.add_row('name',
                                      'Human-readable title of the Executor')
        manifest_fields_table.add_row(
            'description', 'Human-readable description of the Executor')
        manifest_fields_table.add_row(
            'url',
            'URL to find more information on the Executor (e.g. GitHub repo URL)',
        )
        manifest_fields_table.add_row(
            'keywords', 'Keywords that help user find the Executor')

        table.add_row('', manifest_fields_table)
        table.add_row('README.md', 'A usage guide of the Executor.')
        table.add_row('requirements.txt',
                      'The Python dependencies of the Executor.')

        final_table = Table(box=None)

        final_table.add_row(
            'Congrats! You have successfully created an Executor! Here are the next steps:'
        )

        p0 = Panel(
            Syntax(
                f'cd {exec_path}\nls',
                'console',
                theme='monokai',
                line_numbers=True,
                word_wrap=True,
            ),
            title='1. Check out the generated Executor',
            width=120,
            expand=False,
        )

        p1 = Panel(
            table,
            title='2. Understand folder structure',
            width=120,
            expand=False,
        )

        p2 = Panel(
            Syntax(
                f'jina hub push {exec_path}',
                'console',
                theme='monokai',
                line_numbers=True,
                word_wrap=True,
            ),
            title='3. Share it to Jina Hub',
            width=120,
            expand=False,
        )

        final_table.add_row(p0)
        final_table.add_row(p1)
        final_table.add_row(p2)

        p = Panel(
            final_table,
            title=':tada: Next steps',
            width=130,
            expand=False,
        )
        console.print(p)

    def push(self) -> None:
        """Push the executor package to Jina Hub.

        The folder at ``self.args.path`` is archived and uploaded, then the streamed
        build messages are followed until a ``complete`` message delivers the
        published image. The returned id/secret pair is stored next to the package
        when it differs from the one loaded before the upload.

        A ``KeyboardInterrupt`` during the upload is swallowed on purpose.

        :raises Exception: when the given Dockerfile is missing or outside the
            package folder, when Jina Hub reports an error, or when the stream ends
            without a ``complete`` message
        """

        work_path = Path(self.args.path)

        exec_tags = None
        exec_immutable_tags = None
        if self.args.tag:
            exec_tags = ','.join(self.args.tag)
        if self.args.protected_tag:
            exec_immutable_tags = ','.join(self.args.protected_tag)

        dockerfile = None
        if self.args.dockerfile:
            dockerfile = Path(self.args.dockerfile)
            if not dockerfile.exists():
                raise Exception(
                    f'The given Dockerfile `{dockerfile}` does not exist!')
            if dockerfile.parent != work_path:
                raise Exception(
                    f'The Dockerfile must be placed at the given folder `{work_path}`'
                )

            # the form field carries the path relative to the package root
            dockerfile = dockerfile.relative_to(work_path)

        console = get_rich_console()
        with console.status(f'Pushing `{self.args.path}` ...') as st:
            req_header = get_request_header()
            try:
                st.update(f'Packaging {self.args.path} ...')
                md5_hash = hashlib.md5()
                bytesio = archive_package(work_path)
                content = bytesio.getvalue()
                md5_hash.update(content)
                md5_digest = md5_hash.hexdigest()

                # upload the archived package
                form_data = {
                    'public':
                    'True' if getattr(self.args, 'public', None) else 'False',
                    'private':
                    'True' if getattr(self.args, 'private', None) else 'False',
                    'md5sum': md5_digest,
                }

                if self.args.verbose:
                    form_data['verbose'] = 'True'

                if self.args.no_cache:
                    form_data['buildWithNoCache'] = 'True'

                if exec_tags:
                    form_data['tags'] = exec_tags

                if exec_immutable_tags:
                    form_data['immutableTags'] = exec_immutable_tags

                if dockerfile:
                    form_data['dockerfile'] = str(dockerfile)

                # command-line values take precedence over the stored id/secret
                uuid8, secret = load_secret(work_path)
                if self.args.force_update or uuid8:
                    form_data['id'] = self.args.force_update or uuid8
                if self.args.secret or secret:
                    form_data['secret'] = self.args.secret or secret

                st.update('Connecting to Jina Hub ...')
                # an id together with a secret means an existing executor is updated
                if form_data.get('id') and form_data.get('secret'):
                    hubble_url = get_hubble_url_v2() + '/rpc/executor.update'
                else:
                    hubble_url = get_hubble_url_v2() + '/rpc/executor.create'

                # upload the archived executor to Jina Hub
                st.update('Uploading...')
                resp = upload_file(
                    hubble_url,
                    'filename',
                    content,
                    dict_data=form_data,
                    headers=req_header,
                    stream=True,
                    method='post',
                )

                image = None
                session_id = req_header.get('jinameta-session-id')
                for stream_line in resp.iter_lines():
                    # A streamed response may contain empty keep-alive lines;
                    # they are not JSON and would make `json.loads` fail.
                    if not stream_line:
                        continue
                    stream_msg = json.loads(stream_line)

                    t = stream_msg.get('type')
                    subject = stream_msg.get('subject')
                    payload = stream_msg.get('payload', '')
                    if t == 'error':
                        msg = stream_msg.get('message')
                        hubble_err = payload
                        overridden_msg = ''
                        detail_msg = ''
                        if isinstance(hubble_err, dict):
                            (overridden_msg,
                             detail_msg) = get_hubble_error_message(hubble_err)
                            if not msg:
                                msg = detail_msg

                        if overridden_msg and overridden_msg != detail_msg:
                            self.logger.warning(overridden_msg)

                        raise Exception(
                            f'{overridden_msg or msg or "Unknown Error"} session_id: {session_id}'
                        )
                    if t == 'progress' and subject == 'buildWorkspace':
                        legacy_message = stream_msg.get('legacyMessage', {})
                        status = legacy_message.get('status', '')
                        st.update(
                            f'Cloud building ... [dim]{subject}: {t} ({status})[/dim]'
                        )
                    elif t == 'complete':
                        image = stream_msg['payload']
                        st.update(
                            f'Cloud building ... [dim]{subject}: {t} ({stream_msg["message"]})[/dim]'
                        )
                        break
                    elif t and subject:
                        if self.args.verbose and t == 'console':
                            console.log(
                                f'Cloud building ... [dim]{subject}: {payload}[/dim]'
                            )
                        else:
                            st.update(
                                f'Cloud building ... [dim]{subject}: {t} {payload}[/dim]'
                            )

                if image:
                    new_uuid8, new_secret = self._prettyprint_result(
                        console, image)
                    # persist the id/secret only when they changed
                    if new_uuid8 != uuid8 or new_secret != secret:
                        dump_secret(work_path, new_uuid8, new_secret)
                else:
                    raise Exception(f'Unknown Error, session_id: {session_id}')

            except KeyboardInterrupt:
                pass

            except Exception:  # IO related errors
                # `.get` keeps a missing header from raising KeyError here and
                # hiding the exception that is being reported.
                self.logger.error(
                    f'''Please report this session_id: [yellow bold]{req_header.get("jinameta-session-id")}[/] to https://github.com/jina-ai/jina/issues'''
                )
                raise

    def _prettyprint_result(self, console, image):
        """Print a summary of the published executor and return its credentials.

        :param console: console object the panel is printed to
        :param image: mapping describing the published executor; ``id``, ``secret``
            and ``visibility`` are required, ``name`` is optional
        :return: the ``(id, secret)`` pair taken from ``image``
        """
        # TODO: only support single executor now

        from rich.panel import Panel
        from rich.table import Table

        uuid8, secret, visibility = (image['id'], image['secret'],
                                     image['visibility'])
        # only the first tag given on the command line is shown in the usage hint
        tag = None
        if self.args.tag:
            tag = self.args.tag[0]

        summary = Table.grid()
        summary.add_column(width=20, no_wrap=True)
        summary.add_column(style='cyan', no_wrap=True)
        summary.add_row(
            ':link: Hub URL',
            f'[link=https://hub.jina.ai/executor/{uuid8}/]https://hub.jina.ai/executor/{uuid8}/[/link]',
        )
        if 'name' in image:
            summary.add_row(':name_badge: Name', image['name'])
        summary.add_row(':lock: Secret', secret)
        summary.add_row(
            '',
            ':point_up:️ [bold red]Please keep this token in a safe place!',
        )
        summary.add_row(':eyes: Visibility', visibility)

        console.print(
            Panel(
                summary,
                title='Published',
                width=80,
                expand=False,
            ))

        # non-public executors need the secret appended to be usable
        presented_id = image.get('name', uuid8)
        if visibility == 'public':
            usage = f'{presented_id}'
        else:
            usage = f'{presented_id}:{secret}'
        if tag:
            usage += f'/{tag}'

        if not self.args.no_usage:
            self._get_prettyprint_usage(console, usage)

        return uuid8, secret

    def _get_prettyprint_usage(self, console, executor_name, usage_kind=None):
        from rich.panel import Panel
        from rich.syntax import Syntax

        flow_plain = f'''from jina import Flow

f = Flow().add(uses='jinahub://{executor_name}')
'''

        flow_docker = f'''from jina import Flow

f = Flow().add(uses='jinahub+docker://{executor_name}')
'''

        flow_sandbox = f'''from jina import Flow

f = Flow().add(uses='jinahub+sandbox://{executor_name}')
'''
        panels = [
            Panel(
                Syntax(
                    p[0],
                    'python',
                    theme='monokai',
                    word_wrap=True,
                ),
                title=p[1],
                width=80,
                expand=False,
            ) for p in [
                (flow_plain, 'Use via source'),
                (flow_docker, 'Use in Docker'),
                (flow_sandbox, 'Use in Sandbox'),
            ]
        ]

        if usage_kind == 'docker':
            console.print(panels[1])
        elif usage_kind == 'source':
            console.print(panels[0])
        else:
            console.print(*reversed(panels))

    @staticmethod
    @disk_cache_offline(cache_file=str(get_cache_db()))
    def fetch_meta(
        name: str,
        tag: str,
        *,
        secret: Optional[str] = None,
        image_required: bool = True,
        rebuild_image: bool = True,
        force: bool = False,
    ) -> HubExecutor:
        """Fetch the executor meta info from Jina Hub.
        :param name: the UUID/Name of the executor
        :param tag: the tag of the executor if available, otherwise, use `None` as the value
        :param secret: the access secret of the executor
        :param image_required: it indicates whether a Docker image is required or not
        :param rebuild_image: it indicates whether Jina Hub need to rebuild image or not
        :param force: if set to True, access to fetch_meta will always pull latest Executor metas, otherwise, default
            to local cache
        :return: meta of executor
        :raises Exception: when the request does not succeed, or when an image is
            required but the response lists none

        .. note::
            The `name` and `tag` should be passed via ``args`` and `force` and `secret` as ``kwargs``, otherwise,
            cache does not work.
        """
        with ImportExtensions(required=True):
            import requests

        @retry(num_retry=3)
        def _send_request_with_retry(url, **kwargs):
            resp = requests.post(url, **kwargs)
            if resp.status_code != 200:
                # prefer the response body as error message when there is one
                if resp.text:
                    raise Exception(resp.text)
                resp.raise_for_status()

            return resp

        pull_url = get_hubble_url_v2() + '/rpc/executor.getPackage'

        payload = {
            'id': name,
            'include': ['code'],
            'rebuildImage': rebuild_image
        }
        if image_required:
            payload['include'].append('docker')
        if secret:
            payload['secret'] = secret
        if tag:
            payload['tag'] = tag

        req_header = get_request_header()

        resp = _send_request_with_retry(pull_url,
                                        json=payload,
                                        headers=req_header)
        resp = resp.json()['data']

        images = resp['package'].get('containers', [])
        image_name = images[0] if images else None
        if image_required and not image_name:
            raise Exception(
                f'No image found for executor "{name}", '
                f'tag: {tag}, commit: {resp.get("commit", {}).get("id")}, '
                f'session_id: {req_header.get("jinameta-session-id")}')

        commit = resp['commit']
        # An empty (or null) tag list would make `[0]` raise IndexError/TypeError;
        # fall back to "no tag" in that case, like a missing `tags` key does.
        commit_tags = commit.get('tags') or [None]

        return HubExecutor(
            uuid=resp['id'],
            name=resp.get('name', None),
            commit_id=commit.get('id'),
            tag=tag or commit_tags[0],
            visibility=resp['visibility'],
            image_name=image_name,
            archive_url=resp['package']['download'],
            md5sum=resp['package']['md5'],
        )

    @staticmethod
    def deploy_public_sandbox(args: Union[argparse.Namespace, Dict]) -> tuple:
        """
        Deploy a public sandbox to Jina Hub.

        An existing sandbox is looked up first and reused when the lookup returns
        both a host and a port; otherwise a new one is created.

        :param args: arguments parsed from the CLI, as a namespace or a dict; the
            input is deep-copied and left untouched
        :raises Exception: when the creation request does not return a host and a port

        :return: the host and port of the sandbox, as a ``(host, port)`` tuple
        """
        args_copy = copy.deepcopy(args)
        if not isinstance(args_copy, dict):
            args_copy = vars(args_copy)

        # `uses` is taken out of the arguments and sent as name/tag instead
        _, name, tag, _ = parse_hub_uri(args_copy.pop('uses', ''))
        payload = {
            'name': name,
            'tag': tag if tag else 'latest',
            'jina': __version__,
            'args': args_copy,
        }

        import requests

        console = get_rich_console()

        host = None
        port = None

        json_response = requests.post(
            url=get_hubble_url_v2() + '/rpc/sandbox.get',
            json=payload,
            headers=get_request_header(),
        ).json()
        if json_response.get('code') == 200:
            # `or {}` also covers an explicit null `data` field, matching the
            # handling of the creation response below
            data = json_response.get('data') or {}
            host = data.get('host', None)
            port = data.get('port', None)

        if host and port:
            console.log("🎉 A sandbox already exists, reusing it.")
            return host, port

        with console.status(
                f"[bold green]🚧 Deploying sandbox for [bold white]{name}[/bold white] since none exists..."
        ):
            try:
                json_response = requests.post(
                    url=get_hubble_url_v2() + '/rpc/sandbox.create',
                    json=payload,
                    headers=get_request_header(),
                ).json()

                data = json_response.get('data') or {}
                host = data.get('host', None)
                port = data.get('port', None)
                if not host or not port:
                    raise Exception(
                        f'Failed to deploy sandbox: {json_response}')

                console.log("🎉 Deployment completed, using it.")
            except Exception:
                # report real failures only; an interrupt by the user passes
                # through without the "raise an issue" hint
                console.log(
                    "🚨 Deployment failed, feel free to raise an issue. https://github.com/jina-ai/jina/issues/new"
                )
                raise

        return host, port

    def _pull_with_progress(self, log_streams, console):
        """Render the entries of a pull log stream as progress bars.

        :param log_streams: iterable of decoded log entries; entries without a
            ``status`` key are ignored
        :param console: console object the progress bars are drawn on
        """
        from rich.progress import BarColumn, DownloadColumn, Progress

        columns = (
            "[progress.description]{task.description}",
            BarColumn(),
            DownloadColumn(),
            "[progress.percentage]{task.percentage:>3.0f}%",
        )
        with Progress(*columns, console=console, transient=True) as progress:
            # one progress task per distinct `id` seen in the stream
            task_by_id = {}
            for entry in log_streams:
                if 'status' not in entry:
                    continue

                status = entry['status']
                entry_id = entry.get('id', None)
                detail = entry.get('progressDetail', None)

                # entries without id or progress detail are only logged
                if entry_id is None or detail is None:
                    self.logger.debug(status)
                    continue

                try:
                    task_id = task_by_id[entry_id]
                except KeyError:
                    task_id = progress.add_task(status, total=0)
                    task_by_id[entry_id] = task_id

                if not detail:
                    # empty detail: refresh the description only
                    progress.update(task_id, advance=0, description=status)
                elif 'current' in detail and 'total' in detail:
                    progress.update(
                        task_id,
                        completed=detail['current'],
                        total=detail['total'],
                        description=status,
                    )

    def _load_docker_client(self):
        """Create the Docker clients used for pulling images.

        Sets ``self._client`` (from the environment) and ``self._raw_client`` (the
        low-level API client, bound to the named pipe on Windows and to the unix
        socket otherwise).

        :raises SystemExit: with exit code 1 when a ``DockerException`` is raised
            while creating the clients
        """
        with ImportExtensions(required=True):
            import docker.errors
            from docker import APIClient

            from jina import __windows__

            try:
                self._client = docker.from_env()
                # low-level client
                self._raw_client = APIClient(
                    base_url=docker.constants.DEFAULT_NPIPE
                    if __windows__ else docker.constants.DEFAULT_UNIX_SOCKET)
            except docker.errors.DockerException:
                self.logger.critical(
                    'Docker daemon seems not running. Please run Docker daemon and try again.'
                )
                # Same effect as `exit(1)`, without depending on the helper that
                # the `site` module injects for interactive use.
                raise SystemExit(1)

    def pull(self) -> str:
        """Pull the executor package from Jina Hub.

        For ``jinahub+docker`` URIs the Docker image is pulled when it is missing
        locally or an update is forced. For ``jinahub`` URIs the package is
        downloaded and installed when it is missing, outdated or an update is
        forced. Unless ``self.args.no_usage`` is set, a usage hint is printed on
        success.

        A ``KeyboardInterrupt`` is swallowed, in which case nothing is returned.

        :raises ValueError: when the URI scheme is neither ``jinahub`` nor
            ``jinahub+docker``
        :return: the `uses` string
        """

        console = get_rich_console()
        cached_zip_file = None
        executor_name = None
        usage_kind = None

        try:
            need_pull = self.args.force_update
            with console.status(f'Pulling {self.args.uri}...') as st:
                scheme, name, tag, secret = parse_hub_uri(self.args.uri)
                image_required = scheme == 'jinahub+docker'

                st.update(f'Fetching [bold]{name}[/bold] from Jina Hub ...')
                executor, from_cache = HubIO.fetch_meta(
                    name,
                    tag,
                    secret=secret,
                    image_required=image_required,
                    force=need_pull,
                )

                # `name` may be present but None; fall back to the uuid in that
                # case as well, otherwise the usage hint would read "None".
                presented_id = getattr(executor, 'name', None) or executor.uuid
                executor_name = (
                    f'{presented_id}' if executor.visibility == 'public' else
                    f'{presented_id}:{secret}') + (f'/{tag}' if tag else '')

                if scheme == 'jinahub+docker':
                    self._load_docker_client()
                    import docker

                    try:
                        self._client.images.get(executor.image_name)
                    except docker.errors.ImageNotFound:
                        need_pull = True

                    if need_pull:
                        st.update('Pulling image ...')
                        log_stream = self._raw_client.pull(executor.image_name,
                                                           stream=True,
                                                           decode=True)
                        # hand the console over to the progress bars
                        st.stop()
                        self._pull_with_progress(
                            log_stream,
                            console,
                        )
                    usage_kind = 'docker'
                    return f'docker://{executor.image_name}'
                elif scheme == 'jinahub':
                    import filelock

                    with filelock.FileLock(get_lockfile(), timeout=-1):
                        try:
                            pkg_path, pkg_dist_path = get_dist_path_of_executor(
                                executor)
                            # check commit id to upgrade
                            commit_file_path = (
                                pkg_dist_path /
                                f'PKG-COMMIT-{executor.commit_id or 0}')
                            if (not commit_file_path.exists()) and any(
                                    pkg_dist_path.glob('PKG-COMMIT-*')):
                                raise FileNotFoundError(
                                    f'{pkg_path} need to be upgraded')

                            st.update(
                                'Installing [bold]requirements.txt[/bold]...')
                            install_package_dependencies(
                                install_deps=self.args.install_requirements,
                                pkg_dist_path=pkg_dist_path,
                                pkg_path=pkg_dist_path,
                            )

                        except FileNotFoundError:
                            # package missing or outdated: download it again
                            need_pull = True

                        if need_pull:
                            # pull the latest executor meta, as the cached data would expire
                            if from_cache:
                                executor, _ = HubIO.fetch_meta(
                                    name,
                                    tag,
                                    secret=secret,
                                    image_required=False,
                                    force=True,
                                )

                            st.update(f'Downloading {name} ...')
                            cached_zip_file = download_with_resume(
                                executor.archive_url,
                                get_download_cache_dir(),
                                f'{executor.uuid}-{executor.md5sum}.zip',
                                md5sum=executor.md5sum,
                            )

                            st.update(f'Unpacking {name} ...')
                            install_local(
                                cached_zip_file,
                                executor,
                                install_deps=self.args.install_requirements,
                            )

                            pkg_path, _ = get_dist_path_of_executor(executor)

                        usage_kind = 'source'
                        return f'{pkg_path / "config.yml"}'
                else:
                    raise ValueError(f'{self.args.uri} is not a valid scheme')
        except KeyboardInterrupt:
            # clearing the name suppresses the usage hint below
            executor_name = None
        except Exception:
            executor_name = None
            raise
        finally:
            # delete downloaded zip package if existed
            if cached_zip_file is not None:
                cached_zip_file.unlink()

            if not self.args.no_usage and executor_name:
                self._get_prettyprint_usage(console, executor_name, usage_kind)
Exemple #11
0
class JinaDProcessTarget:
    """Target to be executed on JinaD Process"""

    def __call__(
        self,
        args: 'argparse.Namespace',
        is_started: Union['multiprocessing.Event', 'threading.Event'],
        is_shutdown: Union['multiprocessing.Event', 'threading.Event'],
        is_ready: Union['multiprocessing.Event', 'threading.Event'],
        is_cancelled: Union['multiprocessing.Event', 'threading.Event'],
        envs: Optional[Dict] = None,
    ):
        """Method responsible to manage a remote Pod

        This method is the target for the Pod's `thread` or `process`

        .. note::
            Please note that env variables are process-specific. Subprocess inherits envs from
            the main process. But Subprocess's envs do NOT affect the main process. It does NOT
            mess up user local system envs.

        :param args: namespace args from the Pod
        :param is_started: concurrency event to communicate runtime is properly started. Used for better logging
        :param is_shutdown: concurrency event to communicate runtime is terminated
        :param is_ready: concurrency event to communicate runtime is ready to receive messages
        :param is_cancelled: concurrency event to receive cancelling signal from the Pod. Needed by some runtimes
        :param envs: a dictionary of environment variables to be passed to remote Pod
        """
        self.args = args
        self.envs = envs
        self.is_started = is_started
        self.is_shutdown = is_shutdown
        self.is_ready = is_ready
        self.is_cancelled = is_cancelled
        # stays `None` until the remote Pod has been created; `_terminate_remote_pod` relies on that
        self.pod_id = None
        self._logger = JinaLogger('RemotePod', **vars(args))
        # NOTE(review): `run_async` is defined elsewhere; presumably it drives `_run` to completion - confirm
        run_async(self._run)

    async def _run(self):
        """Manage a remote Pod

        Creates the remote Pod, flags `is_started`/`is_ready`, waits for the cancel signal and finally
        removes the remote Pod again. Errors raised during creation are logged, not re-raised.
        """
        try:
            await self._create_remote_pod()
        except Exception as ex:
            # Build the message step by step: the previous one-liner
            # `a + b if cond else ''` was parsed as `(a + b) if cond else ''`,
            # which logged an empty message whenever `--quiet-error` was set.
            error_msg = f'{ex!r} while starting a remote Pod'
            if not self.args.quiet_error:
                error_msg += '\n add "--quiet-error" to suppress the exception details'
            self._logger.error(error_msg, exc_info=not self.args.quiet_error)
        else:
            self.is_started.set()
            self.is_ready.set()
            await self._wait_until_cancelled()
        finally:
            try:
                await self._terminate_remote_pod()
            finally:
                # set the event even if the remote cleanup itself raised, so that
                # anything waiting on `is_shutdown` is always released
                self.is_shutdown.set()
                self._logger.debug('JinaDProcessTarget terminated')

    async def _create_remote_pod(self):
        """Create Workspace, Pod on remote JinaD server

        On success the id returned by the daemon is stored in `self.pod_id`.

        :raises DaemonConnectivityError: if the daemon does not report itself alive
        :raises DaemonWorkspaceCreationFailed: if no workspace id is returned
        :raises DaemonPodCreationFailed: if the daemon reports the Pod creation as unsuccessful
        """
        with ImportExtensions(required=True):
            # rich & aiohttp are used in `AsyncJinaDClient`
            import rich
            import aiohttp
            from daemon.clients import AsyncJinaDClient

            assert rich
            assert aiohttp

        # NOTE: args.timeout_ready is always set to -1 for JinadRuntime so that wait_for_success doesn't fail in Pod,
        # so it can't be used for Client timeout.
        self.client = AsyncJinaDClient(host=self.args.host,
                                       port=self.args.port_jinad,
                                       logger=self._logger)

        if not await self.client.alive:
            raise DaemonConnectivityError

        # Create a remote workspace with upload_files
        workspace_id = await self.client.workspaces.create(
            paths=self.filepaths,
            id=self.args.workspace_id,
            complete=True,
        )
        if not workspace_id:
            self._logger.critical('remote workspace creation failed')
            raise DaemonWorkspaceCreationFailed

        payload = replace_enum_to_str(vars(self._mask_args()))
        # Create a remote Pod in the above workspace
        success, response = await self.client.pods.create(
            workspace_id=workspace_id, payload=payload, envs=self.envs)
        if not success:
            self._logger.critical('remote pod creation failed')
            raise DaemonPodCreationFailed(response)

        self.pod_id = response

    async def _sleep_forever(self):
        """Sleep forever, no prince will come."""
        await asyncio.sleep(1e10)

    async def _wait_until_cancelled(self):
        """Poll `is_cancelled` every 0.1 seconds and return once it is set."""
        while not self.is_cancelled.is_set():
            await asyncio.sleep(0.1)

    async def _terminate_remote_pod(self):
        """Removes the remote Pod

        Does nothing when no remote Pod was created (`self.pod_id` is still `None`).
        """
        if self.pod_id is None:
            return

        if await self.client.pods.delete(id=self.pod_id):
            self._logger.success(
                f'Successfully terminated remote Pod {self.pod_id}')
        else:
            # a falsy result used to be ignored silently; make the failed cleanup visible
            self._logger.warning(
                f'remote Pod {self.pod_id} could not be terminated')
        # Don't delete workspace here, as other Executors might use them.
        # TODO(Deepankar): probably enable an arg here?

    @property
    def filepaths(self) -> List[Path]:
        """Get file/directories to be uploaded to remote workspace

        Paths for which `complete_path` raises `FileNotFoundError` are logged and skipped;
        duplicates are dropped.

        :return: filepaths to be uploaded to remote
        """
        paths = set()
        if not self.args.upload_files:
            self._logger.warning('no files passed to upload to remote')
        else:
            for path in self.args.upload_files:
                try:
                    fullpath = Path(complete_path(path))
                    paths.add(fullpath)
                except FileNotFoundError:
                    self._logger.error(f'invalid path {path} passed')

        return list(paths)

    def _mask_args(self):
        """Build the arguments that are sent to the remote daemon

        Works on a deep copy, so `self.args` is left untouched. Every value that differs from
        the original is reported in a single debug log entry.

        :return: a copy of `self.args` with the remote-specific overrides applied
        """
        cargs = copy.deepcopy(self.args)

        # TODO:/NOTE this prevents jumping from remote to another remote (Han: 2021.1.17)
        # _args.host = __default_host__
        # host resetting disables dynamic routing. Use `disable_remote` instead
        cargs.disable_remote = True
        cargs.log_config = ''  # do not use local log_config
        cargs.upload_files = []  # reset upload files
        cargs.noblock_on_start = False  # wait until start success

        changes = [
            f'{k:>30s}: {str(getattr(self.args, k)):30s} -> {str(v):30s}'
            for k, v in vars(cargs).items()
            if v != getattr(self.args, k)
        ]
        if changes:
            changes = [
                'note the following arguments have been masked or altered for remote purpose:'
            ] + changes
            self._logger.debug('\n'.join(changes))

        return cargs
Exemple #12
0
class BasePod(ABC):
    """
    :class:`BasePod` is an interface from which all the classes managing the lifetime of a Runtime inside a local process,
    container or in a remote JinaD instance (to come) must inherit.

    It exposes the required APIs so that the `BasePod` can be handled by the `cli` api as a context manager or by a `Deployment`.

    What makes a BasePod a BasePod is that it manages the lifecycle of a Runtime (gateway or not gateway)
    """

    def __init__(self, args: 'argparse.Namespace'):
        """Store the arguments and create the events shared with the runtime.

        Note that `args` is modified in place: `port_in` is overwritten with `port_expose`
        (when present) and `parallel` is set to `shards`.

        :param args: namespace args describing the Pod and its Runtime
        """
        self.args = args

        if hasattr(self.args, 'port_expose'):
            self.args.port_in = self.args.port_expose
        self.args.parallel = self.args.shards
        self.name = self.args.name or self.__class__.__name__
        self.is_forked = False
        self.logger = JinaLogger(self.name, **vars(self.args))

        if self.args.runtime_backend == RuntimeBackendType.THREAD:
            # the space after "it is" was missing, the message read "it isspecially dangerous"
            self.logger.warning(
                ' Using Thread as runtime backend is not recommended for production purposes. It is '
                'just supposed to be used for easier debugging. Besides the performance considerations, it is '
                'specially dangerous to mix `Executors` running in different types of `RuntimeBackends`.'
            )

        self._envs = {'JINA_DEPLOYMENT_NAME': self.name}
        if self.args.quiet:
            self._envs['JINA_LOG_CONFIG'] = 'QUIET'
        if self.args.env:
            self._envs.update(self.args.env)

        # arguments needed to create `runtime` and communicate with it in the `run` in the stack of the new process
        # or thread.
        # A throw-away worker of the configured backend is instantiated only to be handed to `_get_event`.
        # NOTE(review): `_get_event` is defined elsewhere; presumably it returns an event matching the
        # worker kind (thread vs. process) - confirm
        test_worker = {
            RuntimeBackendType.THREAD: threading.Thread,
            RuntimeBackendType.PROCESS: multiprocessing.Process,
        }.get(getattr(args, 'runtime_backend', RuntimeBackendType.THREAD))()
        self.is_ready = _get_event(test_worker)
        self.is_shutdown = _get_event(test_worker)
        self.cancel_event = _get_event(test_worker)
        self.is_started = _get_event(test_worker)
        self.ready_or_shutdown = ConditionalEvent(
            getattr(args, 'runtime_backend', RuntimeBackendType.THREAD),
            events_list=[self.is_ready, self.is_shutdown],
        )
        self.daemon = self.args.daemon
        self.runtime_ctrl_address = self._get_control_address()
        self._timeout_ctrl = self.args.timeout_ctrl

    def _get_control_address(self):
        """Build the control address of the runtime as `host:port_in`.

        :return: the control address string
        """
        return f'{self.args.host}:{self.args.port_in}'

    def close(self) -> None:
        """Close the Pod

        This method makes sure that the `Process/thread` is properly finished and its resources properly released

        Errors raised while terminating are logged and not propagated. `is_shutdown` is always set
        and the logger is closed before returning.
        """
        self.logger.debug('waiting for ready or shutdown signal from runtime')
        if not self.is_shutdown.is_set() and self.is_started.is_set():
            try:
                self.logger.debug('terminate')
                self._terminate()
                if not self.is_shutdown.wait(timeout=self._timeout_ctrl
                                             if not __windows__ else 1.0):
                    if not __windows__:
                        raise Exception(
                            f'Shutdown signal was not received for {self._timeout_ctrl} seconds'
                        )
                    else:
                        self.logger.warning(
                            'Pod was forced to close after 1 second. Graceful closing is not available on Windows.'
                        )
            except Exception as ex:
                # Build the message step by step: the previous one-liner
                # `a + b if cond else ''` was parsed as `(a + b) if cond else ''`,
                # which logged an empty message whenever `--quiet-error` was set.
                error_msg = f'{ex!r} during {self.close!r}'
                if not self.args.quiet_error:
                    error_msg += '\n add "--quiet-error" to suppress the exception details'
                self.logger.error(error_msg,
                                  exc_info=not self.args.quiet_error)
        else:
            # here shutdown has been set already, therefore `run` will gracefully finish
            self.logger.debug(
                f'{"shutdown is is already set" if self.is_shutdown.is_set() else "Runtime was never started"}. Runtime will end gracefully on its own'
            )
        self.is_shutdown.set()
        self.logger.debug(__stop_msg__)
        self.logger.close()

    def __enter__(self):
        """Start the Pod when entering the context.

        :return: whatever :meth:`start` returns
        """
        return self.start()

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Close the Pod when leaving the context; exceptions are not suppressed.

        :param exc_type: type of the exception raised inside the context, if any
        :param exc_val: the exception raised inside the context, if any
        :param exc_tb: traceback of the exception raised inside the context, if any
        """
        self.close()

    def _wait_for_ready_or_shutdown(self, timeout: Optional[float]):
        """
        Waits for the process to be ready or to know it has failed.

        Delegates to `AsyncNewLoopRuntime.wait_for_ready_or_shutdown` and returns its result.

        :param timeout: The time to wait before readiness or failure is determined
            .. # noqa: DAR201
        """
        return AsyncNewLoopRuntime.wait_for_ready_or_shutdown(
            timeout=timeout,
            ready_or_shutdown_event=self.ready_or_shutdown.event,
            ctrl_address=self.runtime_ctrl_address,
            timeout_ctrl=self._timeout_ctrl,
        )

    def _fail_start_timeout(self, timeout):
        """
        Closes the Pod and raises a TimeoutError with the corresponding warning messages

        :param timeout: The timeout in seconds that elapsed; a falsy value is reported as `-1`
        :raises TimeoutError: always, after the Pod has been closed
        """
        _timeout = timeout or -1
        self.logger.warning(
            f'{self} timeout after waiting for {self.args.timeout_ready}ms, '
            f'if your executor takes time to load, you may increase --timeout-ready'
        )
        self.close()
        raise TimeoutError(
            f'{typename(self)}:{self.name} can not be initialized after {_timeout * 1e3}ms'
        )

    def _check_failed_to_start(self):
        """
        Raises a corresponding exception if failed to start

        :raises RuntimeFailToStart: if shutdown is set and the runtime was never started
        :raises RuntimeRunForeverEarlyError: if shutdown is set although the runtime had started
        """
        if self.is_shutdown.is_set():
            # return too early and the shutdown is set, means something fails!!
            if not self.is_started.is_set():
                raise RuntimeFailToStart
            else:
                raise RuntimeRunForeverEarlyError

    def wait_start_success(self):
        """Block until all pods starts successfully.

        If not success, it will raise an error hoping the outer function to catch it

        `args.timeout_ready` is given in milliseconds; a value `<= 0` is passed on as `None` (no timeout).
        """
        _timeout = self.args.timeout_ready
        if _timeout <= 0:
            _timeout = None
        else:
            _timeout /= 1e3
        if self._wait_for_ready_or_shutdown(_timeout):
            self._check_failed_to_start()
            self.logger.debug(__ready_msg__)
        else:
            self._fail_start_timeout(_timeout)

    async def async_wait_start_success(self):
        """
        Wait for the `Pod` to start successfully in a non-blocking manner

        Polls the ready-or-shutdown event every 0.1 seconds. `args.timeout_ready` is given in
        milliseconds; a value `<= 0` means the loop never times out.
        """
        import asyncio

        _timeout = self.args.timeout_ready
        if _timeout <= 0:
            _timeout = None
        else:
            _timeout /= 1e3

        timeout_ns = 1e9 * _timeout if _timeout else None
        now = time.time_ns()
        while timeout_ns is None or time.time_ns() - now < timeout_ns:

            if self.ready_or_shutdown.event.is_set():
                self._check_failed_to_start()
                self.logger.debug(__ready_msg__)
                return
            else:
                await asyncio.sleep(0.1)

        self._fail_start_timeout(_timeout)

    @property
    def role(self) -> 'PodRoleType':
        """Get the role of this pod in a deployment
        .. #noqa: DAR201"""
        return self.args.pod_role

    @abstractmethod
    def start(self):
        """Start the BasePod.
        This method calls :meth:`start` in :class:`threading.Thread` or :class:`multiprocesssing.Process`.
        .. #noqa: DAR201
        """
        ...

    @abstractmethod
    def _terminate(self):
        """Terminate the underlying runtime; called by :meth:`close`. Must be implemented by subclasses."""
        ...

    @abstractmethod
    def join(self, *args, **kwargs):
        """Joins the BasePod. Wait for the BasePod to properly terminate

        :param args: extra positional arguments
        :param kwargs: extra keyword arguments
        """
        ...