Exemple #1
0
class PartialStore(ABC):
    """Abstract store living inside a partial-daemon container.

    It keeps a single managed ``object`` together with the ``item``
    describing it.
    """
    def __init__(self):
        self._logger = JinaLogger(self.__class__.__name__, **vars(jinad_args))
        self.item = PartialStoreItem()
        self.object: Union[Type['BasePod'], Type['BaseDeployment'],
                           'Flow'] = None

    @abstractmethod
    def add(self, *args, **kwargs) -> PartialStoreItem:
        """Add a new element to the store. Subclasses must override this method


        .. #noqa: DAR101"""
        ...

    def delete(self) -> None:
        """Close the stored object (if it can be closed) and reset the store item

        Any exception raised while closing is logged and re-raised; in that
        case the store item is left untouched.
        """
        try:
            if not hasattr(self.object, 'close'):
                self._logger.warning('nothing to close. exiting')
            else:
                self.object.close()
                arguments = self.item.arguments
                self._logger.info(arguments)
                if arguments.get('identity'):
                    self._logger.success(
                        f'{colored(arguments["identity"], "cyan")} is removed!'
                    )
                else:
                    self._logger.success('object is removed!')
        except Exception as e:
            self._logger.error(f'{e!r}')
            raise

        # only reached when nothing above raised
        self.item = PartialStoreItem()
Exemple #2
0
def log(logger: JinaLogger):
    """Emit one fixed test message on every log level, from debug to critical.

    :param logger: the logger whose level methods are called in order
    """
    levels = ('debug', 'info', 'success', 'warning', 'error', 'critical')
    for level in levels:
        emit = getattr(logger, level)
        emit(f'this is test {level} message')
Exemple #3
0
def _register_to_mongodb(logger: JinaLogger, summary: Optional[Dict] = None):
    """Push the image summary to the Jina Hub database (used by `hub push`).

    The endpoint is read from ``hubapi.yml`` in the resources directory and
    the summary is POSTed as a JSON string together with an access token.

    :param logger: the logger instance
    :param summary: the summary dict object
    """
    # TODO(Deepankar): implement to jsonschema based validation for summary
    logger.info('registering image to Jina Hub database...')

    config_file = os.path.join(__resources_path__, 'hubapi.yml')
    with open(config_file) as fp:
        hubapi_section = JAML.load(fp)['hubapi']
        hubapi_url = hubapi_section['url'] + hubapi_section['push']

    with ImportExtensions(
            required=True,
            help_text=
            'Missing "requests" dependency, please do pip install "jina[http]"',
    ):
        import requests

    access_token = _fetch_access_token(logger)
    response = requests.post(
        url=f'{hubapi_url}',
        headers={
            'Accept': 'application/json',
            'authorizationToken': access_token,
        },
        data=json.dumps(summary),
    )

    # every non-OK answer is reported as a login problem
    if response.status_code != requests.codes.ok:
        raise HubLoginRequired(
            f'❌ Got an error from the API: {response.text.rstrip()}. '
            f'Please login using command: {colored("jina hub login", attrs=["bold"])}'
        )
    logger.success(f'✅ Successfully updated the database. {response.text}')
Exemple #4
0
class QueryExecutor(Executor):
    """Executor answering `/search` requests against documents loaded from a dump."""
    def __init__(self, dump_path: Optional[str] = None, *args, **kwargs):
        """Load the index from ``dump_path`` or start with an empty one.

        :param dump_path: location of the dump; when falsy, the `dump_path`
            entry of the `runtime_args` keyword argument is used instead
        :param args: forwarded to the parent constructor
        :param kwargs: forwarded to the parent constructor
        """
        super().__init__(*args, **kwargs)
        self.logger = JinaLogger('QueryExecutor')

        if not dump_path:
            dump_path = kwargs.get('runtime_args', {}).get('dump_path', None)
        self._dump_path = dump_path

        dump_available = (self._dump_path is not None
                          and os.path.exists(self._dump_path))
        if not dump_available:
            self.logger.warning('no dump path passed. Loading an empty index')
            self._docs = DocumentArray()
        else:
            self.logger.success(
                f'loading Executor from dump path: {self._dump_path}')
            self._docs = DocumentArray.load(self._dump_path)

    @requests(on='/search')
    def search(self, docs: 'DocumentArray', parameters: Dict, **kwargs):
        """Match the incoming documents against the loaded index.

        :param docs: the query documents, matched in place
        :param parameters: request parameters; `top_k` (default 5) limits the matches
        :param kwargs: not used
        """
        if len(self._docs) == 0:
            # empty index: nothing to match against
            return
        limit = int(parameters.get('top_k', 5))
        docs.match(self._docs, limit=limit)
Exemple #5
0
class BaseStore(MutableMapping):
    """The Base class for Jinad stores

    Items are kept in a plain dict keyed by :class:`uuid.UUID`. Lookup,
    deletion and :meth:`delete` also accept the string form of the key.
    """
    def __init__(self):
        self._items = {}  # type: Dict['uuid.UUID', Dict[str, Any]]
        self._logger = JinaLogger(self.__class__.__name__, **vars(jinad_args))
        self._init_stats()

    @staticmethod
    def _to_uuid(key: Union[str, 'uuid.UUID']) -> 'uuid.UUID':
        """Convert the string form of a key to a UUID, leave other keys untouched

        :param key: the key as string or UUID
        :return: the key as UUID
        """
        return uuid.UUID(key) if isinstance(key, str) else key

    def _init_stats(self):
        """Initialize the stats """
        self._time_created = datetime.now()
        self._time_updated = self._time_created
        self._num_add = 0
        self._num_del = 0

    def add(self, *args, **kwargs) -> 'uuid.UUID':
        """Add a new element to the store. This method needs to be overridden by the subclass


        .. #noqa: DAR101"""
        raise NotImplementedError

    def update(self, *args, **kwargs) -> 'uuid.UUID':
        """Updates the element to the store. This method needs to be overridden by the subclass


        .. #noqa: DAR101"""
        raise NotImplementedError

    def delete(
        self,
        id: Union[str, uuid.UUID],
        workspace: bool = False,
        everything: bool = False,
        **kwargs,
    ):
        """delete an object from the store

        The stored object is closed when it has a `close` method. With
        `workspace` set, every file below the workdir except files named
        `logging.log` is removed; with `everything` set, the whole workdir is
        removed.

        :param id: the id of the object
        :param workspace: whether to delete the workdir of the object
        :param everything: whether to delete everything
        :param kwargs: not used
        :raises KeyError: if `id` is not in the store
        """
        id = self._to_uuid(id)
        if id not in self._items:
            raise KeyError(f'{colored(str(id), "cyan")} not found in store.')

        v = self._items[id]
        if 'object' in v and hasattr(v['object'], 'close'):
            v['object'].close()
        if workspace and v.get('workdir', None):
            # NOTE: a glob such as '[!logging.log]*' is a negated character
            # class (it skips any name starting with l, o, g, i, n or '.'),
            # so the log file is excluded by name instead.
            for path in Path(v['workdir']).rglob('*'):
                if path.is_file() and path.name != 'logging.log':
                    self._logger.debug(f'file to be deleted: {path}')
                    path.unlink()
        if everything and v.get('workdir', None):
            self._logger.debug(f'directory to be deleted: {v["workdir"]}')
            shutil.rmtree(v['workdir'])
        del self[id]
        self._logger.success(
            f'{colored(str(id), "cyan")} is released from the store.')

    def __iter__(self):
        return iter(self._items)

    def __len__(self):
        return len(self._items)

    def __getitem__(self, key: Union['uuid.UUID', str]):
        return self._items[self._to_uuid(key)]

    def __delitem__(self, key: Union['uuid.UUID', str]):
        """Release a Pea/Pod/Flow object from the store

        :param key: the key of the object, as UUID or its string form


        .. #noqa: DAR201"""
        self._items.pop(self._to_uuid(key))
        self._time_updated = datetime.now()
        self._num_del += 1

    def clear(self) -> None:
        """delete all the objects in the store"""

        # snapshot the keys: `delete` mutates the dict while we iterate
        for k in list(self._items):
            self.delete(id=k, workspace=True)

    def reset(self) -> None:
        """Calling :meth:`clear` and reset all stats """
        self.clear()
        self._init_stats()

    def __setitem__(self, key: 'uuid.UUID', value: Dict) -> None:
        """Store `value` under `key` and stamp it with its creation time

        :param key: the key of the object
        :param value: the item dict; a `time_created` entry is written into it
        """
        self._items[key] = value
        t = datetime.now()
        value.update({'time_created': t})
        self._time_updated = t
        self._num_add += 1

    @property
    def status(self) -> Dict:
        """Return the status of this store as a dict


        .. #noqa: DAR201"""
        return {
            'size': len(self._items),
            'time_created': self._time_created,
            'time_updated': self._time_updated,
            'num_add': self._num_add,
            'num_del': self._num_del,
            'items': self._items,
        }
Exemple #6
0
class DaemonWorker(Thread):
    """Worker Thread for JinaD

    One worker handles one workspace id. The thread is created as a daemon
    thread and starts itself at the end of ``__init__``; the actual work is
    done in :meth:`run`. Most expensive steps are exposed as cached
    properties and are evaluated on first access.
    """
    def __init__(self, id: 'DaemonID', files: List[UploadFile], name: str,
                 *args, **kwargs) -> None:
        """Store the inputs, create the logger and start the thread

        :param id: id of the workspace this worker operates on
        :param files: uploaded files to be stored in the workspace
        :param name: suffix appended to the class name to build the thread name
        :param args: not used
        :param kwargs: not used
        """
        super().__init__(name=f'{self.__class__.__name__}{name}', daemon=True)
        # `id` must be set before the logger: `self.workdir` is derived from it
        self.id = id
        self.files = files
        self._logger = JinaLogger(self.name,
                                  workspace_path=self.workdir,
                                  **vars(jinad_args))
        # the thread is started right away, callers do not call `start()`
        self.start()

    @cached_property
    def arguments(self) -> WorkspaceArguments:
        """sets arguments in workspace store

        Extends a deep copy of the arguments already stored for this
        workspace; falls back to a fresh model when that raises
        `AttributeError`.

        :return: pydantic model for workspace arguments
        """
        try:
            _args = store[self.id].arguments.copy(deep=True)
            _args.files.extend([f.filename
                                for f in self.files] if self.files else [])
            _args.jinad.update({
                'dockerfile': self.daemon_file.dockerfile,
            })
            _args.requirements = self.daemon_file.requirements
        except AttributeError:
            # presumably the stored item has no arguments yet (first creation
            # of the workspace) — TODO confirm against the store implementation
            _args = WorkspaceArguments(
                files=[f.filename for f in self.files] if self.files else [],
                jinad={
                    'dockerfile': self.daemon_file.dockerfile,
                },
                requirements=self.daemon_file.requirements,
            )
        return _args

    @cached_property
    def metadata(self) -> WorkspaceMetadata:
        """sets metadata in workspace store

        The docker image is built first (via :meth:`generate_image`) in both
        cases. The docker network is only requested in the fallback branch.

        :return: pydantic model for workspace metadata
        """
        # built before the `try` so that both branches use the same image id
        image_id = self.generate_image()
        try:
            _metadata = store[self.id].metadata.copy(deep=True)
            _metadata.image_id = image_id
            _metadata.image_name = self.id.tag
        except AttributeError:
            # presumably no metadata is stored yet for this workspace — TODO confirm
            _metadata = WorkspaceMetadata(
                image_id=image_id,
                image_name=self.id.tag,
                network=id_cleaner(self.network_id),
                workdir=self.workdir,
            )
        return _metadata

    @cached_property
    def workdir(self) -> str:
        """sets workdir for current worker thread

        :return: local directory where files would get stored
        """
        return get_workspace_path(self.id)

    @cached_property
    def daemon_file(self) -> DaemonFile:
        """set daemonfile for current worker thread

        :return: DaemonFile object representing current workspace
        """
        return DaemonFile(workdir=self.workdir, logger=self._logger)

    @cached_property
    def network_id(self) -> str:
        """create a docker network

        :return: network id
        """
        return Dockerizer.network(workspace_id=self.id)

    def generate_image(self) -> str:
        """build and create a docker image

        A dedicated logger (same context name and workspace path as the
        worker's logger) is passed to the build.

        :return: image id
        """
        return Dockerizer.build(
            workspace_id=self.id,
            daemon_file=self.daemon_file,
            logger=JinaLogger(
                context=self.name,
                # identity=self.id,
                workspace_path=self.workdir,
            ),
        )

    @cached_property
    def container_id(self) -> Optional[str]:
        """creates a container if run command is passed in .jinad file

        :return: container id, if created
        """
        if self.daemon_file.run:
            container, _, _ = Dockerizer.run_custom(
                workspace_id=self.id, daemon_file=self.daemon_file)
            return id_cleaner(container.id)
        else:
            return None

    def run(self) -> None:
        """
        Method representing the worker thread's activity
        DaemonWorker is a daemon thread responsible for the following tasks:
        During create:
        - store uploaded files in a local workspace
        - create a docker network for the workspace which would be used by all child containers
        - build a docker image to be used by all child containers
        - create a container if `run` command is passed
        During update:
        - update files in the local workspace
        - removes the workspace container, if any
        - recreate workspace container, if `run` command is passed

        Any exception is caught here: the workspace state is set to FAILED in
        the store and the error is logged, nothing is re-raised.
        """
        try:
            # a workspace that already has arguments is being updated,
            # otherwise it is being created
            store.update(
                id=self.id,
                value=RemoteWorkspaceState.UPDATING
                if store[self.id].arguments else RemoteWorkspaceState.CREATING,
            )
            store_files_in_workspace(workspace_id=self.id,
                                     files=self.files,
                                     logger=self._logger)
            # accessing `self.metadata` / `self.arguments` here triggers the
            # image build and reads the daemon file
            store.update(
                id=self.id,
                value=WorkspaceItem(
                    state=RemoteWorkspaceState.UPDATING,
                    metadata=self.metadata,
                    arguments=self.arguments,
                ),
            )

            # this needs to be done after the initial update, otherwise run won't find the necessary metadata
            # If a container exists already, kill it before running again
            previous_container = store[self.id].metadata.container_id
            if previous_container:
                self._logger.info(
                    f'Deleting previous container {previous_container}')
                store[self.id].metadata.container_id = None
                # presumably clears any cached container id so the access
                # below re-evaluates the property — TODO confirm the
                # semantics of the `cached_property` used by this file
                del self.container_id
                Dockerizer.rm_container(previous_container)

            # Create a new container if necessary
            store[self.id].metadata.container_id = self.container_id
            store[self.id].state = RemoteWorkspaceState.ACTIVE

            self._logger.success(
                f'workspace {colored(str(self.id), "cyan")} is updated')
        except DockerNetworkException as e:
            store.update(id=self.id, value=RemoteWorkspaceState.FAILED)
            self._logger.error(
                f'Error while creating the docker network: {e!r}')
        except DockerImageException as e:
            store.update(id=self.id, value=RemoteWorkspaceState.FAILED)
            self._logger.error(f'Error while building the docker image: {e!r}')
        except Exception as e:
            # TODO: how to communicate errors to users? users track it via logs?
            # TODO: Handle cleanup in case of exception
            store.update(id=self.id, value=RemoteWorkspaceState.FAILED)
            self._logger.error(f'{e!r}')
Exemple #7
0
class JinaDProcessTarget:
    """Target to be executed on JinaD Process"""
    def __call__(
        self,
        args: 'argparse.Namespace',
        is_started: Union['multiprocessing.Event', 'threading.Event'],
        is_shutdown: Union['multiprocessing.Event', 'threading.Event'],
        is_ready: Union['multiprocessing.Event', 'threading.Event'],
        is_cancelled: Union['multiprocessing.Event', 'threading.Event'],
        envs: Optional[Dict] = None,
    ):
        """Method responsible to manage a remote Pod

        This method is the target for the Pod's `thread` or `process`

        .. note::
            Please note that env variables are process-specific. Subprocess inherits envs from
            the main process. But Subprocess's envs do NOT affect the main process. It does NOT
            mess up user local system envs.

        :param args: namespace args from the Pod
        :param is_started: concurrency event to communicate runtime is properly started. Used for better logging
        :param is_shutdown: concurrency event to communicate runtime is terminated
        :param is_ready: concurrency event to communicate runtime is ready to receive messages
        :param is_cancelled: concurrency event to receive cancelling signal from the Pod. Needed by some runtimes
        :param envs: a dictionary of environment variables to be passed to remote Pod
        """
        self.args = args
        self.envs = envs
        self.is_started = is_started
        self.is_shutdown = is_shutdown
        self.is_ready = is_ready
        self.is_cancelled = is_cancelled
        self.pod_id = None
        self._logger = JinaLogger('RemotePod', **vars(args))
        run_async(self._run)

    async def _run(self):
        """Manage a remote Pod

        Creates the remote Pod, signals `is_started` / `is_ready`, then waits
        until `is_cancelled` is set. A failure during creation is logged and
        not re-raised. In every case the remote Pod (if any) is terminated
        and `is_shutdown` is set afterwards.
        """
        try:
            await self._create_remote_pod()
        except Exception as ex:
            quiet_error = self.args.quiet_error
            # The hint is only appended in verbose mode. It is built on its
            # own because `a + b if cond else ''` parses as
            # `(a + b) if cond else ''`, which would log an empty message
            # whenever `--quiet-error` is set.
            hint = ('' if quiet_error else
                    '\n add "--quiet-error" to suppress the exception details')
            self._logger.error(
                f'{ex!r} while starting a remote Pod' + hint,
                exc_info=not quiet_error,
            )
        else:
            self.is_started.set()
            self.is_ready.set()
            await self._wait_until_cancelled()
        finally:
            try:
                await self._terminate_remote_pod()
            finally:
                # always signal shutdown, even if terminating the remote Pod
                # failed, so that whoever waits on the event is released
                self.is_shutdown.set()
                self._logger.debug('JinaDProcessTarget terminated')

    async def _create_remote_pod(self):
        """Create Workspace, Pod on remote JinaD server

        :raises DaemonConnectivityError: if the remote daemon is not alive
        :raises DaemonWorkspaceCreationFailed: if no workspace id is returned
        :raises DaemonPodCreationFailed: if the remote Pod cannot be created
        """
        with ImportExtensions(required=True):
            # rich & aiohttp are used in `AsyncJinaDClient`
            import rich
            import aiohttp
            from daemon.clients import AsyncJinaDClient

            assert rich
            assert aiohttp

        # NOTE: args.timeout_ready is always set to -1 for JinadRuntime so that wait_for_success doesn't fail in Pod,
        # so it can't be used for Client timeout.
        self.client = AsyncJinaDClient(host=self.args.host,
                                       port=self.args.port_jinad,
                                       logger=self._logger)

        if not await self.client.alive:
            raise DaemonConnectivityError

        # Create a remote workspace with upload_files
        workspace_id = await self.client.workspaces.create(
            paths=self.filepaths,
            id=self.args.workspace_id,
            complete=True,
        )
        if not workspace_id:
            self._logger.critical('remote workspace creation failed')
            raise DaemonWorkspaceCreationFailed

        payload = replace_enum_to_str(vars(self._mask_args()))
        # Create a remote Pod in the above workspace
        success, response = await self.client.pods.create(
            workspace_id=workspace_id, payload=payload, envs=self.envs)
        if not success:
            self._logger.critical('remote pod creation failed')
            raise DaemonPodCreationFailed(response)
        # on success the response carries the id of the created Pod
        self.pod_id = response

    async def _sleep_forever(self):
        """Sleep forever, no prince will come."""
        await asyncio.sleep(1e10)

    async def _wait_until_cancelled(self):
        """Poll the `is_cancelled` event every 0.1s until it is set"""
        while not self.is_cancelled.is_set():
            await asyncio.sleep(0.1)

    async def _terminate_remote_pod(self):
        """Removes the remote Pod

        Does nothing when no Pod was created (`pod_id` is still None).
        """
        if self.pod_id is not None:
            if await self.client.pods.delete(id=self.pod_id):
                self._logger.success(
                    f'Successfully terminated remote Pod {self.pod_id}')
            # Don't delete workspace here, as other Executors might use them.
            # TODO(Deepankar): probably enable an arg here?

    @property
    def filepaths(self) -> List[Path]:
        """Get file/directories to be uploaded to remote workspace

        Paths for which `complete_path` raises `FileNotFoundError` are logged
        and skipped; duplicates are removed.

        :return: filepaths to be uploaded to remote
        """
        paths = set()
        if not self.args.upload_files:
            self._logger.warning('no files passed to upload to remote')
        else:
            for path in self.args.upload_files:
                try:
                    fullpath = Path(complete_path(path))
                    paths.add(fullpath)
                except FileNotFoundError:
                    self._logger.error(f'invalid path {path} passed')

        return list(paths)

    def _mask_args(self):
        """Return a deep copy of the args adjusted for the remote side

        The original args are left untouched; every altered value is reported
        in a single debug message.

        :return: the masked copy of the args
        """
        cargs = copy.deepcopy(self.args)

        # TODO:/NOTE this prevents jumping from remote to another remote (Han: 2021.1.17)
        # _args.host = __default_host__
        # host resetting disables dynamic routing. Use `disable_remote` instead
        cargs.disable_remote = True
        cargs.log_config = ''  # do not use local log_config
        cargs.upload_files = []  # reset upload files
        cargs.noblock_on_start = False  # wait until start success

        changes = [
            f'{k:>30s}: {str(getattr(self.args, k)):30s} -> {str(v):30s}'
            for k, v in vars(cargs).items()
            if v != getattr(self.args, k)
        ]
        if changes:
            changes = [
                'note the following arguments have been masked or altered for remote purpose:'
            ] + changes
            self._logger.debug('\n'.join(changes))

        return cargs