Esempio n. 1
0
def main(task, num_docs, request_size, data_set, model_name):
    """Run one task of the cross-modal search example.

    :param task: ``index``, ``index_restful``, ``query``, ``query_restful`` or ``dryrun``;
        any other value only logs the task name and returns
    :param num_docs: forwarded to ``input_index_data`` / ``index_restful``
    :param request_size: forwarded to ``input_index_data`` and to ``Flow.index``
    :param data_set: forwarded to ``input_index_data``
    :param model_name: forwarded to ``config``

    Exits the process with status 1 when an indexing task is requested while
    the directory named by ``JINA_WORKSPACE`` already exists.
    """
    config(model_name)
    workspace = os.environ['JINA_WORKSPACE']
    logger = JinaLogger('cross-modal-search')
    # substring test on purpose: covers both 'index' and 'index_restful'
    if 'index' in task:
        if os.path.exists(workspace):
            # NOTE: the backslash continuations live inside the literal, so the
            # leading indentation of each continued line is part of the message.
            logger.error(
                f'\n +------------------------------------------------------------------------------------+ \
                    \n |                                   🤖🤖🤖                                           | \
                    \n | The directory {workspace} already exists. Please remove it before indexing again.  | \
                    \n |                                   🤖🤖🤖                                           | \
                    \n +------------------------------------------------------------------------------------+'
            )
            sys.exit(1)

    logger.info(f'### task = {task}')
    if task == 'index':
        with Flow.load_config('flow-index.yml') as f:
            with TimeContext(f'QPS: indexing {num_docs}', logger=f.logger):
                f.index(
                    input_fn=input_index_data(num_docs, request_size, data_set),
                    request_size=request_size
                )
    elif task == 'index_restful':
        index_restful(num_docs)
    elif task == 'query':
        # same hint as for 'query_restful': querying before indexing is almost
        # certainly a user mistake, but it is not treated as fatal here
        if not os.path.exists(workspace):
            logger.warning(f'The directory {workspace} does not exist. Please index first via `python app.py -t index`')
        with Flow.load_config('flow-query.yml') as f:
            f.use_rest_gateway()
            f.block()
    elif task == 'query_restful':
        if not os.path.exists(workspace):
            logger.warning(f'The directory {workspace} does not exist. Please index first via `python app.py -t index`')
        query_restful()
    elif task == 'dryrun':
        dryrun()
Esempio n. 2
0
def main(task, num_docs, top_k):
    """Entry point of the chinese-text-search example: dispatch on ``task``.

    :param task: ``index``, ``index_restful``, ``query``, ``query_restful`` or ``dryrun``;
        any other value only logs the task name
    :param num_docs: forwarded to ``index`` / ``index_restful``
    :param top_k: forwarded to ``query``

    Exits with status 1 if an indexing task is requested while the directory
    named by ``JINA_WORKSPACE`` already exists.
    """
    config()
    workspace = os.environ["JINA_WORKSPACE"]
    logger = JinaLogger('chinese-text-search')

    # Guard: refuse to index on top of an existing workspace.
    # The continuation lines below keep their exact leading whitespace because
    # it is part of the string literal.
    if 'index' in task and os.path.exists(workspace):
        logger.error(
            f'\n +------------------------------------------------------------------------------------+ \
                    \n |                                   🤖🤖🤖                                           | \
                    \n | The directory {workspace} already exists. Please remove it before indexing again.  | \
                    \n |                                   🤖🤖🤖                                           | \
                    \n +------------------------------------------------------------------------------------+'
        )
        sys.exit(1)

    logger.info(f'### task = {task}')

    # Both query flavours emit the same hint when nothing has been indexed yet.
    if task in ("query", "query_restful") and not os.path.exists(workspace):
        logger.warning(
            f'The directory {workspace} does not exist. Please index first via `python app.py -t index`'
        )

    if task == "index":
        index(num_docs)
    elif task == "index_restful":
        index_restful(num_docs)
    elif task == "query":
        query(top_k)
    elif task == "query_restful":
        query_restful()
    elif task == "dryrun":
        dryrun()
Esempio n. 3
0
def main(task, num_docs, request_size, data_set, model_name):
    """Run the ``index`` or ``query_restful`` task of the cross-modal search example.

    :param task: ``index`` or ``query_restful``; other values pass the workspace
        checks (if they apply) and then do nothing
    :param num_docs: forwarded to ``index``
    :param request_size: forwarded to ``index``
    :param data_set: forwarded to ``index``
    :param model_name: forwarded to ``config``

    Exits the process with status 1 when an indexing task finds the workspace
    already present, or when a query task finds it missing.
    """
    config(model_name)
    workspace = os.environ['JINA_WORKSPACE']
    logger = JinaLogger('cross-modal-search')
    if 'index' in task:
        if os.path.exists(workspace):
            # NOTE: the backslash continuations live inside the literal, so the
            # leading indentation of each continued line is part of the message.
            logger.error(
                f'\n +------------------------------------------------------------------------------------+ \
                    \n |                                   🤖🤖🤖                                           | \
                    \n | The directory {workspace} already exists. Please remove it before indexing again.  | \
                    \n |                                   🤖🤖🤖                                           | \
                    \n +------------------------------------------------------------------------------------+'
            )
            sys.exit(1)
    if 'query' in task and not os.path.exists(workspace):
        # this condition aborts the run, so report it as an error rather than info
        logger.error(f"The directory {workspace} does not exist. Please index first via `python app.py -t index`")
        sys.exit(1)
    logger.info(f'### task = {task}')
    if task == 'index':
        index(data_set, num_docs, request_size)
    elif task == 'query_restful':
        query_restful()
Esempio n. 4
0
class BaseRuntime(metaclass=RuntimeMeta):
    """BaseRuntime is a process or thread providing the support to run different :class:`BasePea` in different environments.
    It manages the lifetime of these `BasePea` objects living in `Local`, `Remote`, or `Container` environment.

    Inherited classes must define their own `run` method that is the one that will be run in a separate process or thread than the main process
    """
    def __init__(self, args: Union['argparse.Namespace', Dict]):
        """Store ``args`` and derive the runtime name, lifecycle events, control address and logger from it.

        :param args: the arguments of the runtime; every optional setting is
            looked up with an ``in`` test first, so missing entries are skipped
        """
        super().__init__()
        self.args = args
        self.name = self.__class__.__name__  #: this is the process name

        self.is_ready_event = _get_event(self)
        self.is_shutdown = _get_event(self)
        # a single waitable object that fires when either of the two events is set
        self.ready_or_shutdown = _make_or_event(self, self.is_ready_event,
                                                self.is_shutdown)
        self.is_shutdown.clear()

        if 'daemon' in args:
            self.daemon = args.daemon
        if 'name' in self.args and self.args.name:
            self.name = f'runtime-{self.args.name}'
        # a role, when present, refines the name; the role values are compared
        # against distinct constants, so at most one branch applies
        if 'role' in self.args:
            if self.args.role == PeaRoleType.PARALLEL:
                self.name = f'runtime-{self.args.name}-{self.args.pea_id}'
            elif self.args.role == PeaRoleType.HEAD:
                self.name = f'runtime-{self.args.name}-head'
            elif self.args.role == PeaRoleType.TAIL:
                self.name = f'runtime-{self.args.name}-tail'
        # NOTE: ``ctrl_addr`` is only defined when all three settings are present;
        # ``status`` and ``send_terminate_signal`` rely on it
        if 'host' in self.args and 'port_ctrl' in self.args and 'ctrl_with_ipc' in self.args:
            self.ctrl_addr, self.ctrl_with_ipc = Zmqlet.get_ctrl_address(
                self.args.host, self.args.port_ctrl, self.args.ctrl_with_ipc)

        if 'log_id' in self.args and 'log_config' in self.args:
            self.logger = JinaLogger(self.name,
                                     log_id=self.args.log_id,
                                     log_config=self.args.log_config)
        else:
            self.logger = JinaLogger(self.name)

    def run(self):
        """Body of the runtime; must be implemented by subclasses."""
        raise NotImplementedError

    def start(self):
        """Start the runtime and block until it reports ready or shutdown.

        The waiting time is taken from ``timeout_ready`` (milliseconds); a
        missing or non-positive value means waiting without a time limit.

        :return: this runtime
        :raises PeaFailToStart: if the shutdown event is set by the time the wait returns
        :raises TimeoutError: if neither event is set within ``timeout_ready``
        """
        super().start()
        if isinstance(self.args, dict):
            # dict-shaped args: the timeout is read from the first entry of ``peas``
            _timeout = getattr(self.args['peas'][0], 'timeout_ready', -1)
        else:
            _timeout = getattr(self.args, 'timeout_ready', -1)

        if _timeout <= 0:
            _timeout = None
        else:
            # milliseconds -> seconds, as expected by ``wait``
            _timeout /= 1e3

        if self.ready_or_shutdown.wait(_timeout):
            if self.is_shutdown.is_set():
                # return too early and the shutdown is set, means something fails!!
                self.logger.critical(
                    f'fails to start {typename(self)} with name {self.name}, '
                    f'this often means the executor used in the pod is not valid'
                )
                raise PeaFailToStart
            else:
                self.logger.info('ready to listen')
            return self
        else:
            # only reachable with a numeric timeout: an unbounded wait cannot time out
            raise TimeoutError(
                f'{typename(self)} with name {self.name} can not be initialized after {_timeout * 1e3}ms'
            )

    def set_ready(self):
        """Set the `is_ready_event` to indicate that the `BasePea` managed by the Runtime is ready to start
         receiving messages"""
        self.is_ready_event.set()

    def unset_ready(self):
        """Clear the `is_ready_event` to indicate that the `BasePea` managed by the Runtime is not anymore ready to start
         receiving messages"""
        self.is_ready_event.clear()

    def set_shutdown(self):
        """Set the `is_shutdown` event to indicate that the `BasePea` managed by the Runtime is closed and the parallel process
        can be shutdown"""
        self.is_shutdown.set()

    @property
    def status(self):
        """Send the control signal ``STATUS`` to the managed `BasePea` and return the status """
        return send_ctrl_message(self.ctrl_addr,
                                 'STATUS',
                                 timeout=self.args.timeout_ctrl)

    @property
    def is_ready(self) -> bool:
        """Return ``True`` only when a ``STATUS`` reply was obtained and it reports readiness."""
        status = self.status
        # coerce explicitly: a falsy ``status`` would otherwise be returned as-is
        # instead of the ``bool`` promised by the annotation
        return bool(status and status.is_ready)

    @property
    def is_idle(self) -> bool:
        """Not implemented in the base class."""
        raise NotImplementedError

    def send_terminate_signal(self):
        """Send a terminate signal to the `BasePea` supported by this `Runtime` """
        return send_ctrl_message(self.ctrl_addr,
                                 'TERMINATE',
                                 timeout=self.args.timeout_ctrl)

    def close(self) -> None:
        """Close this `Runtime` by sending a `terminate signal` to the managed `BasePea`. Wait to
         be sure that the `BasePea` is properly closed to join the parallel process """
        self.send_terminate_signal()
        # blocks without a time limit until the shutdown event is set
        self.is_shutdown.wait()
        self.logger.close()
        if not self.daemon:
            self.join()

    def __enter__(self):
        """Start the runtime on entering a ``with`` block and return it."""
        return self.start()

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        """Close the runtime on leaving the ``with`` block."""
        self.close()