import uuid
from typing import List

from fastapi import status, APIRouter, File, UploadFile
from jina.logging import JinaLogger

from jinad.store import pea_store
from jinad.models.pea import PeaModel
from jinad.excepts import HTTPException, PeaStartException
from jinad.helper import basepea_to_namespace, create_meta_files_from_upload

logger = JinaLogger(context='👻 PEAAPI')
router = APIRouter()


@router.put(
    path='/pea/upload',
    summary='Upload pod context yamls & pymodules',
)
async def _upload(
        uses_files: List[UploadFile] = File(()),
        pymodules_files: List[UploadFile] = File(())
):
    """Persist uploaded `uses` YAML files on the daemon host.

    :param uses_files: YAML files referenced by a Pea's `uses` config
    :param pymodules_files: python modules needed by the Pea
        (NOTE(review): this chunk appears truncated — `pymodules_files`
        is accepted but not handled in the visible code; verify against
        the full file)
    """
    # TODO: This is repetitive code. needs refactoring
    upload_status = 'nothing to upload'
    if uses_files:
        # side-effect-only list comprehension: writes each uploaded file to disk
        [create_meta_files_from_upload(current_file) for current_file in uses_files]
        upload_status = 'uploaded'
import os import sys import time from typing import List, Dict import requests from jina import Document from jina.logging import JinaLogger logger = JinaLogger('test') def _query_docs(docs: List[Dict]): logger.info(f'Searching with {len(docs)} documents...') return _send_rest_request('9001', 'search', 'post', docs) def _send_rest_request(port_expose: str, endpoint: str, method: str, data: List[dict], timeout: int = 13): json = {'data': data} url = f'http://localhost:{port_expose}/{endpoint}' r = getattr(requests, method)(url, json=json, timeout=timeout) if r.status_code != 200: raise Exception(f'api request failed, url: {url}, status: {r.status_code}, content: {r.content} data: {data}') return r.json() def test_query_while_indexing(): try: logger.info('starting jinad...') os.system('nohup jinad > jinad.log 2> jinaderr.log &')
def __init__(self): self._items = {} # type: Dict['uuid.UUID', Dict[str, Any]] self._logger = JinaLogger(self.__class__.__name__, **vars(jinad_args)) self._init_stats()
class BaseRuntime(metaclass=RuntimeMeta):
    """BaseRuntime is a process or thread providing the support to run different
    :class:`BasePea` in different environments.

    It manages the lifetime of these `BasePea` objects living in `Local`, `Remote`,
    or `Container` environment. Inherited classes must define their own `run` method
    that is the one that will be run in a separate process or thread than the main
    process.
    """

    def __init__(self, args: Union['argparse.Namespace', Dict]):
        """Set up naming, readiness/shutdown events and the control address.

        :param args: parsed CLI namespace for a single runtime, or a dict
            (the dict form carries a ``'peas'`` list — see :meth:`start`)
        """
        super().__init__()
        self.args = args
        self.name = self.__class__.__name__  #: this is the process name
        # two events signal the managed BasePea's state; their OR is used to
        # wait for "either became ready or failed/shut down" in start()
        self.is_ready_event = _get_event(self)
        self.is_shutdown = _get_event(self)
        self.ready_or_shutdown = _make_or_event(self, self.is_ready_event, self.is_shutdown)
        self.is_shutdown.clear()
        if 'daemon' in args:
            self.daemon = args.daemon
        # derive a descriptive process name from the pea's name and role
        if 'name' in self.args and self.args.name:
            self.name = f'runtime-{self.args.name}'
        if 'role' in self.args and self.args.role == PeaRoleType.PARALLEL:
            self.name = f'runtime-{self.args.name}-{self.args.pea_id}'
        if 'role' in self.args and self.args.role == PeaRoleType.HEAD:
            self.name = f'runtime-{self.args.name}-head'
        if 'role' in self.args and self.args.role == PeaRoleType.TAIL:
            self.name = f'runtime-{self.args.name}-tail'
        if 'host' in self.args and 'port_ctrl' in self.args and 'ctrl_with_ipc' in self.args:
            self.ctrl_addr, self.ctrl_with_ipc = Zmqlet.get_ctrl_address(
                self.args.host, self.args.port_ctrl, self.args.ctrl_with_ipc)
        if 'log_id' in self.args and 'log_config' in self.args:
            self.logger = JinaLogger(self.name,
                                     log_id=self.args.log_id,
                                     log_config=self.args.log_config)
        else:
            self.logger = JinaLogger(self.name)

    def run(self):
        """Body executed in the spawned process/thread; must be overridden."""
        raise NotImplementedError

    def start(self):
        """Start the runtime and block until it is ready (or fails).

        :return: ``self`` for chaining / ``with`` usage
        :raises PeaFailToStart: if shutdown was signalled before readiness
        :raises TimeoutError: if neither event fired within ``timeout_ready`` ms
        """
        super().start()
        # dict args carry several peas; take the timeout of the first one
        if isinstance(self.args, dict):
            _timeout = getattr(self.args['peas'][0], 'timeout_ready', -1)
        else:
            _timeout = getattr(self.args, 'timeout_ready', -1)
        if _timeout <= 0:
            _timeout = None  # non-positive means wait forever
        else:
            _timeout /= 1e3  # CLI value is in ms, Event.wait wants seconds
        if self.ready_or_shutdown.wait(_timeout):
            if self.is_shutdown.is_set():
                # returned too early and the shutdown is set, means something failed!!
                self.logger.critical(
                    f'fails to start {typename(self)} with name {self.name}, '
                    f'this often means the executor used in the pod is not valid'
                )
                raise PeaFailToStart
            else:
                self.logger.info(f'ready to listen')
            return self
        else:
            raise TimeoutError(
                f'{typename(self)} with name {self.name} can not be initialized after {_timeout * 1e3}ms'
            )

    def set_ready(self):
        """Set the `is_ready_event` to indicate that the `BasePea` managed by the
        Runtime is ready to start receiving messages."""
        self.is_ready_event.set()

    def unset_ready(self):
        """Clear the `is_ready_event` to indicate that the `BasePea` managed by the
        Runtime is not anymore ready to start receiving messages."""
        self.is_ready_event.clear()

    def set_shutdown(self):
        """Set the `is_shutdown` event to indicate that the `BasePea` managed by the
        Runtime is closed and the parallel process can be shutdown."""
        self.is_shutdown.set()

    @property
    def status(self):
        """Send the control signal ``STATUS`` to the managed `BasePea` and return
        the status."""
        return send_ctrl_message(self.ctrl_addr, 'STATUS', timeout=self.args.timeout_ctrl)

    @property
    def is_ready(self) -> bool:
        # NOTE(review): returns the (possibly falsy) status object itself when the
        # STATUS call fails, not strictly a bool — callers only test truthiness
        status = self.status
        return status and status.is_ready

    @property
    def is_idle(self) -> bool:
        """Whether the managed pea is idle; must be provided by subclasses."""
        raise NotImplementedError

    def send_terminate_signal(self):
        """Send a terminate signal to the `BasePea` supported by this `Runtime`."""
        return send_ctrl_message(self.ctrl_addr, 'TERMINATE', timeout=self.args.timeout_ctrl)

    def close(self) -> None:
        """Close this `Runtime` by sending a `terminate signal` to the managed
        `BasePea`. Wait to be sure that the `BasePea` is properly closed to join
        the parallel process."""
        self.send_terminate_signal()
        self.is_shutdown.wait()
        self.logger.close()
        if not self.daemon:
            self.join()

    def __enter__(self):
        return self.start()

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        self.close()
import os import shutil import sys from glob import glob import click from jina.flow import Flow from jina import Document from jina.logging.profile import TimeContext from jina.logging import JinaLogger MAX_DOCS = os.environ.get('MAX_DOCS', 16) BATCH_SIZE = 16 logger = JinaLogger('object-search') def config(): os.environ['JINA_DATA_FILE'] = os.environ.get('JINA_DATA_FILE', 'data/**/*.jpg') os.environ['PARALLEL'] = '1' os.environ['SHARDS'] = '1' os.environ['JINA_PORT'] = os.environ.get('JINA_PORT', str(45678)) os.environ['WORKDIR'] = os.environ.get('JINA_WORKDIR', './workspace') def index(data_path, batch_size, num_docs: int): f = Flow.load_config('flow-index.yml') num_docs = min(num_docs, len(glob(data_path))) with f:
def test_logging_syslog():
    """The syslog YAML config should attach exactly one handler to the logger."""
    config_path = os.path.join(cur_dir, 'yaml/syslog.yml')
    with JinaLogger('test_logger', log_config=config_path) as logger:
        log(logger)
        assert len(logger.handlers) == 1
def test_logging_level_os_environ_variable(config):
    """With the env-var fixture applied, the level should resolve to SUCCESS."""
    config_path = os.path.join(cur_dir, 'yaml/file.yml')
    with JinaLogger('test_logger', log_config=config_path) as logger:
        log(logger)
        assert logger.logger.level == LogVerbosity.from_string('SUCCESS')
__license__ = "Apache-2.0" import click import os from collections import defaultdict from functools import partial from jina.flow import Flow from jina import Document from jina.logging import JinaLogger from jina.logging.profile import TimeContext from read_vectors_files import fvecs_read, ivecs_read logger = JinaLogger('advanced-vector-example') def general_config(): os.environ['JINA_PARALLEL'] = os.environ.get('JINA_PARALLEL', '1') os.environ['JINA_SHARDS'] = os.environ.get('JINA_SHARDS', '2') os.environ['JINA_DATASET_NAME'] = os.environ.get('JINA_DATASET_NAME', 'siftsmall') os.environ['JINA_TMP_DATA_DIR'] = os.environ.get('JINA_TMP_DATA_DIR', './') os.environ['JINA_DATA_FILE'] = os.environ.get('JINA_TMP_DATA_DIR', './') os.environ['JINA_REQUEST_SIZE'] = os.environ.get('JINA_REQUEST_SIZE', '100') os.environ['OMP_NUM_THREADS'] = os.environ.get('OMP_NUM_THREADS', '1') def query_config(indexer_query_type: str):
import os import subprocess import threading from collections import namedtuple import pkg_resources from fastapi import FastAPI from fastapi.middleware.cors import CORSMiddleware from uvicorn import Config, Server from jina.logging import JinaLogger from .parser import get_main_parser daemon_logger = JinaLogger(context='👻', log_config=os.getenv( 'JINAD_LOG_CONFIG', pkg_resources.resource_filename( 'jina', '/'.join( ('resources', 'logging.daemon.yml'))))) def _get_app(): from .api.endpoints import common_router, flow, pod, pea, logs from .config import jinad_config, fastapi_config, openapitags_config context = namedtuple('context', ['router', 'openapi_tags', 'tags']) _all_routers = { 'flow': context(router=flow.router, openapi_tags=openapitags_config.FLOW_API_TAGS, tags=[openapitags_config.FLOW_API_TAGS[0]['name']]), 'pod':
__copyright__ = "Copyright (c) 2021 Jina AI Limited. All rights reserved." __license__ = "Apache-2.0" import os import sys import click from jina.flow import Flow from jina.logging import JinaLogger from jina.logging.profile import TimeContext from jina.logging import default_logger as logger logger = JinaLogger('wikipedia-example') MAX_DOCS = int(os.environ.get('JINA_MAX_DOCS', 50)) def config(): os.environ['JINA_DATA_FILE'] = os.environ.get('JINA_DATA_FILE', 'data/toy-input.txt') os.environ['JINA_DATA_FILE_INC'] = os.environ.get( 'JINA_DATA_FILE_INC', 'data/toy-input-incremental.txt') os.environ['JINA_WORKSPACE'] = os.environ.get('JINA_WORKSPACE', 'workspace') os.environ['JINA_PORT'] = os.environ.get('JINA_PORT', str(45678)) def print_topk(resp, sentence): for d in resp.search.docs: print(f'Ta-Dah🔮, here are what we found for: {sentence}') for idx, match in enumerate(d.matches):
import os
import sys
from typing import Tuple, Generator, BinaryIO, TextIO

import numpy as np

from jina.logging import JinaLogger

# presumably the width (in bytes) of the length prefix written before each
# serialized record — TODO confirm against _handle_dump
BYTE_PADDING = 4
# dtype used when persisting vectors
DUMP_DTYPE = np.float64

logger = JinaLogger(__name__)


def export_dump_streaming(
        path: str,
        shards: int,
        size: int,
        data: Generator[Tuple[str, 'np.ndarray', bytes], None, None],
):
    """Export the data to a path, based on sharding,

    :param path: path to dump
    :param shards: the nr of shards this pea is part of
    :param size: total amount of entries
    :param data: the generator of the data (ids, vectors, metadata)
    """
    logger.info(f'Dumping {size} docs to {path} for {shards} shards')
    # actual serialization/sharding lives in the (not shown here) _handle_dump helper
    _handle_dump(data, path, shards, size)
from jina import __version__ as jina_version
from jina.logging import JinaLogger
from fastapi import status, APIRouter

from jinad.config import server_config

logger = JinaLogger(context='👻 JINAD')
common_router = APIRouter()


@common_router.on_event('startup')
async def startup():
    """Log a welcome banner once uvicorn starts serving the daemon."""
    logger.success(
        f'Uvicorn + FastAPI running on {server_config.HOST}:{server_config.PORT}'
    )
    logger.success('Welcome to Jina daemon - the remote manager for jina!')


@common_router.get(path='/alive', summary='Get status of jinad', status_code=status.HTTP_200_OK)
async def _status():
    """ Used to check if the api is running (returns 200 & jina version) """
    # TODO(Deepankar): should we add versions of executors?
    return {'status_code': status.HTTP_200_OK, 'jina_version': jina_version}
import uuid
from typing import Dict, List

from fastapi import status, APIRouter, File, UploadFile
from jina.logging import JinaLogger

from jinad.store import pod_store
from jinad.models.pod import PodModel
from jinad.excepts import HTTPException, PodStartException
from jinad.helper import flowpod_to_namespace, basepod_to_namespace, create_meta_files_from_upload

logger = JinaLogger(context='👻 PODAPI')
router = APIRouter()


@router.put(
    path='/upload',
    summary='Upload pod context yamls & pymodules',
)
async def _upload(uses_files: List[UploadFile] = File(()),
                  pymodules_files: List[UploadFile] = File(())):
    """Persist uploaded `uses` YAML files on the daemon host.

    :param uses_files: YAML files referenced by a Pod's `uses` config
    :param pymodules_files: python modules needed by the Pod
        (NOTE(review): this chunk appears truncated — `pymodules_files`
        is accepted but not handled in the visible code)
    """
    upload_status = 'nothing to upload'
    if uses_files:
        # side-effect-only list comprehension: writes each uploaded file to disk
        [
            create_meta_files_from_upload(current_file)
            for current_file in uses_files
        ]
        upload_status = 'uploaded'
import uuid
import time
import json
import asyncio
from pathlib import Path
from typing import Optional

from jina.logging import JinaLogger
from fastapi import APIRouter, WebSocket, WebSocketDisconnect

from jinad.config import log_config
from jinad.excepts import HTTPException, TimeoutException, ClientExit

logger = JinaLogger(context='👻 LOGS')
router = APIRouter()


async def tail(file_handler, line_num_from=0, timeout=5):
    """Asynchronously tail an open file, yielding ``(line_number, line)`` pairs.

    :param file_handler: an open, readable file object positioned at its start
    :param line_num_from: 1-based line number to start yielding from; earlier
        lines are consumed but skipped
    :param timeout: seconds of inactivity (no new line read) after which the
        generator stops by raising
    :raises TimeoutException: always raised when the idle timeout elapses —
        the ``while``'s ``else`` runs whenever the condition turns false
        (there is no ``break``), so this generator only terminates via this
        exception or the consumer abandoning it
    """
    line_number = 0
    last_log_time = time.time()
    while time.time() - last_log_time < timeout:
        for line in file_handler:
            line_number += 1
            if line_number < line_num_from:
                continue
            yield line_number, line
            # each yielded line resets the inactivity clock
            last_log_time = time.time()
        # file exhausted for now: yield control briefly before polling again
        await asyncio.sleep(0.01)
    else:
        raise TimeoutException()
def test_logging_syslog():
    """Syslog config should leave the logger with a single handler."""
    with JinaLogger('test_logger', log_config='yaml/syslog.yml') as logger:
        log(logger)
        handler_count = len(logger.handlers)
        assert handler_count == 1
import json

from jina.parsers import set_gateway_parser
from jina.logging import JinaLogger
from jina.peapods.runtimes.asyncio.rest.app import get_fastapi_app

# Build the gateway REST app with default CLI args and dump its OpenAPI schema
# to gateway.json in the current working directory.
args = set_gateway_parser().parse_args([])
logger = JinaLogger('')
schema = get_fastapi_app(args, logger).openapi()
with open('gateway.json', 'w') as f:
    f.write(json.dumps(schema))
def test_logging_file():
    """File-handler config should write exactly five records to the uptime log."""
    log_path = f'jina-{__uptime__}.log'
    with JinaLogger('test_logger', log_config='yaml/file.yml') as logger:
        log(logger)
        assert os.path.exists(log_path)
        with open(log_path) as fp:
            assert len(fp.readlines()) == 5
from jina import Document, Client from jina.logging import JinaLogger cur_dir = os.path.dirname(os.path.abspath(__file__)) dbms_flow_yml = os.path.join(cur_dir, 'flow_dbms.yml') query_flow_yml = os.path.join(cur_dir, 'flow_query.yml') compose_yml = os.path.join(cur_dir, 'docker-compose.yml') JINAD_PORT_DBMS = '8001' JINAD_PORT_QUERY = '8001' REST_PORT_DBMS = '9000' REST_PORT_QUERY = '9001' DUMP_PATH_DOCKER = '/tmp/dump' logger = JinaLogger('test-dump') SHARDS = 3 EMB_SIZE = 10 # global between threads KEEP_RUNNING = True INDEX_TIMES = 0 QUERY_TIMES = 0 DUMP_ROLL_UPDATE_TIME = 0 class MyThread(Thread): def run(self) -> None: try: super().run()
def test_logging_level_yaml():
    """The file YAML config should set the logger level to INFO."""
    config_path = os.path.join(cur_dir, 'yaml/file.yml')
    with JinaLogger('test_logger', log_config=config_path) as logger:
        log(logger)
        assert logger.logger.level == LogVerbosity.from_string('INFO')
class BaseStore(MutableMapping):
    """A ``MutableMapping`` of UUID -> metadata dict for the Pea/Pod/Flow
    objects managed by the daemon.

    Tracks simple usage statistics (creation/update timestamps, add/delete
    counts) and knows how to release an item's resources — ``close()`` the
    stored object and clean up its working directory — on deletion.
    """

    def __init__(self):
        # maps item UUID -> metadata dict describing the stored object
        self._items = {}  # type: Dict['uuid.UUID', Dict[str, Any]]
        self._logger = JinaLogger(self.__class__.__name__, **vars(jinad_args))
        self._init_stats()

    def _init_stats(self):
        """Initialize the stats """
        self._time_created = datetime.now()
        self._time_updated = self._time_created
        self._num_add = 0
        self._num_del = 0

    def add(self, *args, **kwargs) -> 'uuid.UUID':
        """Add a new element to the store. This method needs to be overridden by the subclass"""
        raise NotImplementedError

    def delete(self, id: Union[str, uuid.UUID], workspace: bool = False,
               everything: bool = False, **kwargs):
        """Close and remove the item keyed by ``id``.

        :param id: key of the item; a ``str`` is converted to :class:`uuid.UUID`
        :param workspace: if True, delete the files in the item's workdir
            (the ``logging.log`` file is kept)
        :param everything: if True, remove the whole workdir tree as well
        :raises KeyError: if ``id`` is not in the store
        """
        if isinstance(id, str):
            id = uuid.UUID(id)
        if id in self._items:
            v = self._items[id]
            if 'object' in v and hasattr(v['object'], 'close'):
                v['object'].close()
            if workspace and v.get('workdir', None):
                # BUGFIX: the previous pattern rglob('[!logging.log]*') was a glob
                # *character class* ("first char not one of l,o,g,i,n,."), NOT an
                # exclusion of the file named 'logging.log'. Iterate everything and
                # skip that file explicitly instead.
                for path in Path(v['workdir']).rglob('*'):
                    if path.is_file() and path.name != 'logging.log':
                        self._logger.debug(f'file to be deleted: {path}')
                        path.unlink()
            if everything and v.get('workdir', None):
                self._logger.debug(f'directory to be deleted: {v["workdir"]}')
                shutil.rmtree(v['workdir'])
            del self[id]
            self._logger.success(
                f'{colored(str(id), "cyan")} is released from the store.')
        else:
            raise KeyError(f'{colored(str(id), "cyan")} not found in store.')

    def __iter__(self):
        return iter(self._items)

    def __len__(self):
        return len(self._items)

    def __getitem__(self, key: Union['uuid.UUID', str]):
        # accept both str and UUID keys transparently
        if isinstance(key, str):
            key = uuid.UUID(key)
        return self._items[key]

    def __delitem__(self, key: uuid.UUID):
        """ Release a Pea/Pod/Flow object from the store """
        self._items.pop(key)
        self._time_updated = datetime.now()
        self._num_del += 1

    def clear(self) -> None:
        """Delete every item, cleaning each item's workspace files."""
        keys = list(self._items.keys())
        for k in keys:
            self.delete(id=k, workspace=True)

    def reset(self) -> None:
        """Calling :meth:`clear` and reset all stats """
        self.clear()
        self._init_stats()

    def __setitem__(self, key: 'uuid.UUID', value: Dict) -> None:
        self._items[key] = value
        t = datetime.now()
        # stamp the creation time onto the stored metadata itself
        value.update({'time_created': t})
        self._time_updated = t
        self._num_add += 1

    @property
    def status(self) -> Dict:
        """Return the status of this store as a dict"""
        return {
            'size': len(self._items),
            'time_created': self._time_created,
            'time_updated': self._time_updated,
            'num_add': self._num_add,
            'num_del': self._num_del,
            'items': self._items
        }
def test_logging_syslog():
    """Syslog config (pathlib-joined) should attach exactly one handler."""
    config_path = str(cur_dir / 'yaml' / 'syslog.yml')
    with JinaLogger('test_logger', log_config=config_path) as logger:
        log(logger)
        assert len(logger.handlers) == 1
import argparse
import os

import mock
import pytest

from jina.logging import JinaLogger
from jina.parsers import set_pea_parser
from jina.peapods.runtimes.jinad.client import DaemonClient, PodDaemonClient, PeaDaemonClient

logger = JinaLogger(context='test-remote')
yaml_path = os.path.dirname(os.path.abspath(__file__))

# module-level client fixtures; 'requests' is patched in the tests below so no
# real jinad needs to run on 0.0.0.0:8000
jinad_api = DaemonClient(host='0.0.0.0', port=8000, logger=logger)
pod_api = PodDaemonClient(host='0.0.0.0', port=8000, logger=logger)
pea_api = PeaDaemonClient(host='0.0.0.0', port=8000, logger=logger)


@mock.patch('requests.get')
def test_jinad_is_alive(mocker):
    """`is_alive` should reflect only the HTTP status code of the GET call."""
    mocker.return_value.status_code = 200
    assert jinad_api.is_alive
    mocker.return_value.status_code = 404
    assert not jinad_api.is_alive


@mock.patch('requests.delete')
@pytest.mark.parametrize('api', [pea_api, pod_api, jinad_api])
def test_podapi_delete(mocker, api):
    """A 200 from requests.delete should be reported as a successful delete."""
    mocker.return_value.status_code = 200
    assert api.delete(remote_id='abcd')
import json import uuid from typing import List, Union from fastapi import status, APIRouter, Body, Response, File, UploadFile from jina.parser import set_client_cli_parser from jina.helper import get_parsed_args from jina.logging import JinaLogger from jina.clients import Client from jinad.store import flow_store from jinad.models.pod import PodModel from jinad.excepts import FlowYamlParseException, FlowCreationException, FlowStartException, \ HTTPException logger = JinaLogger(context='👻 FLOWAPI') router = APIRouter() @router.put( path='/flow/pods', summary='Build & start a Flow using Pods', ) async def _create_from_pods( pods: Union[List[PodModel]] = Body(..., example=json.loads(PodModel().json())) ): """ Build a Flow using a list of `PodModel` [
class BaseStore(MutableMapping):
    """A ``MutableMapping`` of UUID -> metadata dict for the Pea/Pod/Flow
    objects managed by the daemon, with usage statistics.

    Unlike a plain dict, deleting a key also releases the stored object
    (``close()``) and removes its working directory.
    """

    def __init__(self):
        # maps item UUID -> metadata dict describing the stored object
        self._items = {}  # type: Dict['uuid.UUID', Dict[str, Any]]
        self._logger = JinaLogger(self.__class__.__name__, **vars(jinad_args))
        self._init_stats()

    def _init_stats(self):
        """Initialize the stats """
        self._time_created = datetime.now()
        self._time_updated = self._time_created
        self._num_add = 0
        self._num_del = 0

    def add(self, *args, **kwargs) -> 'uuid.UUID':
        """Add a new element to the store. This method needs to be overridden by the subclass"""
        raise NotImplementedError

    def __iter__(self):
        return iter(self._items)

    def __len__(self):
        return len(self._items)

    def __getitem__(self, key: Union['uuid.UUID', str]):
        # accept both str and UUID keys transparently
        if isinstance(key, str):
            key = uuid.UUID(key)
        return self._items[key]

    def __delitem__(self, key: Union['uuid.UUID', str]):
        """ Release a Pea/Pod/Flow object from the store

        :raises KeyError: if ``key`` is not present
        """
        if isinstance(key, str):
            key = uuid.UUID(key)
        if key in self._items:
            v = self._items[key]
            # close the live object before dropping the record
            if 'object' in v and hasattr(v['object'], 'close'):
                v['object'].close()
            # remove the whole working directory, if the item had one
            if v.get('workdir', None):
                shutil.rmtree(v['workdir'])
            self._items.pop(key)
            self._time_updated = datetime.now()
            self._logger.success(
                f'{colored(str(key), "cyan")} is released from the store.')
            self._num_del += 1
        else:
            raise KeyError(f'{colored(str(key), "cyan")} not found in store.')

    def clear(self) -> None:
        """Release every item in the store."""
        # snapshot the keys: pop() mutates _items while we iterate
        keys = list(self._items.keys())
        for k in keys:
            self.pop(k)

    def reset(self) -> None:
        """Calling :meth:`clear` and reset all stats """
        self.clear()
        self._init_stats()

    def __setitem__(self, key: 'uuid.UUID', value: Dict) -> None:
        self._items[key] = value
        t = datetime.now()
        # stamp the creation time onto the stored metadata itself
        value.update({'time_created': t})
        self._time_updated = t
        self._logger.success(f'{colored(str(key), "cyan")} is added')
        self._num_add += 1

    @property
    def status(self) -> Dict:
        """Return the status of this store as a dict"""
        return {
            'size': len(self._items),
            'time_created': self._time_created,
            'time_updated': self._time_updated,
            'num_add': self._num_add,
            'num_del': self._num_del,
            'items': self._items
        }
import json import subprocess import threading import pkg_resources from fastapi import FastAPI from fastapi.middleware.cors import CORSMiddleware from uvicorn import Config, Server from daemon.excepts import Runtime400Exception, daemon_runtime_exception_handler from jina import __version__ from jina.logging import JinaLogger from .parser import get_main_parser, _get_run_args jinad_args = get_main_parser().parse_args([]) daemon_logger = JinaLogger('DAEMON', **vars(jinad_args)) def _get_app(): from .api.endpoints import router, flow, pod, pea, logs, workspace app = FastAPI( title='JinaD (Daemon)', description='REST interface for managing distributed Jina', version=__version__, openapi_tags=[{ 'name': 'daemon', 'description': 'API to manage the Daemon', }, { 'name': 'flows', 'description': 'API to manage Flows', }, {
import os import shutil import time import traceback from contextlib import ExitStack from pathlib import Path from typing import List, Dict import click import requests from jina import Document from jina.clients.sugary_io import _input_lines from jina.logging import JinaLogger logger = JinaLogger('jina') curdir = os.getcwd() JINAD_HOST = 'localhost' # change this if you are using remote jinad JINAD_PORT = '8000' # change this if you set a different port DUMP_PATH = '/tmp/jina_dump' # the path where to dump SHARDS = 3 # change this if you change pods/query_indexer.yml DUMP_RELOAD_INTERVAL = 20 # time between dump - rolling update calls DATA_FILE = 'data/toy.txt' # change this if you get the full data DOCS_PER_ROUND = 5 # nr of documents to index in each round DBMS_REST_PORT = '9000' # REST port of DBMS Flow, defined in flows/dbms.yml QUERY_REST_PORT = '9001' # REST port of Query Flow, defined in flows/query.yml def query_restful():