def __init__(
    self,
    request: Optional[RequestSourceType] = None,
):
    """Wrap a data request built from one of several source forms.

    :param request: the source to build from — an existing
        ``DataRequestProto``, a ``dict`` / JSON ``str`` (parsed into a new
        proto), raw ``bytes`` (kept unparsed in ``self.buffer``), or
        ``None`` to create a fresh proto with a random request id.
    :raises BadRequestType: if construction fails for any reason, chaining
        the underlying error.
    """
    self.buffer = None
    try:
        if request is None:
            # no source given: start from an empty proto with a fresh id
            self._pb_body = jina_pb2.DataRequestProto()
            self._pb_body.header.request_id = random_identity()
        elif isinstance(request, jina_pb2.DataRequestProto):
            self._pb_body = request
        elif isinstance(request, dict):
            self._pb_body = jina_pb2.DataRequestProto()
            json_format.ParseDict(request, self._pb_body)
        elif isinstance(request, str):
            self._pb_body = jina_pb2.DataRequestProto()
            json_format.Parse(request, self._pb_body)
        elif isinstance(request, bytes):
            # keep the raw serialization around; parsing is deferred
            self.buffer = request
        else:
            raise ValueError(f'{typename(request)} is not recognizable')
    except Exception as ex:
        raise BadRequestType(
            f'fail to construct a {self.__class__} object from {request}'
        ) from ex
def test_cache_validate_remote_executor():
    """Two Flows sharing one ``workspace_id`` must see the same remote workspace.

    The first run asserts the probed file does not yet exist (the executor is
    expected to create/download it); the second run, reusing the same
    ``workspace_id``, asserts the file is now visible.
    """
    from .cache_validator import CacheValidator

    # one workspace id shared by both runs below
    workspace_id = random_identity()

    # 1st Executor in remote workspace should download the file.
    f = Flow().add(
        uses=CacheValidator,
        host='localhost:8000',
        py_modules='cache_validator.py',
        upload_files=cur_dir,
        workspace_id=workspace_id,
    )
    with f:
        response = f.post(
            on='/', inputs=[Document()], show_progress=True, return_results=True
        )
        assert not response[0].data.docs[0].tags['exists']

    # 2nd Executor in remote workspace should be able to access the file.
    f = Flow().add(
        uses=CacheValidator,
        host='localhost:8000',
        py_modules='cache_validator.py',
        upload_files=cur_dir,
        workspace_id=workspace_id,
    )
    with f:
        response = f.post(
            on='/', inputs=[Document()], show_progress=True, return_results=True
        )
        assert response[0].data.docs[0].tags['exists']
def mixin_distributed_feature_parser(parser):
    """Mixing in arguments required by :class:`BasePod` into the given parser.

    :param parser: the parser instance to which we add arguments
    """
    gp = add_arg_group(parser, title='Distributed')

    gp.add_argument(
        '--silent-remote-logs',
        action='store_true',
        default=False,
        help='Do not display the streaming of remote logs on local console')

    gp.add_argument(
        '--upload-files',
        type=str,
        nargs='*',
        metavar='FILE',
        help='''
The files on the host to be uploaded to the remote
workspace. This can be useful when your Pod has more
file dependencies beyond a single YAML file, e.g.
Python files, data files.

Note,
- currently only flatten structure is supported, which means if you upload `[./foo/a.py,
./foo/b.pp, ./bar/c.yml]`, then they will be put under the _same_ workspace on the remote, losing all hierarchies.
- by default, `--uses` YAML file is always uploaded.
- uploaded files are by default isolated across the runs. To ensure files are submitted to the same workspace across different runs, use `--workspace-id` to specify the workspace.
''')

    gp.add_argument(
        '--workspace-id',
        type=str,
        # NOTE: default is evaluated once at parser-construction time
        default=random_identity(),
        help=
        'the UUID for identifying the workspace. When not given a random id will be assigned.'
        'Multiple Pea/Pod/Flow will work under the same workspace if they share the same '
        '`workspace-id`.')
def test_double_dynamic_routing_zmqstreamlet():
    """Fan out many messages from one ZmqStreamlet to two downstream ones.

    A routing table gives ``executor1`` two out-edges, so each send is
    duplicated: the sender's ``msg_sent`` counts twice per message while each
    receiver sees every message exactly once.
    """
    args1 = get_args()
    args2 = get_args()
    args3 = get_args()

    logger = JinaLogger('zmq-test')
    with ZmqStreamlet(args=args1, logger=logger) as z1, ZmqStreamlet(
        args=args2, logger=logger
    ) as z2, ZmqStreamlet(args=args3, logger=logger) as z3:
        assert z1.msg_sent == 0
        assert z2.msg_sent == 0
        assert z3.msg_sent == 0
        req = jina_pb2.RequestProto()
        req.request_id = random_identity()
        d = req.data.docs.add()
        d.tags['id'] = 2
        msg = Message(None, req, 'tmp', '')
        routing_pb = jina_pb2.RoutingTableProto()
        # executor1 fans out to executor2 and executor3
        routing_table = {
            'active_pod': 'executor1',
            'pods': {
                'executor1': {
                    'host': __default_host__,
                    'port': args1.port_in,
                    'expected_parts': 0,
                    'out_edges': [{'pod': 'executor2'}, {'pod': 'executor3'}],
                },
                'executor2': {
                    'host': __default_host__,
                    'port': args2.port_in,
                    'expected_parts': 1,
                    'out_edges': [],
                },
                'executor3': {
                    'host': __default_host__,
                    'port': args3.port_in,
                    'expected_parts': 1,
                    'out_edges': [],
                },
            },
        }
        json_format.ParseDict(routing_table, routing_pb)
        msg.envelope.routing_table.CopyFrom(routing_pb)

        # run each streamlet's receive loop in a daemon thread
        for pea in [z1, z2, z3]:
            thread = threading.Thread(target=pea.start, args=(callback,))
            thread.daemon = True
            thread.start()

        number_messages = 1000
        for i in range(number_messages):
            z1.send_message(msg)
        # give the background threads time to drain the queues
        time.sleep(5)

        # two out-edges -> every send is counted twice on the sender
        assert z1.msg_sent == 2 * number_messages
        assert z1.msg_recv == 0
        assert z2.msg_sent == 0
        assert z2.msg_recv == number_messages
        assert z3.msg_sent == 0
        assert z3.msg_recv == number_messages
def mixin_essential_parser(parser):
    """Mixing in arguments required by every module into the given parser.

    :param parser: the parser instance to which we add arguments
    """
    gp = add_arg_group(parser, title='Essential')
    gp.add_argument(
        '--name',
        type=str,
        help='''
The name of this object.

This will be used in the following places:
- how you refer to this object in Python/YAML/CLI
- visualization
- log message header
- ...

When not given, then the default naming strategy will apply.
    ''',
    )

    gp.add_argument(
        '--workspace',
        type=str,
        default=None,
        help='The working directory for any IO operations in this object. '
        'If not set, then derive from its parent `workspace`.',
    )

    gp.add_argument(
        '--log-config',
        type=str,
        default='default',
        help='The YAML config of the logger used in this object.',
    )

    gp.add_argument(
        '--quiet',
        action='store_true',
        default=False,
        help='If set, then no log will be emitted from this object.',
    )

    gp.add_argument(
        '--quiet-error',
        action='store_true',
        default=False,
        help=
        'If set, then exception stack information will not be added to the log',
    )

    gp.add_argument(
        '--workspace-id',
        type=str,
        # NOTE: default is evaluated once at parser-construction time
        default=random_identity(),
        # hidden unless _SHOW_ALL_ARGS is on
        help=
        'the UUID for identifying the workspace. When not given a random id will be assigned.'
        'Multiple Pod/Deployment/Flow will work under the same workspace if they share the same '
        '`workspace-id`.'
        if _SHOW_ALL_ARGS
        else argparse.SUPPRESS,
    )
def dump_secret(work_path: 'Path', uuid8: str, secret: str):
    """Dump the UUID and Secret into local file

    :param work_path: the local package directory
    :param uuid8: the ID of the executor
    :param secret: the access secret
    """
    from cryptography.fernet import Fernet

    config = work_path / '.jina'
    config.mkdir(parents=True, exist_ok=True)
    local_id_file = config / 'secret.key'

    if local_id_file.exists():
        # reuse the previously stored local id and encryption key
        try:
            with local_id_file.open() as f:
                local_id, local_key = f.readline().strip().split('\t')
                fernet = Fernet(local_key.encode())
        except Exception:
            # NOTE(review): a corrupt/unreadable key file makes this a silent
            # no-op — nothing is dumped and the caller is not informed; confirm
            # this best-effort behavior is intended.
            return
    else:
        # first run: generate and persist a new local id + Fernet key
        local_id = str(random_identity())
        with local_id_file.open('w') as f:
            local_key = Fernet.generate_key()
            fernet = Fernet(local_key)
            f.write(f'{local_id}\t{local_key.decode()}')

    local_config_file = get_config_path(local_id)
    secret_data = {
        'uuid8': uuid8,
        # the secret is stored encrypted with the local key
        'encrypted_secret': fernet.encrypt(secret.encode()).decode(),
    }
    with local_config_file.open('w') as f:
        f.write(json.dumps(secret_data))
def _yield_data_request():
    """Build a single DataRequest with a random id carrying one empty Document."""
    request = DataRequest()
    request.header.request_id = random_identity()
    docs = DocumentArray()
    docs.append(Document())
    request.data.docs = docs
    return request
def test_double_dynamic_routing_zmqlet():
    """Fan out batches of messages from one Zmqlet to two receivers.

    The routing table gives ``executor1`` two out-edges, so the sender counts
    every send twice while each receiver gets one copy per sent message.
    Messages are sent in ``trips`` batches with a pause, then received.
    """
    args1 = get_args()
    args2 = get_args()
    args3 = get_args()

    logger = JinaLogger('zmq-test')
    with Zmqlet(args1, logger) as z1, Zmqlet(args2, logger) as z2, Zmqlet(
        args3, logger
    ) as z3:
        assert z1.msg_sent == 0
        assert z2.msg_sent == 0
        assert z3.msg_sent == 0
        req = jina_pb2.RequestProto()
        req.request_id = random_identity()
        d = req.data.docs.add()
        d.tags['id'] = 2
        msg = Message(None, req, 'tmp', '')
        routing_table = {
            'active_pod': 'executor1',
            'pods': {
                'executor1': {
                    'host': __default_host__,
                    'port': args1.port_in,
                    'expected_parts': 0,
                    'out_edges': [{'pod': 'executor2'}, {'pod': 'executor3'}],
                },
                'executor2': {
                    'host': __default_host__,
                    'port': args2.port_in,
                    'expected_parts': 1,
                    'out_edges': [],
                },
                'executor3': {
                    'host': __default_host__,
                    'port': args3.port_in,
                    'expected_parts': 1,
                    'out_edges': [],
                },
            },
        }
        msg.envelope.routing_table.CopyFrom(RoutingTable(routing_table).proto)

        number_messages = 100
        trips = 10
        for i in range(trips):
            for j in range(number_messages):
                z1.send_message(msg)
            # let the sockets drain between batches
            time.sleep(1)
            for i in range(number_messages):
                z2.recv_message(callback)
                z3.recv_message(callback)

        total_number_messages = number_messages * trips

        # two out-edges -> sender counted each message twice
        assert z1.msg_sent == 2 * total_number_messages
        assert z2.msg_sent == 0
        assert z2.msg_recv == total_number_messages
        assert z3.msg_sent == 0
        assert z3.msg_recv == total_number_messages
def _get_sync_requests_iterator(num_requests):
    """Yield ``num_requests`` DataRequests, each with a fresh id and one empty Document.

    :param num_requests: how many requests to generate
    """
    for _ in range(num_requests):
        request = DataRequest()
        request.header.request_id = random_identity()
        docs = DocumentArray()
        docs.append(Document())
        request.data.docs = docs
        yield request
def test_read_zmqlet():
    """Send one tagged index message through a Zmqlet while a reading pea runs."""
    with MockBasePeaRead(args2), Zmqlet(args1, default_logger) as zmqlet:
        request = jina_pb2.RequestProto()
        request.request_id = random_identity()
        doc = request.index.docs.add()
        doc.tags['id'] = 2
        zmqlet.send_message(Message(None, request, 'tmp', ''))
def test_not_read_zmqlet():
    """Send one tagged message while the peer pea deliberately does not read it."""
    with MockBasePeaNotRead(args3), Zmqlet(args1, default_logger) as zmqlet:
        request = Request()
        request.request_id = random_identity()
        doc = request.data.docs.add()
        doc.tags['id'] = 2
        zmqlet.send_message(Message(None, request, 'tmp', ''))
async def test_double_dynamic_routing_async_zmqlet():
    """Route one message from an AsyncZmqlet to two receivers via two out-edges.

    ``pod1`` has two out-edges, so its ``msg_sent`` is 2 for a single send,
    and each of ``pod2``/``pod3`` receives exactly one message.
    """
    args1 = get_args()
    args2 = get_args()
    args3 = get_args()

    logger = JinaLogger('zmq-test')
    with AsyncZmqlet(args1, logger) as z1, AsyncZmqlet(
        args2, logger) as z2, AsyncZmqlet(args3, logger) as z3:
        assert z1.msg_sent == 0
        assert z2.msg_sent == 0
        assert z3.msg_sent == 0
        req = jina_pb2.RequestProto()
        req.request_id = random_identity()
        d = req.data.docs.add()
        d.tags['id'] = 2
        msg = Message(None, req, 'tmp', '')
        routing_pb = jina_pb2.RoutingTableProto()
        # pod1 fans out to pod2 and pod3
        routing_table = {
            'active_pod': 'pod1',
            'pods': {
                'pod1': {
                    'host': '0.0.0.0',
                    'port': args1.port_in,
                    'expected_parts': 0,
                    'out_edges': [{
                        'pod': 'pod2'
                    }, {
                        'pod': 'pod3'
                    }],
                },
                'pod2': {
                    'host': '0.0.0.0',
                    'port': args2.port_in,
                    'expected_parts': 1,
                    'out_edges': [],
                },
                'pod3': {
                    'host': '0.0.0.0',
                    'port': args3.port_in,
                    'expected_parts': 1,
                    'out_edges': [],
                },
            },
        }
        json_format.ParseDict(routing_table, routing_pb)
        msg.envelope.routing_table.CopyFrom(routing_pb)
        await send_msg(z1, msg)

        await z2.recv_message(callback)
        await z3.recv_message(callback)

        # one send over two out-edges -> counted twice on the sender
        assert z1.msg_sent == 2
        assert z1.msg_recv == 0
        assert z2.msg_sent == 0
        assert z2.msg_recv == 1
        assert z3.msg_sent == 0
        assert z3.msg_recv == 1
def test_remote_local_dynamic_routing_zmqlet():
    """Route one message over a bind/connect pair identified by a ZMQ identity.

    ``pod2`` connects back to ``pod1``'s out-socket (``hosts_in_connect``) and
    the edge is marked ``send_as_bind`` with a ``target_identity``, so the
    message is delivered to the specific connected peer.
    """
    args1 = get_args()
    args2 = get_args()
    # the receiving side connects to the sender and announces this identity
    args2.zmq_identity = 'test-identity'
    args2.hosts_in_connect = [f'{args1.host}:{args1.port_out}']

    logger = JinaLogger('zmq-test')
    with Zmqlet(args1, logger) as z1, Zmqlet(args2, logger) as z2:
        assert z1.msg_sent == 0
        assert z1.msg_recv == 0
        assert z2.msg_sent == 0
        assert z2.msg_recv == 0
        req = jina_pb2.RequestProto()
        req.request_id = random_identity()
        d = req.data.docs.add()
        d.tags['id'] = 2
        msg = Message(None, req, 'tmp', '')
        routing_pb = jina_pb2.RoutingTableProto()
        routing_table = {
            'active_pod': 'pod1',
            'pods': {
                'pod1': {
                    'host': '0.0.0.0',
                    'port': args1.port_in,
                    'expected_parts': 0,
                    'out_edges': [{
                        'pod': 'pod2',
                        'send_as_bind': True
                    }],
                },
                'pod2': {
                    'host': '0.0.0.0',
                    'port': args2.port_in,
                    'expected_parts': 1,
                    'out_edges': [],
                    'target_identity': args2.zmq_identity,
                },
            },
        }
        json_format.ParseDict(routing_table, routing_pb)
        msg.envelope.routing_table.CopyFrom(routing_pb)

        # nothing sent yet: this recv must be a no-op
        z2.recv_message(callback)
        assert z2.msg_sent == 0
        assert z2.msg_recv == 0

        z1.send_message(msg)
        z2.recv_message(callback)
        assert z1.msg_sent == 1
        assert z1.msg_recv == 0
        assert z2.msg_sent == 0
        assert z2.msg_recv == 1
def test_flow_identity():
    """Setting ``Flow.identity`` rejects bad values and propagates one id to all pods."""
    flow = Flow().add().add().add().build()
    # three added pods plus the gateway -> four distinct identities
    assert len(flow.identity) == 4
    assert len(set(flow.identity.values())) == 4

    with pytest.raises(ValueError):
        flow.identity = 'hello'

    fresh_id = random_identity()
    flow.identity = fresh_id
    # after assignment every pod shares exactly this identity
    assert set(flow.identity.values()) == {fresh_id}
def test_flow_workspace_id():
    """Setting ``Flow.workspace_id`` rejects bad values and propagates one id everywhere."""
    flow = Flow().add().add().add().build()
    assert len(flow.workspace_id) == 3
    assert len(set(flow.workspace_id.values())) == 3

    with pytest.raises(ValueError):
        flow.workspace_id = 'hello'

    fresh_id = random_identity()
    flow.workspace_id = fresh_id
    # after assignment every pod shares exactly this workspace id
    assert set(flow.workspace_id.values()) == {fresh_id}
def mixin_hw_base_parser(parser):
    """Add the arguments for hello world to the parser

    :param parser: the parser configure
    """
    gp = add_arg_group(parser, title='General')
    gp.add_argument(
        '--workdir',
        type=str,
        # NOTE: default is evaluated once at parser-construction time
        default=random_identity(),
        # FIX: the two adjacent literals previously rendered as
        # "...hello-world demoall data..." — a separator was missing.
        help='The workdir for hello-world demo; '
        'all data, indices, shards and outputs will be saved there',
    )
    gp.add_argument(
        '--download-proxy', type=str, help='The proxy when downloading sample data'
    )
def test_simple_dynamic_routing_zmqlet():
    """Route a single message between two Zmqlets via a one-edge routing table."""
    args1 = get_args()
    args2 = get_args()

    logger = JinaLogger('zmq-test')
    with Zmqlet(args1, logger) as z1, Zmqlet(args2, logger) as z2:
        assert z1.msg_sent == 0
        assert z1.msg_recv == 0
        assert z2.msg_sent == 0
        assert z2.msg_recv == 0
        req = jina_pb2.RequestProto()
        req.request_id = random_identity()
        d = req.data.docs.add()
        d.tags['id'] = 2
        msg = Message(None, req, 'tmp', '')
        routing_pb = jina_pb2.RoutingTableProto()
        # single edge: executor1 -> executor2
        routing_table = {
            'active_pod': 'executor1',
            'pods': {
                'executor1': {
                    'host': __default_host__,
                    'port': args1.port_in,
                    'expected_parts': 0,
                    'out_edges': [{'pod': 'executor2'}],
                },
                'executor2': {
                    'host': __default_host__,
                    'port': args2.port_in,
                    'expected_parts': 1,
                    'out_edges': [],
                },
            },
        }
        json_format.ParseDict(routing_table, routing_pb)
        msg.envelope.routing_table.CopyFrom(routing_pb)

        # nothing sent yet: this recv must be a no-op
        z2.recv_message(callback)
        assert z2.msg_sent == 0
        assert z2.msg_recv == 0

        z1.send_message(msg)
        z2.recv_message(callback)
        assert z1.msg_sent == 1
        assert z1.msg_recv == 0
        assert z2.msg_sent == 0
        assert z2.msg_recv == 1
def test_recv_message_zmqlet(mocker):
    """A message sent from one Zmqlet is received by the other with tags intact.

    :param mocker: pytest-mock fixture wrapping the receive callback
    """
    zmqlet1 = Zmqlet(args1, default_logger)
    zmqlet2 = Zmqlet(args2, default_logger)
    req = Request()
    req.request_id = random_identity()
    doc = req.data.docs.add()
    doc.tags['id'] = 2
    msg = Message(None, req, 'tmp', '')

    def callback(msg_):
        # NOTE(review): the two sides are accessed differently
        # (`msg_.request.docs` vs `msg.request.data.docs`) — presumably both
        # resolve to the same docs view; confirm against the Request wrapper.
        assert msg_.request.docs[0].tags['id'] == msg.request.data.docs[0].tags['id']

    mock = mocker.Mock()
    zmqlet1.send_message(msg)
    time.sleep(1)
    zmqlet2.recv_message(mock)
    validate_callback(mock, callback)
def test_simple_zmqlet():
    """Wire a Zmqlet (CONNECT sockets) to a BasePea (BIND sockets) and send one message."""
    # the Zmqlet side connects; ports are auto-assigned
    args = set_pea_parser().parse_args(
        [
            '--host-in',
            '0.0.0.0',
            '--host-out',
            '0.0.0.0',
            '--socket-in',
            'PULL_CONNECT',
            '--socket-out',
            'PUSH_CONNECT',
            '--timeout-ctrl',
            '-1',
        ]
    )

    # the pea side binds, with in/out ports crossed over to the Zmqlet's
    args2 = set_pea_parser().parse_args(
        [
            '--host-in',
            '0.0.0.0',
            '--host-out',
            '0.0.0.0',
            '--port-in',
            str(args.port_out),
            '--port-out',
            str(args.port_in),
            '--socket-in',
            'PULL_BIND',
            '--socket-out',
            'PUSH_BIND',
            '--timeout-ctrl',
            '-1',
        ]
    )

    logger = JinaLogger('zmq-test')
    with BasePea(args2), Zmqlet(args, logger) as z:
        req = jina_pb2.RequestProto()
        req.request_id = random_identity()
        d = req.data.docs.add()
        d.tags['id'] = 2
        msg = Message(None, req, 'tmp', '')
        z.send_message(msg)
def test_not_decompressed_zmqlet(mocker):
    """Round-trip one message through a Zmqlet to a MockPea and verify the callback ran.

    :param mocker: pytest-mock fixture wrapping the receive callback
    """

    # the callback only needs to be invoked; its body is intentionally empty
    def callback(msg_):
        pass

    with MockPea(args2) as pea, Zmqlet(args1, default_logger) as z:
        req = Request()
        req.request_id = random_identity()
        d = req.data.docs.add()
        d.tags['id'] = 2
        msg = Message(None, req, 'tmp', '')
        mock = mocker.Mock()
        z.send_message(msg)
        time.sleep(1)
        z.recv_message(mock)
        validate_callback(mock, callback)
        # FIX: dropped the redundant f-prefix on placeholder-less literals (F541)
        print(' joining pea')
        pea.join()
        print(' joined pea')
def test_simple_zmqlet():
    """Wire a Zmqlet (CONNECT, fixed ports) to a log-forwarding BasePea and send one message."""
    # the Zmqlet side connects on fixed ports 12346/12347
    args = set_pea_parser().parse_args([
        '--host-in', '0.0.0.0',
        '--host-out', '0.0.0.0',
        '--port-in', '12346',
        '--port-out', '12347',
        '--socket-in', 'PULL_CONNECT',
        '--socket-out', 'PUSH_CONNECT',
        '--timeout-ctrl', '-1'
    ])

    # the pea side binds with the port pair crossed over
    args2 = set_pea_parser().parse_args([
        '--host-in', '0.0.0.0',
        '--host-out', '0.0.0.0',
        '--port-in', '12347',
        '--port-out', '12346',
        '--socket-in', 'PULL_BIND',
        '--socket-out', 'PUSH_BIND',
        '--uses', '_logforward',
        '--timeout-ctrl', '-1'
    ])

    logger = logging.getLogger('zmq-test')
    with BasePea(args2), Zmqlet(args, logger) as z:
        req = jina_pb2.RequestProto()
        req.request_id = random_identity()
        d = req.index.docs.add()
        d.tags['id'] = 2
        msg = Message(None, req, 'tmp', '')
        z.send_message(msg)
def __init__(
    self,
    command: Optional[str] = None,
    request: Optional['jina_pb2.jina_pb2.ControlRequestProto'] = None,
):
    """Build a control request from an existing proto and/or a command name.

    :param command: name of the control command; must be one of
        ``_available_commands``. When given, it takes precedence over
        ``request`` and a fresh proto with a random request id is built.
    :param request: an existing ``ControlRequestProto`` to wrap as-is
    :raises ValueError: if ``request`` has an unrecognized type, or if
        ``command`` is not a supported command name
    """
    if isinstance(request, jina_pb2.ControlRequestProto):
        self._pb_body = request
    elif request is not None:
        # note ``None`` is not considered as a bad type
        raise ValueError(f'{typename(request)} is not recognizable')
    if command:
        proto = jina_pb2.ControlRequestProto()
        proto.header.request_id = random_identity()
        if command in _available_commands:
            proto.command = getattr(jina_pb2.ControlRequestProto, command)
        else:
            raise ValueError(
                f'command "{command}" is not supported, must be one of {_available_commands}'
            )
        # a given command overrides any proto passed via ``request``
        self._pb_body = proto
    # NOTE(review): when both ``command`` and ``request`` are None,
    # ``self._pb_body`` is never assigned and later attribute access will
    # fail — confirm callers never construct this way.
def __init__(self, args: argparse.Namespace, **kwargs):
    """Initialize grpc and data request handling.

    :param args: args from CLI
    :param kwargs: extra keyword arguments
    """
    super().__init__(args, **kwargs)
    # unique identity of this runtime instance
    self._id = random_identity()
    self._loop = get_or_reuse_loop()
    # timestamp of the last observed activity, used for idle tracking
    self._last_active_time = time.perf_counter()
    # messages buffered per request id until all expected parts arrive
    self._pending_msgs = defaultdict(list)  # type: Dict[str, List[Message]]
    self._partial_requests = None
    # async tasks kept alive so they are not garbage-collected mid-flight
    self._pending_tasks = []
    self._static_routing_table = args.static_routing_table
    self._data_request_handler = DataRequestHandler(args, self.logger)
    # grpc transport; every inbound message lands in self._callback
    self._grpclet = Grpclet(
        args=self.args,
        message_callback=self._callback,
        logger=self.logger,
    )
def validate(cls, value: str) -> str:
    """Validate DaemonID

    :param value: str to be validated
    :return: str of type DaemonID
    """
    # guard: only strings can be DaemonIDs
    if not isinstance(value, str):
        raise TypeError('Malformed DaemonID: must be a string')

    # split into the type prefix and (optionally) the UUID tail
    jtype, *jid = value.split('-', 1)
    if jtype not in IDLiterals.values:
        raise TypeError(
            f'Malformed DaemonID: \'{jtype}\' not in {IDLiterals.values}'
        )

    if jid:
        # a tail was supplied: it must parse as a UUID
        try:
            jid = uuid.UUID(*jid)
        except ValueError:
            raise TypeError(f'Malformed DaemonID: {*jid,} is not a valid UUID')
    else:
        # no tail supplied: mint a fresh identity
        jid = random_identity()

    return f'{jtype}-{jid}'
def mixin_base_ppr_parser(parser):
    """Mixing in arguments required by pod/deployment/runtime module into the given parser.

    :param parser: the parser instance to which we add arguments
    """
    gp = add_arg_group(parser, title='Essential')
    gp.add_argument(
        '--name',
        type=str,
        help='''
The name of this object.

This will be used in the following places:
- how you refer to this object in Python/YAML/CLI
- visualization
- log message header
- ...

When not given, then the default naming strategy will apply.
    ''',
    )

    gp.add_argument(
        '--workspace',
        type=str,
        default=None,
        help='The working directory for any IO operations in this object. '
        'If not set, then derive from its parent `workspace`.',
    )

    from jina import __resources_path__

    gp.add_argument(
        '--log-config',
        type=str,
        default=os.path.join(__resources_path__, 'logging.default.yml'),
        help='The YAML config of the logger used in this object.',
    )

    gp.add_argument(
        '--quiet',
        action='store_true',
        default=False,
        help='If set, then no log will be emitted from this object.',
    )

    gp.add_argument(
        '--quiet-error',
        action='store_true',
        default=False,
        help=
        'If set, then exception stack information will not be added to the log',
    )

    gp.add_argument(
        '--workspace-id',
        type=str,
        # NOTE: default is evaluated once at parser-construction time
        default=random_identity(),
        # hidden unless _SHOW_ALL_ARGS is on
        help=
        'the UUID for identifying the workspace. When not given a random id will be assigned.'
        'Multiple Pod/Deployment/Flow will work under the same workspace if they share the same '
        '`workspace-id`.'
        if _SHOW_ALL_ARGS
        else argparse.SUPPRESS,
    )

    parser.add_argument(
        '--extra-search-paths',
        type=str,
        default=[],
        nargs='*',
        help=
        'Extra search paths to be used when loading modules and finding YAML config files.'
        if _SHOW_ALL_ARGS
        else argparse.SUPPRESS,
    )

    gp.add_argument(
        '--timeout-ctrl',
        type=int,
        default=int(os.getenv('JINA_DEFAULT_TIMEOUT_CTRL', '60')),
        help=
        'The timeout in milliseconds of the control request, -1 for waiting forever',
    )

    parser.add_argument(
        '--k8s-namespace',
        type=str,
        help=
        'Name of the namespace where Kubernetes deployment should be deployed, to be filled by flow name'
        if _SHOW_ALL_ARGS
        else argparse.SUPPRESS,
    )

    gp.add_argument(
        '--k8s-disable-connection-pool',
        # store_false: passing the flag disables pooling
        action='store_false',
        dest='k8s_connection_pool',
        default=True,
        help=
        'Defines if connection pooling for replicas should be disabled in K8s. This mechanism implements load balancing between replicas of the same executor. This should be disabled if a service mesh (like istio) is used for load balancing.'
        if _SHOW_ALL_ARGS
        else argparse.SUPPRESS,
    )

    gp.add_argument(
        '--polling',
        type=str,
        default=PollingType.ANY.name,
        help='''
The polling strategy of the Deployment and its endpoints (when `shards>1`).
Can be defined for all endpoints of a Deployment or by endpoint.
Define per Deployment:
- ANY: only one (whoever is idle) Pod polls the message
- ALL: all Pods poll the message (like a broadcast)
Define per Endpoint:
JSON dict, {endpoint: PollingType}
{'/custom': 'ALL', '/search': 'ANY', '*': 'ANY'}

    ''',
    )
import uuid

import pytest

from daemon.stores.base import BaseStore
from jina.helper import random_identity, random_uuid

# three random store keys; the third entry is deliberately empty
keys = [uuid.UUID(random_identity()) for _ in range(3)]
store_items = {
    keys[0]: {'object': 'abc'},
    keys[1]: {'object': 'hij'},
    keys[2]: {}
}


def test_base_store_clear():
    """Clearing a populated BaseStore empties it and bumps its update timestamp."""
    print(keys)
    s = BaseStore()
    old_update = s._time_updated
    assert s._time_updated
    s._items.update(store_items)
    assert len(s) == 3
    s.clear()
    assert len(s) == 0
    # clear() must refresh the last-updated timestamp
    assert old_update < s._time_updated


def test_base_store_del():
def req():
    """Fixture: a DataRequestProto with a random request id and one empty doc."""
    request = jina_pb2.DataRequestProto()
    request.header.request_id = random_identity()
    request.data.docs.docs.add()
    return request
def req():
    """Fixture: a RequestProto with a random request id and one empty index doc."""
    request = jina_pb2.RequestProto()
    request.request_id = random_identity()
    request.index.docs.add()
    return request
def _generate_request():
    """Create a RequestProto with a random request id and a single empty document."""
    request = jina_pb2.RequestProto()
    request.request_id = random_identity()
    request.data.docs.add()
    return request
def control_req():
    """Fixture: a ControlRequestProto carrying a fresh random request id."""
    request = jina_pb2.ControlRequestProto()
    request.header.request_id = random_identity()
    return request