def test_lazy_msg_access():
    """Check when a message's request gets flagged as used.

    Reading the envelope, reading the request object itself, and dumping the
    message are all expected to leave ``request.is_used`` false; only reaching
    into ``request.index.docs`` is expected to flip it.
    """
    messages = []
    for proto in request_generator(random_docs(10)):
        messages.append(
            Message(
                None,
                proto.SerializeToString(),
                'test',
                '123',
                request_id='123',
                request_type='IndexRequest',
            )
        )

    # Pass 1: touching the envelope must not mark the request as used.
    for msg in messages:
        assert not msg.request.is_used
        assert msg.envelope
        assert len(msg.dump()) == 3
        assert not msg.request.is_used

    # Pass 2: touching the request wrapper must not mark it as used either.
    for msg in messages:
        assert not msg.request.is_used
        assert msg.request
        assert len(msg.dump()) == 3
        assert not msg.request.is_used

    # Pass 3: reading the nested docs is what marks the request as used.
    for msg in messages:
        assert not msg.request.is_used
        assert msg.request.index.docs
        assert len(msg.dump()) == 3
        assert msg.request.is_used
def test_data_request_handler_change_docs_dam(logger, tmpdir):
    """Handle a request with an executor that returns a ``DocumentArrayMemmap``.

    After handling, the message is expected to still carry the ten input
    documents with their original text.
    """

    class MemmapExecutor(Executor):
        @requests
        def foo(self, docs, **kwargs):
            # Copy the incoming docs into a memmap stored under ``tmpdir``.
            memmap = DocumentArrayMemmap(tmpdir + '/dam')
            memmap.extend(docs)
            return memmap

    parsed_args = set_pea_parser().parse_args(['--uses', 'MemmapExecutor'])
    request_handler = DataRequestHandler(parsed_args, logger)

    input_docs = DocumentArray(
        [Document(text='input document') for _ in range(10)]
    )
    all_requests = list(request_generator('/', input_docs))
    first_request = all_requests[0]
    message = Message(None, first_request, 'test', '123')
    assert len(message.request.docs) == 10

    request_handler.handle(
        msg=message,
        partial_requests=None,
        peapod_name='name',
    )

    assert len(message.request.docs) == 10
    for handled_doc in message.request.docs:
        assert handled_doc.text == 'input document'
def test_lazy_msg_access():
    """Check that the request of an envelope-less message is decompressed.

    Every assertion below expects ``request.is_decompressed`` to be true,
    before and after each kind of access.
    """
    # this test does not make much sense, when `message` is instantiated
    # without `envelope`, the `request` header is accessed and therefore
    # decompressed
    built = []
    for proto in request_generator('/', random_docs(10)):
        built.append(
            Message(
                None,
                proto.SerializeToString(),
                'test',
                '123',
                request_id='123',
                request_type='DataRequest',
            )
        )

    # Pass 1: envelope access.
    for message in built:
        assert message.request.is_decompressed
        assert message.envelope
        assert len(message.dump()) == 3
        assert message.request.is_decompressed

    # Pass 2: request access.
    for message in built:
        assert message.request.is_decompressed
        assert message.request
        assert len(message.dump()) == 3
        assert message.request.is_decompressed

    # Pass 3: nested docs access.
    for message in built:
        assert message.request.is_decompressed
        assert message.request.data.docs
        assert len(message.dump()) == 3
        assert message.request.is_decompressed
def _create_test_data_message():
    """Return the first request generated from ten 'input document' docs."""
    input_docs = DocumentArray(
        [Document(text='input document') for _ in range(10)]
    )
    generated = list(request_generator('/', input_docs))
    return generated[0]
def check_input(inputs: Optional['InputType'] = None, **kwargs) -> None:
    """Validate the inputs by building the first request from them.

    :param inputs: the inputs; ``None`` is accepted as valid, and a callable is
        invoked once to obtain the actual data
    :param kwargs: keyword arguments forwarded to ``request_generator``
    :raises BadClientInput: if the inputs are an async generator, or if the
        first request cannot be generated or is not a ``Request``
    """
    # Empty inputs are considered valid.
    if inputs is None:
        return

    # A function is called to obtain the actual inputs.
    if hasattr(inputs, '__call__'):
        inputs = inputs()

    kwargs['data'] = inputs
    kwargs['exec_endpoint'] = '/'

    is_async_source = inspect.isasyncgenfunction(inputs) or inspect.isasyncgen(
        inputs
    )
    if is_async_source:
        raise BadClientInput(
            'checking the validity of an async generator is not implemented yet'
        )

    try:
        from jina.clients.request import request_generator

        first_request = next(request_generator(**kwargs))
        from jina.types.request import Request

        if not isinstance(first_request, Request):
            raise TypeError(f'{typename(first_request)} is not a valid Request')
    except Exception as ex:
        # Any failure while producing the first request is reported uniformly.
        default_logger.error(f'inputs is not valid!')
        raise BadClientInput from ex
async def post(body: JinaEndpointRequestModel):
    """
    Post a data request to some endpoint.

    This is equivalent to the following:

        from jina import Flow

        f = Flow().add(...)

        with f:
            f.post(endpoint, ...)

    .. # noqa: DAR201
    .. # noqa: DAR101
    """
    # The above comment is written in Markdown for better rendering in FastAPI
    from jina.enums import DataInputType

    payload = body.dict()  # type: Dict
    payload['data_type'] = DataInputType.DICT

    # When the body wraps its documents under a 'docs' key, pass only those on.
    data = payload['data']
    if data is not None and 'docs' in data:
        payload['data'] = data['docs']

    return await _get_singleton_result(request_generator(**payload))
def test_lazy_request_fields():
    """Check that each wrapped request exposes a non-empty field list."""
    for proto in request_generator(random_docs(10)):
        wrapped = Request(proto.SerializeToString(), EnvelopeProto())
        field_names = list(wrapped.DESCRIPTOR.fields_by_name.keys())
        assert field_names
def _create_test_data_message():
    """Return a ``Message`` wrapping the first request built from ten docs."""
    input_docs = DocumentArray(
        [Document(text='input document') for _ in range(10)]
    )
    first_request = list(request_generator('/', input_docs))[0]
    return Message(None, first_request, 'test', '123')
def test_lazy_nested_clear_access():
    """Check that clearing a nested field marks the request as used."""
    for proto in request_generator(random_docs(10)):
        lazy_req = Request(proto.SerializeToString(), EnvelopeProto())
        assert not lazy_req.is_used
        # write access r.train
        lazy_req.index.ClearField('docs')
        # now it is read
        assert lazy_req.is_used
def test_lazy_nested_clear_access(algo):
    """Check that clearing a nested field marks the request as decompressed."""
    for proto in request_generator('/', random_docs(10)):
        lazy_req = Request(proto.SerializeToString(), algo)
        assert not lazy_req.is_decompressed
        # write access r.train
        lazy_req.data.ClearField('docs')
        # now it is read
        assert lazy_req.is_decompressed
def test_lazy_append_access():
    """Check that appending a document marks the request as used."""
    for proto in request_generator(random_docs(10)):
        lazy_req = Request(proto.SerializeToString(), EnvelopeProto())
        assert not lazy_req.is_used
        lazy_req.request_type = 'index'
        # write access r.train
        lazy_req.docs.append(Document())
        # now it is read
        assert lazy_req.is_used
def test_message_size():
    """Check that ``size`` is zero before ``dump()`` and grows afterwards.

    After dumping, the size is expected to exceed the combined ``getsizeof``
    of the serialized envelope and the serialized request.
    """
    messages = [
        Message(None, proto, 'test', '123')
        for proto in request_generator(random_docs(10))
    ]
    for msg in messages:
        assert msg.size == 0
        assert sys.getsizeof(msg.envelope.SerializeToString())
        assert sys.getsizeof(msg.request.SerializeToString())
        assert len(msg.dump()) == 3

        # Read the size first so operands are evaluated in the original order.
        dumped_size = msg.size
        envelope_bytes = sys.getsizeof(msg.envelope.SerializeToString())
        request_bytes = sys.getsizeof(msg.request.SerializeToString())
        assert dumped_size > envelope_bytes + request_bytes
def test_lazy_change_message_type():
    """Check that setting a control command marks the request as used.

    Afterwards the request is expected to expose no index documents.
    """
    idle_command = jina_pb2.RequestProto.ControlRequestProto.IDLE
    for proto in request_generator(random_docs(10)):
        lazy_req = Request(proto.SerializeToString(), EnvelopeProto())
        assert not lazy_req.is_used
        # write access r.train
        lazy_req.control.command = idle_command
        # now it is read
        assert lazy_req.is_used
        assert len(lazy_req.index.docs) == 0
def test_lazy_nest_access():
    """Check that writing a nested document id marks the request as used."""
    new_id = '1' * 16
    for proto in request_generator(random_docs(10)):
        lazy_req = Request(proto.SerializeToString(), EnvelopeProto())
        assert not lazy_req.is_used
        # write access r.train
        lazy_req.docs[0].id = new_id
        # now it is read
        assert lazy_req.is_used
        assert lazy_req.index.docs[0].id == new_id
def test_lazy_access(field):
    """Check that reading the attribute named ``field`` marks the request used."""
    for proto in request_generator(random_docs(10)):
        lazy_req = Request(proto.SerializeToString(), EnvelopeProto())
        assert not lazy_req.is_used
        # access r.train
        value = getattr(lazy_req, field)
        print(value)
        # now it is read
        assert lazy_req.is_used
def test_request_generate_lines_from_list():
    """Check that a list of 100 lines becomes one request of 100 text docs."""

    def random_lines(num_lines):
        lines = []
        for j in range(1, num_lines + 1):
            lines.append(f'i\'m dummy doc {j}')
        return lines

    generated = request_generator('', data=random_lines(100), request_size=100)
    first = next(generated)
    assert len(first.docs) == 100
    # Documents are expected to keep the input order, counted from 1.
    for index, doc in enumerate(first.docs, 1):
        assert doc.text == f'i\'m dummy doc {index}'
def test_request_generate_lines():
    """Check that a generator of 100 lines becomes one request of 100 docs."""

    def random_lines(num_lines):
        yield from (f'i\'m dummy doc {j}' for j in range(1, num_lines + 1))

    generated = request_generator('', data=random_lines(100), request_size=100)
    first = next(generated)
    assert len(first.docs) == 100
    assert first.docs[0].text == 'i\'m dummy doc 1'
def test_lazy_nest_access(algo):
    """Check that writing a nested document id decompresses the request."""
    new_id = '1' * 16
    for proto in request_generator('/', random_docs(10)):
        lazy_req = Request(proto.SerializeToString(), algo)
        assert not lazy_req.is_decompressed
        # write access r.train
        lazy_req.docs[0].id = new_id
        # now it is read
        assert lazy_req.is_decompressed
        assert lazy_req.data.docs[0].id == new_id
def test_lazy_change_message_type(algo):
    """Check that setting a control command decompresses the request.

    Afterwards the request is expected to expose no data documents.
    """
    idle_command = jina_pb2.RequestProto.ControlRequestProto.IDLE
    for proto in request_generator('/', random_docs(10)):
        lazy_req = Request(proto.SerializeToString(), algo)
        assert not lazy_req.is_decompressed
        # write access r.train
        lazy_req.control.command = idle_command
        # now it is read
        assert lazy_req.is_decompressed
        assert len(lazy_req.data.docs) == 0
def test_lazy_append_access(algo):
    """Check decompression state before and after appending a document.

    A request built from serialized bytes is expected to start out not
    decompressed. The append itself is done on a fresh typed 'data' request,
    which is expected to report itself as decompressed afterwards.
    """
    for proto in request_generator('/', random_docs(10)):
        lazy_req = Request(proto.SerializeToString(), algo)
        assert not lazy_req.is_decompressed

        typed_req = Request().as_typed_request('data')
        # write access r.train
        typed_req.docs.append(Document())
        # now it is read
        assert typed_req.is_decompressed
def test_multiple_access():
    """Check that truth-testing a request does not mark it as used.

    Accessing ``index`` in the second pass is what is expected to do so.
    """
    wrapped = []
    for proto in request_generator(random_docs(10)):
        wrapped.append(Request(proto.SerializeToString(), EnvelopeProto()))

    # Pass 1: evaluating the request's truthiness leaves it unused.
    for req in wrapped:
        assert not req.is_used
        assert req
        assert not req.is_used

    # Pass 2: reading ``index`` marks it as used.
    for req in wrapped:
        assert not req.is_used
        assert req.index
        assert req.is_used
def test_lazy_access(field, algo):
    """Check that reading the attribute named ``field`` decompresses the request."""
    for proto in request_generator('/', random_docs(10)):
        lazy_req = Request(proto.SerializeToString(), algo)
        assert not lazy_req.is_decompressed
        # access r.train
        value = getattr(lazy_req, field)
        print(value)
        # now it is read
        assert lazy_req.is_decompressed
def test_request_generate_lines():
    """Check that 100 generated lines become 100 plain-text index docs."""

    def random_lines(num_lines):
        yield from (f'i\'m dummy doc {j}' for j in range(1, num_lines + 1))

    generated = request_generator(data=random_lines(100), request_size=100)
    first = next(generated)
    assert len(first.index.docs) == 100
    # Each doc is expected to be text/plain and to keep its 1-based position.
    for index, doc in enumerate(first.index.docs, 1):
        assert doc.mime_type == 'text/plain'
        assert doc.text == f'i\'m dummy doc {index}'
def doc_req():
    """Yield a dummy request whose docs and chunks all carry matches."""
    documents = list(random_docs(num_docs, num_chunks_per_doc))

    # add some random matches
    for document in documents:
        for _ in range(num_matches_per_doc):
            document.matches.add(Document(content='hello'))
        for chunk in document.chunks:
            for _ in range(num_matches_per_chunk):
                chunk.matches.add(Document(content='world'))

    generated = list(request_generator(documents))
    yield generated[0]
async def test_aync_data_request_handler_new_docs(logger):
    """Check that handling with ``AsyncNewDocsExecutor`` replaces the docs.

    Ten input documents go in; the response is expected to hold a single
    document whose text is 'new document'.
    """
    parsed_args = set_pod_parser().parse_args(['--uses', 'AsyncNewDocsExecutor'])
    request_handler = DataRequestHandler(parsed_args, logger)

    input_docs = DocumentArray(
        [Document(text='input document') for _ in range(10)]
    )
    first_request = list(request_generator('/', input_docs))[0]
    assert len(first_request.docs) == 10

    handled = await request_handler.handle(requests=[first_request])

    assert len(handled.docs) == 1
    assert handled.docs[0].text == 'new document'
def test_request_generate_numpy_arrays():
    """Check that a 10x10 array is split into two requests of five docs.

    Each document's tensor is expected to have shape ``(10, )``.
    """
    input_array = np.random.random([10, 10])
    generated = request_generator('', data=input_array, request_size=5)

    # Two consecutive batches are pulled and checked identically.
    for _ in range(2):
        batch = next(generated)
        assert len(batch.docs) == 5
        for doc in batch.docs:
            assert doc.tensor.shape == (10, )
def test_multiple_access(algo):
    """Check that truth-testing a request does not decompress it.

    Accessing ``data`` in the second pass is what is expected to do so.
    """
    wrapped = []
    for proto in request_generator('/', random_docs(10)):
        wrapped.append(Request(proto.SerializeToString(), algo))

    # Pass 1: evaluating the request's truthiness leaves it compressed.
    for req in wrapped:
        assert not req.is_decompressed
        assert req
        assert not req.is_decompressed

    # Pass 2: reading ``data`` decompresses it.
    for req in wrapped:
        assert not req.is_decompressed
        assert req.data
        assert req.is_decompressed
async def foo(body: JinaRequestModel):
    """Turn the request body into a data request and return its single result.

    A falsy body is treated as a request without data. ``exec_endpoint`` is
    read from the enclosing scope.
    """
    from jina.enums import DataInputType

    if body:
        payload = body.dict()
    else:
        payload = {'data': None}

    payload['exec_endpoint'] = exec_endpoint
    payload['data_type'] = DataInputType.DICT

    # When the body wraps its documents under a 'docs' key, pass only those on.
    data = payload['data']
    if data is not None and 'docs' in data:
        payload['data'] = data['docs']

    return await _get_singleton_result(request_generator(**payload))
def test_request_generate_numpy_arrays():
    """Check that a 10x10 array is split into two index requests of five docs.

    Each document's blob, read through ``NdArray``, is expected to have shape
    ``(10,)``.
    """
    input_array = np.random.random([10, 10])
    generated = request_generator(data=input_array, request_size=5)

    # Two consecutive batches are pulled and checked identically.
    for _ in range(2):
        batch = next(generated)
        assert len(batch.index.docs) == 5
        for doc in batch.index.docs:
            assert NdArray(doc.blob).value.shape == (10,)
def test_compression(compress_algo, low_bytes, high_ratio):
    """Compare dumped message sizes with and without compression.

    The same documents are dumped once with ``CompressAlgo.NONE`` and once
    with ``compress_algo``. Compression is expected to shrink the total size
    unless the algorithm is ``NONE`` or the ``low_bytes`` / ``high_ratio``
    settings are in effect.
    """
    no_comp_sizes = []
    sizes = []
    docs = list(random_docs(100, embed_dim=100))

    def _message_kwargs():
        # Built from the sizes collected so far: before the baseline run the
        # list is empty, so the byte threshold evaluates to 0 either way.
        min_bytes = 2 * sum(no_comp_sizes) if low_bytes else 0
        min_ratio = 10 if high_ratio else 1
        return dict(
            identity='gateway',
            pod_name='123',
            compress_min_bytes=min_bytes,
            compress_min_ratio=min_ratio,
        )

    # Baseline: dump every request uncompressed.
    kwargs = _message_kwargs()
    with TimeContext(f'no compress'):
        for request in request_generator(docs):
            baseline = Message(None, request, compress=CompressAlgo.NONE, **kwargs)
            baseline.dump()
            no_comp_sizes.append(baseline.size)

    # Second run: same requests, thresholds recomputed from the baseline sizes.
    kwargs = _message_kwargs()
    with TimeContext(f'compressing with {str(compress_algo)}') as tc:
        for request in request_generator(docs):
            compressed = Message(None, request, compress=compress_algo, **kwargs)
            compressed.dump()
            sizes.append(compressed.size)

    compression_skipped = (
        compress_algo == CompressAlgo.NONE or low_bytes or high_ratio
    )
    if compression_skipped:
        assert sum(sizes) >= sum(no_comp_sizes)
    else:
        assert sum(sizes) < sum(no_comp_sizes)

    print(
        f'{str(compress_algo)}: size {sum(sizes) / len(sizes)} (ratio: {sum(no_comp_sizes) / sum(sizes):.2f}) with {tc.duration:.2f}s'
    )