Exemple #1
0
def test_lazy_msg_access():
    """Check when a ``Message`` built from serialized bytes marks its request as used.

    Reading the envelope, reading the request object itself and dumping the
    message must leave ``request.is_used`` false; reading ``request.index.docs``
    must turn it true. Every dump is expected to have three parts.
    """
    messages = [
        Message(
            None,
            raw.SerializeToString(),
            'test',
            '123',
            request_id='123',
            request_type='IndexRequest',
        )
        for raw in request_generator(random_docs(10))
    ]

    # pass 1: envelope access does not touch the request
    for msg in messages:
        assert not msg.request.is_used
        assert msg.envelope
        assert len(msg.dump()) == 3
        assert not msg.request.is_used

    # pass 2: fetching the request wrapper alone does not count as usage
    for msg in messages:
        assert not msg.request.is_used
        assert msg.request
        assert len(msg.dump()) == 3
        assert not msg.request.is_used

    # pass 3: reading nested docs is what flips ``is_used``
    for msg in messages:
        assert not msg.request.is_used
        assert msg.request.index.docs
        assert len(msg.dump()) == 3
        assert msg.request.is_used
Exemple #2
0
def test_data_request_handler_change_docs_dam(logger, tmpdir):
    """Handle a request with an executor that returns a ``DocumentArrayMemmap``.

    After handling, the message's request must still hold ten documents whose
    text is ``'input document'``.
    """

    class MemmapExecutor(Executor):
        @requests
        def foo(self, docs, **kwargs):
            # put the incoming docs into a DocumentArrayMemmap under tmpdir and return it
            stored_docs = DocumentArrayMemmap(tmpdir + '/dam')
            stored_docs.extend(docs)
            return stored_docs

    parsed_args = set_pea_parser().parse_args(['--uses', 'MemmapExecutor'])
    handler = DataRequestHandler(parsed_args, logger)

    input_docs = DocumentArray([Document(text='input document') for _ in range(10)])
    first_request = list(request_generator('/', input_docs))[0]
    msg = Message(None, first_request, 'test', '123')
    assert len(msg.request.docs) == 10

    handler.handle(msg=msg, partial_requests=None, peapod_name='name')

    assert len(msg.request.docs) == 10
    for handled_doc in msg.request.docs:
        assert handled_doc.text == 'input document'
Exemple #3
0
def test_lazy_msg_access():
    """Check that the request of a ``Message`` stays decompressed across accesses.

    Each of the three passes reads a different part of the message (envelope,
    request, nested docs) and asserts ``request.is_decompressed`` before and
    after, plus a three-part dump.
    """
    # NOTE: this test does not make much sense: when a `Message` is instantiated
    # without an `envelope`, the `request` header is accessed and therefore
    # the request is already decompressed.
    built = [
        Message(
            None,
            raw.SerializeToString(),
            'test',
            '123',
            request_id='123',
            request_type='DataRequest',
        )
        for raw in request_generator('/', random_docs(10))
    ]

    # pass 1: envelope access
    for msg in built:
        assert msg.request.is_decompressed
        assert msg.envelope
        assert len(msg.dump()) == 3
        assert msg.request.is_decompressed

    # pass 2: request access
    for msg in built:
        assert msg.request.is_decompressed
        assert msg.request
        assert len(msg.dump()) == 3
        assert msg.request.is_decompressed

    # pass 3: nested docs access
    for msg in built:
        assert msg.request.is_decompressed
        assert msg.request.data.docs
        assert len(msg.dump()) == 3
        assert msg.request.is_decompressed
def _create_test_data_message():
    """Return the first request generated on endpoint ``'/'`` for ten 'input document' docs."""
    input_docs = DocumentArray([Document(text='input document') for _ in range(10)])
    generated = list(request_generator('/', input_docs))
    return generated[0]
Exemple #5
0
    def check_input(inputs: Optional['InputType'] = None, **kwargs) -> None:
        """Validate the inputs by generating the first request from them.

        :param inputs: the inputs; ``None`` is considered valid, and a callable
            is invoked so that its return value is validated instead
        :param kwargs: keyword arguments passed on to ``request_generator``;
            the ``data`` and ``exec_endpoint`` entries are overwritten here
        :raises BadClientInput: if ``inputs`` is an async generator (checking
            those is not implemented), if generating the first request fails,
            or if the generated object is not a ``Request``
        """

        if inputs is None:
            # empty inputs is considered as valid
            return

        if callable(inputs):
            # it is a function: validate what it produces
            inputs = inputs()

        kwargs['data'] = inputs
        kwargs['exec_endpoint'] = '/'

        if inspect.isasyncgenfunction(inputs) or inspect.isasyncgen(inputs):
            raise BadClientInput(
                'checking the validity of an async generator is not implemented yet'
            )

        try:
            # imported lazily, inside the guarded section, so that any failure
            # is reported to the caller as a ``BadClientInput``
            from jina.clients.request import request_generator
            from jina.types.request import Request

            r = next(request_generator(**kwargs))

            if not isinstance(r, Request):
                raise TypeError(f'{typename(r)} is not a valid Request')
        except Exception as ex:
            default_logger.error('inputs is not valid!')
            raise BadClientInput from ex
Exemple #6
0
        async def post(body: JinaEndpointRequestModel):
            """
            Post a data request to some endpoint.

            This is equivalent to the following:

                from jina import Flow

                f = Flow().add(...)

                with f:
                    f.post(endpoint, ...)

            .. # noqa: DAR201
            .. # noqa: DAR101
            """
            # The above comment is written in Markdown for better rendering in FastAPI
            from jina.enums import DataInputType

            generator_kwargs = body.dict()  # type: Dict
            generator_kwargs['data_type'] = DataInputType.DICT

            payload = generator_kwargs['data']
            if payload is not None and 'docs' in payload:
                # pass the value stored under 'docs' as the data itself
                generator_kwargs['data'] = payload['docs']

            return await _get_singleton_result(
                request_generator(**generator_kwargs)
            )
Exemple #7
0
def test_lazy_request_fields():
    """Each ``Request`` built from serialized bytes lists at least one descriptor field name."""
    for raw in request_generator(random_docs(10)):
        lazy_request = Request(raw.SerializeToString(), EnvelopeProto())
        field_names = list(lazy_request.DESCRIPTOR.fields_by_name.keys())
        assert field_names
Exemple #8
0
def _create_test_data_message():
    """Wrap the first request generated for ten 'input document' docs in a ``Message``."""
    input_docs = DocumentArray([Document(text='input document') for _ in range(10)])
    first_request = list(request_generator('/', input_docs))[0]
    return Message(None, first_request, 'test', '123')
Exemple #9
0
def test_lazy_nested_clear_access():
    """Clearing ``docs`` on the nested ``index`` body marks the request as used."""
    for raw in request_generator(random_docs(10)):
        lazy_request = Request(raw.SerializeToString(), EnvelopeProto())
        assert not lazy_request.is_used
        # write access through the nested ``index`` body
        lazy_request.index.ClearField('docs')
        # after the write the request reports itself as used
        assert lazy_request.is_used
Exemple #10
0
def test_lazy_nested_clear_access(algo):
    """Clearing ``docs`` on the nested ``data`` body marks the request as decompressed."""
    for raw in request_generator('/', random_docs(10)):
        lazy_request = Request(raw.SerializeToString(), algo)
        assert not lazy_request.is_decompressed
        # write access through the nested ``data`` body
        lazy_request.data.ClearField('docs')
        # after the write the request reports itself as decompressed
        assert lazy_request.is_decompressed
Exemple #11
0
def test_lazy_append_access():
    """Setting the request type and appending a document marks the request as used."""
    for raw in request_generator(random_docs(10)):
        lazy_request = Request(raw.SerializeToString(), EnvelopeProto())
        assert not lazy_request.is_used
        lazy_request.request_type = 'index'
        # write access: add one more document
        lazy_request.docs.append(Document())
        # after the write the request reports itself as used
        assert lazy_request.is_used
Exemple #12
0
def test_message_size():
    """``Message.size`` is zero before ``dump()`` and afterwards exceeds the summed part sizes."""
    messages = [
        Message(None, generated, 'test', '123')
        for generated in request_generator(random_docs(10))
    ]
    for msg in messages:
        assert msg.size == 0
        assert sys.getsizeof(msg.envelope.SerializeToString())
        assert sys.getsizeof(msg.request.SerializeToString())
        assert len(msg.dump()) == 3

        # read the size first, then the two serialized parts, in that order
        dumped_size = msg.size
        envelope_size = sys.getsizeof(msg.envelope.SerializeToString())
        request_size = sys.getsizeof(msg.request.SerializeToString())
        assert dumped_size > envelope_size + request_size
Exemple #13
0
def test_lazy_change_message_type():
    """Writing a control command marks the request as used and leaves no index docs."""
    for raw in request_generator(random_docs(10)):
        lazy_request = Request(raw.SerializeToString(), EnvelopeProto())
        assert not lazy_request.is_used
        # write access through the ``control`` body
        lazy_request.control.command = jina_pb2.RequestProto.ControlRequestProto.IDLE
        # after the write the request reports itself as used
        assert lazy_request.is_used
        assert len(lazy_request.index.docs) == 0
Exemple #14
0
def test_lazy_nest_access():
    """Assigning an id on the first document marks the request as used and is visible via ``index.docs``."""
    new_id = '1' * 16
    for raw in request_generator(random_docs(10)):
        lazy_request = Request(raw.SerializeToString(), EnvelopeProto())
        assert not lazy_request.is_used
        # write access on a nested document
        lazy_request.docs[0].id = new_id
        # after the write the request reports itself as used
        assert lazy_request.is_used
        assert lazy_request.index.docs[0].id == new_id
Exemple #15
0
def test_lazy_access(field):
    """Reading the attribute named by ``field`` marks the request as used."""
    for raw in request_generator(random_docs(10)):
        lazy_request = Request(raw.SerializeToString(), EnvelopeProto())
        assert not lazy_request.is_used

        # read access to the parametrized attribute
        value = getattr(lazy_request, field)
        print(value)

        # after the read the request reports itself as used
        assert lazy_request.is_used
Exemple #16
0
def test_request_generate_lines_from_list():
    """A list of 100 text lines with ``request_size=100`` yields one request of 100 matching docs."""

    def random_lines(num_lines):
        lines = []
        for j in range(1, num_lines + 1):
            lines.append(f'i\'m dummy doc {j}')
        return lines

    generated = request_generator('', data=random_lines(100), request_size=100)

    first_request = next(generated)
    assert len(first_request.docs) == 100
    # documents are numbered from 1, in the same order as the input lines
    for position, doc in enumerate(first_request.docs, 1):
        assert doc.text == f'i\'m dummy doc {position}'
Exemple #17
0
def test_request_generate_lines():
    """A generator of 100 text lines with ``request_size=100`` yields a first request of 100 docs."""

    def random_lines(num_lines):
        yield from (f'i\'m dummy doc {j}' for j in range(1, num_lines + 1))

    generated = request_generator('', data=random_lines(100), request_size=100)

    first_request = next(generated)
    assert len(first_request.docs) == 100
    assert first_request.docs[0].text == 'i\'m dummy doc 1'
Exemple #18
0
def test_lazy_nest_access(algo):
    """Assigning an id on the first document marks the request as decompressed and is visible via ``data.docs``."""
    new_id = '1' * 16
    for raw in request_generator('/', random_docs(10)):
        lazy_request = Request(raw.SerializeToString(), algo)
        assert not lazy_request.is_decompressed
        # write access on a nested document
        lazy_request.docs[0].id = new_id
        # after the write the request reports itself as decompressed
        assert lazy_request.is_decompressed
        assert lazy_request.data.docs[0].id == new_id
Exemple #19
0
def test_lazy_change_message_type(algo):
    """Writing a control command marks the request as decompressed and leaves no data docs."""
    for raw in request_generator('/', random_docs(10)):
        lazy_request = Request(raw.SerializeToString(), algo)
        assert not lazy_request.is_decompressed
        # write access through the ``control`` body
        lazy_request.control.command = jina_pb2.RequestProto.ControlRequestProto.IDLE
        # after the write the request reports itself as decompressed
        assert lazy_request.is_decompressed
        assert len(lazy_request.data.docs) == 0
Exemple #20
0
def test_lazy_append_access(algo):
    """Check laziness of each generated request, then append to a fresh typed request.

    NOTE(review): the document is appended to a brand-new
    ``Request().as_typed_request('data')`` rather than to the request built
    from the serialized bytes, so the final assertion does not exercise the
    lazily loaded request - confirm whether this is intended.
    """
    for raw in request_generator('/', random_docs(10)):
        lazy_request = Request(raw.SerializeToString(), algo)
        assert not lazy_request.is_decompressed

        typed_request = Request().as_typed_request('data')
        # write access: add one document to the typed request
        typed_request.docs.append(Document())
        # after the write the typed request reports itself as decompressed
        assert typed_request.is_decompressed
Exemple #21
0
def test_multiple_access():
    """Truth-testing a request leaves it unused; reading ``index`` marks it used."""
    lazy_requests = [
        Request(raw.SerializeToString(), EnvelopeProto())
        for raw in request_generator(random_docs(10))
    ]

    # pass 1: evaluating the request itself does not count as usage
    for lazy_request in lazy_requests:
        assert not lazy_request.is_used
        assert lazy_request
        assert not lazy_request.is_used

    # pass 2: reading the ``index`` body does
    for lazy_request in lazy_requests:
        assert not lazy_request.is_used
        assert lazy_request.index
        assert lazy_request.is_used
Exemple #22
0
def test_lazy_access(field, algo):
    """Reading the attribute named by ``field`` marks the request as decompressed."""
    for raw in request_generator('/', random_docs(10)):
        lazy_request = Request(raw.SerializeToString(), algo)
        assert not lazy_request.is_decompressed

        # read access to the parametrized attribute
        value = getattr(lazy_request, field)
        print(value)

        # after the read the request reports itself as decompressed
        assert lazy_request.is_decompressed
Exemple #23
0
def test_request_generate_lines():
    """A generator of 100 text lines yields one index request of 100 plain-text docs in order."""

    def random_lines(num_lines):
        yield from (f'i\'m dummy doc {j}' for j in range(1, num_lines + 1))

    generated = request_generator(data=random_lines(100), request_size=100)

    first_request = next(generated)
    assert len(first_request.index.docs) == 100
    # documents are numbered from 1, in the same order as the input lines
    for position, doc in enumerate(first_request.index.docs, 1):
        assert doc.mime_type == 'text/plain'
        assert doc.text == f'i\'m dummy doc {position}'
Exemple #24
0
def doc_req():
    """Yield a dummy request whose docs and chunks carry matches.

    Every document gets ``num_matches_per_doc`` 'hello' matches and every chunk
    gets ``num_matches_per_chunk`` 'world' matches before the first generated
    request is yielded.
    """
    documents = list(random_docs(num_docs, num_chunks_per_doc))

    for document in documents:
        # matches on the document itself
        for _ in range(num_matches_per_doc):
            document.matches.add(Document(content='hello'))
        # matches on each of its chunks
        for chunk in document.chunks:
            for _ in range(num_matches_per_chunk):
                chunk.matches.add(Document(content='world'))

    yield list(request_generator(documents))[0]
Exemple #25
0
async def test_aync_data_request_handler_new_docs(logger):
    """Handling a ten-doc request with ``AsyncNewDocsExecutor`` yields one 'new document' doc."""
    parsed_args = set_pod_parser().parse_args(['--uses', 'AsyncNewDocsExecutor'])
    handler = DataRequestHandler(parsed_args, logger)

    input_docs = DocumentArray([Document(text='input document') for _ in range(10)])
    first_request = list(request_generator('/', input_docs))[0]
    assert len(first_request.docs) == 10

    response = await handler.handle(requests=[first_request])

    assert len(response.docs) == 1
    assert response.docs[0].text == 'new document'
Exemple #26
0
def test_request_generate_numpy_arrays():
    """A 10x10 array with ``request_size=5`` gives two requests of five docs with ``(10,)`` tensors."""
    input_array = np.random.random([10, 10])

    generated = request_generator('', data=input_array, request_size=5)

    # the first two requests are checked with the same expectations
    for _ in range(2):
        batch = next(generated)
        assert len(batch.docs) == 5
        for doc in batch.docs:
            assert doc.tensor.shape == (10, )
Exemple #27
0
def test_multiple_access(algo):
    """Truth-testing a request leaves it compressed; reading ``data`` decompresses it."""
    lazy_requests = []
    for raw in request_generator('/', random_docs(10)):
        lazy_requests.append(Request(raw.SerializeToString(), algo))

    # pass 1: evaluating the request itself does not decompress it
    for lazy_request in lazy_requests:
        assert not lazy_request.is_decompressed
        assert lazy_request
        assert not lazy_request.is_decompressed

    # pass 2: reading the ``data`` body does
    for lazy_request in lazy_requests:
        assert not lazy_request.is_decompressed
        assert lazy_request.data
        assert lazy_request.is_decompressed
Exemple #28
0
        async def foo(body: JinaRequestModel):
            # Build the keyword arguments for ``request_generator`` from the
            # request body and return the single result of the generated request.
            from jina.enums import DataInputType

            # a falsy body is treated as a request without data
            generator_kwargs = body.dict() if body else {'data': None}
            generator_kwargs['exec_endpoint'] = exec_endpoint
            generator_kwargs['data_type'] = DataInputType.DICT

            payload = generator_kwargs['data']
            if payload is not None and 'docs' in payload:
                # pass the value stored under 'docs' as the data itself
                generator_kwargs['data'] = payload['docs']

            return await _get_singleton_result(
                request_generator(**generator_kwargs)
            )
Exemple #29
0
def test_request_generate_numpy_arrays():
    """A 10x10 array with ``request_size=5`` gives two index requests of five docs with ``(10,)`` blobs."""
    input_array = np.random.random([10, 10])

    generated = request_generator(data=input_array, request_size=5)

    # the first two requests are checked with the same expectations
    for _ in range(2):
        batch = next(generated)
        assert len(batch.index.docs) == 5
        for doc in batch.index.docs:
            assert NdArray(doc.blob).value.shape == (10,)
Exemple #30
0
def test_compression(compress_algo, low_bytes, high_ratio):
    """Compare dumped message sizes with and without compression.

    Messages are dumped once with ``CompressAlgo.NONE`` and once with
    ``compress_algo``. When the algorithm is ``NONE``, or ``low_bytes`` /
    ``high_ratio`` is set, the second total must not be smaller than the
    first; otherwise it must be strictly smaller.
    """
    no_comp_sizes = []
    sizes = []
    docs = list(random_docs(100, embed_dim=100))

    def _message_kwargs():
        # the byte threshold depends on the uncompressed sizes collected so far
        return dict(
            identity='gateway',
            pod_name='123',
            compress_min_bytes=2 * sum(no_comp_sizes) if low_bytes else 0,
            compress_min_ratio=10 if high_ratio else 1,
        )

    kwargs = _message_kwargs()
    with TimeContext(f'no compress'):
        for generated in request_generator(docs):
            plain = Message(None, generated, compress=CompressAlgo.NONE, **kwargs)
            plain.dump()
            no_comp_sizes.append(plain.size)

    # rebuild the settings now that the uncompressed sizes are known
    kwargs = _message_kwargs()
    with TimeContext(f'compressing with {str(compress_algo)}') as tc:
        for generated in request_generator(docs):
            packed = Message(None, generated, compress=compress_algo, **kwargs)
            packed.dump()
            sizes.append(packed.size)

    compression_expected = not (
        compress_algo == CompressAlgo.NONE or low_bytes or high_ratio
    )
    if compression_expected:
        assert sum(sizes) < sum(no_comp_sizes)
    else:
        assert sum(sizes) >= sum(no_comp_sizes)
    print(
        f'{str(compress_algo)}: size {sum(sizes) / len(sizes)} (ratio: {sum(no_comp_sizes) / sum(sizes):.2f}) with {tc.duration:.2f}s'
    )