Exemple #1
0
def test_request_docs_mutable_iterator():
    """Edits made through documents yielded by ``Request.docs`` must persist.

    The test writes to each yielded document, then checks that the change is
    visible on a second iteration and on the protobuf taken from ``proto``,
    including writes made after that protobuf was fetched.
    """
    request = Request()
    request.request_type = 'index'
    for doc in random_docs(10):
        request.docs.append(doc)

    # first pass: overwrite the text of every yielded document
    for position, doc in enumerate(request.docs):
        assert isinstance(doc, Document)
        doc.text = f'look I changed it! {position}'

    # second pass: the new text must be there; keep each yielded object
    kept_docs = []
    for position, doc in enumerate(request.docs):
        assert isinstance(doc, Document)
        assert doc.text == f'look I changed it! {position}'
        kept_docs.append(doc)

    # the protobuf form of the request must carry the same text
    proto = request.proto

    for position, doc_pb in enumerate(proto.index.docs):
        assert isinstance(doc_pb, DocumentProto)
        assert doc_pb.text == f'look I changed it! {position}'

    # write once more, this time through the objects kept above
    for kept in kept_docs:
        kept.text = 'now i change it back'

    # the protobuf fetched earlier must observe that write too
    for doc_pb in proto.index.docs:
        assert isinstance(doc_pb, DocumentProto)
        assert doc_pb.text == 'now i change it back'
Exemple #2
0
def test_request_extend_queryset():
    """``Request.queryset`` accepts a driver, a ``QueryLang`` and a raw proto.

    Covers ``extend`` with a list, ``extend`` with another queryset,
    repeated ``append`` and the ``TypeError`` raised for a non-iterable.
    """
    from_driver = SliceQL(start=3, end=4)
    from_querylang = QueryLang(SliceQL(start=3, end=4, priority=1))
    from_proto = jina_pb2.QueryLangProto()
    from_proto.name = 'SliceQL'
    from_proto.parameters['start'] = 3
    from_proto.parameters['end'] = 4
    from_proto.priority = 2

    extended = Request()
    extended.queryset.extend([from_driver, from_querylang, from_proto])
    assert isinstance(extended.queryset, Sequence)
    # the three inputs were built with priorities 0, 1, 2 in that order
    for position, ql in enumerate(extended.queryset):
        assert ql.priority == position
        assert ql.parameters['start'] == 3
        assert ql.parameters['end'] == 4

    # only two distinct object ids are expected while iterating
    distinct_ids = {id(ql) for ql in extended.queryset}
    assert len(distinct_ids) == 2

    copied = Request()
    copied.queryset.extend(extended.queryset)
    distinct_ids = {id(ql) for ql in copied.queryset}
    assert len(distinct_ids) == 2

    # same checks when the entries are appended one at a time
    appended = Request()
    for item in (from_driver, from_querylang, from_proto):
        appended.queryset.append(item)
    for position, ql in enumerate(appended.queryset):
        assert ql.priority == position
        assert ql.parameters['start'] == 3
        assert ql.parameters['end'] == 4

    with pytest.raises(TypeError):
        appended.queryset.extend(1)
Exemple #3
0
def test_add_doc_to_type(typ, pb_typ):
    """Appending ten docs and ten groundtruths yields ten of each.

    :param typ: request type assigned to the request before appending
    :param pb_typ: supplied with ``typ`` by the caller; not used here
    """
    request = Request()
    request.request_type = typ
    expected_count = 10
    for _ in range(expected_count):
        request.docs.append(Document())
        request.groundtruths.append(Document())
    assert len(request.docs) == expected_count
    assert len(request.groundtruths) == expected_count
Exemple #4
0
def chunks(document_factory):
    """Return the protobuf docs of an index request holding three documents.

    :param document_factory: object whose ``create(id, text)`` builds a document
    :return: ``proto.index.docs`` of the populated request
    """
    request = Request()
    request.request_type = 'index'
    created = [
        document_factory.create(1, 'test 1'),
        document_factory.create(2, 'test 1'),
        document_factory.create(3, 'test 3'),
    ]
    request.docs.extend(created)
    return request.proto.index.docs
Exemple #5
0
def test_empty_request_type(typ, pb_typ):
    """A fresh request has no type; its body is usable once a type is set.

    :param typ: request type name to assign
    :param pb_typ: class the body is expected to be an instance of afterwards
    """
    request = Request()
    assert request.request_type is None
    # reading the body before a type is chosen must raise
    with pytest.raises(ValueError):
        print(request.body)

    request.request_type = typ
    assert request._request_type == typ
    assert isinstance(request.body, pb_typ)
Exemple #6
0
def matches(document_factory):
    """Return the protobuf docs of an index request holding three documents.

    :param document_factory: object whose ``create(id, text)`` builds a document
    :return: ``as_pb_object.index.docs`` of the populated request
    """
    request = Request()
    request.request_type = 'index'
    created = [
        document_factory.create(1, 'test 1'),
        document_factory.create(2, 'test 1'),
        document_factory.create(3, 'test 3'),
    ]
    request.docs.extend(created)
    return request.as_pb_object.index.docs
Exemple #7
0
def eval_request():
    """Build a search request with five documents.

    Ids are the two-character string ``'0<n>'`` (n = 1..5) repeated eight
    times.

    :return: the populated request
    """
    request = Request()
    request.request_type = 'search'
    # docs 1-4, plus doc 5 - will be missing from KV indexer
    for number in range(1, 6):
        doc = Document()
        doc.id = f'0{number}' * 8
        request.docs.append(doc)
    return request
Exemple #8
0
def test_lazy_request_fields():
    """Each request built from serialized bytes exposes a non-empty field list."""
    lazy_requests = (
        Request(raw.SerializeToString(), EnvelopeProto())
        for raw in request_generator(random_docs(10))
    )
    for request in lazy_requests:
        field_names = list(request.DESCRIPTOR.fields_by_name.keys())
        assert field_names
Exemple #9
0
def validate_texts(resp: Request, top_k: int = 10, **kwargs):
    """Log timing for a response and sanity-check the matches of its search docs.

    Validation is best-effort: any exception raised while validating is
    logged and swallowed, never propagated to the caller.

    :param resp: response to inspect
    :param top_k: number of matches each search document is expected to have
    :param kwargs: forwarded unchanged to ``_log_time_per_pod``
    """
    try:
        from steps import StepItems
        resp_dict = resp.dict()
        # a response without an 'index' entry is treated as a search response
        task = 'index' if 'index' in resp_dict else 'search'
        task_docs = resp_dict[task]['docs']
        timestamp_from_tags = float(task_docs[0]['tags']['timestamp'])
        _log_time_per_pod(routes=resp_dict['routes'],
                          timestamp=timestamp_from_tags,
                          state=StepItems.state,
                          task=task,
                          num_docs=len(task_docs),
                          **kwargs)

        for d in resp.search.docs:
            if len(d.matches) != top_k:
                logger.error(
                    f'Number of actual matches: {len(d.matches)} vs expected number: {top_k}'
                )
            for m in d.matches:
                tag_keys = m.tags.keys()
                if 'timestamp' not in tag_keys:
                    logger.error(f'timestamp not in tags: {m.tags}')
                # to test that the data from the KV store is retrieved
                if 'filename' not in tag_keys and 'title' not in tag_keys:
                    logger.error(
                        f'did not find "filename/title" in tags: {m.tags}')
    except Exception:
        # deliberate best-effort: record the traceback and carry on
        logger.exception(
            'Got an exception during `validate_texts`. Continuing (not raising)'
        )
Exemple #10
0
def test_pprint_routes(capfd):
    """``pprint_routes`` prints status icons, headers and exception stacks.

    :param capfd: pytest fixture capturing output at file-descriptor level
    """
    failed = jina_pb2.RouteProto()
    failed.status.code = jina_pb2.StatusProto.ERROR
    failed.status.exception.stacks.extend(['r1\nline1', 'r2\nline2'])

    chained = jina_pb2.RouteProto()
    chained.status.code = jina_pb2.StatusProto.ERROR_CHAINED
    chained.status.exception.stacks.extend(['line1', 'line2'])

    succeeded = jina_pb2.RouteProto()
    succeeded.status.code = jina_pb2.StatusProto.SUCCESS

    request = Request()
    request.routes.extend([failed, chained, succeeded])
    pprint_routes(request)
    out, _ = capfd.readouterr()

    expected_fragments = (
        '⚪',
        '🟢',
        'Pod',
        'Time',
        'Exception',
        'r1',
        'line1r2',
        'line2',
        'line1line2',
    )
    for fragment in expected_fragments:
        assert fragment in out
Exemple #11
0
def test_lazy_append_access():
    """Appending to ``docs`` flips ``is_used`` from false to true."""
    lazy_requests = (
        Request(raw.SerializeToString(), EnvelopeProto())
        for raw in _generate(random_docs(10))
    )
    for request in lazy_requests:
        assert not request.is_used
        # write access through r.docs
        request.docs.append(jina_pb2.DocumentProto())
        # the request now reports itself as used
        assert request.is_used
Exemple #12
0
def test_lazy_nested_clear_access():
    """Clearing a nested field via ``index`` flips ``is_used`` to true."""
    lazy_requests = (
        Request(raw.SerializeToString(), EnvelopeProto())
        for raw in request_generator(random_docs(10))
    )
    for request in lazy_requests:
        assert not request.is_used
        # write access through r.index
        request.index.ClearField('docs')
        # the request now reports itself as used
        assert request.is_used
Exemple #13
0
def test_lazy_nest_access():
    """Writing a nested doc id flips ``is_used`` and is readable via ``index``."""
    lazy_requests = (
        Request(raw.SerializeToString(), EnvelopeProto())
        for raw in request_generator(random_docs(10))
    )
    new_id = '1' * 16
    for request in lazy_requests:
        assert not request.is_used
        # write access through r.docs
        request.docs[0].id = new_id
        # the request now reports itself as used
        assert request.is_used
        assert request.index.docs[0].id == new_id
Exemple #14
0
def test_lazy_change_message_type():
    """Setting a control command flips ``is_used`` and leaves no index docs."""
    lazy_requests = (
        Request(raw.SerializeToString(), EnvelopeProto())
        for raw in request_generator(random_docs(10))
    )
    for request in lazy_requests:
        assert not request.is_used
        # write access through r.control
        idle = jina_pb2.RequestProto.ControlRequestProto.IDLE
        request.control.command = idle
        # the request now reports itself as used
        assert request.is_used
        assert len(request.index.docs) == 0
Exemple #15
0
def test_lazy_append_access():
    """Setting the type and appending a doc flips ``is_used`` to true."""
    lazy_requests = (
        Request(raw.SerializeToString(), EnvelopeProto())
        for raw in request_generator(random_docs(10))
    )
    for request in lazy_requests:
        assert not request.is_used
        request.request_type = 'index'
        # write access through r.docs
        request.docs.append(Document())
        # the request now reports itself as used
        assert request.is_used
Exemple #16
0
def test_lazy_access(field):
    """Reading the named attribute flips ``is_used`` from false to true.

    :param field: name of the request attribute to read
    """
    lazy_requests = (
        Request(raw.SerializeToString(), EnvelopeProto())
        for raw in request_generator(random_docs(10))
    )
    for request in lazy_requests:
        assert not request.is_used

        # read access to the attribute under test
        value = getattr(request, field)
        print(value)

        # the request now reports itself as used
        assert request.is_used
def eval_request():
    """Build a request with ten doc/groundtruth pairs, one chunk each.

    Each doc chunk gets the embedding ``[1, 1]`` and each groundtruth chunk
    gets ``[2, 2]``.

    :return: a ``Request`` wrapping the populated ``RequestProto``
    """
    pair_count = 10
    proto = jina_pb2.RequestProto()
    for _ in range(pair_count):
        doc = Document(proto.index.docs.add())
        groundtruth = Document(proto.index.groundtruths.add())
        doc_chunk = doc.chunks.new()
        groundtruth_chunk = groundtruth.chunks.new()
        doc_chunk.embedding = np.array([1, 1])
        groundtruth_chunk.embedding = np.array([2, 2])
    return Request(proto)
Exemple #18
0
def test_multiple_access():
    """Truth-testing a request leaves ``is_used`` false; reading ``index`` sets it."""
    requests = [
        Request(raw.SerializeToString(), EnvelopeProto())
        for raw in request_generator(random_docs(10))
    ]

    # truthiness check alone must not mark the request as used
    for request in requests:
        assert not request.is_used
        assert request
        assert not request.is_used

    # reading the index body does mark it as used
    for request in requests:
        assert not request.is_used
        assert request.index
        assert request.is_used
Exemple #19
0
def test_request_docs_chunks_mutable_iterator():
    """Edits made through chunks yielded by ``doc.chunks`` must persist.

    The test writes to each yielded chunk, then checks that the change is
    visible on a second iteration and on the protobuf taken from
    ``as_pb_object``, including writes made after that protobuf was fetched.
    """
    r = Request()
    r.request_type = 'index'
    for d in random_docs(10):
        r.docs.append(d)

    for d in r.docs:
        assert isinstance(d, Document)
        for idx, c in enumerate(d.chunks):
            # check the chunk itself, not the enclosing document again
            assert isinstance(c, Document)
            c.text = f'look I changed it! {idx}'

    # iterating again should see the change; keep each yielded chunk
    chunk_pointers = []
    for d in r.docs:
        assert isinstance(d, Document)
        for idx, c in enumerate(d.chunks):
            assert c.text == f'look I changed it! {idx}'
            chunk_pointers.append(c)

    # the protobuf form of the request should see the change
    rpb = r.as_pb_object

    for d in rpb.index.docs:
        assert isinstance(d, DocumentProto)
        for idx, c in enumerate(d.chunks):
            assert isinstance(c, DocumentProto)
            assert c.text == f'look I changed it! {idx}'

    # change again through the chunk objects kept above
    for c in chunk_pointers:
        c.text = 'now i change it back'

    # the protobuf fetched earlier should see that change too
    for d in rpb.index.docs:
        assert isinstance(d, DocumentProto)
        for c in d.chunks:
            assert c.text == 'now i change it back'
Exemple #20
0
def test_pprint_routes(capfd):
    result = []
    r = jina_pb2.RouteProto()
    r.status.code = jina_pb2.StatusProto.ERROR
    r.status.exception.stacks.extend(['r1\nline1', 'r2\nline2'])
    result.append(r)
    r = jina_pb2.RouteProto()
    r.status.code = jina_pb2.StatusProto.ERROR_CHAINED
    r.status.exception.stacks.extend(['line1', 'line2'])
    result.append(r)
    r = jina_pb2.RouteProto()
    r.status.code = jina_pb2.StatusProto.SUCCESS
    result.append(r)
    rr = Request()
    rr.routes.extend(result)
    pprint_routes(rr)
    out, err = capfd.readouterr()
    assert out == '''+-----+------+------------+
Exemple #21
0
 def request(field_type):
     """Build a typed index request with ten doc/groundtruth chunk pairs.

     Every chunk gets granularity 1. Depending on ``field_type`` the doc and
     groundtruth chunks receive differing ``text``, ``buffer`` or ``blob``
     values; any other value leaves the content unset.

     :param field_type: ``'text'``, ``'buffer'`` or ``'blob'``
     :return: ``Request(...).as_typed_request('index')``
     """
     pair_count = 10
     proto = jina_pb2.RequestProto()
     for _ in range(pair_count):
         doc_pb = proto.index.docs.add()
         groundtruth_pb = proto.index.groundtruths.add()
         doc_chunk = Document(doc_pb.chunks.add())
         groundtruth_chunk = Document(groundtruth_pb.chunks.add())
         doc_chunk.granularity = 1
         groundtruth_chunk.granularity = 1
         # doc and groundtruth content intentionally differ for each kind
         if field_type == 'text':
             doc_chunk.text = 'aaa'
             groundtruth_chunk.text = 'aaaa'
         elif field_type == 'buffer':
             doc_chunk.buffer = b'\x01\x02\x03'
             groundtruth_chunk.buffer = b'\x01\x02\x03\x04'
         elif field_type == 'blob':
             doc_chunk.blob = np.array([1, 1, 1])
             groundtruth_chunk.blob = np.array([1, 1, 1, 1])
     wrapped = Request(proto)
     return wrapped.as_typed_request('index')
Exemple #22
0
import numpy as np
import pytest

from jina import Document, Request, QueryLang, NdArray
from jina.types.score import NamedScore
from jina.types.arrays import ChunkArray
from jina.types.arrays.match import MatchArray


@pytest.mark.parametrize(
    'obj',
    [
        Document(),
        Request(),
        QueryLang(),
        NamedScore(),
        NdArray(),
        MatchArray([Document()], Document()),
        ChunkArray([Document()], Document()),
    ],
)
def test_builtin_str_repr_no_content(obj):
    """Printing the ``str`` and ``repr`` forms of an empty object must not raise.

    :param obj: a freshly constructed, content-free instance
    """
    as_str = str(obj)
    print(as_str)
    as_repr = repr(obj)
    print(as_repr)


@pytest.mark.parametrize(
    'obj',
    [
        Document(content='123', chunks=[Document(content='abc')]),
        QueryLang({
Exemple #23
0
def test_request_extend_queryset():
    """``Request.extend_queryset`` accepts a driver, a ``QueryLang`` and a proto.

    Covers a single call with a list, three calls with one item each, and
    the ``TypeError`` raised for an unsupported argument.
    """
    from_driver = SliceQL(start=3, end=4)
    from_querylang = QueryLang(SliceQL(start=3, end=4, priority=1))
    from_proto = jina_pb2.QueryLangProto()
    from_proto.name = 'SliceQL'
    from_proto.parameters['start'] = 3
    from_proto.parameters['end'] = 4
    from_proto.priority = 2

    batched = Request()
    batched.extend_queryset([from_driver, from_querylang, from_proto])
    # the three inputs were built with priorities 0, 1, 2 in that order
    for position, ql in enumerate(batched.queryset):
        assert ql.priority == position
        assert ql.parameters['start'] == 3
        assert ql.parameters['end'] == 4

    # same checks when the entries are added one call at a time
    one_by_one = Request()
    for item in (from_driver, from_querylang, from_proto):
        one_by_one.extend_queryset(item)
    for position, ql in enumerate(one_by_one.queryset):
        assert ql.priority == position
        assert ql.parameters['start'] == 3
        assert ql.parameters['end'] == 4

    with pytest.raises(TypeError):
        one_by_one.extend_queryset(1)