Esempio n. 1
0
def test_envelope_in_sep_request():
    """Round-trip only the envelope while the request stays as raw bytes.

    Each message is held as an ``(envelope, serialized_request)`` pair, so a
    simulated hop only has to serialize and parse the envelope. After
    ``num_reqs`` hops every envelope's ``request_id`` must end with that many
    ``'r'`` characters.

    :return:
    """
    recv = []
    for raw in _generate(random_docs(num_docs)):
        wrapped = add_envelope(raw, 'test', '123')
        recv.append((wrapped.envelope, wrapped.request.SerializeToString()))

    def _receive(envelope_bytes, request_bytes):
        # Parse the envelope only; the request payload is passed through as-is.
        envelope = jina_pb2.EnvelopeProto()
        envelope.ParseFromString(envelope_bytes)
        envelope.request_id += 'r'
        return envelope, request_bytes

    with TimeContext('serialize and deserialize'):
        # Each iteration mimics one chained pass; the request is never parsed.
        for _ in range(num_reqs):
            # mimic sending: only the envelope gets serialized
            sent = [(envelope.SerializeToString(), request_bytes)
                    for envelope, request_bytes in recv]

            # mimic receiving
            recv.clear()
            for envelope_bytes, request_bytes in sent:
                recv.append(_receive(envelope_bytes, request_bytes))

    for envelope, _ in recv:
        assert envelope.request_id.endswith('r' * num_reqs)
Esempio n. 2
0
def test_lazy_msg_access():
    """Check when a ``ProtoMessage``'s request gets flagged as used.

    Three passes over the same messages assert that reading the envelope,
    reading the request object itself, and dumping the message all leave
    ``request.is_used`` false, while reading ``request.index.docs`` sets it.
    """
    messages = []
    for raw in _generate(random_docs(10)):
        messages.append(
            ProtoMessage(None,
                         raw.SerializeToString(),
                         'test',
                         '123',
                         request_id='123',
                         request_type='IndexRequest'))

    # Pass 1: touching the envelope and dumping keeps the request unused.
    for msg in messages:
        assert not msg.request.is_used
        assert msg.envelope
        assert len(msg.dump()) == 3
        assert not msg.request.is_used

    # Pass 2: merely referencing the request object keeps it unused as well.
    for msg in messages:
        assert not msg.request.is_used
        assert msg.request
        assert len(msg.dump()) == 3
        assert not msg.request.is_used

    # Pass 3: reading a nested field is what finally marks the request used.
    for msg in messages:
        assert not msg.request.is_used
        assert msg.request.index.docs
        assert len(msg.dump()) == 3
        assert msg.request.is_used
Esempio n. 3
0
    def test_request_generate_numpy_arrays_iterator(self):
        """Feed ``_generate`` a generator of array rows and check both batches.

        A 10x10 random array is yielded row by row with ``batch_size=5``, so
        two requests of five documents each are pulled and checked.
        """
        input_array = np.random.random([10, 10])

        def generator():
            yield from input_array

        req = _generate(data=generator(), batch_size=5)

        # Document ids are asserted to keep counting across the two batches,
        # hence the running offset added to the 1-based position.
        for id_offset in (0, 5):
            request = next(req)
            docs = request.index.docs
            self.assertEqual(len(docs), 5)
            for position, doc in enumerate(docs, 1):
                self.assertEqual(doc.id, id_offset + position)
                self.assertEqual(doc.length, 5)
                self.assertEqual(doc.granularity, 0)
                self.assertEqual(pb2array(doc.blob).shape, (10, ))
                self.assertEqual(doc.blob.shape, [10])
Esempio n. 4
0
def test_lazy_append_access():
    """Check that appending to ``docs`` marks a lazy ``Request`` as used."""
    for raw in _generate(random_docs(10)):
        lazy_req = Request(raw.SerializeToString(), EnvelopeProto())
        assert not lazy_req.is_used
        # write access through the ``docs`` attribute
        lazy_req.docs.append(jina_pb2.DocumentProto())
        # the request is now expected to report itself as used
        assert lazy_req.is_used
Esempio n. 5
0
    def test_request_generate(self):
        """Check that 100 text lines with ``batch_size=100`` give one request."""
        def random_lines(num_lines):
            # Returns a generator object, just like a ``yield``-based helper would.
            return ("https://github.com 'i\'m dummy doc %d'" % j
                    for j in range(num_lines))

        batches = list(_generate(data=random_lines(100), batch_size=100))

        assert len(batches) == 1
Esempio n. 6
0
def test_lazy_nested_clear_access():
    """Check that clearing a nested field marks a lazy ``Request`` as used."""
    for raw in _generate(random_docs(10)):
        lazy_req = Request(raw.SerializeToString(), EnvelopeProto())
        assert not lazy_req.is_used
        # write access on the nested ``index`` message
        lazy_req.index.ClearField('docs')
        # the request is now expected to report itself as used
        assert lazy_req.is_used
Esempio n. 7
0
def test_lazy_change_message_type():
    """Check the effect of setting a control command on an index request.

    The request must report itself as used afterwards, and its
    ``index.docs`` is asserted to be empty.
    """
    idle = jina_pb2.RequestProto.ControlRequestProto.IDLE
    for raw in _generate(random_docs(10)):
        lazy_req = Request(raw.SerializeToString(), EnvelopeProto())
        assert not lazy_req.is_used
        # write access on the ``control`` body
        # NOTE(review): presumably this switches the request type — confirm
        lazy_req.control.command = idle
        # the request is now expected to report itself as used
        assert lazy_req.is_used
        assert len(lazy_req.index.docs) == 0
Esempio n. 8
0
def test_lazy_nest_access():
    """Check that writing to a nested document marks the request as used.

    The id written through ``docs[0]`` must also be readable back through
    ``index.docs[0]``.
    """
    for raw in _generate(random_docs(10)):
        lazy_req = Request(raw.SerializeToString(), EnvelopeProto())
        assert not lazy_req.is_used
        # write access on the first document
        lazy_req.docs[0].id = '1'
        # the request is now expected to report itself as used
        assert lazy_req.is_used
        assert lazy_req.index.docs[0].id == '1'
Esempio n. 9
0
def test_message_size():
    """Check that ``Message.size`` is zero before ``dump()`` and larger after.

    After dumping, ``size`` must exceed the combined ``sys.getsizeof`` of the
    serialized envelope and the serialized request.
    """
    messages = []
    for raw in _generate(random_docs(10)):
        messages.append(Message(None, raw, 'test', '123'))

    for msg in messages:
        assert msg.size == 0
        # ``sys.getsizeof`` of a bytes object is never zero, so these two
        # checks mainly exercise serialization of both parts.
        assert sys.getsizeof(msg.envelope.SerializeToString())
        assert sys.getsizeof(msg.request.SerializeToString())
        assert len(msg.dump()) == 3

        # Read ``size`` first, then the parts, as in the single-expression form.
        dumped_size = msg.size
        envelope_size = sys.getsizeof(msg.envelope.SerializeToString())
        request_size = sys.getsizeof(msg.request.SerializeToString())
        assert dumped_size > envelope_size + request_size
Esempio n. 10
0
def test_lazy_access(field):
    """Check that reading attribute ``field`` marks a lazy ``Request`` as used.

    :param field: name of the request attribute to read
    """
    for raw in _generate(random_docs(10)):
        lazy_req = Request(raw.SerializeToString(), EnvelopeProto())
        assert not lazy_req.is_used

        # read access on the requested attribute
        value = getattr(lazy_req, field)
        print(value)

        # the request is now expected to report itself as used
        assert lazy_req.is_used
Esempio n. 11
0
def test_multiple_access():
    """Check that truth-testing a request leaves it unused; ``index`` does not."""
    lazy_requests = []
    for raw in _generate(random_docs(10)):
        lazy_requests.append(Request(raw.SerializeToString(), EnvelopeProto()))

    # Pass 1: truth-testing the request object must not mark it used.
    for lazy_req in lazy_requests:
        assert not lazy_req.is_used
        assert lazy_req
        assert not lazy_req.is_used

    # Pass 2: reading ``index`` is what marks it used.
    for lazy_req in lazy_requests:
        assert not lazy_req.is_used
        assert lazy_req.index
        assert lazy_req.is_used
Esempio n. 12
0
    def test_request_generate_lines_from_list(self):
        """Check that a list of strings becomes one batch of text documents."""
        total = 100

        def random_lines(num_lines):
            # A materialised list (not a generator) is the input under test.
            lines = []
            for j in range(1, num_lines + 1):
                lines.append(f'i\'m dummy doc {j}')
            return lines

        batches = _generate(data=random_lines(total), batch_size=total)

        first = next(batches)
        docs = first.index.docs
        self.assertEqual(len(docs), total)
        for position, doc in enumerate(docs, 1):
            self.assertEqual(doc.length, total)
            self.assertEqual(doc.mime_type, 'text/plain')
            self.assertEqual(doc.text, f'i\'m dummy doc {position}')
Esempio n. 13
0
def test_request_generate_lines_from_list():
    """Check that a list of strings becomes one batch of text documents."""
    total = 100

    def random_lines(num_lines):
        # A materialised list (not a generator) is the input under test.
        lines = []
        for j in range(1, num_lines + 1):
            lines.append(f'i\'m dummy doc {j}')
        return lines

    batches = _generate(data=random_lines(total), batch_size=total)

    first = next(batches)
    docs = first.index.docs
    assert len(docs) == total
    for position, doc in enumerate(docs, 1):
        assert doc.length == total
        assert doc.mime_type == 'text/plain'
        assert doc.text == f'i\'m dummy doc {position}'
Esempio n. 14
0
    def test_request_generate_bytes(self):
        """Check that UTF-8 encoded lines end up in each document's ``buffer``."""
        total = 100

        def random_lines(num_lines):
            # Returns a generator object producing encoded byte strings.
            return (f'i\'m dummy doc {j}'.encode('utf8')
                    for j in range(1, num_lines + 1))

        batches = _generate(data=random_lines(total), batch_size=total)

        first = next(batches)
        docs = first.index.docs
        self.assertEqual(len(docs), total)
        for position, doc in enumerate(docs, 1):
            self.assertEqual(doc.length, total)
            self.assertEqual(doc.mime_type, 'text/plain')
            self.assertEqual(doc.buffer.decode(), f'i\'m dummy doc {position}')
Esempio n. 15
0
def test_request_generate_lines_with_fake_url():
    """Check that lines starting with a URL-like prefix are kept as plain text."""
    total = 100

    def random_lines(num_lines):
        # Returns a generator object; every line begins with a URL-looking token.
        return (f'https://github.com i\'m dummy doc {j}'
                for j in range(1, num_lines + 1))

    batches = _generate(data=random_lines(total), batch_size=total)

    first = next(batches)
    docs = first.index.docs
    assert len(docs) == total
    for position, doc in enumerate(docs, 1):
        assert doc.length == total
        assert doc.mime_type == 'text/plain'
        assert doc.text == f'https://github.com i\'m dummy doc {position}'
Esempio n. 16
0
def test_request_generate_bytes():
    """Check that UTF-8 encoded lines end up in each document's ``buffer``."""
    total = 100

    def random_lines(num_lines):
        # Returns a generator object producing encoded byte strings.
        return (f'i\'m dummy doc {j}'.encode('utf8')
                for j in range(1, num_lines + 1))

    batches = _generate(data=random_lines(total), batch_size=total)

    first = next(batches)
    docs = first.index.docs
    assert len(docs) == total
    for position, doc in enumerate(docs, 1):
        assert doc.length == total
        assert doc.mime_type == 'text/plain'
        assert doc.buffer.decode() == f'i\'m dummy doc {position}'
Esempio n. 17
0
    def test_request_generate_lines_with_fake_url(self):
        """Check that lines starting with a URL-like prefix are kept as plain text."""
        total = 100

        def random_lines(num_lines):
            # Returns a generator object; every line begins with a URL-looking token.
            return (f'https://github.com i\'m dummy doc {j}'
                    for j in range(1, num_lines + 1))

        batches = _generate(data=random_lines(total), batch_size=total)

        first = next(batches)
        docs = first.index.docs
        self.assertEqual(len(docs), total)
        for position, doc in enumerate(docs, 1):
            expected_text = f'https://github.com i\'m dummy doc {position}'
            self.assertEqual(doc.length, total)
            self.assertEqual(doc.mime_type, 'text/plain')
            self.assertEqual(doc.text, expected_text)
Esempio n. 18
0
    def test_request_generate_lines(self):
        """Check id, length, mime type, granularity and text for generated lines."""
        total = 100

        def random_lines(num_lines):
            # Returns a generator object producing plain text lines.
            return (f'i\'m dummy doc {j}' for j in range(1, num_lines + 1))

        batches = _generate(data=random_lines(total), batch_size=total)

        first = next(batches)
        docs = first.index.docs
        self.assertEqual(len(docs), total)
        for position, doc in enumerate(docs, 1):
            # ids are asserted to follow the 1-based position in the batch
            self.assertEqual(doc.id, position)
            self.assertEqual(doc.length, total)
            self.assertEqual(doc.mime_type, 'text/plain')
            self.assertEqual(doc.granularity, 0)
            self.assertEqual(doc.text, f'i\'m dummy doc {position}')
Esempio n. 19
0
def test_request_generate_numpy_arrays():
    """Check that a 10x10 array with ``batch_size=5`` gives two 5-doc batches.

    Each document's blob, wrapped in ``GenericNdArray``, must expose a
    ``(10,)``-shaped value.
    """
    input_array = np.random.random([10, 10])

    batches = _generate(data=input_array, batch_size=5)

    # The same checks apply to both batches.
    for _ in range(2):
        request = next(batches)
        docs = request.index.docs
        assert len(docs) == 5
        for doc in docs:
            assert doc.length == 5
            assert GenericNdArray(doc.blob).value.shape == (10,)
Esempio n. 20
0
    def test_request_generate_docs(self):
        """Check that pre-built ``Document`` protos keep their fields.

        Text, offset and mime type are set on each document and asserted on
        the documents of the generated request.
        """
        total = 100

        def random_docs(num_docs):
            for number in range(1, num_docs + 1):
                document = jina_pb2.Document()
                document.text = f'i\'m dummy doc {number}'
                document.offset = 1000
                # original author's note: this value will be ignored
                document.tags['id'] = 1000
                document.mime_type = 'mime_type'
                yield document

        batches = _generate(data=random_docs(total), batch_size=total)

        first = next(batches)
        docs = first.index.docs
        self.assertEqual(len(docs), total)
        for position, doc in enumerate(docs, 1):
            self.assertEqual(doc.length, total)
            self.assertEqual(doc.mime_type, 'mime_type')
            self.assertEqual(doc.text, f'i\'m dummy doc {position}')
            self.assertEqual(doc.offset, 1000)
Esempio n. 21
0
def test_all_in_one_request():
    """Round-trip whole messages (envelope and request together).

    Every simulated hop serializes and re-parses the full ``MessageProto``.
    After ``num_reqs`` hops each envelope's ``request_id`` must end with that
    many ``'r'`` characters.
    """
    recv = []
    for raw in _generate(random_docs(num_docs)):
        recv.append(add_envelope(raw, 'test', '123'))

    def _receive(wire_bytes):
        # Parse the complete message and tag its envelope for this hop.
        parsed = jina_pb2.MessageProto()
        parsed.ParseFromString(wire_bytes)
        parsed.envelope.request_id += 'r'
        return parsed

    with TimeContext('serialize and deserialize'):
        for _ in range(num_reqs):  # mimic multiple pods
            # mimic sending
            sent = [message.SerializeToString() for message in recv]

            # mimic receiving
            recv.clear()
            for wire_bytes in sent:
                recv.append(_receive(wire_bytes))

    for message in recv:
        assert message.envelope.request_id.endswith('r' * num_reqs)
Esempio n. 22
0
def test_request_generate_docs():
    """Check that pre-built ``Document`` protos keep their fields.

    Text, offset and mime type are set on each document and asserted on the
    documents of the generated request.
    """
    total = 100

    def random_docs(num_docs):
        for number in range(1, num_docs + 1):
            document = jina_pb2.Document()
            document.text = f'i\'m dummy doc {number}'
            document.offset = 1000
            # original author's note: this value will be ignored
            document.tags['id'] = 1000
            document.mime_type = 'mime_type'
            yield document

    batches = _generate(data=random_docs(total), batch_size=total)

    first = next(batches)
    docs = first.index.docs
    assert len(docs) == total
    for position, doc in enumerate(docs, 1):
        assert doc.length == total
        assert doc.mime_type == 'mime_type'
        assert doc.text == f'i\'m dummy doc {position}'
        assert doc.offset == 1000
Esempio n. 23
0
    def test_request_generate_numpy_arrays(self):
        """Check that a 10x10 array with ``batch_size=5`` gives two 5-doc batches.

        Each document's blob must decode via ``pb2array`` to shape ``(10,)``
        and carry ``[10]`` in its own ``shape`` field.
        """
        input_array = np.random.random([10, 10])

        batches = _generate(data=input_array, batch_size=5)

        # The same checks apply to both batches.
        for _ in range(2):
            request = next(batches)
            docs = request.index.docs
            self.assertEqual(len(docs), 5)
            for doc in docs:
                self.assertEqual(doc.length, 5)
                self.assertEqual(pb2array(doc.blob).shape, (10, ))
                self.assertEqual(doc.blob.shape, [10])
Esempio n. 24
0
def test_request_generate_numpy_arrays_iterator():
    """Feed ``_generate`` a generator of array rows and check both batches.

    A 10x10 random array is yielded row by row with ``batch_size=5``. Each
    document's blob, wrapped in ``NdArray``, must expose a ``(10,)``-shaped
    value.
    """
    input_array = np.random.random([10, 10])

    def generator():
        yield from input_array

    batches = _generate(data=generator(), batch_size=5)

    # The same checks apply to both batches.
    for _ in range(2):
        request = next(batches)
        docs = request.index.docs
        assert len(docs) == 5
        for doc in docs:
            assert doc.length == 5
            assert NdArray(doc.blob).value.shape == (10, )
Esempio n. 25
0
    def test_request_generate_docs_with_different_granularity(self):
        """Check that ``_generate``'s ``granularity`` and ids replace preset ones.

        Each input document is built with ``id=1000`` and ``granularity=3``.
        The generated documents are asserted to carry their 1-based position
        as id and the ``granularity=5`` passed to ``_generate``.
        """
        total = 100

        def random_docs(num_docs):
            for number in range(1, num_docs + 1):
                document = jina_pb2.Document()
                document.text = f'i\'m dummy doc {number}'
                document.offset = 1000
                # expected to be replaced: the id is asserted to be the position
                document.id = 1000
                # expected to be overridden by the granularity given to _generate
                document.granularity = 3
                document.mime_type = 'mime_type'
                yield document

        batches = _generate(data=random_docs(total), batch_size=total, granularity=5)

        first = next(batches)
        docs = first.index.docs
        self.assertEqual(len(docs), total)
        for position, doc in enumerate(docs, 1):
            self.assertEqual(doc.id, position)
            self.assertEqual(doc.length, total)
            self.assertEqual(doc.mime_type, 'mime_type')
            self.assertEqual(doc.granularity, 5)
            self.assertEqual(doc.text, f'i\'m dummy doc {position}')
            self.assertEqual(doc.offset, 1000)
Esempio n. 26
0
def test_request_generate_dict_str():
    """Check that JSON-encoded dicts are turned into documents with chunks.

    Every input is a JSON string holding text, offset, tags and two chunks;
    the generated documents are asserted to expose the same values.
    """
    import json

    total = 100

    def random_docs(num_docs):
        for number in range(1, num_docs + 1):
            first_chunk = {'text': f'i\'m chunk 1', 'modality': 'text'}
            second_chunk = {'text': f'i\'m chunk 2', 'modality': 'image'}
            # Key order matches the serialized payload: text, offset, tags, chunks.
            payload = {
                'text': f'i\'m dummy doc {number}',
                'offset': 1000,
                'tags': {'id': 1000},
                'chunks': [first_chunk, second_chunk],
            }
            yield json.dumps(payload)

    batches = _generate(data=random_docs(total), batch_size=total)

    first = next(batches)
    docs = first.index.docs
    assert len(docs) == total
    for position, doc in enumerate(docs, 1):
        assert doc.text == f'i\'m dummy doc {position}'
        assert doc.offset == 1000
        assert doc.tags['id'] == 1000
        assert len(doc.chunks) == 2
        assert doc.chunks[0].modality == 'text'
        assert doc.chunks[0].text == f'i\'m chunk 1'
        assert doc.chunks[1].modality == 'image'
        assert doc.chunks[1].text == f'i\'m chunk 2'
Esempio n. 27
0
def test_lazy_request_fields():
    """Check that a ``LazyRequest`` exposes a non-empty field-name list."""
    for raw in _generate(random_docs(10)):
        lazy_req = LazyRequest(raw.SerializeToString(), Envelope())
        field_names = list(lazy_req.DESCRIPTOR.fields_by_name.keys())
        assert field_names