Example #1
def test_index_remote_rpi(test_workspace):
    f_args = set_gateway_parser().parse_args(['--host', '0.0.0.0'])

    def start_gateway():
        with GatewayPod(f_args):
            time.sleep(3)

    t = mp.Process(target=start_gateway)
    t.daemon = True
    t.start()

    f = Flow(optimize_level=FlowOptimizeLevel.IGNORE_GATEWAY).add(
        uses=os.path.join(cur_dir, 'yaml/test-index-remote.yml'),
        parallel=3,
        separated_workspace=True,
        host='0.0.0.0',
        port_expose=random_port())

    with f:
        f.index(input_fn=random_docs(1000))
Example #2
def test_flow_topo_mixed(docker_image_built, _logforward):
    f = (
        Flow()
        .add(
            name='d4',
            uses='docker://jinaai/jina:test-pip',
            entrypoint='jina executor',
        )
        .add(name='d5', uses=_logforward)
        .add(
            name='d6',
            uses='docker://jinaai/jina:test-pip',
            needs='d4',
            entrypoint='jina executor',
        )
        .join(['d6', 'd5'])
    )

    with f:
        f.post(on='/index', inputs=random_docs(10))
Example #3
def test_shards_insufficient_data(mocker, restful):
    """THIS IS SUPER IMPORTANT FOR TESTING SHARDS

    IF THIS FAILS, DON'T IGNORE IT, DEBUG IT
    """
    index_docs = 3
    parallel = 4
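    # 4 shards ("parallel=4") but only 3 documents, so at least one shard receives no data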

    mock = mocker.Mock()

    def validate(req):
        mock()
        assert len(req.docs) == 1
        assert len(req.docs[0].matches) == index_docs

        for d in req.docs[0].matches:
            assert hasattr(d, 'weight')
            assert d.weight

    f = (Flow(restful=restful).add(name='doc_pb',
                                   uses=os.path.join(cur_dir,
                                                     '../yaml/test-docpb.yml'),
                                   parallel=parallel))
    with f:
        f.index(input_fn=random_docs(index_docs))

    time.sleep(2)
    with f:
        pass
    time.sleep(2)
    f = (Flow(restful=restful).add(name='doc_pb',
                                   uses=os.path.join(cur_dir,
                                                     '../yaml/test-docpb.yml'),
                                   parallel=parallel,
                                   polling='all',
                                   uses_after='_merge_chunks'))
    with f:
        f.search(input_fn=random_queries(1, index_docs), on_done=validate)
    time.sleep(2)
    rm_files(['test-docshard-tmp'])
    mock.assert_called_once()
Example #4
    def test_shards_insufficient_data(self):
        """THIS IS SUPER IMPORTANT FOR TESTING SHARDS

        IF THIS FAILS, DON'T IGNORE IT, DEBUG IT
        """
        index_docs = 3
        parallel = 4

        def validate(req):
            assert len(req.docs) == 1
            assert len(req.docs[0].matches) == index_docs

            for d in req.docs[0].matches:
                self.assertTrue(hasattr(d, 'weight'))
                self.assertIsNotNone(d.weight)
                assert d.meta_info == b'hello world'

        f = Flow().add(name='doc_pb',
                       uses=os.path.join(cur_dir, '../yaml/test-docpb.yml'),
                       parallel=parallel,
                       separated_workspace=True)
        with f:
            f.index(input_fn=random_docs(index_docs), random_doc_id=False)

        time.sleep(2)
        with f:
            pass
        time.sleep(2)
        f = Flow().add(name='doc_pb',
                       uses=os.path.join(cur_dir, '../yaml/test-docpb.yml'),
                       parallel=parallel,
                       separated_workspace=True,
                       polling='all',
                       uses_reducing='_merge_all')
        with f:
            f.search(input_fn=random_queries(1, index_docs),
                     random_doc_id=False,
                     output_fn=validate,
                     callback_on_body=True)
        time.sleep(2)
        self.add_tmpfile('test-docshard-tmp')
Example #5
def test_lazy_msg_access_with_envelope():
    envelope_proto = jina_pb2.EnvelopeProto()
    envelope_proto.compression.algorithm = 'NONE'
    envelope_proto.request_type = 'DataRequest'
    messages = [
        Message(
            envelope_proto,
            r.SerializeToString(),
        ) for r in request_generator('/', random_docs(10))
    ]
    for m in messages:
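        # reading the envelope and dumping the message must not deserialize the request body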
        assert not m.request.is_decompressed
        assert m.envelope
        assert len(m.dump()) == 3
        assert not m.request.is_decompressed
        assert m.request._pb_body is None
        assert m.request._buffer is not None
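        # touching .proto forces deserialization: the raw buffer is released and the pb body materializes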
        assert m.proto
        assert m.request.is_decompressed
        assert m.request._pb_body is not None
        assert m.request._buffer is None
Example #6
def test_exec_fn_return_dict(mocker):
    encode_mock = mocker.Mock()

    class MyExecutor(BaseEncoder):
        def encode(self, id):
            encode_mock()
            return [{'id': 'hello'}] * len(id)

    exec = MyExecutor()
    bd = EncodeDriver()

    bd.attach(exec, runtime=None)
    docs = list(random_docs(10))

    ds = DocumentSet(docs)

    bd._apply_all(ds)
    encode_mock.assert_called()

    for d in ds:
        assert d.id == 'hello'
Example #7
def test_this_will_fail():
    f = (
        Flow()
        .add(name='a11', uses='DummySegment')
        .add(name='a12', uses='DummySegment', needs='gateway')
        .add(name='r1', uses='_merge_all', needs=['a11', 'a12'])
        .add(name='a21', uses='DummySegment', needs='gateway')
        .add(name='a22', uses='DummySegment', needs='gateway')
        .add(name='r2', uses='_merge_all', needs=['a21', 'a22'])
        .add(uses='_merge_all', needs=['r1', 'r2'])
    )

    with f:
        f.index(input_fn=random_docs(10, chunks_per_doc=0), output_fn=validate)
Example #8
def test_extract_multi_fields(mocker):
    encode_mock = mocker.Mock()

    class MyExecutor(BaseEncoder):
        def encode(self, id, embedding):
            encode_mock()
            assert isinstance(id, list)
            assert isinstance(embedding, list)
            assert isinstance(id[0], str)
            assert isinstance(embedding[0], np.ndarray)

    exec = MyExecutor()
    bd = EncodeDriver()

    bd.attach(exec, runtime=None)
    docs = list(random_docs(10))

    ds = DocumentArray(docs)

    bd._apply_all(ds)
    encode_mock.assert_called()
Example #9
def test_flow_topo1(docker_image_built, _logforward):
    f = (Flow().add(
        name='d0',
        uses='docker://jinaai/jina:test-pip',
        uses_internal=_logforward,
        entrypoint='jina pod',
    ).add(
        name='d1',
        uses='docker://jinaai/jina:test-pip',
        uses_internal=_logforward,
        entrypoint='jina pod',
    ).add(
        name='d2',
        uses='docker://jinaai/jina:test-pip',
        uses_internal=_logforward,
        needs='d0',
        entrypoint='jina pod',
    ).join(['d2', 'd1']))

    with f:
        f.index(inputs=random_docs(10))
Example #10
def test_flow_topo_parallel():
    f = (
        Flow()
        .add(
            name='d7',
            uses='docker://jinaai/jina:test-pip',
            entrypoint='jina executor',
            parallel=3,
        )
        .add(name='d8', parallel=3)
        .add(
            name='d9',
            uses='docker://jinaai/jina:test-pip',
            entrypoint='jina executor',
            needs='d7',
        )
        .join(['d9', 'd8'])
    )

    with f:
        f.post(on='/index', inputs=random_docs(10))
Example #11
def test_this_should_work(mocker, restful):
    f = (
        Flow(restful=restful)
        .add(name='a1')
        .add(name='a11', uses='DummySegment', needs='a1')
        .add(name='a12', uses='DummySegment', needs='a1')
        .add(name='r1', uses='_merge_chunks', needs=['a11', 'a12'])
        .add(name='a2', needs='gateway')
        .add(name='a21', uses='DummySegment', needs='a2')
        .add(name='a22', uses='DummySegment', needs='a2')
        .add(name='r2', uses='_merge_chunks', needs=['a21', 'a22'])
        .add(uses='_merge_chunks', needs=['r1', 'r2'])
    )

    response_mock = mocker.Mock(wraps=validate)

    with f:
        f.index(input_fn=random_docs(10, chunks_per_doc=0),
                on_done=response_mock)

    response_mock.assert_called()
Example #12
def test_exec_fn_return_doc(mocker):
    encode_mock = mocker.Mock()

    class MyExecutor(BaseEncoder):
        def encode(self, id):
            encode_mock()
            return [Document(mime_type='image/png')] * len(id)

    exec = MyExecutor()
    bd = EncodeDriver()

    bd.attach(exec, runtime=None)
    docs = list(random_docs(10))

    ds = DocumentArray(docs)

    bd._apply_all(ds)
    encode_mock.assert_called()

    for d in ds:
        assert d.mime_type == 'image/png'
Example #13
def test_this_should_work(mocker, protocol):
    f = (
        Flow(protocol=protocol)
        .add(name='a1')
        .add(name='a11', uses='DummySegment', needs='a1')
        .add(name='a12', uses='DummySegment', needs='a1')
        .add(name='r1', uses=Merger, needs=['a11', 'a12'])
        .add(name='a2', needs='gateway')
        .add(name='a21', uses='DummySegment', needs='a2')
        .add(name='a22', uses='DummySegment', needs='a2')
        .add(name='r2', uses=Merger, needs=['a21', 'a22'])
        .add(uses=Merger, needs=['r1', 'r2'])
    )

    response_mock = mocker.Mock()

    with f:
        f.index(inputs=random_docs(10, chunks_per_doc=0),
                on_done=response_mock)

    validate_callback(response_mock, validate)
Example #14
def test_shards_insufficient_data():
    """THIS IS SUPER IMPORTANT FOR TESTING SHARDS

    IF THIS FAILS, DON'T IGNORE IT, DEBUG IT
    """
    index_docs = 3
    parallel = 4

    def validate(req):
        assert len(req.docs) == 1
        assert len(req.docs[0].matches) == index_docs

        for d in req.docs[0].matches:
            assert hasattr(d, 'weight')
            assert d.weight
            assert d.meta_info == b'hello world'

    f = Flow().add(name='doc_pb',
                   uses=str(cur_dir.parent / 'yaml' / 'test-docpb.yml'),
                   parallel=parallel,
                   separated_workspace=True)
    with f:
        f.index(input_fn=random_docs(index_docs), override_doc_id=False)

    time.sleep(2)
    with f:
        pass
    time.sleep(2)
    f = Flow().add(name='doc_pb',
                   uses=str(cur_dir.parent / 'yaml' / 'test-docpb.yml'),
                   parallel=parallel,
                   separated_workspace=True,
                   polling='all',
                   uses_after='_merge_all')
    with f:
        f.search(input_fn=random_queries(1, index_docs),
                 override_doc_id=False,
                 callback_on='body')
    time.sleep(2)
    rm_files(['test-docshard-tmp'])
Example #15
def test_all_sync_clients(protocol, mocker):
    f = Flow(protocol=protocol).add(uses=MyExec)
    docs = list(random_docs(1000))
    m1 = mocker.Mock()
    m2 = mocker.Mock()
    m3 = mocker.Mock()
    m4 = mocker.Mock()
    with f:
        c = Client(
            host='localhost',
            port=f.port,
            protocol=protocol,
        )
        c.post('/', on_done=m1)
        c.post('/foo', docs, on_done=m2)
        c.post('/foo', on_done=m3)
        c.post('/foo', docs, parameters={'hello': 'world'}, on_done=m4)

    m1.assert_called_once()
    m2.assert_called()
    m3.assert_called_once()
    m4.assert_called()
Example #16
def test_request_docs_chunks_mutable_iterator():
    """Test if weak reference work in nested docs"""
    r = Request()
    r.request_type = 'index'
    for d in random_docs(10):
        r.docs.append(d)

    for d in r.docs:
        assert isinstance(d, Document)
        for idx, c in enumerate(d.chunks):
            assert isinstance(c, Document)
            c.text = f'look I changed it! {idx}'

    # iterate it again should see the change
    doc_pointers = []
    for d in r.docs:
        assert isinstance(d, Document)
        for idx, c in enumerate(d.chunks):
            assert c.text == f'look I changed it! {idx}'
            doc_pointers.append(c)

    # pb-lize it should see the change
    rpb = r.proto

    for d in rpb.index.docs:
        assert isinstance(d, DocumentProto)
        for idx, c in enumerate(d.chunks):
            assert isinstance(c, DocumentProto)
            assert c.text == f'look I changed it! {idx}'

    # change again by following the pointers
    for d in doc_pointers:
        d.text = 'now i change it back'

    # iterate it again should see the change
    for d in rpb.index.docs:
        assert isinstance(d, DocumentProto)
        for c in d.chunks:
            assert c.text == 'now i change it back'
Example #17
def test_get_set_item(tmpdir, idx1, idx99):
    dam = DocumentArrayMemmap(tmpdir)
    candidates = list(random_docs(100))
    for d in candidates:
        d.id = f'id_{d.id}'
    dam.extend(candidates)
    dam[idx1] = Document(id='id_1', text='hello')
    assert len(dam) == 100
    with pytest.raises(IndexError):
        dam[100] = Document(text='world')
    dam[idx99] = Document(id='id_99', text='world')
    assert len(dam) == 100
    assert dam[1].text == 'hello'
    assert dam[99].text == 'world'
    assert dam['id_1'].text == 'hello'
    assert dam['id_99'].text == 'world'
    for idx, d in enumerate(dam):
        if idx == 1:
            assert d.text == 'hello'
        if idx == 99:
            assert d.text == 'world'
    with pytest.raises(ValueError):
        dam['unknown_new'] = Document()
Example #18
def test_jina_document_to_pydantic_document():
    document_proto_model = PROTO_TO_PYDANTIC_MODELS.DocumentProto

    for jina_doc in random_docs(num_docs=10):
        jina_doc = jina_doc.dict()
        pydantic_doc = document_proto_model(**jina_doc)

        assert jina_doc['text'] == pydantic_doc.text
        assert jina_doc['mime_type'] == pydantic_doc.mime_type
        assert (jina_doc['embedding']['dense']['shape'] ==
                pydantic_doc.embedding.dense.shape)
        assert (jina_doc['embedding']['dense']['dtype'] ==
                pydantic_doc.embedding.dense.dtype)

        for jina_doc_chunk, pydantic_doc_chunk in zip(jina_doc['chunks'],
                                                      pydantic_doc.chunks):
            assert jina_doc_chunk['id'] == pydantic_doc_chunk.id
            assert jina_doc_chunk['tags'] == pydantic_doc_chunk.tags
            assert jina_doc_chunk['text'] == pydantic_doc_chunk.text
            assert jina_doc_chunk['mime_type'] == pydantic_doc_chunk.mime_type
            assert jina_doc_chunk['parent_id'] == pydantic_doc_chunk.parent_id
            assert jina_doc_chunk[
                'granularity'] == pydantic_doc_chunk.granularity
Example #19
def test_binarypb_in_flow(test_metas, mocker):
    docs = list(random_docs(10))

    def validate(req):
        assert len(docs) == len(req.docs)
        for d, d0 in zip(req.docs, docs):
            np.testing.assert_almost_equal(d.embedding, d0.embedding)

    f = Flow().add(uses='binarypb.yml')

    with f:
        f.index(docs)

    docs_no_embedding = copy.deepcopy(docs)
    for d in docs_no_embedding:
        d.ClearField('embedding')

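    # searching with the embeddings stripped should return docs whose embeddings match the indexed originals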
    mock = mocker.Mock()
    with f:
        f.search(docs_no_embedding, on_done=mock)

    mock.assert_called_once()
    validate_callback(mock, validate)
Example #20
def test_client_websocket(mocker, flow_with_websocket):
    with flow_with_websocket:
        time.sleep(0.5)
        client = Client(
            host='localhost',
            port=str(flow_with_websocket.port_expose),
            protocol='websocket',
        )
        # Test that a regular index request triggers the correct callbacks
        on_always_mock = mocker.Mock()
        on_error_mock = mocker.Mock()
        on_done_mock = mocker.Mock()
        client.post(
            '',
            random_docs(1),
            request_size=1,
            on_always=on_always_mock,
            on_error=on_error_mock,
            on_done=on_done_mock,
        )
        on_always_mock.assert_called_once()
        on_done_mock.assert_called_once()
        on_error_mock.assert_not_called()
Example #21
    def test_index_remote(self):
        f_args = set_gateway_parser().parse_args(['--allow-spawn'])

        def start_gateway():
            with GatewayPod(f_args):
                time.sleep(20)

        t = mp.Process(target=start_gateway)
        t.daemon = True
        t.start()

        f = Flow().add(uses=os.path.join(cur_dir, 'yaml/test-index.yml'),
                       parallel=3, separated_workspace=True,
                       host='localhost', port_expose=f_args.port_expose)

        with f:
            f.index(input_fn=random_docs(1000))

        time.sleep(3)
        for j in range(3):
            self.assertTrue(os.path.exists(f'test2-{j + 1}/test2.bin'))
            self.assertTrue(os.path.exists(f'test2-{j + 1}/tmp2'))
            self.add_tmpfile(f'test2-{j + 1}/test2.bin', f'test2-{j + 1}/tmp2', f'test2-{j + 1}')
Example #22
def test_memmap_get_by_slice(tmpdir):
    def _assert_similar(da1, da2):
        for doc_a, doc_b in zip(da1, da2):
            assert doc_a.id == doc_b.id

    dam = DocumentArrayMemmap(tmpdir)
    candidates = list(random_docs(100))
    for d in candidates:
        d.id = f'id_{d.id}'
    dam.extend(candidates)
    assert len(dam) == 100

    first_10 = dam[:10]
    assert len(first_10) == 10
    _assert_similar(candidates[:10], first_10)

    last_10 = dam[-10:]
    assert len(last_10) == 10
    _assert_similar(candidates[-10:], last_10)

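    # like a Python list, out-of-range slice bounds are clamped instead of raising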
    out_of_bound_1 = dam[-101:-95]
    assert len(out_of_bound_1) == 5
    _assert_similar(candidates[0:5], out_of_bound_1)

    out_of_bound_2 = dam[-101:101]
    assert len(out_of_bound_2) == 100
    _assert_similar(candidates, out_of_bound_2)

    out_of_bound_3 = dam[95:101]
    assert len(out_of_bound_3) == 5
    _assert_similar(candidates[95:], out_of_bound_3)

    assert len(dam[101:105]) == 0

    assert len(dam[-105:-101]) == 0

    assert len(dam[10:0]) == 0
Example #23
def test_flow_topo1(docker_image_built):
    f = (
        Flow()
        .add(
            name='d0',
            uses='docker://jinaai/jina:test-pip',
            entrypoint='jina executor',
        )
        .add(
            name='d1',
            uses='docker://jinaai/jina:test-pip',
            entrypoint='jina executor',
        )
        .add(
            name='d2',
            uses='docker://jinaai/jina:test-pip',
            needs='d0',
            entrypoint='jina executor',
        )
        .join(['d2', 'd1'])
    )

    with f:
        f.post(on='/index', inputs=random_docs(10))
Example #24
def test_compression(compress_algo, low_bytes, high_ratio):
    no_comp_sizes = []
    sizes = []
    docs = list(random_docs(100, embed_dim=100))
    kwargs = dict(
        identity='gateway',
        pod_name='123',
        compress_min_bytes=2 * sum(no_comp_sizes) if low_bytes else 0,
        compress_min_ratio=10 if high_ratio else 1,
    )

    with TimeContext('no compress'):
        for r in request_generator(docs):
            m = Message(None, r, compress=CompressAlgo.NONE, **kwargs)
            m.dump()
            no_comp_sizes.append(m.size)

    kwargs = dict(
        identity='gateway',
        pod_name='123',
        compress_min_bytes=2 * sum(no_comp_sizes) if low_bytes else 0,
        compress_min_ratio=10 if high_ratio else 1,
    )
    with TimeContext(f'compressing with {str(compress_algo)}') as tc:
        for r in request_generator(docs):
            m = Message(None, r, compress=compress_algo, **kwargs)
            m.dump()
            sizes.append(m.size)

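    # compression is skipped for NONE, for messages under the minimum size, or when the required
    # ratio is set too high, so in those cases the compressed sizes do not shrink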
    if compress_algo == CompressAlgo.NONE or low_bytes or high_ratio:
        assert sum(sizes) >= sum(no_comp_sizes)
    else:
        assert sum(sizes) < sum(no_comp_sizes)
    print(
        f'{str(compress_algo)}: size {sum(sizes) / len(sizes)} (ratio: {sum(no_comp_sizes) / sum(sizes):.2f}) with {tc.duration:.2f}s'
    )
Example #25
def test_this_will_fail(mocker, restful):
    f = (
        Flow(restful=restful)
        .add(name='a11', uses='DummySegment')
        .add(name='a12', uses='DummySegment', needs='gateway')
        .add(name='r1', uses='_merge_chunks', needs=['a11', 'a12'])
        .add(name='a21', uses='DummySegment', needs='gateway')
        .add(name='a22', uses='DummySegment', needs='gateway')
        .add(name='r2', uses='_merge_chunks', needs=['a21', 'a22'])
        .add(uses='_merge_chunks', needs=['r1', 'r2'])
    )

    response_mock = mocker.Mock()

    with f:
        f.index(inputs=random_docs(10, chunks_per_doc=0),
                on_done=response_mock)

    validate_callback(response_mock, validate)
Example #26
def test_all_sync_clients(protocol, mocker):
    from jina import requests

    class MyExec(Executor):
        @requests
        def foo(self, docs, **kwargs):
            pass
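
    # a bare @requests (no on= selector) binds foo() to every endpoint, so all four posts below reach it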

    f = Flow(protocol=protocol).add(uses=MyExec)
    docs = list(random_docs(1000))
    m1 = mocker.Mock()
    m2 = mocker.Mock()
    m3 = mocker.Mock()
    m4 = mocker.Mock()
    with f:
        f.post('/', on_done=m1)
        f.post('/foo', docs, on_done=m2)
        f.post('/foo', on_done=m3)
        f.post('/foo', docs, parameters={'hello': 'world'}, on_done=m4)

    m1.assert_called_once()
    m2.assert_called()
    m3.assert_called_once()
    m4.assert_called()
Example #27
def test_binarypb_in_flow(test_metas, mocker):
    docs = list(random_docs(10))

    def validate(req):
        assert len(docs) == len(req.docs)
        for d, d0 in zip(req.docs, docs):
            np.testing.assert_almost_equal(
                NdArray(d.embedding).value,
                NdArray(d0.embedding).value)

    f = Flow(callback_on='body').add(uses='binarypb.yml')

    response_mock = mocker.Mock(wraps=validate)
    with f:
        f.index(docs)

    docs_no_embedding = copy.deepcopy(docs)
    for d in docs_no_embedding:
        d.ClearField('embedding')

    with f:
        f.search(docs_no_embedding, on_done=response_mock)

    response_mock.assert_called()
Example #28
def test_flow_with_one_container_pod(docker_image_built):
    f = (Flow()
         .add(name='dummyEncoder1', uses=img_name))

    with f:
        f.index(input_fn=random_docs(10))
Example #29
def test_lazy_request_fields():
    reqs = (LazyRequest(r.SerializeToString(), Envelope())
            for r in _generate(random_docs(10)))
    for r in reqs:
        assert list(r.DESCRIPTOR.fields_by_name.keys())
Example #30
    def test_simple_route(self):
        f = Flow().add(uses='_forward')
        with f:
            f.index(input_fn=random_docs(10))
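
All of the examples above call a random_docs test helper that is defined elsewhere in the test suite and not shown here. The parameter names below (num_docs, chunks_per_doc, embed_dim) are taken from how it is called in the examples, but the body is only a minimal sketch assuming a Jina 2.x-style Document API, not the real fixture (which also sets fields such as meta_info).

import numpy as np
from jina import Document


def random_docs(num_docs, chunks_per_doc=5, embed_dim=10):
    # hypothetical stand-in for the shared test helper used in the examples above
    for j in range(num_docs):
        d = Document(text=f'doc {j}', embedding=np.random.random([embed_dim]))
        for k in range(chunks_per_doc):
            # each chunk is itself a Document attached to its parent
            d.chunks.append(
                Document(text=f'chunk {j}-{k}',
                         embedding=np.random.random([embed_dim])))
        yield d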