Beispiel #1
0
def test_compound_idx(tmpdir):
    """Index, then search, through the flow defined in ``yaml/test-joint.yml``.

    One flow indexes 100 chunk-less documents; a second flow built from the
    same YAML searches with 10 documents. Every search response must be
    error-free and its first match must be scored by ``NumpyIndexer``.

    :param tmpdir: temporary directory whose path is exported as
        ``TEST_WORKDIR`` for the duration of the test
    """
    os.environ['TEST_WORKDIR'] = str(tmpdir)

    def validate(req, indexer_name):
        # a status code below ERROR means the request was handled without failure
        assert req.status.code < jina_pb2.StatusProto.ERROR
        # the first match of the first document names the op that scored it
        assert req.search.docs[0].matches[0].score.op_name == indexer_name

    yaml_path = os.path.join(cur_dir, 'yaml/test-joint.yml')
    try:
        with Flow().add(uses=yaml_path) as f:
            f.index(random_docs_new_api(100, chunks_per_doc=0))

        with Flow().add(uses=yaml_path) as g:
            g.search(random_docs_new_api(10, chunks_per_doc=0),
                     output_fn=lambda x: validate(x, 'NumpyIndexer'))
    finally:
        # always remove the variable so a failure here cannot leak into other tests
        os.environ.pop('TEST_WORKDIR', None)
Beispiel #2
0
def test_index(tmpdir):
    """Index with three parallel, separately-stored shards and then search.

    After indexing 1000 documents, each of the three shard directories
    (``test2-1`` .. ``test2-3``) under the workspace must contain both
    ``test2.bin`` and ``tmp2``. The same flow is then re-entered to run a
    search with ``top_k=50``.

    :param tmpdir: temporary directory whose path is exported as
        ``JINA_TEST_INDEX`` for the duration of the test
    """
    workspace_path = str(tmpdir)
    os.environ['JINA_TEST_INDEX'] = workspace_path
    try:
        f = Flow().add(uses=os.path.join(cur_dir, 'yaml/test-index.yml'),
                       parallel=3,
                       separated_workspace=True)
        with f:
            f.index(input_fn=random_docs_new_api(1000))

        # shard directories are numbered from 1, one per parallel replica
        for shard in range(1, 4):
            assert os.path.exists(os.path.join(workspace_path, f'test2-{shard}/test2.bin'))
            assert os.path.exists(os.path.join(workspace_path, f'test2-{shard}/tmp2'))

        with f:
            f.search(input_fn=random_docs_new_api(2), output_fn=get_result, top_k=50)
    finally:
        # always remove the variable so a failure here cannot leak into other tests
        os.environ.pop('JINA_TEST_INDEX', None)
Beispiel #3
0
def test_docs_filter():
    """A ``3 < tags__id < 5`` filter over ten random docs keeps exactly one."""
    candidates = random_docs_new_api(10)
    query = QuerySet(candidates).filter(tags__id__lt=5, tags__id__gt=3)
    matched = list(query)
    assert len(matched) == 1
    for doc in matched:
        tag_id = doc.tags['id']
        # the surviving document must sit strictly inside the open interval
        assert 3 < tag_id < 5
def test_index_remote(test_workspace):
    """Index through a flow while a gateway pod runs in a background process.

    A ``GatewayPod`` is started in a daemon child process, then a flow with
    three parallel, separately-stored shards indexes 1000 documents. Each
    shard directory under ``test_workspace`` must afterwards contain both
    ``test2.bin`` and ``tmp2``.

    :param test_workspace: workspace directory in which the shard directories
        are expected to appear
    """
    f_args = set_gateway_parser().parse_args(['--host', '0.0.0.0'])

    def start_gateway():
        # keep the gateway alive for 20 seconds, then let the context close it
        with GatewayPod(f_args):
            time.sleep(20)

    t = mp.Process(target=start_gateway)
    t.daemon = True
    t.start()

    try:
        f = Flow().add(uses=os.path.join(cur_dir, 'yaml/test-index-remote.yml'),
                       parallel=3,
                       separated_workspace=True,
                       host='0.0.0.0',
                       port_expose=f_args.port_expose)

        with f:
            f.index(input_fn=random_docs_new_api(1000))

        # pause before checking the filesystem, as the original test did
        time.sleep(3)
        for j in range(3):
            bin_path = os.path.join(test_workspace, f'test2-{j + 1}/test2.bin')
            index_filename_path = os.path.join(test_workspace,
                                               f'test2-{j + 1}/tmp2')
            assert os.path.exists(bin_path)
            assert os.path.exists(index_filename_path)
    finally:
        # stop and reap the gateway now instead of leaving it running for the
        # rest of its 20 second sleep after the test has finished
        t.terminate()
        t.join(timeout=5)
Beispiel #5
0
def test_request_docs_mutable_iterator():
    """Edits made to documents obtained from ``Request.docs`` must write through.

    The changes have to be visible on a later iteration of ``docs``, on the
    protobuf object of the request, and when made through document handles
    that were collected before the protobuf object was taken.
    """
    req = Request()
    req.request_type = 'index'
    for doc in random_docs_new_api(10):
        req.docs.append(doc)

    # first pass: overwrite the text of every document in place
    for pos, doc in enumerate(req.docs):
        assert isinstance(doc, Document)
        doc.text = f'look I changed it! {pos}'

    # second pass: the edits are visible; keep a handle to each document
    held = []
    for pos, doc in enumerate(req.docs):
        assert isinstance(doc, Document)
        assert doc.text == f'look I changed it! {pos}'
        held.append(doc)

    # the protobuf form of the request reflects the same edits
    proto = req.as_pb_object
    for pos, pb_doc in enumerate(proto.index.docs):
        assert isinstance(pb_doc, DocumentProto)
        assert pb_doc.text == f'look I changed it! {pos}'

    # mutate again through the handles collected earlier
    for doc in held:
        doc.text = 'now i change it back'

    # the already-obtained protobuf object must show the new text as well
    for pb_doc in proto.index.docs:
        assert isinstance(pb_doc, DocumentProto)
        assert pb_doc.text == 'now i change it back'
Beispiel #6
0
def test_nested_chunks_filter():
    """Filtering docs by a nested chunk query keeps one doc with five chunks."""
    docs = random_docs_new_api(10)
    # inner query applied to each document's chunks: 33 < tags id < 35
    chunk_query = Q(tags__id__lt=35, tags__id__gt=33)
    hits = list(QuerySet(docs).filter(Q(chunks__filter=chunk_query)))
    assert len(hits) == 1
    for hit in hits:
        assert len(hit.chunks) == 5
Beispiel #7
0
def test_docs_filter_equal():
    """An exact ``tags__id=4`` filter keeps one document, chunks intact."""
    docs = random_docs_new_api(10)
    hits = list(QuerySet(docs).filter(tags__id=4))
    assert len(hits) == 1
    for hit in hits:
        # the tag value is compared after conversion to int, as stored values
        # are not required to be ints already
        assert int(hit.tags['id']) == 4
        assert len(hit.chunks) == 5
Beispiel #8
0
def test_random_docs_new_api():
    """``random_docs_new_api`` must reproduce ``random_docs`` for the same seed.

    Both generators are run after seeding NumPy with 42, and the resulting
    documents and their chunks are compared pairwise on embedding, text and
    the ``id`` tag.
    """
    np.random.seed(42)
    docs1 = list(random_docs(10))
    np.random.seed(42)
    docs2 = list(random_docs_new_api(10))

    # zip() stops at the shorter input, so without these length checks the
    # test would pass vacuously if either generator yielded too few items
    assert len(docs2) == len(docs1)
    for d2, d1 in zip(docs2, docs1):
        np.testing.assert_almost_equal(d2.embedding, NdArray(d1.embedding).value)
        assert d2.text == d1.text
        assert d2.tags['id'] == d1.tags['id']
        assert len(d2.chunks) == len(d1.chunks)
        for c2, c1 in zip(d2.chunks, d1.chunks):
            np.testing.assert_almost_equal(c2.embedding, NdArray(c1.embedding).value)
            assert c2.text == c1.text
            assert c2.tags['id'] == c1.tags['id']
def test_index_remote_rpi(test_workspace):
    """Index through a gateway-ignoring flow while a gateway pod runs separately.

    A ``GatewayPod`` is started in a daemon child process, then a flow built
    with ``FlowOptimizeLevel.IGNORE_GATEWAY`` and three parallel,
    separately-stored shards indexes 1000 documents on a random exposed port.
    The test only checks that indexing completes without raising.

    :param test_workspace: workspace fixture; not referenced in the body
    """
    f_args = set_gateway_parser().parse_args(['--host', '0.0.0.0'])

    def start_gateway():
        # keep the gateway alive for 3 seconds, then let the context close it
        with GatewayPod(f_args):
            time.sleep(3)

    t = mp.Process(target=start_gateway)
    t.daemon = True
    t.start()

    try:
        f = Flow(optimize_level=FlowOptimizeLevel.IGNORE_GATEWAY).add(
            uses=os.path.join(cur_dir, 'yaml/test-index-remote.yml'),
            parallel=3,
            separated_workspace=True,
            host='0.0.0.0',
            port_expose=random_port())

        with f:
            f.index(input_fn=random_docs_new_api(1000))
    finally:
        # reap the gateway process so it does not outlive the test; terminate()
        # is harmless if the process has already exited on its own
        t.terminate()
        t.join(timeout=5)
Beispiel #10
0
 def start_client(fl):
     """Send ten random documents to ``fl`` as an index request."""
     docs = random_docs_new_api(10)
     fl.index(input_fn=docs)
Beispiel #11
0
def test_simple_route():
    """A flow with a single default pod must accept an index request."""
    with Flow().add() as flow:
        docs = random_docs_new_api(10)
        flow.index(input_fn=docs)
Beispiel #12
0
def test_doc_iters():
    """Every item yielded by ``random_docs_new_api`` must be a ``Document``."""
    for item in random_docs_new_api(3, 5):
        assert isinstance(item, Document)