コード例 #1
0
    def test_flow_with_jump(self):
        """Build a flow whose pods are wired via explicit `needs` jumps,
        dry-run it, and verify it survives a YAML save/load round trip."""
        # Build the topology step by step instead of one deep call chain;
        # each `add` returns the flow, so sequential rebinding is equivalent.
        f = Flow()
        f = f.add(name='r1', yaml_path='_forward')
        f = f.add(name='r2', yaml_path='_forward')
        f = f.add(name='r3', yaml_path='_forward', needs='r1')
        f = f.add(name='r4', yaml_path='_forward', needs='r2')
        f = f.add(name='r5', yaml_path='_forward', needs='r3')
        f = f.add(name='r6', yaml_path='_forward', needs='r4')
        f = f.add(name='r8', yaml_path='_forward', needs='r6')
        f = f.add(name='r9', yaml_path='_forward', needs='r5')
        f = f.add(name='r10', yaml_path='_merge', needs=['r9', 'r8'])

        with f:
            f.dry_run()

        # round-trip through YAML and dry-run the reloaded flow as well
        f.save_config('tmp.yml')
        Flow.load_config('tmp.yml')

        with Flow.load_config('tmp.yml') as fl:
            fl.dry_run()

        self.add_tmpfile('tmp.yml')
コード例 #2
0
def test_indexer_with_ref_indexer_compound_move(random_workspace_move,
                                                parallel, index_docs, mocker,
                                                uses_no_docker):
    """Index via a compound indexer, relocate the workspace on disk, then
    query the moved copy and expect the full top-k result set."""
    top_k = 10

    # index phase
    index_yaml = os.path.join(cur_dir, 'compound-index.yml')
    with Flow.load_config(index_yaml) as index_flow:
        index_flow.index(input_fn=index_docs, request_size=10)

    mock = mocker.Mock()

    # relocate the workspace: copy to the query location, remove the original
    src = os.environ['JINA_TEST_INDEXER_WITH_REF_INDEXER']
    dst = os.environ['JINA_TEST_INDEXER_WITH_REF_INDEXER_QUERY']
    shutil.copytree(src, dst)
    shutil.rmtree(src)

    def validate_response(resp):
        assert len(resp.search.docs) == 1
        assert len(resp.search.docs[0].matches) == top_k

    query_document = Document()
    query_document.embedding = np.array([1, 1])

    # query phase against the moved workspace
    query_yaml = os.path.join(cur_dir, 'compound-query.yml')
    with Flow.load_config(query_yaml) as query_flow:
        query_flow.search(input_fn=[query_document], on_done=mock, top_k=top_k)

    mock.assert_called_once()
    validate_callback(mock, validate_response)
コード例 #3
0
def main(task, num_docs):
    """Dispatch the CLI task: index audio files, serve queries, or dry-run.

    :param task: one of 'index', 'query' or 'dryrun'
    :param num_docs: cap on the number of documents to index
    :raises NotImplementedError: for any other task name
    """
    config()
    if task == 'index':
        workspace = os.environ['JINA_WORKSPACE']
        # Refuse to index into an existing workspace: the user must remove it
        # first. NOTE(review): the boxed banner contains mojibake glyphs
        # (������) — presumably mis-encoded emoji; confirm the intended text.
        if os.path.exists(workspace):
            print(
                f'\n +---------------------------------------------------------------------------------+ \
                    \n |                                   ������                                        | \
                    \n | The directory {workspace} already exists. Please remove it before indexing again. | \
                    \n |                                   ������                                        | \
                    \n +---------------------------------------------------------------------------------+'
            )
            sys.exit(1)

        f = Flow.load_config('flows/index.yml')
        with f:
            # time the whole indexing pass for a rough QPS figure
            with TimeContext(f'QPS: indexing {num_docs}', logger=f.logger):
                f.index_files('data/*.wav', batch_size=2, size=num_docs)
    elif task == 'query':
        f = Flow.load_config('flows/query.yml')
        with f:
            # no perf measurement here, as it opens the REST API and blocks
            f.block()
    elif task == 'dryrun':
        # smoke test: load the query flow, bring it up, and tear it down
        f = Flow.load_config('flows/query.yml')
        with f:
            pass
    else:
        raise NotImplementedError(
            f'unknown task: {task}. A valid task is either `index` or `query` or `dryrun`.'
        )
コード例 #4
0
def test_delete_kv(config, mocker, has_content):
    """Index 10 docs into the KV index, search, delete 3 of them, and
    verify both the index size and the search results shrink accordingly."""
    yaml_file = 'flow_kv.yml'

    def make_validator(expected):
        # `mock` is resolved from the enclosing scope when the callback fires
        def _validate(resp):
            mock()
            assert len(resp.docs) == expected

        return _validate

    with Flow.load_config(yaml_file) as f:
        f.index(input_fn=random_docs(0, 10))
    validate_index_size(10)

    mock = mocker.Mock()
    with Flow.load_config(yaml_file) as f:
        # mix of existing ids [2, 5) and non-existent ids [100, 120)
        f.search(input_fn=chain(random_docs(2, 5), random_docs(100, 120)),
                 output_fn=make_validator(3))
    mock.assert_called_once()

    with Flow.load_config(yaml_file) as f:
        f.delete(input_fn=random_docs(0, 3, has_content=has_content))
    validate_index_size(7)

    mock = mocker.Mock()
    with Flow.load_config(yaml_file) as f:
        f.search(input_fn=random_docs(2, 4),
                 output_fn=make_validator(1))
    mock.assert_called_once()
コード例 #5
0
ファイル: app.py プロジェクト: noobhacker6969/examples
def main(task, return_image, data_path, num_docs, batch_size,
         overwrite_workspace):
    """Dispatch the CLI task: index files from `data_path` or serve queries.

    :param task: 'index' or 'query'; any other value does nothing
    :param return_image: suffix selecting which query-flow YAML to load
    :param data_path: path/glob of files to index
    :param num_docs: cap on the number of documents to index
    :param batch_size: indexing batch size
    :param overwrite_workspace: when truthy, wipe the workspace before indexing
    """
    config()
    if task == 'index':
        workspace = os.environ['WORKDIR']
        # NOTE(review): this only *warns* when the workspace exists — it does
        # not exit, so indexing proceeds over the old workspace unless
        # `overwrite_workspace` cleans it first. Confirm this is intended.
        # The banner also contains mojibake glyphs (������), presumably
        # mis-encoded emoji.
        if os.path.exists(workspace):
            print(
                f'\n +---------------------------------------------------------------------------------+ \
                    \n |                                   ������                                        | \
                    \n | The directory {workspace} already exists. Please remove it before indexing again. | \
                    \n |                                   ������                                        | \
                    \n +---------------------------------------------------------------------------------+'
            )
        if overwrite_workspace:
            clean_workdir()
        f = Flow.load_config('flow-index.yml')
        with f:
            f.index_files(data_path,
                          batch_size=batch_size,
                          read_mode='rb',
                          size=num_docs)
    elif task == 'query':
        # pick the query-flow variant based on `return_image`
        f = Flow.load_config(f'flow-query-{return_image}.yml')
        with f:
            # serves the REST API and blocks until interrupted
            f.block()
コード例 #6
0
def test_delete_vector(config, mocker, flow_file, has_content):
    """Index 10 vector docs, verify each query gets 9 matches, delete all
    docs, then verify queries come back with no matches."""
    NUMBER_OF_SEARCHES = 5

    def make_validator(expected_matches):
        # `mock` is resolved from the enclosing scope when the callback fires
        def _validate(resp):
            mock()
            assert len(resp.docs) == NUMBER_OF_SEARCHES
            for doc in resp.docs:
                assert len(doc.matches) == expected_matches

        return _validate

    with Flow.load_config(flow_file) as f:
        f.index(input_fn=random_docs(0, 10))
    validate_index_size(10)

    # before deletion: every query matches the 9 other docs
    mock = mocker.Mock()
    with Flow.load_config(flow_file) as f:
        f.search(input_fn=random_docs(0, NUMBER_OF_SEARCHES),
                 output_fn=make_validator(9))
    mock.assert_called_once()

    with Flow.load_config(flow_file) as f:
        f.delete(input_fn=random_docs(0, 10, has_content=has_content))
    validate_index_size(0)

    # after deletion: no matches at all
    mock = mocker.Mock()
    with Flow.load_config(flow_file) as f:
        f.search(input_fn=random_docs(0, NUMBER_OF_SEARCHES),
                 output_fn=make_validator(0))
    mock.assert_called_once()
コード例 #7
0
def test_update_kv(config, mocker):
    """Updating existing ids in the KV index keeps its size constant while
    searches continue to return results."""
    yaml_file = 'flow_kv.yml'
    n_searches = 1
    original_docs = list(random_docs(0, 10))
    replacement_docs = list(random_docs(0, 10))

    def on_response(resp):
        mock()
        assert len(resp.docs) == n_searches

    with Flow.load_config(yaml_file) as f:
        f.index(input_fn=original_docs)
    validate_index_size(10)

    mock = mocker.Mock()
    with Flow.load_config(yaml_file) as f:
        f.search(input_fn=list(random_docs(0, n_searches)),
                 output_fn=on_response)
    mock.assert_called_once()

    # overwrite the same id range: the index size must not change
    with Flow.load_config(yaml_file) as f:
        f.update(input_fn=replacement_docs)
    validate_index_size(10)

    mock = mocker.Mock()
    with Flow.load_config(yaml_file) as f:
        f.search(input_fn=random_docs(0, n_searches),
                 output_fn=on_response)
    mock.assert_called_once()
コード例 #8
0
def test_delete_kv(config, mocker, as_string):
    """Deleting ids from the KV index shrinks it and removes those docs
    from subsequent search results."""
    yaml_file = 'flow_kv.yml'

    def expect_docs(expected):
        def _check(resp):
            assert len(resp.docs) == expected

        return _check

    with Flow.load_config(yaml_file) as f:
        f.index(inputs=random_docs(0, 10))
    validate_index_size(10)

    mock = mocker.Mock()
    with Flow.load_config(yaml_file) as f:
        # ids [2, 5) exist, ids [100, 120) do not: only 3 docs come back
        f.search(inputs=chain(random_docs(2, 5), random_docs(100, 120)),
                 on_done=mock)
    mock.assert_called_once()
    validate_callback(mock, expect_docs(3))

    with Flow.load_config(yaml_file) as f:
        f.delete(ids=get_ids_to_delete(0, 3, as_string))
    validate_index_size(7)

    mock = mocker.Mock()
    with Flow.load_config(yaml_file) as f:
        # after deleting 3 ids only one doc in [2, 4) survives
        f.search(inputs=random_docs(2, 4), on_done=mock)
    mock.assert_called_once()
    validate_callback(mock, expect_docs(1))
コード例 #9
0
    def test_flow_identical(self):
        """A flow loaded from YAML, one built via the fluent API, and one
        round-tripped through save_config must compare equal and wire up
        the same ZMQ socket types."""
        with open(os.path.join(cur_dir, 'yaml/test-flow.yml')) as fp:
            a = Flow.load_config(fp)

        b = (Flow()
             .add(name='chunk_seg', replicas=3)
             .add(name='wqncode1', replicas=2)
             .add(name='encode2', replicas=2, needs='chunk_seg')
             .join(['wqncode1', 'encode2']))

        a.save_config('test2.yml')

        c = Flow.load_config('test2.yml')

        self.assertEqual(a, b)
        self.assertEqual(a, c)

        self.add_tmpfile('test2.yml')

        with a as f:
            gateway = f._pod_nodes['gateway']
            self.assertEqual(gateway.head_args.socket_in,
                             SocketType.PULL_CONNECT)
            self.assertEqual(gateway.tail_args.socket_out,
                             SocketType.PUSH_CONNECT)

            # (pod name, expected head socket_in, expected tail socket_out);
            # every pod shares the ROUTER/DEALER fan-out and PULL tail-in.
            expectations = [
                ('chunk_seg', SocketType.PULL_BIND, SocketType.PUB_BIND),
                ('wqncode1', SocketType.SUB_CONNECT, SocketType.PUSH_CONNECT),
                ('encode2', SocketType.SUB_CONNECT, SocketType.PUSH_CONNECT),
            ]
            for pod_name, head_in, tail_out in expectations:
                node = f._pod_nodes[pod_name]
                self.assertEqual(node.head_args.socket_in, head_in)
                self.assertEqual(node.head_args.socket_out,
                                 SocketType.ROUTER_BIND)
                for arg in node.peas_args['peas']:
                    self.assertEqual(arg.socket_in,
                                     SocketType.DEALER_CONNECT)
                    self.assertEqual(arg.socket_out,
                                     SocketType.PUSH_CONNECT)
                self.assertEqual(node.tail_args.socket_in,
                                 SocketType.PULL_BIND)
                self.assertEqual(node.tail_args.socket_out, tail_out)
コード例 #10
0
def test_dimensionality_search_wrong(tmp_path, mocker):
    """will fail because search docs have diff shape in embedding"""
    config_environ(path=tmp_path)
    index_yaml = 'flow.yml'
    query_yaml = 'flow.yml'
    docs = list(random_docs_with_shapes(NR_DOCS_INDEX, START_SHAPE))
    docs_update = list(
        random_docs_with_shapes(NR_DOCS_INDEX,
                                INDEX2_SHAPE,
                                start=len(docs) + 1))
    all_docs_indexed = docs + docs_update
    docs_search = list(
        random_docs_with_shapes(NUMBER_OF_SEARCHES,
                                INDEX2_SHAPE,
                                start=len(docs) + len(docs_update) + 1))
    f_index = Flow.load_config(index_yaml)
    f_query = Flow.load_config(query_yaml)

    def expect_matches(expected):
        def _check(resp):
            mock()
            assert len(resp.docs) == NUMBER_OF_SEARCHES
            for doc in resp.docs:
                assert len(doc.matches) == expected

        return _check

    with f_index:
        f_index.index(input_fn=docs)
    validate_index_size(NR_DOCS_INDEX, expected_indices=2)

    mock = mocker.Mock()
    with f_query:
        # 0 because search docs have wrong shape
        f_query.search(input_fn=docs_search, on_done=expect_matches(0))
    mock.assert_called_once()

    # this won't increase the index size as the ids are new
    with f_index:
        f_index.update(input_fn=docs_update)
    validate_index_size(NR_DOCS_INDEX, expected_indices=2)

    mock = mocker.Mock()
    with f_query:
        # 0 because search docs have wrong shape
        f_query.search(input_fn=docs_search, on_done=expect_matches(0))
    mock.assert_called_once()

    with f_index:
        f_index.delete(ids=[d.id for d in all_docs_indexed])
    validate_index_size(0, expected_indices=2)

    mock = mocker.Mock()
    with f_query:
        f_query.search(input_fn=docs_search, on_done=expect_matches(0))
    mock.assert_called_once()
コード例 #11
0
def test_update_vector(config, mocker, flow_file):
    """Updating chunked vector docs replaces the old ids with new ones while
    the index size stays at num_chunks * num_docs."""
    num_searches = 10
    num_docs = 10
    num_chunks = 5

    docs_before = list(
        document_generator(start=0, num_docs=num_docs, num_chunks=num_chunks))
    docs_updated = list(
        document_generator(start=10, num_docs=20, num_chunks=num_chunks))
    ids_before = []
    ids_updated = []

    def make_validator(has_changed, num_matches):
        def _validate(resp):
            assert len(resp.docs) == num_search
            # appended at callback time, mirroring the original behavior
            for d in docs_before:
                ids_before.append(d.id)
            for d in docs_updated:
                ids_updated.append(d.id)
            for doc in resp.docs:
                assert len(doc.matches) == num_matches
                if has_changed:
                    assert doc.id in ids_updated
                    assert doc.id not in ids_before
                else:
                    assert doc.id in ids_before
                    assert doc.id not in ids_updated

        return _validate

    with Flow.load_config(flow_file) as f:
        f.index(input_fn=docs_before)
    # num_docs per all its chunks, 50 in this case
    validate_index_size(num_chunks * num_docs)

    mock = mocker.Mock()
    with Flow.load_config(flow_file) as f:
        f.search(input_fn=document_generator(start=0,
                                             num_docs=num_docs,
                                             num_chunks=num_chunks),
                 on_done=mock)
    mock.assert_called_once()
    validate_callback(
        mock, make_validator(has_changed=False, num_matches=TOP_K))

    with Flow.load_config(flow_file) as f:
        f.update(input_fn=docs_updated)
    # num_docs per all its chunks, 50 in this case
    validate_index_size(num_chunks * num_docs)

    mock = mocker.Mock()
    with Flow.load_config(flow_file) as f:
        f.search(input_fn=document_generator(start=10,
                                             num_docs=20,
                                             num_chunks=num_chunks),
                 on_done=mock)
    mock.assert_called_once()
    validate_callback(
        mock, make_validator(has_changed=True, num_matches=num_docs))
コード例 #12
0
def test_wrong_mime_type(tmp_path, mocker):
    """we assign text to .text, 'image/jpeg' to .mime_type"""
    config_environ(path=tmp_path)
    index_yaml = 'flow-parallel.yml'
    query_yaml = 'flow.yml'
    docs = list(random_docs_image_mime_text_content(NR_DOCS_INDEX))
    docs_update = list(
        random_docs_image_mime_text_content(NR_DOCS_INDEX,
                                            start=len(docs) + 1))
    all_docs_indexed = docs + docs_update
    docs_search = list(
        random_docs_image_mime_text_content(
            NUMBER_OF_SEARCHES, start=len(docs) + len(docs_update) + 1))
    f_index = Flow.load_config(index_yaml)
    f_query = Flow.load_config(query_yaml)

    def expect_matches(expected):
        def _check(resp):
            mock()
            assert len(resp.docs) == NUMBER_OF_SEARCHES
            for doc in resp.docs:
                assert len(doc.matches) == expected
                for m in doc.matches:
                    # matches come back as text/plain despite the
                    # 'image/jpeg' assigned at creation time
                    assert m.mime_type == 'text/plain'

        return _check

    with f_index:
        f_index.index(input_fn=docs)
    validate_index_size(NR_DOCS_INDEX, expected_indices=2)

    mock = mocker.Mock()
    with f_query:
        f_query.search(input_fn=docs_search, on_done=expect_matches(TOPK))
    mock.assert_called_once()

    # this won't increase the index size as the ids are new
    with f_index:
        f_index.update(input_fn=docs_update)
    validate_index_size(NR_DOCS_INDEX, expected_indices=2)

    mock = mocker.Mock()
    with f_query:
        f_query.search(input_fn=docs_search, on_done=expect_matches(TOPK))
    mock.assert_called_once()

    with f_index:
        f_index.delete(ids=[d.id for d in all_docs_indexed])
    validate_index_size(0, expected_indices=2)

    mock = mocker.Mock()
    with f_query:
        f_query.search(input_fn=docs_search, on_done=expect_matches(0))
    mock.assert_called_once()
コード例 #13
0
def test_index_depth_0_search_depth_1(tmpdir, mocker):
    """Index full sentences (granularity 0) and verify that chunk-level
    queries resolve back to the complete parent sentences."""
    os.environ['JINA_TEST_LEVEL_DEPTH_WORKSPACE'] = str(tmpdir)
    index_data = [
        'I am chunk 0 of doc 1, I am chunk 1 of doc 1, I am chunk 2 of doc 1',
        'I am chunk 0 of doc 2, I am chunk 1 of doc 2',
        'I am chunk 0 of doc 3, I am chunk 1 of doc 3, I am chunk 2 of doc 3, I am chunk 3 of doc 3',
    ]

    index_flow = Flow.load_config('flow-index.yml')
    with index_flow:
        index_flow.index(index_data)

    mock = mocker.Mock()

    def validate_granularity_1(resp):
        mock()
        assert len(resp.docs) == 3
        for position, doc in enumerate(resp.docs):
            assert doc.granularity == 0
            assert len(doc.matches) == 3
            assert doc.matches[0].granularity == 0
            # the top match is the full parent sentence of the query chunk
            assert doc.matches[0].text == index_data[position]

        assert resp.docs[0].text == ' I am chunk 1 of doc 1,'
        assert resp.docs[1].text == 'I am chunk 0 of doc 2,'
        assert resp.docs[2].text == ' I am chunk 3 of doc 3'

    search_data = [
        ' I am chunk 1 of doc 1,',
        'I am chunk 0 of doc 2,',
        ' I am chunk 3 of doc 3',
    ]

    with Flow.load_config('flow-query.yml') as search_flow:
        search_flow.search(
            input_fn=search_data,
            on_done=validate_granularity_1,
            callback_on='body',
        )

    del os.environ['JINA_TEST_LEVEL_DEPTH_WORKSPACE']
    mock.assert_called_once()
コード例 #14
0
ファイル: test_flow.py プロジェクト: helioxgroup/jina
def test_flow_with_jump():
    """Build a flow with `needs` jumps, validate its socket wiring while
    running, and re-validate after a YAML save/load round trip."""

    def _validate(f):
        # expected (head socket_in, tail socket_out) for every pod
        expectations = {
            'gateway': (SocketType.PULL_CONNECT, SocketType.PUSH_CONNECT),
            'r1': (SocketType.PULL_BIND, SocketType.PUB_BIND),
            'r2': (SocketType.SUB_CONNECT, SocketType.PUSH_CONNECT),
            'r3': (SocketType.SUB_CONNECT, SocketType.PUSH_CONNECT),
            'r4': (SocketType.PULL_BIND, SocketType.PUSH_CONNECT),
            'r5': (SocketType.PULL_BIND, SocketType.PUSH_CONNECT),
            'r6': (SocketType.PULL_BIND, SocketType.PUSH_CONNECT),
            'r8': (SocketType.PULL_BIND, SocketType.PUSH_CONNECT),
            'r9': (SocketType.PULL_BIND, SocketType.PUSH_CONNECT),
            'r10': (SocketType.PULL_BIND, SocketType.PUSH_BIND),
        }
        for pod_name, (socket_in, socket_out) in expectations.items():
            node = f._pod_nodes[pod_name]
            assert node.head_args.socket_in == socket_in
            assert node.tail_args.socket_out == socket_out
        # single-pea pods: the one pea doubles as both head and tail
        for node in f._pod_nodes.values():
            assert node.peas_args['peas'][0] == node.head_args
            assert node.peas_args['peas'][0] == node.tail_args

    f = (Flow()
         .add(name='r1')
         .add(name='r2')
         .add(name='r3', needs='r1')
         .add(name='r4', needs='r2')
         .add(name='r5', needs='r3')
         .add(name='r6', needs='r4')
         .add(name='r8', needs='r6')
         .add(name='r9', needs='r5')
         .add(name='r10', needs=['r9', 'r8']))

    with f:
        _validate(f)

    f.save_config('tmp.yml')
    Flow.load_config('tmp.yml')

    with Flow.load_config('tmp.yml') as f:
        _validate(f)

    rm_files(['tmp.yml'])
コード例 #15
0
ファイル: test_topk.py プロジェクト: zatcsc/jina
def test_topk_override(config):
    """A per-request QueryLang carrying a higher-priority VectorSearchDriver
    overrides the flow's configured top-k."""
    # Making queryset
    override_k = int(os.environ['JINA_TOPK_OVERRIDE'])
    top_k_queryset = QueryLang(VectorSearchDriver(top_k=override_k,
                                                  priority=1))

    with Flow.load_config('flow.yml') as f:
        f.index(input_fn=random_docs(100))

    n_query_docs = int(os.environ['JINA_NDOCS'])
    with Flow.load_config('flow.yml') as f:
        f.search(input_fn=random_docs(n_query_docs),
                 output_fn=validate_override_results,
                 queryset=[top_k_queryset])
コード例 #16
0
def test_index_depth_0_search_depth_1(tmpdir, mocker, monkeypatch, restful):
    """Index full sentences (granularity 0) and check that chunk-level
    queries resolve to the complete parent sentences."""
    monkeypatch.setenv("RESTFUL", restful)
    monkeypatch.setenv("JINA_TEST_LEVEL_DEPTH_WORKSPACE", str(tmpdir))

    index_data = [
        'I am chunk 0 of doc 1, I am chunk 1 of doc 1, I am chunk 2 of doc 1',
        'I am chunk 0 of doc 2, I am chunk 1 of doc 2',
        'I am chunk 0 of doc 3, I am chunk 1 of doc 3, I am chunk 2 of doc 3, I am chunk 3 of doc 3',
    ]

    index_flow = Flow.load_config('flow-index.yml')
    with index_flow:
        index_flow.index(index_data)

    mock = mocker.Mock()

    def validate_granularity_1(resp):
        mock()
        assert len(resp.docs) == 3
        for position, doc in enumerate(resp.docs):
            assert doc.granularity == 0
            assert len(doc.matches) == 3
            assert doc.matches[0].granularity == 0
            # the top match is the full parent sentence of the query chunk
            assert doc.matches[0].text == index_data[position]

        assert resp.docs[0].text == ' I am chunk 1 of doc 1,'
        assert resp.docs[1].text == 'I am chunk 0 of doc 2,'
        assert resp.docs[2].text == ' I am chunk 3 of doc 3'

    search_data = [
        ' I am chunk 1 of doc 1,',
        'I am chunk 0 of doc 2,',
        ' I am chunk 3 of doc 3',
    ]

    with Flow.load_config('flow-query.yml') as search_flow:
        search_flow.search(input_fn=search_data,
                           on_done=validate_granularity_1,
                           on_error=lambda r: print(r))

    mock.assert_called_once()
コード例 #17
0
def test_delete_vector(config, mocker, flow_file):
    """Index chunked docs, delete every doc and chunk id, and verify
    searches return no matches afterwards."""
    num_searches = 10
    num_docs = 10
    num_chunks = 5

    def _docs():
        # fresh generator per call: each use consumes it
        return document_generator(start=0,
                                  num_docs=num_docs,
                                  num_chunks=num_chunks)

    def expect_matches(expected):
        def _check(resp):
            assert len(resp.docs) == num_searches
            for doc in resp.docs:
                assert len(doc.matches) == expected

        return _check

    with Flow.load_config(flow_file) as f:
        f.index(inputs=_docs())
    # 5 chunks for each of the 10 docs
    validate_index_size(num_chunks * num_docs)

    mock = mocker.Mock()
    with Flow.load_config(flow_file) as f:
        f.search(inputs=_docs(), on_done=mock)
    mock.assert_called_once()
    validate_callback(mock, expect_matches(TOP_K))

    # collect every doc id plus all of its chunk ids for deletion
    delete_ids = []
    for d in _docs():
        delete_ids.append(d.id)
        for c in d.chunks:
            delete_ids.append(c.id)

    with Flow.load_config(flow_file) as f:
        f.delete(ids=delete_ids)
    validate_index_size(0)

    mock = mocker.Mock()
    with Flow.load_config(flow_file) as f:
        f.search(inputs=_docs(), on_done=mock)
    mock.assert_called_once()
    validate_callback(mock, expect_matches(0))
コード例 #18
0
ファイル: app.py プロジェクト: jina-ai/legacy-examples
def run(task, top_k, indexer_query_type):
    """Run the `index` or `query` task for the configured dataset.

    :param task: either 'index' or 'query'
    :param top_k: number of results to retrieve (and the Recall@k cutoff)
    :param indexer_query_type: indexer variant used to build the query config
    :return: for 'query', the Recall@top_k as a percentage; None for 'index'
    :raises NotImplementedError: when `task` is neither 'index' nor 'query'
    """
    general_config()
    query_config(indexer_query_type)

    request_size = int(os.environ['JINA_REQUEST_SIZE'])
    dataset_name = os.environ['JINA_DATASET_NAME']
    data_dir = os.path.join(dataset_name, os.environ['JINA_TMP_DATA_DIR'])

    if task == 'index':
        data_path = os.path.join(data_dir, f'{dataset_name}_base.fvecs')
        # materialize once so the docs can be both counted and indexed
        data_func_list = list(index_generator(data_path))

        with Flow.load_config('flow-index.yml') as flow:
            # fixed: was len(list(data_func_list)) — a redundant full copy
            # of an already-materialized list just to count it
            with TimeContext(f'QPS: indexing {len(data_func_list)}',
                             logger=flow.logger):
                flow.index(inputs=data_func_list, request_size=request_size)

    elif task == 'query':
        evaluation_results = defaultdict(float)

        def _get_evaluation_results(evaluation_results: dict, resp):
            # keep the latest value for every evaluation metric seen
            for d in resp.search.docs:
                # renamed from `eval`, which shadowed the builtin
                for ev in d.evaluations:
                    evaluation_results[ev.op_name] = ev.value

        get_evaluation_results = partial(_get_evaluation_results,
                                         evaluation_results)

        data_path = os.path.join(data_dir, f'{dataset_name}_query.fvecs')
        groundtruth_path = os.path.join(data_dir,
                                        f'{dataset_name}_groundtruth.ivecs')
        query_input = list(evaluate_generator(data_path, groundtruth_path))

        with Flow.load_config('flow-query.yml') as flow:
            with TimeContext(f'QPS: query with {len(query_input)}',
                             logger=flow.logger):
                flow.search(inputs=query_input,
                            request_size=request_size,
                            on_done=get_evaluation_results,
                            top_k=top_k)

        logger.info(f'evaluation: {list(evaluation_results)}')
        evaluation = evaluation_results[list(evaluation_results.keys())[0]]
        # return for test
        logger.info(f'Recall@{top_k} ==> {100 * evaluation}')
        return 100 * evaluation
    else:
        raise NotImplementedError(
            f'unknown task: {task}. A valid task is either `index` or `query`.'
        )
コード例 #19
0
def test_flow_identical():
    """A flow loaded from YAML, one built via the fluent API, and one
    round-tripped through save_config must compare equal and wire up the
    same ZMQ socket types."""
    with open(os.path.join(cur_dir, '../yaml/test-flow.yml')) as fp:
        a = Flow.load_config(fp)

    b = (Flow()
         .add(name='chunk_seg', parallel=3)
         .add(name='wqncode1', parallel=2)
         .add(name='encode2', parallel=2, needs='chunk_seg')
         .join(['wqncode1', 'encode2']))

    a.save_config('test2.yml')

    c = Flow.load_config('test2.yml')

    assert a == b
    assert a == c

    with a as f:
        gateway = f._pod_nodes['gateway']
        assert gateway.head_args.socket_in == SocketType.PULL_CONNECT
        assert gateway.tail_args.socket_out == SocketType.PUSH_CONNECT

        # (pod name, expected head socket_in, expected tail socket_out);
        # every pod shares the ROUTER/DEALER fan-out and PULL tail-in.
        expectations = [
            ('chunk_seg', SocketType.PULL_BIND, SocketType.PUB_BIND),
            ('wqncode1', SocketType.SUB_CONNECT, SocketType.PUSH_CONNECT),
            ('encode2', SocketType.SUB_CONNECT, SocketType.PUSH_CONNECT),
        ]
        for pod_name, head_in, tail_out in expectations:
            node = f._pod_nodes[pod_name]
            assert node.head_args.socket_in == head_in
            assert node.head_args.socket_out == SocketType.ROUTER_BIND
            for arg in node.peas_args['peas']:
                assert arg.socket_in == SocketType.DEALER_CONNECT
                assert arg.socket_out == SocketType.PUSH_CONNECT
            assert node.tail_args.socket_in == SocketType.PULL_BIND
            assert node.tail_args.socket_out == tail_out

    rm_files(['test2.yml'])
コード例 #20
0
ファイル: app.py プロジェクト: wonkday/examples
def main(task, data_path, num_docs, batch_size, image_path, text_query, overwrite_workspace):
    """Entry point: build the workspace index or run a query against it.

    ``task`` selects the mode: 'index' feeds ``num_docs`` docs from
    ``data_path`` through flow-index.yml; 'query' searches flow-query.yml
    with the given image path and text query.
    """
    config()
    image_paths = [image_path]
    text_queries = [text_query]
    if task == 'index':
        # optionally start from a clean workspace before (re-)indexing
        if overwrite_workspace:
            clean_workdir()
        with Flow.load_config('flow-index.yml') as flow:
            flow.index(index_generator(data_path, num_docs), batch_size=batch_size)
    elif task == 'query':
        with Flow.load_config('flow-query.yml') as flow:
            flow.search(input_fn=query_generator(image_paths, text_queries), on_done=print_result)
コード例 #21
0
def test_update_vector(config, mocker, flow_file):
    """Index docs, overwrite their embeddings via update, and verify that
    search results switch from the original to the updated embedding set."""
    NUMBER_OF_SEARCHES = 1
    original_docs = list(random_docs(0, 10))
    updated_docs = list(random_docs(0, 10))

    def validate_result_factory(has_changed):
        # Builds a response validator; `mock` is resolved lazily from the
        # enclosing scope, so reassigning it before each search is fine.
        def validate_results(resp):
            mock()
            assert len(resp.docs) == NUMBER_OF_SEARCHES
            original_hashes = {
                hash(d.embedding.tobytes()) for d in original_docs
            }
            updated_hashes = {
                hash(d.embedding.tobytes()) for d in updated_docs
            }
            for doc in resp.docs:
                assert len(doc.matches) == 9
                for match in doc.matches:
                    match_hash = hash(match.embedding.tobytes())
                    if has_changed:
                        assert match_hash not in original_hashes
                        assert match_hash in updated_hashes
                    else:
                        assert match_hash in original_hashes
                        assert match_hash not in updated_hashes

        return validate_results

    # initial indexing round
    with Flow.load_config(flow_file) as index_flow:
        index_flow.index(input_fn=original_docs)
    validate_index_size(10)

    # before the update, matches must come from the original embeddings
    mock = mocker.Mock()
    with Flow.load_config(flow_file) as search_flow:
        search_docs = list(random_docs(0, NUMBER_OF_SEARCHES))
        search_flow.search(
            input_fn=search_docs,
            output_fn=validate_result_factory(has_changed=False))
    mock.assert_called_once()

    # overwrite the stored vectors; index size must stay constant
    with Flow.load_config(flow_file) as index_flow:
        index_flow.update(input_fn=updated_docs)
    validate_index_size(10)

    # after the update, matches must come from the updated embeddings
    mock = mocker.Mock()
    with Flow.load_config(flow_file) as search_flow:
        search_flow.search(input_fn=random_docs(0, NUMBER_OF_SEARCHES),
                           output_fn=validate_result_factory(has_changed=True))
    mock.assert_called_once()
コード例 #22
0
ファイル: test_helloworld.py プロジェクト: abaelhe/jina
def test_helloworld_flow_dry_run(tmpdir):
    """Smoke test: both hello-world flows must build and tear down cleanly."""
    args = set_hw_parser().parse_args([])

    # the YAMLs read their topology parameters from the environment
    os.environ['RESOURCE_DIR'] = resource_filename('jina', 'resources')
    os.environ['SHARDS'] = str(args.shards)
    os.environ['PARALLEL'] = str(args.parallel)
    os.environ['HW_WORKDIR'] = str(tmpdir)

    # entering the context manager spawns every pod; exiting shuts them down
    for flow_yml in ('helloworld.flow.index.yml', 'helloworld.flow.query.yml'):
        with Flow.load_config(resource_filename('jina', '/'.join(('resources', flow_yml)))):
            pass
コード例 #23
0
ファイル: test_helloworld.py プロジェクト: abaelhe/jina
def test_helloworld_flow(tmpdir):
    """Download the hello-world dataset and index it through the index flow."""
    args = set_hw_parser().parse_args([])

    # the YAMLs read their topology parameters from the environment
    os.environ['RESOURCE_DIR'] = resource_filename('jina', 'resources')
    os.environ['SHARDS'] = str(args.shards)
    os.environ['PARALLEL'] = str(args.parallel)
    os.environ['HW_WORKDIR'] = str(tmpdir)

    index_flow = Flow.load_config(
        resource_filename('jina', '/'.join(('resources', 'helloworld.flow.index.yml'))))

    targets = {
        'index': {
            'url': args.index_data_url,
            'filename': os.path.join(tmpdir, 'index-original'),
        },
        'query': {
            'url': args.query_data_url,
            'filename': os.path.join(tmpdir, 'query-original'),
        },
    }

    # fetch the dataset; download_data presumably attaches the parsed arrays
    # under each target's 'data' key -- TODO confirm against its implementation
    Path(tmpdir).mkdir(parents=True, exist_ok=True)
    download_data(targets)

    # run the indexing end to end
    with index_flow:
        index_flow.index(_input_ndarray(targets['index']['data']),
                         request_size=args.index_request_size)
コード例 #24
0
ファイル: test_helloworld.py プロジェクト: tyunist/jina
def test_helloworld_flow(tmpdir):
    """Download the hello-world dataset and index it via a gRPC py_client."""
    args = set_hw_parser().parse_args([])

    # the YAMLs read their topology parameters from the environment
    os.environ['RESOURCE_DIR'] = resource_filename('jina', 'resources')
    os.environ['SHARDS'] = str(args.shards)
    os.environ['PARALLEL'] = str(args.parallel)
    os.environ['HW_WORKDIR'] = str(tmpdir)
    os.environ['WITH_LOGSERVER'] = str(args.logserver)

    index_flow = Flow.load_config(
        resource_filename('jina', '/'.join(
            ('resources', 'helloworld.flow.index.yml'))))

    targets = {
        'index': {
            'url': args.index_data_url,
            'filename': os.path.join(tmpdir, 'index-original'),
        },
        'query': {
            'url': args.query_data_url,
            'filename': os.path.join(tmpdir, 'query-original'),
        },
    }

    # fetch the dataset; download_data presumably attaches the parsed arrays
    # under each target's 'data' key -- TODO confirm against its implementation
    Path(tmpdir).mkdir(parents=True, exist_ok=True)
    download_data(targets)

    # index through an external client instead of the Flow object directly
    with index_flow:
        client = py_client(
            host=index_flow.host,
            port_expose=index_flow.port_expose,
        )
        client.index(input_numpy(targets['index']['data']),
                     batch_size=args.index_batch_size)
コード例 #25
0
    def test_flow_log_server(self):
        """Exercise every HTTP endpoint of the log server, then verify it
        actually stops serving after the shutdown endpoint is hit."""
        f = Flow.load_config('../yaml/test_log_server.yml')
        with f:
            self.assertTrue(hasattr(JINA_GLOBAL.logserver, 'ready'))

            base = JINA_GLOBAL.logserver.address

            # readiness probe
            resp = requests.get(base + '/status/ready', timeout=5)
            assert resp.status_code == 200

            # the YAML endpoint serves the flow definition
            resp = requests.get(base + '/data/yaml', timeout=5)
            self.assertTrue(resp.text.startswith('!Flow'))
            assert resp.status_code == 200

            # the pod endpoint returns a JSON body with a 'pod' key
            resp = requests.get(base + '/data/api/pod', timeout=5)
            self.assertTrue('pod' in resp.json())
            assert resp.status_code == 200

            # ask the server to shut itself down
            resp = requests.get(base + '/action/shutdown', timeout=5)
            assert resp.status_code == 200

            # once stopped, the ready endpoint must refuse connections
            with self.assertRaises(requests.exceptions.ConnectionError):
                requests.get(base + '/status/ready', timeout=5)
コード例 #26
0
 def test_load_flow_from_yaml(self):
     """Load a Flow from YAML and export it as a docker-swarm YAML file."""
     with open(os.path.join(cur_dir, '../yaml/test-flow.yml')) as fp:
         a = Flow.load_config(fp)
         # entering the Flow context (the trailing `, a`) builds the pods,
         # giving to_swarm_yaml concrete topology to serialize
         with open(os.path.join(cur_dir, '../yaml/swarm-out.yml'),
                   'w') as fp, a:
             a.to_swarm_yaml(fp)
         # register the generated file for cleanup after the test
         self.add_tmpfile(os.path.join(cur_dir, '../yaml/swarm-out.yml'))
コード例 #27
0
ファイル: test_flow.py プロジェクト: alfred297/Pooja-AI
    def test_flow_identical(self):
        """A Flow loaded from YAML must equal the same Flow built with the
        fluent API, and must survive a save/load round trip."""
        with open('yaml/test-flow.yml') as fp:
            a = Flow.load_config(fp)

        b = (Flow()
             .add(name='chunk_seg', replicas=3)
             .add(name='wqncode1', replicas=2)
             .add(name='encode2', replicas=2, needs='chunk_seg')
             .join(['wqncode1', 'encode2']))

        # round-trip through a config file
        a.save_config('test2.yml')
        c = Flow.load_config('test2.yml')

        self.assertEqual(a, b)
        self.assertEqual(a, c)
        self.add_tmpfile('test2.yml')
コード例 #28
0
def test_flow_with_pod_envs():
    """Pod-level env vars declared in the Flow YAML must be visible only
    inside the pod they are declared for; parent-process vars propagate
    everywhere. The checker classes are referenced by name from the YAML,
    so their names must not change."""
    f = Flow.load_config('yaml/flow-with-envs.yml')

    class EnvChecker1(BaseExecutor):
        """Class used in Flow YAML"""

        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            # pea/pod-specific vars set in the YAML for this pod
            for key, expected in (('key1', 'value1'), ('key2', 'value2')):
                assert os.environ[key] == expected
            # inherited from the parent process
            assert os.environ['key_parent'] == 'value3'

    class EnvChecker2(BaseExecutor):
        """Class used in Flow YAML"""

        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            # the other pod's vars must not leak into this one
            for key in ('key1', 'key2'):
                assert key not in os.environ
            # inherited from the parent process
            assert os.environ['key_parent'] == 'value3'

    # building the flow instantiates the executors, running the asserts
    with f:
        pass
コード例 #29
0
def test_flow_log_server():
    """Exercise every HTTP endpoint of the log server, then verify it stops
    serving (connection refused or read timeout) after shutdown."""
    f = Flow.load_config(str(cur_dir.parent / 'yaml' / 'test_log_server.yml'))
    with f:
        assert hasattr(JINA_GLOBAL.logserver, 'ready')

        base = JINA_GLOBAL.logserver.address

        # readiness probe
        resp = requests.get(base + '/status/ready', timeout=5)
        assert resp.status_code == 200

        # the YAML endpoint serves the flow definition
        resp = requests.get(base + '/data/yaml', timeout=5)
        assert resp.text.startswith('!Flow')
        assert resp.status_code == 200

        # the pod endpoint returns a JSON body with a 'pod' key
        resp = requests.get(base + '/data/api/pod', timeout=5)
        assert 'pod' in resp.json()
        assert resp.status_code == 200

        # ask the server to shut itself down
        resp = requests.get(base + '/action/shutdown', timeout=5)
        assert resp.status_code == 200

        # once stopped, the ready endpoint must be unreachable
        with pytest.raises((requests.exceptions.ConnectionError,
                            requests.exceptions.ReadTimeout)):
            requests.get(base + '/status/ready', timeout=5)
コード例 #30
0
ファイル: test_flow.py プロジェクト: zatcsc/jina
def test_load_flow_from_yaml():
    """Load a Flow from YAML and export it as a docker-swarm YAML file."""
    with open(os.path.join(cur_dir, '../yaml/test-flow.yml')) as fp:
        a = Flow.load_config(fp)
        # entering the Flow context (the trailing `, a`) builds the pods,
        # giving to_swarm_yaml concrete topology to serialize
        with open(os.path.join(cur_dir, '../yaml/swarm-out.yml'),
                  'w') as fp, a:
            a.to_swarm_yaml(fp)
        # remove the generated file so the test leaves no artifacts behind
        rm_files([os.path.join(cur_dir, '../yaml/swarm-out.yml')])