Code example #1
File: test_document.py  Project: vishalbelsare/jina
import numpy as np

# assumed imports for this excerpt (jina 2.x layout)
from jina import Document, DocumentArray
from jina.logging.profile import TimeContext


def test_content_hash():
    d0 = Document(content='a')
    assert d0.content

    empty_doc = Document()
    assert not empty_doc.content
    assert empty_doc.content_hash

    # warning: a Doc with empty content will have a hash -- it hashes ''
    assert empty_doc.content_hash != d0.content_hash

    d1 = Document(content='text')
    init_content_hash = d1.content_hash
    assert init_content_hash
    assert init_content_hash == d1.content_hash

    d2 = Document(content='text')
    assert init_content_hash == d2.content_hash

    d3 = Document(content='text1')
    assert init_content_hash != d3.content_hash

    d4 = Document(id='a')
    d5 = Document(id='b')
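    # ids differ but the (empty) content is the same, so the hashes match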
    assert d5.content_hash == d4.content_hash

    d6 = Document(d2.proto)
    assert d6.content_hash == d2.content_hash

    d7 = Document(d2)
    assert d6.content_hash == d2.content_hash == d7.content_hash

    # test hashing ndarray (blob) content
    d8 = Document(blob=np.array([1, 3, 5]))
    d9 = Document(blob=np.array([2, 4, 6]))
    d10 = Document(blob=np.array([1, 3, 5]))
    assert d8.content_hash != d9.content_hash
    assert d8.content_hash == d10.content_hash

    # test hashing bytes (buffer) content
    d11 = Document(content=b'buffer1')
    d12 = Document(content=b'buffer2')
    d13 = Document(content=b'buffer1')
    assert d11.content_hash != d12.content_hash
    assert d11.content_hash == d13.content_hash

    # document with more fields
    d14 = Document(uri='http://test1.com',
                   tags={'key1': 'value1'},
                   granularity=2,
                   adjacency=2)
    d15 = Document(uri='http://test2.com',
                   tags={'key1': 'value2'},
                   granularity=3,
                   adjacency=2)
    d16 = Document(uri='http://test2.com',
                   tags={'key1': 'value2'},
                   granularity=3,
                   adjacency=2)
    assert d14.content_hash != d15.content_hash
    assert d15.content_hash == d16.content_hash

    nr = 10
    with TimeContext(f'creating {nr} docs without hashing content at init'):
        da = DocumentArray()
        for _ in range(nr):
            d = Document(content='text' * 2)
            da.append(d)

    with TimeContext(f'creating {nr} docs with hashing content at init'):
        da = DocumentArray()
        for _ in range(nr):
            d = Document(content='text' * 2)
            da.append(d)

        with TimeContext(f'iterating through docs with content hash'):
            for d in da:
                assert d.content_hash
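The behavior exercised above -- equal content gives equal hashes, empty content still hashes (it hashes ''), and metadata such as uri, tags and granularity also feeds the hash -- can be illustrated with a small hashlib-based sketch. This is a hypothetical illustration, not jina's internal hashing:

import hashlib

def toy_content_hash(content=b'', uri='', tags=None, granularity=0, adjacency=0):
    # hash the content plus the metadata fields that the assertions
    # above show participate in content_hash
    h = hashlib.blake2b(digest_size=16)
    if isinstance(content, str):
        content = content.encode()
    h.update(content)
    h.update(uri.encode())
    h.update(repr(sorted((tags or {}).items())).encode())
    h.update(bytes([granularity, adjacency]))
    return h.hexdigest()

assert toy_content_hash(content='text') == toy_content_hash(content='text')
assert toy_content_hash(content='text') != toy_content_hash(content='text1')
assert toy_content_hash()  # an empty document still yields a hash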
Code example #2
import time

# assumed import path for this excerpt
from jina.logging.profile import TimeContext


def test_time_context():
    with TimeContext('dummy') as tc:
        time.sleep(2)

    assert int(tc.duration) == 2
    assert tc.readable_duration == '2 seconds'
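The two attributes asserted here, duration and readable_duration, are all the examples on this page rely on. A minimal sketch of an equivalent context manager -- a hypothetical stand-in, not jina's implementation:

import time


class MiniTimeContext:
    """Hypothetical stand-in mimicking the behavior asserted above."""

    def __init__(self, msg: str):
        self.msg = msg
        self.duration = 0.0

    def __enter__(self):
        self._start = time.perf_counter()
        return self

    def __exit__(self, *exc):
        self.duration = time.perf_counter() - self._start
        self.readable_duration = f'{int(round(self.duration))} seconds'
        print(f'{self.msg} took {self.readable_duration}')


with MiniTimeContext('dummy') as tc:
    time.sleep(2)
assert int(tc.duration) == 2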
Code example #3
File: test_dump_dbms.py  Project: yaneshtyagi/jina
def assert_dump_data(dump_path, docs, shards, pea_id):
    size_shard = len(docs) // shards
    size_shard_modulus = len(docs) % shards
    ids_dump, vectors_dump = import_vectors(
        dump_path,
        str(pea_id),
    )
    if pea_id == shards - 1:
        docs_expected = docs[pea_id * size_shard:
                             (pea_id + 1) * size_shard + size_shard_modulus]
    else:
        docs_expected = docs[pea_id * size_shard:(pea_id + 1) * size_shard]
    print(f'### pea {pea_id} has {len(docs_expected)} docs')

    ids_dump = list(ids_dump)
    vectors_dump = list(vectors_dump)
    np.testing.assert_equal(ids_dump, [d.id for d in docs_expected])
    np.testing.assert_allclose(vectors_dump,
                               [d.embedding for d in docs_expected])

    _, metas_dump = import_metas(
        dump_path,
        str(pea_id),
    )
    metas_dump = list(metas_dump)
    np.testing.assert_equal(
        metas_dump,
        [
            DBMSIndexDriver._doc_without_embedding(d).SerializeToString()
            for d in docs_expected
        ],
    )

    # assert with Indexers
    # TODO currently metas are only passed to the parent Compound, not to the inner components
    with TimeContext(f'### reloading {len(docs_expected)}'):
        # noinspection PyTypeChecker
        cp: CompoundQueryExecutor = BaseQueryIndexer.load_config(
            'indexer_query.yml',
            pea_id=pea_id,
            metas={
                'workspace': os.path.join(dump_path, 'new_ws'),
                'dump_path': dump_path,
            },
        )
    for c in cp.components:
        assert c.size == len(docs_expected)

    # test with the inner indexers separate from the Compound
    for i, indexer_file in enumerate(
        ['basic/query_np.yml', 'basic/query_kv.yml']):
        indexer = BaseQueryIndexer.load_config(
            indexer_file,
            pea_id=pea_id,
            metas={
                'workspace': os.path.realpath(
                    os.path.join(dump_path, f'new_ws-{i}')),
                'dump_path': dump_path,
            },
        )
        assert indexer.size == len(docs_expected)
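The slicing at the top of assert_dump_data gives every shard len(docs) // shards documents and hands the remainder to the last shard. A hypothetical standalone helper that makes the partitioning explicit:

def docs_for_shard(docs, shards, pea_id):
    # each shard gets size_shard docs; the last shard also absorbs
    # the remainder, mirroring docs_expected in the test above
    size_shard = len(docs) // shards
    start = pea_id * size_shard
    end = start + size_shard
    if pea_id == shards - 1:
        end += len(docs) % shards
    return docs[start:end]

# e.g. 10 docs over 3 shards -> sizes 3, 3, 4
assert [len(docs_for_shard(list(range(10)), 3, i)) for i in range(3)] == [3, 3, 4]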
Code example #4
def test_flow_slow_executor_inter():
    f = (Flow()
         .add(uses='SlowExecutor', parallel=3)
         .add(uses='SlowExecutor', parallel=3))

    with f, TimeContext('start flow') as tc:
        assert tc.now() < 8
Code example #5
File: test_asyncflow.py  Project: yk/jina
async def test_run_async_flow_other_task_concurrent():
    with TimeContext('concurrent await') as t:
        await concurrent_main()

    # some dispatch cost: this can't finish in exactly 5s, but usually takes <7s
    assert t.duration < 8
Code example #6
File: test_dump_dbms.py  Project: yaneshtyagi/jina
def test_threading_query_while_reloading(tmpdir, nr_docs, emb_size, mocker,
                                         reraise):
    global operations

    def update_rolling(flow, pod_name, dump_path):
        with reraise:
            flow.rolling_update(pod_name, dump_path)

    # TODO better way to test async procedure call order
    # patch CompoundPod.rolling_update so the call order can be observed
    def _rolling_update(self, dump_path):
        _print_and_append_to_ops('### calling patched rolling update')
        for i in range(len(self.replicas)):
            _print_and_append_to_ops(f'### replica {i} -- starting')
            replica = self.replicas[i]
            replica.close()
            _print_and_append_to_ops(f'### replica {i} -- went offline')
            # wait for a query to hit the system while one replica is offline
            time.sleep(3)
            _args = self.replicas_args[i]
            _args.noblock_on_start = False
            _args.dump_path = dump_path
            new_replica = Pod(_args)
            self.enter_context(new_replica)
            _print_and_append_to_ops(f'### replica {i} - new instance online')
            self.replicas[i] = new_replica
            time.sleep(5)

    mocker.patch(
        'jina.peapods.pods.compoundpod.CompoundPod.rolling_update',
        new_callable=lambda: _rolling_update,
    )

    docs = list(get_documents(nr=nr_docs, index_start=0, emb_size=emb_size))
    assert len(docs) == nr_docs
    nr_search = 3

    dump_path = os.path.join(str(tmpdir), 'dump_dir')
    os.environ['DBMS_WORKSPACE'] = os.path.join(str(tmpdir), 'index_ws')
    os.environ['QUERY_WORKSPACE'] = os.path.join(str(tmpdir), 'query_ws')

    os.environ['USES_AFTER'] = '_pass'
    os.environ['QUERY_SHARDS'] = str(1)

    with Flow.load_config('flow_dbms.yml') as flow_dbms:
        with Flow.load_config('flow_query.yml') as flow_query:
            client_dbms = get_client(flow_dbms.port_expose)
            client_query = get_client(flow_query.port_expose)

            with TimeContext(f'### indexing {len(docs)} docs'):
                client_dbms.index(docs)

            with TimeContext(f'### dumping {len(docs)} docs'):
                flow_dbms.dump('indexer_dbms', dump_path=dump_path, shards=1)

            dir_size = path_size(dump_path)
            print(f'### dump path size: {dir_size} MBs')

            # test with query while reloading async.
            t = Thread(target=update_rolling,
                       args=(flow_query, 'indexer_query', dump_path))

            # searching on the still empty replica
            t.start()
            time.sleep(1)  # wait a bit for replica 1 to be offline
            _print_and_append_to_ops('### querying -- expecting empty')
            result = client_query.search(docs[:nr_search])
            _validate_results_empty(result[0])

            t.join()

            # done with both -- we should have matches now
            cb = functools.partial(_validate_results_nonempty, nr_search,
                                   nr_docs, emb_size)

            _print_and_append_to_ops('### querying -- expecting data')
            result = client_query.search(docs[:nr_search])
            cb(result[0])

    # collect logs and assert order of operations
    assert _assert_order_ops(
        operations,
        [
            '### replica 0 -- went offline',
            '### querying -- expecting empty',
            '### replica 0 - new instance online',
            '### replica 1 -- went offline',
            '### replica 1 - new instance online',
            '### querying -- expecting data',
        ],
    )
    operations = []
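_assert_order_ops is not shown on this page; judging from its usage above, a plausible sketch is a subsequence check: every expected line must occur in the collected log in the given relative order. A hypothetical implementation:

def _assert_order_ops(operations, expected):
    # hypothetical: check that `expected` appears as a subsequence
    # of `operations` (substring match per log line)
    it = iter(operations)
    return all(any(exp in op for op in it) for exp in expected)

ops = ['### replica 0 -- went offline', 'noise', '### querying -- expecting empty']
assert _assert_order_ops(ops, ['### replica 0 -- went offline',
                               '### querying -- expecting empty'])
assert not _assert_order_ops(ops, ['### querying -- expecting empty',
                                   '### replica 0 -- went offline'])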
Code example #7
File: test_asyncflow.py  Project: yk/jina
async def test_run_async_flow_other_task_sequential():
    with TimeContext('sequential await') as t:
        await sequential_main()

    assert t.duration >= 10
Code example #8
async def test_run_async_flow_other_task_concurrent(protocol):
    with TimeContext('concurrent await') as t:
        await concurrent_main(protocol)

    # some dispatch cost: this can't finish in exactly 5s, usually takes 7-8s, but must stay <10s
    assert t.duration < 10
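The assertions in the last two examples (sequential >= 10s, concurrent < 10s) follow from basic asyncio scheduling: awaiting two ~5s tasks one after the other sums their durations, while gathering them overlaps the waits. A self-contained sketch with dummy coroutines standing in for the real flow runs:

import asyncio
import time


async def fake_flow():
    await asyncio.sleep(5)  # stands in for one ~5s flow run


async def sequential_main():
    await fake_flow()
    await fake_flow()  # waits add up: ~10s total


async def concurrent_main():
    # waits overlap: ~5s plus some dispatch cost
    await asyncio.gather(fake_flow(), fake_flow())


start = time.perf_counter()
asyncio.run(sequential_main())
assert time.perf_counter() - start >= 10

start = time.perf_counter()
asyncio.run(concurrent_main())
assert time.perf_counter() - start < 10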
Code example #9
File: test_flow_start_noblock.py  Project: srbhr/jina
def test_flow_slow_executor_inter():
    f = (Flow()
         .add(uses='SlowExecutor', shards=3)
         .add(uses='SlowExecutor', shards=3))

    with f, TimeContext('start flow') as tc:
        assert tc.now() < 8
Code example #10
File: app.py  Project: jina-ai/legacy-examples
def index(num_doc, target: dict):
    f = Flow.load_config('flows/index.yml')
    with f:
        with TimeContext(f'QPS: indexing {num_doc}', logger=f.logger):
            f.index(index_generator(num_doc, target), request_size=2048)
Code example #11
def index():
    f = Flow.load_config('flows/index.yml')
    with f:
        input_docs = input_fn()
        with TimeContext(f'QPS: indexing {len(input_docs)}', logger=f.logger):
            f.index(input_docs, request_size=8)
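The 'QPS:' prefix in these last two labels reads the timing as throughput: documents indexed divided by the measured duration. A hypothetical post-run computation (the import path and the document count are assumptions, not taken from the examples):

from jina.logging.profile import TimeContext  # assumed import path

num_doc = 2048
with TimeContext(f'QPS: indexing {num_doc}') as tc:
    pass  # f.index(...) would run here
print(f'{num_doc / max(tc.duration, 1e-9):.1f} docs/s')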