def __init__(self, *args, **kwargs):
    """Initialise the parent, then fill ``self.db`` with three documents.

    All positional and keyword arguments are forwarded unchanged to the
    parent initialiser. Afterwards ``self.db`` is a dict holding documents
    with ids ``'01'``, ``'02'`` and ``'04'``; each one carries the tag
    ``groundtruth = True``, is keyed by ``uid.id2hash`` of its id, and is
    stored in serialized form.
    """
    super().__init__(*args, **kwargs)

    store = {}
    # Same ids, in the same insertion order, as the fixed fixture set.
    for doc_id in ('01', '02', '04'):
        document = jina_pb2.Document()
        document.id = doc_id
        document.tags['groundtruth'] = True
        store[uid.id2hash(document.id)] = document.SerializeToString()
    self.db = store
def test_kv_index_driver(mock_groundtruth_indexer, simple_kv_indexer_driver, documents):
    """Check that applying the driver stores every document in the indexer.

    The driver is attached to the mock indexer and applied to ``documents``.
    The indexer's ``docs`` mapping must then hold exactly five entries, and
    each document must be found under ``uid.id2hash(doc.id)`` with a value
    equal to its serialized form.
    """
    simple_kv_indexer_driver.attach(executor=mock_groundtruth_indexer, pea=None)
    simple_kv_indexer_driver._apply_all(documents)

    stored = mock_groundtruth_indexer.docs
    assert len(stored) == 5
    for document in documents:
        key = uid.id2hash(document.id)
        assert stored[key] == document.SerializeToString()
Exemple #3
0
def test_redis_db_indexer(metas):
    """Round-trip documents through ``RedisDBIndexer`` and query one back.

    Five random documents (three chunks each) are added under their hashed
    ids using one indexer instance. A second instance, built from the same
    ``metas``, is queried with the key of a randomly chosen document. Every
    returned result must carry that key (as the encoded string of the hash)
    and a serialized document whose text matches the chosen one.
    """
    num_docs = 5
    docs = list(random_docs(num_docs=num_docs, chunks_per_doc=3))
    hashed_ids = [uid.id2hash(doc.id) for doc in docs]
    payloads = [doc.SerializeToString() for doc in docs]

    # Pick the document to look up again after indexing.
    target = docs[random.randrange(num_docs)]
    query_key = uid.id2hash(target.id)
    query_text = target.text

    with RedisDBIndexer(metas=metas) as idx:
        idx.add(keys=hashed_ids, values=payloads)

    with RedisDBIndexer(metas=metas) as redis_query:
        expected_key = str(query_key).encode()
        for result in redis_query.query(key=query_key):
            assert result is not None
            assert result['key'] == expected_key
            parsed = jina_pb2.Document()
            parsed.ParseFromString(result['values'])
            assert parsed.text == query_text
Exemple #4
0
def test_cache_driver_from_file():
    """Exercise the cache driver against a ``DocIDCache`` seeded from a file.

    The hashed ids of ten random documents are written to ``filename`` as
    raw ``int64`` bytes before the cache is opened on that file. Applying
    the driver to those same documents must raise ``NotImplementedError``;
    applying it to ten fresh documents must not. Finally the file must
    still exist, after which it is removed.
    """
    known_docs = list(random_docs(10))
    with open(filename, 'wb') as fp:
        hashed_ids = np.array(
            [uid.id2hash(d.id) for d in known_docs], dtype=np.int64
        )
        fp.write(hashed_ids.tobytes())

    driver = MockCacheDriver()
    with DocIDCache(filename) as executor:
        assert not executor.handler_mutex
        driver.attach(executor=executor, pea=None)

        # duplicate docs
        with pytest.raises(NotImplementedError):
            driver._traverse_apply(known_docs)

        # new docs
        fresh_docs = list(random_docs(10))
        driver._traverse_apply(fresh_docs)

        # check persistence
        assert os.path.exists(filename)
        rm_files([filename])