def __init__(self, *args, **kwargs):
    """Forward all arguments to the parent and pre-fill ``self.db``.

    ``self.db`` maps ``uid.id2hash(doc.id)`` to the serialized bytes of
    three documents with ids ``'01'``, ``'02'`` and ``'04'``; each of them
    carries ``tags['groundtruth'] = True``.
    """
    super().__init__(*args, **kwargs)

    self.db = {}
    # Same insertion order as the ids listed here: '01', '02', '04'.
    for doc_id in ('01', '02', '04'):
        groundtruth = jina_pb2.Document()
        groundtruth.id = doc_id
        groundtruth.tags['groundtruth'] = True
        self.db[uid.id2hash(groundtruth.id)] = groundtruth.SerializeToString()
def test_kv_index_driver(mock_groundtruth_indexer, simple_kv_indexer_driver, documents):
    """Apply the KV index driver to ``documents`` and inspect the executor.

    After ``_apply_all`` the attached executor's ``docs`` mapping must hold
    five entries, and each document's serialized bytes must be stored under
    ``uid.id2hash(doc.id)``.
    """
    simple_kv_indexer_driver.attach(executor=mock_groundtruth_indexer, pea=None)
    simple_kv_indexer_driver._apply_all(documents)

    stored = mock_groundtruth_indexer.docs
    assert len(stored) == 5
    for doc in documents:
        key = uid.id2hash(doc.id)
        assert stored[key] == doc.SerializeToString()
def test_redis_db_indexer(metas):
    """Index random documents in Redis, then query one of them back by key.

    Five random documents are added through one ``RedisDBIndexer``; a second
    instance built from the same ``metas`` is queried with the hashed id of a
    randomly picked document, and every returned record is checked against
    that document's key and text.
    """
    num_docs = 5
    docs = list(random_docs(num_docs=num_docs, chunks_per_doc=3))

    keys, values = [], []
    for doc in docs:
        keys.append(uid.id2hash(doc.id))
        values.append(doc.SerializeToString())

    # Pick the document to look up.
    target = docs[random.randint(0, num_docs - 1)]
    query_key = uid.id2hash(target.id)
    query_text = target.text
    # Records are compared against the key as encoded decimal text.
    expected_key = str(query_key).encode()

    with RedisDBIndexer(metas=metas) as writer:
        writer.add(keys=keys, values=values)

    with RedisDBIndexer(metas=metas) as reader:
        for hit in reader.query(key=query_key):
            assert hit is not None
            assert hit['key'] == expected_key
            parsed = jina_pb2.Document()
            parsed.ParseFromString(hit['values'])
            assert parsed.text == query_text
def test_cache_driver_from_file():
    """Check a ``DocIDCache`` that is opened on a pre-written id file.

    The file at ``filename`` is filled with the int64 hashes of ten random
    documents' ids before the cache is opened. Applying those same documents
    through the driver is expected to raise ``NotImplementedError``, while a
    fresh batch of random documents is expected to pass. The file must still
    exist once the cache has been closed.

    The file is removed in a ``finally`` clause so that a failing assertion
    does not leave it behind for later tests.
    """
    docs = list(random_docs(10))
    with open(filename, 'wb') as fp:
        fp.write(
            np.array([uid.id2hash(d.id) for d in docs], dtype=np.int64).tobytes())

    try:
        driver = MockCacheDriver()
        with DocIDCache(filename) as executor:
            assert not executor.handler_mutex
            driver.attach(executor=executor, pea=None)

            with pytest.raises(NotImplementedError):
                # duplicate docs
                driver._traverse_apply(docs)

            # new docs
            docs = list(random_docs(10))
            driver._traverse_apply(docs)

        # check persistence
        assert os.path.exists(filename)
    finally:
        # Only clean up what is actually there, so a missing file cannot
        # turn the cleanup itself into a second error hiding the real one.
        if os.path.exists(filename):
            rm_files([filename])