コード例 #1
0
def test_faiss_retrieving(index_factory):
    """Write DOCUMENTS to a FAISS-backed store and query it with a retriever.

    Args:
        index_factory: String handed to ``FAISSDocumentStore`` as
            ``faiss_index_factory_str``. If it contains "ivf"
            (case-insensitive), the index is trained before documents
            are written.
    """
    document_store = FAISSDocumentStore(
        sql_url="sqlite:///haystack_test_faiss.db",
        faiss_index_factory_str=index_factory,
    )
    # Clear the "document" index first so the length assertion below only
    # counts what this test writes.
    document_store.delete_all_documents(index="document")

    # Only factory strings mentioning "ivf" get a training pass before writing.
    if "ivf" in index_factory.lower():
        document_store.train_index(DOCUMENTS)
    document_store.write_documents(DOCUMENTS)

    retriever = EmbeddingRetriever(
        document_store=document_store,
        embedding_model="deepset/sentence_bert",
        use_gpu=False,
    )
    result = retriever.retrieve(query="How to test this?")

    # Every written document is expected back, each as a Document.
    assert len(result) == len(DOCUMENTS)
    # isinstance is the idiomatic type check and also accepts subclasses.
    assert isinstance(result[0], Document)
コード例 #2
0
def test_faiss_passing_index_from_outside():
    """Check that a caller-built FAISS index can back the document store.

    An ``IndexIVFFlat`` is constructed here, passed to
    ``FAISSDocumentStore`` through ``faiss_index``, trained, and filled
    with DOCUMENTS. The stored documents are then compared against
    DOCUMENTS with ``check_data_correctness``.
    """
    vector_dim = 768
    n_lists = 2
    flat_quantizer = faiss.IndexFlatIP(vector_dim)
    external_index = faiss.IndexIVFFlat(
        flat_quantizer, vector_dim, n_lists, faiss.METRIC_INNER_PRODUCT
    )
    external_index.nprobe = 2

    store = FAISSDocumentStore(
        faiss_index=external_index,
        sql_url="sqlite:///haystack_test_faiss.db",
    )
    store.delete_all_documents(index="document")

    # An IVF index has to be trained before any documents are added to it.
    store.train_index(DOCUMENTS)
    store.write_documents(documents=DOCUMENTS, index="document")

    # Compare what the store returns with the documents that were written.
    stored_docs = store.get_all_documents(index="document")
    check_data_correctness(stored_docs, DOCUMENTS)
コード例 #3
0
def test_faiss_passing_index_from_outside():
    """Check that documents stored via a caller-built FAISS index get vector ids.

    An ``IndexIVFFlat`` is constructed here, passed to
    ``FAISSDocumentStore`` through ``faiss_index``, trained, and filled
    with DOCUMENTS. Each stored document must then carry a
    ``vector_id`` meta entry whose integer value lies in 0..7.
    """
    vector_dim = 768
    n_lists = 2
    flat_quantizer = faiss.IndexFlatIP(vector_dim)
    external_index = faiss.IndexIVFFlat(
        flat_quantizer, vector_dim, n_lists, faiss.METRIC_INNER_PRODUCT
    )
    # NOTE(review): presumably the hashtable direct map is needed so the store
    # can address vectors by id in an IVF index -- confirm against FAISS docs.
    external_index.set_direct_map_type(faiss.DirectMap.Hashtable)
    external_index.nprobe = 2

    store = FAISSDocumentStore(
        faiss_index=external_index,
        sql_url="sqlite:///haystack_test_faiss.db",
    )
    store.delete_all_documents(index="document")

    # An IVF index has to be trained before any documents are added to it.
    store.train_index(DOCUMENTS)
    store.write_documents(documents=DOCUMENTS, index="document")

    stored_docs = store.get_all_documents(index="document")

    # Every stored document must be linked to a vector id in the expected range.
    vector_ids = (int(stored.meta["vector_id"]) for stored in stored_docs)
    for vector_id in vector_ids:
        assert 0 <= vector_id <= 7