def test_faiss_index_save_and_load(tmp_path):
    """Round-trip a FAISS index through ``save`` and ``load``.

    Writes ``DOCUMENTS`` into a fresh store backed by a SQLite file under
    ``tmp_path``, saves the index to disk, resets the in-memory index, and
    then loads a second store from the saved file and the same database.
    The loaded store must report one vector and one document per entry in
    ``DOCUMENTS``.
    """
    sql_url = f"sqlite:////{tmp_path/'haystack_test.db'}"
    faiss_file = tmp_path / "haystack_test_faiss"

    original_store = FAISSDocumentStore(sql_url=sql_url, index="haystack_test")
    original_store.write_documents(DOCUMENTS)

    # Persist the index before wiping the in-memory copy.
    original_store.save(faiss_file)

    # Empty the in-memory index so a successful restore below can only
    # come from the saved file, then confirm it really is empty.
    index_name = original_store.index
    original_store.faiss_indexes[index_name].reset()
    assert original_store.faiss_indexes[index_name].ntotal == 0

    # Build a second store from the saved index and the same database.
    restored_store = FAISSDocumentStore.load(
        sql_url=sql_url,
        faiss_file_path=faiss_file,
        index=original_store.index,
    )

    # Both the vector count and the document count must match the input.
    restored_index = restored_store.faiss_indexes[original_store.index]
    assert restored_index.ntotal == len(DOCUMENTS)
    assert len(restored_store.get_all_documents()) == len(DOCUMENTS)
Example #2
0
#                                   max_seq_len_passage=256,
#                                   batch_size=16,
#                                   use_gpu=True,
#                                   embed_title=True,
#                                   use_fast_tokenizers=True)


# Load the FAQ table. A "question" column is required below; any other
# columns are carried along unchanged into the indexed records.
# Missing cells are replaced with empty strings as minimal cleaning.
df = pd.read_csv("faq.csv").fillna(value="")
df["question"] = df["question"].map(lambda raw: raw.strip())
print(df.head())

# Embeddings are not precomputed into the dataframe here (an earlier
# variant called retriever2.embed_queries on the question list); they are
# filled in by update_embeddings() further down.
# "text" is the field to be converted to embeddings, so the question
# column is renamed accordingly.
df = df.rename(columns={"question": "text"})

# One dict per dataframe row; replace whatever the DocumentStore held
# before with these records.
docs_to_index = df.to_dict(orient="records")
document_store.delete_all_documents()
document_store.write_documents(docs_to_index)


retriever2 = EmbeddingRetriever(
    document_store=document_store,
    embedding_model="sentence_bert-saved",
    use_gpu=False,
)
document_store.update_embeddings(retriever2)
document_store.save("faiss2")