Example #1
    def load(self):
        if self.finder and self.finder2:
            return
        if not self.document_store2:
            self.document_store2 = FAISSDocumentStore.load(
                sql_url=sqlUrlFAQ, faiss_file_path='faiss2')  # the FAISS index is saved during preprocessing, before it is loaded here
            self.initSql(url=sqlUrlFAQ, document_store=self.document_store2)
        # else:  # reset the session
        #     # self.document_store2.session.close()
        #     super(
        #         FAISSDocumentStore, self.document_store2).__init__(url=sqlUrlFAQ)
        if not self.retriever2:
            self.retriever2 = EmbeddingRetriever(document_store=self.document_store2,
                                                 embedding_model="sentence_bert-saved", use_gpu=False)
        if not self.finder2:
            self.finder2 = Finder(reader=None, retriever=self.retriever2)

        if not self.document_store:
            self.document_store = SQLDocumentStore(url=sqlUrl)
            # alternative: FAISSDocumentStore.load(faiss_file_path='faiss1', sql_url=sqlUrl)
            self.initSql(url=sqlUrl, document_store=self.document_store)
        # else:  # reset the session
        #     # self.document_store.session.close()
        #     super(
        #         FAISSDocumentStore, self.document_store).__init__(url=sqlUrl)
        # self.retriever = EmbeddingRetriever(  # reduce load time by sharing the same retriever and setting the store on the fly?
        #     document_store=self.document_store, embedding_model="sentence_bert-saved", use_gpu=False) if not self.retriever else self.retriever
        if not self.retriever:
            self.retriever = TfidfRetriever(document_store=self.document_store)
        if not self.reader:
            self.reader = FARMReader(model_name_or_path=modelDir,
                                     use_gpu=False, no_ans_boost=0)
        # alternative: reader = TransformersReader(model_name_or_path="distilbert-base-uncased-distilled-squad",
        #                                          tokenizer="distilbert-base-uncased", use_gpu=-1)
        if not self.finder:
            self.finder = Finder(self.reader, self.retriever)
Example #2
def test_faiss_index_save_and_load(tmp_path):
    document_store = FAISSDocumentStore(
        sql_url=f"sqlite:////{tmp_path/'haystack_test.db'}",
        index="haystack_test",
    )
    document_store.write_documents(DOCUMENTS)

    # test saving the index
    document_store.save(tmp_path / "haystack_test_faiss")

    # clear existing faiss_index
    document_store.faiss_indexes[document_store.index].reset()

    # test faiss index is cleared
    assert document_store.faiss_indexes[document_store.index].ntotal == 0

    # test loading the index
    new_document_store = FAISSDocumentStore.load(
        sql_url=f"sqlite:////{tmp_path/'haystack_test.db'}",
        faiss_file_path=tmp_path / "haystack_test_faiss",
        index=document_store.index)

    # check faiss index is restored
    assert new_document_store.faiss_indexes[
        document_store.index].ntotal == len(DOCUMENTS)
    # check if documents are restored
    assert len(new_document_store.get_all_documents()) == len(DOCUMENTS)
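
The test above relies on a module-level DOCUMENTS list that is not shown; a minimal sketch of what it could contain, assuming 768-dimensional embeddings to match the store's default vector size:

import numpy as np

DOCUMENTS = [
    {"name": "doc_0", "text": "Paris is the capital of France.",
     "embedding": np.random.rand(768).astype(np.float32)},
    {"name": "doc_1", "text": "Berlin is the capital of Germany.",
     "embedding": np.random.rand(768).astype(np.float32)},
    {"name": "doc_2", "text": "Madrid is the capital of Spain.",
     "embedding": np.random.rand(768).astype(np.float32)},
]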
Example #3
def test_faiss_index_save_and_load(document_store):
    document_store.write_documents(DOCUMENTS)

    # test saving the index
    document_store.save("haystack_test_faiss")

    # clear existing faiss_index
    document_store.faiss_index.reset()

    # test faiss index is cleared
    assert document_store.faiss_index.ntotal == 0

    # test loading the index
    new_document_store = FAISSDocumentStore.load(
        sql_url="sqlite://", faiss_file_path="haystack_test_faiss")

    # check faiss index is restored
    assert new_document_store.faiss_index.ntotal == len(DOCUMENTS)
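
Example #3 receives document_store as a pytest fixture rather than constructing it inline; one possible sketch of such a fixture, assuming a temporary SQLite database and the pre-1.0 Haystack import path:

import pytest
from haystack.document_store.faiss import FAISSDocumentStore

@pytest.fixture
def document_store(tmp_path):
    # Hypothetical fixture: FAISS store backed by a throwaway SQLite file
    return FAISSDocumentStore(sql_url=f"sqlite:///{tmp_path/'haystack_test.db'}")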
Example #4
import time
from subprocess import Popen, PIPE, STDOUT

from elasticsearch import Elasticsearch

# Start a local Elasticsearch node in the background
es_server = Popen(['/home/dr_lunars/elasticsearch-7.0.0/bin/elasticsearch'],
                  stdout=PIPE, stderr=STDOUT)

# Give the node time to start up before connecting
time.sleep(30)

es = Elasticsearch("http://localhost:9200", timeout=300, max_retries=10, retry_on_timeout=True)
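
# Optional sanity check (assumption: the node is reachable once startup finishes);
# es.ping() returns False instead of raising when the cluster is not up yet.
if not es.ping():
    raise RuntimeError("Elasticsearch did not come up in time")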

daily_score = 0

# %%
# DPR

from haystack.document_store.faiss import FAISSDocumentStore
document_store = FAISSDocumentStore.load(faiss_file_path="my_faiss", sql_url="sqlite:///my_doc_store.db", index="document")
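
# Sanity check (assumes documents were indexed into this store during an earlier
# preprocessing step): the restored SQL/FAISS store should not be empty.
print("documents in store:", document_store.get_document_count(index="document"))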

from dpr_inference import DPR

model_path = '/home/dr_lunars/models/question_encoder-optimized-quantized.onnx'
tokenizer_path = "kykim/bert-kor-base"

dpr = DPR(
    model_path=model_path,
    tokenizer_path=tokenizer_path,
    document_store=document_store
)

# %%
# Reader