def load(self):
    """Lazily construct the two QA pipelines used by this service.

    Pipeline 1 (FAQ matching): FAISS document store + embedding retriever,
    wrapped in a reader-less Finder (``finder2``).
    Pipeline 2 (extractive QA): SQL document store + TF-IDF retriever +
    FARM reader, wrapped in ``finder``.

    Idempotent: each component is created only if it does not already
    exist, and the method returns immediately once both finders are built.
    """
    if self.finder and self.finder2:
        return

    # --- FAQ pipeline: FAISS store + embedding retriever, no reader ---
    if not self.document_store2:
        # The FAISS index file 'faiss2' must have been saved during the
        # preprocessing step before it can be loaded here.
        self.document_store2 = FAISSDocumentStore.load(
            sql_url=sqlUrlFAQ, faiss_file_path='faiss2')
        self.initSql(url=sqlUrlFAQ, document_store=self.document_store2)
    if not self.retriever2:
        self.retriever2 = EmbeddingRetriever(
            document_store=self.document_store2,
            embedding_model="sentence_bert-saved",
            use_gpu=False)
    if not self.finder2:
        # reader=None: FAQ matching is retrieval-only.
        self.finder2 = Finder(reader=None, retriever=self.retriever2)

    # --- Extractive pipeline: SQL store + TF-IDF retriever + reader ---
    if not self.document_store:
        self.document_store = SQLDocumentStore(url=sqlUrl)
        self.initSql(url=sqlUrl, document_store=self.document_store)
    if not self.retriever:
        self.retriever = TfidfRetriever(document_store=self.document_store)
    if not self.reader:
        # no_ans_boost=0: do not bias toward "no answer" predictions.
        self.reader = FARMReader(
            model_name_or_path=modelDir, use_gpu=False, no_ans_boost=0)
    if not self.finder:
        self.finder = Finder(self.reader, self.retriever)
def test_faiss_index_save_and_load(tmp_path):
    """Round-trip a FAISS index through save() and load() and verify
    both the vectors and the SQL-backed documents survive."""
    db_url = f"sqlite:////{tmp_path/'haystack_test.db'}"
    document_store = FAISSDocumentStore(
        sql_url=db_url,
        index="haystack_test",
    )
    document_store.write_documents(DOCUMENTS)

    # persist the index to disk
    document_store.save(tmp_path / "haystack_test_faiss")

    # deliberately wipe the live faiss index and confirm it is empty
    document_store.faiss_indexes[document_store.index].reset()
    assert document_store.faiss_indexes[document_store.index].ntotal == 0

    # reload everything from disk
    new_document_store = FAISSDocumentStore.load(
        sql_url=db_url,
        faiss_file_path=tmp_path / "haystack_test_faiss",
        index=document_store.index)

    # the restored index holds one vector per written document
    restored_index = new_document_store.faiss_indexes[document_store.index]
    assert restored_index.ntotal == len(DOCUMENTS)

    # the documents themselves are restored as well
    assert len(new_document_store.get_all_documents()) == len(DOCUMENTS)
def test_faiss_index_save_and_load(document_store):
    """Saving the FAISS index, clearing it, then loading must restore
    one vector per written document."""
    document_store.write_documents(DOCUMENTS)

    # persist, then deliberately clear the in-memory index
    document_store.save("haystack_test_faiss")
    document_store.faiss_index.reset()
    assert document_store.faiss_index.ntotal == 0

    # reloading from disk restores the full index
    new_document_store = FAISSDocumentStore.load(
        sql_url="sqlite://", faiss_file_path="haystack_test_faiss")
    assert new_document_store.faiss_index.ntotal == len(DOCUMENTS)
# --- Elasticsearch bootstrap ---
import time
from elasticsearch import Elasticsearch

# Launch a local Elasticsearch 7.0.0 server as a background process.
# NOTE(review): Popen/PIPE/STDOUT appear to come from subprocess, imported
# earlier in the file — confirm, not visible in this chunk.
es_server = Popen(['/home/dr_lunars/elasticsearch-7.0.0/bin/elasticsearch'],stdout=PIPE, stderr=STDOUT)
# Fixed 30 s wait for the server to come up; no readiness polling is done.
time.sleep(30)
# Generous client timeouts/retries to ride out a still-warming server.
es = Elasticsearch("http://localhost:9200", timeout=300, max_retries=10, retry_on_timeout=True)

daily_score = 0

# %%
# DPR
from haystack.document_store.faiss import FAISSDocumentStore

# Restore the previously saved FAISS index ("my_faiss") together with its
# SQLite-backed document metadata from my_doc_store.db.
document_store = FAISSDocumentStore.load(faiss_file_path="my_faiss", sql_url="sqlite:///my_doc_store.db", index="document")

from dpr_inference import DPR

# Quantized ONNX question encoder paired with a Korean BERT tokenizer.
model_path = '/home/dr_lunars/models/question_encoder-optimized-quantized.onnx'
tokenizer_path = "kykim/bert-kor-base"

dpr = DPR(
    model_path=model_path,
    tokenizer_path=tokenizer_path,
    document_store=document_store
)

# %%
# Reader