def get_retriever(retriever_type, document_store):
    """Build the retriever selected by ``retriever_type`` for ``document_store``.

    Args:
        retriever_type: One of ``"dpr"``, ``"tfidf"``, ``"embedding"``,
            ``"elasticsearch"`` or ``"es_filter_only"``.
        document_store: Document store passed to the retriever's constructor.

    Returns:
        The constructed retriever. The TF-IDF retriever has ``fit()`` called
        on it before it is returned.

    Raises:
        ValueError: If ``retriever_type`` is not one of the names above.
    """
    if retriever_type == "dpr":
        return DensePassageRetriever(
            document_store=document_store,
            query_embedding_model="facebook/dpr-question_encoder-single-nq-base",
            passage_embedding_model="facebook/dpr-ctx_encoder-single-nq-base",
            use_gpu=False,
            embed_title=True,
        )

    if retriever_type == "tfidf":
        tfidf_retriever = TfidfRetriever(document_store=document_store)
        tfidf_retriever.fit()
        return tfidf_retriever

    if retriever_type == "embedding":
        return EmbeddingRetriever(
            document_store=document_store,
            embedding_model="deepset/sentence_bert",
            use_gpu=False,
        )

    if retriever_type == "elasticsearch":
        return ElasticsearchRetriever(document_store=document_store)

    if retriever_type == "es_filter_only":
        return ElasticsearchFilterOnlyRetriever(document_store=document_store)

    # ValueError is a subclass of Exception, so callers that catch the
    # previously raised bare Exception keep working.
    raise ValueError(f"No retriever fixture for '{retriever_type}'")
def test_dummy_retriever(document_store_with_docs):
    """Check the first hit of ``ElasticsearchFilterOnlyRetriever`` for ``name`` filters.

    The query string ``"godzilla"`` does not occur in either expected text;
    each case asserts that the first returned document has the expected text
    and that its ``meta["name"]`` equals the value used in the filter.
    """
    from haystack.retriever.sparse import ElasticsearchFilterOnlyRetriever

    retriever = ElasticsearchFilterOnlyRetriever(document_store_with_docs)

    # (value for the "name" filter, top_k, expected text of the first hit)
    cases = [
        ("filename1", 1, "My name is Carla and I live in Berlin"),
        ("filename1", 5, "My name is Carla and I live in Berlin"),
        ("filename3", 5, "My name is Christelle and I live in Paris"),
    ]

    for name, top_k, expected_text in cases:
        result = retriever.retrieve(query="godzilla", filters={"name": [name]}, top_k=top_k)
        first_hit = result[0]
        # isinstance is the idiomatic type check (PEP 8) and also accepts subclasses.
        assert isinstance(first_hit, Document)
        assert first_hit.text == expected_text
        assert first_hit.meta["name"] == name
search_fields=SEARCH_FIELD_NAME, embedding_dim=EMBEDDING_DIM, embedding_field=EMBEDDING_FIELD_NAME, excluded_meta_data=EXCLUDE_META_DATA_FIELDS, # type: ignore faq_question_field=FAQ_QUESTION_FIELD_NAME, ) if RETRIEVER_TYPE == "EmbeddingRetriever": retriever = EmbeddingRetriever(document_store=document_store, embedding_model=EMBEDDING_MODEL_PATH, model_format=EMBEDDING_MODEL_FORMAT, use_gpu=USE_GPU) # type: BaseRetriever elif RETRIEVER_TYPE == "ElasticsearchRetriever": retriever = ElasticsearchRetriever(document_store=document_store) elif RETRIEVER_TYPE is None or RETRIEVER_TYPE == "ElasticsearchFilterOnlyRetriever": retriever = ElasticsearchFilterOnlyRetriever(document_store=document_store) else: raise ValueError( f"Could not load Retriever of type '{RETRIEVER_TYPE}'. " f"Please adjust RETRIEVER_TYPE to one of: " f"'EmbeddingRetriever', 'ElasticsearchRetriever', 'ElasticsearchFilterOnlyRetriever', None" f"OR modify rest_api/search.py to support your retriever") if READER_MODEL_PATH: # for extractive doc-qa if READER_TYPE == "TransformersReader": use_gpu = -1 if not USE_GPU else GPU_NUMBER reader = TransformersReader( model=str(READER_MODEL_PATH), use_gpu=use_gpu, context_window_size=CONTEXT_WINDOW_SIZE, tokenizer=str(READER_TOKENIZER)) # type: Optional[FARMReader]