Ejemplo n.º 1
0
def get_retriever(retriever_type, document_store):
    """Build the retriever named by ``retriever_type`` on top of ``document_store``.

    Args:
        retriever_type: One of ``"dpr"``, ``"tfidf"``, ``"embedding"``,
            ``"elasticsearch"`` or ``"es_filter_only"``.
        document_store: Document store passed to the retriever's constructor.

    Returns:
        The newly constructed retriever. The DPR and embedding retrievers
        are created with ``use_gpu=False``.

    Raises:
        ValueError: If ``retriever_type`` is not one of the supported names.
            (``ValueError`` is a subclass of ``Exception``, so callers that
            catch ``Exception`` keep working.)
    """
    if retriever_type == "dpr":
        return DensePassageRetriever(
            document_store=document_store,
            query_embedding_model="facebook/dpr-question_encoder-single-nq-base",
            passage_embedding_model="facebook/dpr-ctx_encoder-single-nq-base",
            use_gpu=False,
            embed_title=True,
        )

    if retriever_type == "tfidf":
        tfidf_retriever = TfidfRetriever(document_store=document_store)
        # Fit right away so callers receive a ready-to-use retriever
        # (presumably this builds the TF-IDF matrix from the store's
        # documents -- confirm against TfidfRetriever.fit).
        tfidf_retriever.fit()
        return tfidf_retriever

    if retriever_type == "embedding":
        return EmbeddingRetriever(
            document_store=document_store,
            embedding_model="deepset/sentence_bert",
            use_gpu=False,
        )

    if retriever_type == "elasticsearch":
        return ElasticsearchRetriever(document_store=document_store)

    if retriever_type == "es_filter_only":
        return ElasticsearchFilterOnlyRetriever(document_store=document_store)

    # An unknown name is a bad argument value, so report it as ValueError
    # rather than a bare Exception.
    raise ValueError(f"No retriever fixture for '{retriever_type}'")
Ejemplo n.º 2
0
def test_dummy_retriever(document_store_with_docs):
    """Check that the filter-only retriever returns the document picked by the filter.

    Every call uses the query ``"godzilla"``, which appears in none of the
    expected texts, and varies only the ``name`` filter and ``top_k``. The
    first hit must be a ``Document`` whose text and ``meta["name"]`` match
    the filtered file name.
    """
    from haystack.retriever.sparse import ElasticsearchFilterOnlyRetriever
    retriever = ElasticsearchFilterOnlyRetriever(document_store_with_docs)

    # (value for the "name" filter, top_k, expected text of the first hit)
    cases = [
        ("filename1", 1, "My name is Carla and I live in Berlin"),
        ("filename1", 5, "My name is Carla and I live in Berlin"),
        ("filename3", 5, "My name is Christelle and I live in Paris"),
    ]

    for name, top_k, expected_text in cases:
        result = retriever.retrieve(query="godzilla", filters={"name": [name]}, top_k=top_k)
        # isinstance is the idiomatic type check and also accepts subclasses.
        assert isinstance(result[0], Document)
        assert result[0].text == expected_text
        assert result[0].meta["name"] == name
Ejemplo n.º 3
0
    search_fields=SEARCH_FIELD_NAME,
    embedding_dim=EMBEDDING_DIM,
    embedding_field=EMBEDDING_FIELD_NAME,
    excluded_meta_data=EXCLUDE_META_DATA_FIELDS,  # type: ignore
    faq_question_field=FAQ_QUESTION_FIELD_NAME,
)

# Instantiate the retriever selected by RETRIEVER_TYPE. When RETRIEVER_TYPE
# is None, the filter-only Elasticsearch retriever is used.
if RETRIEVER_TYPE == "EmbeddingRetriever":
    retriever = EmbeddingRetriever(document_store=document_store,
                                   embedding_model=EMBEDDING_MODEL_PATH,
                                   model_format=EMBEDDING_MODEL_FORMAT,
                                   use_gpu=USE_GPU)  # type: BaseRetriever
elif RETRIEVER_TYPE == "ElasticsearchRetriever":
    retriever = ElasticsearchRetriever(document_store=document_store)
elif RETRIEVER_TYPE is None or RETRIEVER_TYPE == "ElasticsearchFilterOnlyRetriever":
    retriever = ElasticsearchFilterOnlyRetriever(document_store=document_store)
else:
    # Unknown retriever name: fail with a message that lists the accepted
    # values. The trailing space after "None" keeps it from running into
    # "OR" when the adjacent literals are concatenated.
    raise ValueError(
        f"Could not load Retriever of type '{RETRIEVER_TYPE}'. "
        "Please adjust RETRIEVER_TYPE to one of: "
        "'EmbeddingRetriever', 'ElasticsearchRetriever', 'ElasticsearchFilterOnlyRetriever', None "
        "OR modify rest_api/search.py to support your retriever")

if READER_MODEL_PATH:  # for extractive doc-qa
    if READER_TYPE == "TransformersReader":
        use_gpu = -1 if not USE_GPU else GPU_NUMBER
        reader = TransformersReader(
            model=str(READER_MODEL_PATH),
            use_gpu=use_gpu,
            context_window_size=CONTEXT_WINDOW_SIZE,
            tokenizer=str(READER_TOKENIZER))  # type: Optional[FARMReader]