def test_tfidf_retriever():
    """Check that the TF-IDF retriever returns the matching document.

    Three short documents are written to an in-memory document store, the
    retriever is fitted, and a ``top_k=1`` query for "godzilla" must yield
    exactly the document whose text is "godzilla says hello".
    """
    from haystack.retriever.tfidf import TfidfRetriever
    from haystack.database.memory import InMemoryDocumentStore

    corpus = [
        {"name": "testing the finder 1", "text": "godzilla says hello"},
        {"name": "testing the finder 2", "text": "optimus prime says bye"},
        {"name": "testing the finder 3", "text": "alien says arghh"},
    ]

    store = InMemoryDocumentStore()
    store.write_documents(corpus)

    tfidf = TfidfRetriever(store)
    tfidf.fit()

    hits = tfidf.retrieve("godzilla", top_k=1)
    expected_hit = Document(
        id='0',
        text='godzilla says hello',
        external_source_id=None,
        question=None,
        query_score=None,
        meta={},
    )
    assert hits == [expected_hit]
def test_finder_get_answers_with_in_memory_store():
    """Smoke-test ``Finder.get_answers`` backed by an in-memory document store.

    Writes three small documents to an ``InMemoryDocumentStore``, wires a
    ``TfidfRetriever`` and a ``TransformersReader`` into a ``Finder``, asks a
    single question, and asserts only that a prediction is returned.
    """
    corpus = [
        {
            "name": "testing the finder 1",
            "text": "testing the finder with pyhton unit test 1",
            "meta": {"url": "url"},
        },
        {
            "name": "testing the finder 2",
            "text": "testing the finder with pyhton unit test 2",
            "meta": {"url": "url"},
        },
        {
            "name": "testing the finder 3",
            "text": "testing the finder with pyhton unit test 3",
            "meta": {"url": "url"},
        },
    ]

    from haystack.database.memory import InMemoryDocumentStore

    store = InMemoryDocumentStore()
    store.write_documents(corpus)

    tfidf = TfidfRetriever(document_store=store)
    qa_reader = TransformersReader(
        model="distilbert-base-uncased-distilled-squad",
        tokenizer="distilbert-base-uncased",
        use_gpu=-1,
    )

    answers = Finder(qa_reader, tfidf).get_answers(
        question="testing finder",
        top_k_retriever=10,
        top_k_reader=5,
    )
    assert answers is not None
Exemple #3
0
def test_finder_get_answers():
    """Smoke-test ``Finder.get_answers`` backed by a SQL document store.

    Three small documents are written to the store at
    ``sqlite:///qa_test.db``; a ``TfidfRetriever`` and a
    ``TransformersReader`` are combined in a ``Finder`` and the test asserts
    only that asking a question yields a prediction.
    """
    corpus = [
        {
            "name": "testing the finder 1",
            "text": "testing the finder with pyhton unit test 1",
            "meta": {"test": "test"},
        },
        {
            "name": "testing the finder 2",
            "text": "testing the finder with pyhton unit test 2",
            "meta": {"test": "test"},
        },
        {
            "name": "testing the finder 3",
            "text": "testing the finder with pyhton unit test 3",
            "meta": {"test": "test"},
        },
    ]

    store = SQLDocumentStore(url="sqlite:///qa_test.db")
    store.write_documents(corpus)

    tfidf = TfidfRetriever(document_store=store)
    qa_reader = TransformersReader(
        model="distilbert-base-uncased-distilled-squad",
        tokenizer="distilbert-base-uncased",
        use_gpu=-1,
    )

    answers = Finder(qa_reader, tfidf).get_answers(
        question="testing finder",
        top_k_retriever=10,
        top_k_reader=5,
    )
    assert answers is not None
Exemple #4
0
    def qa(self, question, text_field):
        """Answer ``question`` from documents in this object's Elasticsearch index.

        Args:
            question: The question passed to ``Finder.get_answers``.
            text_field: Forwarded as ``text_field`` to the
                ``ElasticsearchDocumentStore``.

        Returns:
            Whatever ``Finder.get_answers`` returns for the question, using
            ``top_k_retriever=1`` and ``top_k_reader=5``.
        """
        store = ElasticsearchDocumentStore(
            host=ES_HOST,
            username=ES_USERNAME,
            password=ES_PASSWORD,
            index=self.ELASTIC_INDEX,
            text_field=text_field,
        )

        # Retriever is built before the reader, as in the original flow.
        tfidf = TfidfRetriever(document_store=store)
        farm_reader = FARMReader(
            model_name_or_path="deepset/roberta-base-squad2",
            use_gpu=False,
        )

        return Finder(farm_reader, tfidf).get_answers(
            question=question,
            top_k_retriever=1,
            top_k_reader=5,
        )
Exemple #5
0
)




if EMBEDDING_MODEL_PATH:
    retriever = EmbeddingRetriever(
        document_store=document_store,
        embedding_model=EMBEDDING_MODEL_PATH,
        model_format=EMBEDDING_MODEL_FORMAT,
        gpu=USE_GPU
    )  # type: BaseRetriever
else:
    retriever = ElasticsearchRetriever(document_store=document_store)'''
# Document store opened at sqlite:///qa.db, searched through a TF-IDF retriever.
documentstore = SQLDocumentStore(url="sqlite:///qa.db")
retriever = TfidfRetriever(document_store=documentstore)

# A reader is only created when READER_MODEL_PATH is truthy.
# NOTE(review): READER_MODEL_PATH acts purely as a flag here; the active
# reader below does not receive it.
if READER_MODEL_PATH:  # for extractive doc-qa
    # Disabled FARMReader setup: it sits inside a bare string literal, which
    # is evaluated and discarded, so none of it runs.
    '''reader = FARMReader(
        model_name_or_path=str(READER_MODEL_PATH),
        batch_size=BATCHSIZE,
        use_gpu=USE_GPU,
        context_window_size=CONTEXT_WINDOW_SIZE,
        top_k_per_candidate=TOP_K_PER_CANDIDATE,
        no_ans_boost=NO_ANS_BOOST,
        num_processes=MAX_PROCESSES,
        max_seq_len=MAX_SEQ_LEN,
        doc_stride=DOC_STRIDE,
    )  # type: Optional[FARMReader]'''

    # Active reader: a TransformersReader constructed with only use_gpu=-1.
    reader = TransformersReader(use_gpu=-1)
Exemple #6
0
## Indexing & cleaning documents
# Init a database (default: SQLite)

# Collect every sub-directory of each existing folder in MODELS_DIRS; each
# one is treated as a model directory to load.
model_paths = [
    candidate
    for root in map(Path, MODELS_DIRS)
    if root.is_dir()
    for candidate in root.iterdir()
    if candidate.is_dir()
]
# model_paths = [Path('./model')]

if not model_paths:
    logger.error(
        f"Could not find any model to load. Checked folders: {MODELS_DIRS}")

# A single retriever is shared by all finders; each finder gets its own reader.
retriever = TfidfRetriever()
FINDERS = {}
for finder_id, model_path in enumerate(model_paths, start=1):
    farm_reader = FARMReader(
        model_dir=str(model_path),
        batch_size=BATCH_SIZE,
        use_gpu=USE_GPU,
    )
    FINDERS[finder_id] = Finder(farm_reader, retriever)
    logger.info(f"Initialized Finder (ID={finder_id}) with model '{model_path}'")

# Usage hints written to the log once all finders are registered.
logger.info(
    "Open http://127.0.0.1:8000/docs to see Swagger API Documentation.")
logger.info(
    """ Or just try it out directly: curl --request POST --url 'http://127.0.0.1:8000/finders/1/ask' --data '{"question": "Who is the father of Arya Starck?"}'"""
)

#############################################
Exemple #7
0
# --- Configuration -----------------------------------------------------
MODELS_DIRS = ["saved_models", "models", "model"]
USE_GPU = False
BATCH_SIZE = 16
DATABASE_URL = "sqlite:///qa.db"
MODEL_PATHS = ['deepset/bert-base-cased-squad2']

app = FastAPI(title="Haystack API", version="0.1")

if not MODEL_PATHS:
    logger.error(
        f"No model to load. Please specify one via MODEL_PATHS (e.g. ['deepset/bert-base-cased-squad2']"
    )

# --- Retrieval backend: SQL document store + TF-IDF retriever -----------
datastore = SQLDocumentStore(url=DATABASE_URL)
retriever = TfidfRetriever(datastore=datastore)

# --- One Finder per configured model, keyed by a 1-based ID -------------
FINDERS = {}
for finder_id, model_path in enumerate(MODEL_PATHS, start=1):
    farm_reader = FARMReader(
        model_name_or_path=str(model_path),
        batch_size=BATCH_SIZE,
        use_gpu=USE_GPU,
    )
    FINDERS[finder_id] = Finder(farm_reader, retriever)
    logger.info(f"Initialized Finder (ID={finder_id}) with model '{model_path}'")

# Usage hints written to the log once all finders are registered.
logger.info(
    "Open http://127.0.0.1:8000/docs to see Swagger API Documentation.")
logger.info(
    """ Or just try it out directly: curl --request POST --url 'http://127.0.0.1:8000/finders/1/ask' --data '{"question": "Who is the father of Arya Starck?"}'"""
)