Example #1
def test_finder_get_answers():
    test_docs = [{
        "name": "testing the finder 1",
        "text": "testing the finder with pyhton unit test 1",
        "meta": {
            "test": "test"
        }
    }, {
        "name": "testing the finder 2",
        "text": "testing the finder with pyhton unit test 2",
        "meta": {
            "test": "test"
        }
    }, {
        "name": "testing the finder 3",
        "text": "testing the finder with pyhton unit test 3",
        "meta": {
            "test": "test"
        }
    }]

    document_store = SQLDocumentStore(url="sqlite:///qa_test.db")
    document_store.write_documents(test_docs)
    retriever = TfidfRetriever(document_store=document_store)
    reader = TransformersReader(
        model="distilbert-base-uncased-distilled-squad",
        tokenizer="distilbert-base-uncased",
        use_gpu=-1)
    finder = Finder(reader, retriever)
    prediction = finder.get_answers(question="testing finder",
                                    top_k_retriever=10,
                                    top_k_reader=5)
    assert prediction is not None
Example #2
def test_faq_retriever_in_memory_store():

    from haystack.database.memory import InMemoryDocumentStore
    from haystack.retriever.dense import EmbeddingRetriever

    document_store = InMemoryDocumentStore(embedding_field="embedding")

    documents = [
        {'text': 'By running tox in the command line!', 'meta': {'name': 'How to test this library?', 'question': 'How to test this library?'}},
        {'text': 'By running tox in the command line!', 'meta': {'name': 'blah blah blah', 'question': 'blah blah blah'}},
        {'text': 'By running tox in the command line!', 'meta': {'name': 'blah blah blah', 'question': 'blah blah blah'}},
        {'text': 'By running tox in the command line!', 'meta': {'name': 'blah blah blah', 'question': 'blah blah blah'}},
        {'text': 'By running tox in the command line!', 'meta': {'name': 'blah blah blah', 'question': 'blah blah blah'}},
        {'text': 'By running tox in the command line!', 'meta': {'name': 'blah blah blah', 'question': 'blah blah blah'}},
        {'text': 'By running tox in the command line!', 'meta': {'name': 'blah blah blah', 'question': 'blah blah blah'}},
        {'text': 'By running tox in the command line!', 'meta': {'name': 'blah blah blah', 'question': 'blah blah blah'}},
        {'text': 'By running tox in the command line!', 'meta': {'name': 'blah blah blah', 'question': 'blah blah blah'}},
        {'text': 'By running tox in the command line!', 'meta': {'name': 'blah blah blah', 'question': 'blah blah blah'}},
        {'text': 'By running tox in the command line!', 'meta': {'name': 'blah blah blah', 'question': 'blah blah blah'}},
    ]

    retriever = EmbeddingRetriever(document_store=document_store, embedding_model="deepset/sentence_bert", gpu=False)

    embedded = []
    for doc in documents:
        doc['embedding'] = retriever.embed([doc['meta']['question']])[0]
        embedded.append(doc)

    document_store.write_documents(embedded)

    finder = Finder(reader=None, retriever=retriever)
    prediction = finder.get_answers_via_similar_questions(question="How to test this?", top_k_retriever=1)

    assert len(prediction.get('answers', [])) == 1
Example #3
    def load(self):
        if self.finder and self.finder2:
            return
        if not self.document_store2:
            self.document_store2 = FAISSDocumentStore.load(
                sql_url=sqlUrlFAQ, faiss_file_path='faiss2')  # save before load in preprocess
            self.initSql(url=sqlUrlFAQ, document_store=self.document_store2)
        # else:  # reset session
        #     # self.document_store2.session.close()
        #     super(
        #         FAISSDocumentStore, self.document_store2).__init__(url=sqlUrlFAQ)
        if not self.retriever2:
            self.retriever2 = EmbeddingRetriever(document_store=self.document_store2,
                                                 embedding_model="sentence_bert-saved", use_gpu=False)
        if not self.finder2:
            self.finder2 = Finder(reader=None, retriever=self.retriever2)

        if not self.document_store:
            self.document_store = SQLDocumentStore(url=sqlUrl)
            # FAISSDocumentStore.load(faiss_file_path='faiss1', sql_url=sqlUrl)
            self.initSql(url=sqlUrl, document_store=self.document_store)
        # else:  # reset session
        #     # self.document_store.session.close()
        #     super(
        #         FAISSDocumentStore, self.document_store).__init__(url=sqlUrl)
        # self.retriever = EmbeddingRetriever(  # reduce load by sharing the same retriever and setting the store on the fly?
        #     document_store=self.document_store, embedding_model="sentence_bert-saved", use_gpu=False) if not self.retriever else self.retriever
        if not self.retriever:
            self.retriever = TfidfRetriever(document_store=self.document_store)
        self.reader = FARMReader(model_name_or_path=modelDir,
                                 use_gpu=False, no_ans_boost=0) if not self.reader else self.reader
        # reader = TransformersReader(model_name_or_path="distilbert-base-uncased-distilled-squad", tokenizer="distilbert-base-uncased", use_gpu=-1)
        self.finder = Finder(
            self.reader, self.retriever) if not self.finder else self.finder
Example #4
def test_finder_get_answers_single_result(reader, retriever_with_docs, document_store_with_docs):
    finder = Finder(reader, retriever_with_docs)
    query = "testing finder"
    prediction = finder.get_answers(question=query, top_k_retriever=1,
                                    top_k_reader=1)
    assert prediction is not None
    assert len(prediction["answers"]) == 1
Example #5
def test_finder_get_answers_with_in_memory_store():
    test_docs = [{
        "name": "testing the finder 1",
        "text": "testing the finder with pyhton unit test 1",
        'meta': {
            'url': 'url'
        }
    }, {
        "name": "testing the finder 2",
        "text": "testing the finder with pyhton unit test 2",
        'meta': {
            'url': 'url'
        }
    }, {
        "name": "testing the finder 3",
        "text": "testing the finder with pyhton unit test 3",
        'meta': {
            'url': 'url'
        }
    }]

    from haystack.database.memory import InMemoryDocumentStore
    document_store = InMemoryDocumentStore()
    document_store.write_documents(test_docs)

    retriever = TfidfRetriever(document_store=document_store)
    reader = TransformersReader(
        model="distilbert-base-uncased-distilled-squad",
        tokenizer="distilbert-base-uncased",
        use_gpu=-1)
    finder = Finder(reader, retriever)
    prediction = finder.get_answers(question="testing finder",
                                    top_k_retriever=10,
                                    top_k_reader=5)
    assert prediction is not None
Example #6
    def __init__(self):
        self.finder = Finder(reader=Reader(model_name_or_path=MODEL_PATH,
                                           tokenizer=MODEL_PATH,
                                           use_gpu=0),
                             retriever=ElasticsearchRetriever(
                                 document_store=ElasticsearchDocumentStore(
                                     refresh_type='false')))
Example #7
def main():
    POPULATE_DOCUMENT_STORE = True

    document_store = ElasticsearchDocumentStore(host="localhost", username="", password="",
                                                index="document",
                                                text_field="text",
                                                embedding_field="question_emb",
                                                embedding_dim="768",
                                                excluded_meta_data=["question_emb"])

    retriever = EmbeddingRetriever(
        document_store=document_store, embedding_model=os.getcwd() +
        "\\kbQA\\bert-german-model",
        gpu=True, model_format="transformers")

    if POPULATE_DOCUMENT_STORE:
        doc_dir = os.getcwd() + "\\kbQA\\data\\Skripte\\Securplus\\txt"
        dicts = convert_files_to_dicts(
            dir_path=doc_dir, clean_func=clean_text, split_paragraphs=True)

        with open("Output.txt", "w") as text_file:
            text = ""
            for doc in dicts:
                text = text + "\n" + doc["text"]
            text_file.write(text)
        df = pd.DataFrame.from_dict(dicts)

        # Careful here! At this point we do not create embeddings for the questions but for the
        # texts, i.e. the answers, so the variable names are somewhat misleading.
        # dummy_questions is just an increasing number starting at one; it is needed because
        # otherwise exceptions are thrown during the search.
        # The tutorial seems to assume an FAQ setting in which question and answer are both
        # predefined, so embeddings can be created for the predefined questions and the k best
        # candidates are returned based on those. We, in contrast, create embeddings for every
        # single text.
        # todo: Since we create embeddings for each text, we may need to perform sentence
        #       segmentation, because the longer the texts get, the less accurate the embeddings
        #       become. Per-sentence embeddings are considerably more precise (see the sketch
        #       after this example).
        questions = list(df["text"].values)
        df["question_emb"] = retriever.create_embedding(texts=questions)
        dummy_questions = [f"{no}" for no, x in enumerate(questions, start=1)]
        df["question"] = dummy_questions
        print(df.head())

        docs_to_index = df.to_dict(orient="records")
        document_store.write_documents(docs_to_index)

    # question = "Wie viele haben Angst um ihren Job?"
    question = "welche leistungen sind ausgeschlossen?"
    # again, note: lowercasing is strictly required!
    question = question.lower()

    # We currently cannot use a Reader here, since readers apparently require QA fine-tuning.
    # The retriever fetches the best matches based on the embeddings.
    # get_answers() is not usable without a reader.
    finder = Finder(reader=None, retriever=retriever)
    prediction = finder.get_answers_via_similar_questions(
        question, top_k_retriever=5)
    print_answers(prediction, details="all")
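
The todo above suggests splitting each text into sentences before embedding. A minimal, hypothetical sketch of that idea, reusing the retriever, dicts and document_store objects from the example and assuming the nltk package with its "punkt" model is available (not part of the original script):

from nltk.tokenize import sent_tokenize  # assumes: pip install nltk; nltk.download("punkt")

sentence_dicts = []
for doc in dicts:
    # one entry per sentence instead of one per full text
    for sentence in sent_tokenize(doc["text"], language="german"):
        sentence_dicts.append({"text": sentence})

# embed every sentence and attach the dummy "question" fields, as in the example above
sentences = [d["text"] for d in sentence_dicts]
embeddings = retriever.create_embedding(texts=sentences)
for no, (d, emb) in enumerate(zip(sentence_dicts, embeddings), start=1):
    d["question"] = f"{no}"
    d["question_emb"] = emb

document_store.write_documents(sentence_dicts)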
Example #8
def test_finder_get_answers_single_result(reader, document_store_with_docs):
    retriever = TfidfRetriever(document_store=document_store_with_docs)
    finder = Finder(reader, retriever)
    prediction = finder.get_answers(question="testing finder",
                                    top_k_retriever=1,
                                    top_k_reader=1)
    assert prediction is not None
    assert len(prediction["answers"]) == 1
Example #9
def test_finding(document_store, retriever):
    document_store.write_documents(DOCUMENTS)
    finder = Finder(reader=None, retriever=retriever)

    prediction = finder.get_answers_via_similar_questions(
        question="How to test this?", top_k_retriever=1)

    assert len(prediction.get('answers', [])) == 1
Example #10
def test_finder_offsets(reader, retriever_with_docs, document_store_with_docs):
    finder = Finder(reader, retriever_with_docs)
    prediction = finder.get_answers(question="Who lives in Berlin?", top_k_retriever=10,
                                    top_k_reader=5)

    assert prediction["answers"][0]["offset_start"] == 11
    assert prediction["answers"][0]["offset_end"] == 16
    start = prediction["answers"][0]["offset_start"]
    end = prediction["answers"][0]["offset_end"]
    assert prediction["answers"][0]["context"][start:end] == prediction["answers"][0]["answer"]
Example #11
    def __init__(self):
        self.document_store = ElasticsearchDocumentStore(host="localhost",
                                                         username="",
                                                         password="",
                                                         index="document")
        self.retriever = ElasticsearchRetriever(
            document_store=self.document_store)
        self.reader = FARMReader(
            model_name_or_path="deepset/roberta-base-squad2", use_gpu=False)
        self.finder = Finder(self.reader, self.retriever)
        print('Ready')
Example #12
def test_finder_offsets(reader, document_store_with_docs):
    retriever = TfidfRetriever(document_store=document_store_with_docs)
    finder = Finder(reader, retriever)
    prediction = finder.get_answers(question="Who lives in Berlin?",
                                    top_k_retriever=10,
                                    top_k_reader=5)

    assert prediction["answers"][0]["offset_start"] == 11
    #TODO enable again when FARM is upgraded incl. the new offset calc
    #    assert prediction["answers"][0]["offset_end"] == 16
    start = prediction["answers"][0]["offset_start"]
    end = prediction["answers"][0]["offset_end"]
Example #13
def test_faiss_finding(document_store):
    document_store.write_documents(DOCUMENTS)

    retriever = EmbeddingRetriever(document_store=document_store,
                                   embedding_model="deepset/sentence_bert",
                                   use_gpu=False)
    finder = Finder(reader=None, retriever=retriever)

    prediction = finder.get_answers_via_similar_questions(
        question="How to test this?", top_k_retriever=1)

    assert len(prediction.get('answers', [])) == 1
Example #14
def test_finder_get_answers(reader, retriever_with_docs, document_store_with_docs):
    finder = Finder(reader, retriever_with_docs)
    prediction = finder.get_answers(question="Who lives in Berlin?", top_k_retriever=10,
                                    top_k_reader=3)
    assert prediction is not None
    assert prediction["question"] == "Who lives in Berlin?"
    assert prediction["answers"][0]["answer"] == "Carla"
    assert prediction["answers"][0]["probability"] <= 1
    assert prediction["answers"][0]["probability"] >= 0
    assert prediction["answers"][0]["meta"]["meta_field"] == "test1"
    assert prediction["answers"][0]["context"] == "My name is Carla and I live in Berlin"

    assert len(prediction["answers"]) == 3
Example #15
def get_results(txt_files_location, use_gpu, questions_list, results_location):

    document_store = ElasticsearchDocumentStore(host="localhost",
                                                username="",
                                                password="",
                                                index="document")
    for dirpath, dirnames, files in os.walk(txt_files_location):
        for dirname in dirnames:
            for dirpath, dirname, files in os.walk(
                    os.path.join(txt_files_location, dirname)):
                for file_name in files:
                    document_store.client.indices.delete(index='document',
                                                         ignore=[400, 404])

                    doc_dir = dirpath

                    dicts = convert_files_to_dicts(dir_path=doc_dir,
                                                   clean_func=clean_wiki_text,
                                                   split_paragraphs=True)

                    document_store.write_documents(dicts)

                    retriever = ElasticsearchRetriever(
                        document_store=document_store)

                    reader = FARMReader(
                        model_name_or_path=
                        "elgeish/cs224n-squad2.0-albert-xxlarge-v1",
                        use_gpu=use_gpu)

                    finder = Finder(reader, retriever)

                    sys.stdout = open(
                        os.path.join(results_location,
                                     file_name[:-4] + "_results.txt"), "a+")

                    for i, question in enumerate(questions_list):

                        prediction = finder.get_answers(question=question,
                                                        top_k_retriever=10,
                                                        top_k_reader=1)

                        print("\n\n\nQuestion " + str(i + 1) + ":\n")
                        print(question + "\n")
                        print_answers(prediction, details="minimal")

                    sys.stdout.close()

    document_store.client.transport.close()
Example #16
def init():
    ### Model values for Reader and Document Store
    global document_store, retriever, reader, finder
    document_store = ElasticsearchDocumentStore(host="localhost", username="", password="", index="document")
    retriever = ElasticsearchRetriever(document_store=document_store)
    reader = FARMReader(model_name_or_path='deepset/roberta-base-squad2-covid', use_gpu=False)
    finder = Finder(reader, retriever)
Example #17
    def setPrediction(self, reader, retriever, job):
        finder = Finder(reader, retriever)
        if 'top_k_retriever' not in job.task_params:
            job.task_params['top_k_retriever'] = 10
        if 'top_k_reader' not in job.task_params:
            job.task_params['top_k_reader'] = 5
        results = []
        es_query_body = self.buildFilter(job)
        del es_query_body['from']
        del es_query_body['size']
        es_query_body = {'external_source_id': ['ea13ebc0-18bf-4dfe-8750-61641fdbb00b']}
        for question in job.task_params['questions']:
            prediction = finder.get_answers(question=question,
                                            top_k_retriever=job.task_params['top_k_retriever'],
                                            top_k_reader=job.task_params['top_k_reader'],
                                            filters=None)  # es_query_body['query']['bool']
            results.append(prediction)
        print('INFO:', results, flush=True)
        return results
Example #18
class QAPipeline:
    def __init__(self):
        self.document_store = ElasticsearchDocumentStore(host="localhost",
                                                         username="",
                                                         password="",
                                                         index="document")
        self.retriever = ElasticsearchRetriever(
            document_store=self.document_store)
        self.reader = FARMReader(
            model_name_or_path="deepset/roberta-base-squad2", use_gpu=False)
        self.finder = Finder(self.reader, self.retriever)
        print('Ready')

    def add_to_datastore_from_remote(self, data_url):
        return {'status': 'Not Implemented'}

    def add_to_datastore_local(self, data_path):
        json_data = read_json_data(data_path)
        es_data = create_data_dicts(json_data)
        self.document_store.write_documents(es_data)
        return {'status': 'Added To Datastore'}

    def answer(self, question, top_k_options=10, top_k_answers=3):
        prediction = self.finder.get_answers(question=question,
                                             top_k_retriever=top_k_options,
                                             top_k_reader=top_k_answers)
        results = extract_info_from_predictions(prediction)
        return results
Example #19
def initFinder():
    """
    Function to initiate retriever, reader and finder
    Parameters
    ----------
    Returns
    -------
        finder (object): Haystack finder
    """
    retriever = DensePassageRetriever(
        document_store=document_store,
        query_embedding_model="facebook/dpr-question_encoder-single-nq-base",
        passage_embedding_model="facebook/dpr-ctx_encoder-single-nq-base",
        use_gpu=False,
        embed_title=True,
        max_seq_len=256,
        batch_size=16,
        remove_sep_tok_from_untitled_passages=True)
    # Important:
    # Now that we have the DPR initialized, we need to call update_embeddings() to iterate over all
    # previously indexed documents and update their embedding representation.
    # While this can be a time-consuming operation (depending on corpus size), it only needs to be done once.
    # At query time, we only need to embed the query and compare it to the existing doc embeddings, which is very fast.
    document_store.update_embeddings(retriever)
    reader = FARMReader(model_name_or_path="deepset/roberta-base-squad2",
                        use_gpu=False)
    return Finder(reader, retriever)
Example #20
def set_finder(user_id_key):
    if user_settings[user_id_key]["model"] == "roberta":
        model_path = (
            "deepset/roberta-base-squad2"  # Path of the models hosted in Hugging Face
        )
    elif user_settings[user_id_key]["model"] == "bert":
        model_path = "deepset/bert-large-uncased-whole-word-masking-squad2"
    elif user_settings[user_id_key]["model"] == "distilbert":
        model_path = "distilbert-base-uncased-distilled-squad"
    else:
        model_path = "illuin/camembert-base-fquad"

    retriever = ElasticsearchRetriever(document_store=user_doc_store[user_id_key])

    if user_settings[user_id_key]["gpu"] == "on":
        try:
            reader = TransformersReader(
                model_name_or_path=model_path, tokenizer=model_path, use_gpu=0
            )
        except Exception as e:
            print(e)
            print("GPU not available. Inferencing on CPU")
            reader = TransformersReader(
                model_name_or_path=model_path, tokenizer=model_path, use_gpu=-1
            )

    else:
        reader = TransformersReader(
            model_name_or_path=model_path, tokenizer=model_path, use_gpu=-1
        )

    finder = Finder(reader, retriever)

    return finder
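
A hedged sketch of the globals that set_finder() reads; the key names come from the snippet above, while the user id, settings values and document store configuration are invented for illustration:

# hypothetical setup for set_finder(); the concrete values are made up
user_settings = {
    "user-123": {"model": "roberta", "gpu": "off"},
}
user_doc_store = {
    "user-123": ElasticsearchDocumentStore(host="localhost", username="", password="", index="document"),
}

finder = set_finder("user-123")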
Example #21
def feed_documents_to_model(model_name="deepset/roberta-base-squad2-covid"):
    """Feeds documents to model and returns a model ready to make predictions

    Parameters
    ----------
    model_name : str
        The path of the model to be selected from HuggingFace.
        By default, uses a RoBERTa model fine-tuned on SQuAD 2.0
        and COVID articles.

    Returns
    -------
    finder
        the model to use for predictions
    """

    # Initialize in memory Document Store
    document_store = InMemoryDocumentStore()
    # Load articles and format it as dictionary
    articles = ret.get_data(MANIFEST, ARTICLES_FOLDER, [])
    dicts_textContent = process_documents(articles)
    # Store the dictionary with articles content in the Document Store
    document_store.write_documents(dicts_textContent)
    # The Retriever chooses the subset of documents that are relevant;
    # many techniques are possible, and for dev purposes TfidfRetriever is faster
    retriever = TfidfRetriever(document_store=document_store)
    # The Reader provides the interface to the pre-trained transformer models;
    # by default we use RoBERTa
    reader = FARMReader(model_name_or_path=model_name, use_gpu=False)
    # The finder retrieves predictions
    finder = Finder(reader, retriever)

    return finder
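
A short, hypothetical usage of the finder returned above; the question text is invented, and print_answers is the haystack utility used in other examples on this page:

finder = feed_documents_to_model()
prediction = finder.get_answers(question="What are the symptoms of COVID-19?",  # invented example question
                                top_k_retriever=10,
                                top_k_reader=3)
print_answers(prediction, details="minimal")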
Example #22
def qna():
    """Return the n answers."""

    question = request.form['question']
    # index is the target document where queries need to sent.
    index = request.form['index']

    # to select train or untrained model
    mode = request.form['mode']

    # initialization of the Haystack Elasticsearch document store
    document_store = ElasticsearchDocumentStore(
        host=app.config["host"],
        username=app.config["username"],
        password=app.config["password"],
        index=index)

    if mode == 'trained':
        # 'trained' mode: use the fine-tuned model from app.config["train_model"]
        reader = FARMReader(model_name_or_path=app.config["train_model"],
                            use_gpu=False)
    else:
        # otherwise use the pre-trained distilbert SQuAD model
        reader = FARMReader(
            model_name_or_path="distilbert-base-uncased-distilled-squad",
            use_gpu=False)

    # initialization of the ElasticsearchRetriever
    retriever = ElasticsearchRetriever(document_store=document_store)
    # Finder sticks together reader and retriever
    # in a pipeline to answer our actual questions.
    finder = Finder(reader, retriever)

    # predict n answers
    n = int(request.form['n'])
    prediction = finder.get_answers(question=question,
                                    top_k_retriever=10,
                                    top_k_reader=n)
    answer = []
    for res in prediction['answers']:
        answer.append(res['answer'])

    return json.dumps({
        'status': 'success',
        'message': 'Processed successfully',
        'result': answer
    })
Example #23
def qa_with_dense_retrieval(question: str) -> List[dict]:
    """
  """
    finder = Finder(reader, dense_retriever)
    prediction = finder.get_answers(question=question,
                                    top_k_retriever=10,
                                    top_k_reader=5)
    paras = {para.id: para.text for para in dense_retriever.retrieve(question)}
    results = []
    for result in prediction['answers']:
        if result['score'] > 0:
            results.append({
                'answer': result['answer'],
                'context': result['context'],
                'para': paras[result['document_id']]
            })
    return results
Example #24
    def qa(self, question, text_field):
        document_store = ElasticsearchDocumentStore(host=ES_HOST,
                                                    username=ES_USERNAME,
                                                    password=ES_PASSWORD,
                                                    index=self.ELASTIC_INDEX,
                                                    text_field=text_field)
        retriever = TfidfRetriever(document_store=document_store)

        reader = FARMReader(model_name_or_path="deepset/roberta-base-squad2",
                            use_gpu=False)

        finder = Finder(reader, retriever)
        prediction = finder.get_answers(question=question,
                                        top_k_retriever=1,
                                        top_k_reader=5)

        return prediction
Example #25
def get_test_client_and_override_dependencies(reader, document_store_with_docs):
    from rest_api.application import app
    from rest_api.controller import search

    search.document_store = document_store_with_docs
    search.retriever = ElasticsearchRetriever(document_store=document_store_with_docs)
    search.FINDERS = {1: Finder(reader=reader, retriever=search.retriever)}

    return TestClient(app)
Example #26
class QAPipeLine:
    def __init__(self):
        self.finder = Finder(reader=Reader(model_name_or_path=MODEL_PATH,
                                           tokenizer=MODEL_PATH,
                                           use_gpu=0),
                             retriever=ElasticsearchRetriever(
                                 document_store=ElasticsearchDocumentStore(
                                     refresh_type='false')))

    def __call__(self, paper_id: str, question: str):
        results = self.finder.get_answers(question=question,
                                          top_k_reader=5,
                                          filters={'name': [paper_id]})
        return results
Example #27
    def __init__(self, id, add_sample_data=False):
        Model.__init__(self, id)

        doc_store = ElasticsearchDocumentStore(
            host=DB_HOST,
            port=DB_PORT,
            index=self.id,
            embedding_field="question_emb",
            embedding_dim=768,
            excluded_meta_data=["question_emb"])
        retriever = EmbeddingRetriever(document_store=doc_store,
                                       embedding_model="deepset/sentence_bert",
                                       use_gpu=False)

        self.finder = Finder(reader=None, retriever=retriever)

        if add_sample_data:
            add_sample_data_faq_qa(self)
Example #28
    def __init__(self, id, add_sample_data=False):
        Model.__init__(self, id)

        doc_store = ElasticsearchDocumentStore(host=DB_HOST,
                                               port=DB_PORT,
                                               index=self.id)
        retriever = ElasticsearchRetriever(document_store=doc_store)

        reader = FARMReader(
            model_name_or_path=READER_MODEL_PATH,
            batch_size=BATCHSIZE,
            use_gpu=False,
            num_processes=MAX_PROCESSES,
        )
        self.finder = Finder(reader, retriever)

        if add_sample_data:
            add_sample_data_doc_qa(self)

        reader.save(directory=READER_MODEL_PATH)
        print("saved")
Example #29
def create_app(config_name):
    app = Flask(__name__)
    app.config.from_object(config[config_name])
    host = config[config_name].ELASTIC_URL
    port = config[config_name].ELASTIC_PORT
    index = config[config_name].ELASTIC_INDEX

    doc_store = ElasticsearchDocumentStore(host=host,
                                           username='',
                                           password='',
                                           index=index)

    retriever = ElasticsearchRetriever(document_store=doc_store)
    model_name = "deepset/roberta-base-squad2"
    reader = FARMReader(model_name_or_path=model_name,
                        num_processes=0,
                        use_gpu=False)
    app.finder = Finder(reader, retriever)

    from app.main import main as main_blueprint
    app.register_blueprint(main_blueprint)

    return app
Example #30
if READER_MODEL_PATH:  # note: this snippet starts mid-call; these two opening lines are reconstructed
    reader = FARMReader(
        model_name_or_path=str(READER_MODEL_PATH),
        batch_size=BATCHSIZE,
        use_gpu=USE_GPU,
        context_window_size=CONTEXT_WINDOW_SIZE,
        top_k_per_candidate=TOP_K_PER_CANDIDATE,
        no_ans_boost=NO_ANS_BOOST,
        max_processes=MAX_PROCESSES,
        max_seq_len=MAX_SEQ_LEN,
        doc_stride=DOC_STRIDE,
    )
else:
    # don't need one for pure FAQ matching
    reader = None

FINDERS = {
    1: Finder(reader=reader, retriever=retriever),
    2: Finder(reader=reader, retriever=english_retriever)
}
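
The FINDERS dict maps an id to a configured Finder. A hedged sketch of how a request handler might pick one and answer a question; the helper name and error handling are assumptions, not part of the original module:

def answer_with_finder(model_id: int, question: str, top_k_retriever: int = 10, top_k_reader: int = 5):
    # hypothetical helper: look up a Finder by id and answer a single question
    finder = FINDERS.get(model_id)
    if finder is None:
        raise ValueError(f"Unknown model_id {model_id}; available ids: {list(FINDERS)}")
    return finder.get_answers(question=question,
                              top_k_retriever=top_k_retriever,
                              top_k_reader=top_k_reader)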


#############################################
# Basic data schema for request & response
#############################################
class Query(BaseModel):
    questions: List[str]
    filters: Optional[Dict[str, Optional[str]]] = None
    top_k_reader: int = DEFAULT_TOP_K_READER
    top_k_retriever: int = DEFAULT_TOP_K_RETRIEVER


class Answer(BaseModel):