Beispiel #1
0
class SimpleClient:
    """
    A prototype of a client to communicate with donkeybot.

    :param model: NLP model name, e.g. a BERT/DistilBERT model
        (default is distilbert-base-cased-distilled-squad).
    :param db_name: name of the db where data is stored (without the ``.db`` suffix).
    :param num_answers_inf: number of highest score answers that the client works with.
    """

    def __init__(self, model=None, db_name="data_storage", num_answers_inf=1):
        # Fall back to the default model; validate any user-supplied model first.
        self.model = "distilbert-base-cased-distilled-squad"
        if model:
            check_model_availability(model)
            self.model = model

        # device 0 -> first GPU, -1 -> CPU inference
        gpu = 0 if torch.cuda.is_available() else -1
        self.answer_detector = AnswerDetector(
            model=self.model,
            device=gpu,
            num_answers_to_predict=num_answers_inf,
        )
        data_storage = Database(f"{db_name}.db")
        faq_se, docs_se, question_se = setup_search_engines(db=data_storage)
        self.qa_interface = QAInterface(
            detector=self.answer_detector,
            question_engine=question_se,
            faq_engine=faq_se,
            docs_engine=docs_se,
        )

    def get_answer(self, question):
        """Return ``(extended_answer, confidence)`` for the top answer, or None.

        :param question: question string to ask donkeybot.
        :returns: tuple of (extended_answer, confidence) of the single best
            answer, or None when no answer was found.
        """
        answers = self.qa_interface.get_answers(question, top_k=1)
        # The original looped over `answers` but always returned on the first
        # iteration; a guard clause makes that intent explicit.
        if answers:
            top_answer = answers[0]
            return top_answer.extended_answer, top_answer.confidence
        return None
Beispiel #2
0
class Donkeybot:
    """
    A wrapper for Donkeybot to be used by the server for the slackbot.
    Usability same as the ask_donkeybot.py CLI script.

    :param model: NLP model, e.g. distilbert-base-cased-distilled-squad.
    :param db_name: name of the db where data is stored (without the ``.db`` suffix).
    :param num_answers_to_predict: number of answers predicted per document looked at.
    """

    def __init__(self, model=None, db_name="data_storage", num_answers_to_predict=3):
        # Fall back to the default model; validate any user-supplied model first.
        self.model = "distilbert-base-cased-distilled-squad"
        if model:
            check_model_availability(model)
            self.model = model
        self.db_name = db_name
        # device 0 -> first GPU, -1 -> CPU (better if just CPU for inference)
        gpu = 0 if torch.cuda.is_available() else -1
        self.answer_detector = AnswerDetector(
            model=self.model, device=gpu, num_answers_to_predict=num_answers_to_predict
        )
        data_storage = Database(f"{self.db_name}.db")
        faq_se, docs_se, question_se = setup_search_engines(db=data_storage)
        self.qa_interface = QAInterface(
            detector=self.answer_detector,
            question_engine=question_se,
            faq_engine=faq_se,
            docs_engine=docs_se,
        )
        # The thread that inits this Donkeybot instance won't use the db again.
        data_storage.close_connection()

    def get_answers(self, question, top_k=1, store_answers=False):
        """Search past questions table for an answer.

        :param question: question string.
        :param top_k: number of answers to return.
        :param store_answers: if True, persist the answers to the db.
        :returns: list of answers produced by the QA interface.
        """
        answers = self.qa_interface.get_answers(question, top_k=top_k)
        # TODO add confidence cutoff
        if store_answers:
            self._store_answers(answers)
        print("Done")
        return answers

    def get_faq_answers(self, question, num_faqs=1, store_answers=False):
        """Search FAQs for an answer.

        :param question: question string.
        :param num_faqs: number of FAQ answers to return.
        :param store_answers: if True, persist the answers to the db.
        :returns: list of answers produced by the QA interface.
        """
        answers = self.qa_interface.get_faq_answers(question, num_faqs=num_faqs)
        if store_answers:
            self._store_answers(answers)
        return answers

    def _store_answers(self, answers):
        # A different thread runs each call, so a fresh connection to the db is
        # needed. Could use sqlite3.connect('your.db', check_same_thread=False)
        # but that would require doing our own synchronization.
        data_storage = Database(f"{self.db_name}.db")
        for answer in answers:
            data_storage.insert_answer(answer)
        data_storage.close_connection()

    def update_label(self, answer_id, label):
        """Set the user-feedback label for a stored answer.

        :param answer_id: id of the answer row to update.
        :param label: feedback label, must be 0 or 1.
        :raises ValueError: if label is not 0 or 1.
        """
        # Validate BEFORE opening the connection so an invalid label cannot
        # leak an open db connection; raise instead of assert, because asserts
        # are stripped under `python -O`.
        if label not in (0, 1):
            raise ValueError(f"label must be 0 or 1, got {label!r}")
        data_storage = Database(f"{self.db_name}.db")
        data_storage.update_label(answer_id, label)
        data_storage.close_connection()
Beispiel #3
0
def main():
    """CLI entry point: load Donkeybot's QA pipeline and answer questions
    interactively until the user exits with CTRL+C.
    """
    # Parse cli arguments
    parser = argparse.ArgumentParser(
        description="""Use this script to ask DonkeyBot!""")
    optional = parser.add_argument_group("optional arguments")

    optional.add_argument(
        "-m",
        "--model",
        default="distilbert-base-cased-distilled-squad",
        help=
        "BERT/DistilBERT model used to inference answers. (default is distilbert-base-cased-distilled-squad)",
    )
    optional.add_argument(
        "-db",
        "--db_name",
        default="data_storage",
        help=
        "Name of database where all data is stored. (default is data_storage)",
    )
    optional.add_argument(
        "-s",
        "--store_answers",
        type=str2bool,
        nargs="?",
        const=True,
        default=False,
        help=
        "Store the answers on the '--answers_table' table. (default is False)",
    )
    optional.add_argument(
        "-n",
        "--num_answers_predicted_per_document",
        # Let argparse validate/convert instead of casting by hand later.
        type=int,
        default=3,
        help="Number of answers predicted per document. (default is 3)",
    )
    optional.add_argument(
        "--answers_table",
        default="answers",
        help="Name of the answers table. (default is 'answers')",
    )

    args = parser.parse_args()
    db_name = args.db_name
    model = args.model
    answers_table = args.answers_table
    store_answers = args.store_answers
    num_answers_inf = args.num_answers_predicted_per_document

    check_model_availability(model)

    # prepare data_storage
    data_storage = Database(f"{db_name}.db")
    # check for the answers table and create it if missing
    tables_in_db = [table[0] for table in data_storage.get_tables()]
    if answers_table not in tables_in_db:
        print(f"Creating '{answers_table}' table in {db_name}.db")
        data_storage.create_answers_table(table_name=f"{answers_table}")

    # load answer detector
    print("Loading AnswerDetector...")
    # device 0 -> first GPU, -1 -> CPU inference
    gpu = 0 if torch.cuda.is_available() else -1
    answer_detector = AnswerDetector(model=model,
                                     device=gpu,
                                     num_answers_to_predict=num_answers_inf)

    # load search engines
    faq_se, docs_se, question_se = setup_search_engines(db=data_storage)

    # load interface
    qa_interface = QAInterface(
        detector=answer_detector,
        question_engine=question_se,
        faq_engine=faq_se,
        docs_engine=docs_se,
    )

    # Main Loop
    print("DonkeyBot ready to be asked!")
    try:
        while True:
            print("\nCTRL+C to exit donkeybot")
            query = input("ask question: ")
            top_k = int(input("how many answers: "))
            start_time = time.time()
            answers = qa_interface.get_answers(query, top_k=top_k)
            print(
                f"Total inference time: {round(time.time() - start_time, 2)} seconds"
            )
            print_answers(answers)

            if store_answers:
                for answer in answers:
                    data_storage.insert_answer(answer,
                                               table_name=f"{answers_table}")
    except KeyboardInterrupt:
        sys.exit("\nExiting...")
    finally:
        # Close the db on every exit path, not just CTRL+C — the original
        # leaked the connection on any other exception in the loop.
        data_storage.close_connection()