class SimpleClient:
    """A prototype of a client to communicate with donkeybot.

    :param model: NLP model name, e.g. a BERT/DistilBERT checkpoint.
        Defaults to 'distilbert-base-cased-distilled-squad'.
    :param db_name: name of the db file (without extension) where data is stored.
    :param num_answers_inf: number of highest-score answers the detector predicts.
    """

    def __init__(self, model=None, db_name="data_storage", num_answers_inf=1):
        self.model = "distilbert-base-cased-distilled-squad"
        if model:
            # raises if the requested model isn't available
            check_model_availability(model)
            self.model = model
        # device 0 = first CUDA device, -1 = CPU inference
        gpu = 0 if torch.cuda.is_available() else -1
        self.answer_detector = AnswerDetector(
            model=self.model,
            device=gpu,
            num_answers_to_predict=num_answers_inf,
        )
        data_storage = Database(f"{db_name}.db")
        faq_se, docs_se, question_se = setup_search_engines(db=data_storage)
        self.qa_interface = QAInterface(
            detector=self.answer_detector,
            question_engine=question_se,
            faq_engine=faq_se,
            docs_engine=docs_se,
        )
        # release the handle once setup is done, mirroring Donkeybot;
        # previously the connection was left open for the object's lifetime
        data_storage.close_connection()

    def get_answer(self, question):
        """Return ``(extended_answer, confidence)`` for the top answer.

        :param question: free-text question to ask.
        :return: tuple of (extended_answer, confidence), or ``(None, None)``
            when no answer was found (previously fell through and returned a
            bare ``None``, crashing callers that unpack two values).
        """
        answers = self.qa_interface.get_answers(question, top_k=1)
        if not answers:
            return None, None
        # top_k=1, so the first (and only) answer is the one we want;
        # the old enumerate-loop-with-return was equivalent but misleading
        top = answers[0]
        return top.extended_answer, top.confidence
class Donkeybot:
    """A wrapper for Donkeybot to be used by the server for the slackbot.

    Usability same with ask_donkeybot.py CLI script.

    :param model: NLP model, e.g. distilbert-base-cased-distilled-squad.
    :param db_name: name of the db file (without extension) where data is stored.
    :param num_answers_to_predict: number of answers predicted per document looked at.
    """

    def __init__(self, model=None, db_name="data_storage", num_answers_to_predict=3):
        self.model = "distilbert-base-cased-distilled-squad"
        if model:
            # raises if the requested model isn't available
            check_model_availability(model)
            self.model = model
        self.db_name = db_name
        # better if just CPU for inference (device 0 = first CUDA device, -1 = CPU)
        gpu = 0 if torch.cuda.is_available() else -1
        self.answer_detector = AnswerDetector(
            model=self.model,
            device=gpu,
            num_answers_to_predict=num_answers_to_predict,
        )
        data_storage = Database(f"{self.db_name}.db")
        faq_se, docs_se, question_se = setup_search_engines(db=data_storage)
        self.qa_interface = QAInterface(
            detector=self.answer_detector,
            question_engine=question_se,
            faq_engine=faq_se,
            docs_engine=docs_se,
        )
        # the thread that inits this Donkeybot instance won't use the db again
        data_storage.close_connection()

    def get_answers(self, question, top_k=1, store_answers=False):
        """Search past questions table for an answer.

        :param question: free-text question to ask.
        :param top_k: number of answers to return.
        :param store_answers: when True, persist the answers to the db.
        :return: list of answers from the QA interface.
        """
        answers = self.qa_interface.get_answers(question, top_k=top_k)
        # TODO add confidence cutoff
        if store_answers:
            self._store_answers(answers)
            print("Done")
        return answers

    def get_faq_answers(self, question, num_faqs=1, store_answers=False):
        """Search FAQs for an answer.

        :param question: free-text question to ask.
        :param num_faqs: number of FAQ answers to return.
        :param store_answers: when True, persist the answers to the db.
        :return: list of FAQ answers from the QA interface.
        """
        answers = self.qa_interface.get_faq_answers(question, num_faqs=num_faqs)
        if store_answers:
            self._store_answers(answers)
        return answers

    def _store_answers(self, answers):
        """Persist *answers* using a fresh db connection."""
        # a different thread is running each time so a new connection to the db
        # is needed; could use sqlite3.connect('your.db', check_same_thread=False)
        # but that would need our own synchronization
        data_storage = Database(f"{self.db_name}.db")
        try:
            for answer in answers:
                data_storage.insert_answer(answer)
        finally:
            # don't leak the connection if an insert raises
            data_storage.close_connection()

    def update_label(self, answer_id, label):
        """Set the user-feedback label for a stored answer.

        :param answer_id: id of the answer row to update.
        :param label: 0 or 1.
        :raises ValueError: if label is not 0 or 1.
        """
        # validate BEFORE opening the connection; the old `assert` is stripped
        # under `python -O` and, placed after Database(...), leaked the open
        # connection whenever it fired
        if label not in (0, 1):
            raise ValueError("label must be 0 or 1")
        data_storage = Database(f"{self.db_name}.db")
        try:
            data_storage.update_label(answer_id, label)
        finally:
            data_storage.close_connection()
def main():
    """Interactive CLI loop: ask DonkeyBot questions and optionally store answers."""
    # Parse cli arguments
    parser = argparse.ArgumentParser(description="""Use this script to ask DonkeyBot!""")
    optional = parser.add_argument_group("optional arguments")
    optional.add_argument(
        "-m",
        "--model",
        default="distilbert-base-cased-distilled-squad",
        help="BERT/DistilBERT model used to inference answers. (default is distilbert-base-cased-distilled-squad)",
    )
    optional.add_argument(
        "-db",
        "--db_name",
        default="data_storage",
        help="Name of database where all data is stored. (default is data_storage)",
    )
    optional.add_argument(
        "-s",
        "--store_answers",
        type=str2bool,
        nargs="?",
        const=True,
        default=False,
        help="Store the answers on the '--answers_table' table. (default is False)",
    )
    optional.add_argument(
        "-n",
        "--num_answers_predicted_per_document",
        type=int,  # let argparse reject non-integers instead of int(...) crashing later
        default=3,
        help="Number of answers predicted per document. (default is 3)",
    )
    optional.add_argument(
        "--answers_table",
        default="answers",
        help="Name of the answers table. (default is 'answers')",
    )
    args = parser.parse_args()
    db_name = args.db_name
    model = args.model
    answers_table = args.answers_table
    store_answers = args.store_answers
    num_answers_inf = args.num_answers_predicted_per_document

    check_model_availability(model)

    # prepare data_storage
    data_storage = Database(f"{db_name}.db")
    # create the answers table on first run
    tables_in_db = [table[0] for table in data_storage.get_tables()]
    if answers_table not in tables_in_db:
        print(f"Creating '{answers_table}' table in {db_name}.db")
        data_storage.create_answers_table(table_name=f"{answers_table}")

    # load answer detector (device 0 = first CUDA device, -1 = CPU)
    print("Loading AnswerDetector...")
    gpu = 0 if torch.cuda.is_available() else -1
    answer_detector = AnswerDetector(
        model=model, device=gpu, num_answers_to_predict=num_answers_inf
    )

    # load search engines
    faq_se, docs_se, question_se = setup_search_engines(db=data_storage)

    # load interface
    qa_interface = QAInterface(
        detector=answer_detector,
        question_engine=question_se,
        faq_engine=faq_se,
        docs_engine=docs_se,
    )

    # Main Loop
    print("DonkeyBot ready to be asked!")
    try:
        while True:
            print("\nCTRL+C to exit donkeybot")
            query = str(input("ask question: "))
            try:
                top_k = int(input("how many answers: "))
            except ValueError:
                # previously a non-numeric entry crashed the whole program
                print("Please enter an integer number of answers.")
                continue
            start_time = time.time()
            answers = qa_interface.get_answers(query, top_k=top_k)
            print(
                f"Total inference time: {round(time.time() - start_time, 2)} seconds"
            )
            print_answers(answers)
            if store_answers:
                for answer in answers:
                    data_storage.insert_answer(answer, table_name=f"{answers_table}")
    except KeyboardInterrupt:
        sys.exit("\nExiting...")
    finally:
        # previously closed only on CTRL+C; close the db on ANY exit path
        data_storage.close_connection()