def get_bot_accuracies(bot, scored_qa_pairs=None, min_qa_bot_confidence=.2):
    """ Compare answers from bot to answers in test set

    Returns a list of dicts (one per Q/A pair) augmented with the bot's answer
    and several similarity/accuracy metrics.

    >>> from qary.skills import glossary_bots
    >>> bot = glossary_bots.Bot()
    >>> scored_qa_pairs = [dict(question='What is RMSE?', answer='Root Mean Square Error', score=.9, topic='ds')]
    >>> get_bot_accuracies(bot=bot, scored_qa_pairs=scored_qa_pairs)[0]['bot_accuracy']
    1.0
    >>> scored_qa_pairs = [dict(question='What is RMSE?', answer='root-mean-sqr-error', score=.9, topic='ds')]
    >>> get_bot_accuracies(bot=bot, scored_qa_pairs=scored_qa_pairs)[0]
    {'question': 'What is RMSE?', 'answer': 'root-mean-sqr-error', 'score': 0.9, 'topic': 'ds', 'bot_answer': 'Root Mean Square Error', 'bot_w2v_similarity': 0.64..., 'bot_ed_distance': 0.52..., 'bot_ed_distance_low': 0.31..., 'bot_ed_distance_folded': 0.15..., 'bot_accuracy': 0.65...}
    """
    # Resolve the dataset argument: default dataset, a dataset name/path, or an
    # already-loaded list of dicts with keys question/answer/score/topic.
    if scored_qa_pairs is None:
        scored_qa_pairs = load_qa_dataset()
    elif isinstance(scored_qa_pairs, str):
        scored_qa_pairs = load_qa_dataset(scored_qa_pairs)
    validated_qa_pairs = []
    for truth in scored_qa_pairs:
        truth = dict(truth)  # work on a copy so the caller's dicts are not mutated
        texts = scrape_wikipedia.find_document_texts(topic=truth['topic'], max_results=10)
        # FIX: original left `replies` unbound (NameError) when `texts` was empty
        replies = []
        for context in texts:
            bot.reset_context(context)
            # bot.reply returns (confidence, answer) pairs; sort ascending by confidence
            replies = sorted(bot.reply(truth['question']))
            # FIX: original re-sorted an already-sorted list before peeking at the best reply
            if replies and replies[-1][0] > min_qa_bot_confidence:
                break
        replies = replies or [(0, "Sorry, I don't know.")]
        truth['bot_answer'] = replies[-1][1]
        # Semantic similarity between the bot's answer and the reference answer (word vectors)
        truth['bot_w2v_similarity'] = nlp(truth['bot_answer']).similarity(nlp(truth['answer']))
        # Edit distances normalized by the reference-answer length
        truth['bot_ed_distance'] = distance(
            truth['answer'], truth['bot_answer']) / len(truth['answer'])
        truth['bot_ed_distance_low'] = distance(
            truth['answer'].lower().strip(),
            truth['bot_answer'].lower().strip()) / len(truth['answer'].strip())
        truth['bot_ed_distance_folded'] = distance(
            fold_characters(truth['answer']),
            fold_characters(truth['bot_answer'])) / len(truth['answer'].strip())
        # Accuracy blends semantic similarity (50%) with 1 - mean normalized edit distance (50%)
        truth['bot_accuracy'] = .5 * truth['bot_w2v_similarity'] + .5 * (
            1 - (truth['bot_ed_distance'] + truth['bot_ed_distance_low']
                 + truth['bot_ed_distance_folded']) / 3)
        validated_qa_pairs.append(truth)
    return validated_qa_pairs
def load(domains=FAQ_DOMAINS):
    """ Load yaml file, use hashtags to create context tags as multihot columns

    Load faq*.yml into dictionary: question: answer

    NOTE(review): the doctest below references keys 'raw'/'cleaned' that this
    function does not return (it returns questions/answers/question_vectors) —
    the doctest looks stale; confirm against callers before relying on it.

    >>> g = load(domains='dsdh'.split(','))
    >>> len(g['raw']) <= len(g['cleaned']) > 30
    True
    >>> sorted(g['cleaned']['Allele'])
    ['acronym', 'definition', 'hashtags', 'parenthetical']
    """
    questions, answers, question_vectors = [], [], []
    faqdirpath = os.path.join(DATA_DIR, 'faq')
    for domain in tqdm(domains):
        for filepath in Path(faqdirpath).glob(f'faq*{domain}*.yml'):
            try:
                filepointer = open(filepath)
            except FileNotFoundError as e:
                log.error(f"{e}\n Unable to find the file path object: {filepath}")
                continue
            with filepointer:
                log.info(f"loading: {filepath.name}\n with file pointer: {filepointer}")
                try:
                    # FIX: bare yaml.load() is deprecated (warns on PyYAML>=5.1);
                    # FullLoader is its historical default behavior. These are
                    # local project data files, not untrusted input; otherwise
                    # SafeLoader would be required.
                    qa_list = yaml.load(filepointer, Loader=yaml.FullLoader)
                except ScannerError as e:
                    log.error(f"{e}\n yaml.load unable to read {filepointer.name}")
                    continue
                # FIX: an empty yml file parses to None; original raised TypeError
                for qa_dict in (qa_list or []):
                    questions.append(qa_dict.get('Q', qa_dict.get('q', '')))
                    answers.append(qa_dict.get('A', qa_dict.get('a', '')))
                    try:
                        question_vectors.append(list(nlp(questions[-1] or '').vector))
                    except TypeError:
                        # non-string question: fall back to the unknown-word vector
                        question_vectors.append(list(UNKNOWN_WORDVEC))
                        continue
                    assert len(UNKNOWN_WORDVEC) == len(question_vectors[-1])
    log.debug(f'len(question_vectors): {len(question_vectors)}')
    questions = np.array(questions)
    log.debug(f'len(questions): {len(questions)}')
    answers = np.array(answers)
    log.debug(f'len(answers): {len(answers)}')
    # Keep only pairs where both the question and the answer are non-blank.
    # FIX: original comprehension bound `a, q` to (questions, answers) in that
    # order — mislabeled (harmless only because both checks are identical).
    mask = np.array([
        bool(q) and bool(a) and len(str(q).strip()) > 0 and len(str(a).strip()) > 0
        for q, a in zip(questions, answers)])
    question_vectors = normalize_docvectors(question_vectors)
    # This should be a Kendra/gensim/annoy class (with methods like .find_similar)
    return dict(
        questions=questions[mask],
        answers=answers[mask],
        question_vectors=np.array([qv for qv, m in zip(question_vectors, mask) if m]),
    )
def get_bot_accuracies(bot, scored_qa_pairs=None, min_qa_bot_confidence=.2,
                       num_questions=None, shuffle_seed=None):
    """ Compare answers from bot to answers in test set

    Generator version: yields one augmented dict per valid Q/A pair.

    >>> from qary.skills import glossary_bots
    >>> bot = glossary_bots.Bot()
    >>> scored_qa_pairs = [dict(question='What is RMSE?', answer='Root Mean Square Error', score=.9, topic='ds')]
    >>> next(get_bot_accuracies(bot=bot, scored_qa_pairs=scored_qa_pairs))['bot_accuracy']
    1.0
    >>> scored_qa_pairs = [dict(question='What is RMSE?', answer='root-mean-sqr-error', score=.9, topic='ds')]
    >>> next(get_bot_accuracies(bot=bot, scored_qa_pairs=scored_qa_pairs))
    {'question': 'What is RMSE?', 'answer': 'root-mean-sqr-error', 'score': 0.9, 'topic': 'ds', 'bot_answer': 'Root Mean Square Error', 'bot_w2v_similarity': 0.64..., 'bot_ed_distance': 0.52..., 'bot_ed_distance_low': 0.31..., 'bot_ed_distance_folded': 0.15..., 'bot_accuracy': 0.65...}
    """
    # Resolve the dataset argument: default dataset, a dataset name/path, or an
    # already-loaded list of dicts.
    if scored_qa_pairs is None:
        scored_qa_pairs = load_qa_dataset()
    elif isinstance(scored_qa_pairs, str):
        scored_qa_pairs = load_qa_dataset(scored_qa_pairs)
    if shuffle_seed:
        # FIX: shuffle a copy so the caller's list is not reordered in place
        scored_qa_pairs = list(scored_qa_pairs)
        np.random.seed(shuffle_seed)
        np.random.shuffle(scored_qa_pairs)
    bot_answers = {}  # memoize the best answer per question string
    for i, truth in enumerate(scored_qa_pairs):
        if num_questions and i >= num_questions:
            break
        # FIX: original used truth['answer']/truth['question'] directly, which
        # raised KeyError on malformed records while `topic` used .get()
        topic = truth.get('topic')
        if not truth or not topic or not truth.get('answer') or not truth.get('question'):
            continue
        truth = dict(truth)  # work on a copy so the caller's dicts are not mutated
        log.warning(f"topic: {truth['topic']}, question: {truth['question']}")
        # Search topic articles first, then articles matching the question itself
        textgen = scrape_wikipedia.find_article_texts(query=[topic], max_articles=5)
        texts = chain(
            textgen,
            scrape_wikipedia.find_article_texts(query=truth['question'], max_articles=10))
        # TODO: def get_best_bot_answer(bot, question, texts)
        bot_answer = bot_answers.get(truth['question'])
        # FIX: `if not bot_answer:` re-ran the whole article search whenever the
        # memoized answer was an empty string; only a cache miss should search
        if bot_answer is None:
            # FIX: original left `replies` unbound (NameError) when `texts` was empty
            replies = []
            for context in texts:
                bot.reset_context(context)
                # (confidence, answer) pairs, sorted ascending by confidence
                replies = sorted(bot.reply(truth['question']))
                # FIX: original re-sorted an already-sorted list here
                if replies and replies[-1][0] > min_qa_bot_confidence:
                    break
            replies = replies or [(0, "Sorry, I don't know.")]
            bot_answer = replies[-1][1]
            bot_answers[truth['question']] = bot_answer
        truth['bot_answer'] = bot_answer
        # END TODO: def get_best_bot_answer(bot, question, texts)
        # Semantic similarity between the bot's answer and the reference answer
        truth['bot_w2v_similarity'] = nlp(truth['bot_answer']).similarity(nlp(truth['answer']))
        # Edit distances normalized by the reference-answer length
        truth['bot_ed_distance'] = distance(
            truth['answer'], truth['bot_answer']) / len(truth['answer'])
        truth['bot_ed_distance_low'] = distance(
            truth['answer'].lower().strip(),
            truth['bot_answer'].lower().strip()) / len(truth['answer'].strip())
        truth['bot_ed_distance_folded'] = distance(
            fold_characters(truth['answer']),
            fold_characters(truth['bot_answer'])) / len(truth['answer'].strip())
        # Accuracy blends semantic similarity (50%) with 1 - mean normalized edit distance (50%)
        truth['bot_accuracy'] = .5 * truth['bot_w2v_similarity'] + .5 * (
            1 - (truth['bot_ed_distance'] + truth['bot_ed_distance_low']
                 + truth['bot_ed_distance_folded']) / 3)
        log.warning(f"q: accuracy: {truth['question']}: {truth['bot_accuracy']}")
        yield truth
def test_spacy_language_model():
    """ The spaCy pipeline must be loaded and able to tokenize a short sentence. """
    assert callable(nlp)
    tokens = list(nlp("Hello world!"))
    assert len(tokens) == 3