def extract_topics(curr_uttr):
    """Extract entities as topics for news request.

    If no named entities are found, fall back to (non-named) noun phrases.

    Args:
        curr_uttr: current human utterance dictionary

    Returns:
        list of mentioned entities/nounphrases
    """
    entities = get_entities(curr_uttr, only_named=True, with_labels=False)
    entities = [ent.lower() for ent in entities]
    # drop the wake-word "alexa" (only when the utterance itself starts with it)
    # and anything news-related (this extractor feeds the news skill)
    entities = [
        ent
        for ent in entities
        if not (ent == "alexa" and curr_uttr["text"].lower()[:5] == "alexa") and "news" not in ent
    ]
    if not entities:
        # no named entities: fall back to all noun phrases,
        # skipping banned unigrams and news mentions
        for ent in get_entities(curr_uttr, only_named=False, with_labels=False):
            if ent.lower() not in BANNED_UNIGRAMS and "news" not in ent.lower():
                if ent not in entities:  # avoid duplicates
                    entities.append(ent)
    return [ent for ent in entities if len(ent) > 0]
def kbqa_formatter_dialog(dialog: Dict):
    """Format the last human utterance as a KBQA annotator request.

    Used by: kbqa annotator.
    """
    last_human_uttr = dialog["human_utterances"][-1]
    annotations = last_human_uttr["annotations"]
    # prefer sentence segments, then punctuated sentence, then raw text
    if "sentseg" not in annotations:
        sentences = [deepcopy(last_human_uttr["text"])]
    elif "segments" in annotations["sentseg"]:
        sentences = deepcopy(annotations["sentseg"]["segments"])
    else:
        sentences = [deepcopy(annotations["sentseg"]["punct_sent"])]

    named = get_entities(last_human_uttr, only_named=True, with_labels=False)
    noun_phrases = get_entities(last_human_uttr, only_named=False, with_labels=False)
    # named entities take precedence over noun phrases; always ship one (possibly empty) batch
    if named:
        entities = [named]
    elif noun_phrases:
        entities = [noun_phrases]
    else:
        entities = [[]]
    return [{"x_init": sentences, "entities": entities}]
def filter_nouns_for_conceptnet(annotated_phrase):
    """Extract noun subjects suitable for ConceptNet-based opinion expression.

    Args:
        annotated_phrase: annotated utterance dict with at least a "text" key.

    Returns:
        list of cleaned noun phrases; empty if the utterance is toxic or banned.
    """
    if get_toxic(annotated_phrase, probs=False):
        # never express opinions about toxic utterances
        return []
    subjects = get_entities(annotated_phrase, only_named=False, with_labels=False)
    # strip possessive pronouns and articles from each candidate noun
    subjects = [re.sub(possessive_pronouns, "", noun) for noun in subjects]
    subjects = [re.sub(r"(\bthe\b|\ba\b|\ban\b)", "", noun) for noun in subjects]
    subjects = [noun for noun in subjects if noun not in BANNED_NOUNS_FOR_OPINION_EXPRESSION]
    # The banned-words check depends only on the whole utterance text, not on the noun,
    # so it is hoisted out of the per-noun comprehension (same behavior: one banned word
    # empties the whole list).
    # NOTE(review): possibly this was meant to test each noun instead — confirm intent.
    if re.search(BANNED_WORDS_IN_NOUNS_FOR_OPINION_EXPRESSION_COMPILED, annotated_phrase["text"]):
        subjects = []
    # drop noun phrases overlapping with detected named entities
    for ent in get_entities(annotated_phrase, only_named=True, with_labels=True):
        subjects = remove_intersections_of_entities(ent["text"], subjects)
    # single-word subjects must be nouns or proper nouns (spaCy coarse POS check)
    bad_subjects = []
    for subject in subjects:
        if len(subject.split()) == 1:
            doc = nlp(subject)
            if doc[0].pos not in [PROPN, NOUN]:
                bad_subjects.append(subject)
    subjects = [noun for noun in subjects if noun not in bad_subjects]
    subjects = [noun for noun in subjects if len(noun) > 0]
    return subjects
def get_main_info_annotations(annotated_utterance):
    """Collect the four main annotation groups for an utterance.

    Returns:
        tuple of (intents, topics, named entities, noun phrases).
    """
    return (
        get_intents(annotated_utterance, which="all"),
        get_topics(annotated_utterance, which="all"),
        get_entities(annotated_utterance, only_named=True, with_labels=False),
        get_entities(annotated_utterance, only_named=False, with_labels=False),
    )
def get_spacy_nounphrases(utt):
    """Return entity phrases from the utterance whose POS tag is not a verb form."""
    candidate_phrases = get_entities(utt, only_named=False, with_labels=False)
    # keep only phrases that are not tagged as verbs (VB*)
    return [ph for ph in candidate_phrases if not pos_tag([ph])[0][1].startswith("VB")]
def extract_from_dialog(dialog):
    """Extract verb-noun phrases either from the previous news skill output or from the last utterance.

    Returns:
        tuple (verb_noun_phrases, sources) as produced by `extract_verb_noun_phrases`.
    """
    prev_news_outputs = get_skill_outputs_from_dialog(
        dialog["utterances"][-3:], "news_api_skill", activated=True)
    prev_news_output = prev_news_outputs[-1] if len(prev_news_outputs) > 0 else {}
    last_human_uttr = dialog["human_utterances"][-1]
    no_detected = is_no(last_human_uttr)
    nounphrases = get_entities(last_human_uttr, only_named=False, with_labels=False)

    news_status = prev_news_output.get("news_status", "finished")
    # reuse the news skill's own text when it just asked for an opinion,
    # or when it offered details and the user declined
    use_prev_news_text = news_status == OPINION_REQUEST_STATUS or (
        news_status == OFFERED_NEWS_DETAILS_STATUS and no_detected)
    if use_prev_news_text:
        source_text = prev_news_outputs[-1].get("text", "nothing")
    else:
        source_text = dialog["utterances"][-1]["text"]
    return extract_verb_noun_phrases(source_text, only_i_do_that=False, nounphrases=nounphrases)
def ask_if_user_thinks_that_gaming_is_unhealthy_response(vars):
    """Ask whether the user agrees that excessive gaming is unhealthy, setting confidence flags."""
    response = (
        "It is known that people who play computer games too much can have health problems, "
        "both physical and emotional. Do you agree?")
    human_uttr = state_utils.get_last_human_utterance(vars)
    entities = get_entities(human_uttr, only_named=True)
    logger.info(
        f"(ask_if_user_thinks_that_gaming_is_unhealthy_response)entities: {entities}"
    )
    bot_text = state_utils.get_last_bot_utterance(vars).get("text", "").lower()

    flags_set = False
    user_starts_gaming_topic = if_chat_about_particular_topic(
        human_uttr, compiled_pattern=VIDEO_GAME_WORDS_COMPILED_PATTERN)
    if not user_starts_gaming_topic:
        # defer to the previous bot phrase logic when the user did not bring up gaming
        flags_set, response = common_nlg.maybe_set_confidence_and_continue_based_on_previous_bot_phrase(
            vars, bot_text, response)
    if flags_set:
        return response

    if entities:
        state_utils.set_confidence(vars, confidence=common_nlg.CONF_092_CAN_CONTINUE)
        state_utils.set_can_continue(vars, continue_flag=common_constants.CAN_CONTINUE_SCENARIO)
    else:
        state_utils.set_confidence(vars, confidence=common_nlg.CONF_1)
        state_utils.set_can_continue(vars, continue_flag=common_constants.MUST_CONTINUE)
    return response
def get_nounphrases_from_human_utterance(ctx: Context, actor: Actor):
    """Return all (not only named) entities of the last human utterance, without labels."""
    last_uttr = get_last_human_utterance(ctx, actor)
    return common_utils.get_entities(last_uttr, only_named=False, with_labels=False)
def get_named_entities(utt):
    """Return named entities, skipping known unigrams and the leading wake-word "alexa"."""
    starts_with_alexa = utt["text"].lower()[:5] == "alexa"
    return [
        ent
        for ent in get_entities(utt, only_named=True, with_labels=False)
        if ent not in UNIGRAMS and not (ent == "alexa" and starts_with_alexa)
    ]
def get_named_entities_from_human_utterance(ctx: Context, actor: Actor):
    """Return labeled named entities from the last human utterance.

    Each entity is a dict, e.g. {"text": "London", "type": "LOC"}.
    """
    last_uttr = get_last_human_utterance(ctx, actor)
    return common_utils.get_entities(last_uttr, only_named=True, with_labels=True)
def extract_possible_names(annotated_utterance, only_named, with_labels):
    """Collect entities (and, unless only_named, spaCy noun phrases) as possible names."""
    found = get_entities(
        annotated_utterance,
        only_named=only_named,
        with_labels=with_labels,
    )
    if only_named:
        return found
    spacy_phrases = annotated_utterance["annotations"].get("spacy_nounphrases", [])
    if with_labels:
        # match the labeled-entity shape used elsewhere
        spacy_phrases = [{"text": phrase, "label": "misc"} for phrase in spacy_phrases]
    found += spacy_phrases
    return found
def el_formatter_dialog(dialog: Dict):
    """Format the last human utterance as an entity_linking annotator request.

    Used by: entity_linking annotator.
    """
    num_last_utterances = 2
    last_human_uttr = dialog["human_utterances"][-1]
    ner_output = get_entities(last_human_uttr, only_named=True, with_labels=True)
    nounphrases = get_entities(last_human_uttr, only_named=False, with_labels=False)

    entity_substr_list = []
    for entity in ner_output or []:
        # keep well-formed entity dicts, dropping the wake-word "alexa"
        usable = entity and isinstance(entity, dict) and "text" in entity
        if usable and entity["text"].lower() != "alexa":
            entity_substr_list.append(entity["text"])
    entity_substr_lower_list = {substr.lower() for substr in entity_substr_list}

    dialog = utils.get_last_n_turns(dialog, bot_last_turns=1)
    dialog = utils.replace_with_annotated_utterances(dialog, mode="punct_sent")
    context = [[uttr["text"] for uttr in dialog["utterances"][-num_last_utterances:]]]

    # add noun phrases not already covered by the named entities
    for nounphrase in nounphrases or []:
        if nounphrase.lower() not in entity_substr_lower_list:
            entity_substr_list.append(nounphrase)
    entity_substr_list = list(set(entity_substr_list))
    return [{
        "entity_substr": [entity_substr_list],
        "template": [""],
        "context": context,
    }]
def add_bot_encounters(self, human_utters, bot_utters, human_utter_index):
    """Record bot-side encounters of this entity found in the first bot utterance."""
    bot_utter = bot_utters[0] if bot_utters else {}
    candidate_entities = get_entities(bot_utter, only_named=False, with_labels=False)
    skill = bot_utter.get("active_skill", "pre_start")
    for ent in candidate_entities:
        # only entities whose lemma contains this tracker's name
        if self.name not in wnl.lemmatize(ent, "n"):
            continue
        self.bot_encounters.append(
            BotEntityEncounter(
                human_utterance_index=human_utter_index,
                full_name=ent,
                skill_name=skill,
            )
        )
def fav_food_check(vars):
    """Check whether the user's last utterance is about (favorite) food and not a refusal."""
    user_text = state_utils.get_last_human_utterance(vars)["text"]
    fav_food_entities = get_entities(
        state_utils.get_last_human_utterance(vars), only_named=False, with_labels=False)
    mentions_food = bool(re.search(FOOD_WORDS_RE, user_text))
    # evaluate all conditions eagerly, as the original all([...]) form does
    conditions = [
        any([fav_food_entities, check_conceptnet(vars), mentions_food]),
        not bool(re.search(NO_WORDS_RE, user_text)),
        not dont_want_talk(vars),
    ]
    flag = all(conditions)
    logger.info(f"fav_food_check {flag}")
    return flag
def fav_food_request(ngrams, vars):
    """Check whether the last human utterance is a food-related request (no refusal, no other requests)."""
    utt_text = state_utils.get_last_human_utterance(vars)["text"]
    food_entities = get_entities(
        state_utils.get_last_human_utterance(vars), only_named=False, with_labels=False)
    food_mention = re.search(FOOD_WORDS_RE, utt_text)
    # evaluate all conditions eagerly, as the original all([...]) form does
    checks = [
        any([food_entities, check_conceptnet(vars), food_mention]),
        condition_utils.no_requests(vars),
        not re.search(NO_WORDS_RE, utt_text),
    ]
    flag = all(checks)
    logger.info(f"fav_food_request {flag}")
    return flag
def generate_acknowledgement_response(ctx: Context) -> REPLY_TYPE:
    """Generate acknowledgement for human questions.

    Returns:
        string acknowledgement (templated acknowledgement from `midas_acknowledgements.json` file),
        confidence (default ACKNOWLEDGEMENT_CONF),
        human attributes (empty),
        bot attributes (empty),
        attributes (with response parts set to acknowledgement)
    """
    dialog = ctx.misc["agent"]["dialog"]
    curr_intents = get_current_intents(dialog["human_utterances"][-1])
    # intents for which we have templated acknowledgements
    curr_considered_intents = [
        intent for intent in curr_intents if intent in MIDAS_INTENT_ACKNOWLEDGEMENTS
    ]
    ackn_response = ""
    attr = {}
    curr_human_entities = get_entities(dialog["human_utterances"][-1], only_named=False, with_labels=False)
    contains_question = is_any_question_sentence_in_utterance(
        dialog["human_utterances"][-1])
    # we generate acknowledgement ONLY if we have some entities!
    if curr_considered_intents and len(
            curr_human_entities) and contains_question:
        # can generate acknowledgement
        ackn_response, attr = generate_acknowledgement(
            dialog["human_utterances"][-1], curr_intents, curr_considered_intents)
    elif contains_question:
        # question without usable intents/entities: reply with a generic compliment
        ackn_response = random.choice(MANY_INTERESTING_QUESTIONS)
        attr = {"response_parts": ["acknowledgement"]}
    elif not contains_question and "opinion" in curr_considered_intents:
        # opinion statement (no question): use the opinion template with no entity
        ackn_response = get_midas_intent_acknowledgement("opinion", "")
    return ackn_response, ACKNOWLEDGEMENT_CONF, {}, {}, attr
def generate_acknowledgement(last_human_utterances, curr_intents,
                             curr_considered_intents):
    """Build a MIDAS-templated acknowledgement for the last human utterance.

    Args:
        last_human_utterances: annotated human utterance (single utterance dict).
        curr_intents: all detected MIDAS intents.
        curr_considered_intents: intents having templated acknowledgements.

    Returns:
        tuple (acknowledgement string, attributes dict with response_parts).
    """
    ackn_response = ""
    # some intents are acknowledged via the mentioned noun phrase
    is_need_nounphrase_intent = any(
        [intent in curr_intents for intent in ["open_question_opinion"]])
    if is_need_nounphrase_intent:
        curr_nounphrase = get_entities(last_human_utterances, only_named=False, with_labels=False)
        # take the last non-empty noun phrase, if any
        curr_nounphrase = curr_nounphrase[-1] if len(
            curr_nounphrase) > 0 and curr_nounphrase[-1] else ""
        if curr_nounphrase:
            ackn_response = get_midas_intent_acknowledgement(
                curr_considered_intents[-1], curr_nounphrase)
    else:
        # to reformulate question, we take only the last human sentence
        last_human_sent = get_last_human_sent(last_human_utterances)
        curr_reformulated_question = reformulate_question_to_statement(
            last_human_sent)
        ackn_response = get_midas_intent_acknowledgement(
            curr_considered_intents[-1], curr_reformulated_question)
    attr = {"response_parts": ["acknowledgement"]}
    return ackn_response, attr
def extract_movies_names_from_annotations(annotated_uttr, check_full_utterance=False):
    """Extract movie titles from utterance annotations.

    Tries `entity_detection` labels first; if nothing is found, falls back to
    `wiki_parser` entities whose types intersect film-related topic types.

    Args:
        annotated_uttr: annotated utterance dict.
        check_full_utterance: kept for interface compatibility (currently unused).

    Returns:
        list of titles, or None if neither annotator produced results.
    """
    movies_titles = None
    if "entity_detection" in annotated_uttr["annotations"]:
        movies_titles = []
        entities = get_entities(annotated_uttr, only_named=False, with_labels=True)
        for ent in entities:
            # "videoname" is the entity_detection label for movie titles
            if ent.get("label", "") == "videoname":
                movies_titles += [ent["text"]]

    # for now let's remove full utterance check but add entity_linking usage!
    if not movies_titles:  # either None or empty list
        if "wiki_parser" in annotated_uttr["annotations"]:
            movies_titles = []
            for ent_name, ent_dict in annotated_uttr["annotations"][
                    "wiki_parser"].get("entities_info", {}).items():
                # collect direct and 2-hop types for the entity
                instance_of_types = [
                    el[0] for el in ent_dict.get("instance of", [])
                ]
                instance_of_types += [
                    el[0] for el in ent_dict.get("types_2hop", [])
                ]
                # accept film-typed entities with sufficiently confident linking
                if (len(
                        set(instance_of_types).intersection(
                            set(topic_types["film"]))) > 0
                        and ent_dict.get("token_conf", 0.0) >= 0.5
                        and ent_dict.get("conf", 0.0) >= 0.5):
                    movies_titles += [
                        ent_dict.get("entity_label", ent_name).lower()
                    ]
    # if check_full_utterance:
    #     movies_titles += [re.sub(r"[\.\?,!]", "", annotated_uttr["text"]).strip()]
    return movies_titles
def tag_based_response_selection(dialog, candidates, scores, confidences, bot_utterances, all_prev_active_skills=None):
    """Select the best response candidate using tag/intent-based categorization.

    Candidates are bucketed by (activity state, topic/entity overlap, dialog-act
    requirement); then the best hypothesis is picked, optionally replaced by or
    extended with a prompt, and optionally prefixed with an acknowledgement.

    Args:
        dialog: dialog dict with "human_utterances", "bot_utterances", "utterances".
        candidates: list of hypothesis dicts (skill_name, text, confidence, ...).
        scores: per-candidate annotator scores.
        confidences: per-candidate confidence values.
        bot_utterances: previous bot utterance texts (used by `pickup_best_id`).
        all_prev_active_skills: optional list of previously active skill names.

    Returns:
        tuple (best_candidate dict, best candidate id, per-candidate single scores).
    """
    all_prev_active_skills = all_prev_active_skills if all_prev_active_skills is not None else []
    # count how many times each skill has been active before
    all_prev_active_skills = Counter(all_prev_active_skills)
    annotated_uttr = dialog["human_utterances"][-1]
    all_user_intents, all_user_topics, all_user_named_entities, all_user_nounphrases = get_main_info_annotations(
        annotated_uttr)

    _is_switch_topic_request = is_switch_topic(annotated_uttr)
    _is_force_intent = any(
        [_intent in all_user_intents for _intent in FORCE_INTENTS_IC.keys()])
    # if user utterance contains any question (REGEXP & punctuation check!)
    _is_require_action_intent = is_any_question_sentence_in_utterance({
        "text":
        annotated_uttr.get("annotations", {}).get("sentseg", {}).get("punct_sent", annotated_uttr["text"])
    })
    # if user utterance contains any question AND requires some intent by socialbot
    _is_require_action_intent = _is_require_action_intent and any([
        _intent in all_user_intents for _intent in REQUIRE_ACTION_INTENTS.keys()
    ])
    _force_intents_detected = [
        _intent for _intent in FORCE_INTENTS_IC.keys() if _intent in all_user_intents
    ]
    # list of user intents which require some action by socialbot
    _require_action_intents_detected = [
        _intent for _intent in REQUIRE_ACTION_INTENTS.keys() if _intent in all_user_intents
    ]
    _force_intents_skills = sum([
        FORCE_INTENTS_IC.get(_intent, []) for _intent in _force_intents_detected
    ], [])
    # list of intents required by the socialbot
    _required_actions = sum([
        REQUIRE_ACTION_INTENTS.get(_intent, []) for _intent in _require_action_intents_detected
    ], [])
    _contains_entities = len(
        get_entities(annotated_uttr, only_named=False, with_labels=False)) > 0
    _is_active_skill_can_not_continue = False

    _prev_bot_uttr = dialog["bot_utterances"][-1] if len(
        dialog["bot_utterances"]) > 0 else {}
    _prev_active_skill = dialog["bot_utterances"][-1]["active_skill"] if len(
        dialog["bot_utterances"]) > 0 else ""
    _prev_prev_active_skill = dialog["bot_utterances"][-2][
        "active_skill"] if len(dialog["bot_utterances"]) > 1 else ""
    _no_script_two_times_in_a_row = False
    if _prev_active_skill and _prev_prev_active_skill:
        if all([
                skill not in ACTIVE_SKILLS + ALMOST_ACTIVE_SKILLS
                for skill in [_prev_active_skill, _prev_prev_active_skill]
        ]):
            _no_script_two_times_in_a_row = True
    disliked_skills = get_updated_disliked_skills(
        dialog, can_not_be_disliked_skills=CAN_NOT_BE_DISLIKED_SKILLS)

    _is_dummy_linkto_available = any([
        cand_uttr["skill_name"] == "dummy_skill"
        and cand_uttr.get("type", "") == "link_to_for_response_selector"
        for cand_uttr in candidates
    ])

    # pre-create empty buckets for every (activity, topic/entity/db, dialog-act) combination
    categorized_hyps = {}
    categorized_prompts = {}
    for dasuffix in ["reqda", ""]:
        for actsuffix in ["active", "continued", "finished"]:
            for suffix in [
                    "same_topic_entity_no_db",
                    "same_topic_entity_db",
                    "othr_topic_entity_no_db",
                    "othr_topic_entity_db",
            ]:
                categorized_hyps[f"{actsuffix}_{suffix}_{dasuffix}"] = []
                categorized_prompts[f"{actsuffix}_{suffix}_{dasuffix}"] = []

    CASE = ""
    acknowledgement_hypothesis = {}

    for cand_id, cand_uttr in enumerate(candidates):
        if confidences[cand_id] == 0.0 and cand_uttr[
                "skill_name"] not in ACTIVE_SKILLS:
            logger.info(
                f"Dropping cand_id: {cand_id} due to toxicity/badlists")
            continue
        all_cand_intents, all_cand_topics, all_cand_named_entities, all_cand_nounphrases = get_main_info_annotations(
            cand_uttr)
        skill_name = cand_uttr["skill_name"]
        _is_dialog_abandon = get_dialog_breakdown_annotations(
            cand_uttr) and PRIORITIZE_NO_DIALOG_BREAKDOWN
        _is_just_prompt = (cand_uttr["skill_name"] == "dummy_skill" and any([
            question_type in cand_uttr.get("type", "") for question_type in
            ["normal_question", "link_to_for_response_selector"]
        ])) or cand_uttr.get("response_parts", []) == ["prompt"]
        if cand_uttr["confidence"] == 1.0:
            # for those hypotheses where developer forgot to set tag to MUST_CONTINUE
            cand_uttr["can_continue"] = MUST_CONTINUE
        _can_continue = cand_uttr.get("can_continue", CAN_NOT_CONTINUE)

        _user_wants_to_chat_about_topic = (
            if_chat_about_particular_topic(annotated_uttr)
            and "about it" not in annotated_uttr["text"].lower())
        _user_does_not_want_to_chat_about_topic = if_not_want_to_chat_about_particular_topic(
            annotated_uttr)
        _user_wants_bot_to_choose_topic = if_choose_topic(
            annotated_uttr, _prev_bot_uttr)

        if any([
                phrase.lower() in cand_uttr["text"].lower()
                for phrase in LINK_TO_PHRASES
        ]):
            # add `prompt` to response_parts if any linkto phrase in hypothesis
            cand_uttr["response_parts"] = cand_uttr.get("response_parts", []) + ["prompt"]

        # identifies if candidate contains named entities from last human utterance
        _same_named_entities = (len(
            get_common_tokens_in_lists_of_strings(
                all_cand_named_entities, all_user_named_entities)) > 0)
        # identifies if candidate contains all (not only named) entities from last human utterance
        _same_nounphrases = len(
            get_common_tokens_in_lists_of_strings(all_cand_nounphrases,
                                                  all_user_nounphrases)) > 0
        _same_topic_entity = (_same_named_entities or _same_nounphrases
                              ) and PRIORITIZE_WITH_SAME_TOPIC_ENTITY

        _is_active_skill = (_prev_active_skill == cand_uttr["skill_name"]
                            or cand_uttr.get("can_continue", "") == MUST_CONTINUE)
        _is_active_skill = _is_active_skill and skill_name in ACTIVE_SKILLS
        # NOTE(review): `all_prev_active_skills.get(skill_name, [])` uses a list default;
        # `[] < 10` would raise TypeError — a `0` default looks intended. Confirm.
        _is_active_skill = _is_active_skill and (_can_continue in [
            MUST_CONTINUE, CAN_CONTINUE_SCENARIO, CAN_NOT_CONTINUE
        ] or (_can_continue == CAN_CONTINUE_PROMPT
              and all_prev_active_skills.get(skill_name, []) < 10))
        _is_active_skill = _is_active_skill and PRIORITIZE_SCRIPTED_SKILLS
        if _is_active_skill:
            # we will forcibly add prompt if current scripted skill finishes scenario,
            # and has no opportunity to continue at all.
            _is_active_skill_can_not_continue = _is_active_skill and _can_continue in [
                CAN_NOT_CONTINUE
            ]

        if _is_force_intent:
            # =====force intents, choose as best_on_topic hypotheses from skills responding this request=====
            CASE = "Force intent."
            if cand_uttr["skill_name"] in _force_intents_skills:
                categorized_hyps, categorized_prompts = categorize_candidate(
                    cand_id,
                    skill_name,
                    categorized_hyps,
                    categorized_prompts,
                    _is_just_prompt,
                    _is_active_skill,
                    _can_continue,
                    _same_topic_entity,
                    _is_dialog_abandon,
                    _is_required_da=False,
                )
        elif _is_switch_topic_request or _user_does_not_want_to_chat_about_topic or _user_wants_bot_to_choose_topic:
            # =====direct request by user to switch the topic of current conversation=====
            # give priority to dummy linkto hypothesis if available, else other prompts if available.
            _is_active_skill = (
                cand_uttr.get("type", "") == "link_to_for_response_selector"
                if _is_dummy_linkto_available else _is_just_prompt)
            # no priority to must_continue to skip incorrect continuation of script
            _can_continue = CAN_CONTINUE_SCENARIO if _can_continue == MUST_CONTINUE else _can_continue
            CASE = "Switch topic intent."
            if len(all_user_named_entities) > 0 or len(
                    all_user_nounphrases) > 0:
                # -----user defines new topic/entity-----
                # _same_topic_entity does not depend on hyperparameter in these case
                _same_topic_entity = _same_named_entities or _same_nounphrases
                categorized_hyps, categorized_prompts = categorize_candidate(
                    cand_id,
                    skill_name,
                    categorized_hyps,
                    categorized_prompts,
                    _is_just_prompt,
                    _is_active_skill,
                    _can_continue,
                    _same_topic_entity,
                    _is_dialog_abandon,
                    _is_required_da=False,
                )
            else:
                # -----user want socialbot to define new topic/entity-----
                categorized_hyps, categorized_prompts = categorize_candidate(
                    cand_id,
                    skill_name,
                    categorized_hyps,
                    categorized_prompts,
                    _is_just_prompt,
                    _is_active_skill,
                    _can_continue,
                    _same_topic_entity,
                    _is_dialog_abandon,
                    _is_required_da=False,
                )
        elif _user_wants_to_chat_about_topic:
            # user wants to chat about particular topic
            CASE = "User wants to talk about topic."
            # in this case we do not give priority to previously active skill (but give to must continue skill!)
            # because now user wants to talk about something particular
            _is_active_skill = cand_uttr.get("can_continue", "") == MUST_CONTINUE
            # _same_topic_entity does not depend on hyperparameter in these case
            _same_topic_entity = _same_named_entities or _same_nounphrases
            categorized_hyps, categorized_prompts = categorize_candidate(
                cand_id,
                skill_name,
                categorized_hyps,
                categorized_prompts,
                _is_just_prompt,
                _is_active_skill,
                _can_continue,
                _same_topic_entity,
                _is_dialog_abandon,
                _is_required_da=False,
            )
        elif _is_require_action_intent and PRIORITIZE_WITH_REQUIRED_ACT:
            # =====user intent requires particular action=====
            CASE = "User intent requires action. USER UTTERANCE CONTAINS QUESTION."
            _is_grounding_reqda = (skill_name == "dff_grounding_skill"
                                   and cand_uttr.get(
                                       "type", "") == "universal_response")
            _is_active_skill = cand_uttr.get(
                "can_continue", "") == MUST_CONTINUE  # no priority to prev active skill
            _can_continue = CAN_NOT_CONTINUE  # no priority to scripted skills
            if set(all_cand_intents).intersection(
                    set(_required_actions
                        )) or _is_grounding_reqda or _is_active_skill:
                # -----one of the can intent is in intents required by user-----
                categorized_hyps, categorized_prompts = categorize_candidate(
                    cand_id,
                    skill_name,
                    categorized_hyps,
                    categorized_prompts,
                    _is_just_prompt,
                    _is_active_skill,
                    _can_continue,
                    _same_topic_entity,
                    _is_dialog_abandon,
                    _is_required_da=True,
                )
            else:
                # -----NO required dialog acts-----
                categorized_hyps, categorized_prompts = categorize_candidate(
                    cand_id,
                    skill_name,
                    categorized_hyps,
                    categorized_prompts,
                    _is_just_prompt,
                    _is_active_skill,
                    _can_continue,
                    _same_topic_entity,
                    _is_dialog_abandon,
                    _is_required_da=False,
                )
        else:
            # =====user intent does NOT require particular action=====
            CASE = "General case."
            categorized_hyps, categorized_prompts = categorize_candidate(
                cand_id,
                skill_name,
                categorized_hyps,
                categorized_prompts,
                _is_just_prompt,
                _is_active_skill,
                _can_continue,
                _same_topic_entity,
                _is_dialog_abandon,
                _is_required_da=False,
            )

        # a bit of rule based help
        if (len(dialog["human_utterances"]) == 1
                and cand_uttr["skill_name"] == "dff_friendship_skill"
                and greeting_spec in cand_uttr["text"]):
            categorized_hyps = add_to_top1_category(cand_id, categorized_hyps,
                                                    _is_require_action_intent)
        elif (cand_uttr["skill_name"] == "dff_friendship_skill"
              and (how_are_you_spec in cand_uttr["text"]
                   or what_i_can_do_spec in cand_uttr["text"])
              and len(dialog["utterances"]) < 16):
            categorized_hyps = add_to_top1_category(cand_id, categorized_hyps,
                                                    _is_require_action_intent)
        # elif cand_uttr["skill_name"] == 'program_y_dangerous' and cand_uttr['confidence'] == 0.98:
        #     categorized_hyps = add_to_top1_category(cand_id, categorized_hyps, _is_require_action_intent)
        elif cand_uttr[
                "skill_name"] == "small_talk_skill" and is_sensitive_situation(
                    dialog["human_utterances"][-1]):
            # let small talk to talk about sex ^_^
            categorized_hyps = add_to_top1_category(cand_id, categorized_hyps,
                                                    _is_require_action_intent)
        elif cand_uttr["confidence"] >= 1.0:
            # -------------------- SUPER CONFIDENCE CASE HERE! --------------------
            categorized_hyps = add_to_top1_category(cand_id, categorized_hyps,
                                                    _is_require_action_intent)

        if cand_uttr["skill_name"] == "dff_grounding_skill" and [
                "acknowledgement"
        ] == cand_uttr.get("response_parts", []):
            acknowledgement_hypothesis = deepcopy(cand_uttr)

    logger.info(f"Current CASE: {CASE}")
    # now compute current scores as one float value
    curr_single_scores = compute_curr_single_scores(candidates, scores,
                                                    confidences)

    # remove disliked skills from hypotheses
    if IGNORE_DISLIKED_SKILLS:
        for category in categorized_hyps:
            new_ids = []
            for cand_id in categorized_hyps[category]:
                if (candidates[cand_id]["skill_name"] in disliked_skills
                        and candidates[cand_id].get(
                            "can_continue", CAN_NOT_CONTINUE) == MUST_CONTINUE):
                    disliked_skills.remove(candidates[cand_id]["skill_name"])
                if candidates[cand_id]["skill_name"] not in disliked_skills:
                    new_ids.append(cand_id)
            categorized_hyps[category] = deepcopy(new_ids)
        for category in categorized_prompts:
            new_ids = []
            for cand_id in categorized_prompts[category]:
                if (candidates[cand_id]["skill_name"] in disliked_skills
                        and candidates[cand_id].get(
                            "can_continue", CAN_NOT_CONTINUE) == MUST_CONTINUE):
                    disliked_skills.remove(candidates[cand_id]["skill_name"])
                if candidates[cand_id]["skill_name"] not in disliked_skills:
                    new_ids.append(cand_id)
            categorized_prompts[category] = deepcopy(new_ids)

    best_cand_id = pickup_best_id(categorized_hyps, candidates,
                                  curr_single_scores, bot_utterances)
    best_candidate = candidates[best_cand_id]
    best_candidate["human_attributes"] = best_candidate.get(
        "human_attributes", {})
    # save updated disliked skills to human attributes of the best candidate
    best_candidate["human_attributes"]["disliked_skills"] = disliked_skills
    logger.info(f"Best candidate: {best_candidate}")
    n_sents_without_prompt = len(sent_tokenize(best_candidate["text"]))
    _is_best_not_script = best_candidate[
        "skill_name"] not in ACTIVE_SKILLS + ALMOST_ACTIVE_SKILLS
    # NOTE(review): "******" looks like a redacted placeholder (a question-mark
    # check would be expected here) — confirm against the original source.
    no_question_by_user = "******" not in dialog["human_utterances"][-1][
        "annotations"].get("sentseg", {}).get(
            "punct_sent", dialog["human_utterances"][-1]["text"])

    # if `no` to 1st in a row linkto question, and chosen response is not from scripted skill
    _no_to_first_linkto = is_no(dialog["human_utterances"][-1]) and any([
        phrase.lower() in _prev_bot_uttr.get("text", "").lower()
        for phrase in LINK_TO_PHRASES
    ])
    # if chosen short response or question by not-scripted skill
    _is_short_or_question_by_not_script = _is_best_not_script and (
        "?" in best_candidate["text"]
        or len(best_candidate["text"].split()) < 4)
    _no_questions_for_3_steps = not any([
        is_any_question_sentence_in_utterance(uttr)
        for uttr in dialog["bot_utterances"][-3:]
    ])

    if PRIORITIZE_PROMTS_WHEN_NO_SCRIPTS:
        if (_no_script_two_times_in_a_row
                and _is_short_or_question_by_not_script
                and no_question_by_user) or (_no_to_first_linkto
                                             and _is_best_not_script):
            # if no scripted skills 2 time sin a row before, current chosen best cand is not scripted, contains `?`,
            # and user utterance does not contain "?", replace utterance with dummy!
            best_prompt_id = pickup_best_id(categorized_prompts, candidates,
                                            curr_single_scores, bot_utterances)
            best_candidate = deepcopy(candidates[best_prompt_id])
            best_cand_id = best_prompt_id

    if does_not_require_prompt(candidates, best_cand_id):
        # the candidate already contains a prompt or a question or of a length more than 200 symbols
        logger.info(
            "Best candidate contains prompt, question, request or length of > 200 symbols. Do NOT add prompt."
        )
        pass
    elif sum(categorized_prompts.values(), []):
        # best cand is 3d times in a row not scripted skill, let's append linkto
        # need to add some prompt, and have a prompt
        _add_prompt_forcibly = best_candidate[
            "skill_name"] == _prev_active_skill and _is_active_skill_can_not_continue
        _add_prompt_forcibly = _add_prompt_forcibly and not _contains_entities
        # prompts are added:
        # - in 1 out of 10 cases, if current human utterance does not contain entities,
        # and no prompt for several last bot utterances
        # - if PRIORITIZE_PROMTS_WHEN_NO_SCRIPTS and current utterance is from active on prev step scripted skill and
        # it has a status can-not-continue
        # - if PRIORITIZE_PROMTS_WHEN_NO_SCRIPTS and last 2 bot uttr are not from scripted skill,
        # and current best uttr is also from not-scripted skill
        if ((prompt_decision() and not _contains_entities
             and _no_questions_for_3_steps)
                or (_add_prompt_forcibly and PRIORITIZE_PROMTS_WHEN_NO_SCRIPTS)
                or (PRIORITIZE_PROMTS_WHEN_NO_SCRIPTS
                    and _no_script_two_times_in_a_row
                    and _is_best_not_script)):
            logger.info("Decided to add a prompt to the best candidate.")
            best_prompt_id = pickup_best_id(categorized_prompts, candidates,
                                            curr_single_scores, bot_utterances)
            # as we have only one active skill, let's consider active skill as that one providing prompt
            # but we also need to reassign all the attributes
            best_prompt = candidates[best_prompt_id]
            best_candidate[
                "text"] = f'{best_candidate["text"]} {best_prompt["text"]}'
            best_candidate["attributes"] = best_candidate.get("attributes", {})
            best_candidate["attributes"]["prompt_skill"] = best_prompt
            # anyway we must combine used links
            best_candidate["human_attributes"] = best_candidate.get(
                "human_attributes", {})
            best_candidate["human_attributes"] = join_used_links_in_attributes(
                best_candidate["human_attributes"],
                best_prompt.get("human_attributes", {}))
            if len(best_candidate["human_attributes"]["used_links"]) == 0:
                best_candidate["human_attributes"].pop("used_links")

    was_ackn = if_acknowledgement_in_previous_bot_utterance(dialog)
    best_resp_cont_ackn = "acknowledgement" in best_candidate.get(
        "response_parts", [])
    if (ADD_ACKNOWLEDGMENTS_IF_POSSIBLE and acknowledgement_hypothesis
            and acknowledgement_decision(all_user_intents)
            and n_sents_without_prompt == 1 and not was_ackn
            and not best_resp_cont_ackn):
        logger.info(
            "Acknowledgement is given, Final hypothesis contains only 1 sentence, no ackn in prev bot uttr,"
            "and we decided to add an acknowledgement to the best candidate.")
        best_candidate[
            "text"] = f'{acknowledgement_hypothesis["text"]} {best_candidate["text"]}'
        best_candidate["response_parts"] = ["acknowledgement"
                                            ] + best_candidate.get(
                                                "response_parts", [])

    return best_candidate, best_cand_id, curr_single_scores
def get_entities_with_attitudes(annotated_uttr: dict, prev_annotated_uttr: dict):
    """Infer liked/disliked entities from the current and previous utterances.

    Matches love/hate/favorite question-answer patterns and sentiment to assign
    the first current entity or the last previous entity an attitude.

    Args:
        annotated_uttr: current annotated utterance.
        prev_annotated_uttr: previous annotated utterance (usually the bot's).

    Returns:
        dict {"like": [...], "dislike": [...]} with empty strings filtered out.
    """
    entities_with_attitudes = {"like": [], "dislike": []}
    all_entities = get_entities(annotated_uttr, only_named=False, with_labels=False)
    all_prev_entities = get_entities(prev_annotated_uttr, only_named=False, with_labels=False)
    logger.info(
        f"Consider all curr entities: {all_entities}, and all previous entities: {all_prev_entities}"
    )
    # first entity of the current utterance, last entity of the previous one
    curr_entity = all_entities[0] if all_entities else ""
    prev_entity = all_prev_entities[-1] if all_prev_entities else ""
    curr_uttr_text = annotated_uttr.get("text", "")
    prev_uttr_text = prev_annotated_uttr.get("text", "")
    curr_sentiment = get_sentiment(annotated_uttr, probs=False, default_labels=["neutral"])[0]
    current_first_sentence = (annotated_uttr.get("annotations", {}).get(
        "sentseg", {}).get("segments", [curr_uttr_text])[0])

    if "?" in current_first_sentence:
        # the user asks back — do not infer attitude
        pass
    elif WHAT_FAVORITE_PATTERN.search(prev_uttr_text):
        # what is your favorite ..? - animals -> `like animals`
        entities_with_attitudes["like"] += [curr_entity]
    elif WHAT_LESS_FAVORITE_PATTERN.search(prev_uttr_text):
        # what is your less favorite ..? - animals -> `dislike animals`
        entities_with_attitudes["dislike"] += [curr_entity]
    elif DO_YOU_LOVE_PATTERN.search(prev_uttr_text):
        if is_no(annotated_uttr):
            # do you love .. animals? - no -> `dislike animals`
            entities_with_attitudes["dislike"] += [prev_entity]
        elif is_yes(annotated_uttr):
            # do you love .. animals? - yes -> `like animals`
            entities_with_attitudes["like"] += [prev_entity]
    elif DO_YOU_HATE_PATTERN.search(prev_uttr_text):
        if is_no(annotated_uttr):
            # do you hate .. animals? - no -> `like animals`
            entities_with_attitudes["like"] += [prev_entity]
        elif is_yes(annotated_uttr):
            # do you hate .. animals? - yes -> `dislike animals`
            entities_with_attitudes["dislike"] += [prev_entity]
    elif I_HATE_PATTERN.search(curr_uttr_text):
        # i hate .. animals -> `dislike animals`
        entities_with_attitudes["dislike"] += [curr_entity]
    elif I_LOVE_PATTERN.search(curr_uttr_text) or MY_FAVORITE_PATTERN.search(
            curr_uttr_text):
        # i love .. animals -> `like animals`
        entities_with_attitudes["like"] += [curr_entity]
    elif if_chat_about_particular_topic(
            annotated_uttr,
            prev_annotated_uttr=prev_annotated_uttr,
            key_words=[curr_entity]):
        # what do you want to chat about? - ANIMALS -> `like animals`
        entities_with_attitudes["like"] += [curr_entity]
    elif if_not_want_to_chat_about_particular_topic(
            annotated_uttr, prev_annotated_uttr=prev_annotated_uttr):
        # i don't wanna talk about animals -> `dislike animals`
        entities_with_attitudes["dislike"] += [curr_entity]
    elif WHAT_DO_YOU_THINK_PATTERN.search(prev_uttr_text):
        if curr_sentiment == "negative":
            # what do you thank .. animals? - negative -> `dislike animals`
            entities_with_attitudes["dislike"] += [prev_entity]
        elif curr_sentiment == "positive":
            # what do you thank .. animals? - positive -> `like animals`
            entities_with_attitudes["like"] += [prev_entity]

    # drop empty-string entities collected above
    entities_with_attitudes["like"] = [
        el for el in entities_with_attitudes["like"] if el
    ]
    entities_with_attitudes["dislike"] = [
        el for el in entities_with_attitudes["dislike"] if el
    ]
    return entities_with_attitudes
def food_fact_response(vars):
    """Return a food-fact reply for the dff food skill.

    Picks a fact either via get_fact() for a guessed "<x>berry" entity (when the
    last bot utterance mentioned berries) or from the "fact_random" annotation of
    the last human utterance, then selects the reply text and sets
    confidence / can-continue flags on `vars`.
    """
    acknowledgements = ["I like it too.", "I'm not fond of it.", "It's awesome.", "Fantastic.", "Loving it.", "Yummy!"]
    human_utt = state_utils.get_last_human_utterance(vars)
    annotations = human_utt["annotations"]
    human_utt_text = human_utt["text"].lower()
    bot_utt_text = state_utils.get_last_bot_utterance(vars)["text"].lower()
    fact = ""
    berry_name = ""
    entity = ""
    # pre-annotated random facts: list of {"fact": ..., "entity_substr": ...} dicts
    facts = annotations.get("fact_random", [])
    if "berry" in bot_utt_text:
        berry_names = get_entities(state_utils.get_last_human_utterance(vars), only_named=False, with_labels=False)
        if berry_names:
            berry_name = berry_names[0]
        # single-word answer without "berry" in it: assume the user named a berry
        # prefix (e.g. "straw") and complete it to "<x>berry"
        if all(["berry" not in human_utt_text, len(human_utt_text.split()) == 1, berry_name]):
            berry_name += "berry"
            fact = get_fact(berry_name, f"fact about {berry_name}")
            entity = berry_name
        elif berry_name:
            if facts:
                fact = facts[0].get("fact", "")
                entity = facts[0].get("entity_substr", "")
    else:
        if facts:
            fact = facts[0].get("fact", "")
            entity = facts[0].get("entity_substr", "")
    try:
        state_utils.set_confidence(vars, confidence=CONF_MIDDLE)
        if re.search(DONOTKNOW_LIKE_RE, human_utt_text):
            state_utils.set_can_continue(vars, continue_flag=CAN_NOT_CONTINUE)
            return error_response(vars)
        # "I have never heard about it. Could you tell me more about that please."
        # NOTE(review): elsewhere in this file check_conceptnet(vars) is unpacked as a
        # (flag, item) tuple; a tuple is always truthy — confirm this call's return type.
        elif (not fact) and check_conceptnet(vars):
            state_utils.set_can_continue(vars, continue_flag=CAN_CONTINUE_SCENARIO)
            return "I haven't tried yet. Why do you like it?"
        elif not fact:
            state_utils.set_can_continue(vars, continue_flag=CAN_NOT_CONTINUE)
            return error_response(vars)
        elif fact and entity:
            state_utils.set_can_continue(vars, continue_flag=CAN_CONTINUE_SCENARIO)
            return f"{entity}. {random.choice(acknowledgements)} {fact}"
        elif fact:
            state_utils.set_can_continue(vars, continue_flag=CAN_CONTINUE_SCENARIO)
            return f"Okay. {fact}"
        else:
            state_utils.set_can_continue(vars, continue_flag=CAN_NOT_CONTINUE)
            return error_response(vars)
    except Exception as exc:
        # any failure degrades to the skill's generic error response
        logger.exception(exc)
        sentry_sdk.capture_exception(exc)
        return error_response(vars)
async def send(self, payload: Dict, callback: Callable):
    """Dummy-skill entry point: build fallback response candidates for a dialog.

    Produces up to four parallel candidate lists (cands/confs/attrs/human_attrs/
    bot_attrs, appended in lockstep): a random "don't know" answer, a question
    sharing a nounphrase with the user's utterance, a link-to question, and a fact
    sharing a nounphrase. Results are delivered asynchronously via `callback`.

    Args:
        payload: agent task payload; expects payload["payload"]["dialogs"][0] and
            payload["payload"]["all_prev_active_skills"][0]
        callback: coroutine function accepting task_id and response
    """
    try:
        st_time = time.time()
        dialog = deepcopy(payload["payload"]["dialogs"][0])
        is_sensitive_case = is_sensitive_situation(dialog["human_utterances"][-1])
        all_prev_active_skills = payload["payload"]["all_prev_active_skills"][0]

        curr_topics = get_topics(dialog["human_utterances"][-1], which="cobot_topics")
        curr_nounphrases = get_entities(dialog["human_utterances"][-1], only_named=False, with_labels=False)

        if len(curr_topics) == 0:
            curr_topics = ["Phatic"]
        logger.info(f"Found topics: {curr_topics}")
        # clean each nounphrase in place; blank out those matching the ignore pattern
        for i in range(len(curr_nounphrases)):
            np = re.sub(np_remove_expr, "", curr_nounphrases[i])
            np = re.sub(rm_spaces_expr, " ", np)
            if re.search(np_ignore_expr, np):
                curr_nounphrases[i] = ""
            else:
                curr_nounphrases[i] = np.strip()
        curr_nounphrases = [np for np in curr_nounphrases if len(np) > 0]
        logger.info(f"Found nounphrases: {curr_nounphrases}")

        # the five lists below are kept index-aligned: every candidate appends
        # exactly one element to each
        cands = []
        confs = []
        human_attrs = []
        bot_attrs = []
        attrs = []

        # candidate 1: generic "don't know" answer, always present
        cands += [choice(donotknow_answers)]
        confs += [0.5]
        attrs += [{"type": "dummy"}]
        human_attrs += [{}]
        bot_attrs += [{}]

        # candidate 2: a curated question sharing a nounphrase with the user
        # (only after 7+ dialog turns and never in sensitive situations)
        if len(dialog["utterances"]) > 14 and not is_sensitive_case:
            questions_same_nps = []
            for i, nphrase in enumerate(curr_nounphrases):
                for q_id in NP_QUESTIONS.get(nphrase, []):
                    questions_same_nps += [QUESTIONS_MAP[str(q_id)]]

            if len(questions_same_nps) > 0:
                logger.info("Found special nounphrases for questions. Return question with the same nounphrase.")
                cands += [choice(questions_same_nps)]
                confs += [0.5]
                attrs += [{"type": "nounphrase_question"}]
                human_attrs += [{}]
                bot_attrs += [{}]

        # candidate 3: link-to question steering to another skill
        link_to_question, human_attr = get_link_to_question(dialog, all_prev_active_skills)

        if link_to_question:
            _prev_bot_uttr = dialog["bot_utterances"][-2]["text"] if len(dialog["bot_utterances"]) > 1 else ""
            _bot_uttr = dialog["bot_utterances"][-1]["text"] if len(dialog["bot_utterances"]) > 0 else ""
            _prev_active_skill = (
                dialog["bot_utterances"][-1]["active_skill"] if len(dialog["bot_utterances"]) > 0 else ""
            )

            # user said "no" to the FIRST link-to phrase in a row (and friendship
            # skill was not the previous active skill)
            _no_to_first_linkto = any([phrase in _bot_uttr for phrase in LINK_TO_PHRASES])
            _no_to_first_linkto = _no_to_first_linkto and all(
                [phrase not in _prev_bot_uttr for phrase in LINK_TO_PHRASES]
            )
            _no_to_first_linkto = _no_to_first_linkto and is_no(dialog["human_utterances"][-1])
            _no_to_first_linkto = _no_to_first_linkto and _prev_active_skill != "dff_friendship_skill"

            _if_switch_topic = is_switch_topic(dialog["human_utterances"][-1])
            bot_uttr_dict = dialog["bot_utterances"][-1] if len(dialog["bot_utterances"]) > 0 else {}
            _if_choose_topic = if_choose_topic(dialog["human_utterances"][-1], bot_uttr_dict)
            _is_ask_me_something = ASK_ME_QUESTION_PATTERN.search(dialog["human_utterances"][-1]["text"])

            if len(dialog["human_utterances"]) > 1:
                # previous user utterance expressed "can't do"; "yes"/no-nounphrases
                # continues, "no" asks to stop the dialog entirely
                _was_cant_do = "cant_do" in get_intents(dialog["human_utterances"][-2]) and (
                    len(curr_nounphrases) == 0 or is_yes(dialog["human_utterances"][-1])
                )
                _was_cant_do_stop_it = "cant_do" in get_intents(dialog["human_utterances"][-2]) and is_no(
                    dialog["human_utterances"][-1]
                )
            else:
                _was_cant_do = False
                _was_cant_do_stop_it = False

            if _was_cant_do_stop_it:
                link_to_question = "Sorry, bye! #+#exit"
                confs += [1.0]  # finish dialog request
            elif _no_to_first_linkto:
                confs += [0.99]
            elif _is_ask_me_something or _if_switch_topic or _was_cant_do or _if_choose_topic:
                confs += [1.0]  # Use it only as response selector retrieve skill output modifier
            else:
                confs += [0.05]  # Use it only as response selector retrieve skill output modifier
            cands += [link_to_question]
            attrs += [{"type": "link_to_for_response_selector"}]
            human_attrs += [human_attr]
            bot_attrs += [{}]

        # candidate 4: a curated fact sharing a nounphrase with the user
        facts_same_nps = []
        for i, nphrase in enumerate(curr_nounphrases):
            for fact_id in NP_FACTS.get(nphrase, []):
                facts_same_nps += [
                    f"Well, now that you've mentioned {nphrase}, I've remembered this. {FACTS_MAP[str(fact_id)]}. "
                    f"{(opinion_request_question() if random.random() < ASK_QUESTION_PROB else '')}"
                ]

        if len(facts_same_nps) > 0 and not is_sensitive_case:
            logger.info("Found special nounphrases for facts. Return fact with the same nounphrase.")
            cands += [choice(facts_same_nps)]
            confs += [0.5]
            attrs += [{"type": "nounphrase_fact"}]
            human_attrs += [{}]
            bot_attrs += [{}]

        total_time = time.time() - st_time
        logger.info(f"dummy_skill exec time: {total_time:.3f}s")
        asyncio.create_task(
            callback(task_id=payload["task_id"], response=[cands, confs, human_attrs, bot_attrs, attrs])
        )
    except Exception as e:
        # report the failure through the same callback so the agent is not left hanging
        logger.exception(e)
        sentry_sdk.capture_exception(e)
        asyncio.create_task(callback(task_id=payload["task_id"], response=e))
def respond():
    """Knowledge-grounding skill HTTP handler.

    Phase 1: for each dialog in the request batch, collect candidate knowledge
    snippets (annotator knowledge, retrieved facts, topic facts, news-API facts,
    random facts, news descriptions) into `input_batch`, keeping `dial_ids` and
    `annotations_depths` index-aligned with it.
    Phase 2: POST the batch to the knowledge-grounding service, then score each
    generated hypothesis (confidence cases, penalties, filters) and return
    per-dialog (responses, confidences, attributes) triples as JSON.
    """
    print("response generation started")
    st_time = time.time()
    dialogs_batch = request.json["dialogs"]
    # following 3 lists have len = number of samples going to the model
    annotations_depths = []
    dial_ids = []
    input_batch = []
    # following 4 lists have len = len(dialogs_batch)
    entities = []
    lets_chat_about_flags = []
    nounphrases = []
    special_intents_flags = []
    chosen_topics = {}
    for d_id, dialog in enumerate(dialogs_batch):
        try:
            user_input_text = dialog["human_utterances"][-1]["text"]
            bot_uttr = dialog["bot_utterances"][-1] if len(dialog["bot_utterances"]) > 0 else {}
            switch_choose_topic = if_choose_topic(dialog["human_utterances"][-1], bot_uttr)
            # spacy_nounphrases — stored as a compiled OR-pattern (or "" when none)
            spacy_nounphrases = get_spacy_nounphrases(dialog["human_utterances"][-1])
            nounphrases.append(
                re.compile(join_sentences_in_or_pattern(spacy_nounphrases), re.IGNORECASE) if spacy_nounphrases else ""
            )
            # entities — same compiled-pattern-or-"" convention
            curr_ents = get_named_entities(dialog["human_utterances"][-1])
            entities.append(
                re.compile(join_sentences_in_or_pattern(curr_ents), re.IGNORECASE) if curr_ents else ""
            )
            # intents
            lets_chat_about_flag, special_intents_flag = get_intents_flags(dialog["human_utterances"][-1])
            lets_chat_about_flags.append(lets_chat_about_flag)
            special_intents_flags.append(special_intents_flag)

            # how far back to read annotator history: 0 on topic switch / lets-chat,
            # DFF length when a dff skill was recently active, default otherwise
            anntr_history_len = DEFAULT_ANNTR_HISTORY_LEN
            bot_uttrs_for_dff_check = dialog["bot_utterances"][-2:] if len(dialog["bot_utterances"]) > 1 else []
            dffs_flag = check_dffs(bot_uttrs_for_dff_check)
            if lets_chat_about_flag or switch_choose_topic:
                anntr_history_len = 0
            elif dffs_flag:
                anntr_history_len = DFF_ANNTR_HISTORY_LEN

            # if detected lets_chat is about topic from the file
            lets_chat_topic = get_lets_chat_topic(lets_chat_about_flag, dialog["human_utterances"][-1])
            # if prev skill == news_api_skill get news description and create knowledge fact
            news_api_fact = get_news_api_fact(
                bot_uttr, dialog["human_utterances"], not (switch_choose_topic or lets_chat_about_flag)
            )
            # start creating data for kg service
            user_input_history = "\n".join([i["text"] for i in dialog["utterances"]])

            annotators = {
                # "odqa": "answer_sentence",
                # "kbqa": "answer"
            }
            if not switch_choose_topic:
                user_input_knowledge, annotations_depth = get_knowledge_from_annotators(
                    annotators, dialog["utterances"], anntr_history_len
                )
            else:
                user_input_knowledge = ""
                annotations_depth = {}
            # add nounphrases and entities to the knowledge
            if user_input_knowledge:
                user_input_checked_sentence = (
                    space_join(spacy_nounphrases)
                    + space_join(curr_ents)
                    + tokenize.sent_tokenize(user_input_knowledge)[0]
                )
            else:
                user_input_checked_sentence = ""

            if user_input_knowledge:
                user_input = {
                    "checked_sentence": user_input_checked_sentence,
                    "knowledge": user_input_knowledge,
                    "text": user_input_text,
                    "history": user_input_history,
                }
                annotations_depths.append(annotations_depth)
                dial_ids.append(d_id)
                input_batch.append(user_input)

            # facts retrieved by the fact_retrieval annotator over recent history
            retrieved_facts = get_annotations_from_dialog(
                dialog["utterances"][-anntr_history_len * 2 - 1:], "fact_retrieval"
            )
            if retrieved_facts:
                for depth, fact in retrieved_facts[-TOP_N_FACTS:]:
                    user_input = {
                        "checked_sentence": fact,
                        "knowledge": fact,
                        "text": user_input_text,
                        "history": user_input_history,
                    }
                    input_batch.append(user_input)
                    annotations_depths.append({"retrieved_fact": depth})
                    dial_ids.append(d_id)

            # topic fact: either the detected lets-chat topic, or a random topic
            # when the utterance has no entities at all
            if any([switch_choose_topic, lets_chat_topic, lets_chat_about_flag]):
                if lets_chat_topic:
                    fact = random.sample(TOPICS_FACTS[lets_chat_topic], 1)[0]
                    chosen_topics[d_id] = lets_chat_topic
                    _chosen_topic_fact = "lets_chat_cobot_da"
                elif not get_entities(dialog["human_utterances"][-1], only_named=False, with_labels=False):
                    topic = random.sample(TOPICS_FACTS.keys(), 1)[0]
                    fact = random.sample(TOPICS_FACTS[topic], 1)[0]
                    chosen_topics[d_id] = topic
                    _chosen_topic_fact = "switch_random"
                else:
                    fact = ""
                if fact:
                    user_input = {
                        "checked_sentence": fact,
                        "knowledge": fact,
                        "text": user_input_text,
                        "history": user_input_history,
                        "chosen_topic_fact": _chosen_topic_fact,
                    }
                    input_batch.append(user_input)
                    annotations_depths.append({})
                    dial_ids.append(d_id)

            if news_api_fact:
                user_input = {
                    "checked_sentence": news_api_fact,
                    "knowledge": news_api_fact,
                    "text": user_input_text,
                    "history": user_input_history,
                    "news_api_fact": True,
                }
                input_batch.append(user_input)
                annotations_depths.append({})
                dial_ids.append(d_id)

            fact_random_facts = get_fact_random(dialog["utterances"][-anntr_history_len * 2 - 1:])
            if fact_random_facts:
                user_input = {
                    "checked_sentence": fact_random_facts[-1][1],
                    "knowledge": fact_random_facts[-1][1],
                    "text": user_input_text,
                    "history": user_input_history,
                    "fact_random_fact": True,
                }
                input_batch.append(user_input)
                annotations_depths.append({"fact_random": fact_random_facts[-1][0]})
                dial_ids.append(d_id)

            user_news = get_news(dialog["human_utterances"][-1], "human")
            bot_news = get_news(dialog["human_utterances"][-1], "bot")
            # all_news = get_news(dialog["human_utterances"][-1], "all")
            if user_news:
                # NOTE(review): "decsription" looks like a typo for "description" —
                # confirm against the news annotator's actual output key before changing.
                news_desc = user_news[-1].get("decsription", "")
                if news_desc:
                    user_input = {
                        "checked_sentence": news_desc,
                        "knowledge": news_desc,
                        "text": user_input_text,
                        "history": user_input_history,
                        "news_fact": "human ",
                    }
                    input_batch.append(user_input)
                    annotations_depths.append({})
                    dial_ids.append(d_id)
            elif bot_news:
                news_desc = bot_news[-1].get("decsription", "")
                if news_desc:
                    user_input = {
                        "checked_sentence": news_desc,
                        "knowledge": news_desc,
                        "text": user_input_text,
                        "history": user_input_history,
                        "news_fact": "bot ",
                    }
                    input_batch.append(user_input)
                    annotations_depths.append({})
                    dial_ids.append(d_id)
            # elif all_news:
            #     user_input = {
            #         'checked_sentence': all_news[-1].get("decsription", ""),
            #         'knowledge': all_news[-1].get("decsription", ""),
            #         'text': user_input_text,
            #         'history': user_input_history,
            #         'news_fact': "all ",
            #         'news_title': all_news[-1].get("title", "")
            #     }
            #     input_batch.append(user_input)
            #     annotations_depths.append({})
            #     dial_ids.append(d_id)
        except Exception as ex:
            # per-dialog failures are reported but do not abort the batch
            sentry_sdk.capture_exception(ex)
            logger.exception(ex)

    try:
        raw_responses = []
        if input_batch:
            logger.info(f"skill sends to service: {input_batch}")
            resp = requests.post(KNOWLEDGE_GROUNDING_SERVICE_URL, json={"batch": input_batch}, timeout=1.5)
            raw_responses = resp.json()
            logger.info(f"skill receives from service: {raw_responses}")
        else:
            # nothing to ground on: return one empty hypothesis per contract
            responses = [[""]]
            confidences = [[0.0]]
            attributes = [[{}]]
            logger.info(f"Collected no hypotheses, exiting with {list(zip(responses, confidences, attributes))}")
            return jsonify(list(zip(responses, confidences, attributes)))

        dial_ids = np.array(dial_ids)
        attributes = []
        confidences = []
        responses = []
        for i, dialog in enumerate(dialogs_batch):
            curr_attributes = []
            curr_confidences = []
            curr_responses = []
            # indices of input_batch samples that belong to dialog i
            for curr_i in np.where(dial_ids == i)[0]:
                attr = {
                    "knowledge_paragraph": input_batch[curr_i]["knowledge"],
                    "knowledge_checked_sentence": input_batch[curr_i]["checked_sentence"],
                    "can_continue": CAN_NOT_CONTINUE,
                    "confidence_case": "",
                }
                already_was_active, short_long_response = get_penalties(dialog["bot_utterances"], raw_responses[curr_i])
                curr_nounphrase_search = nounphrases[i].search(raw_responses[curr_i]) if nounphrases[i] else False
                curr_entities_search = entities[i].search(raw_responses[curr_i]) if entities[i] else False
                no_penalties = False
                fact_random_penalty = 0.0
                topic = chosen_topics.get(i, "")
                chosen_topic_fact_flag = input_batch[curr_i].get("chosen_topic_fact", "")
                curr_news_fact = input_batch[curr_i].get("news_fact", "")
                add_intro = ""
                # base confidence by knowledge source (first matching case wins)
                if topic and chosen_topic_fact_flag:
                    add_intro = f"Okay, Let's chat about {topic}. "
                    confidence = HIGHEST_CONFIDENCE
                    no_penalties = True
                    attr["confidence_case"] += f"topic_fact: {chosen_topic_fact_flag} "
                    attr["response_parts"] = ["prompt"]
                elif input_batch[curr_i].get("news_api_fact", ""):
                    add_intro = random.choice(
                        [
                            "Sounds like ",
                            "Seems like ",
                            "Makes sense. ",
                            # "Here's what I've heard: ",
                            "Here's something else I've heard: ",
                            "It reminds me that",
                            "This comes to my mind: ",
                            "",
                        ]
                    )
                    no_penalties = True
                    confidence = HIGHEST_CONFIDENCE
                    attr["confidence_case"] += "news_api_fact "
                elif input_batch[curr_i].get("fact_random_fact", ""):
                    fact_random_penalty = annotations_depths[curr_i].get("fact_random", 0.0)
                    confidence = DEFAULT_CONFIDENCE
                    attr["confidence_case"] += "fact_random_fact "
                elif curr_news_fact:
                    if curr_news_fact != "all":
                        confidence = NOUNPHRASE_ENTITY_CONFIDENCE
                    else:
                        confidence = DEFAULT_CONFIDENCE
                        curr_news_title = input_batch[curr_i].get("news_title", "")
                        if curr_news_title:
                            add_intro = f"I have just read that {curr_news_title}. "
                    attr["confidence_case"] += "news_fact: " + curr_news_fact
                elif (curr_nounphrase_search or curr_entities_search) and lets_chat_about_flags[i]:
                    confidence = HIGHEST_CONFIDENCE
                    attr["confidence_case"] += "nounphrase_entity_and_lets_chat_about "
                    attr["response_parts"] = ["prompt"]
                elif curr_nounphrase_search or curr_entities_search:
                    confidence = NOUNPHRASE_ENTITY_CONFIDENCE
                    attr["confidence_case"] += "nounphrase_entity "
                elif lets_chat_about_flags[i]:
                    confidence = LETS_CHAT_ABOUT_CONFIDENDENCE
                    attr["confidence_case"] += "lets_chat_about "
                    attr["response_parts"] = ["prompt"]
                else:
                    confidence = DEFAULT_CONFIDENCE
                    attr["confidence_case"] += "default "
                # downgrades: acronyms and special characters lower the confidence;
                # special intents and greetings/farewells zero it out
                acronym_flag = ABBRS.search(raw_responses[curr_i])
                if acronym_flag:
                    confidence = ABBRS_CONFIDENCE
                    attr["confidence_case"] += f"acronyms: {acronym_flag} "
                    logger.debug(f"KG skill: found acronyms: {acronym_flag}")
                special_char_flag = special_char_re.search(raw_responses[curr_i])
                if special_char_flag:
                    confidence = HAS_SPEC_CHAR_CONFIDENCE
                    attr["confidence_case"] += "special_char "
                    logger.debug(f"KG skill: found special_char: {special_char_flag}")
                if special_intents_flags[i]:
                    confidence = 0.0
                    attr["confidence_case"] += "special_intents "
                    logger.debug("KG skill: found special_intents")
                greetings_farewells_flag = greetings_farewells_re.search(raw_responses[curr_i])
                if greetings_farewells_flag:
                    confidence = 0.0
                    attr["confidence_case"] += "greetings_farewells "
                    logger.debug(f"KG skill: found greetings_farewells: {greetings_farewells_flag}")
                penalties = (
                    annotations_depths[curr_i].get("retrieved_fact", 0.0)
                    + fact_random_penalty
                    + already_was_active
                    + short_long_response
                    if not no_penalties
                    else 0.0
                )
                confidence -= penalties
                # any hard flag drops the hypothesis entirely
                if any(
                    [
                        acronym_flag,
                        special_char_flag,
                        special_intents_flags[i],
                        greetings_farewells_flag,
                        short_long_response,
                    ]
                ):
                    logger.debug(f"KG skill: found penalties in response: {raw_responses[curr_i]}, skipping it")
                    continue
                else:
                    curr_attributes.append(attr)
                    curr_confidences.append(max(0.0, confidence))
                    # strip spaces before punctuation and fix tokenized "n ' t"
                    curr_responses.append(
                        re.sub(r'\s([?.!",;:](?:\s|$))', r"\1", add_intro + raw_responses[curr_i]).replace(" ' t", "'t")
                    )
            attributes.append(curr_attributes)
            confidences.append(curr_confidences)
            responses.append(curr_responses)
    except Exception as ex:
        sentry_sdk.capture_exception(ex)
        logger.exception(ex)
        responses = [[""]]
        confidences = [[0.0]]
        attributes = [[{}]]

    logger.info(f"knowledge_grounding_skill exec time: {time.time() - st_time}")
    return jsonify(list(zip(responses, confidences, attributes)))
def respond():
    """Factoid-QA skill HTTP handler.

    For each dialog in the batch: decide whether the last human utterance is a
    factoid question (classifier score > 0.9, has entities/nounphrases, passes
    check_factoid), query the Text QA service for those questions, optionally
    query/reuse KBQA, pick the best answer via qa_choose, and return per-dialog
    (response, confidence, attributes) triples as JSON.

    Fix: `cnt_fnd` (index into the Text QA response list) was never incremented,
    so every factoid dialog in a batch received the answer for the FIRST question.
    """
    st_time = time.time()
    # to clarify, there's just one (1) dialog returned, not multiple
    dialogs_batch = request.json["dialogs"]
    confidences = []
    responses = []
    attributes = []
    sentences_to_classify = []
    ner_outputs_to_classify = []
    is_factoid_sents = []

    for dialog in dialogs_batch:
        uttr = dialog["human_utterances"][-1]
        # probabilities of being factoid question
        last_phrase = dialog["human_utterances"][-1]["text"]
        # words after "about" are treated as candidate subjects
        if "about" in last_phrase:
            probable_subjects = last_phrase.split("about")[1:]
        else:
            probable_subjects = []
        names = get_entities(dialog["human_utterances"][-1], only_named=True, with_labels=True)
        names = [j["text"].lower() for j in names]
        # keep only subjects we actually have facts for
        names = [j for j in names + probable_subjects if j in fact_dict.keys()]
        names = list(set(names))
        nounphrases = get_entities(dialog["human_utterances"][-1], only_named=False, with_labels=False)
        factoid_conf = get_factoid(uttr)
        is_factoid_cls = factoid_conf.get("is_factoid", 0.0) > 0.9
        is_factoid = is_factoid_cls and (names or nounphrases) and check_factoid(last_phrase)
        is_factoid_sents.append(is_factoid)
        ner_outputs_to_classify.append(names)

    logger.info(f"Ner outputs {ner_outputs_to_classify}")
    fact_outputs = get_random_facts(ner_outputs_to_classify)
    logger.info(f"Fact outputs {fact_outputs}")
    # NOTE(review): sentences_to_classify is never populated, so this loop is
    # currently dead code — confirm whether it should receive the utterances.
    for i in range(len(sentences_to_classify)):
        if asked_about_fact(sentences_to_classify[i]):
            is_factoid_sents[i] = ASKED_ABOUT_FACT_PROB

    # factoid_classes = [cl > FACTOID_CLASS_THRESHOLD for cl in factoid_classes]
    # logger.info('Factoid classes ' + str(factoid_classes))

    questions_batch = []
    facts_batch = []
    question_nums = []
    for n, (dialog, is_factoid, fact_output) in enumerate(zip(dialogs_batch, is_factoid_sents, fact_outputs)):
        curr_ann_uttr = dialog["human_utterances"][-1]
        prev_ann_uttr = dialog["bot_utterances"][-1] if len(dialog["bot_utterances"]) else {}
        annotations = curr_ann_uttr["annotations"]
        tell_me_about_intent = annotations.get("intent_catcher", {}).get("lets_chat_about", {}).get(
            "detected", 0
        ) == 1 or if_chat_about_particular_topic(curr_ann_uttr, prev_ann_uttr)
        # prefer the rewritten sentence (pronouns resolved) for question detection
        if "sentrewrite" in annotations:
            text_rewritten = annotations["sentrewrite"]["modified_sents"][-1]
        else:
            text_rewritten = curr_ann_uttr["text"]
        is_question = "?" in text_rewritten
        if is_factoid and (tell_me_about_intent or is_question):
            questions_batch.append(curr_ann_uttr["text"])
            facts_batch.append(annotations.get("fact_retrieval", {}).get("facts", []))
            question_nums.append(n)

    # default: empty answer per dialog, overwritten on a successful Text QA call
    text_qa_response_batch = [{"answer": "", "answer_sentence": "", "confidence": 0.0} for _ in dialogs_batch]
    resp = requests.post(
        TEXT_QA_URL, json={"question_raw": questions_batch, "top_facts": facts_batch}, timeout=0.5
    )
    if resp.status_code != 200:
        logger.info("API Error: Text QA inaccessible")
    else:
        logger.info("Query against Text QA succeeded")
        text_qa_resp = resp.json()
        text_qa_response_batch = []
        cnt_fnd = 0
        for i in range(len(dialogs_batch)):
            if i in question_nums and cnt_fnd < len(text_qa_resp):
                text_qa_response_batch.append(
                    {
                        "answer": text_qa_resp[cnt_fnd][0],
                        "answer_sentence": text_qa_resp[cnt_fnd][3],
                        "confidence": text_qa_resp[cnt_fnd][1],
                    }
                )
                # BUGFIX: advance to the next Text QA answer; previously cnt_fnd
                # stayed at 0 and every question got the first answer
                cnt_fnd += 1
            else:
                text_qa_response_batch.append({"answer": "", "answer_sentence": "", "confidence": 0.0})
    logger.info(f"Response: {resp.json()}")

    kbqa_response = dict()
    for dialog, text_qa_response, is_factoid, fact_output in zip(
        dialogs_batch, text_qa_response_batch, is_factoid_sents, fact_outputs
    ):
        attr = {}
        curr_ann_uttr = dialog["human_utterances"][-1]
        prev_ann_uttr = dialog["bot_utterances"][-1] if len(dialog["bot_utterances"]) else {}
        tell_me_about_intent = curr_ann_uttr["annotations"].get("intent_catcher", {}).get(
            "lets_chat_about", {}
        ).get("detected", 0) == 1 or if_chat_about_particular_topic(curr_ann_uttr, prev_ann_uttr)
        if "sentrewrite" in curr_ann_uttr["annotations"]:
            curr_uttr_rewritten = curr_ann_uttr["annotations"]["sentrewrite"]["modified_sents"][-1]
        else:
            curr_uttr_rewritten = curr_ann_uttr["text"]
        is_question = "?" in curr_uttr_rewritten
        logger.info(f"is_factoid {is_factoid} tell_me_about {tell_me_about_intent} is_question {is_question}")
        if is_factoid and (tell_me_about_intent or is_question):
            logger.info("Question is classified as factoid. Querying KBQA and ODQA.")
            print("Question is classified as factoid. Querying KBQA and ODQA...", flush=True)
            logger.info(f"Using annotators output, kbqa_response {curr_ann_uttr['annotations'].get('kbqa', [])}")
            if use_annotators_output:
                kbqa_response = curr_ann_uttr["annotations"].get("kbqa", {})
                logger.info(f"Using annotators output, kbqa_response {kbqa_response}")
            else:
                # NOTE(review): last_phrase here is the text of the LAST dialog from
                # the first loop, not this dialog's utterance — confirm intent for
                # multi-dialog batches (single-dialog batches are unaffected).
                futures = []
                executor = concurrent.futures.ThreadPoolExecutor()
                for system in ["kbqa"]:
                    futures.append(executor.submit(getQaResponse, last_phrase, system))
                results = []
                for future in concurrent.futures.as_completed(futures):
                    results.append(future.result())
                for result in results:
                    kbqa_response = result
            response, confidence = qa_choose(last_phrase, text_qa_response, kbqa_response)
            # trim over-long answers to ~300 chars at comma boundaries
            if len(response) > 300:
                response_cut = ""
                cur_len = 0
                response_split = response.split(", ")
                for piece in response_split:
                    if cur_len + len(piece) < 300:
                        response_cut += f"{piece}, "
                        cur_len += len(piece)
                response = response_cut.rstrip(", ")
            if not response:
                response = random.choice(DONT_KNOW_ANSWER)
                confidence = FACTOID_NOTSURE_CONFIDENCE
                attr["not sure"] = True
        else:
            logger.info("Question is not classified as factoid.")
            response = ""
            confidence = 0.0
        responses.append(response)
        confidences.append(confidence)
        attributes.append(attr)
    logger.info(f"Responses {responses}")

    total_time = time.time() - st_time
    logger.info(f"factoid_qa exec time: {total_time:.3f}s")
    return jsonify(list(zip(responses, confidences, attributes)))
def get_named_entities_from_human_utterance(vars):
    """Return named entities from the last human utterance.

    Each entity is a dict, e.g. {"text": "London", "type": "LOC"}.
    """
    last_human_uttr = vars["agent"]["dialog"]["human_utterances"][-1]
    return common_utils.get_entities(last_human_uttr, only_named=True, with_labels=True)
def get_nounphrases_from_human_utterance(vars):
    """Return all nounphrases (plain strings, no labels) from the last human utterance."""
    last_human_uttr = vars["agent"]["dialog"]["human_utterances"][-1]
    return common_utils.get_entities(last_human_uttr, only_named=False, with_labels=False)
def process_info(dialog, which_info="name"):
    """Extract and respond to personal info (name / homeland / location) from the user.

    Matches the previous bot utterance and current user utterance against the
    info-request/info-statement patterns for `which_info`, extracts the value via
    check_entities(), and builds the reply plus human-profile attribute updates.

    Args:
        dialog: dialog dict with "human_utterances", "bot_utterances", "utterances",
            and "human" profile/attributes
        which_info: one of "name", "homeland", "location"

    Returns:
        (response, confidence, human_attr, bot_attr, attr) tuple
    """
    human_attr = {}
    bot_attr = {}
    attr = {"can_continue": CAN_NOT_CONTINUE}
    response = ""
    confidence = 0.0
    curr_uttr_dict = dialog["human_utterances"][-1]
    curr_user_uttr = curr_uttr_dict["text"].lower()
    curr_user_annot = curr_uttr_dict["annotations"]
    bot_utterance_texts = [u["text"].lower() for u in dialog["bot_utterances"]]
    try:
        prev_bot_uttr = dialog["bot_utterances"][-1]["text"].lower()
    except IndexError:
        # no bot utterances yet (dialog start)
        prev_bot_uttr = ""
    logger.info(f"Previous bot uterance: {prev_bot_uttr}")

    # whether bot asked for this info, or user volunteered it
    is_about_templates = {
        "name": what_is_your_name_pattern.search(prev_bot_uttr) or my_name_is_pattern.search(curr_user_uttr),
        "homeland": where_are_you_from_pattern.search(prev_bot_uttr) or my_origin_is_pattern.search(curr_user_uttr),
        "location": what_is_your_location_pattern.search(prev_bot_uttr) or my_location_is_pattern.search(curr_user_uttr),
    }
    # second phrasing is used when the first has already been said by the bot
    response_phrases = {
        "name": RESPONSE_PHRASES["name"][0],
        "location": RESPONSE_PHRASES["location"][1]
        if RESPONSE_PHRASES["location"][0].lower() in bot_utterance_texts
        else RESPONSE_PHRASES["location"][0],
        "homeland": RESPONSE_PHRASES["homeland"][1]
        if RESPONSE_PHRASES["homeland"][0].lower() in bot_utterance_texts
        else RESPONSE_PHRASES["homeland"][0],
    }
    got_info = False
    # if user doesn't want to share his info
    if user_tells_bot_called_him_wrong(curr_uttr_dict, prev_bot_uttr, dialog["human"]["profile"]):
        logger.info(f"User says My name is not Blabla")
        response = f"My bad. What is your name again?"
        confidence = 1.0
        got_info = True
        attr["can_continue"] = MUST_CONTINUE
    elif (is_about_templates[which_info] or was_user_asked_to_clarify_info(prev_bot_uttr, which_info)) and (
        is_no(curr_uttr_dict) or is_secret(curr_user_uttr, which_info)
    ):
        # user refuses / keeps the info secret — back off immediately
        response = "As you wish."
        confidence = 1.0
        attr["can_continue"] = CAN_NOT_CONTINUE
        return response, confidence, human_attr, bot_attr, attr
    elif re.search(r"is that where you live now", prev_bot_uttr) and is_yes(curr_uttr_dict):
        logger.info(f"Found location=homeland")
        if dialog["human"]["attributes"].get("homeland", None):
            human_attr["location"] = dialog["human"]["attributes"]["homeland"]
        else:
            # re-extract homeland from the utterance pair two turns back
            # NOTE(review): here check_entities' result is used as a single value,
            # while below it is unpacked as a 2-tuple — confirm its return arity.
            found_homeland = check_entities(
                "homeland",
                curr_user_uttr=dialog["utterances"][-3]["text"].lower(),
                curr_user_annot=dialog["utterances"][-3]["annotations"],
                prev_bot_uttr=dialog["utterances"][-4]["text"].lower(),
            )
            human_attr["location"] = found_homeland
        response = response_phrases["location"]
        confidence = 1.0
        got_info = True
        attr["can_continue"] = MUST_CONTINUE
    elif re.search(r"is that where you live now", prev_bot_uttr) and is_no(curr_uttr_dict):
        logger.info(f"Found location is not homeland")
        response = f"So, where do you live now?"
        confidence = 1.0
        got_info = False
        attr["can_continue"] = MUST_CONTINUE

    if (is_about_templates[which_info] or was_user_asked_to_clarify_info(prev_bot_uttr, which_info)) and not got_info:
        logger.info(f"Asked for {which_info} in {prev_bot_uttr}")
        found_info, named_entities_found = check_entities(which_info, curr_user_uttr, curr_user_annot, prev_bot_uttr)
        logger.info(f"found_info, named_entities_found: {found_info}, {named_entities_found}")
        if which_info == "name" and found_info is not None:
            found_info = filter_unreadable_names(found_info)
        if found_info is None:
            # extraction failed — choose a clarification / fallback reply
            logger.info(f"found_info is None")
            if did_user_misunderstand_bot_question_about_geography(curr_user_uttr, which_info, prev_bot_uttr):
                response = ASK_GEOGRAPHICAL_LOCATION_BECAUSE_USER_MISUNDERSTOOD_BOT[which_info]
                confidence = 0.9
                attr["can_continue"] = CAN_CONTINUE_SCENARIO
            elif which_info in ["homeland", "location"] and NON_GEOGRAPHICAL_LOCATIONS_COMPILED_PATTERN.search(
                curr_user_uttr
            ):
                # user named something that is not a geographic place — stay silent
                response = ""
                confidence = 0.0
                attr["can_continue"] = CAN_NOT_CONTINUE
            elif was_user_asked_to_clarify_info(prev_bot_uttr, which_info):
                # already asked to repeat once; don't loop again
                response = ""
                confidence = 0.0
                attr["can_continue"] = CAN_NOT_CONTINUE
            elif (
                which_info == "name"
                and len(curr_user_uttr.split()) == 1
                and len(get_entities(curr_uttr_dict, only_named=False, with_labels=False)) > 0
            ):
                response = "I've never heard about this name."
                confidence = 1.0
                attr["can_continue"] = MUST_CONTINUE
            else:
                response = REPEAT_INFO_PHRASES[which_info]
                confidence = 1.0
                attr["can_continue"] = MUST_CONTINUE
        else:
            if which_info == "name":
                found_info = shorten_long_names(found_info)
                response = response_phrases[which_info] + found_info + "."
                confidence = 1.0
                attr["can_continue"] = MUST_CONTINUE
                human_attr[which_info] = found_info
            else:
                if NON_GEOGRAPHICAL_LOCATIONS_COMPILED_PATTERN.search(found_info):
                    if did_user_misunderstand_bot_question_about_geography(found_info, which_info, prev_bot_uttr):
                        response = ASK_GEOGRAPHICAL_LOCATION_BECAUSE_USER_MISUNDERSTOOD_BOT[which_info]
                        confidence = 0.9
                        attr["can_continue"] = CAN_CONTINUE_SCENARIO
                    else:
                        response = ""
                        confidence = 0.0
                        attr["can_continue"] = CAN_NOT_CONTINUE
                else:
                    if which_info == "location":
                        response = response_phrases[which_info]
                    elif which_info == "homeland":
                        # if location is already known, acknowledge with the
                        # location phrasing instead of asking again
                        if dialog["human"]["profile"].get("location", None) is None:
                            response = response_phrases[which_info]
                        else:
                            response = response_phrases["location"]
                    human_attr[which_info] = found_info
                    # named-entity-backed extraction is trusted more
                    if named_entities_found:
                        confidence = 1.0
                        attr["can_continue"] = MUST_CONTINUE
                    else:
                        confidence = 0.9
                        attr["can_continue"] = CAN_CONTINUE_SCENARIO
    return response, confidence, human_attr, bot_attr, attr
def last_human_utt_nounphrases(dialog: Dict) -> List[Dict]:
    # Used by: comet_conceptnet_annotator
    """Wrap the last human utterance's nounphrases in the annotator's input format."""
    last_uttr = dialog["human_utterances"][-1]
    nounphrases = get_entities(last_uttr, only_named=False, with_labels=False)
    return [{"nounphrases": [nounphrases]}]
def food_fact_response(vars):
    """Build a reply that shares a fact about a food item the user mentioned.

    Fact sources, in order of preference: the `fact_retrieval` annotation's
    topic facts for food-typed entities, a berry-name heuristic (when the bot
    just talked about berries), and the `fact_random` annotation as fallback.
    Sets confidence / can-continue flags via `state_utils` side effects and
    returns the response string; falls back to `error_response(vars)` when
    nothing applies or badlisted words were detected.
    """
    human_utt = state_utils.get_last_human_utterance(vars)
    annotations = human_utt["annotations"]
    human_utt_text = human_utt["text"].lower()
    bot_utt_text = state_utils.get_last_bot_utterance(vars)["text"]
    shared_memory = state_utils.get_shared_memory(vars)
    # Facts already told earlier in the dialog — used to avoid repeating them.
    used_facts = shared_memory.get("used_facts", [])
    fact = ""
    facts = []
    entity = ""
    berry_name = ""
    # True if the bot's last utterance was one of this skill's link-to phrases.
    linkto_check = any([linkto in bot_utt_text for linkto in link_to_skill2i_like_to_talk["dff_food_skill"]])
    # True if the badlisted-words annotator flagged anything in the utterance.
    black_list_check = any(list(annotations.get("badlisted_words", {}).values()))
    # presumably: flag for "utterance mentions food per ConceptNet" plus the matched item — TODO confirm against check_conceptnet
    conceptnet_flag, food_item = check_conceptnet(vars)
    entities_facts = annotations.get("fact_retrieval", {}).get("topic_facts", [])
    # Pick facts for a food-typed entity; NOTE(review): later matches overwrite
    # earlier ones, so effectively the LAST food-typed entity wins.
    for entity_facts in entities_facts:
        if entity_facts["entity_type"] in ["food", "fruit", "vegetable", "berry"]:
            if entity_facts["facts"]:
                facts = entity_facts["facts"][0].get("sentences", [])
                entity = entity_facts["entity_substr"]
            else:
                facts = []
    if not facts:
        facts = annotations.get("fact_random", [])
    if black_list_check:
        state_utils.set_can_continue(vars, continue_flag=CAN_NOT_CONTINUE)
        return error_response(vars)
    elif conceptnet_flag and all(["shower" not in human_utt_text, " mela" not in human_utt_text]):
        # "shower"/" mela" look like known false-positive food matches being
        # excluded — NOTE(review): confirm intent.
        if "berry" in bot_utt_text.lower():
            # Bot was talking about berries: try to treat the user's nounphrase
            # as a berry name.
            berry_names = get_entities(state_utils.get_last_human_utterance(vars), only_named=False, with_labels=False)
            if berry_names:
                berry_name = berry_names[0]
            if all(["berr" not in human_utt_text, len(human_utt_text.split()) == 1, berry_name]):
                # Single-word answer without "berr" in it, e.g. "straw" ->
                # "strawberry".
                berry_name += "berry"
                fact = get_fact(berry_name, f"fact about {berry_name}")
                entity = berry_name
            elif berry_name:
                if facts and entity:
                    # NOTE(review): random.choice raises IndexError if every
                    # fact is already in used_facts; the surrounding try block
                    # starts later, so this would propagate — confirm intended.
                    fact = random.choice([i for i in facts if i not in used_facts])  # facts[0]
                elif facts:
                    # Scan for the first unused fact whose entity matches the
                    # ConceptNet food item.
                    for facts_item in facts:
                        if all(
                            [
                                facts_item.get("entity_substr", "xxx") in food_item,
                                facts_item.get("fact", "") not in used_facts,
                            ]
                        ):
                            fact = facts_item.get("fact", "")
                            entity = facts_item.get("entity_substr", "")
                            break
                    else:
                        # for/else: no matching unused fact was found.
                        fact = ""
                        entity = ""
        else:
            if all([facts, entity, entity in food_item]):
                # NOTE(review): same potential IndexError as above when all
                # facts are used up.
                fact = random.choice([i for i in facts if i not in used_facts])  # facts[0]
            elif facts and not entity:
                for facts_item in facts:
                    if all(
                        [
                            facts_item.get("entity_substr", "xxx") in food_item,
                            facts_item.get("fact", "") not in used_facts,
                        ]
                    ):
                        fact = facts_item.get("fact", "")
                        entity = facts_item.get("entity_substr", "")
                        break
                else:
                    # for/else: loop completed without finding a usable fact.
                    fact = ""
                    entity = ""
            else:
                fact = ""
                entity = ""
        acknowledgement = random.choice(FOOD_FACT_ACKNOWLEDGEMENTS).replace("ENTITY", entity.lower())
        # Remember the chosen fact so it is not repeated next turn.
        state_utils.save_to_shared_memory(vars, used_facts=used_facts + [fact])
        try:
            if bot_persona_fav_food_check(vars) or len(state_utils.get_last_human_utterance(vars)["text"].split()) == 1:
                state_utils.set_confidence(vars, confidence=CONF_HIGH)
            else:
                state_utils.set_confidence(vars, confidence=CONF_MIDDLE)
            if bool(re.search(DONOTKNOW_LIKE_RE, human_utt_text)):
                # User doesn't know / has no preference — share the bot's own.
                state_utils.set_confidence(vars, confidence=CONF_MIDDLE)
                state_utils.set_can_continue(vars, continue_flag=CAN_CONTINUE_SCENARIO)
                return "Well, as for me, I am a fan of pizza despite I cannot eat as humans."
            elif any([dont_want_talk(vars), bool(re.search(NO_WORDS_RE, human_utt_text))]):
                state_utils.set_can_continue(vars, continue_flag=CAN_NOT_CONTINUE)
                return error_response(vars)
            elif (not fact) and conceptnet_flag:
                state_utils.set_can_continue(vars, continue_flag=CAN_CONTINUE_SCENARIO)
                return "Why do you like it?"
            elif not fact:
                state_utils.set_can_continue(vars, continue_flag=CAN_NOT_CONTINUE)
                return error_response(vars)
            elif fact and entity:
                state_utils.set_can_continue(vars, continue_flag=CAN_CONTINUE_SCENARIO)
                if len(used_facts):
                    return f"{fact} Do you want me to tell you more about {entity}?"
                else:
                    # First fact of the dialog gets an acknowledgement prefix.
                    response = acknowledgement + f"{fact} Do you want to hear more about {entity}?"
                    state_utils.add_acknowledgement_to_response_parts(vars)
                    return response
            elif fact:
                state_utils.set_can_continue(vars, continue_flag=CAN_CONTINUE_SCENARIO)
                if len(used_facts):
                    return f"{fact} Do you want me to tell you more about {entity}?"
                else:
                    return f"Okay. {fact} I can share with you one more cool fact. Do you agree?"
            elif linkto_check:
                state_utils.set_confidence(vars, confidence=CONF_MIDDLE)
                state_utils.set_can_continue(vars, continue_flag=CAN_CONTINUE_SCENARIO)
                return "Sorry. I didn't get what kind of food you have mentioned. Could you repeat it please?"
            else:
                state_utils.set_can_continue(vars, continue_flag=CAN_NOT_CONTINUE)
                return error_response(vars)
        except Exception as exc:
            # Best-effort: log, report to Sentry, and degrade to error response.
            logger.exception(exc)
            sentry_sdk.capture_exception(exc)
            return error_response(vars)
    elif linkto_check:
        # Linked into the skill but no food item recognized — ask to repeat.
        state_utils.set_confidence(vars, confidence=CONF_MIDDLE)
        state_utils.set_can_continue(vars, continue_flag=CAN_CONTINUE_SCENARIO)
        return "Sorry. I didn't get what kind of food you have mentioned. Could you repeat it please?"
    else:
        state_utils.set_can_continue(vars, continue_flag=CAN_NOT_CONTINUE)
        return error_response(vars)