def switch_to_particular_game_discussion(vars):
    """Decide whether the dialog should switch to discussing one particular video game.

    True when the user explicitly asks to talk about games, names a game while
    choosing a topic, answers a bot question with a game title, or agrees to a
    game-related question the bot just asked.
    """
    human_uttr = state_utils.get_last_human_utterance(vars)
    human_text = human_uttr.get("text", "").lower()
    bot_uttr = state_utils.get_last_bot_utterance(vars)
    bot_text = bot_uttr.get("text", "")

    games_in_human_text = find_games_in_text(human_text)
    logger.info(
        f"(switch_to_particular_game_discussion)found_video_game_in_user_uttr: {games_in_human_text}"
    )
    user_named_game = bool(games_in_human_text)

    games_in_bot_text = find_games_in_text(bot_text)
    logger.info(
        f"(switch_to_particular_game_discussion)found_video_game_in_bot_uttr: {games_in_bot_text}"
    )
    bot_named_game = bool(games_in_bot_text)

    # user is picking a topic and mentioned a game title
    picked_game_as_topic = if_choose_topic(human_uttr, bot_uttr) and user_named_game
    # bot asked something, user replied (without a counter-question) naming a game
    answered_with_game = "?" not in human_text and "?" in bot_text and user_named_game
    # bot mentioned a game in its question and user agreed
    agreed_to_game_question = bot_named_game and "?" in bot_text and is_yes(human_uttr)

    flag = (
        lets_talk_about(vars, GAMES_WITH_AT_LEAST_1M_COPIES_SOLD_COMPILED_PATTERN)
        or picked_game_as_topic
        or answered_with_game
        or agreed_to_game_question
    )
    logger.info(f"switch_to_particular_game_discussion={flag}")
    return flag
def lets_chat_about_movies(uttr, prev_uttr=None):
    """Return True if the user wants to start chatting about movies.

    Triggers on an explicit "let's talk about movies"-style request, on the user
    choosing movies as a topic, or on a movie mention given as an answer to a
    bot question (user text has no question of its own).
    """
    if prev_uttr is None:
        prev_uttr = {}
    user_text = uttr.get("text", "")
    movie_mention = re.search(MOVIE_PATTERN, user_text.lower())
    explicit_movie_request = if_chat_about_particular_topic(
        uttr, prev_uttr, compiled_pattern=MOVIE_PATTERN)
    movie_topic_chosen = if_choose_topic(uttr, prev_uttr) and movie_mention
    answer_mentions_movie = (
        "?" not in user_text and "?" in prev_uttr.get("text", "") and movie_mention
    )
    return bool(explicit_movie_request or movie_topic_chosen or answer_mentions_movie)
def is_about_movies(uttr, prev_uttr=None):
    """Return True if the current turn is about movies.

    Checks: explicit "let's talk about movies" request, user chose movies as a
    topic, any movie mention in the user utterance, or the previous bot sentence
    was a movie-related question.

    Args:
        uttr: current human utterance dict (uses `text`).
        prev_uttr: previous bot utterance dict (uses `text` and sentseg
            annotations); defaults to an empty dict.
    """
    prev_uttr = {} if prev_uttr is None else prev_uttr
    curr_uttr_is_about_movies = re.search(MOVIE_PATTERN, uttr.get("text", "").lower())
    # BUGFIX: sentseg may be present with an *empty* `segments` list, in which
    # case the `[""]` default does not apply and `[-1]` raised IndexError;
    # `or [""]` covers both the missing-key and the empty-list cases.
    segments = prev_uttr.get("annotations", {}).get("sentseg", {}).get("segments", [""]) or [""]
    prev_uttr_last_sent = segments[-1].lower()
    prev_uttr_is_about_movies = re.search(MOVIE_PATTERN, prev_uttr_last_sent)
    lets_talk_about_movies = if_chat_about_particular_topic(
        uttr, prev_uttr, compiled_pattern=MOVIE_PATTERN)
    chosed_topic = if_choose_topic(uttr, prev_uttr) and curr_uttr_is_about_movies
    return bool(
        lets_talk_about_movies
        or chosed_topic
        or curr_uttr_is_about_movies
        or ("?" in prev_uttr_last_sent and prev_uttr_is_about_movies)
    )
def switch_to_general_gaming_discussion(vars):
    """Decide whether to switch the dialog to a general discussion about gaming.

    True when the user asks to talk about games, uses gaming vocabulary while
    choosing a topic or answering a bot question, or says "yes" to a
    game-related bot question.
    """
    human_uttr = state_utils.get_last_human_utterance(vars)
    human_text = human_uttr.get("text", "").lower()
    bot_uttr = state_utils.get_last_bot_utterance(vars)
    bot_text = bot_uttr.get("text", "")

    gaming_words_in_human_text = bool(VIDEO_GAME_WORDS_COMPILED_PATTERN.search(human_text))

    # user is picking a topic and used gaming vocabulary
    picked_gaming_topic = if_choose_topic(human_uttr, bot_uttr) and gaming_words_in_human_text
    # bot asked something, user replied (no counter-question) with gaming vocabulary
    answered_with_gaming_words = (
        "?" not in human_text and "?" in bot_text and gaming_words_in_human_text
    )
    # user agreed to a game-related question from the bot
    agreed_to_gaming_question = is_yes(human_uttr) and is_question_about_games(bot_text)

    flag = (
        lets_talk_about(vars, VIDEO_GAME_WORDS_COMPILED_PATTERN)
        or picked_gaming_topic
        or answered_with_gaming_words
        or agreed_to_gaming_question
    )
    logger.info(f"switch_to_general_gaming_discussion={flag}")
    return flag
async def send(self, payload: Dict, callback: Callable):
    """Rule-based skill selector: pick the set of skills to run for the last user utterance.

    Reads the first dialog state from ``payload["payload"]["states_batch"]``, inspects
    annotations (intent catcher, topics, factoid classifier, NER/celebrity, ASR), and
    asynchronously delivers a deduplicated list of skill names via ``callback``.
    On any exception, falls back to ``["dff_program_y_skill", "dummy_skill"]``.

    NOTE(review): mutates the dialog state in place (resets suppressed intent-catcher
    entries) — callers share that side effect.
    """
    st_time = time.time()
    try:
        dialog = payload["payload"]["states_batch"][0]
        skills_for_uttr = []
        user_uttr = dialog["human_utterances"][-1]
        user_uttr_text = user_uttr["text"].lower()
        user_uttr_annotations = user_uttr["annotations"]
        # previous bot utterance may be absent on the very first turn
        bot_uttr = dialog["bot_utterances"][-1] if len(
            dialog["bot_utterances"]) else {}
        bot_uttr_text_lower = bot_uttr.get("text", "").lower()
        prev_active_skill = bot_uttr.get("active_skill", "")

        intent_catcher_intents = get_intents(user_uttr, probs=False, which="intent_catcher")
        high_priority_intent_detected = any([
            k for k in intent_catcher_intents
            if k in high_priority_intents["dff_intent_responder_skill"]
        ])
        low_priority_intent_detected = any([
            k for k in intent_catcher_intents if k in low_priority_intents
        ])

        detected_topics = set(get_topics(user_uttr, which="all"))
        # treat as factoid only when the classifier is very confident (> 0.96)
        is_factoid = get_factoid(user_uttr).get("is_factoid", 0.0) > 0.96
        is_celebrity_mentioned = check_is_celebrity_mentioned(user_uttr)

        if_choose_topic_detected = if_choose_topic(user_uttr, bot_uttr)
        if_lets_chat_about_particular_topic_detected = if_chat_about_particular_topic(
            user_uttr, bot_uttr)

        # Suppress likely false-positive intent-catcher detections:
        # - "exit" fired on the first turn(s) of the dialog,
        # - "repeat" when the user just said "what?" after an unpredictable skill,
        # - "cant_do" when the user answered a greeting question with "play ...".
        dialog_len = len(dialog["human_utterances"])
        exit_cond = "exit" in intent_catcher_intents and (
            dialog_len == 1 or (dialog_len == 2 and len(user_uttr_text.split()) > 3))
        repeat_cond = ("repeat" in intent_catcher_intents
                       and prev_active_skill in UNPREDICTABLE_SKILLS
                       and re.match(r"^what.?$", user_uttr_text))
        cant_do_cond = ("cant_do" in intent_catcher_intents
                        and "play" in user_uttr_text and any([
                            phrase in bot_uttr_text_lower
                            for phrase in GREETING_QUESTIONS_TEXTS
                        ]))
        for intent_name, condition in zip(
                ["exit", "repeat", "cant_do"],
                [exit_cond, repeat_cond, cant_do_cond]):
            if condition:
                high_priority_intent_detected = False
                not_detected = {"detected": 0, "confidence": 0.0}
                # reset the suppressed intent in place, in both views of the utterance
                user_uttr["annotations"]["intent_catcher"][
                    intent_name] = not_detected
                dialog["utterances"][-1]["annotations"]["intent_catcher"][
                    intent_name] = not_detected

        if "/new_persona" in user_uttr_text:
            # process /new_persona command
            skills_for_uttr.append(
                "personality_catcher"
            )  # TODO: rm crutch of personality_catcher
        elif user_uttr_text == "/get_dialog_id":
            skills_for_uttr.append("dummy_skill")
        elif high_priority_intent_detected:
            # process intent with corresponding IntentResponder
            skills_for_uttr.append("dff_intent_responder_skill")
        elif is_sensitive_topic_and_request(user_uttr):
            # process user utterance with sensitive content, "safe mode"
            # adding open-domain skills without opinion expression
            skills_for_uttr.append("dff_program_y_dangerous_skill")
            skills_for_uttr.append("meta_script_skill")
            skills_for_uttr.append("personal_info_skill")
            skills_for_uttr.append("factoid_qa")
            skills_for_uttr.append("dff_grounding_skill")
            skills_for_uttr.append("dummy_skill")
            skills_for_uttr.append("small_talk_skill")

            if if_lets_chat_about_particular_topic_detected:
                skills_for_uttr.append("news_api_skill")
            if if_special_weather_turn_on(user_uttr, bot_uttr):
                skills_for_uttr.append("dff_weather_skill")
            if is_celebrity_mentioned:
                skills_for_uttr.append("dff_gossip_skill")

            # adding closed-domain skills
            skills_for_uttr += turn_on_skills(
                detected_topics,
                intent_catcher_intents,
                user_uttr_text,
                bot_uttr.get("text", ""),
                available_skills=[
                    "news_api_skill",
                    "dff_coronavirus_skill",
                    "dff_funfact_skill",
                    "dff_weather_skill",
                    "dff_short_story_skill",
                ],
            )
            # adding linked-to skills
            skills_for_uttr.extend(get_linked_to_skills(dialog))
            skills_for_uttr.extend(get_previously_active_skill(dialog))
        else:
            # general case
            if low_priority_intent_detected:
                skills_for_uttr.append("dff_intent_responder_skill")
            # adding open-domain skills
            skills_for_uttr.append("dff_grounding_skill")
            skills_for_uttr.append("dff_program_y_skill")
            skills_for_uttr.append("personal_info_skill")
            skills_for_uttr.append("meta_script_skill")
            skills_for_uttr.append("dummy_skill")
            skills_for_uttr.append("dialogpt")  # generative skill
            skills_for_uttr.append("small_talk_skill")
            skills_for_uttr.append("knowledge_grounding_skill")
            skills_for_uttr.append("convert_reddit")
            skills_for_uttr.append("comet_dialog_skill")
            skills_for_uttr.append("dff_program_y_wide_skill")
            # adding friendship only in the beginning of the dialog
            if len(dialog["utterances"]) < 20:
                skills_for_uttr.append("dff_friendship_skill")

            if if_choose_topic_detected or if_lets_chat_about_particular_topic_detected:
                skills_for_uttr.append("knowledge_grounding_skill")
                skills_for_uttr.append("news_api_skill")

            switch_wiki_skill, _ = if_switch_wiki_skill(user_uttr, bot_uttr)
            if switch_wiki_skill or switch_wiki_skill_on_news(user_uttr, bot_uttr):
                skills_for_uttr.append("dff_wiki_skill")
            if if_switch_test_skill(user_uttr, bot_uttr):
                skills_for_uttr.append("dff_art_skill")

            # adding factoidQA Skill if user utterance is factoid question
            if is_factoid:
                skills_for_uttr.append("factoid_qa")

            # continue a dummy-skill-driven dialog only after a few turns
            if "dummy_skill" in prev_active_skill and len(
                    dialog["utterances"]) > 4:
                skills_for_uttr.append("dummy_skill_dialog")

            # if user mentions a celebrity
            if is_celebrity_mentioned:
                skills_for_uttr.append("dff_gossip_skill")

            # some special cases
            if if_special_weather_turn_on(user_uttr, bot_uttr):
                skills_for_uttr.append("dff_weather_skill")
            if if_turn_on_emotion(user_uttr, bot_uttr):
                skills_for_uttr.append("emotion_skill")
            if get_named_locations(user_uttr):
                skills_for_uttr.append("dff_travel_skill")
            if extract_movies_names_from_annotations(user_uttr):
                skills_for_uttr.append("dff_movie_skill")

            # adding closed-domain skills
            skills_for_uttr += turn_on_skills(
                detected_topics,
                intent_catcher_intents,
                user_uttr_text,
                bot_uttr.get("text", ""),
                available_skills=[
                    "dff_art_skill",
                    "dff_movie_skill",
                    "dff_book_skill",
                    "news_api_skill",
                    "dff_food_skill",
                    "dff_animals_skill",
                    "dff_sport_skill",
                    "dff_music_skill",
                    "dff_science_skill",
                    "dff_gossip_skill",
                    "game_cooperative_skill",
                    "dff_weather_skill",
                    "dff_funfact_skill",
                    "dff_travel_skill",
                    "dff_coronavirus_skill",
                    "dff_bot_persona_skill",
                    "dff_gaming_skill",
                    "dff_short_story_skill",
                ],
            )
            # adding linked-to skills
            skills_for_uttr.extend(get_linked_to_skills(dialog))
            skills_for_uttr.extend(get_previously_active_skill(dialog))

        # NOW IT IS NOT ONLY FOR USUAL CONVERSATION BUT ALSO FOR SENSITIVE/HIGH PRIORITY INTENTS/ETC
        if "dff_coronavirus_skill" in skills_for_uttr:
            # no convert & comet when about coronavirus
            if "convert_reddit" in skills_for_uttr:
                skills_for_uttr.remove("convert_reddit")
            if "comet_dialog_skill" in skills_for_uttr:
                skills_for_uttr.remove("comet_dialog_skill")

        if len(dialog["utterances"]) > 1:
            # Use only misheard asr skill if asr is not confident and skip it for greeting
            if user_uttr_annotations.get("asr", {}).get("asr_confidence",
                                                        "high") == "very_low":
                skills_for_uttr = ["misheard_asr"]

        if "/alexa_" in user_uttr_text:
            # adding alexa handler for Amazon Alexa specific commands
            skills_for_uttr = ["alexa_handler"]

        logger.info(f"Selected skills: {skills_for_uttr}")

        total_time = time.time() - st_time
        logger.info(f"rule_based_selector exec time = {total_time:.3f}s")
        asyncio.create_task(
            callback(task_id=payload["task_id"],
                     response=list(set(skills_for_uttr))))
    except Exception as e:
        total_time = time.time() - st_time
        logger.info(f"rule_based_selector exec time = {total_time:.3f}s")
        logger.exception(e)
        sentry_sdk.capture_exception(e)
        # best-effort fallback so the downstream pipeline still gets skills to run
        asyncio.create_task(
            callback(task_id=payload["task_id"],
                     response=["dff_program_y_skill", "dummy_skill"]))
def tag_based_response_selection(dialog, candidates, scores, confidences, bot_utterances, all_prev_active_skills=None):
    """Select the best response hypothesis using tag/category-based prioritization.

    Candidates are bucketed into categories built from: activity status
    (active/continued/finished), topic-entity overlap with the user utterance,
    dialog-breakdown annotation, and whether the candidate serves a required
    dialog act. The best hypothesis is picked from the highest-priority
    non-empty category; a prompt and/or an acknowledgement may be appended.

    Args:
        dialog: dialog state dict (`human_utterances`, `bot_utterances`, `utterances`).
        candidates: list of hypothesis dicts (`skill_name`, `text`, `confidence`, ...).
        scores: per-candidate annotator scores.
        confidences: per-candidate confidences (0.0 means dropped by toxicity/badlists).
        bot_utterances: previous bot utterance texts (used for repetition penalties).
        all_prev_active_skills: optional list of skill names active on previous turns.

    Returns:
        Tuple ``(best_candidate, best_cand_id, curr_single_scores)``.
    """
    all_prev_active_skills = all_prev_active_skills if all_prev_active_skills is not None else []
    # Counter over skill names: how many turns each skill has been active before
    all_prev_active_skills = Counter(all_prev_active_skills)
    annotated_uttr = dialog["human_utterances"][-1]
    all_user_intents, all_user_topics, all_user_named_entities, all_user_nounphrases = get_main_info_annotations(
        annotated_uttr)

    _is_switch_topic_request = is_switch_topic(annotated_uttr)
    _is_force_intent = any([_intent in all_user_intents for _intent in FORCE_INTENTS_IC.keys()])
    # if user utterance contains any question (REGEXP & punctuation check!)
    _is_require_action_intent = is_any_question_sentence_in_utterance({
        "text": annotated_uttr.get("annotations", {}).get("sentseg", {}).get(
            "punct_sent", annotated_uttr["text"])
    })
    # if user utterance contains any question AND requires some intent by socialbot
    _is_require_action_intent = _is_require_action_intent and any([
        _intent in all_user_intents for _intent in REQUIRE_ACTION_INTENTS.keys()
    ])
    _force_intents_detected = [
        _intent for _intent in FORCE_INTENTS_IC.keys() if _intent in all_user_intents
    ]
    # list of user intents which require some action by socialbot
    _require_action_intents_detected = [
        _intent for _intent in REQUIRE_ACTION_INTENTS.keys() if _intent in all_user_intents
    ]
    _force_intents_skills = sum([
        FORCE_INTENTS_IC.get(_intent, []) for _intent in _force_intents_detected
    ], [])
    # list of intents required by the socialbot
    _required_actions = sum([
        REQUIRE_ACTION_INTENTS.get(_intent, []) for _intent in _require_action_intents_detected
    ], [])
    _contains_entities = len(get_entities(annotated_uttr, only_named=False, with_labels=False)) > 0
    _is_active_skill_can_not_continue = False

    _prev_bot_uttr = dialog["bot_utterances"][-1] if len(dialog["bot_utterances"]) > 0 else {}
    _prev_active_skill = dialog["bot_utterances"][-1]["active_skill"] if len(
        dialog["bot_utterances"]) > 0 else ""
    _prev_prev_active_skill = dialog["bot_utterances"][-2]["active_skill"] if len(
        dialog["bot_utterances"]) > 1 else ""
    # True when the last two bot turns were both produced by non-scripted skills
    _no_script_two_times_in_a_row = False
    if _prev_active_skill and _prev_prev_active_skill:
        if all([
            skill not in ACTIVE_SKILLS + ALMOST_ACTIVE_SKILLS
            for skill in [_prev_active_skill, _prev_prev_active_skill]
        ]):
            _no_script_two_times_in_a_row = True
    disliked_skills = get_updated_disliked_skills(
        dialog, can_not_be_disliked_skills=CAN_NOT_BE_DISLIKED_SKILLS)

    _is_dummy_linkto_available = any([
        cand_uttr["skill_name"] == "dummy_skill"
        and cand_uttr.get("type", "") == "link_to_for_response_selector"
        for cand_uttr in candidates
    ])

    # category buckets: {active,continued,finished} x {same/other topic-entity, no_db/db} x {reqda, ""}
    categorized_hyps = {}
    categorized_prompts = {}
    for dasuffix in ["reqda", ""]:
        for actsuffix in ["active", "continued", "finished"]:
            for suffix in [
                "same_topic_entity_no_db",
                "same_topic_entity_db",
                "othr_topic_entity_no_db",
                "othr_topic_entity_db",
            ]:
                categorized_hyps[f"{actsuffix}_{suffix}_{dasuffix}"] = []
                categorized_prompts[f"{actsuffix}_{suffix}_{dasuffix}"] = []

    CASE = ""
    acknowledgement_hypothesis = {}

    for cand_id, cand_uttr in enumerate(candidates):
        if confidences[cand_id] == 0.0 and cand_uttr["skill_name"] not in ACTIVE_SKILLS:
            logger.info(f"Dropping cand_id: {cand_id} due to toxicity/badlists")
            continue
        all_cand_intents, all_cand_topics, all_cand_named_entities, all_cand_nounphrases = get_main_info_annotations(
            cand_uttr)
        skill_name = cand_uttr["skill_name"]
        _is_dialog_abandon = get_dialog_breakdown_annotations(cand_uttr) and PRIORITIZE_NO_DIALOG_BREAKDOWN
        _is_just_prompt = (cand_uttr["skill_name"] == "dummy_skill" and any([
            question_type in cand_uttr.get("type", "")
            for question_type in ["normal_question", "link_to_for_response_selector"]
        ])) or cand_uttr.get("response_parts", []) == ["prompt"]
        if cand_uttr["confidence"] == 1.0:
            # for those hypotheses where developer forgot to set tag to MUST_CONTINUE
            cand_uttr["can_continue"] = MUST_CONTINUE
        _can_continue = cand_uttr.get("can_continue", CAN_NOT_CONTINUE)

        # NOTE(review): the three user-side predicates below are loop-invariant;
        # kept inside the loop to preserve the original call pattern.
        _user_wants_to_chat_about_topic = (
            if_chat_about_particular_topic(annotated_uttr)
            and "about it" not in annotated_uttr["text"].lower())
        _user_does_not_want_to_chat_about_topic = if_not_want_to_chat_about_particular_topic(
            annotated_uttr)
        _user_wants_bot_to_choose_topic = if_choose_topic(annotated_uttr, _prev_bot_uttr)

        if any([phrase.lower() in cand_uttr["text"].lower() for phrase in LINK_TO_PHRASES]):
            # add `prompt` to response_parts if any linkto phrase in hypothesis
            cand_uttr["response_parts"] = cand_uttr.get("response_parts", []) + ["prompt"]

        # identifies if candidate contains named entities from last human utterance
        _same_named_entities = len(
            get_common_tokens_in_lists_of_strings(
                all_cand_named_entities, all_user_named_entities)) > 0
        # identifies if candidate contains all (not only named) entities from last human utterance
        _same_nounphrases = len(
            get_common_tokens_in_lists_of_strings(
                all_cand_nounphrases, all_user_nounphrases)) > 0
        _same_topic_entity = (_same_named_entities or _same_nounphrases) and PRIORITIZE_WITH_SAME_TOPIC_ENTITY

        _is_active_skill = (_prev_active_skill == cand_uttr["skill_name"]
                            or cand_uttr.get("can_continue", "") == MUST_CONTINUE)
        _is_active_skill = _is_active_skill and skill_name in ACTIVE_SKILLS
        _is_active_skill = _is_active_skill and (
            _can_continue in [MUST_CONTINUE, CAN_CONTINUE_SCENARIO, CAN_NOT_CONTINUE]
            # BUGFIX: default must be 0 (a count), not [] — `[] < 10` raises
            # TypeError for a skill absent from `all_prev_active_skills`.
            or (_can_continue == CAN_CONTINUE_PROMPT
                and all_prev_active_skills.get(skill_name, 0) < 10))
        _is_active_skill = _is_active_skill and PRIORITIZE_SCRIPTED_SKILLS
        if _is_active_skill:
            # we will forcibly add prompt if current scripted skill finishes scenario,
            # and has no opportunity to continue at all.
            _is_active_skill_can_not_continue = _is_active_skill and _can_continue in [
                CAN_NOT_CONTINUE
            ]

        if _is_force_intent:
            # =====force intents, choose as best_on_topic hypotheses from skills responding this request=====
            CASE = "Force intent."
            if cand_uttr["skill_name"] in _force_intents_skills:
                categorized_hyps, categorized_prompts = categorize_candidate(
                    cand_id,
                    skill_name,
                    categorized_hyps,
                    categorized_prompts,
                    _is_just_prompt,
                    _is_active_skill,
                    _can_continue,
                    _same_topic_entity,
                    _is_dialog_abandon,
                    _is_required_da=False,
                )
        elif _is_switch_topic_request or _user_does_not_want_to_chat_about_topic or _user_wants_bot_to_choose_topic:
            # =====direct request by user to switch the topic of current conversation=====
            # give priority to dummy linkto hypothesis if available, else other prompts if available.
            _is_active_skill = (cand_uttr.get("type", "") == "link_to_for_response_selector"
                                if _is_dummy_linkto_available else _is_just_prompt)
            # no priority to must_continue to skip incorrect continuation of script
            _can_continue = CAN_CONTINUE_SCENARIO if _can_continue == MUST_CONTINUE else _can_continue
            CASE = "Switch topic intent."
            if len(all_user_named_entities) > 0 or len(all_user_nounphrases) > 0:
                # user defines new topic/entity:
                # _same_topic_entity does not depend on hyperparameter in this case
                _same_topic_entity = _same_named_entities or _same_nounphrases
            # else: user wants socialbot to define new topic/entity
            categorized_hyps, categorized_prompts = categorize_candidate(
                cand_id,
                skill_name,
                categorized_hyps,
                categorized_prompts,
                _is_just_prompt,
                _is_active_skill,
                _can_continue,
                _same_topic_entity,
                _is_dialog_abandon,
                _is_required_da=False,
            )
        elif _user_wants_to_chat_about_topic:
            # user wants to chat about particular topic
            CASE = "User wants to talk about topic."
            # in this case we do not give priority to previously active skill (but give to must continue skill!)
            # because now user wants to talk about something particular
            _is_active_skill = cand_uttr.get("can_continue", "") == MUST_CONTINUE
            # _same_topic_entity does not depend on hyperparameter in this case
            _same_topic_entity = _same_named_entities or _same_nounphrases
            categorized_hyps, categorized_prompts = categorize_candidate(
                cand_id,
                skill_name,
                categorized_hyps,
                categorized_prompts,
                _is_just_prompt,
                _is_active_skill,
                _can_continue,
                _same_topic_entity,
                _is_dialog_abandon,
                _is_required_da=False,
            )
        elif _is_require_action_intent and PRIORITIZE_WITH_REQUIRED_ACT:
            # =====user intent requires particular action=====
            CASE = "User intent requires action. USER UTTERANCE CONTAINS QUESTION."
            _is_grounding_reqda = (skill_name == "dff_grounding_skill"
                                   and cand_uttr.get("type", "") == "universal_response")
            _is_active_skill = cand_uttr.get(
                "can_continue", "") == MUST_CONTINUE  # no priority to prev active skill
            _can_continue = CAN_NOT_CONTINUE  # no priority to scripted skills
            if set(all_cand_intents).intersection(
                    set(_required_actions)) or _is_grounding_reqda or _is_active_skill:
                # -----one of the cand intents is in intents required by user-----
                categorized_hyps, categorized_prompts = categorize_candidate(
                    cand_id,
                    skill_name,
                    categorized_hyps,
                    categorized_prompts,
                    _is_just_prompt,
                    _is_active_skill,
                    _can_continue,
                    _same_topic_entity,
                    _is_dialog_abandon,
                    _is_required_da=True,
                )
            else:
                # -----NO required dialog acts-----
                categorized_hyps, categorized_prompts = categorize_candidate(
                    cand_id,
                    skill_name,
                    categorized_hyps,
                    categorized_prompts,
                    _is_just_prompt,
                    _is_active_skill,
                    _can_continue,
                    _same_topic_entity,
                    _is_dialog_abandon,
                    _is_required_da=False,
                )
        else:
            # =====user intent does NOT require particular action=====
            CASE = "General case."
            categorized_hyps, categorized_prompts = categorize_candidate(
                cand_id,
                skill_name,
                categorized_hyps,
                categorized_prompts,
                _is_just_prompt,
                _is_active_skill,
                _can_continue,
                _same_topic_entity,
                _is_dialog_abandon,
                _is_required_da=False,
            )

        # a bit of rule based help
        if (len(dialog["human_utterances"]) == 1
                and cand_uttr["skill_name"] == "dff_friendship_skill"
                and greeting_spec in cand_uttr["text"]):
            categorized_hyps = add_to_top1_category(cand_id, categorized_hyps,
                                                    _is_require_action_intent)
        elif (cand_uttr["skill_name"] == "dff_friendship_skill"
              and (how_are_you_spec in cand_uttr["text"]
                   or what_i_can_do_spec in cand_uttr["text"])
              and len(dialog["utterances"]) < 16):
            categorized_hyps = add_to_top1_category(cand_id, categorized_hyps,
                                                    _is_require_action_intent)
        # elif cand_uttr["skill_name"] == 'program_y_dangerous' and cand_uttr['confidence'] == 0.98:
        #     categorized_hyps = add_to_top1_category(cand_id, categorized_hyps, _is_require_action_intent)
        elif cand_uttr["skill_name"] == "small_talk_skill" and is_sensitive_situation(
                dialog["human_utterances"][-1]):
            # let small talk to talk about sex ^_^
            categorized_hyps = add_to_top1_category(cand_id, categorized_hyps,
                                                    _is_require_action_intent)
        elif cand_uttr["confidence"] >= 1.0:
            # -------------------- SUPER CONFIDENCE CASE HERE! --------------------
            categorized_hyps = add_to_top1_category(cand_id, categorized_hyps,
                                                    _is_require_action_intent)

        # remember a pure-acknowledgement hypothesis from the grounding skill —
        # it may be prepended to the final response below
        if cand_uttr["skill_name"] == "dff_grounding_skill" and [
            "acknowledgement"
        ] == cand_uttr.get("response_parts", []):
            acknowledgement_hypothesis = deepcopy(cand_uttr)

    logger.info(f"Current CASE: {CASE}")
    # now compute current scores as one float value
    curr_single_scores = compute_curr_single_scores(candidates, scores, confidences)

    # remove disliked skills from hypotheses (a MUST_CONTINUE candidate "forgives" a dislike)
    if IGNORE_DISLIKED_SKILLS:
        for category in categorized_hyps:
            new_ids = []
            for cand_id in categorized_hyps[category]:
                if (candidates[cand_id]["skill_name"] in disliked_skills
                        and candidates[cand_id].get("can_continue", CAN_NOT_CONTINUE) == MUST_CONTINUE):
                    disliked_skills.remove(candidates[cand_id]["skill_name"])
                if candidates[cand_id]["skill_name"] not in disliked_skills:
                    new_ids.append(cand_id)
            categorized_hyps[category] = deepcopy(new_ids)
        for category in categorized_prompts:
            new_ids = []
            for cand_id in categorized_prompts[category]:
                if (candidates[cand_id]["skill_name"] in disliked_skills
                        and candidates[cand_id].get("can_continue", CAN_NOT_CONTINUE) == MUST_CONTINUE):
                    disliked_skills.remove(candidates[cand_id]["skill_name"])
                if candidates[cand_id]["skill_name"] not in disliked_skills:
                    new_ids.append(cand_id)
            categorized_prompts[category] = deepcopy(new_ids)

    best_cand_id = pickup_best_id(categorized_hyps, candidates, curr_single_scores,
                                  bot_utterances)
    best_candidate = candidates[best_cand_id]
    best_candidate["human_attributes"] = best_candidate.get("human_attributes", {})
    # save updated disliked skills to human attributes of the best candidate
    best_candidate["human_attributes"]["disliked_skills"] = disliked_skills
    logger.info(f"Best candidate: {best_candidate}")

    n_sents_without_prompt = len(sent_tokenize(best_candidate["text"]))
    _is_best_not_script = best_candidate["skill_name"] not in ACTIVE_SKILLS + ALMOST_ACTIVE_SKILLS
    # BUGFIX: the literal here was garbled ("******"); per the variable name and
    # the comment below, this checks that the user utterance contains no question.
    no_question_by_user = "?" not in dialog["human_utterances"][-1]["annotations"].get(
        "sentseg", {}).get("punct_sent", dialog["human_utterances"][-1]["text"])

    # if `no` to 1st in a row linkto question, and chosen response is not from scripted skill
    _no_to_first_linkto = is_no(dialog["human_utterances"][-1]) and any([
        phrase.lower() in _prev_bot_uttr.get("text", "").lower()
        for phrase in LINK_TO_PHRASES
    ])
    # if chosen short response or question by not-scripted skill
    _is_short_or_question_by_not_script = _is_best_not_script and (
        "?" in best_candidate["text"] or len(best_candidate["text"].split()) < 4)
    _no_questions_for_3_steps = not any([
        is_any_question_sentence_in_utterance(uttr)
        for uttr in dialog["bot_utterances"][-3:]
    ])

    if PRIORITIZE_PROMTS_WHEN_NO_SCRIPTS:
        if (_no_script_two_times_in_a_row and _is_short_or_question_by_not_script
                and no_question_by_user) or (_no_to_first_linkto and _is_best_not_script):
            # if no scripted skills 2 times in a row before, current chosen best cand is not scripted,
            # contains `?`, and user utterance does not contain "?", replace utterance with dummy!
            best_prompt_id = pickup_best_id(categorized_prompts, candidates,
                                            curr_single_scores, bot_utterances)
            best_candidate = deepcopy(candidates[best_prompt_id])
            best_cand_id = best_prompt_id

    if does_not_require_prompt(candidates, best_cand_id):
        # the candidate already contains a prompt or a question or of a length more than 200 symbols
        logger.info(
            "Best candidate contains prompt, question, request or length of > 200 symbols. Do NOT add prompt."
        )
        pass
    elif sum(categorized_prompts.values(), []):
        # need to add some prompt, and have a prompt
        _add_prompt_forcibly = best_candidate[
            "skill_name"] == _prev_active_skill and _is_active_skill_can_not_continue
        _add_prompt_forcibly = _add_prompt_forcibly and not _contains_entities
        # prompts are added:
        # - in 1 out of 10 cases, if current human utterance does not contain entities,
        #   and no prompt for several last bot utterances
        # - if PRIORITIZE_PROMTS_WHEN_NO_SCRIPTS and current utterance is from active on prev step scripted skill and
        #   it has a status can-not-continue
        # - if PRIORITIZE_PROMTS_WHEN_NO_SCRIPTS and last 2 bot uttr are not from scripted skill,
        #   and current best uttr is also from not-scripted skill
        if ((prompt_decision() and not _contains_entities and _no_questions_for_3_steps)
                or (_add_prompt_forcibly and PRIORITIZE_PROMTS_WHEN_NO_SCRIPTS)
                or (PRIORITIZE_PROMTS_WHEN_NO_SCRIPTS and _no_script_two_times_in_a_row
                    and _is_best_not_script)):
            logger.info("Decided to add a prompt to the best candidate.")
            best_prompt_id = pickup_best_id(categorized_prompts, candidates,
                                            curr_single_scores, bot_utterances)
            # as we have only one active skill, let's consider active skill as that one providing prompt
            # but we also need to reassign all the attributes
            best_prompt = candidates[best_prompt_id]
            best_candidate["text"] = f'{best_candidate["text"]} {best_prompt["text"]}'
            best_candidate["attributes"] = best_candidate.get("attributes", {})
            best_candidate["attributes"]["prompt_skill"] = best_prompt
            # anyway we must combine used links
            best_candidate["human_attributes"] = best_candidate.get("human_attributes", {})
            best_candidate["human_attributes"] = join_used_links_in_attributes(
                best_candidate["human_attributes"], best_prompt.get("human_attributes", {}))
            if len(best_candidate["human_attributes"]["used_links"]) == 0:
                best_candidate["human_attributes"].pop("used_links")

    was_ackn = if_acknowledgement_in_previous_bot_utterance(dialog)
    best_resp_cont_ackn = "acknowledgement" in best_candidate.get("response_parts", [])

    if (ADD_ACKNOWLEDGMENTS_IF_POSSIBLE and acknowledgement_hypothesis
            and acknowledgement_decision(all_user_intents)
            and n_sents_without_prompt == 1 and not was_ackn and not best_resp_cont_ackn):
        logger.info(
            "Acknowledgement is given, Final hypothesis contains only 1 sentence, no ackn in prev bot uttr,"
            "and we decided to add an acknowledgement to the best candidate.")
        best_candidate["text"] = f'{acknowledgement_hypothesis["text"]} {best_candidate["text"]}'
        best_candidate["response_parts"] = ["acknowledgement"] + best_candidate.get(
            "response_parts", [])

    return best_candidate, best_cand_id, curr_single_scores
def get_response_for_particular_topic_and_status(topic, curr_meta_script_status, dialog, source_topic):
    """Produce the meta-script response for a given topic and script status.

    Args:
        topic: topic string the meta-script is (or would be) discussing.
        curr_meta_script_status: current script stage, e.g. "starting", "deeper1",
            "comment", "opinion" (values come from the meta-script state machine).
        dialog: dialog state dict (`human_utterances`, `bot_utterances`, `utterances`).
        source_topic: where the topic came from (compared against `NP_SOURCE`).

    Returns:
        Tuple ``(response, confidence, attr)`` where `attr` carries the updated
        meta-script status and continuation flags.
    """
    attr = {
        "meta_script_topic": topic,
        "meta_script_status": curr_meta_script_status
    }
    if len(dialog["human_utterances"]) > 0:
        user_uttr = dialog["human_utterances"][-1]
        text_user_uttr = dialog["human_utterances"][-1]["text"].lower()
        # last sentence of the user utterance per sentseg annotation
        # NOTE(review): `[-1]` assumes `segments` is non-empty when present — verify
        last_user_sent_text = (dialog["human_utterances"][-1].get(
            "annotations", {}).get("sentseg", {}).get("segments", [""])[-1].lower())
    else:
        user_uttr = {"text": ""}
        text_user_uttr = ""
        last_user_sent_text = ""
    if len(dialog["bot_utterances"]) > 0:
        bot_uttr = dialog["bot_utterances"][-1]
    else:
        bot_uttr = {}

    if curr_meta_script_status == "starting":
        # opening a new script: pick a starting phrase and decide its confidence
        response, confidence, attr = get_starting_phrase(dialog, topic, attr)
        attr["response_parts"] = ["prompt"]
        can_offer_topic = if_choose_topic(dialog["human_utterances"][-1], bot_uttr)
        talk_about_user_topic = is_custom_topic(
            topic) and if_chat_about_particular_topic(user_uttr, bot_uttr)

        # detect whether the previous bot turn was a "what to talk about?" greeting question
        prev_what_to_talk_about_outputs = [
            get_outputs_with_response_from_dialog(dialog["utterances"][-3:],
                                                  response=response,
                                                  activated=True)
            for response in GREETING_QUESTIONS[list(GREETING_QUESTIONS.keys())[0]]
        ]
        prev_what_to_talk_about_outputs = sum([
            list_of_outputs
            for list_of_outputs in prev_what_to_talk_about_outputs
            if len(list_of_outputs) > 0
        ], [])
        prev_what_to_talk_about_greeting = len(
            prev_what_to_talk_about_outputs) > 0 and bot_uttr.get(
                "active_skill", "") in ["dff_friendship_skill", "program_y"]

        if (not prev_what_to_talk_about_greeting and can_offer_topic) or talk_about_user_topic:
            # if person wants to talk about something particular and we have extracted some topic - do that!
            confidence = MATCHED_DIALOG_BEGIN_CONFIDENCE
        elif "?" in last_user_sent_text or prev_what_to_talk_about_greeting:
            # if some question was asked by user, do not start script at all!
            response, confidence = "", 0.0
        elif len(dialog["utterances"]) <= 20:
            # early in the dialog: start with the default "begin" confidence
            confidence = DEFAULT_DIALOG_BEGIN_CONFIDENCE
        elif source_topic == NP_SOURCE:
            # topic extracted from user noun phrases
            confidence = NOUN_TOPIC_STARTING_CONFIDENCE
        else:
            confidence = DEFAULT_STARTING_CONFIDENCE
    else:
        # script already in progress: decide whether to continue, comment, or finish
        if curr_meta_script_status == "deeper1" and "?" in last_user_sent_text and "what" not in text_user_uttr:
            response, confidence, attr = "", 0.0, {}
        elif "?" in last_user_sent_text and not check_topic_lemmas_in_sentence(
                text_user_uttr, topic):
            logger.info(
                "Question by user was detected. Without any word from topic in it. "
                "Don't continue the script on this turn.")
            response, confidence, attr = "", 0.0, {}
        elif is_switch_topic(user_uttr) or if_chat_about_particular_topic(user_uttr):
            logger.info("Topic switching was detected. Finish script.")
            response, confidence = FINISHED_SCRIPT_RESPONSE, 0.5
            attr["meta_script_status"] = FINISHED_SCRIPT
            attr["can_continue"] = CAN_NOT_CONTINUE
        elif get_user_replies_to_particular_skill(
                dialog["utterances"], "meta_script_skill")[-2:] == ["no.", "no."]:
            logger.info("Two consequent `no` answers were detected. Finish script.")
            response, confidence = FINISHED_SCRIPT_RESPONSE, 0.5
            attr["meta_script_status"] = FINISHED_SCRIPT
            attr["can_continue"] = CAN_NOT_CONTINUE
        elif curr_meta_script_status == "comment":
            response, confidence, attr = get_comment_phrase(dialog, attr)
            attr["can_continue"] = CAN_NOT_CONTINUE
        elif curr_meta_script_status == "opinion":
            response, confidence, attr = get_opinion_phrase(dialog, topic, attr)
        elif curr_meta_script_status == "deeper1" and (is_no(user_uttr)
                                                       or "never" in text_user_uttr):
            # user rejects going deeper into the topic — finish the script
            response, confidence = FINISHED_SCRIPT_RESPONSE, 0.5
            attr["meta_script_status"] = FINISHED_SCRIPT
            attr["can_continue"] = CAN_NOT_CONTINUE
        else:
            response, confidence, attr = get_statement_phrase(dialog, topic, attr, TOPICS)
            attr["can_continue"] = CAN_CONTINUE_SCENARIO

        if confidence > 0.7 and (is_yes(user_uttr) or len(text_user_uttr.split()) > 7):
            # if yes detected, confidence 1.0 - we like agreements!
            confidence = 1.0
        if confidence > 0.7 and bot_uttr.get("active_skill", "") != "meta_script_skill":
            # another skill interrupted the script: lower confidence for the continuation
            confidence = BROKEN_DIALOG_CONTINUE_CONFIDENCE

    logger.info(
        f"User sent: `{text_user_uttr}`. Response: `{response}`. Attr: `{attr}.`"
    )
    return response, confidence, attr
def get_statuses_and_topics(dialog):
    """
    Find previously discussed meta-script topics and the last meta-script status,
    then determine the meta-script status(es) and topic(s) for the current step.

    Args:
        dialog: dialog itself (agent dialog dict with "utterances",
            "human_utterances", "bot_utterances")

    Returns:
        tuple of (current statuses list, current topics list, source topics list)
    """
    # deeper2 and opinion could be randomly skipped in dialog flow
    dialog_flow = ["starting", "deeper1", "deeper2", "opinion", "comment"]
    # user-proposed (custom) topics follow a shorter flow without deeper2/opinion
    dialog_flow_user_topic = ["starting", "deeper1", "comment"]
    curr_meta_script_statuses = []
    curr_meta_script_topics = []
    source_topics = []

    if len(dialog["utterances"]) >= 3:
        # if dialog is not empty
        used_topics = get_used_attributes_by_name(
            dialog["utterances"], attribute_name="meta_script_topic",
            value_by_default="", activated=True)
        # this determines how many replies back we assume active meta script skill to continue dialog.
        # let's assume we can continue if meta_scrip skill was active on up to 2 steps back
        prev_reply_output = get_skill_outputs_from_dialog(
            dialog["utterances"][-5:], skill_name="meta_script_skill", activated=True)
        # get last meta script output even if it was not activated but right after it was active
        last_all_meta_script_outputs = get_skill_outputs_from_dialog(
            dialog["utterances"][-5:], skill_name="meta_script_skill", activated=False)
        prev_topic_finished = False
        for out in last_all_meta_script_outputs:
            if out.get("meta_script_status", "") == "finished":
                # NOTE(review): the message says "meta_script_topic" but logs the
                # status value — log-only inconsistency, harmless to behavior
                logger.info(
                    f"Found finished dialog on meta_script_topic: `{out.get('meta_script_status', '')}`"
                )
                prev_topic_finished = True
        if len(prev_reply_output) > 0:
            # previously active skill was `meta_script_skill`
            curr_meta_script_status = prev_reply_output[-1].get(
                "meta_script_status", "")
        else:
            # previous active skill was not `meta_script_skill`
            curr_meta_script_status = ""
        logger.info(f"Found meta_script_status: `{curr_meta_script_status}`")

        if curr_meta_script_status in ["comment", "", FINISHED_SCRIPT] or prev_topic_finished:
            # if previous meta script is finished (comment given) in previous bot reply
            # or if no meta script in previous reply or script was forcibly
            topics, curr_source_topics = get_not_used_topics(used_topics, dialog)
            if curr_source_topics != [PREDEFINED_SOURCE]:
                # if topic is extracted from utterances
                pass
            elif if_choose_topic(dialog["human_utterances"][-1],
                                 dialog["bot_utterances"][-1]):
                # len(utterances) >3 so at least 1 bot utterance exists
                # one of the predefined topics (wiki or hand-written)
                curr_meta_script_statuses += [dialog_flow[0]] * len(topics)
                curr_meta_script_topics += topics
                source_topics += curr_source_topics
            else:
                pass
        else:
            # some meta script is already in progress
            # we define it here as predefined because we do not care about this variable if it's not script starting
            source_topic = PREDEFINED_SOURCE
            curr_meta_script_topic = used_topics[-1]
            logger.info(
                f"Found meta_script_status: `{curr_meta_script_status}` "
                f"on previous meta_script_topic: `{curr_meta_script_topic}`")
            # getting the next dialog flow status
            if is_custom_topic(curr_meta_script_topic):
                curr_meta_script_status = dialog_flow_user_topic[
                    dialog_flow_user_topic.index(curr_meta_script_status) + 1]
            else:
                curr_meta_script_status = dialog_flow[
                    dialog_flow.index(curr_meta_script_status) + 1]

            if curr_meta_script_status == "opinion":
                # randomly skip third deeper question
                if uniform(0, 1) <= 0.5:
                    curr_meta_script_status = "comment"
            if curr_meta_script_status == "deeper2":
                # randomly skip third deeper question
                if uniform(0, 1) <= 0.5:
                    curr_meta_script_status = "opinion"

            logger.info(f"New meta_script_status: `{curr_meta_script_status}` "
                        f"on meta_script_topic: `{curr_meta_script_topic}`")
            curr_meta_script_statuses += [curr_meta_script_status]
            curr_meta_script_topics += [curr_meta_script_topic]
            source_topics += [source_topic]
    else:
        # start of the dialog, pick up a topic of meta script
        curr_meta_script_topics, source_topics = get_not_used_topics([], dialog)
        if source_topics != [PREDEFINED_SOURCE]:
            curr_meta_script_statuses = [dialog_flow_user_topic[0]
                                         ] * len(curr_meta_script_topics)
        else:
            curr_meta_script_statuses = [dialog_flow[0]
                                         ] * len(curr_meta_script_topics)

    logger.info(f"Final new meta_script_status: `{curr_meta_script_statuses}` "
                f"on meta_script_topic: `{curr_meta_script_topics}`")
    return curr_meta_script_statuses, curr_meta_script_topics, source_topics
def rule_score_based_selection(dialog, candidates, scores, confidences, is_toxics, bot_utterances):
    """
    Select the best response candidate using hand-written rules combined with
    conversation-evaluator scores.

    Mutates `candidates[i]["text"]` (greeting prefixes) and `confidences`
    in place while scoring.

    Args:
        dialog: agent dialog dict
        candidates: list of candidate hypothesis dicts (must contain "skill_name", "text")
        scores: per-candidate conversation evaluator scores
        confidences: per-candidate confidences
            (NOTE(review): `any(confidences >= 1.0)` below implies this is a
            numpy array, not a plain list — TODO confirm at call site)
        is_toxics: per-candidate toxicity flags (used in logging only here)
        bot_utterances: list of previous bot utterance texts

    Returns:
        tuple of (best candidate dict, its index, list of final scores)
    """
    curr_single_scores = []
    bot_utt_counter = Counter(bot_utterances)
    # penalize responses the bot has already said, and retrieval skills when scenarios exist
    lower_duplicates_score(candidates, bot_utt_counter, scores, confidences)
    lower_retrieve_skills_confidence_if_scenario_exist(candidates, scores, confidences)
    # prev_active_skill = dialog["bot_utterances"][-1]['active_skill'] if len(dialog["bot_utterances"]) > 0 else ''
    skill_names = [c["skill_name"] for c in candidates]
    very_big_score = 100
    very_low_score = -100
    dummy_question = ""
    dummy_question_human_attr = {}
    link_to_question = ""
    link_to_human_attrs = {}
    not_sure_factoid = False
    if "factoid_qa" in skill_names:
        factoid_index = skill_names.index("factoid_qa")
        logging.debug("factoid")
        logging.debug(str(candidates[factoid_index]))
        if "not sure" in candidates[factoid_index] and candidates[factoid_index]["not sure"]:
            not_sure_factoid = True
    for i in range(len(scores)):
        # curr_score stays None unless a rule forces a fixed score for this candidate
        curr_score = None
        is_misheard = misheard_with_spec1 in candidates[i]["text"] or misheard_with_spec2 in candidates[i]["text"]
        intent_name = get_intent_name(candidates[i]["text"])
        is_intent_candidate = (skill_names[i] in ["dff_intent_responder_skill",
                                                  "dff_program_y_skill"]) and intent_name
        is_intent_candidate = is_intent_candidate and intent_name not in low_priority_intents
        # print("is intent candidate? " + str(is_intent_candidate), flush=True)
        if len(dialog["human_utterances"]) == 1 and greeting_spec not in candidates[i]["text"]:
            logger.info("Dialog Beginning detected.")
            if (
                if_chat_about_particular_topic(dialog["utterances"][0])
                and "about it" not in dialog["utterances"][0]["text"].lower()
            ):
                logger.info("User wants to talk about particular topic")
                # if user says `let's chat about blablabla`
                if skill_names[i] == "factoid_qa":
                    logger.info("Particular topic. Facts + Greeting to very big score.")
                    # I don't have an opinion on that but I know some facts.
                    resp = candidates[i]["text"].replace(
                        "I don't have an opinion on that but I know some facts.", "")
                    candidates[i]["text"] = "Hi, " + greeting_spec + "! " + resp
                    curr_score = very_big_score
                elif skill_names[i] == "meta_script_skill" and len(candidates[i]["text"]) > 0 and confidences[i] > 0.98:
                    logger.info("Particular topic. meta_script_skill + Greeting to very big score.")
                    # I don't have an opinion on that but I know some facts.
                    resp = candidates[i]["text"]
                    candidates[i]["text"] = "Hi, " + greeting_spec + "! " + resp
                    curr_score = very_big_score
                elif skill_names[i] == "small_talk_skill":
                    logger.info("Particular topic. Small-talk + Greeting NOT to very big score.")
                    # for now do not give small talk a very big score here
                    candidates[i]["text"] = "Hi, " + greeting_spec + "! " + candidates[i]["text"]
                    # curr_score = very_big_score
            elif if_choose_topic(dialog["utterances"][0]) and "about it" not in dialog["utterances"][0]["text"].lower():
                logger.info("User wants bot to choose the topic")
                # if user says `let's chat about something`
                if skill_names[i] == "small_talk_skill":
                    logger.info("No topic. Small-talk + Greeting to very big score.")
                    candidates[i]["text"] = "Hi, " + greeting_spec + "! " + candidates[i]["text"]
                    curr_score = very_big_score
                elif skill_names[i] == "meta_script_skill" and len(candidates[i]["text"]) > 0:
                    logger.info("No topic. Meta-script + Greeting to very big score.")
                    candidates[i]["text"] = "Hi, " + greeting_spec + "! " + candidates[i]["text"]
                    curr_score = very_big_score
            else:
                logger.info("User just wants to talk.")
                # if user says something else
                if skill_names[i] == "program_y" and greeting_spec in candidates[i]["text"]:
                    logger.info("Just chat. Program-y to very big score.")
                    curr_score = very_big_score
        elif (
            skill_names[i] == "dff_friendship_skill"
            and (how_are_you_spec in candidates[i]["text"] or what_i_can_do_spec in candidates[i]["text"])
            and len(dialog["utterances"]) < 16
        ):
            curr_score = very_big_score
        elif skill_names[i] == "dff_friendship_skill" and greeting_spec in candidates[i]["text"]:
            if len(dialog["utterances"]) < 2:
                curr_score = very_big_score
            else:
                confidences[i] = 0.2  # Low confidence for greeting in the middle of dialogue
        # we don't have 'cobotqa' anymore; instead we have factoid_qa
        elif skill_names[i] in ["factoid_qa"] and "Here's something I found on the web." in candidates[i]["text"]:
            confidences[i] = 0.6
        elif (
            skill_names[i] == "factoid_qa"
            and dialog["human_utterances"][-1]["annotations"]
            .get("intent_catcher", {})
            .get("weather_forecast_intent", {})
            .get("detected", 0)
            == 1
        ):
            confidences[i] = 0.8
        elif skill_names[i] == "misheard_asr" and is_misheard:
            curr_score = very_big_score
        elif is_intent_candidate:
            curr_score = very_big_score
        elif skill_names[i] in ["dummy_skill", "convert_reddit", "alice", "eliza", "tdidf_retrieval", "program_y"]:
            if "question" in candidates[i].get("type", "") or "?" in candidates[i]["text"]:
                penalty_start_utt = 1
                if skill_names[i] == "program_y":
                    penalty_start_utt = 4
                n_questions = 0
                # dampen confidence when the bot already asked question(s) recently
                if len(bot_utterances) >= penalty_start_utt and "?" in bot_utterances[-1]:
                    confidences[i] /= 1.5
                    n_questions += 1
                if len(bot_utterances) >= penalty_start_utt + 1 and "?" in bot_utterances[-2]:
                    confidences[i] /= 1.1
                    n_questions += 1
                if n_questions == 2:
                    # two subsequent questions (1 / (1.5 * 1.1 * 1.2) = ~0.5)
                    confidences[i] /= 1.2
            # this is only about `dummy_skill`
            if "link_to_for_response_selector" in candidates[i].get("type", ""):
                link_to_question = candidates[i]["text"]
                link_to_human_attrs = candidates[i].get("human_attributes", {})
        if skill_names[i] == "dummy_skill" and "question" in candidates[i].get("type", ""):
            dummy_question = candidates[i]["text"]
            dummy_question_human_attr = candidates[i].get("human_attributes", {})

        if curr_score is None:
            # no rule fired: blend evaluator score with the (possibly adjusted) confidence
            cand_scores = scores[i]
            confidence = confidences[i]
            skill_name = skill_names[i]
            score_conv_eval = calculate_single_convers_evaluator_score(cand_scores)
            score = CONV_EVAL_STRENGTH * score_conv_eval + CONFIDENCE_STRENGTH * confidence
            logger.info(
                f"Skill {skill_name} has final score: {score}. Confidence: {confidence}. "
                f"Toxicity: {is_toxics[i]}. Cand scores: {cand_scores}"
            )
            curr_single_scores.append(score)
        else:
            # a rule fixed the score: evaluator score plus the forced value
            cand_scores = scores[i]
            skill_name = skill_names[i]
            score_conv_eval = calculate_single_convers_evaluator_score(cand_scores)
            score = CONV_EVAL_STRENGTH * score_conv_eval + curr_score
            logger.info(
                f"Skill {skill_name} has final score: {score}. "
                f"Toxicity: {is_toxics[i]}. Cand scores: {cand_scores}"
            )
            curr_single_scores.append(score)

    highest_conf_exist = True if any(confidences >= 1.0) else False
    if highest_conf_exist:
        logger.info("Found skill with the highest confidence.")
    for j in range(len(candidates)):
        if highest_conf_exist and confidences[j] < 1.0 and curr_single_scores[j] < very_big_score:
            # need to drop this candidates
            logger.info(f"Dropping {skill_names[j]} which does not have a highest confidence or `very big score`.")
            curr_single_scores[j] = very_low_score

    best_id = np.argmax(curr_single_scores)
    best_candidate = candidates[best_id]
    best_skill_name = skill_names[int(best_id)]

    best_candidate = add_question_to_statement(
        best_candidate,
        best_skill_name,
        dummy_question,
        dummy_question_human_attr,
        link_to_question,
        link_to_human_attrs,
        not_sure_factoid,
    )

    return best_candidate, best_id, curr_single_scores
def respond():
    """
    Flask handler of the knowledge-grounding skill.

    For each dialog in the request batch, collects candidate knowledge snippets
    (annotator knowledge, retrieved facts, topic facts, news-API facts, random
    facts, news descriptions), sends them to the knowledge-grounding service,
    then post-processes raw responses into (response, confidence, attributes)
    triples.

    Returns:
        flask JSON response: list of (responses, confidences, attributes)
        per dialog.
    """
    print("response generation started")
    st_time = time.time()
    dialogs_batch = request.json["dialogs"]
    # following 3 lists have len = number of samples going to the model
    annotations_depths = []
    dial_ids = []
    input_batch = []
    # following 4 lists have len = len(dialogs_batch)
    entities = []
    lets_chat_about_flags = []
    nounphrases = []
    special_intents_flags = []
    chosen_topics = {}
    for d_id, dialog in enumerate(dialogs_batch):
        try:
            user_input_text = dialog["human_utterances"][-1]["text"]
            bot_uttr = dialog["bot_utterances"][-1] if len(
                dialog["bot_utterances"]) > 0 else {}
            switch_choose_topic = if_choose_topic(
                dialog["human_utterances"][-1], bot_uttr)
            # spacy_nounphrases
            spacy_nounphrases = get_spacy_nounphrases(
                dialog["human_utterances"][-1])
            nounphrases.append(
                re.compile(join_sentences_in_or_pattern(spacy_nounphrases),
                           re.IGNORECASE) if spacy_nounphrases else "")
            # entities
            curr_ents = get_named_entities(dialog["human_utterances"][-1])
            entities.append(
                re.compile(join_sentences_in_or_pattern(curr_ents),
                           re.IGNORECASE) if curr_ents else "")
            # intents
            lets_chat_about_flag, special_intents_flag = get_intents_flags(
                dialog["human_utterances"][-1])
            lets_chat_about_flags.append(lets_chat_about_flag)
            special_intents_flags.append(special_intents_flag)
            anntr_history_len = DEFAULT_ANNTR_HISTORY_LEN
            bot_uttrs_for_dff_check = dialog["bot_utterances"][-2:] if len(
                dialog["bot_utterances"]) > 1 else []
            dffs_flag = check_dffs(bot_uttrs_for_dff_check)
            # topic switch / lets-chat resets annotator history; dff skills use a shorter one
            if lets_chat_about_flag or switch_choose_topic:
                anntr_history_len = 0
            elif dffs_flag:
                anntr_history_len = DFF_ANNTR_HISTORY_LEN
            # if detected lets_chat is about topic from the file
            lets_chat_topic = get_lets_chat_topic(
                lets_chat_about_flag, dialog["human_utterances"][-1])
            # if prev skill == news_api_skill get news description and create knowledge fact
            news_api_fact = get_news_api_fact(
                bot_uttr, dialog["human_utterances"],
                not (switch_choose_topic or lets_chat_about_flag))
            # start creating data for kg service
            user_input_history = "\n".join(
                [i["text"] for i in dialog["utterances"]])
            annotators = {
                # "odqa": "answer_sentence",
                # "kbqa": "answer"
            }
            if not switch_choose_topic:
                user_input_knowledge, annotations_depth = get_knowledge_from_annotators(
                    annotators, dialog["utterances"], anntr_history_len)
            else:
                user_input_knowledge = ""
                annotations_depth = {}
            # add nounphrases and entities to the knowledge
            if user_input_knowledge:
                user_input_checked_sentence = (
                    space_join(spacy_nounphrases) + space_join(curr_ents)
                    + tokenize.sent_tokenize(user_input_knowledge)[0])
            else:
                user_input_checked_sentence = ""
            if user_input_knowledge:
                user_input = {
                    "checked_sentence": user_input_checked_sentence,
                    "knowledge": user_input_knowledge,
                    "text": user_input_text,
                    "history": user_input_history,
                }
                annotations_depths.append(annotations_depth)
                dial_ids.append(d_id)
                input_batch.append(user_input)
            retrieved_facts = get_annotations_from_dialog(
                dialog["utterances"][-anntr_history_len * 2 - 1:],
                "fact_retrieval")
            if retrieved_facts:
                for depth, fact in retrieved_facts[-TOP_N_FACTS:]:
                    user_input = {
                        "checked_sentence": fact,
                        "knowledge": fact,
                        "text": user_input_text,
                        "history": user_input_history,
                    }
                    input_batch.append(user_input)
                    annotations_depths.append({"retrieved_fact": depth})
                    dial_ids.append(d_id)
            if any(
                    [switch_choose_topic, lets_chat_topic, lets_chat_about_flag]):
                if lets_chat_topic:
                    fact = random.sample(TOPICS_FACTS[lets_chat_topic], 1)[0]
                    chosen_topics[d_id] = lets_chat_topic
                    _chosen_topic_fact = "lets_chat_cobot_da"
                elif not get_entities(dialog["human_utterances"][-1],
                                      only_named=False, with_labels=False):
                    # no entities in the utterance: pick a random predefined topic
                    topic = random.sample(TOPICS_FACTS.keys(), 1)[0]
                    fact = random.sample(TOPICS_FACTS[topic], 1)[0]
                    chosen_topics[d_id] = topic
                    _chosen_topic_fact = "switch_random"
                else:
                    fact = ""
                if fact:
                    user_input = {
                        "checked_sentence": fact,
                        "knowledge": fact,
                        "text": user_input_text,
                        "history": user_input_history,
                        "chosen_topic_fact": _chosen_topic_fact,
                    }
                    input_batch.append(user_input)
                    annotations_depths.append({})
                    dial_ids.append(d_id)
            if news_api_fact:
                user_input = {
                    "checked_sentence": news_api_fact,
                    "knowledge": news_api_fact,
                    "text": user_input_text,
                    "history": user_input_history,
                    "news_api_fact": True,
                }
                input_batch.append(user_input)
                annotations_depths.append({})
                dial_ids.append(d_id)
            fact_random_facts = get_fact_random(
                dialog["utterances"][-anntr_history_len * 2 - 1:])
            if fact_random_facts:
                user_input = {
                    "checked_sentence": fact_random_facts[-1][1],
                    "knowledge": fact_random_facts[-1][1],
                    "text": user_input_text,
                    "history": user_input_history,
                    "fact_random_fact": True,
                }
                input_batch.append(user_input)
                annotations_depths.append(
                    {"fact_random": fact_random_facts[-1][0]})
                dial_ids.append(d_id)
            user_news = get_news(dialog["human_utterances"][-1], "human")
            bot_news = get_news(dialog["human_utterances"][-1], "bot")
            # all_news = get_news(dialog["human_utterances"][-1], "all")
            if user_news:
                # NOTE(review): "decsription" looks like a typo for "description";
                # it may intentionally match a typo'd key in the news annotator
                # output — verify against the news skill before changing.
                news_desc = user_news[-1].get("decsription", "")
                if news_desc:
                    user_input = {
                        "checked_sentence": news_desc,
                        "knowledge": news_desc,
                        "text": user_input_text,
                        "history": user_input_history,
                        "news_fact": "human ",
                    }
                    input_batch.append(user_input)
                    annotations_depths.append({})
                    dial_ids.append(d_id)
            elif bot_news:
                # NOTE(review): same suspected "decsription" typo as above
                news_desc = bot_news[-1].get("decsription", "")
                if news_desc:
                    user_input = {
                        "checked_sentence": news_desc,
                        "knowledge": news_desc,
                        "text": user_input_text,
                        "history": user_input_history,
                        "news_fact": "bot ",
                    }
                    input_batch.append(user_input)
                    annotations_depths.append({})
                    dial_ids.append(d_id)
            # elif all_news:
            #     user_input = {
            #         'checked_sentence': all_news[-1].get("decsription", ""),
            #         'knowledge': all_news[-1].get("decsription", ""),
            #         'text': user_input_text,
            #         'history': user_input_history,
            #         'news_fact': "all ",
            #         'news_title': all_news[-1].get("title", "")
            #     }
            #     input_batch.append(user_input)
            #     annotations_depths.append({})
            #     dial_ids.append(d_id)
        except Exception as ex:
            sentry_sdk.capture_exception(ex)
            logger.exception(ex)
    try:
        raw_responses = []
        if input_batch:
            logger.info(f"skill sends to service: {input_batch}")
            resp = requests.post(KNOWLEDGE_GROUNDING_SERVICE_URL,
                                 json={"batch": input_batch}, timeout=1.5)
            raw_responses = resp.json()
            logger.info(f"skill receives from service: {raw_responses}")
        else:
            responses = [[""]]
            confidences = [[0.0]]
            attributes = [[{}]]
            logger.info(
                f"Collected no hypotheses, exiting with {list(zip(responses, confidences, attributes))}"
            )
            return jsonify(list(zip(responses, confidences, attributes)))
        dial_ids = np.array(dial_ids)
        attributes = []
        confidences = []
        responses = []
        for i, dialog in enumerate(dialogs_batch):
            curr_attributes = []
            curr_confidences = []
            curr_responses = []
            # iterate over all model samples that belong to dialog i
            for curr_i in np.where(dial_ids == i)[0]:
                attr = {
                    "knowledge_paragraph": input_batch[curr_i]["knowledge"],
                    "knowledge_checked_sentence":
                    input_batch[curr_i]["checked_sentence"],
                    "can_continue": CAN_NOT_CONTINUE,
                    "confidence_case": "",
                }
                already_was_active, short_long_response = get_penalties(
                    dialog["bot_utterances"], raw_responses[curr_i])
                curr_nounphrase_search = nounphrases[i].search(
                    raw_responses[curr_i]) if nounphrases[i] else False
                curr_entities_search = entities[i].search(
                    raw_responses[curr_i]) if entities[i] else False
                no_penalties = False
                fact_random_penalty = 0.0
                topic = chosen_topics.get(i, "")
                chosen_topic_fact_flag = input_batch[curr_i].get(
                    "chosen_topic_fact", "")
                curr_news_fact = input_batch[curr_i].get("news_fact", "")
                add_intro = ""
                # confidence-case ladder: each branch sets confidence and tags attr
                if topic and chosen_topic_fact_flag:
                    add_intro = f"Okay, Let's chat about {topic}. "
                    confidence = HIGHEST_CONFIDENCE
                    no_penalties = True
                    attr[
                        "confidence_case"] += f"topic_fact: {chosen_topic_fact_flag} "
                    attr["response_parts"] = ["prompt"]
                elif input_batch[curr_i].get("news_api_fact", ""):
                    add_intro = random.choice([
                        "Sounds like ",
                        "Seems like ",
                        "Makes sense. ",
                        # "Here's what I've heard: ",
                        "Here's something else I've heard: ",
                        "It reminds me that",
                        "This comes to my mind: ",
                        "",
                    ])
                    no_penalties = True
                    confidence = HIGHEST_CONFIDENCE
                    attr["confidence_case"] += "news_api_fact "
                elif input_batch[curr_i].get("fact_random_fact", ""):
                    fact_random_penalty = annotations_depths[curr_i].get(
                        "fact_random", 0.0)
                    confidence = DEFAULT_CONFIDENCE
                    attr["confidence_case"] += "fact_random_fact "
                elif curr_news_fact:
                    if curr_news_fact != "all":
                        confidence = NOUNPHRASE_ENTITY_CONFIDENCE
                    else:
                        confidence = DEFAULT_CONFIDENCE
                        curr_news_title = input_batch[curr_i].get(
                            "news_title", "")
                        if curr_news_title:
                            add_intro = f"I have just read that {curr_news_title}. "
                    attr["confidence_case"] += "news_fact: " + curr_news_fact
                elif (curr_nounphrase_search
                      or curr_entities_search) and lets_chat_about_flags[i]:
                    confidence = HIGHEST_CONFIDENCE
                    attr[
                        "confidence_case"] += "nounphrase_entity_and_lets_chat_about "
                    attr["response_parts"] = ["prompt"]
                elif curr_nounphrase_search or curr_entities_search:
                    confidence = NOUNPHRASE_ENTITY_CONFIDENCE
                    attr["confidence_case"] += "nounphrase_entity "
                elif lets_chat_about_flags[i]:
                    confidence = LETS_CHAT_ABOUT_CONFIDENDENCE
                    attr["confidence_case"] += "lets_chat_about "
                    attr["response_parts"] = ["prompt"]
                else:
                    confidence = DEFAULT_CONFIDENCE
                    attr["confidence_case"] += "default "
                acronym_flag = ABBRS.search(raw_responses[curr_i])
                if acronym_flag:
                    confidence = ABBRS_CONFIDENCE
                    attr["confidence_case"] += f"acronyms: {acronym_flag} "
                    logger.debug(f"KG skill: found acronyms: {acronym_flag}")
                special_char_flag = special_char_re.search(
                    raw_responses[curr_i])
                if special_char_flag:
                    confidence = HAS_SPEC_CHAR_CONFIDENCE
                    attr["confidence_case"] += "special_char "
                    logger.debug(
                        f"KG skill: found special_char: {special_char_flag}")
                if special_intents_flags[i]:
                    confidence = 0.0
                    attr["confidence_case"] += "special_intents "
                    logger.debug("KG skill: found special_intents")
                greetings_farewells_flag = greetings_farewells_re.search(
                    raw_responses[curr_i])
                if greetings_farewells_flag:
                    confidence = 0.0
                    attr["confidence_case"] += "greetings_farewells "
                    logger.debug(
                        f"KG skill: found greetings_farewells: {greetings_farewells_flag}"
                    )
                penalties = (
                    annotations_depths[curr_i].get("retrieved_fact", 0.0)
                    + fact_random_penalty + already_was_active
                    + short_long_response if not no_penalties else 0.0)
                confidence -= penalties
                if any([
                        acronym_flag,
                        special_char_flag,
                        special_intents_flags[i],
                        greetings_farewells_flag,
                        short_long_response,
                ]):
                    # candidate has disqualifying penalties: skip it entirely
                    logger.debug(
                        f"KG skill: found penalties in response: {raw_responses[curr_i]}, skipping it"
                    )
                    continue
                else:
                    curr_attributes.append(attr)
                    curr_confidences.append(max(0.0, confidence))
                    # strip spaces before punctuation and fix tokenized "n't"
                    curr_responses.append(
                        re.sub(r'\s([?.!",;:](?:\s|$))', r"\1",
                               add_intro + raw_responses[curr_i]).replace(
                                   " ' t", "'t"))
            attributes.append(curr_attributes)
            confidences.append(curr_confidences)
            responses.append(curr_responses)
    except Exception as ex:
        sentry_sdk.capture_exception(ex)
        logger.exception(ex)
        responses = [[""]]
        confidences = [[0.0]]
        attributes = [[{}]]
    logger.info(
        f"knowledge_grounding_skill exec time: {time.time() - st_time}")
    return jsonify(list(zip(responses, confidences, attributes)))
async def send(self, payload: Dict, callback: Callable):
    """
    Async connector entry point of the dummy skill.

    Builds a list of fallback response candidates for a single dialog:
    a random "do not know" answer, a question sharing a nounphrase with the
    user utterance, a link-to question, and a fact sharing a nounphrase.
    Results are delivered via `callback(task_id=..., response=[cands, confs,
    human_attrs, bot_attrs, attrs])`; on error the exception itself is sent.

    Args:
        payload: agent task payload; `payload["payload"]["dialogs"][0]` is the
            dialog dict, `payload["payload"]["all_prev_active_skills"][0]`
            the previously active skills list.
        callback: awaitable agent callback; invoked via asyncio.create_task.
    """
    try:
        st_time = time.time()
        dialog = deepcopy(payload["payload"]["dialogs"][0])
        is_sensitive_case = is_sensitive_situation(dialog["human_utterances"][-1])
        all_prev_active_skills = payload["payload"]["all_prev_active_skills"][0]
        curr_topics = get_topics(dialog["human_utterances"][-1], which="cobot_topics")
        curr_nounphrases = get_entities(dialog["human_utterances"][-1], only_named=False, with_labels=False)
        if len(curr_topics) == 0:
            curr_topics = ["Phatic"]
        logger.info(f"Found topics: {curr_topics}")
        # clean up nounphrases; NOTE(review): local `np` shadows any module-level
        # numpy alias within this loop — harmless here but easy to misread
        for i in range(len(curr_nounphrases)):
            np = re.sub(np_remove_expr, "", curr_nounphrases[i])
            np = re.sub(rm_spaces_expr, " ", np)
            if re.search(np_ignore_expr, np):
                curr_nounphrases[i] = ""
            else:
                curr_nounphrases[i] = np.strip()
        curr_nounphrases = [np for np in curr_nounphrases if len(np) > 0]
        logger.info(f"Found nounphrases: {curr_nounphrases}")
        # the five lists below must stay index-aligned (one entry per candidate)
        cands = []
        confs = []
        human_attrs = []
        bot_attrs = []
        attrs = []
        cands += [choice(donotknow_answers)]
        confs += [0.5]
        attrs += [{"type": "dummy"}]
        human_attrs += [{}]
        bot_attrs += [{}]
        if len(dialog["utterances"]) > 14 and not is_sensitive_case:
            questions_same_nps = []
            for i, nphrase in enumerate(curr_nounphrases):
                for q_id in NP_QUESTIONS.get(nphrase, []):
                    questions_same_nps += [QUESTIONS_MAP[str(q_id)]]
            if len(questions_same_nps) > 0:
                logger.info("Found special nounphrases for questions. Return question with the same nounphrase.")
                cands += [choice(questions_same_nps)]
                confs += [0.5]
                attrs += [{"type": "nounphrase_question"}]
                human_attrs += [{}]
                bot_attrs += [{}]
        link_to_question, human_attr = get_link_to_question(dialog, all_prev_active_skills)
        if link_to_question:
            _prev_bot_uttr = dialog["bot_utterances"][-2]["text"] if len(dialog["bot_utterances"]) > 1 else ""
            _bot_uttr = dialog["bot_utterances"][-1]["text"] if len(dialog["bot_utterances"]) > 0 else ""
            _prev_active_skill = (
                dialog["bot_utterances"][-1]["active_skill"] if len(dialog["bot_utterances"]) > 0 else ""
            )
            # user said "no" to the FIRST link-to question in a row (not to a repeated one)
            _no_to_first_linkto = any([phrase in _bot_uttr for phrase in LINK_TO_PHRASES])
            _no_to_first_linkto = _no_to_first_linkto and all(
                [phrase not in _prev_bot_uttr for phrase in LINK_TO_PHRASES]
            )
            _no_to_first_linkto = _no_to_first_linkto and is_no(dialog["human_utterances"][-1])
            _no_to_first_linkto = _no_to_first_linkto and _prev_active_skill != "dff_friendship_skill"
            _if_switch_topic = is_switch_topic(dialog["human_utterances"][-1])
            bot_uttr_dict = dialog["bot_utterances"][-1] if len(dialog["bot_utterances"]) > 0 else {}
            _if_choose_topic = if_choose_topic(dialog["human_utterances"][-1], bot_uttr_dict)
            _is_ask_me_something = ASK_ME_QUESTION_PATTERN.search(dialog["human_utterances"][-1]["text"])
            if len(dialog["human_utterances"]) > 1:
                _was_cant_do = "cant_do" in get_intents(dialog["human_utterances"][-2]) and (
                    len(curr_nounphrases) == 0 or is_yes(dialog["human_utterances"][-1])
                )
                _was_cant_do_stop_it = "cant_do" in get_intents(dialog["human_utterances"][-2]) and is_no(
                    dialog["human_utterances"][-1]
                )
            else:
                _was_cant_do = False
                _was_cant_do_stop_it = False
            if _was_cant_do_stop_it:
                link_to_question = "Sorry, bye! #+#exit"
                confs += [1.0]  # finish dialog request
            elif _no_to_first_linkto:
                confs += [0.99]
            elif _is_ask_me_something or _if_switch_topic or _was_cant_do or _if_choose_topic:
                confs += [1.0]  # Use it only as response selector retrieve skill output modifier
            else:
                confs += [0.05]  # Use it only as response selector retrieve skill output modifier
            cands += [link_to_question]
            attrs += [{"type": "link_to_for_response_selector"}]
            human_attrs += [human_attr]
            bot_attrs += [{}]
        facts_same_nps = []
        for i, nphrase in enumerate(curr_nounphrases):
            for fact_id in NP_FACTS.get(nphrase, []):
                facts_same_nps += [
                    f"Well, now that you've mentioned {nphrase}, I've remembered this. {FACTS_MAP[str(fact_id)]}. "
                    f"{(opinion_request_question() if random.random() < ASK_QUESTION_PROB else '')}"
                ]
        if len(facts_same_nps) > 0 and not is_sensitive_case:
            logger.info("Found special nounphrases for facts. Return fact with the same nounphrase.")
            cands += [choice(facts_same_nps)]
            confs += [0.5]
            attrs += [{"type": "nounphrase_fact"}]
            human_attrs += [{}]
            bot_attrs += [{}]
        total_time = time.time() - st_time
        logger.info(f"dummy_skill exec time: {total_time:.3f}s")
        asyncio.create_task(
            callback(task_id=payload["task_id"], response=[cands, confs, human_attrs, bot_attrs, attrs])
        )
    except Exception as e:
        logger.exception(e)
        sentry_sdk.capture_exception(e)
        # on failure the exception object itself is handed to the agent callback
        asyncio.create_task(callback(task_id=payload["task_id"], response=e))
def pickup_topic_and_start_small_talk(dialog):
    """
    Pick up a topic for small talk and build the first scripted response.

    Three cases, checked in order:
      1. the bot must choose a topic (user asked it to pick or to switch),
      2. the user explicitly named a topic to discuss,
      3. a topic word is merely present somewhere in the user utterance.

    Args:
        dialog: dialog from agent

    Returns:
        Tuple of (response, topic, confidence)
    """
    user_uttr = dialog["human_utterances"][-1]
    bot_uttr = dialog["bot_utterances"][-1] if len(dialog["bot_utterances"]) > 0 else {"text": "---", "annotations": {}}
    requested_topic = which_topic_lets_chat_about(user_uttr, bot_uttr)

    if if_choose_topic(user_uttr, bot_uttr) or if_switch_topic(user_uttr["text"].lower()):
        # bot has to pick the topic itself: user asked `pick up topic/what do you
        # want to talk about`, answered `nothing/anything`, or asked to switch
        topic = offer_topic(dialog)
        if topic in TOPIC_PATTERNS:
            script_start = TOPIC_SCRIPTS.get(topic, [""])[0]
            if topic == "me":
                intro = "Let's talk about you. "
            elif topic == "you":
                intro = "Let's talk about me. "
            else:
                intro = f"Let's talk about {topic}. "
            response = intro + script_start
            confidence = BOT_TOPIC_START_CONFIDENCE
        else:
            response, confidence = "", 0.0
        logger.info(f"Bot initiates script on topic: `{topic}`.")
    elif requested_topic:
        # user said `let's talk about [topic]`, or answered a concrete [topic]
        # to the bot's offer (vague answers were caught by the branch above)
        topic = requested_topic
        response = TOPIC_SCRIPTS.get(topic, [""])[0]
        confidence = YES_CONTINUE_CONFIDENCE if topic in NOT_SCRIPTED_TOPICS else USER_TOPIC_START_CONFIDENCE
        logger.info(f"User initiates script on topic: `{topic}`.")
    else:
        # last resort: any known topic word occurring inside the utterance text
        found_topics = find_topics_in_substring(dialog["human_utterances"][-1]["text"])
        topic = found_topics[-1] if len(found_topics) else ""
        if topic:
            response = TOPIC_SCRIPTS.get(topic, [""])[0]
            confidence = FOUND_WORD_START_CONFIDENCE
            logger.info(f"Found word in user utterance on topic: `{topic}`.")
        else:
            topic, response, confidence = "", "", 0.0
    return response, topic, confidence