def extract_age_group_anywhere_in_conversation(dialog):
    """Guess the user's age group from word markers in the last human utterance.

    Returns "kid" on kid-word regexp match, "adult" on adult-word match or a
    sensitive situation, otherwise "unknown".
    NOTE(review): despite the name, only the most recent human utterance is
    inspected — confirm whether scanning the whole conversation was intended.
    """
    last_human_uttr = dialog["human_utterances"][-1]
    text = last_human_uttr["text"]
    if re.search(KIDS_WORDS_RE, text):
        return "kid"
    if re.search(ADULTS_WORDS_RE, text) or is_sensitive_situation(last_human_uttr):
        return "adult"
    return "unknown"
def extract_age_group_using_activities(dialog):
    """Guess the user's age group from activity markers in the last human utterance.

    Same decision scheme as the word-based extractor, but matched against
    activity-specific regexps: "kid" on kid-activity match, "adult" on
    adult-activity match or a sensitive situation, otherwise "unknown".
    """
    last_human_uttr = dialog["human_utterances"][-1]
    text = last_human_uttr["text"]
    if re.search(KIDS_ACTIVITIES_RE, text):
        return "kid"
    if re.search(ADULTS_ACTIVITIES_RE, text) or is_sensitive_situation(last_human_uttr):
        return "adult"
    return "unknown"
def rule_based_prioritization(cand_uttr, dialog):
    """Decide whether a candidate hypothesis must be force-prioritized.

    Hand-written rules: greeting phrase at dialog start, small-talk skill in
    sensitive situations, misheard-ASR fallback responses, and super-confident
    (confidence >= 1.0) hypotheses.
    """
    skill = cand_uttr["skill_name"]
    text = cand_uttr["text"]
    # prioritize greeting phrase in the beginning of the dialog
    if (GREETING_FIRST and len(dialog["human_utterances"]) == 1
            and skill == "dff_friendship_skill" and greeting_spec in text):
        return True
    # small talk skill (if hypothesis is available) priority for sensitive situations when required
    if (RESTRICTION_FOR_SENSITIVE_CASE and skill == "small_talk_skill"
            and is_sensitive_situation(dialog["human_utterances"][-1])):
        return True
    # prioritize misheard_asr response when low ASR conf
    if skill == "misheard_asr" and any(spec in text for spec in (misheard_with_spec1, misheard_with_spec2)):
        return True
    # super-confident hypotheses always win
    return cand_uttr["confidence"] >= 1.0
def tag_based_response_selection(dialog, candidates, scores, confidences, bot_utterances, all_prev_active_skills=None):
    """Pick the best response hypothesis using tag-based categorization.

    Buckets every candidate into categorized_hyps / categorized_prompts keyed by
    (activity status, topic/entity overlap, dialog-breakdown flag, required-DA flag),
    then picks the best id, optionally swaps in a prompt, appends a link-to prompt
    and/or an acknowledgement, and returns the final candidate.

    Args:
        dialog: annotated dialog dict with "human_utterances"/"bot_utterances"/"utterances".
        candidates: list of hypothesis dicts (mutated in place: "can_continue",
            "response_parts" may be set; best candidate gets "human_attributes" etc.).
        scores, confidences: per-candidate numeric annotations, aligned with candidates.
        bot_utterances: previous bot utterance texts, passed through to pickup_best_id.
        all_prev_active_skills: optional list of previously active skill names;
            folded into a Counter of per-skill activation counts.

    Returns:
        (best_candidate, best_cand_id, curr_single_scores).
    """
    # Normalize the skill history into per-skill activation counts.
    all_prev_active_skills = all_prev_active_skills if all_prev_active_skills is not None else []
    all_prev_active_skills = Counter(all_prev_active_skills)

    annotated_uttr = dialog["human_utterances"][-1]
    all_user_intents, all_user_topics, all_user_named_entities, all_user_nounphrases = get_main_info_annotations(
        annotated_uttr)

    _is_switch_topic_request = is_switch_topic(annotated_uttr)
    _is_force_intent = any(
        [_intent in all_user_intents for _intent in FORCE_INTENTS_IC.keys()])
    # if user utterance contains any question (REGEXP & punctuation check!)
    _is_require_action_intent = is_any_question_sentence_in_utterance({
        "text":
        annotated_uttr.get("annotations", {}).get("sentseg", {}).get("punct_sent", annotated_uttr["text"])
    })
    # if user utterance contains any question AND requires some intent by socialbot
    _is_require_action_intent = _is_require_action_intent and any([
        _intent in all_user_intents for _intent in REQUIRE_ACTION_INTENTS.keys()
    ])
    _force_intents_detected = [
        _intent for _intent in FORCE_INTENTS_IC.keys() if _intent in all_user_intents
    ]
    # list of user intents which require some action by socialbot
    _require_action_intents_detected = [
        _intent for _intent in REQUIRE_ACTION_INTENTS.keys() if _intent in all_user_intents
    ]
    # skills that should answer the detected force intents (flattened list)
    _force_intents_skills = sum([
        FORCE_INTENTS_IC.get(_intent, []) for _intent in _force_intents_detected
    ], [])
    # list of intents required by the socialbot
    _required_actions = sum([
        REQUIRE_ACTION_INTENTS.get(_intent, []) for _intent in _require_action_intents_detected
    ], [])
    _contains_entities = len(
        get_entities(annotated_uttr, only_named=False, with_labels=False)) > 0
    _is_active_skill_can_not_continue = False

    _prev_bot_uttr = dialog["bot_utterances"][-1] if len(
        dialog["bot_utterances"]) > 0 else {}
    _prev_active_skill = dialog["bot_utterances"][-1]["active_skill"] if len(
        dialog["bot_utterances"]) > 0 else ""
    _prev_prev_active_skill = dialog["bot_utterances"][-2][
        "active_skill"] if len(dialog["bot_utterances"]) > 1 else ""
    # True when the two previous bot turns both came from non-scripted skills.
    _no_script_two_times_in_a_row = False
    if _prev_active_skill and _prev_prev_active_skill:
        if all([
                skill not in ACTIVE_SKILLS + ALMOST_ACTIVE_SKILLS
                for skill in [_prev_active_skill, _prev_prev_active_skill]
        ]):
            _no_script_two_times_in_a_row = True
    disliked_skills = get_updated_disliked_skills(
        dialog, can_not_be_disliked_skills=CAN_NOT_BE_DISLIKED_SKILLS)

    # dummy_skill's dedicated link-to hypothesis, if present among candidates
    _is_dummy_linkto_available = any([
        cand_uttr["skill_name"] == "dummy_skill" and cand_uttr.get("type", "") == "link_to_for_response_selector"
        for cand_uttr in candidates
    ])

    # Pre-create every category bucket: {activity}_{topic/entity+db}_{reqda} -> [cand ids]
    categorized_hyps = {}
    categorized_prompts = {}
    for dasuffix in ["reqda", ""]:
        for actsuffix in ["active", "continued", "finished"]:
            for suffix in [
                    "same_topic_entity_no_db",
                    "same_topic_entity_db",
                    "othr_topic_entity_no_db",
                    "othr_topic_entity_db",
            ]:
                categorized_hyps[f"{actsuffix}_{suffix}_{dasuffix}"] = []
                categorized_prompts[f"{actsuffix}_{suffix}_{dasuffix}"] = []

    CASE = ""
    acknowledgement_hypothesis = {}

    for cand_id, cand_uttr in enumerate(candidates):
        # zero confidence from a non-scripted skill means the hypothesis was banned upstream
        if confidences[cand_id] == 0.0 and cand_uttr[
                "skill_name"] not in ACTIVE_SKILLS:
            logger.info(
                f"Dropping cand_id: {cand_id} due to toxicity/badlists")
            continue
        all_cand_intents, all_cand_topics, all_cand_named_entities, all_cand_nounphrases = get_main_info_annotations(
            cand_uttr)
        skill_name = cand_uttr["skill_name"]
        _is_dialog_abandon = get_dialog_breakdown_annotations(
            cand_uttr) and PRIORITIZE_NO_DIALOG_BREAKDOWN
        # a "prompt" candidate: dummy_skill question/link-to, or explicitly tagged as prompt-only
        _is_just_prompt = (cand_uttr["skill_name"] == "dummy_skill" and any([
            question_type in cand_uttr.get("type", "") for question_type in
            ["normal_question", "link_to_for_response_selector"]
        ])) or cand_uttr.get("response_parts", []) == ["prompt"]
        if cand_uttr["confidence"] == 1.0:
            # for those hypotheses where developer forgot to set tag to MUST_CONTINUE
            cand_uttr["can_continue"] = MUST_CONTINUE
        _can_continue = cand_uttr.get("can_continue", CAN_NOT_CONTINUE)

        # intent flags derived from the user's utterance (loop-invariant, recomputed per candidate)
        _user_wants_to_chat_about_topic = (
            if_chat_about_particular_topic(annotated_uttr)
            and "about it" not in annotated_uttr["text"].lower())
        _user_does_not_want_to_chat_about_topic = if_not_want_to_chat_about_particular_topic(
            annotated_uttr)
        _user_wants_bot_to_choose_topic = if_choose_topic(
            annotated_uttr, _prev_bot_uttr)

        if any([
                phrase.lower() in cand_uttr["text"].lower()
                for phrase in LINK_TO_PHRASES
        ]):
            # add `prompt` to response_parts if any linkto phrase in hypothesis
            cand_uttr["response_parts"] = cand_uttr.get("response_parts", []) + ["prompt"]

        # identifies if candidate contains named entities from last human utterance
        _same_named_entities = (len(
            get_common_tokens_in_lists_of_strings(
                all_cand_named_entities, all_user_named_entities)) > 0)
        # identifies if candidate contains all (not only named) entities from last human utterance
        _same_nounphrases = len(
            get_common_tokens_in_lists_of_strings(all_cand_nounphrases,
                                                  all_user_nounphrases)) > 0
        _same_topic_entity = (_same_named_entities or _same_nounphrases
                              ) and PRIORITIZE_WITH_SAME_TOPIC_ENTITY

        # "active" = candidate continues the previously active scripted skill
        _is_active_skill = (_prev_active_skill == cand_uttr["skill_name"]
                            or cand_uttr.get("can_continue", "") == MUST_CONTINUE)
        _is_active_skill = _is_active_skill and skill_name in ACTIVE_SKILLS
        # NOTE(review): `all_prev_active_skills.get(skill_name, [])` compares a
        # list default against 10 — raises TypeError when the skill was never
        # active; the default was presumably meant to be 0. Confirm and fix.
        _is_active_skill = _is_active_skill and (_can_continue in [
            MUST_CONTINUE, CAN_CONTINUE_SCENARIO, CAN_NOT_CONTINUE
        ] or (_can_continue == CAN_CONTINUE_PROMPT
              and all_prev_active_skills.get(skill_name, []) < 10))
        _is_active_skill = _is_active_skill and PRIORITIZE_SCRIPTED_SKILLS
        if _is_active_skill:
            # we will forcibly add prompt if current scripted skill finishes scenario,
            # and has no opportunity to continue at all.
            _is_active_skill_can_not_continue = _is_active_skill and _can_continue in [
                CAN_NOT_CONTINUE
            ]

        if _is_force_intent:
            # =====force intents, choose as best_on_topic hypotheses from skills responding this request=====
            CASE = "Force intent."
            if cand_uttr["skill_name"] in _force_intents_skills:
                categorized_hyps, categorized_prompts = categorize_candidate(
                    cand_id,
                    skill_name,
                    categorized_hyps,
                    categorized_prompts,
                    _is_just_prompt,
                    _is_active_skill,
                    _can_continue,
                    _same_topic_entity,
                    _is_dialog_abandon,
                    _is_required_da=False,
                )
        elif _is_switch_topic_request or _user_does_not_want_to_chat_about_topic or _user_wants_bot_to_choose_topic:
            # =====direct request by user to switch the topic of current conversation=====
            # give priority to dummy linkto hypothesis if available, else other prompts if available.
            _is_active_skill = (
                cand_uttr.get("type", "") == "link_to_for_response_selector"
                if _is_dummy_linkto_available else _is_just_prompt)
            # no priority to must_continue to skip incorrect continuation of script
            _can_continue = CAN_CONTINUE_SCENARIO if _can_continue == MUST_CONTINUE else _can_continue
            CASE = "Switch topic intent."
            if len(all_user_named_entities) > 0 or len(
                    all_user_nounphrases) > 0:
                # -----user defines new topic/entity-----
                # _same_topic_entity does not depend on hyperparameter in these case
                _same_topic_entity = _same_named_entities or _same_nounphrases
                categorized_hyps, categorized_prompts = categorize_candidate(
                    cand_id,
                    skill_name,
                    categorized_hyps,
                    categorized_prompts,
                    _is_just_prompt,
                    _is_active_skill,
                    _can_continue,
                    _same_topic_entity,
                    _is_dialog_abandon,
                    _is_required_da=False,
                )
            else:
                # -----user want socialbot to define new topic/entity-----
                categorized_hyps, categorized_prompts = categorize_candidate(
                    cand_id,
                    skill_name,
                    categorized_hyps,
                    categorized_prompts,
                    _is_just_prompt,
                    _is_active_skill,
                    _can_continue,
                    _same_topic_entity,
                    _is_dialog_abandon,
                    _is_required_da=False,
                )
        elif _user_wants_to_chat_about_topic:
            # user wants to chat about particular topic
            CASE = "User wants to talk about topic."
            # in this case we do not give priority to previously active skill (but give to must continue skill!)
            # because now user wants to talk about something particular
            _is_active_skill = cand_uttr.get("can_continue", "") == MUST_CONTINUE
            # _same_topic_entity does not depend on hyperparameter in these case
            _same_topic_entity = _same_named_entities or _same_nounphrases
            categorized_hyps, categorized_prompts = categorize_candidate(
                cand_id,
                skill_name,
                categorized_hyps,
                categorized_prompts,
                _is_just_prompt,
                _is_active_skill,
                _can_continue,
                _same_topic_entity,
                _is_dialog_abandon,
                _is_required_da=False,
            )
        elif _is_require_action_intent and PRIORITIZE_WITH_REQUIRED_ACT:
            # =====user intent requires particular action=====
            CASE = "User intent requires action. USER UTTERANCE CONTAINS QUESTION."
            _is_grounding_reqda = (skill_name == "dff_grounding_skill"
                                   and cand_uttr.get(
                                       "type", "") == "universal_response")
            _is_active_skill = cand_uttr.get(
                "can_continue", "") == MUST_CONTINUE  # no priority to prev active skill
            _can_continue = CAN_NOT_CONTINUE  # no priority to scripted skills
            if set(all_cand_intents).intersection(
                    set(_required_actions
                        )) or _is_grounding_reqda or _is_active_skill:
                # -----one of the can intent is in intents required by user-----
                categorized_hyps, categorized_prompts = categorize_candidate(
                    cand_id,
                    skill_name,
                    categorized_hyps,
                    categorized_prompts,
                    _is_just_prompt,
                    _is_active_skill,
                    _can_continue,
                    _same_topic_entity,
                    _is_dialog_abandon,
                    _is_required_da=True,
                )
            else:
                # -----NO required dialog acts-----
                categorized_hyps, categorized_prompts = categorize_candidate(
                    cand_id,
                    skill_name,
                    categorized_hyps,
                    categorized_prompts,
                    _is_just_prompt,
                    _is_active_skill,
                    _can_continue,
                    _same_topic_entity,
                    _is_dialog_abandon,
                    _is_required_da=False,
                )
        else:
            # =====user intent does NOT require particular action=====
            CASE = "General case."
            categorized_hyps, categorized_prompts = categorize_candidate(
                cand_id,
                skill_name,
                categorized_hyps,
                categorized_prompts,
                _is_just_prompt,
                _is_active_skill,
                _can_continue,
                _same_topic_entity,
                _is_dialog_abandon,
                _is_required_da=False,
            )

        # a bit of rule based help
        if (len(dialog["human_utterances"]) == 1
                and cand_uttr["skill_name"] == "dff_friendship_skill"
                and greeting_spec in cand_uttr["text"]):
            categorized_hyps = add_to_top1_category(cand_id, categorized_hyps,
                                                    _is_require_action_intent)
        elif (cand_uttr["skill_name"] == "dff_friendship_skill"
              and (how_are_you_spec in cand_uttr["text"]
                   or what_i_can_do_spec in cand_uttr["text"])
              and len(dialog["utterances"]) < 16):
            categorized_hyps = add_to_top1_category(cand_id, categorized_hyps,
                                                    _is_require_action_intent)
        # elif cand_uttr["skill_name"] == 'program_y_dangerous' and cand_uttr['confidence'] == 0.98:
        #     categorized_hyps = add_to_top1_category(cand_id, categorized_hyps, _is_require_action_intent)
        elif cand_uttr[
                "skill_name"] == "small_talk_skill" and is_sensitive_situation(
                    dialog["human_utterances"][-1]):
            # let small talk to talk about sex ^_^
            categorized_hyps = add_to_top1_category(cand_id, categorized_hyps,
                                                    _is_require_action_intent)
        elif cand_uttr["confidence"] >= 1.0:
            # -------------------- SUPER CONFIDENCE CASE HERE! --------------------
            categorized_hyps = add_to_top1_category(cand_id, categorized_hyps,
                                                    _is_require_action_intent)

        # keep a grounding-skill pure-acknowledgement hypothesis for possible prepending later
        if cand_uttr["skill_name"] == "dff_grounding_skill" and [
                "acknowledgement"
        ] == cand_uttr.get("response_parts", []):
            acknowledgement_hypothesis = deepcopy(cand_uttr)

    logger.info(f"Current CASE: {CASE}")
    # now compute current scores as one float value
    curr_single_scores = compute_curr_single_scores(candidates, scores,
                                                    confidences)

    # remove disliked skills from hypotheses
    if IGNORE_DISLIKED_SKILLS:
        for category in categorized_hyps:
            new_ids = []
            for cand_id in categorized_hyps[category]:
                # a disliked skill that MUST continue is forgiven (removed from the dislike list)
                if (candidates[cand_id]["skill_name"] in disliked_skills
                        and candidates[cand_id].get(
                            "can_continue", CAN_NOT_CONTINUE) == MUST_CONTINUE):
                    disliked_skills.remove(candidates[cand_id]["skill_name"])
                if candidates[cand_id]["skill_name"] not in disliked_skills:
                    new_ids.append(cand_id)
            categorized_hyps[category] = deepcopy(new_ids)
        for category in categorized_prompts:
            new_ids = []
            for cand_id in categorized_prompts[category]:
                if (candidates[cand_id]["skill_name"] in disliked_skills
                        and candidates[cand_id].get(
                            "can_continue", CAN_NOT_CONTINUE) == MUST_CONTINUE):
                    disliked_skills.remove(candidates[cand_id]["skill_name"])
                if candidates[cand_id]["skill_name"] not in disliked_skills:
                    new_ids.append(cand_id)
            categorized_prompts[category] = deepcopy(new_ids)

    best_cand_id = pickup_best_id(categorized_hyps, candidates,
                                  curr_single_scores, bot_utterances)
    best_candidate = candidates[best_cand_id]
    best_candidate["human_attributes"] = best_candidate.get(
        "human_attributes", {})
    # save updated disliked skills to human attributes of the best candidate
    best_candidate["human_attributes"]["disliked_skills"] = disliked_skills
    logger.info(f"Best candidate: {best_candidate}")
    n_sents_without_prompt = len(sent_tokenize(best_candidate["text"]))
    _is_best_not_script = best_candidate[
        "skill_name"] not in ACTIVE_SKILLS + ALMOST_ACTIVE_SKILLS
    # NOTE(review): the "******" literal looks garbled — this check most likely
    # tested for "?" in the punctuated user utterance; confirm against history.
    no_question_by_user = "******" not in dialog["human_utterances"][-1][
        "annotations"].get("sentseg", {}).get(
            "punct_sent", dialog["human_utterances"][-1]["text"])

    # if `no` to 1st in a row linkto question, and chosen response is not from scripted skill
    _no_to_first_linkto = is_no(dialog["human_utterances"][-1]) and any([
        phrase.lower() in _prev_bot_uttr.get("text", "").lower()
        for phrase in LINK_TO_PHRASES
    ])
    # if chosen short response or question by not-scripted skill
    _is_short_or_question_by_not_script = _is_best_not_script and (
        "?" in best_candidate["text"]
        or len(best_candidate["text"].split()) < 4)
    _no_questions_for_3_steps = not any([
        is_any_question_sentence_in_utterance(uttr)
        for uttr in dialog["bot_utterances"][-3:]
    ])

    if PRIORITIZE_PROMTS_WHEN_NO_SCRIPTS:
        if (_no_script_two_times_in_a_row
                and _is_short_or_question_by_not_script
                and no_question_by_user) or (_no_to_first_linkto
                                             and _is_best_not_script):
            # if no scripted skills 2 time sin a row before, current chosen best cand is not scripted, contains `?`,
            # and user utterance does not contain "?", replace utterance with dummy!
            best_prompt_id = pickup_best_id(categorized_prompts, candidates,
                                            curr_single_scores, bot_utterances)
            best_candidate = deepcopy(candidates[best_prompt_id])
            best_cand_id = best_prompt_id

    if does_not_require_prompt(candidates, best_cand_id):
        # the candidate already contains a prompt or a question or of a length more than 200 symbols
        logger.info(
            "Best candidate contains prompt, question, request or length of > 200 symbols. Do NOT add prompt."
        )
        pass
    elif sum(categorized_prompts.values(), []):
        # best cand is 3d times in a row not scripted skill, let's append linkto
        # need to add some prompt, and have a prompt
        _add_prompt_forcibly = best_candidate[
            "skill_name"] == _prev_active_skill and _is_active_skill_can_not_continue
        _add_prompt_forcibly = _add_prompt_forcibly and not _contains_entities
        # prompts are added:
        # - in 1 out of 10 cases, if current human utterance does not contain entities,
        #   and no prompt for several last bot utterances
        # - if PRIORITIZE_PROMTS_WHEN_NO_SCRIPTS and current utterance is from active on prev step scripted skill and
        #   it has a status can-not-continue
        # - if PRIORITIZE_PROMTS_WHEN_NO_SCRIPTS and last 2 bot uttr are not from scripted skill,
        #   and current best uttr is also from not-scripted skill
        if ((prompt_decision() and not _contains_entities
             and _no_questions_for_3_steps)
                or (_add_prompt_forcibly and PRIORITIZE_PROMTS_WHEN_NO_SCRIPTS)
                or (PRIORITIZE_PROMTS_WHEN_NO_SCRIPTS
                    and _no_script_two_times_in_a_row
                    and _is_best_not_script)):
            logger.info("Decided to add a prompt to the best candidate.")
            best_prompt_id = pickup_best_id(categorized_prompts, candidates,
                                            curr_single_scores, bot_utterances)
            # as we have only one active skill, let's consider active skill as that one providing prompt
            # but we also need to reassign all the attributes
            best_prompt = candidates[best_prompt_id]
            best_candidate[
                "text"] = f'{best_candidate["text"]} {best_prompt["text"]}'
            best_candidate["attributes"] = best_candidate.get("attributes", {})
            best_candidate["attributes"]["prompt_skill"] = best_prompt
            # anyway we must combine used links
            best_candidate["human_attributes"] = best_candidate.get(
                "human_attributes", {})
            best_candidate["human_attributes"] = join_used_links_in_attributes(
                best_candidate["human_attributes"],
                best_prompt.get("human_attributes", {}))
            if len(best_candidate["human_attributes"]["used_links"]) == 0:
                best_candidate["human_attributes"].pop("used_links")

    was_ackn = if_acknowledgement_in_previous_bot_utterance(dialog)
    best_resp_cont_ackn = "acknowledgement" in best_candidate.get(
        "response_parts", [])

    # Prepend a grounding acknowledgement only to a single-sentence final reply,
    # and only if neither the previous bot turn nor the reply already has one.
    if (ADD_ACKNOWLEDGMENTS_IF_POSSIBLE and acknowledgement_hypothesis
            and acknowledgement_decision(all_user_intents)
            and n_sents_without_prompt == 1 and not was_ackn
            and not best_resp_cont_ackn):
        logger.info(
            "Acknowledgement is given, Final hypothesis contains only 1 sentence, no ackn in prev bot uttr,"
            "and we decided to add an acknowledgement to the best candidate.")
        best_candidate[
            "text"] = f'{acknowledgement_hypothesis["text"]} {best_candidate["text"]}'
        best_candidate["response_parts"] = ["acknowledgement"
                                           ] + best_candidate.get(
                                               "response_parts", [])

    return best_candidate, best_cand_id, curr_single_scores
async def send(self, payload: Dict, callback: Callable):
    """Produce dummy-skill fallback hypotheses and deliver them via `callback`.

    Builds up to four candidate answers for the first dialog in the payload:
    a random don't-know answer, a nounphrase-matched question (long non-sensitive
    dialogs only), a link-to question with context-dependent confidence, and a
    nounphrase-matched fact. On any exception the error object itself is sent
    back through the callback (best-effort boundary handler).

    Args:
        payload: task dict; assumes payload["payload"] carries "dialogs" and
            "all_prev_active_skills" lists — confirm against the agent protocol.
        callback: async callable taking task_id and response.
    """
    try:
        st_time = time.time()
        dialog = deepcopy(payload["payload"]["dialogs"][0])
        is_sensitive_case = is_sensitive_situation(dialog["human_utterances"][-1])
        all_prev_active_skills = payload["payload"]["all_prev_active_skills"][0]

        curr_topics = get_topics(dialog["human_utterances"][-1], which="cobot_topics")
        curr_nounphrases = get_entities(dialog["human_utterances"][-1], only_named=False, with_labels=False)

        if len(curr_topics) == 0:
            curr_topics = ["Phatic"]
        logger.info(f"Found topics: {curr_topics}")
        # Clean each nounphrase in place: strip unwanted tokens, collapse spaces,
        # blank out phrases matching the ignore expression.
        for i in range(len(curr_nounphrases)):
            np = re.sub(np_remove_expr, "", curr_nounphrases[i])
            np = re.sub(rm_spaces_expr, " ", np)
            if re.search(np_ignore_expr, np):
                curr_nounphrases[i] = ""
            else:
                curr_nounphrases[i] = np.strip()

        curr_nounphrases = [np for np in curr_nounphrases if len(np) > 0]

        logger.info(f"Found nounphrases: {curr_nounphrases}")

        # Parallel per-candidate lists; all five must be appended together.
        cands = []
        confs = []
        human_attrs = []
        bot_attrs = []
        attrs = []

        # Always offer a generic don't-know answer as the baseline fallback.
        cands += [choice(donotknow_answers)]
        confs += [0.5]
        attrs += [{"type": "dummy"}]
        human_attrs += [{}]
        bot_attrs += [{}]

        # Nounphrase-matched questions only for long, non-sensitive dialogs.
        if len(dialog["utterances"]) > 14 and not is_sensitive_case:
            questions_same_nps = []
            for i, nphrase in enumerate(curr_nounphrases):
                for q_id in NP_QUESTIONS.get(nphrase, []):
                    questions_same_nps += [QUESTIONS_MAP[str(q_id)]]

            if len(questions_same_nps) > 0:
                logger.info("Found special nounphrases for questions. Return question with the same nounphrase.")
                cands += [choice(questions_same_nps)]
                confs += [0.5]
                attrs += [{"type": "nounphrase_question"}]
                human_attrs += [{}]
                bot_attrs += [{}]

        link_to_question, human_attr = get_link_to_question(dialog, all_prev_active_skills)

        if link_to_question:
            _prev_bot_uttr = dialog["bot_utterances"][-2]["text"] if len(dialog["bot_utterances"]) > 1 else ""
            _bot_uttr = dialog["bot_utterances"][-1]["text"] if len(dialog["bot_utterances"]) > 0 else ""
            _prev_active_skill = (
                dialog["bot_utterances"][-1]["active_skill"] if len(dialog["bot_utterances"]) > 0 else ""
            )

            # user said "no" to the FIRST link-to question in a row
            # (last bot turn contains a link-to phrase, the one before does not)
            _no_to_first_linkto = any([phrase in _bot_uttr for phrase in LINK_TO_PHRASES])
            _no_to_first_linkto = _no_to_first_linkto and all(
                [phrase not in _prev_bot_uttr for phrase in LINK_TO_PHRASES]
            )
            _no_to_first_linkto = _no_to_first_linkto and is_no(dialog["human_utterances"][-1])
            _no_to_first_linkto = _no_to_first_linkto and _prev_active_skill != "dff_friendship_skill"

            _if_switch_topic = is_switch_topic(dialog["human_utterances"][-1])
            bot_uttr_dict = dialog["bot_utterances"][-1] if len(dialog["bot_utterances"]) > 0 else {}
            _if_choose_topic = if_choose_topic(dialog["human_utterances"][-1], bot_uttr_dict)
            _is_ask_me_something = ASK_ME_QUESTION_PATTERN.search(dialog["human_utterances"][-1]["text"])

            if len(dialog["human_utterances"]) > 1:
                # user previously said they "can't do" something; yes -> continue, no -> stop
                _was_cant_do = "cant_do" in get_intents(dialog["human_utterances"][-2]) and (
                    len(curr_nounphrases) == 0 or is_yes(dialog["human_utterances"][-1])
                )
                _was_cant_do_stop_it = "cant_do" in get_intents(dialog["human_utterances"][-2]) and is_no(
                    dialog["human_utterances"][-1]
                )
            else:
                _was_cant_do = False
                _was_cant_do_stop_it = False

            if _was_cant_do_stop_it:
                link_to_question = "Sorry, bye! #+#exit"
                confs += [1.0]  # finish dialog request
            elif _no_to_first_linkto:
                confs += [0.99]
            elif _is_ask_me_something or _if_switch_topic or _was_cant_do or _if_choose_topic:
                confs += [1.0]  # Use it only as response selector retrieve skill output modifier
            else:
                confs += [0.05]  # Use it only as response selector retrieve skill output modifier
            cands += [link_to_question]
            attrs += [{"type": "link_to_for_response_selector"}]
            human_attrs += [human_attr]
            bot_attrs += [{}]

        # Nounphrase-matched facts, optionally followed by an opinion-request question.
        facts_same_nps = []
        for i, nphrase in enumerate(curr_nounphrases):
            for fact_id in NP_FACTS.get(nphrase, []):
                facts_same_nps += [
                    f"Well, now that you've mentioned {nphrase}, I've remembered this. {FACTS_MAP[str(fact_id)]}. "
                    f"{(opinion_request_question() if random.random() < ASK_QUESTION_PROB else '')}"
                ]

        if len(facts_same_nps) > 0 and not is_sensitive_case:
            logger.info("Found special nounphrases for facts. Return fact with the same nounphrase.")
            cands += [choice(facts_same_nps)]
            confs += [0.5]
            attrs += [{"type": "nounphrase_fact"}]
            human_attrs += [{}]
            bot_attrs += [{}]

        total_time = time.time() - st_time
        logger.info(f"dummy_skill exec time: {total_time:.3f}s")
        asyncio.create_task(
            callback(task_id=payload["task_id"], response=[cands, confs, human_attrs, bot_attrs, attrs])
        )
    except Exception as e:
        # best-effort boundary: log, report to Sentry, and hand the exception
        # object back through the callback instead of crashing the service
        logger.exception(e)
        sentry_sdk.capture_exception(e)
        asyncio.create_task(callback(task_id=payload["task_id"], response=e))