def utt_non_punct_dialog(dialog: Dict):
    """
    Used by: book_skill

    Runs the dialog through ``utils.get_last_n_turns`` (default arguments) and
    ``utils.remove_clarification_turns_from_dialog``, then wraps the result as
    a single-element ``dialogs`` batch.
    """
    trimmed = utils.remove_clarification_turns_from_dialog(
        utils.get_last_n_turns(dialog)
    )
    payload = {"dialogs": [trimmed]}
    return [payload]
def convert_formatter_dialog(dialog: Dict) -> List[Dict]:
    """
    Used by: convert

    Two views of the dialog are taken: one requested with
    ``bot_last_turns=20`` that supplies the utterance history, and one taken
    with default arguments (clarification turns removed, ``punct_sent``
    annotated texts substituted) that supplies persona, human attributes and
    the ongoing-utterance count for ``convert_reddit``.
    """
    # The long view is requested first, before the default-sized one.
    long_view = utils.get_last_n_turns(dialog, bot_last_turns=20)

    recent = utils.get_last_n_turns(dialog)
    recent = utils.remove_clarification_turns_from_dialog(recent)
    recent = utils.replace_with_annotated_utterances(recent, mode="punct_sent")

    history_texts = [utterance["text"] for utterance in long_view["utterances"]]
    persona = recent["bot"]["persona"]
    ongoing_count = utils.count_ongoing_skill_utterances(
        recent["bot_utterances"], "convert_reddit"
    )
    human_attributes = recent["human"]["attributes"]

    payload = {
        "utterances_histories": [history_texts],
        "personality": [persona],
        "num_ongoing_utt": [ongoing_count],
        "human_attributes": [human_attributes],
    }
    return [payload]
def alice_formatter_dialog(dialog: Dict) -> List:
    """
    Used by: alice

    Requests the dialog with ``bot_last_turns=4``, removes clarification turns
    and delegates the final formatting to
    ``utils.last_n_human_utt_dialog_formatter`` with ``last_n_utts=2`` and
    ``only_last_sentence=True``.
    """
    shortened = utils.remove_clarification_turns_from_dialog(
        utils.get_last_n_turns(dialog, bot_last_turns=4)
    )
    return utils.last_n_human_utt_dialog_formatter(
        shortened,
        last_n_utts=2,
        only_last_sentence=True,
    )
def entity_detection_formatter_dialog(dialog: Dict) -> List[Dict]:
    """
    Collect the texts of the last two utterances as a one-element
    ``sentences`` batch.

    The dialog is first requested with ``bot_last_turns=1`` and its utterance
    texts are swapped for the ``punct_sent`` annotated variants.
    """
    window = 2
    annotated = utils.replace_with_annotated_utterances(
        utils.get_last_n_turns(dialog, bot_last_turns=1),
        mode="punct_sent",
    )
    recent_texts = [item["text"] for item in annotated["utterances"][-window:]]
    return [{"sentences": [recent_texts]}]
def utt_sentseg_punct_dialog(dialog: Dict):
    """
    Used by: skill_with_attributes_formatter; punct_dialogs_formatter,
    dummy_skill_formatter, base_response_selector_formatter

    Applies, in order, ``utils.get_last_n_turns``,
    ``utils.remove_clarification_turns_from_dialog`` and
    ``utils.replace_with_annotated_utterances`` (``punct_sent`` mode), then
    wraps the dialog as a single-element ``dialogs`` batch.
    """
    prepared = utils.get_last_n_turns(dialog)
    prepared = utils.remove_clarification_turns_from_dialog(prepared)
    prepared = utils.replace_with_annotated_utterances(prepared, mode="punct_sent")
    payload = {"dialogs": [prepared]}
    return [payload]
def dialog_breakdown_formatter(dialog: Dict) -> List[Dict]:
    """
    Used by: dialog_breakdown

    Returns one item with:
      * ``context`` - texts of ``utterances[-4:-1]`` joined by spaces
        (the final utterance is excluded);
      * ``curr_utterance`` - text of the last human utterance.
    """
    annotated = utils.replace_with_annotated_utterances(
        utils.get_last_n_turns(dialog, bot_last_turns=2),
        mode="punct_sent",
    )
    preceding = annotated["utterances"][-4:-1]
    joined_context = " ".join(item["text"] for item in preceding)
    current_text = annotated["human_utterances"][-1]["text"]
    return [{"context": [joined_context], "curr_utterance": [current_text]}]
def cobot_formatter_dialog(dialog: Dict):
    """
    Used by: cobot_dialogact_formatter, cobot_classifiers_formatter

    Produces a one-element ``utterances_histories`` batch holding the text of
    every utterance left after trimming, clarification-turn removal and
    substitution of the ``segments`` annotated texts.
    """
    prepared = utils.get_last_n_turns(dialog)
    prepared = utils.remove_clarification_turns_from_dialog(prepared)
    prepared = utils.replace_with_annotated_utterances(prepared, mode="segments")
    texts = [item["text"] for item in prepared["utterances"]]
    return [{"utterances_histories": [texts]}]
def topic_recommendation_formatter(dialog: Dict):
    """
    Gather the active skills and cobot topics seen across the dialog.

    Returns one item with:
      * ``active_skills`` - non-empty ``active_skill`` values of the
        utterances, in order;
      * ``cobot_topics`` - concatenation of every utterance's
        ``annotations["cobot_topics"]["text"]`` (missing keys contribute
        nothing).
    """
    prepared = utils.remove_clarification_turns_from_dialog(
        utils.get_last_n_turns(dialog)
    )
    utterances = prepared["utterances"]

    skill_names = [
        name
        for name in (item.get("active_skill", "") for item in utterances)
        if name
    ]
    topic_labels = [
        label
        for item in utterances
        for label in item.get("annotations", {}).get("cobot_topics", {}).get("text", [])
    ]
    return [{"active_skills": [skill_names], "cobot_topics": [topic_labels]}]
def hypotheses_list_for_dialog_breakdown(dialog: Dict) -> List[Dict]:
    """
    Used by: dialog_breakdown

    Pairs every hypothesis of the last human utterance with the same context
    string (texts of the last three utterances joined by spaces). The result
    is a single dict of two parallel lists: ``context`` and
    ``curr_utterance``.
    """
    annotated = utils.replace_with_annotated_utterances(
        utils.get_last_n_turns(dialog, bot_last_turns=2),
        mode="punct_sent",
    )
    joined_context = " ".join(item["text"] for item in annotated["utterances"][-3:])
    candidate_texts = [
        candidate["text"]
        for candidate in annotated["human_utterances"][-1]["hypotheses"]
    ]
    # One copy of the context per hypothesis keeps both lists aligned.
    batch = {
        "context": [joined_context] * len(candidate_texts),
        "curr_utterance": candidate_texts,
    }
    return [batch]
def last_utt_and_history_dialog(dialog: Dict) -> List:
    """
    Used by: topicalchat retrieval skills

    Returns one item with:
      * ``sentences`` - the ``spelling_preprocessing`` annotation of the last
        human utterance, falling back to its text when the annotation is
        absent;
      * ``utterances_histories`` - texts of all utterances in the prepared
        dialog.
    """
    prepared = utils.get_last_n_turns(dialog)
    prepared = utils.remove_clarification_turns_from_dialog(prepared)
    prepared = utils.replace_with_annotated_utterances(prepared, mode="punct_sent")

    last_human = prepared["human_utterances"][-1]
    sentence = last_human["annotations"].get("spelling_preprocessing", last_human["text"])
    history = [item["text"] for item in prepared["utterances"]]
    return [{"sentences": [sentence], "utterances_histories": [history]}]
def game_cooperative_skill_formatter(dialog: Dict):
    """
    Prepare the dialog and restrict the human attributes to two keys.

    After trimming, clarification-turn removal and ``punct_sent``
    substitution, ``dialog["human"]["attributes"]`` is replaced by a dict
    holding only ``game_cooperative_skill`` and ``used_links`` (each
    defaulting to ``{}``). The dialog is returned as a single-element
    ``dialogs`` batch.
    """
    prepared = utils.get_last_n_turns(dialog)
    prepared = utils.remove_clarification_turns_from_dialog(prepared)
    prepared = utils.replace_with_annotated_utterances(prepared, mode="punct_sent")

    kept_keys = ("game_cooperative_skill", "used_links")
    original_attributes = prepared["human"]["attributes"]
    prepared["human"]["attributes"] = {
        key: original_attributes.get(key, {}) for key in kept_keys
    }
    return [{"dialogs": [prepared]}]
def sent_rewrite_formatter_w_o_last_dialog(dialog: Dict) -> List[Dict]:
    """
    Build text and annotation histories that leave out the final utterance.

    The dialog is requested with ``utils.LAST_N_TURNS + 1`` as the second
    positional argument of ``utils.get_last_n_turns``, cleaned of
    clarification turns and switched to ``segments`` annotated texts.
    Annotations are deep-copied so the returned history does not alias the
    dialog's own annotation dicts.
    """
    prepared = utils.get_last_n_turns(dialog, utils.LAST_N_TURNS + 1)
    prepared = utils.remove_clarification_turns_from_dialog(prepared)
    prepared = utils.replace_with_annotated_utterances(prepared, mode="segments")

    all_but_last = prepared["utterances"][:-1]
    annotations = [deepcopy(item["annotations"]) for item in all_but_last]
    texts = [item["text"] for item in all_but_last]
    return [{"utterances_histories": [texts], "annotation_histories": [annotations]}]
def full_history_dialog(dialog: Dict):
    """
    Used ONLY by: response selector

    Returns one item with:
      * ``dialogs`` - the dialog requested with ``bot_last_turns=10``;
      * ``all_prev_active_skills`` - the last 15 non-empty ``active_skill``
        values of the bot utterances, read from the dialog as passed in
        (before it is shortened).
    """
    skill_names = (item.get("active_skill", "") for item in dialog["bot_utterances"])
    recent_skills = [name for name in skill_names if name][-15:]

    shortened = utils.get_last_n_turns(dialog, bot_last_turns=10)
    return [{"dialogs": [shortened], "all_prev_active_skills": [recent_skills]}]
def fact_random_formatter_dialog(dialog: Dict):
    """
    Used by: fact-random annotator

    Returns one item with the text of the last human utterance and the
    ``entity_substr`` of every ``entity_linking`` entry that carries a
    non-empty ``entity_pages`` value.
    """
    annotated = utils.replace_with_annotated_utterances(
        utils.get_last_n_turns(dialog, bot_last_turns=1),
        mode="punct_sent",
    )
    last_human = annotated["human_utterances"][-1]
    linked = last_human["annotations"].get("entity_linking", [{}])
    substrings = [
        info["entity_substr"]
        for info in linked
        if "entity_pages" in info and info["entity_pages"]
    ]
    return [{"text": last_human["text"], "entities": substrings}]
def convers_evaluator_annotator_formatter(dialog: Dict) -> List[Dict]:
    """
    Build the payload for the conversation evaluator.

    Returns one dict with the keys ``hypotheses``, ``currentUtterance``,
    ``pastUtterances`` and ``pastResponses``.
    """
    prepared = utils.remove_clarification_turns_from_dialog(
        utils.get_last_n_turns(dialog)
    )
    human_turns = prepared["human_utterances"]

    candidate_texts = [candidate["text"] for candidate in human_turns[-1]["hypotheses"]]
    current_text = prepared["utterances"][-1]["text"]
    # cobot recommends to take 2 last utt for conversation evaluation service:
    # the two human utterances before the latest one, and the last two bot
    # responses.
    earlier_human_texts = [item["text"] for item in human_turns][-3:-1]
    latest_bot_texts = [item["text"] for item in prepared["bot_utterances"]][-2:]

    conversation = {
        "hypotheses": candidate_texts,
        "currentUtterance": current_text,
        "pastUtterances": earlier_human_texts,
        "pastResponses": latest_bot_texts,
    }
    return [conversation]
def utt_sentrewrite_modified_last_dialog(dialog: Dict):
    """
    Used by: book_skill_formatter; misheard_asr_formatter, cobot_qa_formatter

    Returns one item with:
      * ``dialogs`` - the dialog after trimming, clarification-turn removal
        and substitution of the ``modified_sents`` annotated texts;
      * ``all_prev_active_skills`` - every non-empty ``active_skill`` of the
        bot utterances, read from the dialog as passed in (before trimming).
    """
    skill_names = (item.get("active_skill", "") for item in dialog["bot_utterances"])
    previous_skills = [name for name in skill_names if name]

    prepared = utils.get_last_n_turns(dialog)
    prepared = utils.remove_clarification_turns_from_dialog(prepared)
    prepared = utils.replace_with_annotated_utterances(prepared, mode="modified_sents")
    return [{"dialogs": [prepared], "all_prev_active_skills": [previous_skills]}]
def hypothesis_histories_list(dialog: Dict):
    """
    Build, for every hypothesis, the utterance history followed by that
    hypothesis.

    The hypotheses are read from the last human utterance of the dialog as
    passed in (before trimming). The dialog is then trimmed, cleaned of
    clarification turns and switched to ``segments`` annotated texts.

    Returns:
        A one-element list holding ``{"utterances_with_histories": batch}``,
        where ``batch`` has one list of strings per hypothesis: the history
        texts followed by the hypothesis text.
    """
    hypotheses = dialog["human_utterances"][-1]["hypotheses"]

    dialog = utils.get_last_n_turns(dialog)
    dialog = utils.remove_clarification_turns_from_dialog(dialog)
    dialog = utils.replace_with_annotated_utterances(dialog, mode="segments")

    # The history does not depend on the hypothesis, so it is built once
    # instead of being recomputed inside the per-hypothesis loop.
    # An utterance text may be a list of segments; those are joined into one
    # string so that every history entry is a plain string.
    history = [
        " ".join(utt["text"]) if isinstance(utt["text"], list) else utt["text"]
        for utt in dialog["utterances"]
    ]

    # hyp["text"] is a string; `history + [...]` creates a fresh list for each
    # hypothesis, so the batch entries never share state.
    utterances_histories_batch = [history + [hyp["text"]] for hyp in hypotheses]
    return [{"utterances_with_histories": utterances_histories_batch}]
def eliza_formatter_dialog(dialog: Dict) -> List[Dict]:
    """
    Used by: eliza_formatter

    Collects the human utterances that were answered by the ``eliza`` skill,
    together with the latest human utterance.

    Returns:
        A one-element list holding a dict with:
          * ``last_utterance_batch`` - ``spelling_preprocessing`` annotation
            of the last human utterance (its text when the annotation is
            absent), wrapped in a list;
          * ``human_utterance_history_batch`` - list containing the history
            of human utterances that were followed by an ``eliza`` bot
            response.
    """
    dialog = utils.get_last_n_turns(dialog)
    dialog = utils.remove_clarification_turns_from_dialog(dialog)

    history = []
    prev_human_utterance = None
    for utt in dialog["utterances"]:
        user_type = utt["user"]["user_type"]
        if user_type == "human":
            prev_human_utterance = utt["annotations"].get(
                "spelling_preprocessing", utt["text"]
            )
        elif (
            user_type == "bot"
            # `.get` instead of indexing: a bot utterance without an
            # `active_skill` key is skipped rather than raising KeyError,
            # matching how the other formatters in this file read the key.
            and utt.get("active_skill") == "eliza"
            and prev_human_utterance is not None
        ):
            history.append(prev_human_utterance)

    last_human = dialog["human_utterances"][-1]
    last_utterance = last_human["annotations"].get(
        "spelling_preprocessing", last_human["text"]
    )
    return [{
        "last_utterance_batch": [last_utterance],
        "human_utterance_history_batch": [history],
    }]
def entity_storer_formatter(dialog: Dict) -> List[Dict]:
    """
    Build the payload for the entity storer.

    Returns one item with:
      * ``human_utter_indexes`` - index of the last human utterance, counted
        on the dialog as passed in (before it is shortened);
      * ``dialogs`` - the result of
        ``utils.clean_up_utterances_to_avoid_unwanted_keys`` with a ``human``
        entry that carries only the stored ``entities`` attribute.
    """
    last_human_index = len(dialog["human_utterances"]) - 1
    stored_entities = dialog.get("human", {}).get("attributes", {}).get("entities", {})
    human_attributes = {"entities": stored_entities}

    shortened = utils.get_last_n_turns(dialog, bot_last_turns=1, human_last_turns=2)
    shortened = utils.replace_with_annotated_utterances(shortened, mode="clean_sent")

    # Remove everything except human_utterances and bot_utterances;
    # only text, annotations and active_skill are needed.
    slim_dialog = utils.clean_up_utterances_to_avoid_unwanted_keys(
        shortened,
        types_utterances=["human_utterances", "bot_utterances"],
    )
    slim_dialog["human"] = {"attributes": human_attributes}
    return [{"human_utter_indexes": [last_human_index], "dialogs": [slim_dialog]}]
def el_formatter_dialog(dialog: Dict):
    """
    Used by: entity_linking annotator

    Returns one item with:
      * ``entity_substr`` - de-duplicated entity substrings: texts from the
        ``only_named=True`` ``get_entities`` call (anything equal to "alexa",
        case-insensitively, is dropped) plus those from the
        ``only_named=False`` call whose lowercase form is not already among
        the former;
      * ``template`` - always ``[""]``;
      * ``context`` - texts of the last two utterances.

    Entities are read from the last human utterance of the dialog as passed
    in; the context comes from the dialog after it is requested with
    ``bot_last_turns=1`` and switched to ``punct_sent`` texts.
    """
    context_size = 2
    last_human = dialog["human_utterances"][-1]
    named = get_entities(last_human, only_named=True, with_labels=True)
    noun_phrases = get_entities(last_human, only_named=False, with_labels=False)

    # An empty/None result simply yields no named-entity substrings.
    substrings = [
        item["text"]
        for item in (named or [])
        if item
        and isinstance(item, dict)
        and "text" in item
        and item["text"].lower() != "alexa"
    ]
    seen_lower = {text.lower() for text in substrings}

    annotated = utils.replace_with_annotated_utterances(
        utils.get_last_n_turns(dialog, bot_last_turns=1),
        mode="punct_sent",
    )
    recent_texts = [item["text"] for item in annotated["utterances"][-context_size:]]

    if noun_phrases:
        substrings.extend(
            [phrase for phrase in noun_phrases if phrase.lower() not in seen_lower]
        )
    # Set round-trip removes exact duplicates; resulting order is unspecified.
    unique_substrings = list(set(substrings))

    return [{
        "entity_substr": [unique_substrings],
        "template": [""],
        "context": [recent_texts],
    }]
def fact_retrieval_formatter_dialog(dialog: Dict):
    """
    Used by: odqa annotator

    Returns one item combining the last human utterance text, a
    space-joined history of the last three utterances, the
    ``cobot_entities`` entities of the last human utterance and four parallel
    lists (substrings, pages, ids, page titles) taken from the
    ``entity_linking`` entries that have a non-empty ``entity_pages`` value.
    """
    annotated = utils.replace_with_annotated_utterances(
        utils.get_last_n_turns(dialog, bot_last_turns=1),
        mode="punct_sent",
    )
    history = [" ".join(item["text"] for item in annotated["utterances"][-3:])]

    last_human = annotated["human_utterances"][-1]
    annotations = last_human["annotations"]
    noun_phrases = [annotations.get("cobot_entities", {}).get("entities", [])]

    # Only entries that actually carry pages contribute to the output lists.
    with_pages = [
        info
        for info in annotations.get("entity_linking", [{}])
        if "entity_pages" in info and info["entity_pages"]
    ]
    pages, ids, substrings, titles = [], [], [], []
    for info in with_pages:
        pages.append(info["entity_pages"])
        ids.append(info["entity_ids"])
        substrings.append(info["entity_substr"])
        titles.append(info["entity_pages_titles"])

    return [{
        "human_sentences": [last_human["text"]],
        "dialog_history": history,
        "nounphrases": noun_phrases,
        "entity_substr": [substrings],
        "entity_pages": [pages],
        "entity_ids": [ids],
        "entity_pages_titles": [titles],
    }]
def base_skill_selector_formatter_dialog(dialog: Dict) -> List[Dict]:
    """
    Used by: base_skill_selector_formatter

    Requests the dialog with ``bot_last_turns=5``, removes clarification
    turns, substitutes the ``punct_sent`` annotated texts and wraps the
    dialog as a single-element ``states_batch``.
    """
    state = utils.get_last_n_turns(dialog, bot_last_turns=5)
    state = utils.remove_clarification_turns_from_dialog(state)
    state = utils.replace_with_annotated_utterances(state, mode="punct_sent")
    payload = {"states_batch": [state]}
    return [payload]
def utt_sentrewrite_modified_last_dialog_emotion_skill(dialog: Dict):
    """
    Prepare a ``dialogs`` batch from a dialog requested with
    ``bot_last_turns=2``.

    Clarification turns are removed and the ``modified_sents`` annotated
    texts are substituted before the dialog is wrapped as a single-element
    batch.
    """
    prepared = utils.remove_clarification_turns_from_dialog(
        utils.get_last_n_turns(dialog, bot_last_turns=2)
    )
    prepared = utils.replace_with_annotated_utterances(prepared, mode="modified_sents")
    payload = {"dialogs": [prepared]}
    return [payload]