def _get_utter_from_dict(vocab: Dict[str, int], image_url_id: Dict[str, int],
                         utter_dict: dict) -> Utterance:
    """Extract an Utterance object from a JSON dict.

    Args:
        vocab (Dict[str, int]): Vocabulary (lower-cased word -> index).
        image_url_id (Dict[str, int]): Image URL to index.
        utter_dict (dict): JSON dict with keys 'speaker', 'type' and an
            'utterance' sub-dict holding 'nlg', 'images' and 'false images'.

    Returns:
        Utterance: Extracted Utterance.

    Raises:
        ValueError: If 'speaker' is neither 'user' nor 'system'.
    """
    utter = utter_dict.get('utterance')
    _speaker: str = utter_dict.get('speaker')
    # Some attributes may be missing/None in the JSON; default them to
    # empty values up front instead of patching each one afterwards.
    _utter_type: str = utter_dict.get('type') or ""
    _text: str = utter.get('nlg') or ""
    _pos_images: List[str] = utter.get('images') or []
    _neg_images: List[str] = utter.get('false images') or []
    # Convert speaker into an integer. Raise instead of `assert`: asserts
    # are stripped under `python -O`, silently letting speaker = -1 through.
    if _speaker == 'user':
        speaker = USER_SPEAKER
    elif _speaker == 'system':
        speaker = SYS_SPEAKER
    else:
        raise ValueError(f"Unknown speaker: {_speaker!r}")
    # Convert utterance type into an integer; unknown types map to 0.
    utter_type: int = DatasetConfig.utterance_type_dict.get(_utter_type, 0)
    # We don't care about the type of system responses.
    if speaker == SYS_SPEAKER:
        utter_type = 0
    # Convert text into word indices; out-of-vocabulary words become UNK_ID.
    words: List[str] = word_tokenize(_text)
    text: List[int] = [vocab.get(word.lower(), UNK_ID) for word in words]
    # Map image URLs to indices; unknown URLs map to 0.
    pos_images: List[int] = [image_url_id.get(img, 0) for img in _pos_images]
    neg_images: List[int] = [image_url_id.get(img, 0) for img in _neg_images]
    return Utterance(speaker, utter_type, text, pos_images, neg_images)
def get_knowledge_items(dialog: Dialog, ordinal_number: Dict[int, int],
                        task: int) -> List[TidyDialog]:
    """Get items for knowledge task from a single dialog.

    Args:
        dialog (Dialog): Dialog.
        ordinal_number (Dict[int, int]): Ordinal numbers.
        task (int): Task.

    Returns:
        List[TidyDialog]: Extracted tidy dialogs.
    """
    # Select which system-utterance types count as "knowledge" turns for
    # the requested subtask; an unrecognized task yields an empty set, so
    # only the `desc` fallback below can ever emit a dialog.
    expected_utter_types = {}
    if task == KNOWLEDGE_STYLETIP_SUBTASK:
        expected_utter_types = DatasetConfig.utterance_knowledge_styletip_types
    elif task == KNOWLEDGE_ATTRIBUTE_SUBTASK:
        expected_utter_types = DatasetConfig.utterance_knowledge_attribute_types
    elif task == KNOWLEDGE_CELEBRITY_SUBTASK:
        expected_utter_types = DatasetConfig.utterance_knowledge_celebrity_types
    dialogs: List[TidyDialog] = []
    # Start from synthetic padding turns so every emitted context window
    # has exactly `context_size` utterances.
    utterances = get_init_pad_utters()
    context_size = DatasetConfig.dialog_context_size
    utter_type = None        # type of the most recent *user* utterance
    has_shown = False        # whether products were just recommended
    products = []            # positive images from the latest system turn
    selected_products = []   # products resolved from the user's ordinals
    for utter in dialog:
        pos_images = [image for image in utter.pos_images if image > 0]
        if utter.speaker == USER_SPEAKER:
            utterances.append(TidyUtterance(utter))
            selected_products = get_products(ordinal_number, utter.text,
                                             products)
            utter_type = utter.utter_type
        elif utter.speaker == SYS_SPEAKER:
            # `desc` fallback: in the attribute subtask, a sufficiently long
            # system reply to a recommend-type user turn right after products
            # were shown is also treated as a knowledge item.
            desc = task == KNOWLEDGE_ATTRIBUTE_SUBTASK and has_shown and \
                utter_type in DatasetConfig.utterance_recommend_types and \
                len(utter.text) > 10
            if utter_type in expected_utter_types or desc:
                if desc:
                    selected_products = get_products(ordinal_number,
                                                     utter.text, products)
                # Keep only the trailing context window.
                utterances = utterances[-context_size:]
                # Deep-copy the text so the emitted item is insulated from
                # later mutation of the shared Utterance.
                text = copy.deepcopy(utter.text)
                special_utter = Utterance(utter.speaker, utter.utter_type,
                                          text, selected_products, [])
                special_utter = TidyUtterance(special_utter)
                dialogs.append(copy.deepcopy(utterances + [special_utter]))
                # NOTE(review): reconstructed indentation — `utter_type = None`
                # is assumed to reset only after emitting an item; if it ran
                # unconditionally, the recommend-type check below would be
                # dead code. Confirm against the original formatting.
                utter_type = None
            utterances.append(TidyUtterance(utter))
            has_shown = False
            if pos_images:
                products = pos_images
            if utter_type in DatasetConfig.utterance_recommend_types:
                has_shown = True
    return dialogs
def get_init_pad_utters() -> List[TidyUtterance]:
    """Build the initial padding context for a dialog.

    Produces `DatasetConfig.dialog_context_size` empty utterances whose
    speakers alternate, arranged so the final padding slot belongs to the
    system (and therefore the first real turn is expected from the user).

    Returns:
        List[TidyUtterance]: Padding utterances.
    """
    size = DatasetConfig.dialog_context_size
    utters: List[TidyUtterance] = []
    for position in range(size):
        # Count slots remaining after this one; the last slot (0 remaining)
        # is the system's, and speakers alternate backwards from there.
        remaining = size - position - 1
        speaker = SYS_SPEAKER if remaining % 2 == 0 else USER_SPEAKER
        # NOTE(review): other call sites in this file construct Utterance
        # with five arguments (incl. neg_images); confirm the constructor
        # accepts four.
        utters.append(TidyUtterance(Utterance(speaker, -1, [], [])))
    return utters
def standardized_dialog(dialog: Dialog) -> Dialog:
    """Standardize a raw dialog so speakers strictly alternate.

    Ensures the dialog opens with a user turn (inserting an empty one if
    needed) and merges consecutive utterances from the same speaker: their
    texts and positive images are concatenated and the merged turn keeps
    the latest utterance type.

    Args:
        dialog (Dialog): Raw dialog.

    Returns:
        Dialog: Standard dialog.
    """
    result: Dialog = []
    for utter in dialog:
        # A standard dialog must start with the user; prepend an empty
        # user turn when the raw dialog opens with the system.
        if not result and utter.speaker != USER_SPEAKER:
            result.append(Utterance(USER_SPEAKER, -1, [], [], []))
        previous = result[-1] if result else None
        if previous is None or previous.speaker != utter.speaker:
            result.append(utter)
            continue
        # Same speaker twice in a row: fold this turn into the previous
        # one (negative images are deliberately not merged here).
        previous.utter_type = utter.utter_type
        previous.text += utter.text
        previous.pos_images += utter.pos_images
    return result
def _get_utter_from_dict(vocab: Dict[str, int], obj_id: Dict[str, int],
                         utter_dict: dict, utter_coref: dict,
                         speaker: str) -> Utterance:
    """Extract Utterance object from JSON dict.

    Args:
        vocab (Dict[str, int]): Vocabulary.
        obj_id (Dict[str, int]): Obj_ID to index.
        utter_dict (dict): JSON dict.
        utter_coref (dict): JSON dict.
        speaker (str): Either 'sys' or 'user'; any other value leaves
            `_speaker`/`_utter_type`/`_text` unbound (NameError below).

    Returns:
        Utterance: Extracted Utterance.
    """
    # The intent string looks like "INTENT:SLOT"; keep only the part
    # before the colon from the first annotation entry.
    if speaker == 'sys':
        _speaker: str = 'system'
        _utter_type: str = (ast.literal_eval(utter_dict.get('system_transcript_annotated')))[0]['intent'].split(':')[0]
        _text: str = utter_dict['system_transcript']
    if speaker == 'user':
        _speaker: str = 'user'
        # NOTE(review): the user branch also reads
        # 'system_transcript_annotated' — presumably this should be the
        # user-side annotation key; confirm against the dataset schema.
        _utter_type: str = (ast.literal_eval(utter_dict.get('system_transcript_annotated')))[0]['intent'].split(':')[0]
        _text: str = utter_dict['transcript']
    # Collect object indices mentioned in the belief state: any slot token
    # containing "OBJECT" contributes its final character parsed as an int
    # (assumes single-digit object ids — TODO confirm).
    object_list = []
    for state in utter_dict.get('belief_state'):
        for sub_state in state['slots']:
            for sub_sub_state in sub_state:
                if "OBJECT" in sub_sub_state:
                    object_list.append(int(sub_sub_state[-1]))
    # Map object indices back to the utter_dict keys whose values equal
    # them. NOTE(review): this scans every key of utter_dict and may append
    # duplicates; verify the intended source mapping (the commented-out
    # utter_coref lookup below suggests an earlier approach).
    _pos_obj: List[str] = []
    for product in object_list:
        for key in utter_dict:
            if utter_dict[key] == product:
                _pos_obj.append(key)
    #_pos_obj: List[str] = list(utter_coref.keys())
    #_neg_obj: List[str] = []
    # Some attributes may be empty.
    if _text is None:
        _text = ""
    if _utter_type is None:
        _utter_type = ""
    # NOTE(review): _pos_obj is always a list here, so this check is dead.
    if _pos_obj is None:
        _pos_obj = []
    #if _neg_obj is None:
    #    _neg_obj = []
    # Convert speaker into an integer.
    speaker: int = -1
    if _speaker == 'user':
        speaker = USER_SPEAKER
    elif _speaker == 'system':
        speaker = SYS_SPEAKER
    assert speaker != -1
    # Convert utterance type into an integer.
    utter_type: int = DatasetConfig.utterance_type_dict.get(_utter_type, 0)
    # We don't care the type of system response.
    if speaker == SYS_SPEAKER:
        utter_type = 0
    # Convert text into a list of integers.
    words: List[str] = word_tokenize(_text)
    text: List[int] = [vocab.get(word.lower(), UNK_ID) for word in words]
    # Images
    pos_obj: List[str] = _pos_obj
    #neg_obj: List[int] = [obj_id.get(img, 0)
    #                      for img in _neg_obj]
    utter = Utterance(speaker, utter_type, text, pos_obj)  #, neg_obj)
    return utter