예제 #1
0
파일: raw_data.py 프로젝트: chorseng/MAGIC
    def _get_utter_from_dict(vocab: Dict[str, int], image_url_id: Dict[str,
                                                                       int],
                             utter_dict: dict) -> Utterance:
        """Build an Utterance from one raw JSON dialog entry.

        Args:
            vocab (Dict[str, int]): Word-to-index vocabulary.
            image_url_id (Dict[str, int]): Image URL to index mapping.
            utter_dict (dict): One utterance entry of the JSON dump.

        Returns:
            Utterance: The converted utterance.

        """
        utter = utter_dict.get('utterance')

        # Raw fields; any of them may be absent (None) in the JSON.
        raw_speaker: str = utter_dict.get('speaker')
        raw_type: str = utter_dict.get('type')
        raw_text: str = utter.get('nlg')
        raw_pos: List[str] = utter.get('images')
        raw_neg: List[str] = utter.get('false images')

        # Substitute neutral defaults for missing attributes.
        raw_type = "" if raw_type is None else raw_type
        raw_text = "" if raw_text is None else raw_text
        raw_pos = [] if raw_pos is None else raw_pos
        raw_neg = [] if raw_neg is None else raw_neg

        # Map the speaker string onto its integer code; anything else
        # leaves -1 and trips the assertion below.
        speaker: int = {'user': USER_SPEAKER,
                        'system': SYS_SPEAKER}.get(raw_speaker, -1)
        assert speaker != -1

        # Utterance type as an integer; system responses are untyped (0).
        if speaker == SYS_SPEAKER:
            utter_type: int = 0
        else:
            utter_type = DatasetConfig.utterance_type_dict.get(raw_type, 0)

        # Tokenize and look each lower-cased word up in the vocabulary.
        text: List[int] = [
            vocab.get(word.lower(), UNK_ID)
            for word in word_tokenize(raw_text)
        ]

        # Resolve image URLs to indices (0 for unknown URLs).
        pos_images: List[int] = [image_url_id.get(url, 0) for url in raw_pos]
        neg_images: List[int] = [image_url_id.get(url, 0) for url in raw_neg]

        return Utterance(speaker, utter_type, text, pos_images, neg_images)
예제 #2
0
def get_knowledge_items(dialog: Dialog, ordinal_number: Dict[int, int],
                        task: int) -> List[TidyDialog]:
    """Get items for knowledge task from a single dialog.

    Scans the dialog turn by turn and, whenever a system response matches
    the utterance types expected for the given knowledge subtask (or looks
    like an attribute description right after products were shown), emits
    a tidy dialog made of the recent context window plus that response.

    Args:
        dialog (Dialog): Dialog.
        ordinal_number (Dict[int, int]): Ordinal numbers.
        task (int): Task.

    Returns:
        List[TidyDialog]: Extracted tidy dialogs.

    """
    # Select which utterance types count as knowledge responses for this
    # subtask; an unrecognized task leaves this empty, so only the `desc`
    # heuristic below can ever match.
    expected_utter_types = {}
    if task == KNOWLEDGE_STYLETIP_SUBTASK:
        expected_utter_types = DatasetConfig.utterance_knowledge_styletip_types
    elif task == KNOWLEDGE_ATTRIBUTE_SUBTASK:
        expected_utter_types = DatasetConfig.utterance_knowledge_attribute_types
    elif task == KNOWLEDGE_CELEBRITY_SUBTASK:
        expected_utter_types = DatasetConfig.utterance_knowledge_celebrity_types

    dialogs: List[TidyDialog] = []
    # Seed with padding so the first real turns still fill a full window.
    utterances = get_init_pad_utters()
    context_size = DatasetConfig.dialog_context_size
    utter_type = None       # type of the most recent user turn, not yet consumed
    has_shown = False       # True if the previous system turn showed products
    products = []           # positive images shown by the last system turn
    selected_products = []  # products the user referred to (by ordinal)
    for utter in dialog:
        # Keep only valid (non-padding) image ids.
        pos_images = [image for image in utter.pos_images if image > 0]

        if utter.speaker == USER_SPEAKER:
            utterances.append(TidyUtterance(utter))
            selected_products = get_products(ordinal_number, utter.text,
                                             products)
            utter_type = utter.utter_type
        elif utter.speaker == SYS_SPEAKER:
            # Heuristic (attribute subtask only): a long system reply to a
            # recommendation, right after products were shown, is treated
            # as an attribute description even without a type match.
            desc = task == KNOWLEDGE_ATTRIBUTE_SUBTASK and has_shown and \
                   utter_type in DatasetConfig.utterance_recommend_types and \
                   len(utter.text) > 10
            if utter_type in expected_utter_types or desc:
                if desc:
                    selected_products = get_products(ordinal_number,
                                                     utter.text, products)
                # Trim to the most recent context window before emitting.
                utterances = utterances[-context_size:]
                text = copy.deepcopy(utter.text)
                special_utter = Utterance(utter.speaker, utter.utter_type,
                                          text, selected_products, [])
                special_utter = TidyUtterance(special_utter)
                dialogs.append(copy.deepcopy(utterances + [special_utter]))
                # Consume the user turn type so it cannot match twice.
                utter_type = None
            utterances.append(TidyUtterance(utter))
            has_shown = False
            if pos_images:
                products = pos_images
                # NOTE(review): if a tidy dialog was just emitted above,
                # utter_type has been reset to None and this check fails,
                # leaving has_shown False even for a recommendation turn —
                # confirm this interaction is intended.
                if utter_type in DatasetConfig.utterance_recommend_types:
                    has_shown = True
    return dialogs
예제 #3
0
def get_init_pad_utters() -> List[TidyUtterance]:
    """Build the padding utterances that seed a dialog context window.

    Speakers alternate so that the final padding slot belongs to the
    system, matching the expected user/system turn order of real dialogs.

    Returns:
        List[TidyUtterance]: ``DatasetConfig.dialog_context_size`` padded
            utterances.
    """
    size = DatasetConfig.dialog_context_size
    utters: List[TidyUtterance] = []
    for idx in range(size):
        # Count slots backwards from the end: the last one is a system turn.
        remaining = size - idx - 1
        speaker = SYS_SPEAKER if remaining % 2 == 0 else USER_SPEAKER
        # NOTE(review): Utterance is built with 4 args here but with 5 in
        # sibling code — confirm the constructor signature.
        utters.append(TidyUtterance(Utterance(speaker, -1, [], [])))
    return utters
예제 #4
0
def standardized_dialog(dialog: Dialog) -> Dialog:
    """Standardized raw dialog.

    Merges consecutive utterances by the same speaker into one turn and
    prepends a dummy user turn when the dialog opens with the system, so
    the result strictly alternates user/system.

    Args:
        dialog (Dialog): Raw dialog.

    Returns:
        Dialog: Standard dialog.
    """
    std_dialog: Dialog = []
    for utter in dialog:
        # Guarantee the dialog opens with a user turn.
        if not std_dialog and utter.speaker != USER_SPEAKER:
            std_dialog.append(Utterance(USER_SPEAKER, -1, [], [], []))
        last = std_dialog[-1] if std_dialog else None
        if last is None or last.speaker != utter.speaker:
            std_dialog.append(utter)
            continue
        # Same speaker twice in a row: fold this turn into the previous one.
        # (neg_images are deliberately not merged, matching prior behavior.)
        last.utter_type = utter.utter_type
        last.text += utter.text
        last.pos_images += utter.pos_images
    return std_dialog
예제 #5
0
    def _get_utter_from_dict(vocab: Dict[str, int],
                             obj_id: Dict[str, int],
                             utter_dict: dict,
                             utter_coref: dict,
                             speaker: str) -> Utterance:
        """Extract Utterance object from JSON dict.

        Args:
            vocab (Dict[str, int]): Vocabulary.
            obj_id (Dict[str, int]): Obj_ID to index (currently unused; only
                referenced by the commented-out neg_obj code below).
            utter_dict (dict): JSON dict for one dialog turn.
            utter_coref (dict): JSON dict (currently unused; see the
                commented-out `_pos_obj` assignment below).
            speaker (str): 'sys' or 'user'; any other value leaves the
                locals below unbound and raises NameError further down.

        Returns:
            Utterance: Extracted Utterance.
        """
        
        if speaker == 'sys':
            _speaker: str = 'system'
            # The annotation field holds a Python-literal string; the first
            # entry's intent looks like "TYPE:subtype" — keep only TYPE.
            _utter_type: str = (ast.literal_eval(utter_dict.get('system_transcript_annotated')))[0]['intent'].split(':')[0]
            _text: str = utter_dict['system_transcript']
        if speaker == 'user':
            _speaker: str = 'user'
            # NOTE(review): this branch also reads
            # 'system_transcript_annotated' rather than a user-side
            # annotation field — confirm that is intended.
            _utter_type: str = (ast.literal_eval(utter_dict.get('system_transcript_annotated')))[0]['intent'].split(':')[0]
            _text: str = utter_dict['transcript']
        # Collect object ids mentioned in the belief state. Slots are nested
        # lists; a leaf string containing "OBJECT" is assumed to end in the
        # object's id digit.
        # NOTE(review): int(sub_sub_state[-1]) only handles single-digit
        # ids — confirm object ids never reach 10.
        object_list = []
        for state in utter_dict.get('belief_state'):
            for sub_state in state['slots']:
                for sub_sub_state in sub_state:
                    if "OBJECT" in sub_sub_state:
                        object_list.append(int(sub_sub_state[-1]))
        _pos_obj: List[str] = [] 
        # Map each referenced object id back to the utter_dict key whose
        # value equals it (reverse lookup over the whole turn dict).
        for product in object_list:
             for key in utter_dict:
                 if utter_dict[key] == product:                
                     _pos_obj.append(key) 
        #_pos_obj: List[str] = list(utter_coref.keys())
        #_neg_obj: List[str] = []

        # Some attributes may be empty.
        if _text is None:
            _text = ""
        if _utter_type is None:
            _utter_type = ""
        # _pos_obj is always a list at this point, so this never fires.
        if _pos_obj is None:
            _pos_obj = []
        #if _neg_obj is None:
        #    _neg_obj = []

        # Convert speaker into an integer (rebinds the str parameter).
        speaker: int = -1
        if _speaker == 'user':
            speaker = USER_SPEAKER
        elif _speaker == 'system':
            speaker = SYS_SPEAKER
        assert speaker != -1

        # Convert utterance type into an integer.
        utter_type: int = DatasetConfig.utterance_type_dict.get(_utter_type, 0)
        # We don't care the type of system response.
        if speaker == SYS_SPEAKER:
            utter_type = 0

        # Convert text into a list of integers.
        words: List[str] = word_tokenize(_text)
        text: List[int] = [vocab.get(word.lower(), UNK_ID) for word in words]

        # Objects are kept as raw keys, not mapped through obj_id.
        pos_obj: List[str] = _pos_obj
        #neg_obj: List[int] = [obj_id.get(img, 0)
        #                         for img in _neg_obj]

        utter = Utterance(speaker, utter_type, text, pos_obj) #, neg_obj)
        return utter