예제 #1
0
    def _set_character_vec(self, observation: Message) -> Message:
        """
        Tokenize the model's own character and store it on the observation.

        The character text is read from the observation field whose name is
        given by ``opt['expanded_attention_self_character_key']``. When that
        text contains the ``_self_name`` marker, the character name is first
        pulled out of it with ``extract_characters``.

        :param observation:
            observation that may carry the character text

        :return observation:
            the same observation; ``character_vec`` is set only when the
            character field was present.
        """
        char_key = self.opt['expanded_attention_self_character_key']
        if char_key not in observation:
            # No character text on this observation; nothing to tokenize.
            return observation

        char_text = observation[char_key]

        # Pick the tokenizer: the classifier's dictionary when expanded
        # attention is classifier-based, otherwise the agent's own dictionary.
        if self.classifier_expanded_attn:
            assert isinstance(self.classifier_dict, DictionaryAgent)
            assert self.classifier_dict is not None
            tokenizer = self.classifier_dict
        else:
            tokenizer = self.dict

        if '_self_name' in char_text:
            # The field holds raw context, so extract the name from it.
            char_text = extract_characters(char_text)['_self_name']

        observation['character_vec'] = tokenizer.txt2vec(char_text)
        return observation
예제 #2
0
    def _predict_character_from_context(self, context: str,
                                        characters: Dict[str, str],
                                        who: str) -> str:
        """
        Ask the predictor which character is speaking, given the context.

        The context is split into utterances, optionally truncated to the
        first ``self.num_utterances`` entries, and a control token is inserted
        before the second-to-last entry. The joined text is then passed to
        ``self.predict``.

        :param context:
            dialogue context
        :param characters:
            available characters to choose from. Not read by this method; the
            label candidates are re-extracted from ``context``.
        :param who:
            ``'self'`` to predict the self character; any other value
            predicts the partner

        :return whoareyou:
            the ``'text'`` field of the predictor's act
        """
        assert (
            not self.annotate_speaker
        ), "if annotate speaker, characters would be in dialogue history"

        if who == 'self':
            control_token = WHO_ARE_YOU
        else:
            control_token = WHO_AM_I

        history = self.get_utterances_from_full_context(
            context, include_context=self.include_context)
        max_utterances = self.num_utterances
        if max_utterances > 0:
            history = history[:max_utterances]

        # insert(-2, ...) places the token before the second-to-last entry.
        history.insert(-2, control_token)
        prediction = self.predict(
            self.delimiter.join(history),
            predictor_label_candidates=extract_characters(context),
        )
        return prediction['text']
예제 #3
0
    def augment_context(self,
                        full_context: str,
                        candidate: str,
                        include_context: Optional[bool] = True) -> List[str]:
        """
        Build the utterance list used to predict who said ``candidate``.

        Dialogue utterances are attributed to alternating speakers, working
        backwards from the most recent one (treated as the partner's), and
        passed through ``maybe_annotate``. The ``WHO_AM_I`` token and the
        candidate are appended at the end.

        Note that utterance extraction uses ``self.include_context``, while
        the ``include_context`` argument only decides whether the context
        lines are kept in the returned list.

        :param full_context:
            dialogue context
        :param candidate:
            candidate response
        :param include_context:
            whether to include the context strings in the result

        :return augmented_context:
            list of (optionally context +) annotated dialogue utterances,
            followed by ``WHO_AM_I`` and ``candidate``.
        """
        history = self.get_utterances_from_full_context(
            full_context, include_context=self.include_context)
        if self.num_utterances > 0:
            history = history[:self.num_utterances]

        names = extract_characters(full_context)
        me = names.get('_self_name')
        you = names.get('_partner_name')
        # Fall back to the predictor for any character the context lacks.
        if not me:
            me = self._predict_character_from_context(full_context,
                                                      names,
                                                      who='self')
        if not you:
            you = self._predict_character_from_context(full_context,
                                                       names,
                                                       who='partner')

        setting_lines = [utt for utt in history if self.is_context(utt)]
        spoken = [utt for utt in history if not self.is_context(utt)]

        # Counting back from the last utterance: an even distance means the
        # partner spoke, an odd distance means we spoke.
        last = len(spoken) - 1
        annotated = [
            maybe_annotate(
                you if (last - pos) % 2 == 0 else me,
                utt,
                self.annotate_speaker,  # type: ignore
                self.speaker_separator,  # type: ignore
                self.speaker_annotation_position,  # type: ignore
            ) for pos, utt in enumerate(spoken)
        ]

        augmented = setting_lines + annotated if include_context else annotated
        augmented += (WHO_AM_I, candidate)
        return augmented
예제 #4
0
    def get_class_to_rerank_for(cls, observation: Message,
                                full_context: str) -> Optional[str]:
        """
        Return the predictor (classifier) class that we want to rerank for.

        For LIGHT, this is the ``_self_name`` character extracted from the
        full context, or ``None`` if the context does not define one.
        ``observation`` is not consulted.
        """
        return extract_characters(full_context).get('_self_name')
예제 #5
0
 def _build_candidates(cls, episodes: List[Message]) -> List[str]:
     """
     Build up the list of candidates (i.e., characters) from the data.

     Characters are extracted from the ``'text'`` of the first example of
     every episode and de-duplicated.

     :param episodes:
         list of episodes, each a list of examples

     :return candidates:
         unique character names, sorted so that the order is the same on
         every run (iteration order of a set of strings depends on the
         interpreter's hash seed).
     """
     candidates = set()
     for ep in episodes:
         candidates.update(extract_characters(ep[0]['text']).values())
     return sorted(candidates)
예제 #6
0
 def set_batch_context(
     self: TSType, batch_context_list: List[List[int]], batch_idx: int
 ) -> TSType:
     """
     Override to save the de-tokenized version of the context.

     Stores the token context for ``batch_idx``, its de-tokenized string
     (via ``self.agent._v2t``), and the ``_self_name`` character extracted
     from that string.

     :param batch_context_list:
         token contexts for the whole batch
     :param batch_idx:
         index of this item within the batch

     :return self:
         this object, for chaining
     """
     context_vec = batch_context_list[batch_idx]
     context_text = self.agent._v2t(context_vec)

     self.context = context_vec
     self.context_str = context_text
     self.character = extract_characters(context_text)['_self_name']
     return self
예제 #7
0
    def _explode_episode(
        cls,
        episode: List[Message],
        exclude_from_context: List[str],
        use_speech_prefix: bool,
    ) -> Tuple[str, Dict[str, str], List[Tuple[str, str, str]]]:
        """
        Extract context, characters, and list of utterances from an episode.

        The context is the ``'text'`` of the first example. If its last line
        does not start with ``_``, that line is treated as the partner's
        opening utterance and removed from the context.

        :param episode:
            list of examples
        :param exclude_from_context:
            list of line prefixes; context lines starting with any of them
            are dropped from the returned context
        :param use_speech_prefix:
            if true, prepend label text with the ``_self_say `` prefix.

        :return (context, characters, utterances):
            context: string context
            characters: dict mapping char key to character
            utterances: list of tuples (speaker_name, utterance, listener_name)
        """
        context = episode[0]['text']
        characters = extract_characters(context)
        me = characters['_self_name']
        you = characters['_partner_name']

        turns = []
        lines = context.split('\n')
        if not lines[-1].startswith('_'):
            # begin conversation with partner
            turns.append((you, lines[-1], me))
            context = '\n'.join(lines[:-1])

        if exclude_from_context:
            excluded = tuple(exclude_from_context)
            kept = [
                line for line in context.split('\n')
                if not line.startswith(excluded)
            ]
            context = '\n'.join(kept)

        prefix = '_self_say ' if use_speech_prefix else ''
        for turn, ex in enumerate(episode):
            if turn > 0:
                # The first example's text is the context, not an utterance.
                turns.append((you, ex['text'], me))
            turns.append((me, f"{prefix}{ex['labels'][0]}", you))

        return context, characters, turns
예제 #8
0
    def get_predictor_label_candidates(cls, observation: Message,
                                       context: str) -> List[str]:
        """
        Get the list of possible predictor classes.

        The list is not static because the characters differ from one
        conversation to the next: it holds the ``_self_name`` character
        followed by the ``_partner_name`` character, skipping whichever is
        absent from the context. ``observation`` is not consulted.
        """
        characters = extract_characters(context)
        found = (characters.get(key)
                 for key in ('_self_name', '_partner_name'))
        return [name for name in found if name is not None]
예제 #9
0
 def _setup_data(self, path):
     """
     Load the data, then attach ``character_candidates`` to every example.

     When training with ``opt['num_train_inline_candidates'] > 0``, each
     example gets that many shuffled candidates: the episode's two
     characters plus randomly sampled distractors. Otherwise every example
     gets the full candidate list.

     :param path:
         data path, forwarded to the parent ``_setup_data``
     """
     super()._setup_data(path)
     logging.info('Building Candidates')
     self.candidates = SpeakerClassifierTeacher._build_candidates(
         self.episodes)
     logging.info('Setting up character labels')
     n_cands = self.opt['num_train_inline_candidates']
     sample_inline = n_cands > 0 and DatatypeHelper.is_training(
         self.datatype)
     for ep in self.episodes:
         if sample_inline:
             # The characters come from the episode's first text, so they
             # are the same for every example in the episode.
             speaker, listener = extract_characters(
                 ep[0]['text']).values()
         for ex in ep:
             if sample_inline:
                 # Resample until NEITHER character is among the distractors;
                 # otherwise appending them below would create duplicates.
                 # NOTE(review): this cannot terminate if fewer than
                 # n_cands - 2 other candidates exist — confirm data size.
                 label_cands = random.sample(self.candidates, n_cands - 2)
                 while speaker in label_cands or listener in label_cands:
                     label_cands = random.sample(self.candidates,
                                                 n_cands - 2)
                 label_cands += [speaker, listener]
                 random.shuffle(label_cands)
             else:
                 label_cands = self.candidates
             ex.force_set('character_candidates', label_cands)
예제 #10
0
    def custom_evaluation(
        self,
        teacher_action: Message,
        labels: Optional[Tuple[str]],
        model_response: Message,
    ) -> Optional[bool]:
        """
        Compute RPA for a model response.

        The running context is extended with the teacher's text, the
        classifier predicts which character produced the model's text, and
        the result is recorded under the ``character_accuracy`` metric.

        :param teacher_action:
            The message last sent from this teacher.
        :param labels:
            The previous correct labels
        :param model_response:
            The raw response from the model

        :return:
            whether the predicted character matches the ``_self_name``
            character, or ``None`` when the model gave no text.
        """
        if not model_response or not model_response.get('text'):
            # NOTE(review): returning here skips the context bookkeeping
            # below, including the reset on episode_done — confirm intended.
            return None

        self.context.append(teacher_action['text'])
        context = self.delimiter.join(self.context)
        correct_character = extract_characters(context)['_self_name']

        classifier_act = self.classifier.classify(context,
                                                  model_response['text'])
        is_correct = classifier_act['text'] == correct_character
        self.metrics.add('character_accuracy',
                         AverageMetric(int(is_correct)))

        if teacher_action['episode_done']:
            # Start the next episode with a fresh context.
            self.context = []
        else:
            assert labels
            self.context.append(labels[0])

        return is_correct
예제 #11
0
 def episode_mutation(self, episode: List[Message]) -> List[Message]:
     """
     Set ``self_character`` on every message of the episode.

     The value is the ``_self_name`` character extracted from the text of
     the episode's first message.

     :param episode:
         list of messages; modified in place

     :return episode:
         the same list that was passed in
     """
     self_name_source = extract_characters(episode[0]['text'])
     for message in episode:
         message.force_set('self_character', self_name_source['_self_name'])
     return episode