Ejemplo n.º 1
0
    def get_final_chat_data(self) -> Dict[str, Any]:
        """
        Assemble the final chat data for this conversation.

        When `self.check_acceptability` is set, the messages returned by
        `self._prepare_acceptability_checking()` are passed to
        `self.acceptability_checker.check_messages()` and the result is stored;
        otherwise the stored violations value is None.

        Returns:
            A dict holding the dialog, the worker / HIT / assignment identifiers, the
            acceptability result, a description of both bots under
            'task_description', and the conversation context (personas, context
            dataset, seed utterances, additional context).
        """

        violations_string = None
        if self.check_acceptability:
            human_messages, violation_types = self._prepare_acceptability_checking()
            violations_string = self.acceptability_checker.check_messages(
                messages=human_messages,
                is_worker_0=False,
                violation_types=violation_types,
            )

        bot_1 = self.bots[0]
        bot_2 = self.bots[1]

        return {
            'dialog': self.dialog,
            'workers': [get_mturk_id_from_mephisto_wrapper(self.agent)],
            # 'bad_workers' is for compatibility. Before, it was only non-empty if a
            # worker abandoned, returned, etc. a HIT, but now we don't even save chat
            # data in that case
            'bad_workers': [],
            # Make a tuple for compatibility with a human/human conversation in
            # which we check both sides for acceptability. The tuple holds None when
            # acceptability checking is disabled.
            'acceptability_violations': (violations_string,),
            # NOTE: 'hit_ids' is populated from the Mephisto agent's task_run_id
            'hit_ids': [self.agent.mephisto_agent.task_run_id],
            'assignment_ids': [self.agent.mephisto_agent.assignment_id],
            'task_description': {
                'annotations_config': self.opt['annotations_config'],
                'model_1_nickname': bot_1.worker_id,
                'model_1_file': bot_1.model_agent.opt.get('model_file'),
                'model_1_opt': bot_1.model_agent.opt,
                'model_2_nickname': bot_2.worker_id,
                'model_2_file': bot_2.model_agent.opt.get('model_file'),
                'model_2_opt': bot_2.model_agent.opt,
            },
            # Conversation context; entries missing from `self.context_info` are
            # saved as None
            'personas': self.personas,
            'context_dataset': self.context_info.get('context_dataset'),
            'person1_seed_utterance': self.context_info.get('person1_seed_utterance'),
            'person2_seed_utterance': self.context_info.get('person2_seed_utterance'),
            'additional_context': self.context_info.get('additional_context'),
        }
Ejemplo n.º 2
0
 def __init__(self, opt, agent: "MephistoAgentWrapper"):
     """
     Initialize onboarding state from ONBOARD_CONFIG and the task options.

     Every `opt` key read here is required; a missing key raises KeyError.
     """
     super().__init__(opt, agent)

     # The thresholds come from the module-level ONBOARD_CONFIG, not from `opt`.
     for threshold_name in ('min_correct', 'max_incorrect'):
         setattr(self, threshold_name, ONBOARD_CONFIG[threshold_name])

     self.onboard_task_data = opt['onboard_task_data']
     # Status starts out as 'DISCONNECT'.
     self.status = 'DISCONNECT'

     # The remaining settings are copied from `opt` under the same attribute names.
     for opt_key in (
         'onboard_statistics',
         'statistics_condition',
         'max_onboard_time',
         'onboarding_qualification',
     ):
         setattr(self, opt_key, opt[opt_key])

     # Resolved last because it reads `self.agent` (not assigned in this method;
     # presumably set by the parent initializer).
     self.worker_id = get_mturk_id_from_mephisto_wrapper(self.agent)
Ejemplo n.º 3
0
    def __init__(self, opt, agent: "MephistoAgentWrapper"):
        """
        Initialize onboarding state from the task options.

        Every `opt` key read here is required; a missing key raises KeyError.
        """
        super().__init__(opt, agent)

        self.skip_onboarding = opt['skip_onboarding']
        self.onboard_task_data = opt['onboard_task_data']

        # Status starts out as 'DISCONNECT'.
        self.status = 'DISCONNECT'

        # The remaining settings are copied from `opt` under the same attribute names.
        for opt_key in (
            'onboard_statistics',
            'statistics_condition',
            'max_onboard_time',
            'onboarding_qualification',
        ):
            setattr(self, opt_key, opt[opt_key])

        # Resolved after the parent initializer because it reads `self.agent` (not
        # assigned in this method; presumably set by the parent).
        self.worker_id = get_mturk_id_from_mephisto_wrapper(self.agent)

        # No annotations yet.
        self.annotations = None
Ejemplo n.º 4
0
    def get_final_chat_data(self) -> Dict[str, Any]:
        """
        Return specific info about the conversation, acceptability, and the bot.

        When `self.check_acceptability` is set, the messages returned by
        `self._prepare_acceptability_checking()` are passed to
        `self.acceptability_checker.check_messages()` and the result is stored;
        otherwise the stored violations value is None.

        Returns:
            A dict with the keys 'dialog', 'workers', 'bad_workers',
            'acceptability_violations', 'hit_ids', 'assignment_ids' and
            'task_description'.
        """

        violations_string = None
        if self.check_acceptability:
            human_messages, violation_types = self._prepare_acceptability_checking()
            violations_string = self.acceptability_checker.check_messages(
                messages=human_messages,
                is_worker_0=False,
                violation_types=violation_types,
            )

        bot = self.bot

        return {
            'dialog': self.dialog,
            # TODO: once the analysis scripts are fully switched over to DataBrowser,
            #  remove the 'workers' and 'assignment_ids' keys, which will now be
            #  duplicated in the returned Unit
            'workers': [get_mturk_id_from_mephisto_wrapper(self.agent)],
            # TODO: 'bad_workers' is for compatibility. Before, it was only non-empty
            #  if a worker abandoned, returned, etc. a HIT, but now we don't even save
            #  chat data in that case. Remove this key once fully on DataBrowser
            'bad_workers': [],
            # Make a tuple for compatibility with a human/human conversation in
            # which we check both sides for acceptability. The tuple holds None when
            # acceptability checking is disabled.
            'acceptability_violations': (violations_string,),
            # NOTE: 'hit_ids' is populated from the Mephisto agent's task_run_id
            'hit_ids': [self.agent.mephisto_agent.task_run_id],
            'assignment_ids': [self.agent.mephisto_agent.assignment_id],
            'task_description': {
                'annotations_config': self.opt['annotations_config'],
                'model_nickname': bot.worker_id,
                'model_file': bot.model_agent.opt.get('model_file'),
                'model_opt': bot.model_agent.opt,
            },
        }
Ejemplo n.º 5
0
    def get_final_chat_data(self) -> Dict[str, Any]:
        """
        Return specific info about the conversation, the context, acceptability, etc.

        When `self.check_acceptability` is set, the texts of the dialog messages with
        `agent_idx == 0` are passed to `self.acceptability_checker.check_messages()`
        and the result is stored; otherwise the stored violations value is None.

        Returns:
            A dict with the conversation context (personas, context dataset, seed
            utterances, additional context), the dialog, the worker / HIT /
            assignment identifiers, the acceptability result, and a description of
            the bot under 'task_description'.
        """

        violations_string = None
        if self.check_acceptability:
            # Only the messages with agent_idx 0 (the human's turns) are checked
            human_texts = [
                message['text']
                for message in self.dialog
                if message['agent_idx'] == 0
            ]
            violation_types = ['min_words', 'all_caps', 'exact_match', 'safety']
            if self.opt['conversation_start_mode'] == 'bst':
                # The BST mode starts the conversation with two previous utterances, so
                # there should be no new greeting. Also, the first human response is one
                # of the previous utterances, so it shouldn't get checked.
                violation_types.append('penalize_greetings')
                human_texts = human_texts[1:]

            violations_string = self.acceptability_checker.check_messages(
                messages=human_texts,
                is_worker_0=False,
                violation_types=violation_types,
            )

        bot = self.bot

        return {
            # Conversation context; entries missing from `self.context_info` are
            # saved as None
            'personas': self.personas,
            'context_dataset': self.context_info.get('context_dataset'),
            'person1_seed_utterance': self.context_info.get('person1_seed_utterance'),
            'person2_seed_utterance': self.context_info.get('person2_seed_utterance'),
            'additional_context': self.context_info.get('additional_context'),
            'dialog': self.dialog,
            'workers': [get_mturk_id_from_mephisto_wrapper(self.agent)],
            # 'bad_workers' is for compatibility. Before, it was only non-empty if a
            # worker abandoned, returned, etc. a HIT, but now we don't even save chat
            # data in that case
            'bad_workers': [],
            # Make a tuple for compatibility with a human/human conversation in
            # which we check both sides for acceptability. The tuple holds None when
            # acceptability checking is disabled.
            'acceptability_violations': (violations_string,),
            # NOTE: 'hit_ids' is populated from the Mephisto agent's task_run_id
            'hit_ids': [self.agent.mephisto_agent.task_run_id],
            'assignment_ids': [self.agent.mephisto_agent.assignment_id],
            'task_description': {
                'annotations_config': self.opt['annotations_config'],
                'model_nickname': bot.worker_id,
                'model_file': bot.model_agent.opt.get('model_file'),
                'model_opt': bot.model_agent.opt,
            },
        }