def get_final_chat_data(self) -> Dict[str, Any]:
    """
    Add relevant fields to the final chat data.

    Assembles the record for one conversation: the dialog, the worker / HIT /
    assignment identifiers, the result of the acceptability check on the human
    messages, a description of the two bots (``self.bots[0]`` and
    ``self.bots[1]``), and the conversation context (personas and seed
    utterances read from ``self.context_info``).

    :return:
        dict of chat data. 'acceptability_violations' is always a 1-tuple; its
        single element is None when ``self.check_acceptability`` is falsy and
        otherwise whatever the acceptability checker returned.
    """
    if self.check_acceptability:
        human_messages, violation_types = self._prepare_acceptability_checking()
        violations_string = self.acceptability_checker.check_messages(
            messages=human_messages,
            is_worker_0=False,
            violation_types=violation_types,
        )
    else:
        violations_string = None

    first_bot_opt = self.bots[0].model_agent.opt
    second_bot_opt = self.bots[1].model_agent.opt

    data = {
        'dialog': self.dialog,
        'workers': [get_mturk_id_from_mephisto_wrapper(self.agent)],
        # 'bad_workers' is for compatibility. Before, it was only non-empty if a
        # worker abandoned, returned, etc. a HIT, but now we don't even save chat
        # data in that case
        'bad_workers': [],
        # Make a tuple for compatibility with a human/human conversation in
        # which we check both sides for acceptability
        'acceptability_violations': (violations_string,),
        'hit_ids': [self.agent.mephisto_agent.task_run_id],
        'assignment_ids': [self.agent.mephisto_agent.assignment_id],
        'task_description': {
            'annotations_config': self.opt['annotations_config'],
            'model_1_nickname': self.bots[0].worker_id,
            'model_1_file': first_bot_opt.get('model_file'),
            'model_1_opt': first_bot_opt,
            'model_2_nickname': self.bots[1].worker_id,
            'model_2_file': second_bot_opt.get('model_file'),
            'model_2_opt': second_bot_opt,
        },
    }

    # Context fields are appended after the core fields so that the key order
    # of the saved record stays the same as before.
    context_data = {
        'personas': self.personas,
        'context_dataset': self.context_info.get('context_dataset'),
        'person1_seed_utterance': self.context_info.get('person1_seed_utterance'),
        'person2_seed_utterance': self.context_info.get('person2_seed_utterance'),
        'additional_context': self.context_info.get('additional_context'),
    }
    data.update(context_data)
    return data
def __init__(self, opt, agent: "MephistoAgentWrapper"):
    """
    Initialize the onboarding state for one worker.

    :param opt:
        options mapping; must contain 'onboard_task_data',
        'onboard_statistics', 'statistics_condition', 'max_onboard_time' and
        'onboarding_qualification' (a missing key raises KeyError).
    :param agent:
        wrapped Mephisto agent, forwarded to the parent constructor.
    """
    super().__init__(opt, agent)

    # Thresholds come from the module-level onboarding configuration, not
    # from the per-run options.
    thresholds = ONBOARD_CONFIG
    self.min_correct = thresholds['min_correct']
    self.max_incorrect = thresholds['max_incorrect']

    self.onboard_task_data = opt['onboard_task_data']

    # Initial status; presumably overwritten once the worker actually
    # finishes onboarding -- confirm against the code that updates it.
    self.status = 'DISCONNECT'

    # Bookkeeping and limits copied straight from the options.
    self.onboard_statistics = opt['onboard_statistics']
    self.statistics_condition = opt['statistics_condition']
    self.max_onboard_time = opt['max_onboard_time']
    self.onboarding_qualification = opt['onboarding_qualification']

    # NOTE(review): relies on ``self.agent`` having been set by the parent
    # constructor above.
    mturk_id = get_mturk_id_from_mephisto_wrapper(self.agent)
    self.worker_id = mturk_id
def __init__(self, opt, agent: "MephistoAgentWrapper"):
    """
    Initialize the onboarding state for one worker.

    :param opt:
        options mapping; must contain 'skip_onboarding', 'onboard_task_data',
        'onboard_statistics', 'statistics_condition', 'max_onboard_time' and
        'onboarding_qualification' (a missing key raises KeyError).
    :param agent:
        wrapped Mephisto agent, forwarded to the parent constructor.
    """
    super().__init__(opt, agent)

    self.skip_onboarding = opt['skip_onboarding']
    self.onboard_task_data = opt['onboard_task_data']

    # Initial status; presumably overwritten once the worker actually
    # finishes onboarding -- confirm against the code that updates it.
    self.status = 'DISCONNECT'

    # Bookkeeping and limits copied straight from the options.
    self.onboard_statistics = opt['onboard_statistics']
    self.statistics_condition = opt['statistics_condition']
    self.max_onboard_time = opt['max_onboard_time']
    self.onboarding_qualification = opt['onboarding_qualification']

    # NOTE(review): relies on ``self.agent`` having been set by the parent
    # constructor above.
    mturk_id = get_mturk_id_from_mephisto_wrapper(self.agent)
    self.worker_id = mturk_id

    # No annotations exist yet at construction time.
    self.annotations = None
def get_final_chat_data(self) -> Dict[str, Any]:
    """
    Return specific info about the conversation, the context, acceptability, etc.

    Assembles the record for one conversation: the dialog, the worker / HIT /
    assignment identifiers, the result of the acceptability check on the human
    messages, and a description of the bot (``self.bot``).

    :return:
        dict of chat data. 'acceptability_violations' is always a 1-tuple; its
        single element is None when ``self.check_acceptability`` is falsy and
        otherwise whatever the acceptability checker returned.
    """
    if self.check_acceptability:
        human_messages, violation_types = self._prepare_acceptability_checking()
        violations_string = self.acceptability_checker.check_messages(
            messages=human_messages,
            is_worker_0=False,
            violation_types=violation_types,
        )
    else:
        violations_string = None

    bot_opt = self.bot.model_agent.opt

    # TODO: once the analysis scripts are fully switched over to DataBrowser, remove
    #  the 'workers' and 'assignment_ids' keys, which will now be duplicated in the
    #  returned Unit
    return {
        'dialog': self.dialog,
        'workers': [get_mturk_id_from_mephisto_wrapper(self.agent)],
        # TODO: 'bad_workers' is for compatibility. Before, it was only non-empty
        #  if a worker abandoned, returned, etc. a HIT, but now we don't even save
        #  chat data in that case. Remove this key once fully switched over to
        #  DataBrowser
        'bad_workers': [],
        # Make a tuple for compatibility with a human/human conversation in
        # which we check both sides for acceptability
        'acceptability_violations': (violations_string,),
        'hit_ids': [self.agent.mephisto_agent.task_run_id],
        'assignment_ids': [self.agent.mephisto_agent.assignment_id],
        'task_description': {
            'annotations_config': self.opt['annotations_config'],
            'model_nickname': self.bot.worker_id,
            'model_file': bot_opt.get('model_file'),
            'model_opt': bot_opt,
        },
    }
def get_final_chat_data(self) -> Dict[str, Any]:
    """
    Return specific info about the conversation, the context, acceptability, etc.

    Assembles the record for one conversation: the conversation context
    (personas and seed utterances read from ``self.context_info``), the dialog,
    the worker / HIT / assignment identifiers, the result of the acceptability
    check on the human messages, and a description of the bot (``self.bot``).

    :return:
        dict of chat data. 'acceptability_violations' is always a 1-tuple; its
        single element is None when ``self.check_acceptability`` is falsy and
        otherwise whatever the acceptability checker returned.
    """
    if self.check_acceptability:
        # Only the turns with agent_idx 0 (the human side) are checked.
        human_texts = [
            message['text'] for message in self.dialog if message['agent_idx'] == 0
        ]
        violation_types = ['min_words', 'all_caps', 'exact_match', 'safety']
        if self.opt['conversation_start_mode'] == 'bst':
            # The BST mode starts the conversation with two previous utterances, so
            # there should be no new greeting. Also, the first human response is one
            # of the previous utterances, so it shouldn't get checked.
            violation_types.append('penalize_greetings')
            human_texts = human_texts[1:]

        violations_string = self.acceptability_checker.check_messages(
            messages=human_texts,
            is_worker_0=False,
            violation_types=violation_types,
        )
    else:
        violations_string = None

    bot_opt = self.bot.model_agent.opt

    return {
        'personas': self.personas,
        'context_dataset': self.context_info.get('context_dataset'),
        'person1_seed_utterance': self.context_info.get('person1_seed_utterance'),
        'person2_seed_utterance': self.context_info.get('person2_seed_utterance'),
        'additional_context': self.context_info.get('additional_context'),
        'dialog': self.dialog,
        'workers': [get_mturk_id_from_mephisto_wrapper(self.agent)],
        # 'bad_workers' is for compatibility. Before, it was only non-empty if a
        # worker abandoned, returned, etc. a HIT, but now we don't even save chat
        # data in that case
        'bad_workers': [],
        # Make a tuple for compatibility with a human/human conversation in
        # which we check both sides for acceptability
        'acceptability_violations': (violations_string,),
        'hit_ids': [self.agent.mephisto_agent.task_run_id],
        'assignment_ids': [self.agent.mephisto_agent.assignment_id],
        'task_description': {
            'annotations_config': self.opt['annotations_config'],
            'model_nickname': self.bot.worker_id,
            'model_file': bot_opt.get('model_file'),
            'model_opt': bot_opt,
        },
    }