def setUp(self):
    """Create two WorkerStates plus a manager/worker-manager pair for the tests."""
    # NOTE(review): the extra positional arg (10) to WorkerState is not
    # documented here — confirm its meaning against WorkerState.__init__.
    self.work_state_1 = WorkerState(TEST_WORKER_ID_1, 10)
    self.work_state_2 = WorkerState(TEST_WORKER_ID_2)

    # Minimal ParlAI option set for an MTurk unittest run.
    parser = ParlaiParser(False, False)
    parser.add_parlai_data_path()
    parser.add_mturk_args()
    self.opt = parser.parse_args([])
    self.opt['task'] = 'unittest'
    self.opt['assignment_duration_in_seconds'] = 6

    agent_ids = ['mturk_agent_1']
    self.mturk_manager = MTurkManager(opt=self.opt, mturk_agent_ids=agent_ids)
    # Build the WorkerManager directly (not via the manager) for these tests.
    self.worker_manager = WorkerManager(self.mturk_manager, self.opt)
def setUp(self):
    """Create two AssignStates and a manager whose worker_manager the tests use."""
    # One state with the default status, one already marked in-task.
    self.agent_state1 = AssignState()
    self.agent_state2 = AssignState(status=AssignState.STATUS_IN_TASK)

    parser = ParlaiParser(False, False)
    parser.add_parlai_data_path()
    parser.add_mturk_args()
    self.opt = parser.parse_args(print_args=False)
    self.opt['task'] = 'unittest'
    self.opt['assignment_duration_in_seconds'] = 6

    agent_ids = ['mturk_agent_1']
    self.mturk_manager = MTurkManager(opt=self.opt, mturk_agent_ids=agent_ids)
    # Reuse the manager's own worker_manager rather than building a fresh one.
    self.worker_manager = self.mturk_manager.worker_manager
def setUp(self):
    """Build a manager and a single MTurkAgent for the agent-level tests."""
    parser = ParlaiParser(False, False)
    parser.add_parlai_data_path()
    parser.add_mturk_args()
    self.opt = parser.parse_args(print_args=False)
    self.opt['task'] = 'unittest'
    self.opt['assignment_duration_in_seconds'] = 6

    agent_ids = ['mturk_agent_1']
    # Each consumer gets its own copy of opt so later mutations don't leak.
    self.mturk_manager = MTurkManager(
        opt=self.opt.copy(), mturk_agent_ids=agent_ids
    )
    self.worker_manager = self.mturk_manager.worker_manager
    self.turk_agent = MTurkAgent(
        self.opt.copy(),
        self.mturk_manager,
        TEST_HIT_ID_1,
        TEST_ASSIGNMENT_ID_1,
        TEST_WORKER_ID_1,
    )
def __init__(self, port=DEFAULT_PORT, db_file=DEFAULT_DB_FILE, is_sandbox=False):
    """Wire up shared state, the MTurk data/manager layer, and the URL routes."""
    self.state = {'is_sandbox': is_sandbox}
    self.subs = {}
    self.sources = {}
    self.port = port
    self.data_handler = MTurkDataHandler(file_name=db_file)
    # A task-less manager instance backs the admin operations; its DB logging
    # is routed through the same data handler used for reads.
    self.mturk_manager = MTurkManager.make_taskless_instance(is_sandbox)
    self.mturk_manager.db_logger = self.data_handler
    # TODO load some state from DB
    route_specs = [
        (r"/app/(.*)", AppHandler, {'app': self}),
        (r"/tasks", TaskListHandler, {'app': self}),
        (r"/workers", WorkerListHandler, {'app': self}),
        (r"/runs/(.*)", RunHandler, {'app': self}),
        (r"/workers/(.*)", WorkerHandler, {'app': self}),
        (r"/assignments/(.*)", AssignmentHandler, {'app': self}),
        (r"/approve/(.*)", ApprovalHandler, {'app': self}),
        (r"/reject/(.*)", RejectionHandler, {'app': self}),
        (r"/reverse_rejection/(.*)", ReverseHandler, {'app': self}),
        (r"/block/(.*)", BlockHandler, {'app': self}),
        (r"/bonus/(.*)", BonusHandler, {'app': self}),
        (r"/error/(.*)", ErrorHandler, {'app': self}),
        (r"/socket", SocketHandler, {'app': self}),
        (r"/", RedirectHandler),
    ]
    super(Application, self).__init__(route_specs, **tornado_settings)
def setUp(self):
    """Remove any stale disconnect file, then register three live workers."""
    stale_pickle = os.path.join(parent_dir, 'disconnect-test.pickle')
    if os.path.exists(stale_pickle):
        os.remove(stale_pickle)

    parser = ParlaiParser(False, False)
    parser.add_parlai_data_path()
    parser.add_mturk_args()
    self.opt = parser.parse_args([])
    self.opt['task'] = 'unittest'
    self.opt['assignment_duration_in_seconds'] = 6

    agent_ids = ['mturk_agent_1']
    self.mturk_manager = MTurkManager(
        opt=self.opt.copy(), mturk_agent_ids=agent_ids
    )
    self.worker_manager = self.mturk_manager.worker_manager
    # worker_alive registers each test worker and hands back its state object.
    self.worker_state_1 = self.worker_manager.worker_alive(TEST_WORKER_ID_1)
    self.worker_state_2 = self.worker_manager.worker_alive(TEST_WORKER_ID_2)
    self.worker_state_3 = self.worker_manager.worker_alive(TEST_WORKER_ID_3)
def main():
    """Run a multi-agent dialog HIT: two MTurk workers plus one local human.

    Sets up the ParlAI MTurk manager, posts HITs, onboards each worker, and
    then runs MTurkMultiAgentDialogWorld until the episode finishes. HITs are
    expired and the manager shut down on exit, even on error.
    """
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    opt = argparser.parse_args()
    # Task name is taken from the directory this file lives in.
    opt['task'] = os.path.basename(os.path.dirname(os.path.abspath(__file__)))
    opt.update(task_config)

    mturk_agent_1_id = 'mturk_agent_1'
    mturk_agent_2_id = 'mturk_agent_2'
    human_agent_1_id = 'human_1'
    mturk_agent_ids = [mturk_agent_1_id, mturk_agent_2_id]
    mturk_manager = MTurkManager(opt=opt, mturk_agent_ids=mturk_agent_ids)
    mturk_manager.setup_server()

    try:
        mturk_manager.start_new_run()
        mturk_manager.create_hits()

        def run_onboard(worker):
            # Run the onboarding world to completion for each new worker.
            world = MTurkMultiAgentDialogOnboardWorld(opt=opt, mturk_agent=worker)
            while not world.episode_done():
                world.parley()
            world.shutdown()

        mturk_manager.set_onboard_function(
            onboard_function=run_onboard
        )  # Set onboard_function to None to skip onboarding
        mturk_manager.ready_to_accept_workers()

        def check_worker_eligibility(worker):
            # Every worker is eligible for this task.
            return True

        # Round-robin role assignment across the two MTurk agent ids.
        global worker_count
        worker_count = 0

        def get_worker_role(worker):
            global worker_count
            worker_role = mturk_agent_ids[worker_count % len(mturk_agent_ids)]
            worker_count += 1
            return worker_role

        def run_conversation(mturk_manager, opt, workers):
            # Create mturk agents
            mturk_agent_1 = workers[0]
            mturk_agent_2 = workers[1]

            # Create the local human agents
            human_agent_1 = LocalHumanAgent(opt=None)
            human_agent_1.id = human_agent_1_id

            world = MTurkMultiAgentDialogWorld(
                opt=opt, agents=[human_agent_1, mturk_agent_1, mturk_agent_2]
            )
            while not world.episode_done():
                world.parley()
            world.shutdown()

        mturk_manager.start_task(
            eligibility_function=check_worker_eligibility,
            role_function=get_worker_role,
            task_function=run_conversation,
        )
    except BaseException:
        # Was a bare `except:`; narrowed to BaseException (still re-raised)
        # to match the other task mains and satisfy E722.
        raise
    finally:
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
def main():
    """Run the qualification-flow demo task.

    A one-worker task where workers who finish a first iteration are tagged
    with a (freshly created) qualification so they land in a different pool
    on subsequent HITs. The qualification is deleted on shutdown.
    """
    completed_workers = []
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    opt = argparser.parse_args()
    opt['task'] = os.path.basename(os.path.dirname(os.path.abspath(__file__)))
    opt.update(task_config)

    mturk_agent_id = 'Worker'
    mturk_manager = MTurkManager(opt=opt, mturk_agent_ids=[mturk_agent_id])
    mturk_manager.setup_server()

    # Randomized name so concurrent runs don't collide on the qualification.
    qualification_name = 'ParlAIExcludeQual{}t{}'.format(
        random.randint(10000, 99999), random.randint(10000, 99999)
    )
    qualification_desc = (
        'Qualification for a worker not correctly completing the '
        'first iteration of a task. Used to filter to different task pools.'
    )
    qualification_id = mturk_utils.find_or_create_qualification(
        qualification_name, qualification_desc
    )
    print('Created qualification: ', qualification_id)

    def run_onboard(worker):
        # Drive the onboarding world to completion before the real task.
        onboard_world = QualificationFlowOnboardWorld(opt, worker)
        while not onboard_world.episode_done():
            onboard_world.parley()
        onboard_world.shutdown()

    mturk_manager.set_onboard_function(onboard_function=run_onboard)

    try:
        mturk_manager.start_new_run()
        # Only workers WITHOUT the qualification may even preview the HIT.
        agent_qualifications = [
            {
                'QualificationTypeId': qualification_id,
                'Comparator': 'DoesNotExist',
                'RequiredToPreview': True,
            }
        ]
        mturk_manager.create_hits(qualifications=agent_qualifications)
        mturk_manager.ready_to_accept_workers()

        def check_worker_eligibility(worker):
            # All connecting workers are eligible.
            return True

        def assign_worker_roles(workers):
            workers[0].id = mturk_agent_id

        global run_conversation

        def run_conversation(mturk_manager, opt, workers):
            solo_agent = workers[0]
            world = QualificationFlowSoloWorld(
                opt=opt,
                mturk_agent=solo_agent,
                qualification_id=qualification_id,
                firstTime=(solo_agent.worker_id not in completed_workers),
            )
            while not world.episode_done():
                world.parley()
            completed_workers.append(solo_agent.worker_id)
            world.shutdown()
            world.review_work()

        mturk_manager.start_task(
            eligibility_function=check_worker_eligibility,
            assign_role_function=assign_worker_roles,
            task_function=run_conversation,
        )
    except BaseException:
        raise
    finally:
        # Clean up the throwaway qualification along with the run itself.
        mturk_utils.delete_qualification(qualification_id)
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
def main():
    """Run the QA data-collection task, pulling contexts from the SQuAD teacher."""
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    opt = argparser.parse_args()
    # Task name is taken from the directory this file lives in.
    opt['task'] = os.path.basename(os.path.dirname(os.path.abspath(__file__)))
    opt.update(task_config)

    # Initialize a SQuAD teacher agent, which we will get context from
    module_name = 'parlai.tasks.squad.agents'
    class_name = 'DefaultTeacher'
    my_module = importlib.import_module(module_name)
    task_class = getattr(my_module, class_name)
    task_opt = {}
    task_opt['datatype'] = 'train'
    task_opt['datapath'] = opt['datapath']

    mturk_agent_id = 'Worker'
    mturk_manager = MTurkManager(opt=opt, mturk_agent_ids=[mturk_agent_id])
    mturk_manager.setup_server()

    # NOTE(review): run_onboard is defined but never passed below —
    # set_onboard_function receives None, so onboarding is skipped.
    # Presumably intentional (pass run_onboard to re-enable); confirm.
    def run_onboard(worker):
        world = QADataCollectionOnboardWorld(opt=opt, mturk_agent=worker)
        while not world.episode_done():
            world.parley()
        world.shutdown()

    mturk_manager.set_onboard_function(onboard_function=None)

    try:
        mturk_manager.start_new_run()
        mturk_manager.create_hits()
        mturk_manager.ready_to_accept_workers()

        def check_workers_eligibility(workers):
            # 'multiple' form: receives the full list and returns the
            # eligible subset (here, everyone).
            return workers

        eligibility_function = {
            'func': check_workers_eligibility,
            'multiple': True,
        }

        def assign_worker_roles(worker):
            worker[0].id = mturk_agent_id

        global run_conversation

        def run_conversation(mturk_manager, opt, workers):
            # Fresh teacher per conversation supplies the context/questions.
            task = task_class(task_opt)
            mturk_agent = workers[0]
            world = QADataCollectionWorld(
                opt=opt, task=task, mturk_agent=mturk_agent
            )
            while not world.episode_done():
                world.parley()
            world.shutdown()
            world.review_work()

        mturk_manager.start_task(
            eligibility_function=eligibility_function,
            assign_role_function=assign_worker_roles,
            task_function=run_conversation,
        )
    except BaseException:
        raise
    finally:
        # Always clean up outstanding HITs and the server.
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
def main():
    """This task consists of one agent, model or MTurk worker, talking to an
    MTurk worker to negotiate a deal.
    """
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    argparser.add_argument(
        '-min_t', '--min_turns', default=5, type=int,
        help='minimum number of turns'
    )
    argparser.add_argument(
        '-mt', '--max_turns', default=10, type=int,
        help='maximal number of chat turns'
    )
    argparser.add_argument(
        '-mx_rsp_time',
        '--max_resp_time',
        default=150,
        type=int,
        help='time limit for entering a dialog message',
    )
    argparser.add_argument(
        '--ag_shutdown_time',
        default=120,
        type=int,
        help='time limit for entering a dialog message',
    )
    argparser.add_argument(
        '--persona-type',
        default='both',
        type=str,
        choices=['both', 'self', 'other'],
        help='Which personas to load from personachat',
    )
    opt = argparser.parse_args()
    directory_path = os.path.dirname(os.path.abspath(__file__))
    opt['task'] = os.path.basename(directory_path)
    # Extracted personas are cached under the data path for this task.
    opt['extract_personas_path'] = os.path.join(opt['datapath'], opt['task'])
    opt.update(task_config)

    mturk_agent_ids = ['PERSON_1']
    mturk_manager = MTurkManager(opt=opt, mturk_agent_ids=mturk_agent_ids)
    mturk_manager.setup_server(task_directory_path=directory_path)

    personas_generator = PersonasGenerator(opt)
    opt['personas_generator'] = personas_generator

    try:
        mturk_manager.start_new_run()
        mturk_manager.create_hits()

        if not opt['is_sandbox']:
            # ADD BLOCKED WORKERS HERE
            blocked_worker_list = []
            for w in blocked_worker_list:
                mturk_manager.block_worker(
                    w,
                    'We found that you have unexpected behaviors in our '
                    'previous HITs. For more questions please email us.',
                )

        def run_onboard(worker):
            # No onboarding flow for this task.
            pass

        mturk_manager.set_onboard_function(onboard_function=run_onboard)
        mturk_manager.ready_to_accept_workers()

        def check_worker_eligibility(worker):
            # All connecting workers are eligible.
            return True

        def assign_worker_roles(workers):
            # Round-robin over the (single) agent id list.
            for index, worker in enumerate(workers):
                worker.id = mturk_agent_ids[index % len(mturk_agent_ids)]

        def run_conversation(mturk_manager, opt, workers):
            worker = workers[0]
            world = RephrasePersonaWorld(opt, worker)
            while not world.episode_done():
                world.parley()
            world.save_data()
            world.shutdown()
            world.review_work()

        mturk_manager.start_task(
            eligibility_function=check_worker_eligibility,
            assign_role_function=assign_worker_roles,
            task_function=run_conversation,
        )
    except BaseException:
        raise
    finally:
        # Always clean up outstanding HITs and the server.
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
class TestMTurkManagerWorkflows(unittest.TestCase):
    """
    Various test cases to replicate a whole mturk workflow.

    setUp mocks out every server/AWS call, starts a real MTurkManager against
    a MockSocket, and launches start_task on a background thread; each test
    then drives MockAgents through onboarding/waiting/task states.
    assert_equal_by(getter, expected, t) appears to poll until the value
    matches within t seconds (helper defined elsewhere — confirm semantics).
    """

    def setUp(self):
        # Auto-answer 'y' to any input() prompt during the run.
        patcher = mock.patch('builtins.input', return_value='y')
        self.addCleanup(patcher.stop)
        patcher.start()

        # Mock functions that hit external APIs and such
        self.server_utils = MTurkManagerFile.server_utils
        self.mturk_utils = MTurkManagerFile.mturk_utils
        self.server_utils.setup_server = mock.MagicMock(
            return_value='https://127.0.0.1')
        self.server_utils.setup_legacy_server = mock.MagicMock(
            return_value='https://127.0.0.1')
        self.server_utils.delete_server = mock.MagicMock()
        self.mturk_utils.setup_aws_credentials = mock.MagicMock()
        self.mturk_utils.calculate_mturk_cost = mock.MagicMock(return_value=1)
        self.mturk_utils.check_mturk_balance = mock.MagicMock(
            return_value=True)
        self.mturk_utils.create_hit_config = mock.MagicMock()
        self.mturk_utils.setup_sns_topic = mock.MagicMock(
            return_value=TOPIC_ARN)
        self.mturk_utils.delete_sns_topic = mock.MagicMock()
        self.mturk_utils.delete_qualification = mock.MagicMock()
        self.mturk_utils.find_or_create_qualification = mock.MagicMock(
            return_value=QUALIFICATION_ID)
        self.mturk_utils.find_qualification = mock.MagicMock(
            return_value=QUALIFICATION_ID)
        self.mturk_utils.give_worker_qualification = mock.MagicMock()
        self.mturk_utils.remove_worker_qualification = mock.MagicMock()
        self.mturk_utils.create_hit_type = mock.MagicMock(
            return_value=HIT_TYPE_ID)
        self.mturk_utils.subscribe_to_hits = mock.MagicMock()
        self.mturk_utils.create_hit_with_hit_type = mock.MagicMock(
            return_value=(MTURK_PAGE_URL, FAKE_HIT_ID, 'MTURK_HIT_DATA'))
        self.mturk_utils.get_mturk_client = mock.MagicMock(
            return_value=mock.MagicMock())

        # Per-test bookkeeping: worker_id -> "released" flag. Tests flip the
        # flag to True to let onboard_agent / run_conversation proceed.
        self.onboarding_agents = {}
        self.worlds_agents = {}

        # Set up an MTurk Manager and get it ready for accepting workers
        self.fake_socket = MockSocket()
        time.sleep(0.1)  # give the mock socket a moment to come up
        argparser = ParlaiParser(False, False)
        argparser.add_parlai_data_path()
        argparser.add_mturk_args()
        self.opt = argparser.parse_args()
        self.opt['task'] = 'unittest'
        self.opt['assignment_duration_in_seconds'] = 1
        self.opt['hit_title'] = 'test_hit_title'
        self.opt['hit_description'] = 'test_hit_description'
        self.opt['task_description'] = 'test_task_description'
        self.opt['hit_keywords'] = 'test_hit_keywords'
        self.opt['reward'] = 0.1
        self.opt['is_debug'] = True
        self.opt['log_level'] = 0
        self.opt['num_conversations'] = 1
        self.mturk_agent_ids = ['mturk_agent_1', 'mturk_agent_2']
        self.mturk_manager = MTurkManager(
            opt=self.opt,
            mturk_agent_ids=self.mturk_agent_ids,
            is_test=True,
        )
        self.mturk_manager.port = self.fake_socket.port
        self.mturk_manager.setup_server()
        self.mturk_manager.start_new_run()
        self.mturk_manager.ready_to_accept_workers()
        self.mturk_manager.set_onboard_function(self.onboard_agent)
        self.mturk_manager.create_hits()

        def assign_worker_roles(workers):
            workers[0].id = 'mturk_agent_1'
            workers[1].id = 'mturk_agent_2'

        def run_task_wait():
            # Blocks until the run finishes; hence the background thread.
            self.mturk_manager.start_task(
                lambda w: True, assign_worker_roles, self.run_conversation)

        self.task_thread = threading.Thread(target=run_task_wait)
        self.task_thread.start()

        # agent_1_2 reuses worker 1 with a different assignment — used by the
        # uniqueness / multi-conversation tests below.
        self.agent_1 = MockAgent(TEST_HIT_ID_1, TEST_ASSIGNMENT_ID_1,
                                 TEST_WORKER_ID_1, TASK_GROUP_ID_1)
        self.agent_1_2 = MockAgent(TEST_HIT_ID_1, TEST_ASSIGNMENT_ID_3,
                                   TEST_WORKER_ID_1, TASK_GROUP_ID_1)
        self.agent_2 = MockAgent(TEST_HIT_ID_2, TEST_ASSIGNMENT_ID_2,
                                 TEST_WORKER_ID_2, TASK_GROUP_ID_1)

    def tearDown(self):
        """Stop heartbeats, release any blocked worlds, and join the task thread."""
        self.agent_1.always_beat = False
        self.agent_2.always_beat = False
        for key in self.worlds_agents.keys():
            self.worlds_agents[key] = True
        self.mturk_manager.shutdown()
        self.fake_socket.close()
        self.task_thread.join()

    def onboard_agent(self, worker):
        """Onboarding stub: block until the test flips this worker's flag."""
        self.onboarding_agents[worker.worker_id] = False
        while (worker.worker_id in self.onboarding_agents) and (
                self.onboarding_agents[worker.worker_id] is False):
            time.sleep(0.05)
        return

    def run_conversation(self, mturk_manager, opt, workers):
        """Task stub: register workers, block until released, then shut down."""
        for worker in workers:
            self.worlds_agents[worker.worker_id] = False
        for worker in workers:
            while self.worlds_agents[worker.worker_id] is False:
                time.sleep(0.05)
        for worker in workers:
            worker.shutdown(timeout=-1)

    def alive_agent(self, agent):
        """Connect a MockAgent to the fake socket and start heartbeating."""
        agent.register_to_socket(self.fake_socket)
        agent.wait_for_alive()
        agent.send_heartbeat()

    def test_successful_convo(self):
        """Two agents onboard, run the task, and finish with DONE status."""
        manager = self.mturk_manager

        # Alive first agent
        agent_1 = self.agent_1
        self.alive_agent(agent_1)
        assert_equal_by(lambda: agent_1.worker_id in self.onboarding_agents,
                        True, 2)
        agent_1_object = manager.worker_manager.get_agent_for_assignment(
            agent_1.assignment_id)
        self.assertFalse(self.onboarding_agents[agent_1.worker_id])
        self.assertEqual(agent_1_object.get_status(),
                         AssignState.STATUS_ONBOARDING)
        self.onboarding_agents[agent_1.worker_id] = True
        assert_equal_by(agent_1_object.get_status,
                        AssignState.STATUS_WAITING, 2)

        # Alive second agent
        agent_2 = self.agent_2
        self.alive_agent(agent_2)
        assert_equal_by(lambda: agent_2.worker_id in self.onboarding_agents,
                        True, 2)
        agent_2_object = manager.worker_manager.get_agent_for_assignment(
            agent_2.assignment_id)
        self.assertFalse(self.onboarding_agents[agent_2.worker_id])
        self.assertEqual(agent_2_object.get_status(),
                         AssignState.STATUS_ONBOARDING)
        self.onboarding_agents[agent_2.worker_id] = True
        assert_equal_by(agent_2_object.get_status,
                        AssignState.STATUS_WAITING, 2)

        # Assert agents move to task
        assert_equal_by(agent_2_object.get_status,
                        AssignState.STATUS_IN_TASK, 2)
        assert_equal_by(lambda: agent_2.worker_id in self.worlds_agents,
                        True, 2)
        self.assertIn(agent_1.worker_id, self.worlds_agents)

        # Complete agents
        self.worlds_agents[agent_1.worker_id] = True
        self.worlds_agents[agent_2.worker_id] = True
        assert_equal_by(agent_1_object.get_status, AssignState.STATUS_DONE, 2)
        assert_equal_by(agent_2_object.get_status, AssignState.STATUS_DONE, 2)

        # Assert conversation is complete for manager and agents
        assert_equal_by(lambda: manager.completed_conversations, 1, 2)
        assert_equal_by(
            lambda: len([
                p for p in agent_1.message_packet
                if p.data['text'] == data_model.COMMAND_SHOW_DONE_BUTTON
            ]),
            1,
            2,
        )
        assert_equal_by(
            lambda: len([
                p for p in agent_2.message_packet
                if p.data['text'] == data_model.COMMAND_SHOW_DONE_BUTTON
            ]),
            1,
            2,
        )

        # Assert sockets are closed
        assert_equal_by(
            lambda: len(
                [x for x in manager.socket_manager.run.values() if not x]),
            2, 2)

    def test_disconnect_end(self):
        """A mid-task disconnect marks one agent DISCONNECT, partner PARTNER_DISCONNECT."""
        manager = self.mturk_manager

        # Alive first agent
        agent_1 = self.agent_1
        self.alive_agent(agent_1)
        assert_equal_by(lambda: agent_1.worker_id in self.onboarding_agents,
                        True, 2)
        agent_1_object = manager.worker_manager.get_agent_for_assignment(
            agent_1.assignment_id)
        self.assertFalse(self.onboarding_agents[agent_1.worker_id])
        self.assertEqual(agent_1_object.get_status(),
                         AssignState.STATUS_ONBOARDING)
        self.onboarding_agents[agent_1.worker_id] = True
        assert_equal_by(agent_1_object.get_status,
                        AssignState.STATUS_WAITING, 2)

        # Alive second agent
        agent_2 = self.agent_2
        self.alive_agent(agent_2)
        assert_equal_by(lambda: agent_2.worker_id in self.onboarding_agents,
                        True, 2)
        agent_2_object = manager.worker_manager.get_agent_for_assignment(
            agent_2.assignment_id)
        self.assertFalse(self.onboarding_agents[agent_2.worker_id])
        self.assertEqual(agent_2_object.get_status(),
                         AssignState.STATUS_ONBOARDING)
        self.onboarding_agents[agent_2.worker_id] = True
        assert_equal_by(agent_2_object.get_status,
                        AssignState.STATUS_WAITING, 2)

        # Assert agents move to task
        assert_equal_by(agent_2_object.get_status,
                        AssignState.STATUS_IN_TASK, 2)
        assert_equal_by(lambda: agent_2.worker_id in self.worlds_agents,
                        True, 2)
        self.assertIn(agent_1.worker_id, self.worlds_agents)

        # Disconnect agent
        agent_2.always_beat = False
        assert_equal_by(agent_1_object.get_status,
                        AssignState.STATUS_PARTNER_DISCONNECT, 3)
        assert_equal_by(agent_2_object.get_status,
                        AssignState.STATUS_DISCONNECT, 3)
        self.worlds_agents[agent_1.worker_id] = True
        self.worlds_agents[agent_2.worker_id] = True
        # Reconnect the disconnected agent so it receives its final command.
        agent_2.always_beat = True
        agent_2.send_alive()

        # Assert workers get the correct command
        assert_equal_by(
            lambda: len([
                p for p in agent_1.message_packet
                if p.data['text'] == data_model.COMMAND_INACTIVE_DONE
            ]),
            1,
            2,
        )
        assert_equal_by(
            lambda: len([
                p for p in agent_2.message_packet
                if p.data['text'] == data_model.COMMAND_INACTIVE_HIT
            ]),
            1,
            2,
        )

        # assert conversation not marked as complete
        self.assertEqual(manager.completed_conversations, 0)

        # Assert sockets are closed
        assert_equal_by(
            lambda: len(
                [x for x in manager.socket_manager.run.values() if not x]),
            2, 2)

    def test_expire_onboarding(self):
        """Expiring the onboarding pool sends EXPIRE_HIT and marks EXPIRED."""
        manager = self.mturk_manager

        # Alive first agent
        agent_1 = self.agent_1
        self.alive_agent(agent_1)
        assert_equal_by(lambda: agent_1.worker_id in self.onboarding_agents,
                        True, 10)
        agent_1_object = manager.worker_manager.get_agent_for_assignment(
            agent_1.assignment_id)
        self.assertFalse(self.onboarding_agents[agent_1.worker_id])
        self.assertEqual(agent_1_object.get_status(),
                         AssignState.STATUS_ONBOARDING)

        manager._expire_onboarding_pool()

        assert_equal_by(
            lambda: len([
                p for p in agent_1.message_packet
                if p.data['text'] == data_model.COMMAND_EXPIRE_HIT
            ]),
            1,
            10,
        )

        # Release the blocked onboarding thread; status must stay EXPIRED.
        self.onboarding_agents[agent_1.worker_id] = True

        self.assertEqual(agent_1_object.get_status(),
                         AssignState.STATUS_EXPIRED)

        # Assert sockets are closed
        assert_equal_by(
            lambda: len(
                [x for x in manager.socket_manager.run.values() if not x]),
            1,
            10,
        )

    def test_reconnect_complete(self):
        """An in-task agent that reconnects gets RESTORE_STATE and can finish."""
        manager = self.mturk_manager

        # Alive first agent
        agent_1 = self.agent_1
        self.alive_agent(agent_1)
        assert_equal_by(lambda: agent_1.worker_id in self.onboarding_agents,
                        True, 2)
        agent_1_object = manager.worker_manager.get_agent_for_assignment(
            agent_1.assignment_id)
        self.assertFalse(self.onboarding_agents[agent_1.worker_id])
        self.assertEqual(agent_1_object.get_status(),
                         AssignState.STATUS_ONBOARDING)
        self.onboarding_agents[agent_1.worker_id] = True
        assert_equal_by(agent_1_object.get_status,
                        AssignState.STATUS_WAITING, 2)

        # Alive second agent
        agent_2 = self.agent_2
        self.alive_agent(agent_2)
        assert_equal_by(lambda: agent_2.worker_id in self.onboarding_agents,
                        True, 2)
        agent_2_object = manager.worker_manager.get_agent_for_assignment(
            agent_2.assignment_id)
        self.assertFalse(self.onboarding_agents[agent_2.worker_id])
        self.assertEqual(agent_2_object.get_status(),
                         AssignState.STATUS_ONBOARDING)
        self.onboarding_agents[agent_2.worker_id] = True
        assert_equal_by(agent_2_object.get_status,
                        AssignState.STATUS_WAITING, 2)

        # Assert agents move to task
        assert_equal_by(agent_2_object.get_status,
                        AssignState.STATUS_IN_TASK, 2)
        assert_equal_by(lambda: agent_2.worker_id in self.worlds_agents,
                        True, 2)
        self.assertIn(agent_1.worker_id, self.worlds_agents)

        # Simulate reconnect to task
        stored_conv_id = agent_2.conversation_id
        stored_agent_id = agent_2.id
        agent_2.conversation_id = None
        agent_2.id = None
        agent_2.send_alive()

        # Wait for the manager to push the stored state back down.
        assert_equal_by(
            lambda: len([
                p for p in agent_2.message_packet
                if p.data['text'] == data_model.COMMAND_RESTORE_STATE
            ]),
            1,
            4,
        )
        self.assertEqual(agent_2.id, stored_agent_id)
        self.assertEqual(agent_2.conversation_id, stored_conv_id)

        # Complete agents
        self.worlds_agents[agent_1.worker_id] = True
        self.worlds_agents[agent_2.worker_id] = True
        assert_equal_by(agent_1_object.get_status, AssignState.STATUS_DONE, 2)
        assert_equal_by(agent_2_object.get_status, AssignState.STATUS_DONE, 2)

        # Assert conversation is complete for manager and agents
        assert_equal_by(lambda: manager.completed_conversations, 1, 2)
        assert_equal_by(
            lambda: len([
                p for p in agent_1.message_packet
                if p.data['text'] == data_model.COMMAND_SHOW_DONE_BUTTON
            ]),
            1,
            2,
        )
        assert_equal_by(
            lambda: len([
                p for p in agent_2.message_packet
                if p.data['text'] == data_model.COMMAND_SHOW_DONE_BUTTON
            ]),
            1,
            2,
        )

        # Assert sockets are closed
        assert_equal_by(
            lambda: len(
                [x for x in manager.socket_manager.run.values() if not x]),
            2, 2)

    def test_attempt_break_unique(self):
        """With is_unique set, a worker who already finished can't take a second HIT."""
        manager = self.mturk_manager
        unique_worker_qual = 'is_unique_qual'
        manager.is_unique = True
        manager.opt['unique_qual_name'] = unique_worker_qual
        manager.unique_qual_name = unique_worker_qual

        # Alive first agent
        agent_1 = self.agent_1
        self.alive_agent(agent_1)
        assert_equal_by(lambda: agent_1.worker_id in self.onboarding_agents,
                        True, 2)
        agent_1_object = manager.worker_manager.get_agent_for_assignment(
            agent_1.assignment_id)
        self.assertFalse(self.onboarding_agents[agent_1.worker_id])
        self.assertEqual(agent_1_object.get_status(),
                         AssignState.STATUS_ONBOARDING)
        self.onboarding_agents[agent_1.worker_id] = True
        assert_equal_by(agent_1_object.get_status,
                        AssignState.STATUS_WAITING, 2)

        # Alive second agent
        agent_2 = self.agent_2
        self.alive_agent(agent_2)
        assert_equal_by(lambda: agent_2.worker_id in self.onboarding_agents,
                        True, 2)
        agent_2_object = manager.worker_manager.get_agent_for_assignment(
            agent_2.assignment_id)
        self.assertFalse(self.onboarding_agents[agent_2.worker_id])
        self.assertEqual(agent_2_object.get_status(),
                         AssignState.STATUS_ONBOARDING)
        self.onboarding_agents[agent_2.worker_id] = True
        assert_equal_by(agent_2_object.get_status,
                        AssignState.STATUS_WAITING, 2)

        # Assert agents move to task
        assert_equal_by(agent_2_object.get_status,
                        AssignState.STATUS_IN_TASK, 2)
        assert_equal_by(lambda: agent_2.worker_id in self.worlds_agents,
                        True, 2)
        self.assertIn(agent_1.worker_id, self.worlds_agents)

        # Complete agents
        self.worlds_agents[agent_1.worker_id] = True
        self.worlds_agents[agent_2.worker_id] = True
        assert_equal_by(agent_1_object.get_status, AssignState.STATUS_DONE, 2)
        assert_equal_by(agent_2_object.get_status, AssignState.STATUS_DONE, 2)

        # Assert conversation is complete for manager and agents
        assert_equal_by(lambda: manager.completed_conversations, 1, 2)
        assert_equal_by(
            lambda: len([
                p for p in agent_1.message_packet
                if p.data['text'] == data_model.COMMAND_SHOW_DONE_BUTTON
            ]),
            1,
            2,
        )
        assert_equal_by(
            lambda: len([
                p for p in agent_2.message_packet
                if p.data['text'] == data_model.COMMAND_SHOW_DONE_BUTTON
            ]),
            1,
            2,
        )

        # Assert sockets are closed
        assert_equal_by(
            lambda: len(
                [x for x in manager.socket_manager.run.values() if not x]),
            2, 2)

        # ensure qualification was 'granted'
        self.mturk_utils.find_qualification.assert_called_with(
            unique_worker_qual, manager.is_sandbox)
        self.mturk_utils.give_worker_qualification.assert_any_call(
            agent_1.worker_id, QUALIFICATION_ID, None, manager.is_sandbox)
        self.mturk_utils.give_worker_qualification.assert_any_call(
            agent_2.worker_id, QUALIFICATION_ID, None, manager.is_sandbox)

        # Try to alive with the first agent a second time
        agent_1_2 = self.agent_1_2
        self.alive_agent(agent_1_2)
        assert_equal_by(lambda: agent_1_2.worker_id in self.onboarding_agents,
                        True, 2)
        agent_1_2_object = manager.worker_manager.get_agent_for_assignment(
            agent_1_2.assignment_id)

        # No worker should be created for a unique task
        self.assertIsNone(agent_1_2_object)
        assert_equal_by(
            lambda: len([
                p for p in agent_1_2.message_packet
                if p.data['text'] == data_model.COMMAND_EXPIRE_HIT
            ]),
            1,
            2,
        )

        # Assert sockets are closed
        assert_equal_by(
            lambda: len(
                [x for x in manager.socket_manager.run.values() if not x]),
            3, 2)

    def test_break_multi_convo(self):
        """allowed_conversations=1 blocks a second concurrent convo per worker."""
        manager = self.mturk_manager
        manager.opt['allowed_conversations'] = 1

        # Alive first agent
        agent_1 = self.agent_1
        self.alive_agent(agent_1)
        assert_equal_by(lambda: agent_1.worker_id in self.onboarding_agents,
                        True, 2)
        agent_1_object = manager.worker_manager.get_agent_for_assignment(
            agent_1.assignment_id)
        self.assertFalse(self.onboarding_agents[agent_1.worker_id])
        self.assertEqual(agent_1_object.get_status(),
                         AssignState.STATUS_ONBOARDING)
        self.onboarding_agents[agent_1.worker_id] = True
        assert_equal_by(agent_1_object.get_status,
                        AssignState.STATUS_WAITING, 2)

        # Alive second agent
        agent_2 = self.agent_2
        self.alive_agent(agent_2)
        assert_equal_by(lambda: agent_2.worker_id in self.onboarding_agents,
                        True, 2)
        agent_2_object = manager.worker_manager.get_agent_for_assignment(
            agent_2.assignment_id)
        self.assertFalse(self.onboarding_agents[agent_2.worker_id])
        self.assertEqual(agent_2_object.get_status(),
                         AssignState.STATUS_ONBOARDING)
        self.onboarding_agents[agent_2.worker_id] = True
        assert_equal_by(agent_2_object.get_status,
                        AssignState.STATUS_WAITING, 2)

        # Assert agents move to task
        assert_equal_by(agent_2_object.get_status,
                        AssignState.STATUS_IN_TASK, 2)
        assert_equal_by(lambda: agent_2.worker_id in self.worlds_agents,
                        True, 2)
        self.assertIn(agent_1.worker_id, self.worlds_agents)

        # Attempt to start a new conversation with duplicate worker 1
        agent_1_2 = self.agent_1_2
        self.alive_agent(agent_1_2)
        assert_equal_by(lambda: agent_1_2.worker_id in self.onboarding_agents,
                        True, 2)
        agent_1_2_object = manager.worker_manager.get_agent_for_assignment(
            agent_1_2.assignment_id)

        # No worker should be created for a unique task
        self.assertIsNone(agent_1_2_object)
        assert_equal_by(
            lambda: len([
                p for p in agent_1_2.message_packet
                if p.data['text'] == data_model.COMMAND_EXPIRE_HIT
            ]),
            1,
            2,
        )

        # Assert sockets are closed
        assert_equal_by(
            lambda: len(
                [x for x in manager.socket_manager.run.values() if not x]),
            1, 2)

        # Complete agents
        self.worlds_agents[agent_1.worker_id] = True
        self.worlds_agents[agent_2.worker_id] = True
        assert_equal_by(agent_1_object.get_status, AssignState.STATUS_DONE, 2)
        assert_equal_by(agent_2_object.get_status, AssignState.STATUS_DONE, 2)

        # Assert conversation is complete for manager and agents
        assert_equal_by(
            lambda: len([
                p for p in agent_1.message_packet
                if p.data['text'] == data_model.COMMAND_SHOW_DONE_BUTTON
            ]),
            1,
            2,
        )
        assert_equal_by(
            lambda: len([
                p for p in agent_2.message_packet
                if p.data['text'] == data_model.COMMAND_SHOW_DONE_BUTTON
            ]),
            1,
            2,
        )
        assert_equal_by(lambda: manager.completed_conversations, 1, 2)

        # Assert sockets are closed
        assert_equal_by(
            lambda: len(
                [x for x in manager.socket_manager.run.values() if not x]),
            3, 2)

    def test_no_onboard_expire_waiting(self):
        """Without onboarding, a waiting agent gets EXPIRE_HIT when the pool expires."""
        manager = self.mturk_manager
        manager.set_onboard_function(None)

        # Alive first agent
        agent_1 = self.agent_1
        self.alive_agent(agent_1)
        agent_1_object = manager.worker_manager.get_agent_for_assignment(
            agent_1.assignment_id)
        assert_equal_by(agent_1_object.get_status,
                        AssignState.STATUS_WAITING, 2)

        manager._expire_agent_pool()

        assert_equal_by(
            lambda: len([
                p for p in agent_1.message_packet
                if p.data['text'] == data_model.COMMAND_EXPIRE_HIT
            ]),
            1,
            2,
        )

        # Assert sockets are closed
        assert_equal_by(
            lambda: len(
                [x for x in manager.socket_manager.run.values() if not x]),
            1, 2)

    def test_return_to_waiting_on_world_start(self):
        """If a partner ignores the conversation-change request, the other
        agent is returned to the waiting pool instead of starting a world."""
        manager = self.mturk_manager

        # Alive first agent
        agent_1 = self.agent_1
        self.alive_agent(agent_1)
        assert_equal_by(lambda: agent_1.worker_id in self.onboarding_agents,
                        True, 2)
        agent_1_object = manager.worker_manager.get_agent_for_assignment(
            agent_1.assignment_id)
        self.assertFalse(self.onboarding_agents[agent_1.worker_id])
        self.assertEqual(agent_1_object.get_status(),
                         AssignState.STATUS_ONBOARDING)
        self.onboarding_agents[agent_1.worker_id] = True
        assert_equal_by(agent_1_object.get_status,
                        AssignState.STATUS_WAITING, 2)

        # Make agent_1 no longer respond to change_conversation_requests
        def replace_on_msg(packet):
            agent_1.message_packet.append(packet)

        agent_1.on_msg = replace_on_msg

        # Alive second agent
        agent_2 = self.agent_2
        self.alive_agent(agent_2)
        assert_equal_by(lambda: agent_2.worker_id in self.onboarding_agents,
                        True, 2)
        agent_2_object = manager.worker_manager.get_agent_for_assignment(
            agent_2.assignment_id)
        self.assertFalse(self.onboarding_agents[agent_2.worker_id])
        self.assertEqual(agent_2_object.get_status(),
                         AssignState.STATUS_ONBOARDING)
        self.onboarding_agents[agent_2.worker_id] = True
        assert_equal_by(agent_2_object.get_status,
                        AssignState.STATUS_WAITING, 2)

        # Assert agents attempt to move to task, but then move back to waiting
        assert_equal_by(agent_2_object.get_status,
                        AssignState.STATUS_IN_TASK, 2)
        assert_equal_by(agent_2_object.get_status,
                        AssignState.STATUS_WAITING, 3)
        agent_1.always_beat = False

        # Assert no world ever started
        self.assertNotIn(agent_2.worker_id, self.worlds_agents)

        # Expire everything
        manager.shutdown()

        # Assert sockets are closed
        assert_equal_by(
            lambda: len(
                [x for x in manager.socket_manager.run.values() if not x]),
            2, 2)
def main():
    """
    IGC Human Evaluation.

    Specify the `--eval-data-path` to load examples for evaluation.

    The data in `--eval-data-path` should be formatted as a dictionary
    mapping IGC image ids to dicts with the following fields:
        {
            'questions': list of (<generator_name>, <generated_question>) tuples,
            'responses': list of (<generator_name>, <generated_response>) tuples,
            'question': question to use when evaluating responses,
            'context': context for the image
        }

    If not data path specified, loads a demo_example specified in worlds.py

    Specify `--image-path` for the path to the IGC images, where each example
    is saved as <image_id>.jpg

    NOTE: You can download the IGC Test Set from
    https://www.microsoft.com/en-us/download/details.aspx?id=55324
    And you can use the `download_igc_images.py` script to download the images
    (please put the IGC_crowd_test.csv file in this directory to use the script)
    """
    # Build the MTurk-specific argument parser for this task.
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    argparser.add_argument('-min_t', '--min_turns', default=3, type=int,
                           help='minimum number of turns')
    argparser.add_argument('-mt', '--max_turns', default=5, type=int,
                           help='maximal number of chat turns')
    argparser.add_argument('-mx_rsp_time', '--max_resp_time', default=1800,
                           type=int,
                           help='time limit for entering a dialog message')
    argparser.add_argument('-mx_onb_time', '--max_onboard_time', type=int,
                           default=300,
                           help='time limit for turker' 'in onboarding')
    argparser.add_argument('-ni', '--num_images', type=int, default=5,
                           help='number of images to show \
                           to turker')
    argparser.add_argument('--auto-approve-delay', type=int, default=3600 * 24,
                           help='how long to wait for \
                           auto approval')
    argparser.add_argument('--data-path', type=str, default='',
                           help='where to save data')
    argparser.add_argument('--eval-data-path', type=str, default='',
                           help='path to file with candidates to '
                                'evaluate')
    argparser.add_argument('--image-path', type=str, default='',
                           help='path to IGC images')
    argparser.add_argument('-rnd', '--dialog-round', type=str,
                           default='questions', choices=round_choices,
                           help='which dialog round to show')
    opt = argparser.parse_args()

    directory_path = os.path.dirname(os.path.abspath(__file__))
    opt['task'] = os.path.basename(directory_path)
    if 'data_path' not in opt or opt['data_path'] == '':
        opt['data_path'] = "{}/data/{}_evals".format(os.getcwd(),
                                                     opt['dialog_round'])
    opt['task_dir'] = os.getcwd()
    # Which task-config text is shown depends on the round being rated.
    if opt['dialog_round'] == 'questions':
        opt.update(tc_questions)
    else:
        opt.update(tc_responses)

    mturk_agent_ids = [RATER]
    mturk_manager = MTurkManager(opt=opt, mturk_agent_ids=mturk_agent_ids)

    example_generator = IGCExampleGenerator(opt)
    mturk_manager.setup_server(task_directory_path=directory_path)

    try:
        mturk_manager.start_new_run()

        def run_onboard(worker):
            # Each worker shares the generator so examples are not repeated.
            worker.example_generator = example_generator
            world = RoleOnboardWorld(opt, worker)
            world.parley()
            world.shutdown()

        mturk_manager.set_onboard_function(onboard_function=run_onboard)
        mturk_manager.ready_to_accept_workers()
        mturk_manager.create_hits()

        def check_worker_eligibility(worker):
            # Every worker is eligible for this task.
            return True

        def assign_worker_roles(workers):
            # Single-role task: every worker is a rater.
            for w in workers:
                w.id = mturk_agent_ids[0]

        def run_conversation(mturk_manager, opt, workers):
            agents = workers[:]
            conv_idx = mturk_manager.conversation_index
            world = MTurkIGCEvalWorld(
                opt,
                agents=agents,
                world_tag='conversation t_{}'.format(conv_idx),
            )
            while not world.episode_done():
                world.parley()
            world.save_data()
            world.shutdown()
            world.review_work()

        mturk_manager.start_task(eligibility_function=check_worker_eligibility,
                                 assign_role_function=assign_worker_roles,
                                 task_function=run_conversation)
    except BaseException:
        raise
    finally:
        # Always clean up outstanding HITs, even on error.
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
def main():
    """This task consists of an MTurk agent evaluating a wizard model.

    They are assigned a topic and asked to chat.
    """
    start_time = datetime.datetime.today().strftime('%Y-%m-%d-%H-%M')
    argparser = ParlaiParser(False, add_model_args=True)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    argparser.add_argument('-mt', '--max-turns', default=10, type=int,
                           help='maximal number of chat turns')
    argparser.add_argument('--max-resp-time', default=240, type=int,
                           help='time limit for entering a dialog message')
    argparser.add_argument('--max-choice-time', type=int, default=300,
                           help='time limit for turker'
                                'choosing the topic')
    argparser.add_argument('--ag-shutdown-time', default=120, type=int,
                           help='time limit for entering a dialog message')
    argparser.add_argument('-rt', '--range-turn', default='3,5',
                           help='sample range of number of turns')
    argparser.add_argument('--human-eval', type='bool', default=False,
                           help='human vs human eval, no models involved')
    argparser.add_argument('--auto-approve-delay', type=int,
                           default=3600 * 24 * 1,
                           help='how long to wait for auto approval')
    argparser.add_argument('--only-masters', type='bool', default=False,
                           help='Set to true to use only master turks for '
                                'this test eval')
    argparser.add_argument('--unique-workers', type='bool', default=False,
                           help='Each worker must be unique')
    argparser.add_argument('--mturk-log', type=str,
                           default='data/mturklogs/{}.log'.format(start_time))

    def inject_override(opt, override_dict):
        # Apply model overrides both to the 'override' key and at top level.
        opt['override'] = override_dict
        for k, v in override_dict.items():
            opt[k] = v

    def get_logger(opt):
        # Root logger: console always, plus a file handler when mturk_log
        # is configured. Logs the command line and full config on startup.
        logger = logging.getLogger()
        logger.setLevel(logging.INFO)

        fmt = logging.Formatter('%(asctime)s: [ %(message)s ]',
                                '%m/%d/%Y %I:%M:%S %p')
        console = logging.StreamHandler()
        console.setFormatter(fmt)
        logger.addHandler(console)
        if 'mturk_log' in opt:
            logfile = logging.FileHandler(opt['mturk_log'], 'a')
            logfile.setFormatter(fmt)
            logger.addHandler(logfile)
        logger.info('COMMAND: %s' % ' '.join(sys.argv))
        logger.info('-' * 100)
        logger.info('CONFIG:\n%s' % json.dumps(opt, indent=4, sort_keys=True))

        return logger

    # MODEL CONFIG
    # NOTE: please edit this to test your own models
    config = {
        'model': 'projects:wizard_of_wikipedia:interactive_retrieval',
        'retriever_model_file': 'models:wikipedia_full/tfidf_retriever/model',
        'responder_model_file':
            'models:wizard_of_wikipedia/full_dialogue_retrieval_model/model',
    }

    argparser.add_model_subargs(config['model'])  # add model args to opt
    start_opt = argparser.parse_args()
    inject_override(start_opt, config)

    # In human-human eval no model is created; the bot's params are shared
    # with each conversation world otherwise.
    if not start_opt.get('human_eval'):
        bot = create_agent(start_opt)
        shared_bot_params = bot.share()
    else:
        shared_bot_params = None

    if not start_opt['human_eval']:
        get_logger(bot.opt)
    else:
        get_logger(start_opt)

    if start_opt['human_eval']:
        folder_name = 'human_eval-{}'.format(start_time)
    else:
        folder_name = '{}-{}'.format(start_opt['model'], start_time)

    start_opt['task'] = os.path.basename(
        os.path.dirname(os.path.abspath(__file__)))
    if 'data_path' not in start_opt:
        start_opt['data_path'] = os.path.join(os.getcwd(), 'data',
                                              'wizard_eval', folder_name)
    start_opt.update(task_config)

    if not start_opt.get('human_eval'):
        mturk_agent_ids = ['PERSON_1']
    else:
        mturk_agent_ids = ['PERSON_1', 'PERSON_2']

    mturk_manager = MTurkManager(opt=start_opt,
                                 mturk_agent_ids=mturk_agent_ids)

    topics_generator = TopicsGenerator(start_opt)
    directory_path = os.path.dirname(os.path.abspath(__file__))
    mturk_manager.setup_server(task_directory_path=directory_path)
    # Shared across the onboarding/eligibility/assignment closures below.
    worker_roles = {}
    connect_counter = AttrDict(value=0)

    try:
        mturk_manager.start_new_run()
        agent_qualifications = []
        if not start_opt['is_sandbox']:
            # assign qualifications
            if start_opt['only_masters']:
                agent_qualifications.append(MASTER_QUALIF)
            if start_opt['unique_workers']:
                qual_name = 'UniqueChatEval'
                qual_desc = (
                    'Qualification to ensure each worker completes a maximum '
                    'of one of these chat/eval HITs')
                qualification_id = \
                    mturk_utils.find_or_create_qualification(qual_name,
                                                             qual_desc,
                                                             False)
                print('Created qualification: ', qualification_id)
                UNIQUE_QUALIF = {
                    'QualificationTypeId': qualification_id,
                    'Comparator': 'DoesNotExist',
                    'RequiredToPreview': True
                }
                start_opt['unique_qualif_id'] = qualification_id
                agent_qualifications.append(UNIQUE_QUALIF)
        mturk_manager.create_hits(qualifications=agent_qualifications)

        def run_onboard(worker):
            if start_opt['human_eval']:
                # Alternate PERSON_1 / PERSON_2 across connecting workers.
                role = mturk_agent_ids[connect_counter.value %
                                       len(mturk_agent_ids)]
                connect_counter.value += 1
                worker_roles[worker.worker_id] = role
            else:
                role = 'PERSON_1'
            worker.topics_generator = topics_generator
            world = TopicChooseWorld(start_opt, worker, role=role)
            world.parley()
            world.shutdown()

        mturk_manager.set_onboard_function(onboard_function=run_onboard)
        mturk_manager.ready_to_accept_workers()

        def check_single_worker_eligibility(worker):
            return True

        def check_multiple_workers_eligibility(workers):
            # Pick one worker per role; only proceed with a full pair.
            valid_workers = {}
            for worker in workers:
                worker_id = worker.worker_id
                if worker_id not in worker_roles:
                    print('Something went wrong')
                    continue
                role = worker_roles[worker_id]
                if role not in valid_workers:
                    valid_workers[role] = worker
                if len(valid_workers) == 2:
                    break
            return valid_workers.values() if len(valid_workers) == 2 else []

        if not start_opt['human_eval']:
            eligibility_function = {
                'func': check_single_worker_eligibility,
                'multiple': False,
            }
        else:
            eligibility_function = {
                'func': check_multiple_workers_eligibility,
                'multiple': True,
            }

        def assign_worker_roles(workers):
            if start_opt['human_eval']:
                for worker in workers:
                    worker.id = worker_roles[worker.worker_id]
            else:
                for index, worker in enumerate(workers):
                    worker.id = mturk_agent_ids[index % len(mturk_agent_ids)]

        def run_conversation(mturk_manager, opt, workers):
            conv_idx = mturk_manager.conversation_index
            world = WizardEval(
                opt=start_opt,
                agents=workers,
                range_turn=[
                    int(s) for s in start_opt['range_turn'].split(',')
                ],
                max_turn=start_opt['max_turns'],
                max_resp_time=start_opt['max_resp_time'],
                model_agent_opt=shared_bot_params,
                world_tag='conversation t_{}'.format(conv_idx),
                agent_timeout_shutdown=opt['ag_shutdown_time'],
            )
            while not world.episode_done():
                world.parley()
            world.save_data()
            world.shutdown()
            # Free model/world memory between conversations.
            gc.collect()

        mturk_manager.start_task(eligibility_function=eligibility_function,
                                 assign_role_function=assign_worker_roles,
                                 task_function=run_conversation)

    except BaseException:
        raise
    finally:
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
def main():
    """
    Wizard of Wikipedia Data Collection Task.

    The task involves two people holding a conversation. One dialog partner
    chooses a topic to discuss, and then dialog proceeds. One partner is
    the Wizard, who has access to retrieved external information conditioned
    on the last two utterances, as well as information regarding the chosen
    topic. The other partner is the Apprentice, who assumes the role of
    someone eager to learn about the chosen topic.
    """
    argparser = ParlaiParser(False, False)
    DictionaryAgent.add_cmdline_args(argparser)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    argparser.add_argument(
        '-min_t', '--min_turns', default=3, type=int,
        help='minimum number of turns'
    )
    argparser.add_argument(
        '-max_t',
        '--max_turns',
        default=5,
        type=int,
        help='maximal number of chat turns',
    )
    argparser.add_argument(
        '-mx_rsp_time',
        '--max_resp_time',
        default=120,
        type=int,
        help='time limit for entering a dialog message',
    )
    argparser.add_argument(
        '-mx_onb_time',
        '--max_onboard_time',
        type=int,
        default=300,
        help='time limit for turker' 'in onboarding',
    )
    argparser.add_argument(
        '--persona-type',
        default='both',
        type=str,
        choices=['both', 'self', 'other'],
        help='Which personas to load from personachat',
    )
    argparser.add_argument(
        '--auto-approve-delay',
        type=int,
        default=3600 * 24 * 1,
        help='how long to wait for \
        auto approval',
    )
    argparser.add_argument(
        '--word-overlap-threshold',
        type=int,
        default=2,
        help='How much word overlap we want between message \
        and checked sentence',
    )
    argparser.add_argument(
        '--num-good-sentence-threshold',
        type=int,
        default=2,
        help='How many good sentences with sufficient overlap \
        are necessary for turker to be considered good.',
    )
    argparser.add_argument(
        '--num-passages-retrieved',
        type=int,
        default=7,
        help='How many passages to retrieve per dialog \
        message',
    )

    opt = argparser.parse_args()
    directory_path = os.path.dirname(os.path.abspath(__file__))
    opt['task'] = os.path.basename(directory_path)
    if 'data_path' not in opt:
        opt['data_path'] = os.getcwd() + '/data/' + opt['task']
    opt['current_working_dir'] = os.getcwd()
    opt.update(task_config)

    mturk_agent_ids = [APPRENTICE, WIZARD]
    opt['min_messages'] = 2

    mturk_manager = MTurkManager(opt=opt, mturk_agent_ids=mturk_agent_ids)
    setup_personas_with_wiki_links(opt)
    ir_agent, task = setup_retriever(opt)
    persona_generator = PersonasGenerator(opt)
    wiki_title_to_passage = setup_title_to_passage(opt)
    mturk_manager.setup_server(task_directory_path=directory_path)
    # Shared across the onboarding/eligibility/assignment closures below.
    worker_roles = {}
    connect_counter = AttrDict(value=0)

    try:
        mturk_manager.start_new_run()
        if not opt['is_sandbox']:
            # Soft-block every worker listed in mtdont.txt for live runs.
            with open(os.path.join(opt['current_working_dir'],
                                   'mtdont.txt')) as f:
                lines = [l.replace('\n', '') for l in f.readlines()]
            for w in lines:
                mturk_manager.soft_block_worker(w)

        def run_onboard(worker):
            # Alternate Apprentice / Wizard across connecting workers.
            role = mturk_agent_ids[connect_counter.value %
                                   len(mturk_agent_ids)]
            connect_counter.value += 1
            worker_roles[worker.worker_id] = role
            worker.persona_generator = persona_generator
            world = RoleOnboardWorld(opt, worker, role)
            world.parley()
            world.shutdown()

        mturk_manager.set_onboard_function(onboard_function=run_onboard)
        mturk_manager.ready_to_accept_workers()
        mturk_manager.create_hits()

        def check_workers_eligibility(workers):
            if opt['is_sandbox']:
                return workers
            # Pick one worker per role; only proceed with a full pair.
            valid_workers = {}
            for worker in workers:
                worker_id = worker.worker_id
                if worker_id not in worker_roles:
                    """
                    Something went wrong...
                    """
                    continue
                role = worker_roles[worker_id]
                if role not in valid_workers:
                    valid_workers[role] = worker
                if len(valid_workers) == 2:
                    break
            return valid_workers.values() if len(valid_workers) == 2 else []

        eligibility_function = {'func': check_workers_eligibility,
                                'multiple': True}

        def assign_worker_roles(workers):
            if opt['is_sandbox']:
                for i, worker in enumerate(workers):
                    worker.id = mturk_agent_ids[i % len(mturk_agent_ids)]
            else:
                for worker in workers:
                    worker.id = worker_roles[worker.worker_id]

        def run_conversation(mturk_manager, opt, workers):
            agents = workers[:]
            if not opt['is_sandbox']:
                # These workers are now in a task; drop their queued roles.
                for agent in agents:
                    worker_roles.pop(agent.worker_id)
            conv_idx = mturk_manager.conversation_index
            world = MTurkWizardOfWikipediaWorld(
                opt,
                agents=agents,
                world_tag='conversation t_{}'.format(conv_idx),
                ir_agent=ir_agent,
                wiki_title_to_passage=wiki_title_to_passage,
                task=task,
            )
            world.reset_random()
            while not world.episode_done():
                world.parley()
            world.save_data()
            # Soft-block wizards who did not use the checked sentences well.
            if world.convo_finished and not world.good_wiz and not opt['is_sandbox']:
                mturk_manager.soft_block_worker(world.wizard_worker)
            world.shutdown()
            world.review_work()

        mturk_manager.start_task(
            eligibility_function=eligibility_function,
            assign_role_function=assign_worker_roles,
            task_function=run_conversation,
        )
    except BaseException:
        raise
    finally:
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
def main():
    """
    Human Evaluation of various image captions/comments.

    A turker is shown an image and two possible comments/captions, and
    optionally the personality used to create these captions. Then, the
    turker is asked to choose which caption they think is more engaging.

    In this example, we will just be comparing the original comment twice
    (this is just to demonstrate the task for future use).

    To use your own data, please specify `--eval-data-path` to an
    appropriate json file with a list of examples, where each example has
    the following structure:
        {
            'image_hash': <hash of image>,
            'personality': <personality, if applicable>,
            '<compare_key_1>': <first option to compare>,
            '<compare_key_2>': <second option to compare>,
            .
            .
            .
        }
    Note that compare_key_1 and compare_key_2 can be any field, as long as
    they map to a string comment/caption.

    Example Scenario:
        Suppose you have the original Personality-Captions dataset, and you
        would like to compare the outputs of your model called `model`.

        Your data may look like the following:
        [{
            'image_hash': hashforimageofcat,
            'personality': 'Sweet',
            'comment': 'Look at the cute cat!', # Human Comment
            'model_comment': 'That's a weird looking dog' # Model Comment
        }, ...]

        Thus, you would specify `-ck1 comment -ck2 model_comment` to
        evaluate the outputs of the model vs. the human comments from
        Personality-Captions
    """
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    argparser.add_argument('-mx_rsp_time', '--max_resp_time', default=1800,
                           type=int,
                           help='time limit for entering a dialog message')
    argparser.add_argument('-mx_onb_time', '--max_onboard_time', type=int,
                           default=300,
                           help='time limit for turker' 'in onboarding')
    argparser.add_argument('-ni', '--num_images', type=int, default=10,
                           help='number of images to show \
                           to turker')
    argparser.add_argument('--data-path', type=str, default='',
                           help='where to save data')
    argparser.add_argument('--eval-data-path', type=str, default='',
                           help='where to load data to rank from. Leave '
                                'blank to use Personality-Captions data')
    argparser.add_argument('-ck1', '--compare-key-1', type=str,
                           default='comment',
                           help='key of first option to compare')
    argparser.add_argument('-ck2', '--compare-key-2', type=str,
                           default='comment',
                           help='key of second option to compare')
    argparser.add_argument('--show-personality', default=True, type='bool',
                           help='whether to show the personality')
    PersonalityCaptionsTeacher.add_cmdline_args(argparser)
    opt = argparser.parse_args()

    directory_path = os.path.dirname(os.path.abspath(__file__))
    opt['task'] = os.path.basename(directory_path)
    if 'data_path' not in opt or opt['data_path'] == '':
        opt['data_path'] = os.getcwd() + '/data/' + opt['task']
    # Default to the Personality-Captions training data when no eval data
    # path was given.
    if opt.get('eval_data_path') == '':
        opt['eval_data_path'] = os.path.join(
            opt['datapath'], 'personality_captions/train.json')
    opt.update(task_config)

    mturk_agent_ids = [CHOOSER]
    mturk_manager = MTurkManager(
        opt=opt,
        mturk_agent_ids=mturk_agent_ids
    )

    example_generator = ExampleGenerator(opt)
    mturk_manager.setup_server(task_directory_path=directory_path)

    try:
        mturk_manager.start_new_run()

        def run_onboard(worker):
            # Workers share one generator so examples are distributed.
            worker.example_generator = example_generator
            world = RoleOnboardWorld(opt, worker)
            world.parley()
            world.shutdown()

        mturk_manager.set_onboard_function(onboard_function=run_onboard)
        mturk_manager.ready_to_accept_workers()
        mturk_manager.create_hits()

        def check_worker_eligibility(worker):
            return True

        def assign_worker_roles(workers):
            # Single-role task: every worker is a chooser.
            for w in workers:
                w.id = mturk_agent_ids[0]

        def run_conversation(mturk_manager, opt, workers):
            agents = workers[:]
            conv_idx = mturk_manager.conversation_index
            world = MTurkPersonalityCaptionsStackRankWorld(
                opt,
                agents=agents,
                world_tag='conversation t_{}'.format(conv_idx),
            )
            while not world.episode_done():
                world.parley()
            world.save_data()
            world.shutdown()
            world.review_work()

        mturk_manager.start_task(
            eligibility_function=check_worker_eligibility,
            assign_role_function=assign_worker_roles,
            task_function=run_conversation
        )
    except BaseException:
        raise
    finally:
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
def main(): argparser = ParlaiParser(False, False) argparser.add_parlai_data_path() argparser.add_mturk_args() # The dialog model we want to evaluate from parlai.agents.ir_baseline.ir_baseline import IrBaselineAgent IrBaselineAgent.add_cmdline_args(argparser) opt = argparser.parse_args() opt['task'] = os.path.basename(os.path.dirname(os.path.abspath(__file__))) opt.update(task_config) # The task that we will evaluate the dialog model on task_opt = {} task_opt['datatype'] = 'test' task_opt['datapath'] = opt['datapath'] task_opt['task'] = '#MovieDD-Reddit' mturk_agent_id = 'Worker' mturk_manager = MTurkManager(opt=opt, mturk_agent_ids=[mturk_agent_id]) mturk_manager.setup_server() try: mturk_manager.start_new_run() mturk_manager.create_hits() def run_onboard(worker): world = ModelEvaluatorOnboardWorld(opt=opt, mturk_agent=worker) while not world.episode_done(): world.parley() world.shutdown() mturk_manager.set_onboard_function(onboard_function=run_onboard) mturk_manager.ready_to_accept_workers() def check_worker_eligibility(worker): return True def assign_worker_roles(worker): worker[0].id = mturk_agent_id global run_conversation def run_conversation(mturk_manager, opt, workers): mturk_agent = workers[0] model_agent = IrBaselineAgent(opt=opt) world = ModelEvaluatorWorld(opt=opt, model_agent=model_agent, task_opt=task_opt, mturk_agent=mturk_agent) while not world.episode_done(): world.parley() world.shutdown() world.review_work() mturk_manager.start_task(eligibility_function=check_worker_eligibility, assign_role_function=assign_worker_roles, task_function=run_conversation) except: raise finally: mturk_manager.expire_all_unassigned_hits() mturk_manager.shutdown()
def main():
    """
    Main function for the DMG pilot data collection task

    :return: Nothing.
    """
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    argparser.add_argument('--two_mturk_agents', dest='two_mturk_agents',
                           action='store_true',
                           help='data collection mode '
                                'with converations between two MTurk agents')

    opt = argparser.parse_args()
    opt['task'] = 'dmg_pilot_dev'
    opt['datatype'] = 'dmg_pilot_data_1'
    opt.update(task_config)

    local_agent_1_id = 'local_1'
    mturk_agent_ids = ['mturk_agent_1']
    if opt['two_mturk_agents']:
        mturk_agent_ids.append('mturk_agent_2')

    mturk_manager = MTurkManager(
        opt=opt,
        mturk_agent_ids=mturk_agent_ids
    )

    mturk_manager.setup_server()

    try:
        mturk_manager.start_new_run()
        mturk_manager.create_hits()

        # No onboarding step for this task.
        mturk_manager.set_onboard_function(onboard_function=None)
        mturk_manager.ready_to_accept_workers()

        def check_worker_eligibility(worker):
            return True

        def assign_worker_roles(workers):
            for index, worker in enumerate(workers):
                worker.id = mturk_agent_ids[index % len(mturk_agent_ids)]

        def run_conversation(mturk_manager, opt, workers):
            agents = workers[:]

            # Create a local agent
            if not opt['two_mturk_agents']:
                if 'model' in opt:
                    local_agent = create_agent(opt)
                else:
                    local_agent = LocalHumanAgent(opt=None)

                local_agent.id = local_agent_1_id
                agents.append(local_agent)

            opt["batchindex"] = mturk_manager.started_conversations

            world = MTurkDMGDialogWorld(
                opt=opt,
                agents=agents
            )

            log_timestamp = time.time()

            # Loop over all five rounds of the game
            for r in range(5):
                print("--- Starting round {} ---".format(r + 1))

                while not world.episode_done():
                    world.parley()

                # Write the log data to file
                print("Writing log to file")
                if not os.path.exists("logs"):
                    os.makedirs("logs")
                with open('logs/dmg_pilot_data_{}_{}.json'.format(
                        world.game_nr, log_timestamp), 'w') as f:
                    json.dump(world.conversation_log, f)

                if not r == 4:
                    # Reset the world for the next round
                    world.selections = defaultdict(lambda: dict())
                    world.round_log = world.reset_round_log()
                    world.turn_nr = -1
                    world.round_nr += 1
                    world.doneCounter = 0
                    world.episodeDone = False
                else:
                    # Final round: tear the world down.
                    world.shutdown()

        mturk_manager.start_task(
            eligibility_function=check_worker_eligibility,
            assign_role_function=assign_worker_roles,
            task_function=run_conversation
        )

        print("Game ended.")

    except BaseException:
        raise
    finally:
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
def main():
    """
    This task consists of one local human agent and two MTurk agents,
    each MTurk agent will go through the onboarding step to provide
    information about themselves, before being put into a conversation.

    You can end the conversation by sending a message ending with
    `[DONE]` from human_1.
    """
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    opt = argparser.parse_args()
    opt['task'] = os.path.basename(os.path.dirname(os.path.abspath(__file__)))
    opt.update(task_config)

    mturk_agent_1_id = 'mturk_agent_1'
    mturk_agent_2_id = 'mturk_agent_2'
    human_agent_1_id = 'human_1'
    mturk_agent_ids = [mturk_agent_1_id, mturk_agent_2_id]
    mturk_manager = MTurkManager(
        opt=opt,
        mturk_agent_ids=mturk_agent_ids
    )
    mturk_manager.setup_server()

    try:
        mturk_manager.start_new_run()
        mturk_manager.create_hits()

        def run_onboard(worker):
            world = MTurkMultiAgentDialogOnboardWorld(
                opt=opt,
                mturk_agent=worker
            )
            while not world.episode_done():
                world.parley()
            world.shutdown()

        # You can set onboard_function to None to skip onboarding
        mturk_manager.set_onboard_function(onboard_function=run_onboard)
        mturk_manager.ready_to_accept_workers()

        def check_worker_eligibility(worker):
            return True

        eligibility_function = {
            'func': check_worker_eligibility,
            'multiple': False,
        }

        def assign_worker_roles(workers):
            for index, worker in enumerate(workers):
                worker.id = mturk_agent_ids[index % len(mturk_agent_ids)]

        def run_conversation(mturk_manager, opt, workers):
            # Create mturk agents
            mturk_agent_1 = workers[0]
            mturk_agent_2 = workers[1]

            # Create the local human agents
            human_agent_1 = LocalHumanAgent(opt=None)
            human_agent_1.id = human_agent_1_id

            world = MTurkMultiAgentDialogWorld(
                opt=opt,
                agents=[human_agent_1, mturk_agent_1, mturk_agent_2]
            )

            while not world.episode_done():
                world.parley()

            world.shutdown()

        mturk_manager.start_task(
            eligibility_function=eligibility_function,
            assign_role_function=assign_worker_roles,
            task_function=run_conversation
        )
    except BaseException:
        raise
    finally:
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
def main(): """This task consists of one agent, model or MTurk worker, talking to an MTurk worker to negotiate a deal. """ argparser = ParlaiParser(False, False) argparser.add_parlai_data_path() argparser.add_mturk_args() argparser.add_argument('--two_mturk_agents', dest='two_mturk_agents', action='store_true', help='data collection mode ' 'with converations between two MTurk agents') opt = argparser.parse_args() opt['task'] = 'dealnodeal' opt['datatype'] = 'valid' opt.update(task_config) local_agent_1_id = 'local_1' mturk_agent_ids = ['mturk_agent_1'] if opt['two_mturk_agents']: mturk_agent_ids.append('mturk_agent_2') mturk_manager = MTurkManager( opt=opt, mturk_agent_ids=mturk_agent_ids ) mturk_manager.setup_server() try: mturk_manager.start_new_run() mturk_manager.create_hits() mturk_manager.set_onboard_function(onboard_function=None) mturk_manager.ready_to_accept_workers() def check_worker_eligibility(worker): return True def assign_worker_roles(workers): for index, worker in enumerate(workers): worker.id = mturk_agent_ids[index % len(mturk_agent_ids)] def run_conversation(mturk_manager, opt, workers): agents = workers[:] # Create a local agent if not opt['two_mturk_agents']: if 'model' in opt: local_agent = create_agent(opt) else: local_agent = LocalHumanAgent(opt=None) local_agent.id = local_agent_1_id agents.append(local_agent) opt["batchindex"] = mturk_manager.started_conversations world = MTurkDealNoDealDialogWorld( opt=opt, agents=agents ) while not world.episode_done(): world.parley() world.shutdown() mturk_manager.start_task( eligibility_function=check_worker_eligibility, assign_role_function=assign_worker_roles, task_function=run_conversation ) except BaseException: raise finally: mturk_manager.expire_all_unassigned_hits() mturk_manager.shutdown()
def main(): argparser = ParlaiParser(False, False) argparser.add_parlai_data_path() argparser.add_mturk_args() # The dialog model we want to evaluate from parlai.agents.ir_baseline.ir_baseline import IrBaselineAgent IrBaselineAgent.add_cmdline_args(argparser) opt = argparser.parse_args() opt['task'] = os.path.basename(os.path.dirname(os.path.abspath(__file__))) opt.update(task_config) # The task that we will evaluate the dialog model on task_opt = {} task_opt['datatype'] = 'test' task_opt['datapath'] = opt['datapath'] task_opt['task'] = '#MovieDD-Reddit' mturk_agent_id = 'Worker' mturk_manager = MTurkManager( opt=opt, mturk_agent_ids=[mturk_agent_id] ) mturk_manager.setup_server() try: mturk_manager.start_new_run() mturk_manager.create_hits() def run_onboard(worker): world = ModelEvaluatorOnboardWorld(opt=opt, mturk_agent=worker) while not world.episode_done(): world.parley() world.shutdown() mturk_manager.set_onboard_function(onboard_function=run_onboard) mturk_manager.ready_to_accept_workers() def check_worker_eligibility(worker): return True def assign_worker_roles(worker): worker[0].id = mturk_agent_id global run_conversation def run_conversation(mturk_manager, opt, workers): mturk_agent = workers[0] model_agent = IrBaselineAgent(opt=opt) world = ModelEvaluatorWorld( opt=opt, model_agent=model_agent, task_opt=task_opt, mturk_agent=mturk_agent ) while not world.episode_done(): world.parley() world.shutdown() world.review_work() mturk_manager.start_task( eligibility_function=check_worker_eligibility, assign_role_function=assign_worker_roles, task_function=run_conversation ) except BaseException: raise finally: mturk_manager.expire_all_unassigned_hits() mturk_manager.shutdown()
def main(): argparser = ParlaiParser(False, False) argparser.add_parlai_data_path() argparser.add_mturk_args() opt = argparser.parse_args() opt['task'] = os.path.basename(os.path.dirname(os.path.abspath(__file__))) opt.update(task_config) # Initialize a SQuAD teacher agent, which we will get context from module_name = 'parlai.tasks.squad.agents' class_name = 'DefaultTeacher' my_module = importlib.import_module(module_name) task_class = getattr(my_module, class_name) task_opt = {} task_opt['datatype'] = 'train' task_opt['datapath'] = opt['datapath'] mturk_agent_id = 'Worker' mturk_manager = MTurkManager( opt=opt, mturk_agent_ids=[mturk_agent_id] ) mturk_manager.setup_server() def run_onboard(worker): world = QADataCollectionOnboardWorld(opt=opt, mturk_agent=worker) while not world.episode_done(): world.parley() world.shutdown() mturk_manager.set_onboard_function(onboard_function=None) try: mturk_manager.start_new_run() mturk_manager.create_hits() mturk_manager.ready_to_accept_workers() def check_workers_eligibility(workers): return workers eligibility_function = { 'func': check_workers_eligibility, 'multiple': True, } def assign_worker_roles(worker): worker[0].id = mturk_agent_id global run_conversation def run_conversation(mturk_manager, opt, workers): task = task_class(task_opt) mturk_agent = workers[0] world = QADataCollectionWorld( opt=opt, task=task, mturk_agent=mturk_agent ) while not world.episode_done(): world.parley() world.shutdown() world.review_work() mturk_manager.start_task( eligibility_function=eligibility_function, assign_role_function=assign_worker_roles, task_function=run_conversation ) except BaseException: raise finally: mturk_manager.expire_all_unassigned_hits() mturk_manager.shutdown()
def main():
    """Run the restaurant-bot data collection HIT.

    Each worker may have at most 15 dialogues per session; per-worker
    dialogue counts are tracked in-memory in ``user_ids`` for the lifetime
    of this run.

    Fix over original: the run is now wrapped in try/finally so that
    ``expire_all_unassigned_hits`` and ``shutdown`` execute even when setup
    or ``start_task`` raises, matching the cleanup pattern used by every
    other task launcher in this repository.
    """
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    opt = argparser.parse_args()
    # Task name is derived from the containing folder's name
    opt['task'] = os.path.basename(os.path.dirname(os.path.abspath(__file__)))
    opt.update(task_config)

    # worker_id -> number of completed dialogues this session
    user_ids = {}

    # Initialize a SQuAD teacher agent, which we will get context from
    module_name = 'parlai.tasks.squad.agents'
    class_name = 'DefaultTeacher'
    my_module = importlib.import_module(module_name)
    task_class = getattr(my_module, class_name)
    task_opt = opt.copy()
    task_opt['datatype'] = 'train'
    task_opt['datapath'] = opt['datapath']

    mturk_agent_id = 'Worker'
    mturk_manager = MTurkManager(opt=opt, mturk_agent_ids=[mturk_agent_id])
    mturk_manager.setup_server()
    mturk_manager.set_onboard_function(onboard_function=None)

    try:
        mturk_manager.start_new_run()
        mturk_manager.create_hits()
        mturk_manager.ready_to_accept_workers()

        def check_workers_eligibility(workers):
            # 'multiple' form: all connected workers are eligible
            return workers

        eligibility_function = {
            'func': check_workers_eligibility,
            'multiple': True,
        }

        def assign_worker_roles(worker):
            worker[0].id = mturk_agent_id

        global run_conversation

        def run_conversation(mturk_manager, opt, workers):
            mturk_manager.left_pane_refresh(task_config['task_description'])
            mturk_agent = workers[0]
            if mturk_agent.worker_id in user_ids:
                print("USER_ID: ", mturk_agent.worker_id,
                      " DIALS: ", user_ids[mturk_agent.worker_id])
            else:
                print("USER_ID: ", mturk_agent.worker_id, " DIALS: 0")
            if (mturk_agent.worker_id in user_ids) and user_ids[mturk_agent.worker_id] >= 15:
                # Worker hit the per-session cap: notify them and end the HIT
                ad = {'episode_done': False}
                ad['id'] = 'Restaurant bot'
                ad['text'] = "We are closing this HIT, since you've already had over 15 dialogues with our restaurant bot in this session. We are very appreciated for your help. Welcome to join the next session."
                mturk_agent.observe(validate(ad))
                return
            else:
                world = QADataCollectionWorld(opt=opt, mturk_agent=mturk_agent)
                btime = time.time()
                world.parley()
                etime = time.time()
                logger.debug("DialTime: " + str(etime - btime))
                # Bump this worker's per-session dialogue count
                if mturk_agent.worker_id not in user_ids:
                    user_ids[mturk_agent.worker_id] = 1
                else:
                    user_ids[mturk_agent.worker_id] += 1
                world.shutdown()
                world.review_work()
                return

        mturk_manager.start_task(
            eligibility_function=eligibility_function,
            assign_role_function=assign_worker_roles,
            task_function=run_conversation,
        )
    finally:
        # Clean up even if the run crashed mid-way
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
def main():
    """Handles setting up and running a ParlAI-MTurk task by instantiating
    an MTurk manager and configuring it for the qa_data_collection task
    """
    # Get relevant arguments
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    opt = argparser.parse_args()

    # Set the task name to be the folder name
    opt['task'] = os.path.basename(os.path.dirname(os.path.abspath(__file__)))
    opt['assignment_duration_in_seconds'] = 10000

    # append the contents of task_config.py to the configuration
    opt.update(task_config)

    # Initialize a teacher agent, which we will get premises from
    module_name = 'parlai.tasks.squad2.agents'
    class_name = 'DefaultTeacher'
    my_module = importlib.import_module(module_name)
    task_class = getattr(my_module, class_name)
    task_opt = opt.copy()
    task_opt['datatype'] = 'train'
    task_opt['datapath'] = opt['datapath']

    assert (
        task_opt['num_workers'] % 2 == 0 and task_opt['num_workers'] >= 4
    ), "The game only supports even number of workers, with a minimum of 4 people."

    # Select an agent_id that worker agents will be assigned in their world
    persons = {}
    mturk_agent_roles = []
    for i in range(1, task_opt['num_workers'] + 1):
        persons[i] = 'Person' + str(i)
        mturk_agent_roles.append(persons[i])

    # Instantiate an MTurkManager with the given options and a maximum number
    # of agents per world of 1 (based on the length of mturk_agent_ids)
    mturk_manager = MTurkManager(opt=opt, mturk_agent_ids=mturk_agent_roles, use_db=True)
    mturk_manager.setup_server(
        task_directory_path=os.path.dirname(os.path.abspath(__file__)))

    # Round-robin counter for handing out roles during onboarding
    role_index = 0

    # Create an onboard_function, which will be run for workers who have
    # accepted your task and must be completed before they are put in the
    # queue for a task world.
    def run_onboard(worker):
        nonlocal role_index
        # Assign the next role in sequence as each worker arrives
        role = mturk_agent_roles[role_index % len(mturk_agent_roles)]
        role_index += 1
        worker.update_agent_id('Onboarding {}'.format(role))
        worker.demo_role = role
        # if role == 'Writer0' or role == 'Writer1':
        world = OnboardingWorld(opt=opt, mturk_agent=worker)
        # else:
        #     world = EvaluatorOnboardingWorld(opt=opt, mturk_agent=worker)
        while not world.episode_done():
            world.parley()
        world.shutdown()
        return world.prep_save_data([worker])

    # If we want to use the above onboard function, we can replace the below
    # with set_onboard_function(onboard_function=run_onboard)
    mturk_manager.set_onboard_function(onboard_function=run_onboard)

    try:
        # Initialize run information
        mturk_manager.start_new_run()

        # Set up the sockets and threads to recieve workers
        mturk_manager.ready_to_accept_workers()

        # Create the hits as specified by command line arguments
        mturk_manager.create_hits()

        # Check workers eligiblity acts as a filter, and should return
        # the list of all workers currently eligible to work on the task
        # Can be used to pair workers that meet certain criterea
        def check_workers_eligibility(workers):
            # Keep at most one worker per distinct demo_role
            filled_roles = []
            use_workers = []
            for worker in workers:
                if worker.demo_role not in filled_roles:
                    use_workers.append(worker)
                    filled_roles.append(worker.demo_role)
            return use_workers

        eligibility_function = {
            'func': check_workers_eligibility,
            'multiple': True
        }

        # Assign worker roles is used to determine what the role each worker
        # in the given worker list will play. Setting `id` to None will return
        # the worker to the pool rather than putting them in a given task,
        # which is useful for having tasks with different possible worker
        # counts.
        def assign_worker_roles(workers):
            for worker in workers:
                worker.id = worker.demo_role

        # Define the task function, which will be run with workers that are
        # as the main task.
        global run_conversation

        def run_conversation(mturk_manager, opt, workers):
            # Create a task agent to get prompts from SQuAD 2.0
            task = task_class(task_opt)
            # Create the task world
            world = MultiRoleAgentWorld(opt=opt, task=task, mturk_agents=workers)
            # run the world to completion
            while not world.episode_done():
                world.parley()

            # shutdown and review the work
            world.shutdown()
            world.review_work()

            # Return the contents for saving
            return world.prep_save_data(workers)

        # Begin the task, allowing mturk_manager to start running the task
        # world on any workers who connect
        mturk_manager.start_task(
            eligibility_function=eligibility_function,
            assign_role_function=assign_worker_roles,
            task_function=run_conversation,
        )
    except BaseException:
        raise
    finally:
        # Any hits that aren't claimed or completed have to be shut down. Must
        # keep the world running until that point.
        mturk_manager.expire_all_unassigned_hits()

        # Shutdown the manager and free all related resources
        mturk_manager.shutdown()
def main():
    """Run a three-party dialog: one local human agent plus two MTurk workers.

    Fix over original: the bare ``except:`` is narrowed to
    ``except BaseException:`` — consistent with the other task launchers in
    this file — making the re-raise-after-cleanup intent explicit and
    avoiding a blanket handler that linters flag.
    """
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    opt = argparser.parse_args()
    # Task name is derived from the containing folder's name
    opt['task'] = os.path.basename(os.path.dirname(os.path.abspath(__file__)))
    opt.update(task_config)

    mturk_agent_1_id = 'mturk_agent_1'
    mturk_agent_2_id = 'mturk_agent_2'
    human_agent_1_id = 'human_1'
    mturk_agent_ids = [mturk_agent_1_id, mturk_agent_2_id]
    mturk_manager = MTurkManager(opt=opt, mturk_agent_ids=mturk_agent_ids)
    mturk_manager.setup_server()

    try:
        mturk_manager.start_new_run()
        mturk_manager.create_hits()

        def run_onboard(worker):
            world = MTurkMultiAgentDialogOnboardWorld(
                opt=opt,
                mturk_agent=worker
            )
            while not world.episode_done():
                world.parley()
            world.shutdown()

        # You can set onboard_function to None to skip onboarding
        mturk_manager.set_onboard_function(onboard_function=run_onboard)
        mturk_manager.ready_to_accept_workers()

        def check_worker_eligibility(worker):
            # Every worker is eligible for this task
            return True

        def assign_worker_roles(workers):
            # Round-robin the two MTurk roles over the connected workers
            for index, worker in enumerate(workers):
                worker.id = mturk_agent_ids[index % len(mturk_agent_ids)]

        def run_conversation(mturk_manager, opt, workers):
            # Create mturk agents
            mturk_agent_1 = workers[0]
            mturk_agent_2 = workers[1]

            # Create the local human agent
            human_agent_1 = LocalHumanAgent(opt=None)
            human_agent_1.id = human_agent_1_id

            world = MTurkMultiAgentDialogWorld(
                opt=opt,
                agents=[human_agent_1, mturk_agent_1, mturk_agent_2]
            )

            while not world.episode_done():
                world.parley()

            world.shutdown()

        mturk_manager.start_task(
            eligibility_function=check_worker_eligibility,
            assign_role_function=assign_worker_roles,
            task_function=run_conversation
        )
    except BaseException:
        raise
    finally:
        # Always expire leftover HITs and free server resources
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
def main():
    """Qualification-flow demo task.

    Workers get a 'first time' pass through the solo world; the world is
    given a run-scoped MTurk qualification id it can use to filter workers
    into different pools. Completions are tracked in-memory per run.
    """
    # worker_ids that have finished at least one conversation this run
    completed_workers = []
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    opt = argparser.parse_args()
    # Task name is derived from the containing folder's name
    opt['task'] = os.path.basename(os.path.dirname(os.path.abspath(__file__)))
    opt.update(task_config)
    mturk_agent_id = 'Worker'
    mturk_manager = MTurkManager(opt=opt, mturk_agent_ids=[mturk_agent_id])
    mturk_manager.setup_server()

    # Randomized name so each run gets its own fresh qualification
    qual_name = 'ParlAIExcludeQual{}t{}'.format(
        random.randint(10000, 99999), random.randint(10000, 99999))
    qual_desc = (
        'Qualification for a worker not correctly completing the '
        'first iteration of a task. Used to filter to different task pools.')
    qualification_id = \
        mturk_utils.find_or_create_qualification(qual_name, qual_desc,
                                                 opt['is_sandbox'])
    print('Created qualification: ', qualification_id)

    def run_onboard(worker):
        world = QualificationFlowOnboardWorld(opt, worker)
        while not world.episode_done():
            world.parley()
        world.shutdown()

    mturk_manager.set_onboard_function(onboard_function=run_onboard)

    try:
        mturk_manager.start_new_run()
        # Only workers WITHOUT the exclusion qualification may preview/accept
        agent_qualifications = [{
            'QualificationTypeId': qualification_id,
            'Comparator': 'DoesNotExist',
            'RequiredToPreview': True
        }]
        mturk_manager.create_hits(qualifications=agent_qualifications)
        mturk_manager.ready_to_accept_workers()

        def check_worker_eligibility(worker):
            # Every worker is eligible for this task
            return True

        def assign_worker_roles(worker):
            worker[0].id = mturk_agent_id

        global run_conversation

        def run_conversation(mturk_manager, opt, workers):
            mturk_agent = workers[0]
            world = QualificationFlowSoloWorld(
                opt=opt,
                mturk_agent=mturk_agent,
                qualification_id=qualification_id,
                # first attempt gets different treatment than repeat attempts
                firstTime=(mturk_agent.worker_id not in completed_workers),
            )
            while not world.episode_done():
                world.parley()
            completed_workers.append(mturk_agent.worker_id)
            world.shutdown()
            world.review_work()

        mturk_manager.start_task(eligibility_function=check_worker_eligibility,
                                 assign_role_function=assign_worker_roles,
                                 task_function=run_conversation)
    except BaseException:
        raise
    finally:
        # Remove the run-scoped qualification, then clean up HITs and server
        mturk_utils.delete_qualification(qualification_id, opt['is_sandbox'])
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
def main():
    """This task consists of an MTurk agent evaluating a Controllable Dialog
    model.

    One model instance is created per entry in SETTINGS_TO_RUN; each worker
    is routed to a setting they have not evaluated yet, with bookkeeping
    shared across task threads and guarded by a Lock.
    """
    start_time = datetime.datetime.today().strftime('%Y-%m-%d-%H-%M')
    argparser = ParlaiParser(False, add_model_args=True)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    argparser.add_argument(
        '--max-resp-time',
        default=240,
        type=int,
        help='time limit for entering a dialog message',
    )
    argparser.add_argument(
        '--max-choice-time',
        type=int,
        default=300,
        help='time limit for turker'
        'choosing the topic',
    )
    argparser.add_argument(
        '--ag-shutdown-time',
        default=120,
        type=int,
        help='time limit for entering a dialog message',
    )
    argparser.add_argument(
        '--num-turns', default=6, type=int, help='number of turns of dialogue'
    )
    argparser.add_argument(
        '--human-eval',
        type='bool',
        default=False,
        help='human vs human eval, no models involved',
    )
    argparser.add_argument(
        '--auto-approve-delay',
        type=int,
        default=3600 * 24 * 2,
        help='how long to wait for auto approval',
    )
    argparser.add_argument(
        '--only-masters',
        type='bool',
        default=False,
        help='Set to true to use only master turks for '
        'this test eval',
    )
    argparser.add_argument(
        '--create-model-qualif',
        type='bool',
        default=True,
        help='Create model qualif so unique eval between'
        'models.',
    )
    argparser.add_argument(
        '--limit-workers',
        type=int,
        default=len(SETTINGS_TO_RUN),
        help='max HITs a worker can complete',
    )
    argparser.add_argument(
        '--mturk-log',
        type=str,
        default=('data/mturklogs/controllable/{}.log'.format(start_time)),
    )
    argparser.add_argument(
        '--short-eval',
        type='bool',
        default=True,
        help='Only ask engagingness question and persona'
        'question.',
    )
    # persona specific arguments
    argparser.add_argument(
        '--persona-type', type=str, default='self', choices=['self', 'other', 'none']
    )
    argparser.add_argument(
        '--persona-datatype',
        type=str,
        default='valid',
        choices=['train', 'test', 'valid'],
    )
    argparser.add_argument(
        '--max-persona-time', type=int, default=360, help='max time to view persona'
    )

    def get_logger(opt):
        # Console + file logger; also records the launch command and config
        fmt = '%(asctime)s: [ %(message)s ]'
        logfn = None
        if 'mturk_log' in opt:
            logfn = opt['mturk_log']
            if not os.path.isdir(os.path.dirname(logfn)):
                os.makedirs(os.path.dirname(logfn), exist_ok=True)
        logger = ParlaiLogger(
            name="mturk_controllable",
            console_level=INFO,
            file_level=INFO,
            console_format=fmt,
            file_format=fmt,
            filename=logfn,
        )
        logger.info('COMMAND: %s' % ' '.join(sys.argv))
        logger.info('-' * 100)
        logger.info('CONFIG:\n%s' % json.dumps(opt, indent=4, sort_keys=True))
        return logger

    start_opt = argparser.parse_args()

    task_config['task_description'] = task_config['task_description'].format(
        start_opt['reward']
    )

    # set options
    start_opt['limit_workers'] = len(SETTINGS_TO_RUN)
    start_opt['allowed_conversations'] = 1
    start_opt['max_hits_per_worker'] = start_opt['limit_workers']
    start_opt['task'] = os.path.basename(os.path.dirname(os.path.abspath(__file__)))

    start_opt.update(task_config)

    logger = get_logger(start_opt)

    model_share_params = {}  # setting name -> shared params for model cloning
    worker_models_seen = {}  # worker_id -> set of settings already evaluated
    model_opts = {}          # setting name -> merged config for that setting
    model_counts = {}        # setting name -> number of started conversations
    lock = Lock()            # guards the shared dicts across task threads

    # Build one model per setting up-front so conversations can share params
    for setup in SETTINGS_TO_RUN:
        assert 'human' not in setup
        model_counts[setup] = 0
        agent_config = getattr(mcf, setup)
        combined_config = copy.deepcopy(start_opt)
        for k, v in agent_config.items():
            combined_config[k] = v
            combined_config['override'][k] = v
        folder_name = '{}-{}'.format(setup, start_time)
        combined_config['save_data_path'] = os.path.join(
            start_opt['datapath'], 'local_controllable_dialogue', folder_name
        )
        model_opts[setup] = combined_config
        bot = create_agent(combined_config, True)
        model_share_params[setup] = bot.share()

    if not start_opt.get('human_eval'):
        mturk_agent_ids = ['PERSON_1']
    else:
        mturk_agent_ids = ['PERSON_1', 'PERSON_2']

    mturk_manager = MTurkManager(opt=start_opt, mturk_agent_ids=mturk_agent_ids)

    personas_generator = PersonasGenerator(start_opt)

    directory_path = os.path.dirname(os.path.abspath(__file__))

    mturk_manager.setup_server(task_directory_path=directory_path)

    try:
        mturk_manager.start_new_run()
        agent_qualifications = []
        # assign qualifications
        if start_opt['create_model_qualif']:
            qual_name = 'ControlEvalRound2'
            qual_desc = (
                'Qualification to ensure workers complete only a certain'
                'number of these HITs'
            )
            qualification_id = mturk_utils.find_or_create_qualification(
                qual_name, qual_desc, False
            )
            print('Created qualification: ', qualification_id)
            start_opt['unique_qualif_id'] = qualification_id

        def run_onboard(worker):
            # Assign the worker a persona before they enter the task pool
            worker.personas_generator = personas_generator
            world = PersonaAssignWorld(start_opt, worker)
            world.parley()
            world.shutdown()

        def check_worker_eligibility(worker):
            # Eligible until the worker has seen every setting
            worker_id = worker.worker_id
            lock.acquire()
            retval = len(worker_models_seen.get(worker_id, [])) < len(SETTINGS_TO_RUN)
            lock.release()
            return retval

        def assign_worker_roles(workers):
            for index, worker in enumerate(workers):
                worker.id = mturk_agent_ids[index % len(mturk_agent_ids)]

        mturk_manager.set_onboard_function(onboard_function=run_onboard)
        mturk_manager.ready_to_accept_workers()
        mturk_manager.create_hits(qualifications=agent_qualifications)

        def run_conversation(mturk_manager, opt, workers):
            conv_idx = mturk_manager.conversation_index
            # gotta find a bot this worker hasn't seen yet
            assert len(workers) == 1
            worker_id = workers[0].worker_id
            lock.acquire()
            if worker_id not in worker_models_seen:
                worker_models_seen[worker_id] = set()
            print("MODELCOUNTS:")
            print(pprint.pformat(model_counts))
            logger.info("MODELCOUNTS\n" + pprint.pformat(model_counts))
            # Random jitter keeps assignment roughly balanced across settings
            model_options = [
                (model_counts[setup_name] + 10 * random.random(), setup_name)
                for setup_name in SETTINGS_TO_RUN
                if setup_name not in worker_models_seen[worker_id]
            ]
            if not model_options:
                lock.release()
                logger.error(
                    "Worker {} already finished all settings! Returning none".format(
                        worker_id
                    )
                )
                return None
            _, model_choice = min(model_options)

            worker_models_seen[worker_id].add(model_choice)
            model_counts[model_choice] += 1
            lock.release()

            world = ControllableDialogEval(
                opt=model_opts[model_choice],
                agents=workers,
                num_turns=start_opt['num_turns'],
                max_resp_time=start_opt['max_resp_time'],
                model_agent_opt=model_share_params[model_choice],
                world_tag='conversation t_{}'.format(conv_idx),
                agent_timeout_shutdown=opt['ag_shutdown_time'],
                model_config=model_choice,
            )
            world.reset_random()
            while not world.episode_done():
                world.parley()
            world.save_data()

            lock.acquire()
            if not world.convo_finished:
                # Aborted conversation: free the slot for another worker
                model_counts[model_choice] -= 1
                worker_models_seen[worker_id].remove(model_choice)
            lock.release()

            world.shutdown()
            gc.collect()

        mturk_manager.start_task(
            eligibility_function=check_worker_eligibility,
            assign_role_function=assign_worker_roles,
            task_function=run_conversation,
        )

    except BaseException:
        raise
    finally:
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
def main():
    """
    Main script for running an eval task against the LIGHT dataset.

    special CLI arguments are
      --light-eval-task-type [speech, emote, action]
      --light-eval-unseen [False, True]

    This launches a task that, on a workers first attempt pairs with an entry
    from the training set. Then based on if the worker performs above a
    specified benchmark, they will either be soft blocked from evaluating or
    allowed to try against the test set.

    Bugs fixed vs. original:
    - the training branch of run_conversation popped from the master
      ``train_samples`` list instead of the temporary ``use_train_samples``
      pool, permanently draining the master list (eventual IndexError) while
      the temporary pool never shrank;
    - ``use_samples`` (the evaluation pool) was seeded from ``train_samples``
      instead of the evaluation ``samples`` list, inconsistent with its own
      refill logic (`use_samples = samples.copy()` when empty).
    """
    # Get relevant arguments
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    argparser.set_defaults(datatype='test:stream')
    argparser.add_argument('--light-eval-task-type', default='speech',
                           help='Type of task to be evaluating')
    argparser.add_argument(
        '--light-eval-unseen',
        default=False,
        type='bool',
        help='Evaluate against the unseen test rather than the seen test',
    )
    opt = argparser.parse_args()
    task_opt = opt.copy()
    task_opt['task'] = 'light_dialog'
    assert opt['light_eval_task_type'] in [
        'speech',
        'emote',
        'action',
    ], '--light-eval-task-type must be one of speech, emote, or action'

    LABEL_TYPE = opt['light_eval_task_type']  # speech, emote, action

    # Per-type tuning: shorter episodes / more training passes for non-speech
    TRAIN_TURNS = 7
    TRAININGS = 1
    MAX_WRONG = 1
    if LABEL_TYPE != 'speech':
        TRAIN_TURNS = 3
        TRAININGS = 2
        MAX_WRONG = 3 if LABEL_TYPE == 'emote' else 2

    task_opt['light_label_type'] = LABEL_TYPE
    task_opt['light_use_action'] = 'all'
    task_opt['light_use_cands'] = '20'
    task_opt['light_use_emote'] = 'all'
    task_opt['light_use_objects'] = True
    task_opt['light_use_person_names'] = True
    task_opt['light_use_persona'] = 'self'
    task_opt['light_use_repeat'] = 'none'
    task_opt['light_use_setting'] = True
    task_opt['light_use_speech'] = 'all'
    task_opt['light_use_current_self_output'] = 'all'
    task_opt['light_use_clip_cands'] = 10000
    task_opt['light_unseen_test'] = task_opt['light_eval_unseen']
    random.seed(10)

    agent = RepeatLabelAgent(task_opt)
    world = create_task(task_opt, agent)

    # Populate evaluation dialogues from the LIGHT dataset (test split);
    # only keep episodes with at least TRAIN_TURNS turns
    samples = []
    curr_sample = []
    while True:
        world.parley()
        curr_sample.append(world.acts[0].copy())
        if world.acts[0]['episode_done']:
            if len(curr_sample) >= TRAIN_TURNS:
                samples.append(curr_sample)
            curr_sample = []
        if world.epoch_done():
            break

    # Populate training dialogues (capped at ~2000 episodes)
    train_samples = []
    task_opt['datatype'] = 'train:stream'
    task_opt['light_unseen_test'] = False
    agent = RepeatLabelAgent(task_opt)
    world = create_task(task_opt, agent)
    curr_sample = []
    while True:
        world.parley()
        curr_sample.append(world.acts[0].copy())
        if world.acts[0]['episode_done']:
            if len(curr_sample) >= TRAIN_TURNS:
                train_samples.append(curr_sample)
            curr_sample = []
        if world.epoch_done() or len(train_samples) > 2000:
            break

    # Set up temporary pools to pull tasks from
    use_train_samples = train_samples.copy()
    use_samples = samples.copy()  # FIX: was train_samples.copy()

    # Set the task name to be the folder name
    opt['task'] = os.path.basename(os.path.dirname(os.path.abspath(__file__)))

    # append the contents of task_config.py to the configuration
    opt.update(task_config)

    # Select an agent_id that worker agents will be assigned in their world
    mturk_agent_roles = [LABEL_TYPE]
    opt['assignment_duration_in_seconds'] = 20 * 60

    # Instantiate an MTurkManager with the given options and a maximum number
    # of agents per world of 1 (based on the length of mturk_agent_ids)
    mturk_manager = MTurkManager(
        opt=opt, mturk_agent_ids=mturk_agent_roles, use_db=True)
    mturk_manager.setup_server(
        task_directory_path=os.path.dirname(os.path.abspath(__file__)))

    # Create an onboard_function, which will be run for workers who have
    # accepted your task and must be completed before they are put in the
    # queue for a task world.
    completed_agents = []  # worker_ids that passed the onboarding test
    completed_train = {}   # worker_id -> number of passed training rounds

    def run_onboard(worker):
        nonlocal completed_agents
        if worker.worker_id in completed_agents:
            return
        else:
            world = LightEvalTestWorld(opt=opt, mturk_agent=worker)
            while not world.episode_done():
                world.parley()
            if world.did_complete:
                completed_agents.append(worker.worker_id)
            else:
                print(worker.worker_id, 'Failed the onboarding')
            world.shutdown()
            return world.prep_save_data([worker])

    mturk_manager.set_onboard_function(onboard_function=run_onboard)

    try:
        # Initialize run information
        mturk_manager.start_new_run()

        # Set up the sockets and threads to recieve workers
        mturk_manager.ready_to_accept_workers()

        # Create the hits as specified by command line arguments
        mturk_manager.create_hits(qualifications=[])

        def check_workers_eligibility(workers):
            return workers

        eligibility_function = {
            'func': check_workers_eligibility,
            'multiple': True
        }

        def assign_worker_roles(workers):
            workers[0].id = LABEL_TYPE

        global run_conversation

        def run_conversation(mturk_manager, opt, workers):
            nonlocal completed_train
            nonlocal use_samples
            nonlocal use_train_samples
            worker_id = workers[0].worker_id
            # Workers do TRAININGS training rounds before real evaluation
            use_train = True
            if worker_id not in completed_train:
                completed_train[worker_id] = 0
            if completed_train[worker_id] >= TRAININGS:
                use_train = False

            # Create the real task world
            if not use_train:
                if len(use_samples) == 0:
                    # reset the pool if none are left
                    use_samples = samples.copy()
                sample = use_samples.pop()
            else:
                if len(use_train_samples) == 0:
                    # reset the pool if none are left
                    use_train_samples = train_samples.copy()
                # FIX: pop from the temporary pool, not the master list
                sample = use_train_samples.pop()
            world = LightEvalTaskWorld(
                opt=opt,
                mturk_agents=workers,
                sample=sample,
                use_train=use_train,
                max_wrong=MAX_WRONG,
            )

            # run the world to completion
            while not world.episode_done():
                world.parley()

            # shutdown and review the work
            world.shutdown()
            world.review_work()
            if not world.completed and not use_train:
                # Unfinished eval sample goes back for someone else to do
                samples.append(sample)
            if use_train and world.completed:
                completed_train[worker_id] += 1
                print('Worker passed train: ', worker_id)

            # Return the contents for saving
            return world.prep_save_data(workers)

        # Begin the task, allowing mturk_manager to start running the task
        # world on any workers who connect
        mturk_manager.start_task(
            eligibility_function=eligibility_function,
            assign_role_function=assign_worker_roles,
            task_function=run_conversation,
        )
    except BaseException:
        raise
    finally:
        print('Accepted agents:', repr(completed_agents))
        # Shutdown the manager and free all related resources
        mturk_manager.shutdown()
def main():
    """Evaluate a Transformer generator model with a single MTurk worker.

    Cleanup vs. original: the unused ``IrBaselineAgent`` import is removed.
    NOTE(review): the dict/model files are hard-coded absolute paths on a
    development machine — they should become CLI arguments before this is
    run anywhere else.
    """
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()

    # The dialog model we want to evaluate
    from parlai.agents.transformer.transformer import TransformerGeneratorAgent
    TransformerGeneratorAgent.add_cmdline_args(argparser)
    opt = argparser.parse_args()
    # Task name is derived from the containing folder's name
    opt['task'] = os.path.basename(os.path.dirname(os.path.abspath(__file__)))
    opt.update(task_config)

    # The task that we will evaluate the dialog model on
    task_opt = {}
    task_opt['datatype'] = 'test'
    task_opt['datapath'] = opt['datapath']
    task_opt['task'] = 'dailydialog:NoStart'

    # Hard-coded model configuration (see NOTE in docstring)
    opt['dict_file'] = '/home/christian/developer/cs767hw4/models/test.dict'
    opt['model_file'] = '/home/christian/developer/cs767hw4/models/test.checkpoint'
    opt['truncate'] = 64
    opt['inference'] = 'mmi'
    opt['beam_size'] = 32
    opt['no_cuda'] = True

    mturk_agent_id = 'Worker'
    mturk_manager = MTurkManager(opt=opt, mturk_agent_ids=[mturk_agent_id])
    mturk_manager.setup_server()  # task_name="dialo-mturk-eval", exists=True

    try:
        mturk_manager.start_new_run()
        mturk_manager.create_hits()

        def run_onboard(worker):
            world = ModelEvaluatorOnboardWorld(opt=opt, mturk_agent=worker)
            while not world.episode_done():
                world.parley()
            world.shutdown()

        mturk_manager.set_onboard_function(onboard_function=run_onboard)
        mturk_manager.ready_to_accept_workers()

        def check_worker_eligibility(worker):
            # Every worker is eligible for this task
            return True

        def assign_worker_roles(worker):
            worker[0].id = mturk_agent_id

        global run_conversation

        def run_conversation(mturk_manager, opt, workers):
            mturk_agent = workers[0]
            # Fresh model agent per conversation
            model_agent = TransformerGeneratorAgent(opt=opt)
            world = ModelEvaluatorWorld(
                opt=opt,
                model_agent=model_agent,
                task_opt=task_opt,
                mturk_agent=mturk_agent,
            )
            while not world.episode_done():
                world.parley()
            world.shutdown()
            world.review_work()

        mturk_manager.start_task(
            eligibility_function=check_worker_eligibility,
            assign_role_function=assign_worker_roles,
            task_function=run_conversation,
        )
    except BaseException:
        raise
    finally:
        # Always expire leftover HITs and free server resources
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
def main():
    """This task consists of an MTurk agent evaluating a chit-chat model. They
    are asked to chat to the model adopting a specific persona. After their
    conversation, they are asked to evaluate their partner on several metrics.

    Cleanup vs. original: removed the unused ``agents`` local and dead
    commented-out code inside the task callbacks; behavior is unchanged.
    """
    argparser = ParlaiParser(False, add_model_args=True)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    argparser.add_argument(
        '-dp', '--datapath', default='./',
        help='path to datasets, defaults to current directory')
    opt = argparser.parse_args()

    # add additional model args
    opt['override'] = {
        'no_cuda': True,
        'interactive_mode': True,
        'tensorboard_log': False
    }

    # Set the task name to be the folder name
    opt['task'] = os.path.basename(os.path.dirname(os.path.abspath(__file__)))

    # append the contents of task_config.py to the configuration
    opt.update(task_config)

    mturk_agent_id = 'Tourist'
    mturk_manager = MTurkManager(
        opt=opt,
        mturk_agent_ids=[mturk_agent_id]
    )
    mturk_manager.setup_server()

    try:
        mturk_manager.start_new_run()
        mturk_manager.ready_to_accept_workers()
        # Restrict HITs via the sandbox locale qualification
        mturk_manager.create_hits([LOCALE_QUALIF_SDBOX])
        mturk_manager.set_onboard_function(onboard_function=None)

        def check_worker_eligibility(worker):
            # Every worker is eligible for this task
            return True

        def assign_worker_roles(workers):
            workers[0].id = mturk_agent_id

        def run_conversation(mturk_manager, opt, workers):
            world = MultiWozEvalWorld(
                opt=opt,
                agent=workers[0]
            )
            while not world.episode_done():
                print("parley")
                world.parley()
            print("save data")
            world.save_data()
            print("world shutdown")
            world.shutdown()

        mturk_manager.start_task(
            eligibility_function=check_worker_eligibility,
            assign_role_function=assign_worker_roles,
            task_function=run_conversation
        )
    except BaseException:
        raise
    finally:
        # Always expire leftover HITs and free server resources
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
def main():
    """
    Handles setting up and running a ParlAI-MTurk task by instantiating an
    MTurk manager and configuring it for the qa_data_collection task.
    """
    # Get relevant arguments
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    argparser.add_argument(
        '--light-unseen-rooms',
        default=False,
        type='bool',
        help='Launch using rooms from the unseen set rather than the seen',
    )
    opt = argparser.parse_args()
    # Source of rooms/characters for each conversation's graph
    generator = GraphGenerator(opt, opt['light_unseen_rooms'])

    # Set the task name to be the folder name
    opt['task'] = os.path.basename(os.path.dirname(os.path.abspath(__file__)))

    # append the contents of task_config.py to the configuration
    opt.update(task_config)

    # Select an agent_id that worker agents will be assigned in their world
    mturk_agent_roles = ['worker_1', 'worker_2']

    # Set runtime to be an hour in case workers are slow
    opt['assignment_duration_in_seconds'] = 60 * 60

    # Instantiate an MTurkManager with the given options and a maximum number
    # of agents per world of 1 (based on the length of mturk_agent_ids)
    mturk_manager = MTurkManager(
        opt=opt, mturk_agent_ids=mturk_agent_roles, use_db=True
    )
    mturk_manager.setup_server(
        task_directory_path=os.path.dirname(os.path.abspath(__file__))
    )

    # Create an onboard_function, which will be run for workers who have
    # accepted your task and must be completed before they are put in the
    # queue for a task world.
    completed_agents = []  # worker_ids that already finished onboarding

    def run_onboard(worker):
        nonlocal completed_agents
        # Workers who already passed onboarding go straight to the pool
        if worker.worker_id in completed_agents:
            return
        else:
            world = LightChatOnboardingWorld(opt=opt, mturk_agent=worker)
            while not world.episode_done():
                world.parley()
            world.shutdown()
            completed_agents.append(worker.worker_id)
            print(worker.worker_id, 'took', world.turns, 'turns for onboarding')
            return world.prep_save_data([worker])

    # If we want to use the above onboard function, we can replace the below
    # with set_onboard_function(onboard_function=run_onboard)
    mturk_manager.set_onboard_function(onboard_function=run_onboard)

    qualification_id = mturk_utils.find_qualification(
        'adventure_chat_reject', opt['is_sandbox'], must_be_owned=False
    )
    print('Found qualification: ', qualification_id)

    try:
        # Initialize run information
        mturk_manager.start_new_run()

        # Set up the sockets and threads to recieve workers
        mturk_manager.ready_to_accept_workers()

        # Keep out workers who hold the rejection qualification
        agent_qualifications = [
            {
                'QualificationTypeId': qualification_id,
                'Comparator': 'DoesNotExist',
                'RequiredToPreview': True,
            }
        ]

        # Create the hits as specified by command line arguments
        mturk_manager.create_hits(qualifications=agent_qualifications)

        # Check workers eligiblity acts as a filter, and should return
        # the list of all workers currently eligible to work on the task
        # Can be used to pair workers that meet certain criterea
        def check_workers_eligibility(workers):
            return workers

        eligibility_function = {'func': check_workers_eligibility, 'multiple': True}

        # Assign worker roles is used to determine what the role each worker
        # in the given worker list will play. Setting `id` to None will return
        # the worker to the pool rather than putting them in a given task,
        # which is useful for having tasks with different possible worker
        # counts.
        def assign_worker_roles(workers):
            workers[0].id = mturk_agent_roles[0]
            workers[1].id = mturk_agent_roles[1]

        # Define the task function, which will be run with workers that are
        # as the main task.
        global run_conversation

        def run_conversation(mturk_manager, opt, workers):
            # Create the task world; retry graph generation until it succeeds
            g = None
            while g is None:
                try:
                    g, room, characters = generator.get_room()
                except Exception as e:
                    print('error when creating graph:', repr(e))
            world = LightChatTaskWorld(
                opt=opt, mturk_agents=workers, graph=g, room=room,
                characters=characters
            )
            # run the world to completion
            while not world.episode_done():
                world.parley()

            # shutdown and review the work
            world.shutdown()
            world.review_work()

            # Return the contents for saving
            return world.prep_save_data(workers)

        # Begin the task, allowing mturk_manager to start running the task
        # world on any workers who connect
        mturk_manager.start_task(
            eligibility_function=eligibility_function,
            assign_role_function=assign_worker_roles,
            task_function=run_conversation,
        )
    except BaseException:
        raise
    finally:
        # Any hits that aren't claimed or completed have to be shut down. Must
        # keep the world running until that point.
        mturk_manager.expire_all_unassigned_hits()

        # Shutdown the manager and free all related resources
        mturk_manager.shutdown()
def main():
    """Run the persona-profile MTurk collection task.

    Parses MTurk/ParlAI arguments, spins up a server, and runs each
    connecting worker through a single ``PersonaProfileWorld``.
    """
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    argparser.add_argument('-min_t', '--min_turns', default=5, type=int,
                           help='minimum number of turns')
    argparser.add_argument('-mt', '--max_turns', default=10, type=int,
                           help='maximal number of chat turns')
    argparser.add_argument('-mx_rsp_time', '--max_resp_time', default=150,
                           type=int,
                           help='time limit for entering a dialog message')
    # BUGFIX: the help text was written as two adjacent string literals
    # ('time limit for turker' 'entering the persona') which concatenated
    # to "turkerentering" — a space was missing.
    argparser.add_argument('-mx_psn_time', '--max_persona_time', type=int,
                           default=300,
                           help='time limit for turker entering the persona')
    argparser.add_argument('--ag_shutdown_time', default=120, type=int,
                           help='time limit for entering a dialog message')
    argparser.add_argument('-rp', '--range_persona', default='4,6',
                           help='sample range of number of persona sentences')
    opt = argparser.parse_args()
    # Task name is taken from the directory containing this file.
    opt['task'] = os.path.basename(os.path.dirname(os.path.abspath(__file__)))
    if 'data_path' not in opt:
        opt['data_path'] = os.getcwd() + '/data/' + opt['task']
    opt.update(task_config)

    mturk_agent_ids = ['PERSON_1']

    mturk_manager = MTurkManager(opt=opt, mturk_agent_ids=mturk_agent_ids)

    mturk_manager.setup_server()

    try:
        mturk_manager.start_new_run()
        mturk_manager.create_hits()

        if not opt['is_sandbox']:
            # ADD BLOCKED WORKERS HERE
            blocked_worker_list = []
            for w in blocked_worker_list:
                # BUGFIX: this message was a string literal broken across
                # two physical lines (a syntax error); restored as one
                # message via implicit concatenation.
                mturk_manager.block_worker(
                    w,
                    'We found that you have unexpected behaviors in our '
                    'previous HITs. For more questions please email us.')

        def run_onboard(worker):
            # No onboarding step for this task.
            pass

        mturk_manager.set_onboard_function(onboard_function=run_onboard)
        mturk_manager.ready_to_accept_workers()

        def check_worker_eligibility(worker):
            # All workers are eligible.
            return True

        def assign_worker_roles(workers):
            # Round-robin role assignment (only one role here).
            for index, worker in enumerate(workers):
                worker.id = mturk_agent_ids[index % len(mturk_agent_ids)]

        def run_conversation(mturk_manager, opt, workers):
            # Single-worker world: run to completion, then persist data.
            worker = workers[0]
            world = PersonaProfileWorld(opt, worker)
            while not world.episode_done():
                world.parley()
            world.save_data()
            world.shutdown()

        mturk_manager.start_task(
            eligibility_function=check_worker_eligibility,
            assign_role_function=assign_worker_roles,
            task_function=run_conversation,
        )
    except BaseException:
        raise
    finally:
        # Unclaimed HITs must be expired before shutting down the server.
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
def setUp(self):
    """Build a fully mocked MTurkManager running a live task thread.

    All AWS/heroku touch-points are replaced with MagicMocks so the
    manager can go through its whole setup flow offline, then three
    MockAgents are prepared for the individual tests to connect with.
    """
    # Automatically answer 'y' to any input() prompt raised during setup.
    patcher = mock.patch('builtins.input', return_value='y')
    self.addCleanup(patcher.stop)
    patcher.start()

    # Mock functions that hit external APIs and such
    self.server_utils = MTurkManagerFile.server_utils
    self.mturk_utils = MTurkManagerFile.mturk_utils
    self.server_utils.setup_server = mock.MagicMock(
        return_value='https://127.0.0.1')
    self.server_utils.setup_legacy_server = mock.MagicMock(
        return_value='https://127.0.0.1')
    self.server_utils.delete_server = mock.MagicMock()
    self.mturk_utils.setup_aws_credentials = mock.MagicMock()
    self.mturk_utils.calculate_mturk_cost = mock.MagicMock(return_value=1)
    self.mturk_utils.check_mturk_balance = mock.MagicMock(
        return_value=True)
    self.mturk_utils.create_hit_config = mock.MagicMock()
    self.mturk_utils.setup_sns_topic = mock.MagicMock(
        return_value=TOPIC_ARN)
    self.mturk_utils.delete_sns_topic = mock.MagicMock()
    self.mturk_utils.delete_qualification = mock.MagicMock()
    self.mturk_utils.find_or_create_qualification = mock.MagicMock(
        return_value=QUALIFICATION_ID)
    self.mturk_utils.find_qualification = mock.MagicMock(
        return_value=QUALIFICATION_ID)
    self.mturk_utils.give_worker_qualification = mock.MagicMock()
    self.mturk_utils.remove_worker_qualification = mock.MagicMock()
    self.mturk_utils.create_hit_type = mock.MagicMock(
        return_value=HIT_TYPE_ID)
    self.mturk_utils.subscribe_to_hits = mock.MagicMock()
    self.mturk_utils.create_hit_with_hit_type = mock.MagicMock(
        return_value=(MTURK_PAGE_URL, FAKE_HIT_ID, 'MTURK_HIT_DATA'))
    self.mturk_utils.get_mturk_client = mock.MagicMock(
        return_value=mock.MagicMock())

    # Bookkeeping dicts populated by the onboarding/task callbacks.
    self.onboarding_agents = {}
    self.worlds_agents = {}

    # Set up an MTurk Manager and get it ready for accepting workers
    self.fake_socket = MockSocket()
    # Brief pause lets the mock socket finish binding before use.
    time.sleep(0.1)
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    self.opt = argparser.parse_args()
    self.opt['task'] = 'unittest'
    # Short duration so expiry-related paths trigger quickly in tests.
    self.opt['assignment_duration_in_seconds'] = 1
    self.opt['hit_title'] = 'test_hit_title'
    self.opt['hit_description'] = 'test_hit_description'
    self.opt['task_description'] = 'test_task_description'
    self.opt['hit_keywords'] = 'test_hit_keywords'
    self.opt['reward'] = 0.1
    self.opt['is_debug'] = True
    self.opt['log_level'] = 0
    self.opt['num_conversations'] = 1
    self.mturk_agent_ids = ['mturk_agent_1', 'mturk_agent_2']
    self.mturk_manager = MTurkManager(opt=self.opt,
                                      mturk_agent_ids=self.mturk_agent_ids,
                                      is_test=True)
    # Point the manager at the mock socket rather than a real port.
    self.mturk_manager.port = self.fake_socket.port
    self.mturk_manager.setup_server()
    self.mturk_manager.start_new_run()
    self.mturk_manager.ready_to_accept_workers()
    self.mturk_manager.set_onboard_function(self.onboard_agent)
    self.mturk_manager.create_hits()

    def assign_worker_roles(workers):
        workers[0].id = 'mturk_agent_1'
        workers[1].id = 'mturk_agent_2'

    def run_task_wait():
        self.mturk_manager.start_task(lambda w: True, assign_worker_roles,
                                      self.run_conversation)

    # start_task blocks, so it runs on its own thread for the tests.
    self.task_thread = threading.Thread(target=run_task_wait)
    self.task_thread.start()

    # Two distinct workers, plus a second assignment for worker 1.
    self.agent_1 = MockAgent(TEST_HIT_ID_1, TEST_ASSIGNMENT_ID_1,
                             TEST_WORKER_ID_1, TASK_GROUP_ID_1)
    self.agent_1_2 = MockAgent(TEST_HIT_ID_1, TEST_ASSIGNMENT_ID_3,
                               TEST_WORKER_ID_1, TASK_GROUP_ID_1)
    self.agent_2 = MockAgent(TEST_HIT_ID_2, TEST_ASSIGNMENT_ID_2,
                             TEST_WORKER_ID_2, TASK_GROUP_ID_1)
def main():
    """Run the personachat MTurk data-collection task.

    Pairs two MTurk workers (``PERSON_1`` and ``PERSON_2``), runs each
    through a persona-assignment onboarding world, then puts the pair
    into a ``PersonaChatWorld`` conversation.
    """
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    argparser.add_argument('-min_t', '--min_turns', default=5, type=int,
                           help='minimum number of turns')
    argparser.add_argument('-mt', '--max_turns', default=10, type=int,
                           help='maximal number of chat turns')
    argparser.add_argument(
        '-mx_rsp_time',
        '--max_resp_time',
        default=150,
        type=int,
        help='time limit for entering a dialog message',
    )
    argparser.add_argument(
        '-mx_psn_time',
        '--max_persona_time',
        type=int,
        default=300,
        # BUGFIX: previously two adjacent literals concatenated to
        # "time limit for turkerentering the persona" (missing space).
        help='time limit for turker entering the persona',
    )
    argparser.add_argument(
        '--ag_shutdown_time',
        default=120,
        type=int,
        help='time limit for entering a dialog message',
    )
    argparser.add_argument(
        '--persona-type',
        default='both',
        type=str,
        choices=['both', 'self', 'other'],
        help='Which personas to load from personachat',
    )
    argparser.add_argument('--revised', default=False, type='bool',
                           help='Whether to use revised personas')
    argparser.add_argument('-rt', '--range_turn', default='5,7',
                           help='sample range of number of turns')
    argparser.add_argument('--personas-path', default=None,
                           help='specify path for personas data')
    opt = argparser.parse_args()

    directory_path = os.path.dirname(os.path.abspath(__file__))
    opt['task'] = os.path.basename(directory_path)
    if not opt.get('personas_path'):
        # Fall back to the stock personachat data shipped with ParlAI.
        opt['personas_path'] = (argparser.parlai_home
                                + '/parlai/mturk/personachat_chat/data')
    opt.update(task_config)

    opt['extract_personas_path'] = os.path.join(opt['datapath'],
                                                'personachat_chat')

    mturk_agent_ids = ['PERSON_1', 'PERSON_2']

    mturk_manager = MTurkManager(opt=opt, mturk_agent_ids=mturk_agent_ids)

    persona_generator = PersonasGenerator(opt)
    mturk_manager.setup_server(task_directory_path=directory_path)

    try:
        mturk_manager.start_new_run()
        mturk_manager.create_hits()

        if not opt['is_sandbox']:
            # ADD BLOCKED WORKERS HERE
            blocked_worker_list = []
            for w in blocked_worker_list:
                mturk_manager.block_worker(
                    w,
                    'We found that you have unexpected behaviors in our previous '
                    'HITs. For more questions please email us.',
                )

        def run_onboard(worker):
            # Give the worker a persona before the main conversation.
            worker.persona_generator = persona_generator
            world = PersonaProfileWorld(opt, worker)
            world.parley()
            world.shutdown()

        mturk_manager.set_onboard_function(onboard_function=run_onboard)
        mturk_manager.ready_to_accept_workers()

        def check_worker_eligibility(worker):
            # All workers are eligible.
            return True

        def assign_worker_roles(workers):
            # Alternate PERSON_1 / PERSON_2 across the paired workers.
            for index, worker in enumerate(workers):
                worker.id = mturk_agent_ids[index % len(mturk_agent_ids)]

        def run_conversation(mturk_manager, opt, workers):
            agents = [workers[0], workers[1]]
            conv_idx = mturk_manager.conversation_index
            world = PersonaChatWorld(
                opt=opt,
                agents=agents,
                range_turn=[int(s) for s in opt['range_turn'].split(',')],
                max_turn=opt['max_turns'],
                max_resp_time=opt['max_resp_time'],
                world_tag='conversation t_{}'.format(conv_idx),
            )
            world.reset_random()
            while not world.episode_done():
                world.parley()
            world.save_data()
            world.shutdown()
            world.review_work()

        mturk_manager.start_task(
            eligibility_function=check_worker_eligibility,
            assign_role_function=assign_worker_roles,
            task_function=run_conversation,
        )
    except BaseException:
        raise
    finally:
        # Unclaimed HITs must be expired before shutting down the server.
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
def main():
    """
    This task consists of one local human agent and two MTurk agents,
    each MTurk agent will go through the onboarding step to provide
    information about themselves, before being put into a conversation.
    You can end the conversation by sending a message ending with
    `[DONE]` from human_1.
    """
    # Standard ParlAI MTurk argument setup.
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    opt = argparser.parse_args()
    opt['task'] = os.path.basename(os.path.dirname(os.path.abspath(__file__)))
    opt.update(task_config)

    # Role identifiers for the two turkers and the local human.
    mturk_agent_1_id = 'mturk_agent_1'
    mturk_agent_2_id = 'mturk_agent_2'
    human_agent_1_id = 'human_1'
    mturk_agent_ids = [mturk_agent_1_id, mturk_agent_2_id]

    mturk_manager = MTurkManager(opt=opt, mturk_agent_ids=mturk_agent_ids)
    mturk_manager.setup_server()

    try:
        mturk_manager.start_new_run()
        mturk_manager.create_hits()

        def run_onboard(worker):
            # Each turker fills out the onboarding world until done.
            world = MTurkMultiAgentDialogOnboardWorld(opt=opt,
                                                      mturk_agent=worker)
            while not world.episode_done():
                world.parley()
            world.shutdown()

        # You can set onboard_function to None to skip onboarding
        mturk_manager.set_onboard_function(onboard_function=run_onboard)
        mturk_manager.ready_to_accept_workers()

        def check_worker_eligibility(worker):
            return True

        def assign_worker_roles(workers):
            for idx, agent in enumerate(workers):
                agent.id = mturk_agent_ids[idx % len(mturk_agent_ids)]

        def run_conversation(mturk_manager, opt, workers):
            # The two MTurk participants for this conversation.
            mturk_agent_1, mturk_agent_2 = workers[0], workers[1]

            # Create the local human agents
            human_agent_1 = LocalHumanAgent(opt=None)
            human_agent_1.id = human_agent_1_id

            world = MTurkMultiAgentDialogWorld(
                opt=opt,
                agents=[human_agent_1, mturk_agent_1, mturk_agent_2])
            while not world.episode_done():
                world.parley()
            world.shutdown()

        mturk_manager.start_task(
            eligibility_function=check_worker_eligibility,
            assign_role_function=assign_worker_roles,
            task_function=run_conversation)
    except BaseException:
        raise
    finally:
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
class TestMTurkAgent(unittest.TestCase):
    """
    Various unit tests for the MTurkAgent class.
    """

    def setUp(self):
        """Create a manager and a single MTurkAgent under test."""
        argparser = ParlaiParser(False, False)
        argparser.add_parlai_data_path()
        argparser.add_mturk_args()
        self.opt = argparser.parse_args(print_args=False)
        self.opt['task'] = 'unittest'
        self.opt['assignment_duration_in_seconds'] = 6
        mturk_agent_ids = ['mturk_agent_1']
        # opt is copied so per-test mutations can't leak between objects.
        self.mturk_manager = MTurkManager(opt=self.opt.copy(),
                                          mturk_agent_ids=mturk_agent_ids)
        self.worker_manager = self.mturk_manager.worker_manager

        self.turk_agent = MTurkAgent(
            self.opt.copy(),
            self.mturk_manager,
            TEST_HIT_ID_1,
            TEST_ASSIGNMENT_ID_1,
            TEST_WORKER_ID_1,
        )

    def tearDown(self):
        """Shut down the manager and remove any disconnect pickle."""
        self.mturk_manager.shutdown()
        disconnect_path = os.path.join(parent_dir, 'disconnect-test.pickle')
        if os.path.exists(disconnect_path):
            os.remove(disconnect_path)

    def test_init(self):
        """
        Test initialization of an agent.
        """
        self.assertIsNotNone(self.turk_agent.creation_time)
        self.assertIsNone(self.turk_agent.id)
        self.assertIsNone(self.turk_agent.message_request_time)
        self.assertIsNone(self.turk_agent.conversation_id)
        # All disconnect/completion flags start cleared.
        self.assertFalse(self.turk_agent.some_agent_disconnected)
        self.assertFalse(self.turk_agent.hit_is_expired)
        self.assertFalse(self.turk_agent.hit_is_abandoned)
        self.assertFalse(self.turk_agent.hit_is_returned)
        self.assertFalse(self.turk_agent.hit_is_complete)
        self.assertFalse(self.turk_agent.disconnected)
        self.assertTrue(self.turk_agent.alived)

    def test_state_wrappers(self):
        """
        Test the mturk agent wrappers around its state.
        """
        # Status get/set round-trips for every known status.
        for status in statuses:
            self.turk_agent.set_status(status)
            self.assertEqual(self.turk_agent.get_status(), status)

        # These two statuses imply the HIT was submitted.
        for status in [
            AssignState.STATUS_DONE,
            AssignState.STATUS_PARTNER_DISCONNECT,
        ]:
            self.turk_agent.set_status(status)
            self.assertTrue(self.turk_agent.submitted_hit())

        # Active statuses are not final; complete statuses are.
        for status in active_statuses:
            self.turk_agent.set_status(status)
            self.assertFalse(self.turk_agent.is_final())
        for status in complete_statuses:
            self.turk_agent.set_status(status)
            self.assertTrue(self.turk_agent.is_final())

        # Message appends are idempotent: re-adding MESSAGE_1 is a no-op.
        self.turk_agent.append_message(MESSAGE_1)
        self.assertEqual(len(self.turk_agent.get_messages()), 1)
        self.turk_agent.append_message(MESSAGE_2)
        self.assertEqual(len(self.turk_agent.get_messages()), 2)
        self.turk_agent.append_message(MESSAGE_1)
        self.assertEqual(len(self.turk_agent.get_messages()), 2)
        self.assertIn(MESSAGE_1, self.turk_agent.get_messages())
        self.assertIn(MESSAGE_2, self.turk_agent.get_messages())

        # Ensure command interactions work as expected
        self.turk_agent.set_last_command(COMMAND_1)
        self.assertEqual(self.turk_agent.get_last_command(), COMMAND_1)

        self.turk_agent.clear_messages()
        self.assertEqual(len(self.turk_agent.get_messages()), 0)

        # In task checks: 't_' conversation ids mean in-task, others not.
        self.turk_agent.conversation_id = 't_12345'
        self.assertTrue(self.turk_agent.is_in_task())
        self.turk_agent.conversation_id = 'b_12345'
        self.assertFalse(self.turk_agent.is_in_task())

    def test_connection_id(self):
        """
        Ensure the connection_id hasn't changed.
        """
        connection_id = "{}_{}".format(self.turk_agent.worker_id,
                                       self.turk_agent.assignment_id)
        self.assertEqual(self.turk_agent.get_connection_id(), connection_id)

    def test_inactive_data(self):
        """
        Ensure data packet generated for inactive commands is valid.
        """
        for status in complete_statuses:
            self.turk_agent.set_status(status)
            data = self.turk_agent.get_inactive_command_data()
            self.assertIsNotNone(data['text'])
            self.assertIsNotNone(data['inactive_text'])
            self.assertEqual(data['conversation_id'],
                             self.turk_agent.conversation_id)
            self.assertEqual(data['agent_id'], TEST_WORKER_ID_1)

    def test_status_change(self):
        """wait_for_status blocks until the target status is set."""
        has_changed = False
        self.turk_agent.set_status(AssignState.STATUS_ONBOARDING)

        def wait_for_status_wrap():
            nonlocal has_changed  # noqa 999 we don't use python2
            self.turk_agent.wait_for_status(AssignState.STATUS_WAITING)
            has_changed = True

        t = threading.Thread(target=wait_for_status_wrap, daemon=True)
        t.start()
        # The watcher must not fire before the status actually changes.
        self.assertFalse(has_changed)
        time.sleep(0.07)
        self.assertFalse(has_changed)
        self.turk_agent.set_status(AssignState.STATUS_WAITING)
        time.sleep(0.07)
        self.assertTrue(has_changed)

    def test_message_queue(self):
        """
        Ensure observations and acts work as expected.
        """
        self.mturk_manager.send_message = mock.MagicMock()
        # observe() should forward the act through the manager.
        self.turk_agent.observe(ACT_1)
        self.mturk_manager.send_message.assert_called_with(
            TEST_WORKER_ID_1, TEST_ASSIGNMENT_ID_1, ACT_1)

        # First act comes through the queue and returns properly
        self.assertTrue(self.turk_agent.msg_queue.empty())
        self.turk_agent.id = AGENT_ID
        self.turk_agent.put_data(MESSAGE_ID_1, ACT_1)
        # NOTE: 'recieved_packets' [sic] matches the attribute name on
        # MTurkAgent; do not "fix" the spelling here.
        self.assertTrue(self.turk_agent.recieved_packets[MESSAGE_ID_1])
        self.assertFalse(self.turk_agent.msg_queue.empty())
        returned_act = self.turk_agent.get_new_act_message()
        self.assertEqual(returned_act, ACT_1)

        # Repeat act is ignored
        self.turk_agent.put_data(MESSAGE_ID_1, ACT_1)
        self.assertTrue(self.turk_agent.msg_queue.empty())

        # Queue fills with distinct ids, then flushes to empty.
        for i in range(100):
            self.turk_agent.put_data(str(i), ACT_1)
        self.assertEqual(self.turk_agent.msg_queue.qsize(), 100)
        self.turk_agent.flush_msg_queue()
        self.assertTrue(self.turk_agent.msg_queue.empty())

        # Test non-act messages
        blank_message = self.turk_agent.get_new_act_message()
        self.assertIsNone(blank_message)
        self.turk_agent.disconnected = True
        disconnect_message = self.turk_agent.get_new_act_message()
        self.turk_agent.disconnected = False
        self.assertEqual(disconnect_message['text'],
                         self.turk_agent.MTURK_DISCONNECT_MESSAGE)
        self.turk_agent.hit_is_returned = True
        return_message = self.turk_agent.get_new_act_message()
        self.assertEqual(return_message['text'],
                         self.turk_agent.RETURN_MESSAGE)
        self.turk_agent.hit_is_returned = False

        # Reduce state: queue and packet map are dropped entirely.
        self.turk_agent.reduce_state()
        self.assertIsNone(self.turk_agent.msg_queue)
        self.assertIsNone(self.turk_agent.recieved_packets)

    def test_message_acts(self):
        """Exercise act() in non-blocking, non-blocking-timeout, and
        blocking-timeout modes."""
        self.mturk_manager.send_command = mock.MagicMock()
        self.mturk_manager.handle_turker_timeout = mock.MagicMock()

        # non-Blocking check
        self.assertIsNone(self.turk_agent.message_request_time)
        returned_act = self.turk_agent.act(blocking=False)
        self.assertIsNotNone(self.turk_agent.message_request_time)
        self.assertIsNone(returned_act)
        self.turk_agent.id = AGENT_ID
        self.turk_agent.put_data(MESSAGE_ID_1, ACT_1)
        returned_act = self.turk_agent.act(blocking=False)
        self.assertIsNone(self.turk_agent.message_request_time)
        self.assertEqual(returned_act, ACT_1)
        self.mturk_manager.send_command.assert_called_once()

        # non-Blocking timeout check: poll until the timeout act arrives.
        self.mturk_manager.send_command = mock.MagicMock()
        returned_act = self.turk_agent.act(timeout=0.07, blocking=False)
        self.assertIsNotNone(self.turk_agent.message_request_time)
        self.assertIsNone(returned_act)
        while returned_act is None:
            returned_act = self.turk_agent.act(timeout=0.07, blocking=False)
        self.mturk_manager.send_command.assert_called_once()
        self.mturk_manager.handle_turker_timeout.assert_called_once()
        self.assertEqual(returned_act['text'],
                         self.turk_agent.TIMEOUT_MESSAGE)

        # Blocking timeout check
        self.mturk_manager.send_command = mock.MagicMock()
        self.mturk_manager.handle_turker_timeout = mock.MagicMock()
        returned_act = self.turk_agent.act(timeout=0.07)
        self.mturk_manager.send_command.assert_called_once()
        self.mturk_manager.handle_turker_timeout.assert_called_once()
        self.assertEqual(returned_act['text'],
                         self.turk_agent.TIMEOUT_MESSAGE)
def main():
    """Run the convai2 model-evaluation MTurk task.

    An MTurk worker chats with a locally hosted model while adopting a
    specific persona, then evaluates the model partner on several
    metrics after the conversation.
    """
    argparser = ParlaiParser(False, add_model_args=True)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    argparser.add_argument('-mt', '--max-turns', default=10, type=int,
                           help='maximal number of chat turns')
    argparser.add_argument('--max-resp-time', default=240, type=int,
                           help='time limit for entering a dialog message')
    # BUGFIX: previously two adjacent literals concatenated to
    # "time limit for turkerentering the persona" (missing space).
    argparser.add_argument('--max-persona-time', type=int, default=300,
                           help='time limit for turker entering the persona')
    argparser.add_argument('--ag-shutdown-time', default=120, type=int,
                           help='time limit for entering a dialog message')
    argparser.add_argument('--persona-type', default='both', type=str,
                           choices=['both', 'self', 'other'],
                           help='Which personas to load from personachat')
    argparser.add_argument('--revised', default=False, type='bool',
                           help='Whether to use revised personas')
    argparser.add_argument('-rt', '--range-turn', default='5,6',
                           help='sample range of number of turns')
    argparser.add_argument('--auto-approve-delay', type=int,
                           default=3600 * 24 * 1,
                           help='how long to wait for auto approval')
    argparser.add_argument('--only-masters', type='bool', default=False,
                           help='Set to True to use only master turks for '
                                'this test eval, default is %(default)s')

    # ADD MODEL ARGS HERE, UNCOMMENT TO USE KVMEMNN MODEL AS AN EXAMPLE
    # argparser.set_defaults(
    #     model='projects.personachat.kvmemnn.kvmemnn:Kvmemnn',
    #     model_file='models:convai2/kvmemnn/model',
    # )

    opt = argparser.parse_args()

    # add additional model args
    opt['override'] = {
        'no_cuda': True,
        'interactive_mode': True,
        'tensorboard_log': False,
    }

    # The model is created once; its shared params are handed to every
    # conversation so each world gets its own lightweight copy.
    bot = create_agent(opt)
    shared_bot_params = bot.share()
    print(
        '=== Actual bot opt === :\n {}'.format(
            '\n'.join(["[{}] : {}".format(k, v) for k, v in bot.opt.items()])
        )
    )
    folder_name = (
        'master_{}_YOURCOMMENT__'.format(opt['only_masters'])
        + '__'.join(['{}_{}'.format(k, v) for k, v in opt['override'].items()])
    )

    # this is mturk task, not convai2 task from ParlAI
    opt['task'] = 'convai2:self'
    if 'data_path' not in opt:
        opt['data_path'] = os.getcwd() + '/data/' + folder_name
    opt.update(task_config)

    mturk_agent_ids = ['PERSON_1']

    mturk_manager = MTurkManager(opt=opt, mturk_agent_ids=mturk_agent_ids)

    persona_generator = PersonasGenerator(opt)
    mturk_manager.setup_server()

    try:
        mturk_manager.start_new_run()

        agent_qualifications = []
        if opt['only_masters']:
            # Use the sandbox/live masters qualification as appropriate.
            if opt['is_sandbox']:
                agent_qualifications.append(MASTER_QUALIF_SDBOX)
            else:
                agent_qualifications.append(MASTER_QUALIF)
        mturk_manager.create_hits(qualifications=agent_qualifications)

        if not opt['is_sandbox']:
            # ADD SOFT-BLOCKED WORKERS HERE
            # NOTE: blocking qual *must be* specified
            blocked_worker_list = []
            for w in blocked_worker_list:
                print('Soft Blocking {}\n'.format(w))
                mturk_manager.soft_block_worker(w)
                time.sleep(0.1)  # do the sleep to prevent amazon query drop

        def run_onboard(worker):
            # Assign the worker a persona before the evaluation chat.
            worker.persona_generator = persona_generator
            world = PersonaProfileWorld(opt, worker)
            world.parley()
            world.shutdown()

        mturk_manager.set_onboard_function(onboard_function=run_onboard)
        mturk_manager.ready_to_accept_workers()

        def check_worker_eligibility(worker):
            # All workers are eligible (masters handled via HIT quals).
            return True

        def assign_worker_roles(workers):
            for index, worker in enumerate(workers):
                worker.id = mturk_agent_ids[index % len(mturk_agent_ids)]

        def run_conversation(mturk_manager, opt, workers):
            # BUGFIX (readability): this single worker was previously
            # bound to a plural name 'agents', inviting misuse.
            agent = workers[0]
            conv_idx = mturk_manager.conversation_index
            world = Convai2EvalWorld(
                opt=opt,
                agents=[agent],
                range_turn=[int(s) for s in opt['range_turn'].split(',')],
                max_turn=opt['max_turns'],
                max_resp_time=opt['max_resp_time'],
                model_agent_opt=shared_bot_params,
                world_tag='conversation t_{}'.format(conv_idx),
                agent_timeout_shutdown=opt['ag_shutdown_time'],
            )
            world.reset_random()
            while not world.episode_done():
                world.parley()
            world.save_data()
            world.shutdown()

        mturk_manager.start_task(
            eligibility_function=check_worker_eligibility,
            assign_role_function=assign_worker_roles,
            task_function=run_conversation,
        )
    except BaseException:
        raise
    finally:
        # Unclaimed HITs must be expired before shutting down the server.
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
class TestAssignState(unittest.TestCase):
    """
    Various unit tests for the AssignState class.
    """

    def setUp(self):
        """Create a fresh state, an in-task state, and a manager."""
        self.agent_state1 = AssignState()
        self.agent_state2 = AssignState(status=AssignState.STATUS_IN_TASK)
        argparser = ParlaiParser(False, False)
        argparser.add_parlai_data_path()
        argparser.add_mturk_args()
        self.opt = argparser.parse_args(print_args=False)
        self.opt['task'] = 'unittest'
        self.opt['assignment_duration_in_seconds'] = 6
        mturk_agent_ids = ['mturk_agent_1']
        self.mturk_manager = MTurkManager(opt=self.opt,
                                          mturk_agent_ids=mturk_agent_ids)
        self.worker_manager = self.mturk_manager.worker_manager

    def tearDown(self):
        self.mturk_manager.shutdown()

    def test_assign_state_init(self):
        """
        Test proper initialization of assignment states.
        """
        self.assertEqual(self.agent_state1.status, AssignState.STATUS_NONE)
        self.assertEqual(len(self.agent_state1.messages), 0)
        self.assertEqual(len(self.agent_state1.message_ids), 0)
        self.assertIsNone(self.agent_state1.last_command)
        self.assertEqual(self.agent_state2.status, AssignState.STATUS_IN_TASK)
        # BUGFIX: these three assertions previously re-checked
        # agent_state1 (already verified above) instead of agent_state2,
        # so state2's empty-messages/command invariants were untested.
        self.assertEqual(len(self.agent_state2.messages), 0)
        self.assertEqual(len(self.agent_state2.message_ids), 0)
        self.assertIsNone(self.agent_state2.last_command)

    def test_message_management(self):
        """
        Test message management in an AssignState.
        """
        # Ensure message appends succeed and are idempotent
        self.agent_state1.append_message(MESSAGE_1)
        self.assertEqual(len(self.agent_state1.get_messages()), 1)
        self.agent_state1.append_message(MESSAGE_2)
        self.assertEqual(len(self.agent_state1.get_messages()), 2)
        self.agent_state1.append_message(MESSAGE_1)
        self.assertEqual(len(self.agent_state1.get_messages()), 2)
        self.assertEqual(len(self.agent_state2.get_messages()), 0)
        self.assertIn(MESSAGE_1, self.agent_state1.get_messages())
        self.assertIn(MESSAGE_2, self.agent_state1.get_messages())
        self.assertEqual(len(self.agent_state1.message_ids), 2)
        self.agent_state2.append_message(MESSAGE_1)
        self.assertEqual(len(self.agent_state2.message_ids), 1)

        # Ensure command interactions work as expected
        self.agent_state1.set_last_command(COMMAND_1)
        self.assertEqual(self.agent_state1.get_last_command(), COMMAND_1)

        # Ensure clearing messages acts as intended and doesn't clear agent2
        self.agent_state1.clear_messages()
        self.assertEqual(len(self.agent_state1.messages), 0)
        self.assertEqual(len(self.agent_state1.message_ids), 0)
        self.assertIsNone(self.agent_state1.last_command)
        self.assertEqual(len(self.agent_state2.message_ids), 1)

    def test_state_handles_status(self):
        """
        Ensures status updates and is_final are valid.
        """
        for status in statuses:
            self.agent_state1.set_status(status)
            self.assertEqual(self.agent_state1.get_status(), status)

        for status in active_statuses:
            self.agent_state1.set_status(status)
            self.assertFalse(self.agent_state1.is_final())
        for status in complete_statuses:
            self.agent_state1.set_status(status)
            self.assertTrue(self.agent_state1.is_final())

        # TODO update the below once bonus is default
        # Every final status must produce non-empty inactive command text.
        for status in complete_statuses:
            self.agent_state1.set_status(status)
            text, command = self.agent_state1.get_inactive_command_text()
            self.assertIsNotNone(text)
            self.assertIsNotNone(command)