def main():
    """Set up and run the Talk The Walk MTurk data-collection task.

    Pairs two workers as 'Tourist' and 'Guide' in a TalkTheWalkWorld.
    Supports replaying previously logged interactions (``--replay``)
    instead of collecting new data.
    """
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    argparser.add_argument('--replay',
                           action='store_true',
                           help='Set to replay old interactions')
    argparser.add_argument('--replay-log-file',
                           type=str,
                           default='',
                           help='location of log to use if replay')
    argparser.add_argument('--real-time',
                           action='store_true',
                           help='Set to replay in real time ')
    argparser.add_argument('--replay-bot',
                           action='store_true',
                           help='Set to replay bot actions instead of human')
    argparser.add_argument('--model-file',
                           type=str,
                           default='',
                           help='language generator model file')
    argparser.add_argument('--world-idx',
                           type=int,
                           default=-1,
                           help='specify world to load')
    argparser.add_argument('--start-idx',
                           type=int,
                           default=0,
                           help='where to start replay, if replaying actions')
    argparser.add_argument('--bot-type',
                           type=str,
                           default='discrete',
                           choices=['discrete', 'natural'],
                           help='which bot log to use')
    opt = argparser.parse_args()
    # Fold the static task_config.py contents into the options.
    opt.update(task_config)

    mturk_agent_1_id = 'Tourist'
    mturk_agent_2_id = 'Guide'
    mturk_agent_ids = [mturk_agent_1_id, mturk_agent_2_id]
    # Task name is the name of this script's directory; data is saved
    # under <cwd>/data/<task>.
    task_directory_path = os.path.dirname(os.path.abspath(__file__))
    opt['task'] = os.path.basename(task_directory_path)
    opt['data_path'] = os.getcwd() + '/data/' + opt['task']
    mturk_manager = MTurkManager(opt=opt, mturk_agent_ids=mturk_agent_ids)
    mturk_manager.setup_server(task_directory_path=task_directory_path)

    try:
        mturk_manager.start_new_run()
        mturk_manager.create_hits()

        def run_onboard(worker):
            # Instruction world each worker completes before the task.
            world = InstructionWorld(opt=opt, mturk_agent=worker)
            while not world.episode_done():
                world.parley()
            # BUGFIX: shut the onboarding world down once, after the
            # episode finishes — not inside the loop on every parley.
            world.shutdown()

        mturk_manager.set_onboard_function(onboard_function=run_onboard)
        mturk_manager.ready_to_accept_workers()

        def check_worker_eligibility(worker):
            # Every connected worker may participate.
            return True

        # NOTE(review): worker_count is never used in this function —
        # presumably read elsewhere in the module; kept for compatibility.
        global worker_count
        worker_count = 0

        def assign_worker_roles(workers):
            # First worker is the Tourist, second the Guide.
            workers[0].id = mturk_agent_ids[0]
            workers[1].id = mturk_agent_ids[1]
            return [workers[0], workers[1]]

        def run_conversation(mturk_manager, opt, workers):
            # Create mturk agents
            mturk_agent_1 = workers[0]
            mturk_agent_2 = workers[1]
            conv_idx = mturk_manager.conversation_index
            world = TalkTheWalkWorld(opt=opt,
                                     agents=[mturk_agent_1, mturk_agent_2],
                                     world_tag=conv_idx)

            while not world.episode_done():
                world.parley()
            world.shutdown()
            world.review_work()
            # Replays are read-only: don't overwrite the original logs.
            if not opt.get('replay'):
                world.save()

        mturk_manager.start_task(eligibility_function=check_worker_eligibility,
                                 assign_role_function=assign_worker_roles,
                                 task_function=run_conversation)
    except Exception:
        raise
    finally:
        # Expire any unclaimed HITs, then free all manager resources.
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
# ---- Example 2 ----
def main():
    """
        IGC Human Evaluation.

        Specify the `--eval-data-path` to load examples for evaluation.

        The data in `--eval-data-path` should be formatted as a dictionary
        mapping IGC image ids to dicts with the following fields:
        {
            'questions': list of (<generator_name>, <generated_question>) tuples,
            'responses': list of (<generator_name>, <generated_response>) tuples,
            'question': question to use when evaluating responses,
            'context': context for the image
        }

        If no data path is specified, loads a demo_example specified in
        worlds.py

        Specify `--image-path` for the path to the IGC images, where each example
        is saved as <image_id>.jpg


        NOTE: You can download the IGC Test Set from
            https://www.microsoft.com/en-us/download/details.aspx?id=55324

        And you can use the `download_igc_images.py` script to download the images
        (please put the IGC_crowd_test.csv file in this directory to use the script)

    """
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    argparser.add_argument('-min_t',
                           '--min_turns',
                           default=3,
                           type=int,
                           help='minimum number of turns')
    argparser.add_argument('-mt',
                           '--max_turns',
                           default=5,
                           type=int,
                           help='maximal number of chat turns')
    argparser.add_argument(
        '-mx_rsp_time',
        '--max_resp_time',
        default=1800,
        type=int,
        help='time limit for entering a dialog message',
    )
    argparser.add_argument(
        '-mx_onb_time',
        '--max_onboard_time',
        type=int,
        default=300,
        help='time limit for turker'
        'in onboarding',
    )
    argparser.add_argument(
        '-ni',
        '--num_images',
        type=int,
        default=5,
        help='number of images to show \
                           to turker',
    )
    argparser.add_argument(
        '--auto-approve-delay',
        type=int,
        default=3600 * 24,
        help='how long to wait for  \
                           auto approval',
    )
    argparser.add_argument('--data-path',
                           type=str,
                           default='',
                           help='where to save data')
    argparser.add_argument(
        '--eval-data-path',
        type=str,
        default='',
        help='path to file with candidates to '
        'evaluate',
    )
    argparser.add_argument('--image-path',
                           type=str,
                           default='',
                           help='path to IGC images')
    argparser.add_argument(
        '-rnd',
        '--dialog-round',
        type=str,
        default='questions',
        choices=round_choices,
        help='which dialog round to show',
    )

    opt = argparser.parse_args()
    # Task name is the name of this script's directory; default the save
    # path to <cwd>/data/<round>_evals when none was given.
    directory_path = os.path.dirname(os.path.abspath(__file__))
    opt['task'] = os.path.basename(directory_path)
    if 'data_path' not in opt or opt['data_path'] == '':
        opt['data_path'] = "{}/data/{}_evals".format(os.getcwd(),
                                                     opt['dialog_round'])
    opt['task_dir'] = os.getcwd()
    # Pull in the task config matching the chosen dialog round.
    if opt['dialog_round'] == 'questions':
        opt.update(tc_questions)
    else:
        opt.update(tc_responses)

    # Every worker plays the single RATER role.
    mturk_agent_ids = [RATER]
    mturk_manager = MTurkManager(opt=opt, mturk_agent_ids=mturk_agent_ids)

    # Shared generator that hands each worker its evaluation examples.
    example_generator = IGCExampleGenerator(opt)
    mturk_manager.setup_server(task_directory_path=directory_path)

    try:
        mturk_manager.start_new_run()

        def run_onboard(worker):
            # Attach the shared example generator, then run one
            # onboarding turn before the worker enters the task queue.
            worker.example_generator = example_generator
            world = RoleOnboardWorld(opt, worker)
            world.parley()
            world.shutdown()

        mturk_manager.set_onboard_function(onboard_function=run_onboard)
        mturk_manager.ready_to_accept_workers()
        mturk_manager.create_hits()

        def check_worker_eligibility(worker):
            # Every connected worker may participate.
            return True

        def assign_worker_roles(workers):
            # All workers in a world act as the rater.
            for w in workers:
                w.id = mturk_agent_ids[0]

        def run_conversation(mturk_manager, opt, workers):
            # Run one evaluation world to completion and persist it.
            agents = workers[:]
            conv_idx = mturk_manager.conversation_index
            world = MTurkIGCEvalWorld(
                opt,
                agents=agents,
                world_tag='conversation t_{}'.format(conv_idx))
            while not world.episode_done():
                world.parley()
            world.save_data()

            world.shutdown()
            world.review_work()

        mturk_manager.start_task(
            eligibility_function=check_worker_eligibility,
            assign_role_function=assign_worker_roles,
            task_function=run_conversation,
        )

    except BaseException:
        raise
    finally:
        # Expire any unclaimed HITs, then free all manager resources.
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
# ---- Example 3 ----
def main():
    '''Handles setting up and running a ParlAI-MTurk task by instantiating
    an MTurk manager and configuring it for the qa_data_collection task
    '''
    # Parse the standard ParlAI / MTurk command-line options.
    parser = ParlaiParser(False, False)
    parser.add_parlai_data_path()
    parser.add_mturk_args()
    opt = parser.parse_args()

    # The task name is simply the name of this script's directory.
    opt['task'] = os.path.basename(os.path.dirname(os.path.abspath(__file__)))

    # Fold the static task_config.py contents into the options.
    opt.update(task_config)

    # Resolve the SQuAD DefaultTeacher class, which will supply the
    # contexts that workers write questions about.
    squad_module = importlib.import_module('parlai.tasks.squad.agents')
    teacher_class = getattr(squad_module, 'DefaultTeacher')
    teacher_opt = opt.copy()
    teacher_opt['datatype'] = 'train'
    teacher_opt['datapath'] = opt['datapath']

    # Agent id assigned to every worker in its world.
    worker_role_id = 'Worker'

    # One agent per world (single-element id list); use_db persists
    # run state in the local database.
    mturk_manager = MTurkManager(
        opt=opt,
        mturk_agent_ids=[worker_role_id],
        use_db=True,
    )
    mturk_manager.setup_server()

    # Onboarding world a worker would complete before entering the task
    # queue. Defined for reference, but not enabled below.
    def run_onboard(worker):
        world = QADataCollectionOnboardWorld(opt=opt, mturk_agent=worker)
        while not world.episode_done():
            world.parley()
        world.shutdown()
        return world.prep_save_data([worker])

    # Onboarding is disabled; pass run_onboard above instead to enable it.
    mturk_manager.set_onboard_function(onboard_function=None)

    try:
        # Initialize run information, start accepting workers, and
        # post the HITs described by the command-line arguments.
        mturk_manager.start_new_run()
        mturk_manager.ready_to_accept_workers()
        mturk_manager.create_hits()

        # Pool-level filter: every connected worker is eligible.
        # 'multiple': True means the function receives the whole pool.
        def keep_all_eligible(workers):
            return workers

        eligibility_function = {
            'func': keep_all_eligible,
            'multiple': True,
        }

        # Give the single worker in each world the Worker role. Setting
        # `id` to None instead would return the worker to the pool.
        def assign_worker_roles(workers):
            workers[0].id = worker_role_id

        # Exposed at module level so other code can reference it.
        global run_conversation

        def run_conversation(mturk_manager, opt, workers):
            # A fresh teacher agent per conversation asks the questions.
            task = teacher_class(teacher_opt)
            world = QADataCollectionWorld(opt=opt,
                                          task=task,
                                          mturk_agent=workers[0])
            # Drive the world until the episode completes.
            while not world.episode_done():
                world.parley()

            world.shutdown()
            world.review_work()

            # Hand back the collected data for saving.
            return world.prep_save_data(workers)

        # Launch the task; the manager runs run_conversation for each
        # set of connected workers.
        mturk_manager.start_task(eligibility_function=eligibility_function,
                                 assign_role_function=assign_worker_roles,
                                 task_function=run_conversation)
    except BaseException:
        raise
    finally:
        # Expire any unclaimed HITs, then free all manager resources.
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
# ---- Example 4 ----
def main():
    """Handles setting up and running a ParlAI-MTurk task by instantiating
    an MTurk manager and configuring it for the qa_data_collection task
    """
    # Parse the standard ParlAI / MTurk command-line options.
    parser = ParlaiParser(False, False)
    parser.add_parlai_data_path()
    parser.add_mturk_args()
    opt = parser.parse_args()

    # The task name is simply the name of this script's directory.
    opt['task'] = os.path.basename(os.path.dirname(os.path.abspath(__file__)))

    # Fold the static task_config.py contents into the options.
    opt.update(task_config)

    # The three roles a worker may play in the task world.
    roles = ['Asker', 'Answerer', 'Evaluator']

    # Three agents per world (one per role); use_db persists run state.
    mturk_manager = MTurkManager(
        opt=opt,
        mturk_agent_ids=roles,
        use_db=True,
    )
    mturk_manager.setup_server(
        task_directory_path=os.path.dirname(os.path.abspath(__file__)))

    # Counts onboarded workers so roles are handed out round-robin.
    onboard_count = 0

    def run_onboard(worker):
        # Assign the next role in the cycle and run the matching
        # onboarding world before the worker enters the task queue.
        nonlocal onboard_count
        role = roles[onboard_count % 3]
        onboard_count += 1
        worker.update_agent_id('Onboarding {}'.format(role))
        worker.demo_role = role
        if role == 'Asker':
            world = AskerOnboardingWorld(opt=opt, mturk_agent=worker)
        elif role == 'Answerer':
            world = AnswererOnboardingWorld(opt=opt, mturk_agent=worker)
        else:
            world = EvaluatorOnboardingWorld(opt=opt, mturk_agent=worker)
        while not world.episode_done():
            world.parley()
        world.shutdown()
        return world.prep_save_data([worker])

    mturk_manager.set_onboard_function(onboard_function=run_onboard)

    try:
        # Initialize run information, start accepting workers, and
        # post the HITs described by the command-line arguments.
        mturk_manager.start_new_run()
        mturk_manager.ready_to_accept_workers()
        mturk_manager.create_hits()

        # Pool-level filter: keep at most one worker per distinct role,
        # so each world gets an Asker, an Answerer, and an Evaluator.
        def pick_one_per_role(workers):
            seen_roles = []
            chosen = []
            for worker in workers:
                if worker.demo_role not in seen_roles:
                    chosen.append(worker)
                    seen_roles.append(worker.demo_role)
            return chosen

        eligibility_function = {
            'func': pick_one_per_role,
            'multiple': True,
        }

        # Each worker plays the role it trained for during onboarding.
        # Setting `id` to None instead would return a worker to the pool.
        def assign_worker_roles(workers):
            for worker in workers:
                worker.id = worker.demo_role

        # Exposed at module level so other code can reference it.
        global run_conversation

        def run_conversation(mturk_manager, opt, workers):
            # Run the shared three-agent world to completion.
            world = MultiRoleAgentWorld(opt=opt, mturk_agents=workers)
            while not world.episode_done():
                world.parley()

            world.shutdown()
            world.review_work()

            # Hand back the collected data for saving.
            return world.prep_save_data(workers)

        # Launch the task; the manager runs run_conversation for each
        # group of connected workers.
        mturk_manager.start_task(eligibility_function=eligibility_function,
                                 assign_role_function=assign_worker_roles,
                                 task_function=run_conversation)
    except BaseException:
        raise
    finally:
        # Expire any unclaimed HITs, then free all manager resources.
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
# ---- Example 5 ----
    def setUp(self):
        """Build a fully-mocked MTurkManager ready to accept two workers.

        Replaces every server/AWS helper with a MagicMock, wires the
        manager to a fake socket, launches start_task on a background
        thread, and creates mock agents for the tests to drive.
        """
        # Mock functions that hit external APIs and such
        self.server_utils = MTurkManagerFile.server_utils
        self.mturk_utils = MTurkManagerFile.mturk_utils
        self.server_utils.setup_server = mock.MagicMock(
            return_value='https://127.0.0.1')
        self.server_utils.setup_legacy_server = mock.MagicMock(
            return_value='https://127.0.0.1')
        self.server_utils.delete_server = mock.MagicMock()
        self.mturk_utils.setup_aws_credentials = mock.MagicMock()
        self.mturk_utils.calculate_mturk_cost = mock.MagicMock(return_value=1)
        self.mturk_utils.check_mturk_balance = mock.MagicMock(
            return_value=True)
        self.mturk_utils.create_hit_config = mock.MagicMock()
        self.mturk_utils.setup_sns_topic = mock.MagicMock(
            return_value=TOPIC_ARN)
        self.mturk_utils.delete_sns_topic = mock.MagicMock()
        self.mturk_utils.delete_qualification = mock.MagicMock()
        self.mturk_utils.find_or_create_qualification = mock.MagicMock(
            return_value=QUALIFICATION_ID)
        self.mturk_utils.find_qualification = mock.MagicMock(
            return_value=QUALIFICATION_ID)
        self.mturk_utils.give_worker_qualification = mock.MagicMock()
        self.mturk_utils.remove_worker_qualification = mock.MagicMock()
        self.mturk_utils.create_hit_type = mock.MagicMock(
            return_value=HIT_TYPE_ID)
        self.mturk_utils.subscribe_to_hits = mock.MagicMock()
        self.mturk_utils.create_hit_with_hit_type = mock.MagicMock(
            return_value=(MTURK_PAGE_URL, FAKE_HIT_ID, 'MTURK_HIT_DATA'))
        self.mturk_utils.get_mturk_client = mock.MagicMock(
            return_value=mock.MagicMock())

        # Populated by the (externally defined) onboarding / conversation
        # callbacks as agents progress through the run.
        self.onboarding_agents = {}
        self.worlds_agents = {}

        # Set up an MTurk Manager and get it ready for accepting workers
        self.fake_socket = MockSocket()
        # Brief pause so the mock socket is listening before the manager
        # connects to it.
        time.sleep(0.1)
        argparser = ParlaiParser(False, False)
        argparser.add_parlai_data_path()
        argparser.add_mturk_args()
        self.opt = argparser.parse_args(print_args=False)
        # Minimal HIT/run configuration for a one-conversation test run.
        self.opt['task'] = 'unittest'
        self.opt['assignment_duration_in_seconds'] = 1
        self.opt['hit_title'] = 'test_hit_title'
        self.opt['hit_description'] = 'test_hit_description'
        self.opt['task_description'] = 'test_task_description'
        self.opt['hit_keywords'] = 'test_hit_keywords'
        self.opt['reward'] = 0.1
        self.opt['is_debug'] = True
        self.opt['log_level'] = 0
        self.opt['num_conversations'] = 1
        self.mturk_agent_ids = ['mturk_agent_1', 'mturk_agent_2']
        self.mturk_manager = MTurkManager(opt=self.opt,
                                          mturk_agent_ids=self.mturk_agent_ids,
                                          is_test=True)
        # Point the manager at the fake socket and walk it through the
        # normal startup sequence (order matters).
        self.mturk_manager.port = self.fake_socket.port
        self.mturk_manager.setup_server()
        self.mturk_manager.start_new_run()
        self.mturk_manager.ready_to_accept_workers()
        self.mturk_manager.set_onboard_function(self.onboard_agent)
        self.mturk_manager.create_hits()

        def assign_worker_roles(workers):
            workers[0].id = 'mturk_agent_1'
            workers[1].id = 'mturk_agent_2'

        def run_task_wait():
            # Blocks until the run completes, so it lives on its own thread.
            self.mturk_manager.start_task(lambda w: True, assign_worker_roles,
                                          self.run_conversation)

        self.task_thread = threading.Thread(target=run_task_wait)
        self.task_thread.start()

        # Mock agents the tests use to simulate worker traffic; agent_1_2
        # is the same worker returning on a second assignment.
        self.agent_1 = MockAgent(TEST_HIT_ID_1, TEST_ASSIGNMENT_ID_1,
                                 TEST_WORKER_ID_1, TASK_GROUP_ID_1)
        self.agent_1_2 = MockAgent(TEST_HIT_ID_1, TEST_ASSIGNMENT_ID_3,
                                   TEST_WORKER_ID_1, TASK_GROUP_ID_1)
        self.agent_2 = MockAgent(TEST_HIT_ID_2, TEST_ASSIGNMENT_ID_2,
                                 TEST_WORKER_ID_2, TASK_GROUP_ID_1)
def main():
    """Run the MTurk qualification-flow demo task.

    Workers who fail the first iteration of the task are granted a
    qualification that blocks them from previewing later HITs, which
    filters workers into different task pools across runs.
    """
    # Worker ids that have already completed a conversation this run.
    # A set: membership is tested once per conversation.
    completed_workers = set()
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    opt = argparser.parse_args()
    opt['task'] = os.path.basename(os.path.dirname(os.path.abspath(__file__)))
    opt.update(task_config)

    mturk_agent_id = 'Worker'
    mturk_manager = MTurkManager(opt=opt, mturk_agent_ids=[mturk_agent_id])
    mturk_manager.setup_server()
    # Randomized name so repeated runs don't collide with an existing
    # qualification of the same name.
    qual_name = 'ParlAIExcludeQual{}t{}'.format(
        random.randint(10000, 99999), random.randint(10000, 99999)
    )
    qual_desc = (
        'Qualification for a worker not correctly completing the '
        'first iteration of a task. Used to filter to different task pools.'
    )
    qualification_id = mturk_utils.find_or_create_qualification(
        qual_name, qual_desc, opt['is_sandbox']
    )
    print('Created qualification: ', qualification_id)

    def run_onboard(worker):
        # Instruction world each worker completes before the task.
        world = QualificationFlowOnboardWorld(opt, worker)
        while not world.episode_done():
            world.parley()
        world.shutdown()

    mturk_manager.set_onboard_function(onboard_function=run_onboard)

    try:
        mturk_manager.start_new_run()
        # Only workers WITHOUT the exclusion qualification may preview
        # or accept the HITs.
        agent_qualifications = [
            {
                'QualificationTypeId': qualification_id,
                'Comparator': 'DoesNotExist',
                'RequiredToPreview': True,
            }
        ]
        mturk_manager.create_hits(qualifications=agent_qualifications)

        mturk_manager.ready_to_accept_workers()

        def check_worker_eligibility(worker):
            # Every connected worker may participate.
            return True

        # FIX: the manager passes a *list* of workers; the original
        # parameter name 'worker' was misleading given the [0] indexing.
        def assign_worker_roles(workers):
            workers[0].id = mturk_agent_id

        global run_conversation

        def run_conversation(mturk_manager, opt, workers):
            mturk_agent = workers[0]
            # firstTime controls which variant of the solo world the
            # worker sees (and whether failure grants the exclusion
            # qualification).
            world = QualificationFlowSoloWorld(
                opt=opt,
                mturk_agent=mturk_agent,
                qualification_id=qualification_id,
                firstTime=(mturk_agent.worker_id not in completed_workers),
            )
            while not world.episode_done():
                world.parley()
            completed_workers.add(mturk_agent.worker_id)
            world.shutdown()
            world.review_work()

        mturk_manager.start_task(
            eligibility_function=check_worker_eligibility,
            assign_role_function=assign_worker_roles,
            task_function=run_conversation,
        )
    except BaseException:
        raise
    finally:
        # Remove the run-specific qualification, expire any unclaimed
        # HITs, then free all manager resources.
        mturk_utils.delete_qualification(qualification_id, opt['is_sandbox'])
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
# ---- Example 7 ----
def main():
    '''Main script for running an eval task against the LIGHT dataset.

    special CLI arguments are
      --light-eval-task-type [speech, emote, action]
      --light-eval-unseen [False, True]

    This launches a task that, on a workers first attempt pairs with an entry
    from the training set. Then based on if the worker performs above a
    specified benchmark, they will either be soft blocked from evaluating or
    allowed to try against the test set.
    '''
    # Get relevant arguments
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    argparser.set_defaults(datatype='test:stream')
    argparser.add_argument('--light-eval-task-type',
                           default='speech',
                           help='Type of task to be evaluating')
    argparser.add_argument(
        '--light-eval-unseen',
        default=False,
        type='bool',
        help='Evaluate against the unseen test rather than the seen test')
    opt = argparser.parse_args()

    task_opt = opt.copy()
    task_opt['task'] = 'light_dialog'
    assert opt['light_eval_task_type'] in [
        'speech', 'emote', 'action'
    ], ('--light-eval-task-type must be one of speech, emote, or action')
    LABEL_TYPE = opt['light_eval_task_type']  # speech, emote, action
    # Speech evaluations use longer episodes, fewer required trainings,
    # and a tighter error budget than emote/action evaluations.
    TRAIN_TURNS = 7
    TRAININGS = 1
    MAX_WRONG = 1
    if LABEL_TYPE != 'speech':
        TRAIN_TURNS = 3
        TRAININGS = 2
        MAX_WRONG = 3 if LABEL_TYPE == 'emote' else 2
    # LIGHT teacher configuration for the evaluation data.
    task_opt['light_label_type'] = LABEL_TYPE
    task_opt['light_use_action'] = 'all'
    task_opt['light_use_cands'] = '20'
    task_opt['light_use_emote'] = 'all'
    task_opt['light_use_objects'] = True
    task_opt['light_use_person_names'] = True
    task_opt['light_use_persona'] = 'self'
    task_opt['light_use_repeat'] = 'none'
    task_opt['light_use_setting'] = True
    task_opt['light_use_speech'] = 'all'
    task_opt['light_use_current_self_output'] = 'all'
    task_opt['light_use_clip_cands'] = 10000
    task_opt['light_unseen_test'] = task_opt['light_eval_unseen']

    # Fixed seed so every run draws the same sample pools.
    random.seed(10)
    agent = RepeatLabelAgent(task_opt)
    world = create_task(task_opt, agent)

    # Populate dialogues from the LIGHT dataset (test split); keep only
    # episodes long enough to evaluate.
    samples = []
    curr_sample = []
    while True:
        world.parley()
        curr_sample.append(world.acts[0].copy())
        if world.acts[0]['episode_done']:
            if len(curr_sample) >= TRAIN_TURNS:
                samples.append(curr_sample)
            curr_sample = []
        if world.epoch_done():
            break

    # Same again for the train split, capped at ~2000 episodes.
    train_samples = []
    task_opt['datatype'] = 'train:stream'
    task_opt['light_unseen_test'] = False
    agent = RepeatLabelAgent(task_opt)
    world = create_task(task_opt, agent)
    curr_sample = []
    while True:
        world.parley()
        curr_sample.append(world.acts[0].copy())
        if world.acts[0]['episode_done']:
            if len(curr_sample) >= TRAIN_TURNS:
                train_samples.append(curr_sample)
            curr_sample = []
        if world.epoch_done() or len(train_samples) > 2000:
            break

    # Set up temporary pools to pull tasks from; the masters (`samples`
    # and `train_samples`) are kept intact for refills.
    use_train_samples = train_samples.copy()
    # BUGFIX: the evaluation pool must start from the eval samples, not
    # the training samples (it is refilled from `samples` below).
    use_samples = samples.copy()

    # Set the task name to be the folder name
    opt['task'] = os.path.basename(os.path.dirname(os.path.abspath(__file__)))

    # append the contents of task_config.py to the configuration
    opt.update(task_config)

    # Select an agent_id that worker agents will be assigned in their world
    mturk_agent_roles = [LABEL_TYPE]

    opt['assignment_duration_in_seconds'] = 20 * 60

    # Instantiate an MTurkManager with the given options and a maximum number
    # of agents per world of 1 (based on the length of mturk_agent_ids)
    mturk_manager = MTurkManager(
        opt=opt,
        mturk_agent_ids=mturk_agent_roles,
        use_db=True,
    )
    mturk_manager.setup_server(
        task_directory_path=os.path.dirname(os.path.abspath(__file__)))

    # Create an onboard_function, which will be run for workers who have
    # accepted your task and must be completed before they are put in the
    # queue for a task world.
    completed_agents = []

    # Per-worker count of successfully completed training rounds.
    completed_train = {}

    def run_onboard(worker):
        nonlocal completed_agents
        if worker.worker_id in completed_agents:
            # Already passed onboarding during this run.
            return
        else:
            world = LightEvalTestWorld(opt=opt, mturk_agent=worker)
            while not world.episode_done():
                world.parley()
            if world.did_complete:
                completed_agents.append(worker.worker_id)
            else:
                print(worker.worker_id, 'Failed the onboarding')
            world.shutdown()
            return world.prep_save_data([worker])

    mturk_manager.set_onboard_function(onboard_function=run_onboard)

    try:
        # Initialize run information
        mturk_manager.start_new_run()

        # Set up the sockets and threads to recieve workers
        mturk_manager.ready_to_accept_workers()

        # Create the hits as specified by command line arguments
        mturk_manager.create_hits(qualifications=[])

        # Check workers eligiblity acts as a filter, and should return
        # the list of all workers currently eligible to work on the task
        def check_workers_eligibility(workers):
            return workers

        eligibility_function = {
            'func': check_workers_eligibility,
            'multiple': True,
        }

        # Assign worker roles is used to determine what the role each worker
        # in the given worker list will play.
        def assign_worker_roles(workers):
            workers[0].id = LABEL_TYPE

        # Define the task function, which will be run with workers that are
        # as the main task.
        global run_conversation

        def run_conversation(mturk_manager, opt, workers):
            nonlocal completed_train
            nonlocal use_samples
            nonlocal use_train_samples
            # Workers evaluate against the training set until they pass
            # TRAININGS rounds; after that they get real eval samples.
            worker_id = workers[0].worker_id
            use_train = True
            if worker_id not in completed_train:
                completed_train[worker_id] = 0
            if completed_train[worker_id] >= TRAININGS:
                use_train = False

            # Create the real task world
            if not use_train:
                if len(use_samples) == 0:
                    # reset the pool if none are left
                    use_samples = samples.copy()
                sample = use_samples.pop()
            else:
                if len(use_train_samples) == 0:
                    # reset the pool if none are left
                    use_train_samples = train_samples.copy()
                # BUGFIX: pop from the temporary training pool, not the
                # master list — popping the master would drain it so
                # later refills produce empty pools.
                sample = use_train_samples.pop()

            world = LightEvalTaskWorld(
                opt=opt,
                mturk_agents=workers,
                sample=sample,
                use_train=use_train,
                max_wrong=MAX_WRONG,
            )
            # run the world to completion
            while not world.episode_done():
                world.parley()

            # shutdown and review the work
            world.shutdown()
            world.review_work()

            # Return unfinished eval samples to the master pool so they
            # are evaluated by someone else.
            if not world.completed and not use_train:
                samples.append(sample)
            if use_train and world.completed:
                completed_train[worker_id] += 1
                print('Worker passed train: ', worker_id)

            # Return the contents for saving
            return world.prep_save_data(workers)

        # Begin the task, allowing mturk_manager to start running the task
        # world on any workers who connect
        mturk_manager.start_task(eligibility_function=eligibility_function,
                                 assign_role_function=assign_worker_roles,
                                 task_function=run_conversation)

    except BaseException:
        raise
    finally:
        print('Accepted agents:', repr(completed_agents))
        # Shutdown the manager and free all related resources
        mturk_manager.shutdown()
# ---- Example 8 ----
def main():
    """Handles setting up and running a ParlAI-MTurk task by instantiating
    an MTurk manager and configuring it for the context_evaluator task
    """
    # Get relevant arguments
    shared_utils.disable_logging()
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    argparser.add_context_evaluation_args()
    opt = argparser.parse_args(print_args=False)

    # Set the task name to be the folder name
    opt['task'] = os.path.basename(os.path.dirname(os.path.abspath(__file__)))

    # Append the contents of task_config.py to the configuration
    opt.update(task_configs['general'])
    opt.update(task_configs[opt['dataset']])
    opt.update(task_configs['sandbox' if opt['is_sandbox'] else 'live'])
    opt.update(task_configs[opt['prompt_type']])
    pprint(opt)
    if opt['evaluation_data_dir'] is not None:
        assert opt['dataset'] in opt[
            'evaluation_data_dir'], 'Dataset name must be in evaluation data dir name.'

    # Load data to evaluate
    evaluation_data = None
    if opt['evaluation_data_dir'] is not None:
        evaluation_data = {}
        for filename in os.listdir(opt['evaluation_data_dir']):
            with open(os.path.join(opt['evaluation_data_dir'],
                                   filename)) as json_file:
                # Key each file's contents by its name minus the trailing
                # '.json' extension (filename[:-5]).
                evaluation_data[filename[:-5]] = json.load(json_file)

    # Track which evaluation splits have been / are being worked on:
    #  - active_workers_by_split: (q_spl, o_spl) -> # workers currently active
    #  - incomplete_hits_by_split: (q_spl, o_spl) -> # HITs still needed
    #  - active_workers_per_incomplete_hit_by_split: load ratio used below to
    #    route each new worker to the least-covered split
    global active_workers_per_incomplete_hit_by_split, active_workers_by_split, incomplete_hits_by_split
    # The values in these maps should always be non-negative
    active_workers_per_incomplete_hit_by_split, active_workers_by_split, incomplete_hits_by_split = {}, {}, {}
    for q_spl in range(opt['question_splits']):
        # Only the 'quote and question' prompt type fans out over answer
        # options; all other prompt types use a single option split.
        option_splits = opt['num_options'] if opt['prompt_type'] in {
            'quote and question'
        } else 1
        for o_spl in range(option_splits):
            active_workers_by_split[(q_spl, o_spl)] = 0
            # NOTE(review): true division — the per-split HIT count may be
            # fractional when num_conversations isn't evenly divisible.
            incomplete_hits_by_split[(
                q_spl,
                o_spl)] = opt['num_conversations'] / (opt['question_splits'] *
                                                      option_splits)
            active_workers_per_incomplete_hit_by_split[(
                q_spl, o_spl)] = (active_workers_by_split[(q_spl, o_spl)] /
                                  incomplete_hits_by_split[(q_spl, o_spl)])

    # Track stats about how many people have passed onboarding
    global num_passed_agents, num_total_agents
    num_passed_agents, num_total_agents = 0, 0

    # Initialize a dataset agent, which we will get quote from
    task_class = getattr(
        importlib.import_module('parlai.tasks.' + opt['dataset'] + '.agents'),
        'IndexTeacher')
    task_opt = opt.copy()

    # Instantiate an MTurkManager with the given options and a maximum number
    # of agents per world of 1 (based on the length of mturk_agent_ids)
    mturk_manager = MTurkManager(
        opt=opt,
        mturk_agent_ids=[opt['mturk_agent_id']],
        use_db=True,
    )
    mturk_manager.setup_server(
    )  # Can pass in os.path.dirname(os.path.abspath(__file__))

    # Create an onboard_function, which will be run for workers who have
    # accepted your task and must be completed before they are put in the
    # queue for a task world.
    def run_onboard(worker):
        world = ContextEvaluationOnboardWorld(opt=opt, mturk_agent=worker)
        while not world.episode_done():
            world.parley()
        # passed_test is None when no test was administered; only workers who
        # actually took the test are counted in the pass-rate stats.
        if world.passed_test is not None:
            global num_passed_agents
            num_passed_agents += world.passed_test
            global num_total_agents
            num_total_agents += 1
            print('TEST PASS RATE:', num_passed_agents, '/', num_total_agents)
        world.shutdown()
        return world.prep_save_data([worker])

    # If we want to use the above onboard function, we can replace the below
    # with set_onboard_function(onboard_function=run_onboard) (onboard_function=None to skip)
    mturk_manager.set_onboard_function(onboard_function=run_onboard)

    try:
        # Initialize run information
        mturk_manager.start_new_run()

        # Set up the sockets and threads to recieve workers
        mturk_manager.ready_to_accept_workers()

        # Create the hits as specified by command line arguments
        mturk_manager.create_hits()

        # Check workers eligiblity acts as a filter, and should return
        # the list of all workers currently eligible to work on the task
        def check_workers_eligibility(workers):
            return workers

        eligibility_function = {
            'func': check_workers_eligibility,
            'multiple': True,
        }

        # Assign worker roles is used to determine what the role each worker
        # in the given worker list will play. Setting `id` to None will return
        # the worker to the pool rather than putting them in a given task,
        # which is useful for having tasks with different possible worker
        # counts.
        def assign_worker_roles(workers):
            workers[0].id = task_opt['mturk_agent_id']

        # Define the task function, which will be run with workers that are
        # as the main task.
        global run_conversation

        def run_conversation(mturk_manager, opt, workers):
            # create a task agent to ask the questions
            # Pick the split with the smallest active-workers-per-incomplete-
            # HIT ratio, i.e. route this worker to the least-covered split.
            q_spl, o_spl = min(
                active_workers_per_incomplete_hit_by_split,
                key=active_workers_per_incomplete_hit_by_split.get)
            active_workers_by_split[(q_spl, o_spl)] += 1
            active_workers_per_incomplete_hit_by_split[(
                q_spl, o_spl)] = (active_workers_by_split[(q_spl, o_spl)] /
                                  incomplete_hits_by_split[(q_spl, o_spl)])
            task_opt['question_split_no'] = q_spl
            task_opt['option_split_no'] = o_spl
            opt['question_split_no'] = q_spl
            opt['option_split_no'] = o_spl
            print('Worker starting...')
            print('active_workers_by_split:', active_workers_by_split)
            print('incomplete_hits_by_split:', incomplete_hits_by_split)
            print('active_workers_per_incomplete_hit_by_split:',
                  active_workers_per_incomplete_hit_by_split)

            task = task_class(task_opt)
            # Create the task world
            world = ContextEvaluationWorld(opt=opt,
                                           task=task,
                                           mturk_agent=workers[0],
                                           evaluation_data=evaluation_data)
            # run the world to completion
            while not world.episode_done():
                world.parley()

            # shutdown and review the work
            world.shutdown()
            world.review_work()

            # Worker is done: decrement the active count (clamped at zero);
            # if the HIT completed cleanly with no reject reasons, one fewer
            # HIT is needed for this split.
            active_workers_by_split[(q_spl, o_spl)] = max(
                0, active_workers_by_split[(q_spl, o_spl)] - 1)
            if world.hit_done and (len(world.reject_reasons) == 0):
                incomplete_hits_by_split[(q_spl, o_spl)] = max(
                    0, incomplete_hits_by_split[(q_spl, o_spl)] - 1)
            # An inf ratio marks a fully-covered split so min() above never
            # picks it while other splits still need HITs.
            active_workers_per_incomplete_hit_by_split[(q_spl, o_spl)] = (
                float('inf') if incomplete_hits_by_split[(q_spl, o_spl)] <= 0
                else active_workers_by_split[(q_spl, o_spl)] /
                incomplete_hits_by_split[(q_spl, o_spl)])
            print('Worker finishing...')
            print('active_workers_by_split:', active_workers_by_split)
            print('incomplete_hits_by_split:', incomplete_hits_by_split)
            print('active_workers_per_incomplete_hit_by_split:',
                  active_workers_per_incomplete_hit_by_split)
            if max(list(incomplete_hits_by_split.values())) <= 0:
                print('********** COMPLETED HITS! **********',
                      str(datetime.datetime.now()))
                mturk_manager.completed_conversations = mturk_manager.num_conversations  # Signal no more HITs needed

            # Return the contents for saving
            return world.prep_save_data(workers)

        # Begin the task, allowing mturk_manager to start running the task
        # world on any workers who connect
        mturk_manager.start_task(eligibility_function=eligibility_function,
                                 assign_role_function=assign_worker_roles,
                                 task_function=run_conversation)
    except BaseException:
        raise
    finally:
        # Any hits that aren't claimed or completed have to be shut down. Must
        # keep the world running until that point.
        mturk_manager.expire_all_unassigned_hits()
        # Shutdown the manager and free all related resources
        mturk_manager.shutdown()
Ejemplo n.º 9
0
def main():
    """
        Image Chat data collection task.

        A worker is shown an image and part of a conversation, and is given a
        personality with which the worker should continue the conversation.
    """
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    argparser.add_argument('-min_t',
                           '--min_turns',
                           default=3,
                           type=int,
                           help='minimum number of turns')
    argparser.add_argument('-mt',
                           '--max_turns',
                           default=5,
                           type=int,
                           help='maximal number of chat turns')
    argparser.add_argument('-mx_rsp_time',
                           '--max_resp_time',
                           default=1800,
                           type=int,
                           help='time limit for entering a dialog message')
    argparser.add_argument('-mx_onb_time',
                           '--max_onboard_time',
                           type=int,
                           default=300,
                           # BUG FIX: adjacent literals previously rendered as
                           # "time limit for turkerin onboarding" (no space).
                           help='time limit for turker in onboarding')
    argparser.add_argument('-ni',
                           '--num_images',
                           type=int,
                           default=10,
                           # BUG FIX: backslash continuation embedded a long
                           # run of spaces in the displayed help text.
                           help='number of images to show to turker')
    argparser.add_argument('--auto-approve-delay',
                           type=int,
                           default=3600 * 24 * 5,
                           help='how long to wait for auto approval')
    argparser.add_argument('--second-response',
                           type='bool',
                           default=False,
                           help='Specify if getting responses to responses '
                           'to original comment')
    ImageChatTeacher.add_cmdline_args(argparser)

    opt = argparser.parse_args()
    # The task name is the name of the directory containing this script
    directory_path = os.path.dirname(os.path.abspath(__file__))
    opt['task'] = os.path.basename(directory_path)
    if 'data_path' not in opt:
        opt['data_path'] = os.getcwd() + '/data/' + opt['task']
    # Pick the task configuration matching the response round being collected
    opt.update(config_second if opt['second_response'] else config_first)

    mturk_agent_ids = [RESPONDER]
    mturk_manager = MTurkManager(opt=opt, mturk_agent_ids=mturk_agent_ids)

    # Generators hand each worker a personality and an image/example to react to
    personality_generator = PersonalityGenerator(opt)
    example_generator = ExampleGenerator(opt)
    mturk_manager.setup_server(task_directory_path=directory_path)

    try:
        mturk_manager.start_new_run()
        mturk_manager.ready_to_accept_workers()

        def run_onboard(worker):
            # Attach the shared generators to the worker before onboarding so
            # the onboarding world can draw from them.
            worker.personality_generator = personality_generator
            worker.example_generator = example_generator
            world = RoleOnboardWorld(opt, worker)
            world.parley()
            world.shutdown()

        mturk_manager.set_onboard_function(onboard_function=run_onboard)
        mturk_manager.create_hits()

        def check_worker_eligibility(worker):
            # All workers are eligible for this task
            return True

        def assign_worker_roles(workers):
            for w in workers:
                w.id = mturk_agent_ids[0]

        def run_conversation(mturk_manager, opt, workers):
            # Run a single-worker image-chat world to completion, then save
            # and review the collected data.
            agents = workers[:]
            conv_idx = mturk_manager.conversation_index
            world = MTurkImageChatWorld(
                opt,
                agents=agents,
                world_tag='conversation t_{}'.format(conv_idx),
            )
            while not world.episode_done():
                world.parley()
            world.save_data()

            world.shutdown()
            world.review_work()

        mturk_manager.start_task(eligibility_function=check_worker_eligibility,
                                 assign_role_function=assign_worker_roles,
                                 task_function=run_conversation)

    except BaseException:
        raise
    finally:
        # Expire unclaimed HITs, then free all manager resources
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
Ejemplo n.º 10
0
def main():
    """One agent — a model or an MTurk worker — talks to an MTurk worker to
    negotiate a deal.
    """
    # Build the parser with the task-specific command-line options.
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    argparser.add_argument(
        '-min_t', '--min_turns', default=5, type=int,
        help='minimum number of turns')
    argparser.add_argument(
        '-mt', '--max_turns', default=10, type=int,
        help='maximal number of chat turns')
    argparser.add_argument(
        '-mx_rsp_time', '--max_resp_time', default=150, type=int,
        help='time limit for entering a dialog message')
    argparser.add_argument(
        '--ag_shutdown_time', default=120, type=int,
        help='time limit for entering a dialog message')
    argparser.add_argument(
        '--persona-type', default='both', type=str,
        choices=['both', 'self', 'other'],
        help='Which personas to load from personachat')
    opt = argparser.parse_args()

    # The task name is the name of the directory containing this script.
    task_dir = os.path.dirname(os.path.abspath(__file__))
    opt['task'] = os.path.basename(task_dir)
    opt['extract_personas_path'] = os.path.join(opt['datapath'], opt['task'])
    opt.update(task_config)

    agent_ids = ['PERSON_1']

    mturk_manager = MTurkManager(opt=opt, mturk_agent_ids=agent_ids)
    mturk_manager.setup_server(task_directory_path=task_dir)

    # Share the persona generator with the worlds via opt.
    opt['personas_generator'] = PersonasGenerator(opt)

    try:
        mturk_manager.start_new_run()
        mturk_manager.create_hits()

        if not opt['is_sandbox']:
            # ADD BLOCKED WORKERS HERE
            blocked_workers = []
            for blocked_id in blocked_workers:
                mturk_manager.block_worker(blocked_id, 'We found that you have unexpected behaviors in our previous HITs. For more questions please email us.')

        def run_onboard(worker):
            # No onboarding step for this task.
            pass

        mturk_manager.set_onboard_function(onboard_function=run_onboard)
        mturk_manager.ready_to_accept_workers()

        def check_worker_eligibility(worker):
            # Every worker may participate.
            return True

        def assign_worker_roles(workers):
            for idx, w in enumerate(workers):
                w.id = agent_ids[idx % len(agent_ids)]

        def run_conversation(mturk_manager, opt, workers):
            # Single-worker world: run to completion, save, then review.
            world = RephrasePersonaWorld(opt, workers[0])
            while not world.episode_done():
                world.parley()
            world.save_data()
            world.shutdown()
            world.review_work()

        mturk_manager.start_task(
            eligibility_function=check_worker_eligibility,
            assign_role_function=assign_worker_roles,
            task_function=run_conversation,
        )

    except BaseException:
        raise
    finally:
        # Expire unclaimed HITs and free all manager resources.
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
Ejemplo n.º 11
0
def main():
    """Handles setting up and running a ParlAI-MTurk task by instantiating
    an MTurk manager and configuring it for the qa_data_collection task
    """
    # Get relevant arguments
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    opt = argparser.parse_args()

    # Set the task name to be the folder name
    opt['task'] = os.path.basename(os.path.dirname(os.path.abspath(__file__)))
    opt['assignment_duration_in_seconds'] = 10000

    # append the contents of task_config.py to the configuration
    opt.update(task_config)

    # Initialize a teacher agent, which we will get premises from
    module_name = 'parlai.tasks.squad2.agents'
    class_name = 'DefaultTeacher'
    my_module = importlib.import_module(module_name)
    task_class = getattr(my_module, class_name)
    task_opt = opt.copy()
    task_opt['datatype'] = 'train'
    task_opt['datapath'] = opt['datapath']
    # NOTE(review): `assert` is stripped under `python -O`; an explicit raise
    # would enforce this precondition unconditionally.
    assert (
        task_opt['num_workers'] % 2 == 0 and task_opt['num_workers'] >= 4
    ), "The game only supports even number of workers, with a minimum of 4 people."

    # Select an agent_id that worker agents will be assigned in their world
    persons = {}
    mturk_agent_roles = []
    for i in range(1, task_opt['num_workers'] + 1):
        persons[i] = 'Person ' + str(i)
        mturk_agent_roles.append(persons[i])

    # Instantiate an MTurkManager with the given options and a maximum number
    # of agents per world of 1 (based on the length of mturk_agent_ids)
    # NOTE(review): the sentence above looks copy-pasted — mturk_agent_roles
    # holds num_workers ids here, not 1; confirm intent.
    mturk_manager = MTurkManager(opt=opt,
                                 mturk_agent_ids=mturk_agent_roles,
                                 use_db=True)

    mturk_manager.setup_server(
        task_directory_path=os.path.dirname(os.path.abspath(__file__)))

    # Counters used to hand out distinct role names across onboarding and
    # the main task (closed over by the nested functions below).
    onboard_role_index = 0
    role_index = 0

    # Create an onboard_function, which will be run for workers who have
    # accepted your task and must be completed before they are put in the
    # queue for a task world.
    onboarded_workers = []

    def run_onboard(worker):
        nonlocal onboard_role_index
        role = "Onboard_role" + str(
            onboard_role_index
        )  # mturk_agent_roles[role_index % len(mturk_agent_roles)]
        onboard_role_index += 1
        worker.update_agent_id('Onboarding {}'.format(role))
        worker.onboard_demo_role = role
        world = OnboardingWorld(opt=opt, mturk_agent=worker)
        if worker.worker_id not in onboarded_workers:
            while not world.episode_done():
                onboard_test = world.parley()
            # NOTE(review): the worker is recorded as onboarded only when the
            # final parley reports episode_done == False — confirm this
            # matches OnboardingWorld's return contract.
            if onboard_test['episode_done'] == False:
                onboarded_workers.append(worker.worker_id)
        else:
            # Repeat workers skip the onboarding world entirely.
            onboard_test = {'episode_done': False}
        print('Workers onboarded:', onboarded_workers)
        world.shutdown()
        return world.prep_save_data([worker]), onboard_test['episode_done']

    # If we want to use the above onboard function, we can replace the below
    # with set_onboard_function(onboard_function=run_onboard)
    mturk_manager.set_onboard_function(onboard_function=run_onboard)

    try:
        # Initialize run information
        mturk_manager.start_new_run()

        # Set up the sockets and threads to recieve workers
        mturk_manager.ready_to_accept_workers()

        # Create the hits as specified by command line arguments
        qualifications = []
        # MTurk system qualification '00000000000000000040' ("Number of HITs
        # Approved"): require more than 5000 approved HITs.
        qualifications.append({
            'QualificationTypeId': '00000000000000000040',
            'Comparator': 'GreaterThan',
            'IntegerValues': [5000],
            'ActionsGuarded': 'PreviewAndAccept',
        })
        # PreviewAndAccept
        # MTurk system qualification '000000000000000000L0' ("PercentAssignments
        # Approved"): require an approval rate above 98%.
        qualifications.append({
            'QualificationTypeId': '000000000000000000L0',
            'Comparator': 'GreaterThan',
            'IntegerValues': [98],
            'ActionsGuarded': 'PreviewAndAccept',
        })
        mturk_manager.create_hits(qualifications)

        # Check workers eligiblity acts as a filter, and should return
        # the list of all workers currently eligible to work on the task
        # Can be used to pair workers that meet certain criterea
        def check_workers_eligibility(workers):
            # Keep at most one worker per onboarding role so a game never has
            # duplicate roles.
            filled_roles = []
            use_workers = []
            for worker in workers:
                if worker.onboard_demo_role not in filled_roles:
                    use_workers.append(worker)
                    filled_roles.append(worker.onboard_demo_role)
            return use_workers

        eligibility_function = {
            'func': check_workers_eligibility,
            'multiple': True
        }

        # Assign worker roles is used to determine what the role each worker
        # in the given worker list will play. Setting `id` to None will return
        # the worker to the pool rather than putting them in a given task,
        # which is useful for having tasks with different possible worker
        # counts.
        def assign_worker_roles(workers):
            nonlocal role_index
            # Hand out 'Person N' roles round-robin across joining workers.
            for worker in workers:
                role = mturk_agent_roles[role_index % len(mturk_agent_roles)]
                role_index += 1
                worker.update_agent_id('Onboarding {}'.format(role))
                worker.demo_role = role

                worker.id = worker.demo_role

        # Define the task function, which will be run with workers that are
        # as the main task.
        global run_conversation

        def run_conversation(mturk_manager, opt, workers):
            # Create a task agent to get prompts from SQuAD 2.0
            task = task_class(task_opt)
            # Create the task world
            world = MultiRoleAgentWorld(opt=opt,
                                        task=task,
                                        mturk_agents=workers)
            # run the world to completion
            while not world.episode_done():
                world.parley()

            # shutdown and review the work
            world.shutdown()
            world.review_work()

            # Return the contents for saving
            return world.prep_save_data(workers)

        # Begin the task, allowing mturk_manager to start running the task
        # world on any workers who connect
        mturk_manager.start_task(
            eligibility_function=eligibility_function,
            assign_role_function=assign_worker_roles,
            task_function=run_conversation,
        )
    except BaseException:
        raise
    finally:
        # Any hits that aren't claimed or completed have to be shut down. Must
        # keep the world running until that point.
        mturk_manager.expire_all_unassigned_hits()
        # Shutdown the manager and free all related resources
        mturk_manager.shutdown()
Ejemplo n.º 12
0
Archivo: run.py Proyecto: zhf459/ParlAI
def main():
    """Set up and run the QA data-collection MTurk task: a single worker is
    shown SQuAD contexts by a teacher agent and provides data in response.
    """
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    opt = argparser.parse_args()
    # Task name is the name of the folder containing this script
    opt['task'] = os.path.basename(os.path.dirname(os.path.abspath(__file__)))
    opt.update(task_config)

    # Initialize a SQuAD teacher agent, which we will get context from
    module_name = 'parlai.tasks.squad.agents'
    class_name = 'DefaultTeacher'
    my_module = importlib.import_module(module_name)
    task_class = getattr(my_module, class_name)
    task_opt = {}
    task_opt['datatype'] = 'train'
    task_opt['datapath'] = opt['datapath']

    mturk_agent_id = 'Worker'
    mturk_manager = MTurkManager(opt=opt, mturk_agent_ids=[mturk_agent_id])
    mturk_manager.setup_server()

    # NOTE(review): this onboarding world is defined but onboarding is
    # disabled below (onboard_function=None); kept for easy re-enabling.
    def run_onboard(worker):
        world = QADataCollectionOnboardWorld(opt=opt, mturk_agent=worker)
        while not world.episode_done():
            world.parley()
        world.shutdown()

    mturk_manager.set_onboard_function(onboard_function=None)

    try:
        mturk_manager.start_new_run()
        mturk_manager.create_hits()

        mturk_manager.ready_to_accept_workers()

        def check_worker_eligibility(worker):
            # All workers are eligible for this task
            return True

        def get_worker_role(worker):
            return mturk_agent_id

        global run_conversation

        def run_conversation(mturk_manager, opt, workers):
            # Create a teacher to supply contexts, then run the collection
            # world with the single worker until the episode completes.
            task = task_class(task_opt)
            mturk_agent = workers[0]
            world = QADataCollectionWorld(opt=opt,
                                          task=task,
                                          mturk_agent=mturk_agent)
            while not world.episode_done():
                world.parley()
            world.shutdown()
            world.review_work()

        mturk_manager.start_task(eligibility_function=check_worker_eligibility,
                                 role_function=get_worker_role,
                                 task_function=run_conversation)
    # FIX: was a bare `except:` (PEP 8 violation); `except BaseException:`
    # is behavior-identical here and matches the sibling tasks in this file.
    except BaseException:
        raise
    finally:
        # Expire unclaimed HITs, then free all manager resources
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
Ejemplo n.º 13
0
def run_task(override_opt):
    """
    This task consists of an MTurk worker talking to a model and MTurker also evaluates
    each utterance of the bot for various buckets (see constants).

    :param override_opt: dict of option values applied as parser defaults
        (via set_params) before command-line parsing.
    """
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    argparser.add_argument('-num_t',
                           '--num_turns',
                           default=6,
                           type=int,
                           help='minimum number of turns')
    # NOTE(review): argparse's builtin `type=bool` makes any non-empty string
    # truthy ("False" -> True) — confirm ParlaiParser handles this; elsewhere
    # in this file the string form type='bool' is used instead.
    argparser.add_argument(
        '--task-model-parallel',
        default=True,
        type=bool,
        help='Whether to load models to be used with model_parallel True.',
    )
    argparser.add_argument(
        '--auto-approve-delay',
        dest='auto_approve_delay',
        type=int,
        default=3600 * 24 * 5,
        help='how long to wait for auto approval',
    )
    argparser.add_argument(
        '--max-resp-time',
        type=int,
        default=180,
        help='time limit for entering a dialog message',
    )
    argparser.add_argument(
        '--max-onboard-time',
        type=int,
        default=300,
        help='time limit accepting onboarding',
    )
    argparser.add_argument(
        '--base-save-folder',
        default=None,
        type=str,
        help='base folder for saving all crowdsourcing results',
    )
    argparser.add_argument(
        '--base-model-folder',
        default=None,
        type=str,
        help='base folder for loading model files from',
    )
    argparser.add_argument(
        '--onboard-worker-answer-folder',
        default=None,
        type=str,
        help=
        'base folder for saving all worker answer results during onboarding',
    )

    # Apply the caller-supplied overrides as defaults, then parse the CLI.
    argparser.set_params(**override_opt)
    opt = argparser.parse_args()

    directory_path = os.path.dirname(os.path.abspath(__file__))
    opt['task'] = os.path.basename(directory_path)

    opt['left_pane_text'] = LEFT_PANE_TEXT
    opt.update(TASK_CONFIG)

    # NOTE: you have to set all three of these opts to enforce the MTurk core
    # param max_hits_per_worker.
    #  - Without unique_qual_name, MTurkManager creates different qualification
    #    for each run (so a worker could do N hits per run) Also, the
    #    worker has to get to N HITs in at least one run or they won't be given
    #    the qualification.
    #  - allowed_conversations is like max concurrent conversations
    #    allowed_conversations needs to be 1 or the actual max would be N +
    #    allowed_conversations. Worker gets notified via frontend message that
    #    they aren't eligible (second description screen), UNLESS the frontend
    #    overwrites that functionality.
    # There's also still a race condition where the worker might be able to open
    # 1 extra task
    opt['unique_qual_name'] = 'turn_annotations_max_submissions'
    opt['max_hits_per_worker'] = 10
    opt['allowed_conversations'] = 3

    # Limits the number of models that can generate at once
    MAX_CONCURRENT_RESPONSES = 1
    semaphore = threading.Semaphore(MAX_CONCURRENT_RESPONSES)

    # Per-model completed-conversation counters, all starting at zero.
    run_statistics = copy.deepcopy(opt['conversations_needed'])
    run_statistics = {r: 0 for (r, v) in run_statistics.items()}
    onboard_statistics = {}

    # Results are bucketed by sandbox/live and by run date.
    save_folder = 'sandbox' if opt['is_sandbox'] else 'live'
    opt['save_folder'] = os.path.join(opt['base_save_folder'], save_folder,
                                      time.strftime("%Y_%m_%d"))
    os.makedirs(opt['save_folder'], exist_ok=True)

    print(
        f'Going to start collecting {opt["num_conversations"]} conversations, max_hits_per_worker: {opt["max_hits_per_worker"]}, reward: {opt["reward"]}, is_sandbox: {opt["is_sandbox"]}.'
    )

    # Create the models before it launches Heroku backend b/c takes a while
    models_needed = list(opt['conversations_needed'].keys())
    active_models = [
        m for m in models_needed if opt['conversations_needed'][m] > 0
    ]
    shared_bot_agents = TurkLikeAgent.get_bot_agents(opt,
                                                     active_models,
                                                     datapath=opt['datapath'])

    mturk_agent_ids = [AGENT_0]
    mturk_manager = MTurkManager(opt=opt, mturk_agent_ids=mturk_agent_ids)
    mturk_manager.setup_server(task_directory_path=directory_path)

    try:
        mturk_manager.start_new_run()
        mturk_manager.create_hits()

        def run_onboard(worker):
            # Run the onboarding world once and tally the resulting status.
            world = TurnAnnotationsOnboardWorld(opt, worker)
            status = world.parley()
            if status not in onboard_statistics:
                onboard_statistics[status] = 0
            onboard_statistics[status] += 1
            print(
                f'After onboard world parley. About to shutdown onboard world for {worker.worker_id}, status was: {status}. Total onboard statistics for this run are: {onboard_statistics}.'
            )
            world.shutdown()

        mturk_manager.set_onboard_function(onboard_function=run_onboard)
        mturk_manager.ready_to_accept_workers()

        def check_worker_eligibility(worker):
            # All workers are eligible for this task
            return True

        def assign_worker_roles(workers):
            workers[0].id = mturk_agent_ids[0]

        def run_conversation(mturk_manager, opt, workers):
            # Choose the model with the most conversations still needed so
            # data collection stays balanced across models.
            remaining_counts_needed = [
                (m, c - run_statistics[m])
                for (m, c) in opt['conversations_needed'].items()
            ]
            remaining_counts_needed.sort(reverse=True, key=lambda x: x[1])
            model_name = remaining_counts_needed[0][0]
            print(
                f'Remaining conversation counts needed: {remaining_counts_needed}'
            )

            # Get a bot and add it to the list of "workers"
            print(f'Choosing the "{model_name}" model for the bot.')
            agent = create_agent_from_shared(shared_bot_agents[model_name])
            bot_worker = TurkLikeAgent(
                opt,
                model_name=model_name,
                model_agent=agent,
                num_turns=opt['num_turns'],
                semaphore=semaphore,
            )
            workers_including_bot = workers + [bot_worker]

            # Exactly one human plus one bot per conversation.
            assert len(workers_including_bot) == 2

            conv_idx = mturk_manager.conversation_index
            world = TurnAnnotationsChatWorld(
                opt=opt,
                agents=workers_including_bot,
                num_turns=opt['num_turns'],
                max_resp_time=opt['max_resp_time'],
                tag='conversation t_{}'.format(conv_idx),
                annotations_config=ANNOTATIONS_CONFIG,
            )
            while not world.episode_done():
                print('About to parley')
                world.parley()
            model_nickname, convo_finished = world.save_data()

            # Only conversations that finished cleanly count toward the quota.
            if convo_finished:
                run_statistics[model_nickname] += 1

            world.shutdown()
            world.review_work()

        mturk_manager.start_task(
            eligibility_function=check_worker_eligibility,
            assign_role_function=assign_worker_roles,
            task_function=run_conversation,
        )

    except BaseException:
        raise
    finally:
        # Expire unclaimed HITs, then free all manager resources.
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
Ejemplo n.º 14
0
def main():
    """This task consists of an MTurk agent evaluating a wizard model. They
    are assigned a topic and asked to chat.

    Two modes, selected by ``--human-eval``:
      * model eval (default): one MTurk worker (``PERSON_1``) chats with a
        shared wizard-of-wikipedia bot;
      * human eval: two workers (``PERSON_1`` and ``PERSON_2``) chat with
        each other, no model involved.
    """
    # Timestamp used to name the log file and the save folder for this run.
    start_time = datetime.datetime.today().strftime('%Y-%m-%d-%H-%M')
    argparser = ParlaiParser(False, add_model_args=True)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    argparser.add_argument('-mt',
                           '--max-turns',
                           default=10,
                           type=int,
                           help='maximal number of chat turns')
    argparser.add_argument(
        '--max-resp-time',
        default=240,
        type=int,
        help='time limit for entering a dialog message',
    )
    argparser.add_argument(
        '--max-choice-time',
        type=int,
        default=300,
        help='time limit for turker'
        'choosing the topic',
    )
    argparser.add_argument(
        '--ag-shutdown-time',
        default=120,
        type=int,
        help='time limit for entering a dialog message',
    )
    argparser.add_argument('-rt',
                           '--range-turn',
                           default='3,5',
                           help='sample range of number of turns')
    # NOTE: type='bool' is a ParlAI-specific string-to-bool converter, not the
    # Python builtin (argparse's type=bool would treat any non-empty string as True).
    argparser.add_argument(
        '--human-eval',
        type='bool',
        default=False,
        help='human vs human eval, no models involved',
    )
    argparser.add_argument(
        '--auto-approve-delay',
        type=int,
        default=3600 * 24 * 1,
        help='how long to wait for auto approval',
    )
    argparser.add_argument(
        '--only-masters',
        type='bool',
        default=False,
        help='Set to true to use only master turks for '
        'this test eval',
    )
    argparser.add_argument(
        '--unique-workers',
        type='bool',
        default=False,
        help='Each worker must be unique',
    )
    argparser.add_argument('--mturk-log',
                           type=str,
                           default='data/mturklogs/{}.log'.format(start_time))

    def inject_override(opt, override_dict):
        # Copy override_dict both into opt['override'] (so ParlAI knows these
        # keys were explicitly overridden) and into the top level of opt.
        opt['override'] = override_dict
        for k, v in override_dict.items():
            opt[k] = v

    def get_logger(opt):
        # Configure the root logger: console always, plus a file handler when
        # 'mturk_log' is set. Also dumps the full config for reproducibility.
        logger = logging.getLogger()
        logger.setLevel(logging.INFO)

        fmt = logging.Formatter('%(asctime)s: [ %(message)s ]',
                                '%m/%d/%Y %I:%M:%S %p')
        console = logging.StreamHandler()
        console.setFormatter(fmt)
        logger.addHandler(console)
        if 'mturk_log' in opt:
            logfile = logging.FileHandler(opt['mturk_log'], 'a')
            logfile.setFormatter(fmt)
            logger.addHandler(logfile)
        logger.info('COMMAND: %s' % ' '.join(sys.argv))
        logger.info('-' * 100)
        logger.info('CONFIG:\n%s' % json.dumps(opt, indent=4, sort_keys=True))

        return logger

    # MODEL CONFIG
    # NOTE: please edit this to test your own models
    config = {
        'model':
        'projects:wizard_of_wikipedia:interactive_retrieval',
        'retriever_model_file':
        'models:wikipedia_full/tfidf_retriever/model',
        'responder_model_file':
        'models:wizard_of_wikipedia/full_dialogue_retrieval_model/model',
    }

    argparser.add_model_subargs(config['model'])  # add model args to opt
    start_opt = argparser.parse_args()

    inject_override(start_opt, config)

    # Only build the bot (and its shareable params) when a model is actually
    # being evaluated; in human-vs-human mode no model is loaded at all.
    if not start_opt.get('human_eval'):
        bot = create_agent(start_opt)
        shared_bot_params = bot.share()
    else:
        shared_bot_params = None

    if not start_opt['human_eval']:
        get_logger(bot.opt)
    else:
        get_logger(start_opt)

    if start_opt['human_eval']:
        folder_name = 'human_eval-{}'.format(start_time)
    else:
        folder_name = '{}-{}'.format(start_opt['model'], start_time)

    start_opt['task'] = os.path.basename(
        os.path.dirname(os.path.abspath(__file__)))
    if 'data_path' not in start_opt:
        start_opt['data_path'] = os.path.join(os.getcwd(), 'data',
                                              'wizard_eval', folder_name)
    start_opt.update(task_config)

    # One MTurk role when chatting with the bot, two when humans chat.
    if not start_opt.get('human_eval'):
        mturk_agent_ids = ['PERSON_1']
    else:
        mturk_agent_ids = ['PERSON_1', 'PERSON_2']

    mturk_manager = MTurkManager(opt=start_opt,
                                 mturk_agent_ids=mturk_agent_ids)

    topics_generator = TopicsGenerator(start_opt)
    directory_path = os.path.dirname(os.path.abspath(__file__))
    mturk_manager.setup_server(task_directory_path=directory_path)
    # Maps worker_id -> assigned role; filled during onboarding (human eval only).
    worker_roles = {}
    # Mutable counter shared with the onboarding closure to alternate roles.
    connect_counter = AttrDict(value=0)

    try:
        mturk_manager.start_new_run()
        agent_qualifications = []
        if not start_opt['is_sandbox']:
            # assign qualifications
            if start_opt['only_masters']:
                agent_qualifications.append(MASTER_QUALIF)
            if start_opt['unique_workers']:
                # 'DoesNotExist' comparator means a worker who already holds
                # this qualification (i.e. completed one HIT) can't take more.
                qual_name = 'UniqueChatEval'
                qual_desc = (
                    'Qualification to ensure each worker completes a maximum '
                    'of one of these chat/eval HITs')
                qualification_id = mturk_utils.find_or_create_qualification(
                    qual_name, qual_desc, False)
                print('Created qualification: ', qualification_id)
                UNIQUE_QUALIF = {
                    'QualificationTypeId': qualification_id,
                    'Comparator': 'DoesNotExist',
                    'RequiredToPreview': True,
                }
                start_opt['unique_qualif_id'] = qualification_id
                agent_qualifications.append(UNIQUE_QUALIF)
        mturk_manager.create_hits(qualifications=agent_qualifications)

        def run_onboard(worker):
            # Assign a role (alternating in human eval) and have the worker
            # choose a topic before the real conversation starts.
            if start_opt['human_eval']:
                role = mturk_agent_ids[connect_counter.value %
                                       len(mturk_agent_ids)]
                connect_counter.value += 1
                worker_roles[worker.worker_id] = role
            else:
                role = 'PERSON_1'
            worker.topics_generator = topics_generator
            world = TopicChooseWorld(start_opt, worker, role=role)
            world.parley()
            world.shutdown()

        mturk_manager.set_onboard_function(onboard_function=run_onboard)
        mturk_manager.ready_to_accept_workers()

        def check_single_worker_eligibility(worker):
            return True

        def check_multiple_workers_eligibility(workers):
            # Pick at most one worker per role; only start a conversation when
            # both PERSON_1 and PERSON_2 are present, otherwise match nobody.
            valid_workers = {}
            for worker in workers:
                worker_id = worker.worker_id
                if worker_id not in worker_roles:
                    print('Something went wrong')
                    continue
                role = worker_roles[worker_id]
                if role not in valid_workers:
                    valid_workers[role] = worker
                if len(valid_workers) == 2:
                    break
            return valid_workers.values() if len(valid_workers) == 2 else []

        if not start_opt['human_eval']:
            eligibility_function = {
                'func': check_single_worker_eligibility,
                'multiple': False,
            }
        else:
            eligibility_function = {
                'func': check_multiple_workers_eligibility,
                'multiple': True,
            }

        def assign_worker_roles(workers):
            if start_opt['human_eval']:
                for worker in workers:
                    worker.id = worker_roles[worker.worker_id]
            else:
                for index, worker in enumerate(workers):
                    worker.id = mturk_agent_ids[index % len(mturk_agent_ids)]

        def run_conversation(mturk_manager, opt, workers):
            # NOTE(review): this closure mostly reads start_opt, but takes
            # 'ag_shutdown_time' from the passed-in opt — both hold the same
            # values here, yet the mixed usage is worth unifying.
            conv_idx = mturk_manager.conversation_index
            world = WizardEval(
                opt=start_opt,
                agents=workers,
                range_turn=[
                    int(s) for s in start_opt['range_turn'].split(',')
                ],
                max_turn=start_opt['max_turns'],
                max_resp_time=start_opt['max_resp_time'],
                model_agent_opt=shared_bot_params,
                world_tag='conversation t_{}'.format(conv_idx),
                agent_timeout_shutdown=opt['ag_shutdown_time'],
            )
            while not world.episode_done():
                world.parley()
            world.save_data()

            world.shutdown()
            gc.collect()

        mturk_manager.start_task(
            eligibility_function=eligibility_function,
            assign_role_function=assign_worker_roles,
            task_function=run_conversation,
        )

    except BaseException:
        raise
    finally:
        # Always clean up HITs and the server, even on Ctrl-C or errors.
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
# Ejemplo n.º 15  (scrape/extraction artifact — not code; kept as a comment)
# 0
def main():
    """
        Personality-Captions Data Collection Task.

        This is the task setup used when collecting the Personality-Captions
        dataset (https://arxiv.org/abs/1810.10665).

        A single MTurk worker (the COMMENTER) is shown images and asked to
        write captions, optionally conditioned on an assigned personality,
        depending on --task-type.
    """
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    PersonalityCaptionsTeacher.add_cmdline_args(argparser)
    argparser.add_argument(
        '-ni',
        '--num_images',
        type=int,
        default=10,
        help='number of images to show \
                           to turker',
    )
    argparser.add_argument(
        '-mx_rsp_time',
        '--max_resp_time',
        default=1800,
        type=int,
        help='time limit for entering a dialog message',
    )
    argparser.add_argument(
        '-mx_onb_time',
        '--max_onboard_time',
        type=int,
        default=300,
        help='time limit for turker'
        'in onboarding',
    )
    argparser.add_argument(
        '--auto-approve-delay',
        type=int,
        default=3600 * 24 * 5,
        help='how long to wait for  \
                           auto approval',
    )
    # NOTE: type='bool' is ParlAI's string-to-bool converter, not the builtin.
    argparser.add_argument(
        '--multiple-personality',
        type='bool',
        default=False,
        help='for getting captions with '
        'multiple personalities for same image',
    )
    argparser.add_argument(
        '--task-type',
        type=str,
        default='personality',
        choices=['personality', 'no_personality', 'caption'],
        help='Task Type - specify `personality` for '
        'original task, `no_personality` for the same task '
        'instructions but with no personality, and '
        '`caption` for the task but asking for a normal '
        'caption.',
    )

    opt = argparser.parse_args()

    directory_path = os.path.dirname(os.path.abspath(__file__))
    opt['task'] = os.path.basename(directory_path)
    if 'data_path' not in opt:
        opt['data_path'] = os.getcwd() + '/data/' + opt['task']
    # Merge in the task-type-specific frontend config (instructions, etc.).
    opt.update(TASK_TYPE_TO_CONFIG[opt['task_type']])
    # Download/prepare the personality-captions data before launching.
    build_pc_data(opt)
    mturk_agent_ids = [COMMENTER]
    mturk_manager = MTurkManager(opt=opt,
                                 mturk_agent_ids=mturk_agent_ids,
                                 use_db=True)
    # Generators hand out personalities/images so each worker gets fresh ones.
    personality_generator = PersonalityGenerator(opt)
    image_generator = ImageGenerator(opt)
    personality_and_image_generator = PersonalityAndImageGenerator(opt)
    mturk_manager.setup_server(task_directory_path=directory_path)

    try:
        mturk_manager.start_new_run()

        def run_onboard(worker):
            # Attach the shared generators to the worker, then run the
            # instructions/onboarding world.
            worker.personality_generator = personality_generator
            worker.image_generator = image_generator
            worker.personality_and_image_generator = personality_and_image_generator
            world = RoleOnboardWorld(opt, worker)
            world.parley()
            world.shutdown()

        mturk_manager.set_onboard_function(onboard_function=run_onboard)
        mturk_manager.ready_to_accept_workers()
        mturk_manager.create_hits()

        def check_worker_eligibility(worker):
            return True

        def assign_worker_roles(workers):
            for w in workers:
                w.id = mturk_agent_ids[0]

        def run_conversation(mturk_manager, opt, workers):
            agents = workers[:]
            conv_idx = mturk_manager.conversation_index
            world = MTurkPersonalityCaptionsWorld(
                opt,
                agents=agents,
                world_tag='conversation t_{}'.format(conv_idx))
            while not world.episode_done():
                world.parley()
            world.save_data()

            world.shutdown()
            world.review_work()

        mturk_manager.start_task(
            eligibility_function=check_worker_eligibility,
            assign_role_function=assign_worker_roles,
            task_function=run_conversation,
        )

    except BaseException:
        raise
    finally:
        # Always clean up HITs and the server, even on Ctrl-C or errors.
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
# Ejemplo n.º 16  (scrape/extraction artifact — not code; kept as a comment)
# 0
def main():
    """This task consists of an MTurk agent evaluating a chit-chat model. They
    are asked to chat to the model adopting a specific persona. After their
    conversation, they are asked to evaluate their partner on several metrics.

    A single worker (PERSON_1) is paired with a shared KV-memory-network bot
    (configurable below via set_defaults).
    """
    argparser = ParlaiParser(False, add_model_args=True)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    argparser.add_argument('-mt',
                           '--max-turns',
                           default=10,
                           type=int,
                           help='maximal number of chat turns')
    argparser.add_argument('--max-resp-time',
                           default=180,
                           type=int,
                           help='time limit for entering a dialog message')
    argparser.add_argument('--max-persona-time',
                           type=int,
                           default=300,
                           help='time limit for turker'
                           'entering the persona')
    argparser.add_argument('--ag-shutdown-time',
                           default=120,
                           type=int,
                           help='time limit for entering a dialog message')
    argparser.add_argument('--persona-type',
                           default='both',
                           type=str,
                           choices=['both', 'self', 'other'],
                           help='Which personas to load from personachat')
    # NOTE: type='bool' is ParlAI's string-to-bool converter, not the builtin.
    argparser.add_argument('--revised',
                           default=False,
                           type='bool',
                           help='Whether to use revised personas')
    argparser.add_argument('-rt',
                           '--range-turn',
                           default='5,6',
                           help='sample range of number of turns')
    argparser.add_argument('--auto-approve-delay',
                           type=int,
                           default=3600 * 24 * 1,
                           help='how long to wait for  \
                           auto approval')

    # ADD MODEL ARGS HERE (KVMEMNN ADDED AS AN EXAMPLE)
    argparser.set_defaults(
        model='projects.personachat.kvmemnn.kvmemnn:Kvmemnn',
        model_file='models:convai2/kvmemnn/model',
    )
    opt = argparser.parse_args()

    # add additional model args
    opt['no_cuda'] = True
    # NOTE(review): ParlAI's 'override' opt is normally a dict of overridden
    # key/value pairs; a list of key names works only if downstream code just
    # checks membership — confirm against the ParlAI version in use.
    opt['override'] = ['interactive_mode']
    opt['interactive_mode'] = True

    # Build the model once; share() lets each conversation get a cheap copy.
    bot = create_agent(opt)
    shared_bot_params = bot.share()

    opt['task'] = os.path.basename(os.path.dirname(os.path.abspath(__file__)))
    if 'data_path' not in opt:
        opt['data_path'] = os.getcwd() + '/data/' + opt['task']
    opt.update(task_config)

    mturk_agent_ids = ['PERSON_1']

    mturk_manager = MTurkManager(opt=opt, mturk_agent_ids=mturk_agent_ids)

    persona_generator = PersonasGenerator(opt)
    mturk_manager.setup_server()

    try:
        mturk_manager.start_new_run()
        mturk_manager.create_hits()

        if not opt['is_sandbox']:
            # ADD BLOCKED WORKERS HERE
            blocked_worker_list = []
            for w in blocked_worker_list:
                mturk_manager.block_worker(
                    w, 'We found that you have unexpected behaviors in our \
                     previous HITs. For more questions please email us.')

        def run_onboard(worker):
            # Give the worker a persona to adopt before the real chat.
            worker.persona_generator = persona_generator
            world = PersonaProfileWorld(opt, worker)
            world.parley()
            world.shutdown()

        mturk_manager.set_onboard_function(onboard_function=run_onboard)
        mturk_manager.ready_to_accept_workers()

        def check_worker_eligibility(worker):
            return True

        def assign_worker_roles(workers):
            for index, worker in enumerate(workers):
                worker.id = mturk_agent_ids[index % len(mturk_agent_ids)]

        def run_conversation(mturk_manager, opt, workers):
            # Only one human per conversation; the bot joins via
            # model_agent_opt (shared params) inside the world.
            agents = workers[0]
            conv_idx = mturk_manager.conversation_index
            world = Convai2EvalWorld(
                opt=opt,
                agents=[agents],
                range_turn=[int(s) for s in opt['range_turn'].split(',')],
                max_turn=opt['max_turns'],
                max_resp_time=opt['max_resp_time'],
                model_agent_opt=shared_bot_params,
                world_tag='conversation t_{}'.format(conv_idx),
                agent_timeout_shutdown=opt['ag_shutdown_time'],
            )
            world.reset_random()
            while not world.episode_done():
                world.parley()
            world.save_data()

            world.shutdown()
            world.review_work()

        mturk_manager.start_task(eligibility_function=check_worker_eligibility,
                                 assign_role_function=assign_worker_roles,
                                 task_function=run_conversation)

    except BaseException:
        raise
    finally:
        # Always clean up HITs and the server, even on Ctrl-C or errors.
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
# Ejemplo n.º 17  (scrape/extraction artifact — not code; kept as a comment)
# 0
def run_task(override_opt: Optional[dict] = None):
    """
    This task consists of an MTurk worker talking to a model and MTurker also evaluates
    each utterance of the bot for various buckets (see constants).

    :param override_opt: if provided, these parameters are set on the parser
        via set_params and the command line is ignored (parse_args([])).
    :raises ValueError: if no conversations-needed mapping is supplied either
        via --conversations-needed or via override_opt.
    """

    config_folder = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                 'task_config')
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    default_task_folder = os.path.join(argparser.parlai_home, 'data',
                                       'turn_annotations')
    argparser.add_mturk_args()
    argparser.add_argument('-num_t',
                           '--num_turns',
                           default=6,
                           type=int,
                           help='minimum number of turns')
    argparser.add_argument(
        '--conversations-needed',
        dest='conversations_needed_string',
        default=None,
        type=str,
        help=
        'Number of convos needed for each model. For example: "modelA:50,modelB:20"',
    )
    argparser.add_argument(
        '--task-model-parallel',
        default=True,
        type=bool,
        help='Whether to load models to be used with model_parallel True.',
    )
    argparser.add_argument(
        '--auto-approve-delay',
        dest='auto_approve_delay',
        type=int,
        default=3600 * 24 * 5,
        help='how long to wait for auto approval',
    )
    argparser.add_argument(
        '--max-resp-time',
        type=int,
        default=180,
        help='time limit for entering a dialog message',
    )
    argparser.add_argument(
        '--max-onboard-time',
        type=int,
        default=300,
        help='time limit accepting onboarding',
    )
    argparser.add_argument(
        '--base-save-folder',
        default=default_task_folder,
        type=str,
        help='base folder for saving all crowdsourcing results',
    )
    argparser.add_argument(
        '--base-model-folder',
        default=None,
        type=str,
        help='base folder for loading model files from',
    )
    argparser.add_argument(
        '--onboard-worker-answer-folder',
        default=os.path.join(default_task_folder, 'onboard_answers'),
        type=str,
        help=
        'base folder for saving all worker answer results during onboarding',
    )
    argparser.add_argument(
        '--worker-blocklist-paths',
        default=None,
        type=str,
        help=
        'Path(s) to a list of IDs of workers to soft-block, separated by newlines. Use commas to indicate multiple lists',
    )
    argparser.add_argument(
        '--check-acceptability',
        default=False,
        type=bool,
        help=
        "Check worker's responses against several metrics of acceptability",
    )
    argparser.add_argument('--include-persona',
                           default=False,
                           type=bool,
                           help="Show persona to the bot")
    argparser.add_argument(
        '--conversation-start-mode',
        default='hi',
        type=str,
        choices=['hi', 'bst'],
        help=
        'Whether to show "Hi!" or two previous utterances (as in BlendedSkillTalk) at the beginning of the conversation',
    )
    argparser.add_argument(
        '--context-seed',
        default=None,
        type=int,
        help="Set seed for pulling the context info (for testing)",
    )
    argparser.add_argument(
        '--hit-config-path',
        default=os.path.join(config_folder, 'hit_config.json'),
        type=str,
        help=
        'Path to file of parameters describing how MTurk will describe the HIT to the workers',
    )
    argparser.add_argument(
        '--task-description-path',
        default=os.path.join(config_folder, 'task_description.html'),
        type=str,
        help='Path to file of HTML to show on the task-description page',
    )
    argparser.add_argument(
        '--left-pane-text-path',
        default=os.path.join(config_folder, 'left_pane_text.html'),
        type=str,
        help=
        'Path to file of HTML to show on the left-hand pane of the chat window',
    )
    argparser.add_argument(
        '--annotations-intro',
        default=
        'Does this comment from your partner have any of the following attributes? (Check all that apply)',
        type=str,
        help='Text shown to worker before they fill out annotation form',
    )
    argparser.add_argument(
        '--annotations-config-path',
        default=os.path.join(config_folder, 'annotations_config.json'),
        type=str,
        help='Path to JSON of annotation categories',
    )
    argparser.add_argument(
        '--onboard-task-data-path',
        default=os.path.join(config_folder, 'onboard_task_data.json'),
        type=str,
        help='Path to JSON containing settings for running onboarding',
    )
    argparser.add_argument(
        '--final-rating-question',
        default='Please rate your partner on a scale of 1-5.',
        type=str,
        help='Text to show when asking worker to make their final rating',
    )

    # NOTE: you have to set all three of these opts to enforce the MTurk core
    # param max_hits_per_worker.
    #  - Without unique_qual_name, MTurkManager creates different qualification
    #    for each run (so a worker could do N hits per run) Also, the
    #    worker has to get to N HITs in at least one run or they won't be given
    #    the qualification.
    #  - allowed_conversations is like max concurrent conversations
    #    allowed_conversations needs to be 1 or the actual max would be N +
    #    allowed_conversations. Worker gets notified via frontend message that
    #    they aren't eligible (second description screen), UNLESS the frontend
    #    overwrites that functionality.
    # There's also still a race condition where the worker might be able to open
    # 1 extra task
    argparser.set_defaults(
        unique_qual_name='turn_annotations_max_submissions',
        max_hits_per_worker=10,
        allowed_conversations=3,
    )

    if override_opt is not None:
        argparser.set_params(**override_opt)
        opt = argparser.parse_args([])
    else:
        opt = argparser.parse_args()
    directory_path = os.path.dirname(os.path.abspath(__file__))
    opt['task'] = os.path.basename(directory_path)

    # Set the number of conversations needed
    if opt.get('conversations_needed_string') is not None:
        parts = opt['conversations_needed_string'].split(',')
        conversations_needed = {}
        for part in parts:
            model_name, num_string = part.split(':')
            conversations_needed[model_name] = int(num_string)
        opt['conversations_needed'] = conversations_needed
    if opt.get('conversations_needed') is None:
        # Fail fast with an actionable message: without this mapping,
        # run_statistics and model loading below would raise a bare KeyError.
        raise ValueError(
            'A conversations-needed mapping must be provided, either via '
            '--conversations-needed (e.g. "modelA:50,modelB:20") or via '
            'override_opt["conversations_needed"].')

    # Read in workers to soft-block
    if opt.get('worker_blocklist_paths') is not None:
        blocklist_paths = opt['worker_blocklist_paths'].split(',')
        worker_blocklist = set()
        for path in blocklist_paths:
            with open(path) as f:
                worker_blocklist |= set(f.read().strip().split('\n'))
        opt['worker_blocklist'] = worker_blocklist

    # Read in and define text shown to users
    if opt.get('hit_config') is None:
        with open(opt['hit_config_path']) as f:
            opt['hit_config'] = json.load(f)
        opt.update(opt['hit_config'])
        # Add all of the settings in hit_config into the base opt
    if opt.get('task_description') is None:
        with open(opt['task_description_path']) as f:
            opt['task_description'] = f.readlines()
    if opt.get('left_pane_text') is None:
        with open(opt['left_pane_text_path']) as f:
            opt['left_pane_text'] = f.readlines()
    if opt.get('annotations_config') is None:
        with open(opt['annotations_config_path']) as f:
            opt['annotations_config'] = json.load(f)
    if opt.get('onboard_task_data') is None:
        with open(opt['onboard_task_data_path']) as f:
            opt['onboard_task_data'] = json.load(f)

    # Limits the number of models that can generate at once
    max_concurrent_responses = 1
    semaphore = threading.Semaphore(max_concurrent_responses)

    # Per-model count of finished, acceptable conversations this run.
    run_statistics = {model: 0 for model in opt['conversations_needed']}
    onboard_statistics = {}

    save_folder = 'sandbox' if opt['is_sandbox'] else 'live'
    opt['save_folder'] = os.path.join(opt['base_save_folder'], save_folder,
                                      time.strftime("%Y_%m_%d"))
    os.makedirs(opt['save_folder'], exist_ok=True)

    print(
        f'Going to start collecting {opt["num_conversations"]} conversations, max_hits_per_worker: {opt["max_hits_per_worker"]}, reward: {opt["reward"]}, is_sandbox: {opt["is_sandbox"]}.'
    )

    # Create the models before it launches Heroku backend b/c takes a while
    models_needed = list(opt['conversations_needed'].keys())
    active_models = [
        m for m in models_needed if opt['conversations_needed'][m] > 0
    ]
    shared_bot_agents = TurkLikeAgent.get_bot_agents(opt, active_models)

    mturk_agent_ids = [AGENT_0]
    mturk_manager = MTurkManager(opt=opt, mturk_agent_ids=mturk_agent_ids)
    mturk_manager.setup_server(task_directory_path=directory_path)

    if opt['include_persona'] or opt['conversation_start_mode'] == 'bst':
        context_generator = ContextGenerator(opt, datatype='test', seed=0)
        # We pull from the test set so that the model can't regurgitate
        # memorized conversations
    else:
        context_generator = None

    try:
        mturk_manager.start_new_run()
        mturk_manager.create_hits()

        if not opt['is_sandbox']:
            # Soft-block all chosen workers. Use .get() so a live run without
            # a blocklist hits the warning below instead of a KeyError.
            worker_blocklist = opt.get('worker_blocklist', set())
            if len(worker_blocklist) > 0:
                print(
                    f"About to soft-block {len(worker_blocklist)} workers."
                )
                for w in worker_blocklist:
                    try:
                        print('Soft Blocking {}\n'.format(w))
                        mturk_manager.soft_block_worker(w)
                    except Exception as e:
                        # Best-effort: log and continue so one bad ID doesn't
                        # abort the whole run.
                        print(f'Did not soft block worker {w}: {e}')
                    # Throttle AWS API calls.
                    time.sleep(0.1)
            else:
                print(
                    'WARNING: We are in live mode, but a list of workers to soft-block '
                    'has not been passed in.')

        def run_onboard(worker):
            # Run the onboarding quiz and tally the outcome per status code.
            world = TurnAnnotationsOnboardWorld(opt, worker)
            status = world.parley()
            if status not in onboard_statistics:
                onboard_statistics[status] = 0
            onboard_statistics[status] += 1
            print(
                f'After onboard world parley. About to shutdown onboard world for {worker.worker_id}, status was: {status}. Total onboard statistics for this run are: {onboard_statistics}.'
            )
            world.shutdown()

        mturk_manager.set_onboard_function(onboard_function=run_onboard)
        mturk_manager.ready_to_accept_workers()

        def check_worker_eligibility(worker):
            return True

        def assign_worker_roles(workers):
            workers[0].id = mturk_agent_ids[0]

        def run_conversation(mturk_manager, opt, workers):
            # Pick the model that still needs the most conversations.
            remaining_counts_needed = [
                (m, c - run_statistics[m])
                for (m, c) in opt['conversations_needed'].items()
            ]
            remaining_counts_needed.sort(reverse=True, key=lambda x: x[1])
            model_name = remaining_counts_needed[0][0]
            print(
                f'Remaining conversation counts needed: {remaining_counts_needed}'
            )

            # Get a bot and add it to the list of "workers"
            print(f'Choosing the "{model_name}" model for the bot.')
            agent = create_agent_from_shared(shared_bot_agents[model_name])
            bot_worker = TurkLikeAgent(
                opt,
                model_name=model_name,
                model_agent=agent,
                num_turns=opt['num_turns'],
                semaphore=semaphore,
            )
            workers_including_bot = workers + [bot_worker]

            assert len(workers_including_bot) == 2

            # Get context: personas, previous utterances, etc.
            if context_generator is not None:
                context_info = context_generator.get_context()
            else:
                context_info = None

            conv_idx = mturk_manager.conversation_index
            world = TurnAnnotationsChatWorld(
                opt=opt,
                agents=workers_including_bot,
                num_turns=opt['num_turns'],
                max_resp_time=opt['max_resp_time'],
                tag='conversation t_{}'.format(conv_idx),
                context_info=context_info,
            )
            while not world.episode_done():
                print('About to parley')
                world.parley()
            model_nickname, worker_is_unacceptable, convo_finished = world.save_data(
            )
            if worker_is_unacceptable:
                # Prevent low-quality workers from taking further HITs.
                print(f'Soft-blocking worker {workers[0].worker_id}')
                mturk_manager.soft_block_worker(workers[0].worker_id)
                time.sleep(0.1)
            if not worker_is_unacceptable and convo_finished:
                run_statistics[model_nickname] += 1

            world.shutdown()
            world.review_work()

        mturk_manager.start_task(
            eligibility_function=check_worker_eligibility,
            assign_role_function=assign_worker_roles,
            task_function=run_conversation,
        )

    except BaseException:
        raise
    finally:
        # Always clean up HITs and the server, even on Ctrl-C or errors.
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
def main():
    """
    This task consists of one agent, model or MTurk worker, talking to an MTurk worker
    to negotiate a deal.

    With --two_mturk_agents, two MTurk workers talk to each other instead;
    otherwise the second seat is filled by a local agent (a model when one
    is configured in opt, else a human at the terminal).
    """
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    argparser.add_argument(
        '--two_mturk_agents',
        dest='two_mturk_agents',
        action='store_true',
        help='data collection mode '
        'with converations between two MTurk agents',
    )

    opt = argparser.parse_args()
    opt['task'] = 'dealnodeal'
    opt['datatype'] = 'valid'
    opt.update(task_config)

    local_agent_1_id = 'local_1'
    # Second MTurk seat is only added in two-worker collection mode.
    mturk_agent_ids = ['mturk_agent_1']
    if opt['two_mturk_agents']:
        mturk_agent_ids.append('mturk_agent_2')

    mturk_manager = MTurkManager(opt=opt, mturk_agent_ids=mturk_agent_ids)

    mturk_manager.setup_server()

    try:
        mturk_manager.start_new_run()
        mturk_manager.create_hits()

        # This task has no onboarding step.
        mturk_manager.set_onboard_function(onboard_function=None)
        mturk_manager.ready_to_accept_workers()

        def check_worker_eligibility(worker):
            # Accept every worker.
            return True

        def assign_worker_roles(workers):
            # Round-robin the configured agent ids over connected workers.
            for index, worker in enumerate(workers):
                worker.id = mturk_agent_ids[index % len(mturk_agent_ids)]

        def run_conversation(mturk_manager, opt, workers):
            agents = workers[:]

            # Create a local agent (a model when 'model' is present in opt,
            # otherwise a human at the local terminal) to fill the second
            # seat when only one MTurk worker participates.
            if not opt['two_mturk_agents']:
                if 'model' in opt:
                    local_agent = create_agent(opt)
                else:
                    local_agent = LocalHumanAgent(opt=None)

                local_agent.id = local_agent_1_id
                agents.append(local_agent)

            opt["batchindex"] = mturk_manager.started_conversations

            world = MTurkDealNoDealDialogWorld(opt=opt, agents=agents)

            while not world.episode_done():
                world.parley()

            world.shutdown()

        mturk_manager.start_task(
            eligibility_function=check_worker_eligibility,
            assign_role_function=assign_worker_roles,
            task_function=run_conversation,
        )

    except BaseException:
        raise
    finally:
        # Always expire outstanding HITs and stop the server, even on error.
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
Ejemplo n.º 19
0
def main():
    """
    Run a three-way dialog HIT: one local human agent plus two MTurk agents.

    Each MTurk agent first goes through an onboarding world to provide
    information about themselves before being placed into the main
    conversation. The conversation ends when human_1 sends a message
    ending with `[DONE]`.
    """
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    opt = argparser.parse_args()
    opt['task'] = os.path.basename(os.path.dirname(os.path.abspath(__file__)))
    opt.update(task_config)

    human_agent_1_id = 'human_1'
    mturk_agent_ids = ['mturk_agent_1', 'mturk_agent_2']
    mturk_manager = MTurkManager(opt=opt, mturk_agent_ids=mturk_agent_ids)
    mturk_manager.setup_server()

    try:
        mturk_manager.start_new_run()
        mturk_manager.create_hits()

        def run_onboard(worker):
            # Walk each worker through the onboarding world before the task.
            onboard_world = MTurkMultiAgentDialogOnboardWorld(opt=opt,
                                                              mturk_agent=worker)
            while not onboard_world.episode_done():
                onboard_world.parley()
            onboard_world.shutdown()

        # Pass onboard_function=None here to skip onboarding entirely.
        mturk_manager.set_onboard_function(onboard_function=run_onboard)
        mturk_manager.ready_to_accept_workers()

        def check_worker_eligibility(worker):
            # Accept every worker.
            return True

        eligibility_function = {
            'func': check_worker_eligibility,
            'multiple': False,
        }

        def assign_worker_roles(workers):
            # Round-robin the two MTurk roles over the connected workers.
            n_roles = len(mturk_agent_ids)
            for idx, worker in enumerate(workers):
                worker.id = mturk_agent_ids[idx % n_roles]

        def run_conversation(mturk_manager, opt, workers):
            # The local human participant joins the two turkers.
            local_human = LocalHumanAgent(opt=None)
            local_human.id = human_agent_1_id

            convo_world = MTurkMultiAgentDialogWorld(
                opt=opt, agents=[local_human, workers[0], workers[1]])

            while not convo_world.episode_done():
                convo_world.parley()

            convo_world.shutdown()

        mturk_manager.start_task(eligibility_function=eligibility_function,
                                 assign_role_function=assign_worker_roles,
                                 task_function=run_conversation)

    except BaseException:
        raise
    finally:
        # Always expire outstanding HITs and stop the server, even on error.
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
Ejemplo n.º 20
0
def main():
    """
        Human Evaluation of various image captions/comments.

        A turker is shown an image and two possible comments/captions, and
        optionally the personality used to create these captions. Then, the
        turker is asked to choose which caption they think is more engaging.

        In this example, we will just be comparing the original comment twice
        (this is just to demonstrate the task for future use).

        To use your own data, please specify `--eval-data-path` to an
        appropriate json file with a list of examples, where each example
        has the following structure:
            {
                'image_hash': <hash of image>,
                'personality': <personality, if applicable>,
                '<compare_key_1>': <first option to compare>,
                '<compare_key_2>': <second option to compare>,
                .
                .
                .
            }
        Note that compare_key_1 and compare_key_2 can be any field, as long as they
        map to a string comment/caption.

        Example Scenario:
            Suppose you have the original Personality-Captions dataset, and
            you would like to compare the outputs of your model called `model`.

            Your data may look like the following:
            [{
                'image_hash': hashforimageofcat,
                'personality': 'Sweet',
                'comment': 'Look at the cute cat!', # Human Comment
                'model_comment': 'That's a weird looking dog' # Model Comment
            }, ...]

            Thus, you would specify `-ck1 comment -ck2 model_comment` to evaluate
            the outputs of the model vs. the human comments from Personality-Captions
    """
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    argparser.add_argument('-mx_rsp_time',
                           '--max_resp_time',
                           default=1800,
                           type=int,
                           help='time limit for entering a dialog message')
    argparser.add_argument('-mx_onb_time',
                           '--max_onboard_time',
                           type=int,
                           default=300,
                           help='time limit for turker'
                           'in onboarding')
    argparser.add_argument('-ni',
                           '--num_images',
                           type=int,
                           default=10,
                           help='number of images to show \
                           to turker')
    argparser.add_argument('--data-path',
                           type=str,
                           default='',
                           help='where to save data')
    argparser.add_argument('--eval-data-path',
                           type=str,
                           default='',
                           help='where to load data to rank from. Leave '
                           'blank to use Personality-Captions data')
    argparser.add_argument('-ck1',
                           '--compare-key-1',
                           type=str,
                           default='comment',
                           help='key of first option to compare')
    argparser.add_argument('-ck2',
                           '--compare-key-2',
                           type=str,
                           default='comment',
                           help='key of second option to compare')
    argparser.add_argument('--show-personality',
                           default=True,
                           type='bool',
                           help='whether to show the personality')
    PersonalityCaptionsTeacher.add_cmdline_args(argparser)
    opt = argparser.parse_args()
    directory_path = os.path.dirname(os.path.abspath(__file__))
    opt['task'] = os.path.basename(directory_path)
    # Default save/eval paths are derived from the task directory name;
    # eval data falls back to the Personality-Captions training split.
    if 'data_path' not in opt or opt['data_path'] == '':
        opt['data_path'] = os.getcwd() + '/data/' + opt['task']
    if opt.get('eval_data_path') == '':
        opt['eval_data_path'] = os.path.join(
            opt['datapath'], 'personality_captions/train.json')
    opt.update(task_config)

    # Single-role task: every worker acts as the CHOOSER.
    mturk_agent_ids = [CHOOSER]
    mturk_manager = MTurkManager(opt=opt, mturk_agent_ids=mturk_agent_ids)

    example_generator = ExampleGenerator(opt)
    mturk_manager.setup_server(task_directory_path=directory_path)

    try:
        mturk_manager.start_new_run()

        def run_onboard(worker):
            # Attach the shared example generator, then run the one-shot
            # role-onboarding world.
            worker.example_generator = example_generator
            world = RoleOnboardWorld(opt, worker)
            world.parley()
            world.shutdown()

        mturk_manager.set_onboard_function(onboard_function=run_onboard)
        mturk_manager.ready_to_accept_workers()
        mturk_manager.create_hits()

        def check_worker_eligibility(worker):
            # Accept every worker.
            return True

        def assign_worker_roles(workers):
            for w in workers:
                w.id = mturk_agent_ids[0]

        def run_conversation(mturk_manager, opt, workers):
            agents = workers[:]
            conv_idx = mturk_manager.conversation_index
            world = MTurkPersonalityCaptionsStackRankWorld(
                opt,
                agents=agents,
                world_tag='conversation t_{}'.format(conv_idx),
            )
            while not world.episode_done():
                world.parley()
            world.save_data()

            world.shutdown()
            world.review_work()

        mturk_manager.start_task(eligibility_function=check_worker_eligibility,
                                 assign_role_function=assign_worker_roles,
                                 task_function=run_conversation)

    except BaseException:
        raise
    finally:
        # Always expire outstanding HITs and stop the server, even on error.
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
Ejemplo n.º 21
0
def main():
    """
    Collect human evaluations of a dialog model.

    An MTurk worker goes through a short onboarding world and then
    evaluates the responses of a pre-loaded generative model on the task
    configured in ``task_opt`` (#MovieDD-Reddit, test split).
    """
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()

    # IR-baseline command-line args are still registered so that any flags
    # it declares parse cleanly from the command line.
    from parlai.agents.ir_baseline.ir_baseline import IrBaselineAgent

    IrBaselineAgent.add_cmdline_args(argparser)
    opt = argparser.parse_args()
    opt['task'] = os.path.basename(os.path.dirname(os.path.abspath(__file__)))
    opt.update(task_config)

    # NOTE(review): hard-coded, machine-specific checkpoint path; consider
    # exposing this through a --model-file flag instead.
    bot = create_agent(
        {'model_file': '/export/a14/dlewis/cont_train_gen/model.checkpoint'})

    # The task that we will evaluate the dialog model on
    task_opt = {}
    task_opt['datatype'] = 'test'
    task_opt['datapath'] = opt['datapath']
    task_opt['task'] = '#MovieDD-Reddit'

    mturk_agent_id = 'Worker'
    mturk_manager = MTurkManager(opt=opt, mturk_agent_ids=[mturk_agent_id])
    mturk_manager.setup_server()

    try:
        mturk_manager.start_new_run()
        mturk_manager.create_hits()

        def run_onboard(worker):
            world = ModelEvaluatorOnboardWorld(opt=opt, mturk_agent=worker)
            while not world.episode_done():
                world.parley()
            world.shutdown()

        mturk_manager.set_onboard_function(onboard_function=run_onboard)
        mturk_manager.ready_to_accept_workers()

        def check_worker_eligibility(worker):
            # Accept every worker.
            return True

        def assign_worker_roles(workers):
            # Single-agent task: only the first worker gets a role.
            workers[0].id = mturk_agent_id

        # NOTE(review): 'global' here publishes the nested function at module
        # scope — presumably needed so the function can be pickled/referenced
        # elsewhere; confirm before removing.
        global run_conversation

        def run_conversation(mturk_manager, opt, workers):
            mturk_agent = workers[0]

            # Evaluate the pre-loaded generative bot. (Fix: previously an
            # IrBaselineAgent was constructed here and immediately discarded;
            # that dead, potentially expensive instantiation is removed.)
            model_agent = bot
            world = ModelEvaluatorWorld(
                opt=opt,
                model_agent=model_agent,
                task_opt=task_opt,
                mturk_agent=mturk_agent,
            )

            while not world.episode_done():
                world.parley()
            world.shutdown()
            world.review_work()

        mturk_manager.start_task(
            eligibility_function=check_worker_eligibility,
            assign_role_function=assign_worker_roles,
            task_function=run_conversation,
        )
    except BaseException:
        raise
    finally:
        # Always expire outstanding HITs and stop the server, even on error.
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
Ejemplo n.º 22
0
def main():
    """PersonaChat data collection between two MTurk workers.

    Each worker is assigned a persona in an onboarding step
    (PersonaProfileWorld) and the pair then chats in a PersonaChatWorld
    for a number of turns sampled from --range_turn.
    """
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    argparser.add_argument('-min_t',
                           '--min_turns',
                           default=5,
                           type=int,
                           help='minimum number of turns')
    argparser.add_argument('-mt',
                           '--max_turns',
                           default=10,
                           type=int,
                           help='maximal number of chat turns')
    argparser.add_argument('-mx_rsp_time',
                           '--max_resp_time',
                           default=150,
                           type=int,
                           help='time limit for entering a dialog message')
    argparser.add_argument('-mx_psn_time',
                           '--max_persona_time',
                           type=int,
                           default=300,
                           help='time limit for turker'
                           'entering the persona')
    argparser.add_argument('--ag_shutdown_time',
                           default=120,
                           type=int,
                           help='time limit for entering a dialog message')
    argparser.add_argument('--persona-type',
                           default='both',
                           type=str,
                           choices=['both', 'self', 'other'],
                           help='Which personas to load from personachat')
    argparser.add_argument('--revised',
                           default=False,
                           type='bool',
                           help='Whether to use revised personas')
    argparser.add_argument('-rt',
                           '--range_turn',
                           default='5,7',
                           help='sample range of number of turns')
    argparser.add_argument('--personas-path',
                           default=None,
                           help='specify path for personas data')
    opt = argparser.parse_args()

    directory_path = os.path.dirname(os.path.abspath(__file__))
    opt['task'] = os.path.basename(directory_path)

    # Default to the personas data bundled with the personachat_chat task.
    if not opt.get('personas_path'):
        opt['personas_path'] = argparser.parlai_home + '/parlai/mturk/personachat_chat/data'

    opt.update(task_config)

    opt['extract_personas_path'] = os.path.join(opt['datapath'],
                                                'personachat_chat')

    mturk_agent_ids = ['PERSON_1', 'PERSON_2']

    mturk_manager = MTurkManager(opt=opt, mturk_agent_ids=mturk_agent_ids)

    persona_generator = PersonasGenerator(opt)
    mturk_manager.setup_server(task_directory_path=directory_path)

    try:
        mturk_manager.start_new_run()
        mturk_manager.create_hits()

        # Hard-block previously flagged workers (live mode only); ids are
        # added to the list manually.
        if not opt['is_sandbox']:
            blocked_worker_list = []
            for w in blocked_worker_list:
                mturk_manager.block_worker(
                    w,
                    'We found that you have unexpected behaviors in our previous HITs. For more questions please email us.'
                )

        def run_onboard(worker):
            # Assign a persona to the worker before the conversation starts.
            worker.persona_generator = persona_generator
            world = PersonaProfileWorld(opt, worker)
            world.parley()
            world.shutdown()

        mturk_manager.set_onboard_function(onboard_function=run_onboard)
        mturk_manager.ready_to_accept_workers()

        def check_worker_eligibility(worker):
            # Accept every worker.
            return True

        def assign_worker_roles(workers):
            # Round-robin PERSON_1 / PERSON_2 over the connected workers.
            for index, worker in enumerate(workers):
                worker.id = mturk_agent_ids[index % len(mturk_agent_ids)]

        def run_conversation(mturk_manager, opt, workers):
            agents = [workers[0], workers[1]]
            conv_idx = mturk_manager.conversation_index
            world = PersonaChatWorld(
                opt=opt,
                agents=agents,
                range_turn=[int(s) for s in opt['range_turn'].split(',')],
                max_turn=opt['max_turns'],
                max_resp_time=opt['max_resp_time'],
                world_tag='conversation t_{}'.format(conv_idx))
            world.reset_random()
            while not world.episode_done():
                world.parley()
            world.save_data()

            world.shutdown()
            world.review_work()

        mturk_manager.start_task(eligibility_function=check_worker_eligibility,
                                 assign_role_function=assign_worker_roles,
                                 task_function=run_conversation)

    except BaseException:
        raise
    finally:
        # Always expire outstanding HITs and stop the server, even on error.
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
def main():
    """This task consists of an MTurk agent evaluating a Controllable Dialog model.

    Each worker is assigned a persona during onboarding and then chats with
    one model setting from SETTINGS_TO_RUN that they have not evaluated
    before. The worker/setting bookkeeping (worker_models_seen,
    model_counts) is shared across conversations and guarded by ``lock``.
    """
    start_time = datetime.datetime.today().strftime('%Y-%m-%d-%H-%M')
    argparser = ParlaiParser(False, add_model_args=True)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    argparser.add_argument('--max-resp-time',
                           default=240,
                           type=int,
                           help='time limit for entering a dialog message')
    argparser.add_argument('--max-choice-time',
                           type=int,
                           default=300,
                           help='time limit for turker'
                           'choosing the topic')
    argparser.add_argument('--ag-shutdown-time',
                           default=120,
                           type=int,
                           help='time limit for entering a dialog message')
    argparser.add_argument('--num-turns',
                           default=6,
                           type=int,
                           help='number of turns of dialogue')
    argparser.add_argument('--human-eval',
                           type='bool',
                           default=False,
                           help='human vs human eval, no models involved')
    argparser.add_argument('--auto-approve-delay',
                           type=int,
                           default=3600 * 24 * 2,
                           help='how long to wait for auto approval')
    argparser.add_argument('--only-masters',
                           type='bool',
                           default=False,
                           help='Set to true to use only master turks for '
                           'this test eval')
    argparser.add_argument('--create-model-qualif',
                           type='bool',
                           default=True,
                           help='Create model qualif so unique eval between'
                           'models.')
    argparser.add_argument('--limit-workers',
                           type=int,
                           default=len(SETTINGS_TO_RUN),
                           help='max HITs a worker can complete')
    argparser.add_argument(
        '--mturk-log',
        type=str,
        default=('$HOME/ParlAI/data/mturklogs/controllable/{}.log'.format(
            start_time)))
    argparser.add_argument('--short-eval',
                           type='bool',
                           default=True,
                           help='Only ask engagingness question and persona'
                           'question.')
    # persona specific arguments
    argparser.add_argument('--persona-type',
                           type=str,
                           default='self',
                           choices=['self', 'other', 'none'])
    argparser.add_argument('--persona-datatype',
                           type=str,
                           default='valid',
                           choices=['train', 'test', 'valid'])
    argparser.add_argument('--max-persona-time',
                           type=int,
                           default=360,
                           help='max time to view persona')

    def get_logger(opt):
        """Configure the root logger to log to console and --mturk-log."""
        logger = logging.getLogger()
        logger.setLevel(logging.INFO)

        fmt = logging.Formatter('%(asctime)s: [ %(message)s ]',
                                '%m/%d/%Y %I:%M:%S %p')
        console = logging.StreamHandler()
        console.setFormatter(fmt)
        logger.addHandler(console)
        if 'mturk_log' in opt:
            logfn = opt['mturk_log'].replace('$HOME', os.environ['HOME'])
            if not os.path.isdir(os.path.dirname(logfn)):
                raise OSError("Please run `mkdir -p {}`".format(
                    os.path.dirname(logfn)))
            logfile = logging.FileHandler(logfn, 'a')
            logfile.setFormatter(fmt)
            logger.addHandler(logfile)
        logger.info('COMMAND: %s' % ' '.join(sys.argv))
        logger.info('-' * 100)
        logger.info('CONFIG:\n%s' % json.dumps(opt, indent=4, sort_keys=True))

        return logger

    start_opt = argparser.parse_args()

    task_config['task_description'] = task_config['task_description'].format(
        start_opt['reward'])

    # set options
    # NOTE(review): --limit-workers is overwritten unconditionally here, so
    # the command-line value is ignored; confirm whether that is intended.
    start_opt['limit_workers'] = len(SETTINGS_TO_RUN)
    start_opt['allowed_conversations'] = 1
    start_opt['max_hits_per_worker'] = start_opt['limit_workers']
    start_opt['task'] = os.path.basename(
        os.path.dirname(os.path.abspath(__file__)))

    start_opt.update(task_config)

    get_logger(start_opt)

    # Shared (cross-conversation) bookkeeping, guarded by `lock`:
    # which settings each worker has seen, and how many conversations each
    # setting has accumulated.
    model_share_params = {}
    worker_models_seen = {}
    model_opts = {}
    model_counts = {}

    lock = Lock()

    # Pre-load one bot per setting and keep its shared params so each
    # conversation can spawn a cheap copy.
    for setup in SETTINGS_TO_RUN:
        assert 'human' not in setup
        model_counts[setup] = 0
        agent_config = getattr(mcf, setup)
        combined_config = copy.deepcopy(start_opt)
        for k, v in agent_config.items():
            combined_config[k] = v
            combined_config['override'][k] = v
        folder_name = '{}-{}'.format(setup, start_time)
        combined_config['save_data_path'] = os.path.join(
            start_opt['datapath'], 'local_controllable_dialogue', folder_name)
        model_opts[setup] = combined_config
        bot = create_agent(combined_config, True)
        model_share_params[setup] = bot.share()

    if not start_opt.get('human_eval'):
        mturk_agent_ids = ['PERSON_1']
    else:
        mturk_agent_ids = ['PERSON_1', 'PERSON_2']

    mturk_manager = MTurkManager(opt=start_opt,
                                 mturk_agent_ids=mturk_agent_ids)

    personas_generator = PersonasGenerator(start_opt)

    directory_path = os.path.dirname(os.path.abspath(__file__))

    mturk_manager.setup_server(task_directory_path=directory_path)

    try:
        mturk_manager.start_new_run()
        agent_qualifications = []
        # assign qualifications
        # NOTE(review): the qualification is created and stored in
        # start_opt['unique_qualif_id'] but never appended to
        # agent_qualifications here — presumably attached downstream;
        # confirm against the world/HIT code.
        if start_opt['create_model_qualif']:
            qual_name = 'ControlEvalRound2'
            qual_desc = (
                'Qualification to ensure workers complete only a certain'
                'number of these HITs')
            qualification_id = mturk_utils.find_or_create_qualification(
                qual_name, qual_desc, False)
            print('Created qualification: ', qualification_id)
            start_opt['unique_qualif_id'] = qualification_id

        def run_onboard(worker):
            # Assign a persona to the worker before the conversation starts.
            worker.personas_generator = personas_generator
            world = PersonaAssignWorld(start_opt, worker)
            world.parley()
            world.shutdown()

        def check_worker_eligibility(worker):
            # Eligible while the worker still has an unseen model setting.
            # Fix: use `with lock` instead of bare acquire()/release() so the
            # lock cannot leak if the body raises.
            worker_id = worker.worker_id
            with lock:
                return len(worker_models_seen.get(worker_id,
                                                  [])) < len(SETTINGS_TO_RUN)

        def assign_worker_roles(workers):
            for index, worker in enumerate(workers):
                worker.id = mturk_agent_ids[index % len(mturk_agent_ids)]

        mturk_manager.set_onboard_function(onboard_function=run_onboard)
        mturk_manager.ready_to_accept_workers()
        mturk_manager.create_hits(qualifications=agent_qualifications)

        def run_conversation(mturk_manager, opt, workers):
            conv_idx = mturk_manager.conversation_index

            # gotta find a bot this worker hasn't seen yet
            assert len(workers) == 1
            worker_id = workers[0].worker_id
            # Fix: `with lock` guarantees release even if this section raises
            # (the original acquire()/release() pair would deadlock later
            # conversations on any exception in between).
            with lock:
                if worker_id not in worker_models_seen:
                    worker_models_seen[worker_id] = set()
                print("MODELCOUNTS:")
                print(pprint.pformat(model_counts))
                logging.info("MODELCOUNTS\n" + pprint.pformat(model_counts))
                # Randomized-but-balanced choice: prefer settings with fewer
                # completed conversations, jittered to break ties.
                model_options = [
                    (model_counts[setup_name] + 10 * random.random(), setup_name)
                    for setup_name in SETTINGS_TO_RUN
                    if setup_name not in worker_models_seen[worker_id]
                ]
                if not model_options:
                    logging.error(
                        "Worker {} already finished all settings! Returning none".
                        format(worker_id))
                    return None
                _, model_choice = min(model_options)

                worker_models_seen[worker_id].add(model_choice)
                model_counts[model_choice] += 1

            world = ControllableDialogEval(
                opt=model_opts[model_choice],
                agents=workers,
                num_turns=start_opt['num_turns'],
                max_resp_time=start_opt['max_resp_time'],
                model_agent_opt=model_share_params[model_choice],
                world_tag='conversation t_{}'.format(conv_idx),
                agent_timeout_shutdown=opt['ag_shutdown_time'],
                model_config=model_choice,
            )
            world.reset_random()
            while not world.episode_done():
                world.parley()
            world.save_data()

            # Roll back the bookkeeping for unfinished conversations so the
            # setting can be evaluated again (by this or another worker).
            with lock:
                if not world.convo_finished:
                    model_counts[model_choice] -= 1
                    worker_models_seen[worker_id].remove(model_choice)

            world.shutdown()
            gc.collect()

        mturk_manager.start_task(eligibility_function=check_worker_eligibility,
                                 assign_role_function=assign_worker_roles,
                                 task_function=run_conversation)

    except BaseException:
        raise
    finally:
        # Always expire outstanding HITs and stop the server, even on error.
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
Ejemplo n.º 24
0
def main():
    """This task consists of an MTurk agent evaluating a chit-chat model. They
    are asked to chat to the model adopting a specific persona. After their
    conversation, they are asked to evaluate their partner on several metrics.
    """
    argparser = ParlaiParser(False, add_model_args=True)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    argparser.add_argument('-mt',
                           '--max-turns',
                           default=10,
                           type=int,
                           help='maximal number of chat turns')
    argparser.add_argument('--max-resp-time',
                           default=240,
                           type=int,
                           help='time limit for entering a dialog message')
    argparser.add_argument('--max-persona-time',
                           type=int,
                           default=300,
                           help='time limit for turker'
                           'entering the persona')
    argparser.add_argument('--ag-shutdown-time',
                           default=120,
                           type=int,
                           help='time limit for entering a dialog message')
    argparser.add_argument('--persona-type',
                           default='both',
                           type=str,
                           choices=['both', 'self', 'other'],
                           help='Which personas to load from personachat')
    argparser.add_argument('--revised',
                           default=False,
                           type='bool',
                           help='Whether to use revised personas')
    argparser.add_argument('-rt',
                           '--range-turn',
                           default='5,6',
                           help='sample range of number of turns')
    argparser.add_argument('--auto-approve-delay',
                           type=int,
                           default=3600 * 24 * 1,
                           help='how long to wait for  \
                           auto approval')
    argparser.add_argument(
        '--only-masters',
        type='bool',
        default=False,
        help=
        'Set to True to use only master turks for this test eval, default is %(default)s'
    )

    # ADD MODEL ARGS HERE, UNCOMMENT TO USE KVMEMNET MODEL AS AN EXAMPLE
    # argparser.set_defaults(
    # model='projects.personachat.kvmemnn.kvmemnn:Kvmemnn',
    # model_file='models:convai2/kvmemnn/model',
    # )

    opt = argparser.parse_args()

    # add additional model args
    # Force CPU, interactive mode, and no tensorboard for the evaluated bot.
    opt['override'] = {
        'no_cuda': True,
        'interactive_mode': True,
        'tensorboard_log': False
    }

    # Load the bot once; each conversation spawns a copy from its shared
    # params below.
    bot = create_agent(opt)
    shared_bot_params = bot.share()
    print('=== Actual bot opt === :\n {}'.format('\n'.join(
        ["[{}] : {}".format(k, v) for k, v in bot.opt.items()])))
    # Data folder name encodes the masters flag plus all override settings.
    folder_name = 'master_{}_YOURCOMMENT__'.format(
        opt['only_masters']) + '__'.join(
            ['{}_{}'.format(k, v) for k, v in opt['override'].items()])

    #  this is mturk task, not convai2 task from ParlAI
    opt['task'] = 'convai2:self'
    if 'data_path' not in opt:
        opt['data_path'] = os.getcwd() + '/data/' + folder_name
    opt.update(task_config)

    mturk_agent_ids = ['PERSON_1']

    mturk_manager = MTurkManager(opt=opt, mturk_agent_ids=mturk_agent_ids)

    persona_generator = PersonasGenerator(opt)
    mturk_manager.setup_server()

    try:
        mturk_manager.start_new_run()
        agent_qualifications = []
        if opt['only_masters'] is True:
            if opt['is_sandbox']:
                agent_qualifications.append(MASTER_QUALIF_SDBOX)
            else:
                agent_qualifications.append(MASTER_QUALIF)
        mturk_manager.create_hits(qualifications=agent_qualifications)

        if not opt['is_sandbox']:
            # ADD BLOCKED WORKERS HERE, This is Soft blocking! blocking qual *must be* specified
            blocked_worker_list = []
            for w in blocked_worker_list:
                print('Soft Blocking {}\n'.format(w))
                mturk_manager.soft_block_worker(w)
                time.sleep(0.1)  # do the sleep to prevent amazon query drop

        def run_onboard(worker):
            # Assign a persona to the worker before the conversation starts.
            worker.persona_generator = persona_generator
            world = PersonaProfileWorld(opt, worker)
            world.parley()
            world.shutdown()

        mturk_manager.set_onboard_function(onboard_function=run_onboard)
        mturk_manager.ready_to_accept_workers()

        def check_worker_eligibility(worker):
            # Accept every worker.
            return True

        def assign_worker_roles(workers):
            for index, worker in enumerate(workers):
                worker.id = mturk_agent_ids[index % len(mturk_agent_ids)]

        def run_conversation(mturk_manager, opt, workers):
            # Single-worker task: the lone worker chats with the model.
            agents = workers[0]
            conv_idx = mturk_manager.conversation_index
            world = Convai2EvalWorld(
                opt=opt,
                agents=[agents],
                range_turn=[int(s) for s in opt['range_turn'].split(',')],
                max_turn=opt['max_turns'],
                max_resp_time=opt['max_resp_time'],
                model_agent_opt=shared_bot_params,
                world_tag='conversation t_{}'.format(conv_idx),
                agent_timeout_shutdown=opt['ag_shutdown_time'],
            )
            world.reset_random()
            while not world.episode_done():
                world.parley()
            world.save_data()

            world.shutdown()

        mturk_manager.start_task(eligibility_function=check_worker_eligibility,
                                 assign_role_function=assign_worker_roles,
                                 task_function=run_conversation)

    except BaseException:
        raise
    finally:
        # Always expire outstanding HITs and stop the server, even on error.
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
Ejemplo n.º 25
0
def main(opt):
    """Interactive command-line tool for reviewing and paying MTurk runs.

    Connects the local run database and the MTurk client, then loops reading
    commands to print run stats, inspect worker responses, approve HITs or
    individual assignments, and award bonuses (optionally from a CSV file).

    :param opt: parsed option dict; reads ``no_sandbox`` and ``run_ids``.
    """
    setup_aws_credentials()
    if opt['no_sandbox']:
        db_file, all_runs_dir = PATHS['live']
        opt['is_sandbox'] = False
    else:
        db_file, all_runs_dir = PATHS['sandbox']
    assert os.path.exists(db_file), f"DB file {db_file} doesn't exist!"
    assert os.path.isdir(
        all_runs_dir), f"run directory {all_runs_dir} doesn't exist!"
    db = MTurkDataHandler(file_name=db_file)
    mturk_manager = MTurkManager(opt, [])
    client = mturk_utils.get_mturk_client(not opt['no_sandbox'])

    # run_id -> worker_id -> worker data, plus the reverse mapping.
    # Defined unconditionally so the inspect commands below never hit a
    # NameError when explicit --run-ids are supplied (previously these maps
    # existed only in the run_ids-is-None branch).
    run2worker = defaultdict(lambda: dict())
    worker2run = defaultdict(lambda: dict())

    # Get run IDs
    if opt['run_ids'] is None:
        run_ids = list(os.listdir(all_runs_dir))
        for run_id in run_ids:
            run_dir = os.path.join(all_runs_dir, run_id)
            hits = os.listdir(run_dir)
            for hit in hits:
                # t_*/workers/{WORKER_ID}.json
                resps = os.listdir(f"{run_dir}/{hit}/workers/")
                assert len(resps) == 1, "More than one response found!"
                worker_id = resps[0].split('.')[0]
                worker_data = json.load(
                    open(os.path.join(run_dir, hit, "workers", resps[0])))
                run2worker[run_id][worker_id] = worker_data
                worker2run[worker_id][run_id] = worker_data
    else:
        run_ids = opt['run_ids'].split(',')

    def get_all_hits():
        """Return every HIT on the account, following pagination tokens."""
        all_hits = []
        resp = client.list_hits()
        # fix: the first page used to be append()ed as a nested list while
        # later pages were extended, corrupting the returned structure
        all_hits += resp['HITs']
        while 'NextToken' in resp and resp['NextToken']:
            resp = client.list_hits(NextToken=resp['NextToken'])
            all_hits += resp['HITs']
            time.sleep(0.5)  # throttle to avoid dropped Amazon queries
        return all_hits

    def get_run_id_data(run_ids):
        """Print HIT counts and launch times for each run, oldest first."""
        print(f"Found following run IDs: ")
        n_hits = 0
        run_data = list()
        for run_id in run_ids:
            run_datum = db.get_run_data(run_id)
            run_data.append((run_id, run_datum))
        run_data.sort(key=lambda x: x[1]['launch_time'])
        for run_id, run_datum in run_data:
            start_time = datetime.fromtimestamp(run_datum['launch_time'])
            hits = db.get_pairings_for_run(run_id)
            n_hits += len(hits)
            print(f"{run_id} {len(hits)} HITS, started {start_time}")
        print(f"Total {n_hits} HITS over {len(run_ids)} runs")

    def approve_run_hits(run_id):
        """Collect approvable HITs for a run and approve them on confirmation."""
        to_approve = []
        n_to_approve, n_approved = 0, 0
        hits = db.get_pairings_for_run(run_id)
        data = []
        for hit in hits:
            if hit['conversation_id'] is None:
                continue
            try:
                full_data = db.get_full_conversation_data(
                    run_id, hit['conversation_id'], False)
            except FileNotFoundError:
                # conversation data may never have been written out; skip
                continue

            datum = next(iter(full_data['worker_data'].values()))
            if datum['response']['text'] in BAD_RESPONSES:
                continue
            n_to_approve += 1
            to_approve.append(datum['assignment_id'])
            data.append(datum)
            print(f"To approve: {datum['assignment_id']}")

        print(f"Run ID {run_id}: to approve {n_to_approve} HITs")
        conf = input("Confirm? (y/n): ")
        if conf == "y":
            didnt_approve = list()
            for asgn_id in to_approve:
                try:
                    mturk_manager.approve_work(asgn_id)
                    n_approved += 1
                    print(f"Approved {asgn_id}")
                except Exception:
                    # narrowed from bare except: keep Ctrl-C working
                    didnt_approve.append(asgn_id)
                    print(f"Failed to approve: {asgn_id}")
            print(f"\tApproved {n_approved} HITs")
            if didnt_approve:
                print(
                    f"\tFailed to approve assignments {','.join(didnt_approve)}"
                )
        else:
            print("\tCancelled approvals")

    def approve_assignment(asgn_id):
        """Approve a single assignment, overriding any prior rejection."""
        conf = input(f"Confirm approving assignment {asgn_id}? (y/n): ")
        if conf == "y":
            try:
                mturk_manager.approve_work(asgn_id, override_rejection=True)
                print(f"\tSuccessfully approved!")
            except Exception:
                print(f"\tFailed to approve.")

        else:
            print("\tCancelled approvals.")

    def award_from_file(bonus_file, msg):
        """Pay bonuses from a CSV of worker_id,asgn_id,token,amount rows."""
        awards = [r.split(',') for r in open(bonus_file, encoding="utf-8")]
        total_bonus = sum(float(award[-1]) for award in awards)
        conf = input(
            f"Confirm awarding total bonus ${total_bonus} to {len(awards)} workers? "
        )
        if conf == "y":
            n_awarded = 0
            amt_awarded = 0.0
            didnt_award = list()
            for award in tqdm(awards):
                try:
                    worker_id, asgn_id, request_tok, bonus_amt = award
                except ValueError:
                    # malformed row: drop into the debugger to inspect it
                    ipdb.set_trace()
                bonus_amt = float(bonus_amt)
                try:
                    mturk_manager.pay_bonus(worker_id=worker_id,
                                            bonus_amount=bonus_amt,
                                            assignment_id=asgn_id,
                                            reason=msg,
                                            unique_request_token=request_tok)
                    n_awarded += 1
                    amt_awarded += bonus_amt
                except Exception:
                    didnt_award.append(
                        (worker_id, asgn_id, request_tok, bonus_amt))
            print(f"Awarded {amt_awarded} to {n_awarded} workers.")
            if didnt_award:
                # echo failures in the input CSV format so they can be re-run
                print("Failed on:")
                for worker_id, asgn_id, request_tok, bonus_amt in didnt_award:
                    print(f"{worker_id},{asgn_id},{request_tok},{bonus_amt}")
        else:
            print("\tCancelled bonus.")

        return

    def award_bonus(worker_id, bonus_amt, asgn_id, msg, request_tok):
        """Pay a single bonus after interactive confirmation."""
        conf = input(f"Confirm awarding ${bonus_amt} to {worker_id}?")
        if conf == "y":
            try:
                mturk_manager.pay_bonus(worker_id=worker_id,
                                        bonus_amount=bonus_amt,
                                        assignment_id=asgn_id,
                                        reason=msg,
                                        unique_request_token=request_tok)
                print(f"\tSuccessfully approved!")
            except Exception:
                print(f"\tFailed to approve.")
        else:
            print("\tCancelled bonus.")

    def inspect_assignment(asgn_id):
        """Not implemented: inspect a single assignment by ID."""
        raise NotImplementedError

    def inspect_hit(hit_id):
        """Not implemented: inspect a single HIT by ID."""
        raise NotImplementedError

    def inspect_run_worker_pair(run_id, worker_id):
        """Pretty-print a worker's responses for a run; optionally approve."""
        worker_data = run2worker[run_id][worker_id]
        asgn_id = worker_data['assignment_id']
        answers = list()
        qsts = list()
        ctx = worker_data['task_data'][0]['conversations'][0]['dialog'][0][
            'text']
        for task_datum in worker_data['task_data']:
            qst_d = task_datum['conversations'][1]
            qsts.append(qst_d['dialog'][0]['text'])
            # fix: the original tested `'answer' is not None`, which compares
            # the string literal to None and is always True; check the stored
            # value instead
            if qst_d.get('answer') is not None:
                answers.append(qst_d['answer'])
            else:
                answers.append(None)

        try:
            choices = [
                CHOICE2ANS[r['speakerChoice']]
                for r in worker_data['response']['task_data']
            ]
            reasons = [
                r['textReason'] for r in worker_data['response']['task_data']
            ]
        except KeyError:
            print("Key error!")
            print("task_data not in worker response!")
            ipdb.set_trace()

        try:
            pair = db.get_worker_assignment_pairing(worker_id, asgn_id)
            hit_time = pair['task_end'] - pair['task_start']
        except Exception:
            ipdb.set_trace()

        print(f"\nAssignment ID: {worker_data['assignment_id']}")
        print(f"CONTEXT: {ctx}\n")
        for qst, ans, choice, reason in zip(qsts, answers, choices, reasons):
            print(f"QUESTION: {qst}")
            print(f"ANSWER: {ans}")
            print(f"CHOICE: {choice}")
            print(f"REASON: {reason}")
            print()
        print(f"HIT time: {hit_time}")
        resp = input("Accept (y/n) ? ")
        if resp == "y":
            mturk_manager.approve_work(asgn_id, override_rejection=True)
            print("\tApproved!")

    def inspect_hit_worker_pair(hit_id, worker_id):
        """Look up the single assignment for a HIT and print its submit date."""
        resp = client.list_assignments_for_hit(HITId=hit_id)
        all_asgns = list(resp['Assignments'])
        while 'NextToken' in resp and resp['NextToken']:
            resp = client.list_assignments_for_hit(HITId=hit_id,
                                                   NextToken=resp['NextToken'])
            if resp['Assignments']:
                # fix: later pages were append()ed as nested lists
                all_asgns += resp['Assignments']
            time.sleep(0.5)  # throttle to avoid dropped Amazon queries

        # fix: the assert message used to be `ipdb.set_trace()`, which made
        # the AssertionError message None
        assert len(all_asgns) == 1, "expected exactly one assignment for HIT"
        asgn_ids = [a['AssignmentId'] for a in all_asgns]
        run_ids = list()
        worker_runs = worker2run[worker_id]
        for asgn_id in asgn_ids:
            for run_id, run_d in worker_runs.items():
                if run_d['assignment_id'] == asgn_id:
                    run_ids.append(run_id)
        print(f"Assignment ID: {asgn_ids[0]}")
        print(f"Submit date: {all_asgns[0]['SubmitTime'].strftime('%m/%d')}")

    # main loop: read commands until blank line or "exit"
    while True:
        print("Enter 'p' to print runs")
        cmd = input("Enter command: ")
        if len(cmd) == 0 or cmd == "exit":
            break
        cmd_parts = cmd.split()
        if cmd_parts[0] == "p":
            get_run_id_data(run_ids)
        elif cmd_parts[0] == "inspect":
            assert len(cmd_parts) == 3, "Insufficient arguments!"
            inspect_run_worker_pair(cmd_parts[1], cmd_parts[2])
        elif cmd_parts[0] in ["get-asgn", 'ga']:
            assert len(
                cmd_parts
            ) == 3, "Insufficient arguments! Please provide worker_id and ..."
            inspect_hit_worker_pair(cmd_parts[1], cmd_parts[2])
        elif cmd_parts[0] == "inspect-asgn":
            assert len(cmd_parts) > 1, "No assignment ID provided."
            inspect_assignment(cmd_parts[1])
        elif cmd_parts[0] == "inspect-hit":
            assert len(cmd_parts) > 1, "No HIT ID provided."
            inspect_hit(cmd_parts[1])
        elif cmd_parts[0] == "approve":
            assert len(cmd_parts) > 1, "No run ID provided."
            run_id = cmd_parts[1]
            if run_id in run_ids:
                approve_run_hits(run_id)
            else:
                print(f"Run ID {run_id} not found!")
        elif cmd_parts[0] == "approve-asgn":
            assert len(cmd_parts) > 1, "No assignment ID provided."
            approve_assignment(cmd_parts[1])
        elif cmd_parts[0] == "award-from-file":
            assert len(cmd_parts) > 1, "No file provided."
            if not os.path.exists(cmd_parts[1]):
                print(f"File {cmd_parts[1]} not found!")
                continue
            award_from_file(cmd_parts[1], BONUS_MSG)
        elif cmd_parts[0] in ["d", "debug"]:
            ipdb.set_trace()
        else:
            print(f"Command `{cmd}` not understood.")
Ejemplo n.º 26
0
def main():
    """
        Wizard of Wikipedia Data Collection Task.

        The task involves two people holding a conversation. One dialog partner
        chooses a topic to discuss, and then dialog proceeds.

        One partner is the Wizard, who has access to retrieved external
        information conditioned on the last two utterances, as well as
        information regarding the chosen topic.

        The other partner is the Apprentice, who assumes the role of someone
        eager to learn about the chosen topic.
    """
    argparser = ParlaiParser(False, False)
    DictionaryAgent.add_cmdline_args(argparser)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    argparser.add_argument('-min_t', '--min_turns', default=3, type=int,
                           help='minimum number of turns')
    argparser.add_argument('-max_t', '--max_turns', default=5, type=int,
                           help='maximal number of chat turns')
    argparser.add_argument('-mx_rsp_time', '--max_resp_time', default=120,
                           type=int,
                           help='time limit for entering a dialog message')
    argparser.add_argument('-mx_onb_time', '--max_onboard_time', type=int,
                           default=300, help='time limit for turker'
                           'in onboarding')
    argparser.add_argument('--persona-type', default='both', type=str,
                           choices=['both', 'self', 'other'],
                           help='Which personas to load from personachat')
    argparser.add_argument('--auto-approve-delay', type=int,
                           default=3600 * 24 * 1, help='how long to wait for  \
                           auto approval')
    argparser.add_argument('--word-overlap-threshold', type=int, default=2,
                           help='How much word overlap we want between message \
                           and checked sentence')
    argparser.add_argument('--num-good-sentence-threshold', type=int, default=2,
                           help='How many good sentences with sufficient overlap \
                           are necessary for turker to be considered good.')
    argparser.add_argument('--num-passages-retrieved', type=int, default=7,
                           help='How many passages to retrieve per dialog \
                           message')

    opt = argparser.parse_args()
    directory_path = os.path.dirname(os.path.abspath(__file__))
    opt['task'] = os.path.basename(directory_path)
    if 'data_path' not in opt:
        opt['data_path'] = os.getcwd() + '/data/' + opt['task']
    # fix: current_working_dir used to be set only when data_path was
    # missing, yet it is read unconditionally below when soft-blocking
    # workers, causing a KeyError if data_path was pre-set
    opt['current_working_dir'] = os.getcwd()
    opt.update(task_config)

    mturk_agent_ids = [APPRENTICE, WIZARD]
    opt['min_messages'] = 2

    mturk_manager = MTurkManager(
        opt=opt,
        mturk_agent_ids=mturk_agent_ids
    )
    setup_personas_with_wiki_links(opt)
    ir_agent, task = setup_retriever(opt)
    persona_generator = PersonasGenerator(opt)
    wiki_title_to_passage = setup_title_to_passage(opt)
    mturk_manager.setup_server(task_directory_path=directory_path)
    # worker_id -> assigned role, filled during onboarding
    worker_roles = {}
    connect_counter = AttrDict(value=0)

    try:
        mturk_manager.start_new_run()
        if not opt['is_sandbox']:
            # soft-block every worker listed in mtdont.txt (one id per line)
            with open(os.path.join(opt['current_working_dir'], 'mtdont.txt')) as f:
                lines = [l.replace('\n', '') for l in f.readlines()]
                for w in lines:
                    mturk_manager.soft_block_worker(w)
                    # fix: throttle like the sibling blocking code does, to
                    # prevent Amazon query drops
                    time.sleep(0.1)

        def run_onboard(worker):
            """Alternate roles across connections and run role onboarding."""
            role = mturk_agent_ids[connect_counter.value % len(mturk_agent_ids)]
            connect_counter.value += 1
            worker_roles[worker.worker_id] = role
            worker.persona_generator = persona_generator
            world = RoleOnboardWorld(opt, worker, role)
            world.parley()
            world.shutdown()

        mturk_manager.set_onboard_function(onboard_function=run_onboard)
        mturk_manager.ready_to_accept_workers()
        mturk_manager.create_hits()

        def check_workers_eligibility(workers):
            """Pick one worker per role; return [] until both roles filled."""
            if opt['is_sandbox']:
                return workers
            valid_workers = {}
            for worker in workers:
                worker_id = worker.worker_id
                if worker_id not in worker_roles:
                    # Something went wrong: worker never finished onboarding
                    continue
                role = worker_roles[worker_id]
                if role not in valid_workers:
                    valid_workers[role] = worker
                if len(valid_workers) == 2:
                    break
            return valid_workers.values() if len(valid_workers) == 2 else []

        eligibility_function = {
            'func': check_workers_eligibility,
            'multiple': True,
        }

        def assign_worker_roles(workers):
            """Assign onboarded roles (or alternate them in sandbox mode)."""
            if opt['is_sandbox']:
                for i, worker in enumerate(workers):
                    worker.id = mturk_agent_ids[i % len(mturk_agent_ids)]
            else:
                for worker in workers:
                    worker.id = worker_roles[worker.worker_id]

        def run_conversation(mturk_manager, opt, workers):
            """Run one Wizard-of-Wikipedia dialog; soft-block bad wizards."""
            agents = workers[:]
            if not opt['is_sandbox']:
                # roles are consumed once the conversation starts
                for agent in agents:
                    worker_roles.pop(agent.worker_id)
            conv_idx = mturk_manager.conversation_index
            world = MTurkWizardOfWikipediaWorld(
                opt,
                agents=agents,
                world_tag='conversation t_{}'.format(conv_idx),
                ir_agent=ir_agent,
                wiki_title_to_passage=wiki_title_to_passage,
                task=task
            )
            world.reset_random()
            while not world.episode_done():
                world.parley()
            world.save_data()
            if (world.convo_finished and
                    not world.good_wiz and
                    not opt['is_sandbox']):
                mturk_manager.soft_block_worker(world.wizard_worker)
            world.shutdown()
            world.review_work()

        mturk_manager.start_task(
            eligibility_function=eligibility_function,
            assign_role_function=assign_worker_roles,
            task_function=run_conversation
        )

    except BaseException:
        raise
    finally:
        # always clean up outstanding HITs, even on crash/interrupt
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
Ejemplo n.º 27
0
def main():
    """
    Human Evaluation of various responses to comments on images.

    A turker is shown an image and some dialog history. Then, the
    turker is asked to choose which response they think is more engaging.

    If no `--eval-data-path` is given, the data from the original
    Image-Chat dataset is used.

    To use your own data, please specify `--eval-data-path`, a path to an
    appropriate json file with a list of examples, where each example
    has the following structure:
        {
            'image_hash': <hash of image>,
            'dialog': [(personality, text), ...] - list of personality, text tuples
            'personality': <personality of responses to compare>
            '<compare_key_1>': <first response to compare>,
            '<compare_key_2>': <second option to compare>,
            .
            .
            .
        }
    Note that compare_key_1 and compare_key_2 can be any field, as long as they
    map to a string response.

    Example Scenario:
        Suppose you have the original Image-Chat dataset, and
        you would like to compare the outputs of your model called `model`.

        Your data may look like the following:
        [{
            'image_hash': hashforimageofcat,
            'dialog': [
                ('Sweet', 'What a cute cat!'),
                ('Neutral', 'Just looks like a plain cat to me')
            ]
            'personality': 'Sweet',
            'comment': 'It really is adorable if you look!', # Human Comment
            'model_comment': 'You'll love it if you pet it!' # Model Comment
        }, ...]

        Thus, you would specify `-ck1 comment -ck2 model_comment` to evaluate
        the outputs of the model vs. the human comments from Personality-Captions
    """
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    argparser.add_argument('-min_t',
                           '--min_turns',
                           default=3,
                           type=int,
                           help='minimum number of turns')
    argparser.add_argument('-mt',
                           '--max_turns',
                           default=5,
                           type=int,
                           help='maximal number of chat turns')
    argparser.add_argument(
        '-mx_rsp_time',
        '--max_resp_time',
        default=1800,
        type=int,
        help='time limit for entering a dialog message',
    )
    argparser.add_argument(
        '-mx_onb_time',
        '--max_onboard_time',
        type=int,
        default=300,
        help='time limit for turker'
        'in onboarding',
    )
    argparser.add_argument(
        '-ni',
        '--num_images',
        type=int,
        default=10,
        help='number of images to show \
                           to turker',
    )
    argparser.add_argument(
        '--auto-approve-delay',
        type=int,
        default=3600 * 24,
        help='how long to wait for  \
                           auto approval',
    )
    argparser.add_argument('--data-path',
                           type=str,
                           default='',
                           help='where to save data')
    argparser.add_argument(
        '--eval-data-path',
        type=str,
        default='',
        help='where to load data to rank from. Leave '
        'blank to use Image-Chat data',
    )
    argparser.add_argument(
        '-ck1',
        '--compare-key-1',
        type=str,
        default='comment',
        help='key of first comparable',
    )
    argparser.add_argument(
        '-ck2',
        '--compare-key-2',
        type=str,
        default='comment',
        # fix: help text said "first" — copy-paste from -ck1
        help='key of second comparable',
    )
    argparser.add_argument(
        '-rnd',
        '--dialog-round',
        type=str,
        default='first_response',
        choices=round_choices,
        help='which dialog round to show',
    )
    argparser.add_argument(
        '--show-personality',
        default=True,
        type='bool',
        help='whether to show the personality',
    )
    ImageChatTeacher.add_cmdline_args(argparser)
    opt = argparser.parse_args()
    build_ic(opt)
    directory_path = os.path.dirname(os.path.abspath(__file__))
    opt['task'] = os.path.basename(directory_path)
    if 'data_path' not in opt or opt['data_path'] == '':
        opt['data_path'] = os.getcwd() + '/data/' + opt['task']
    if opt.get('eval_data_path') == '':
        # default to ranking the original Image-Chat test set
        opt['eval_data_path'] = os.path.join(opt['datapath'],
                                             'image_chat/test.json')
    # pick the task config matching the dialog round being evaluated
    config = config_first if opt[
        'dialog_round'] == 'first_response' else config_second
    opt.update(config)

    mturk_agent_ids = [CHOOSER]
    mturk_manager = MTurkManager(opt=opt, mturk_agent_ids=mturk_agent_ids)

    example_generator = ExampleGenerator(opt)
    mturk_manager.setup_server(task_directory_path=directory_path)

    try:
        mturk_manager.start_new_run()

        def run_onboard(worker):
            """Attach the shared example generator and run onboarding."""
            worker.example_generator = example_generator
            world = RoleOnboardWorld(opt, worker)
            world.parley()
            world.shutdown()

        mturk_manager.set_onboard_function(onboard_function=run_onboard)
        mturk_manager.ready_to_accept_workers()
        mturk_manager.create_hits()

        def check_worker_eligibility(worker):
            """All workers are eligible for this task."""
            return True

        def assign_worker_roles(workers):
            """Every worker plays the single CHOOSER role."""
            for w in workers:
                w.id = mturk_agent_ids[0]

        def run_conversation(mturk_manager, opt, workers):
            """Run one stack-ranking world with the connected workers."""
            agents = workers[:]
            conv_idx = mturk_manager.conversation_index
            world = MTurkImageChatStackRankWorld(
                opt,
                agents=agents,
                world_tag='conversation t_{}'.format(conv_idx))
            while not world.episode_done():
                world.parley()
            world.save_data()

            world.shutdown()
            world.review_work()

        mturk_manager.start_task(
            eligibility_function=check_worker_eligibility,
            assign_role_function=assign_worker_roles,
            task_function=run_conversation,
        )

    except BaseException:
        raise
    finally:
        # always clean up outstanding HITs, even on crash/interrupt
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()