# Example #1 (score: 0)
def main():
    """
    Run the deal-or-no-deal negotiation task.

    One agent -- a model or an MTurk worker -- negotiates a deal with an
    MTurk worker.
    """
    parser = ParlaiParser(False, False)
    parser.add_parlai_data_path()
    parser.add_mturk_args()
    parser.add_argument(
        '--two_mturk_agents',
        dest='two_mturk_agents',
        action='store_true',
        help='data collection mode '
        'with converations between two MTurk agents',
    )

    opt = parser.parse_args()
    opt['task'] = 'dealnodeal'
    opt['datatype'] = 'valid'
    opt.update(task_config)

    local_id = 'local_1'
    agent_ids = ['mturk_agent_1']
    if opt['two_mturk_agents']:
        agent_ids.append('mturk_agent_2')

    manager = MTurkManager(opt=opt, mturk_agent_ids=agent_ids)
    manager.setup_server()

    try:
        manager.start_new_run()
        manager.create_hits()

        # No dedicated onboarding world for this task.
        manager.set_onboard_function(onboard_function=None)
        manager.ready_to_accept_workers()

        def check_worker_eligibility(worker):
            # Every worker may participate.
            return True

        def assign_worker_roles(workers):
            # Hand out agent ids round-robin.
            for idx, worker in enumerate(workers):
                worker.id = agent_ids[idx % len(agent_ids)]

        def run_conversation(mturk_manager, opt, workers):
            agents = list(workers)

            if not opt['two_mturk_agents']:
                # Pair the single MTurk worker with a local agent
                # (a model if one is configured, otherwise a human at the
                # terminal).
                if 'model' in opt:
                    local_agent = create_agent(opt)
                else:
                    local_agent = LocalHumanAgent(opt=None)
                local_agent.id = local_id
                agents.append(local_agent)

            opt["batchindex"] = mturk_manager.started_conversations

            world = MTurkDealNoDealDialogWorld(opt=opt, agents=agents)
            while not world.episode_done():
                world.parley()
            world.shutdown()

        manager.start_task(
            eligibility_function=check_worker_eligibility,
            assign_role_function=assign_worker_roles,
            task_function=run_conversation,
        )

    except BaseException:
        raise
    finally:
        # Always release outstanding HITs and tear the server down.
        manager.expire_all_unassigned_hits()
        manager.shutdown()
# Example #2 (score: 0)
    if 'tasks' in metrics:
        for task, t_metrics in metrics['tasks'].items():
            logger.info('task = %s | EM = %.4f | F1 = %.4f | exs = %d | ' %
                        (task, t_metrics['accuracy'], t_metrics['f1'],
                         t_metrics['total']))
        logger.info('Overall EM = %.4f | exs = %d' %
                    (metrics['accuracy'], metrics['total']))
    else:
        logger.info('EM = %.4f | F1 = %.4f | exs = %d' %
                    (metrics['accuracy'], metrics['f1'], metrics['total']))
    logger.info('[ Done. Time = %.2f (s) ]' % valid_time.time())


if __name__ == '__main__':
    # Get command line arguments for the DocReader agent.
    argparser = ParlaiParser()
    DocReaderAgent.add_cmdline_args(argparser)
    opt = argparser.parse_args()

    # Set logging (only stderr): StreamHandler with no argument writes to
    # stderr, so nothing is emitted on stdout.
    logger = logging.getLogger('DrQA')
    logger.setLevel(logging.INFO)
    fmt = logging.Formatter('%(asctime)s: %(message)s', '%m/%d/%Y %I:%M:%S %p')
    console = logging.StreamHandler()
    console.setFormatter(fmt)
    logger.addHandler(console)

    # Set cuda: only when not explicitly disabled AND a GPU is available.
    opt['cuda'] = not opt['no_cuda'] and torch.cuda.is_available()
    if opt['cuda']:
        logger.info('[ Using CUDA (GPU %d) ]' % opt['gpu'])
# Example #3 (score: 0)
def main():
    """
    This task consists of an MTurk agent evaluating a wizard model.

    They are assigned a topic and asked to chat.  In ``--human-eval`` mode
    two MTurk workers chat with each other instead of with a model.
    """
    # Timestamp used for log filenames and the output folder of this run.
    start_time = datetime.datetime.today().strftime('%Y-%m-%d-%H-%M')
    argparser = ParlaiParser(False, add_model_args=True)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    argparser.add_argument('-mt',
                           '--max-turns',
                           default=10,
                           type=int,
                           help='maximal number of chat turns')
    argparser.add_argument(
        '--max-resp-time',
        default=240,
        type=int,
        help='time limit for entering a dialog message',
    )
    # NOTE(review): the two fragments below concatenate to
    # 'time limit for turkerchoosing the topic' (missing space); confirm and
    # fix the string in a behavior-changing pass.
    argparser.add_argument(
        '--max-choice-time',
        type=int,
        default=300,
        help='time limit for turker'
        'choosing the topic',
    )
    argparser.add_argument(
        '--ag-shutdown-time',
        default=120,
        type=int,
        help='time limit for entering a dialog message',
    )
    argparser.add_argument('-rt',
                           '--range-turn',
                           default='3,5',
                           help='sample range of number of turns')
    argparser.add_argument(
        '--human-eval',
        type='bool',
        default=False,
        help='human vs human eval, no models involved',
    )
    argparser.add_argument(
        '--auto-approve-delay',
        type=int,
        default=3600 * 24 * 1,  # one day, in seconds
        help='how long to wait for auto approval',
    )
    argparser.add_argument(
        '--only-masters',
        type='bool',
        default=False,
        help='Set to true to use only master turks for '
        'this test eval',
    )
    argparser.add_argument(
        '--unique-workers',
        type='bool',
        default=False,
        help='Each worker must be unique',
    )
    argparser.add_argument(
        '--mturk-log',
        type=str,
        default='data/mturklogs/wizard_of_wikipedia/{}.log'.format(start_time),
    )

    def inject_override(opt, override_dict):
        # Record the override dict in opt and copy each entry into opt itself.
        opt['override'] = override_dict
        for k, v in override_dict.items():
            opt[k] = v

    def get_logger(opt):
        # Build a ParlaiLogger writing to the console and, when
        # opt['mturk_log'] is set, to that file (directories are created as
        # needed).  Logs the invoking command and the full config, then
        # returns the logger.
        fmt = '%(asctime)s: [ %(message)s ]'
        logfile = None
        if 'mturk_log' in opt:
            logfile = opt['mturk_log']
            if not os.path.isdir(os.path.dirname(logfile)):
                os.makedirs(os.path.dirname(logfile))
        logger = ParlaiLogger(
            "mturk_woz",
            console_level=INFO,
            file_level=INFO,
            console_format=fmt,
            file_format=fmt,
            filename=logfile,
        )
        logger.info('COMMAND: %s' % ' '.join(sys.argv))
        logger.info('-' * 100)
        logger.info('CONFIG:\n%s' % json.dumps(opt, indent=4, sort_keys=True))

        return logger

    # MODEL CONFIG
    # NOTE: please edit this to test your own models
    config = {
        'model':
        'projects:wizard_of_wikipedia:interactive_retrieval',
        'retriever_model_file':
        'models:wikipedia_full/tfidf_retriever/model',
        'responder_model_file':
        'models:wizard_of_wikipedia/full_dialogue_retrieval_model/model',
    }

    argparser.add_model_subargs(config['model'])  # add model args to opt
    start_opt = argparser.parse_args()

    inject_override(start_opt, config)

    # In model-eval mode, build the bot once and share its parameters so each
    # conversation world can create its own copy cheaply.
    if not start_opt.get('human_eval'):
        bot = create_agent(start_opt)
        shared_bot_params = bot.share()
    else:
        shared_bot_params = None

    if not start_opt['human_eval']:
        get_logger(bot.opt)
    else:
        get_logger(start_opt)

    # Output folder: model name (or 'human_eval') plus the run timestamp.
    if start_opt['human_eval']:
        folder_name = 'human_eval-{}'.format(start_time)
    else:
        folder_name = '{}-{}'.format(start_opt['model'], start_time)

    start_opt['task'] = os.path.basename(
        os.path.dirname(os.path.abspath(__file__)))
    if 'data_path' not in start_opt:
        start_opt['data_path'] = os.path.join(os.getcwd(), 'data',
                                              'wizard_eval', folder_name)
    start_opt.update(task_config)

    # Human-human eval needs two MTurk roles; model eval needs only one.
    if not start_opt.get('human_eval'):
        mturk_agent_ids = ['PERSON_1']
    else:
        mturk_agent_ids = ['PERSON_1', 'PERSON_2']

    mturk_manager = MTurkManager(opt=start_opt,
                                 mturk_agent_ids=mturk_agent_ids)

    topics_generator = TopicsGenerator(start_opt)
    directory_path = os.path.dirname(os.path.abspath(__file__))
    mturk_manager.setup_server(task_directory_path=directory_path)
    worker_roles = {}  # worker_id -> role, filled during onboarding
    connect_counter = AttrDict(value=0)  # mutable counter shared by closures

    try:
        mturk_manager.start_new_run()
        agent_qualifications = []
        if not start_opt['is_sandbox']:
            # assign qualifications (live runs only)
            if start_opt['only_masters']:
                agent_qualifications.append(MASTER_QUALIF)
            if start_opt['unique_workers']:
                # One-HIT-per-worker qualification: workers holding it are
                # excluded from previewing or accepting further HITs.
                qual_name = 'UniqueChatEval'
                qual_desc = (
                    'Qualification to ensure each worker completes a maximum '
                    'of one of these chat/eval HITs')
                qualification_id = mturk_utils.find_or_create_qualification(
                    qual_name, qual_desc, False)
                print('Created qualification: ', qualification_id)
                UNIQUE_QUALIF = {
                    'QualificationTypeId': qualification_id,
                    'Comparator': 'DoesNotExist',
                    'RequiredToPreview': True,
                }
                start_opt['unique_qualif_id'] = qualification_id
                agent_qualifications.append(UNIQUE_QUALIF)
        mturk_manager.create_hits(qualifications=agent_qualifications)

        def run_onboard(worker):
            # Human-human mode: assign roles round-robin and remember them
            # for the eligibility / role-assignment callbacks below.
            # Model mode: everyone is PERSON_1.  Either way, run the
            # topic-choosing world once for this worker.
            if start_opt['human_eval']:
                role = mturk_agent_ids[connect_counter.value %
                                       len(mturk_agent_ids)]
                connect_counter.value += 1
                worker_roles[worker.worker_id] = role
            else:
                role = 'PERSON_1'
            worker.topics_generator = topics_generator
            world = TopicChooseWorld(start_opt, worker, role=role)
            world.parley()
            world.shutdown()

        mturk_manager.set_onboard_function(onboard_function=run_onboard)
        mturk_manager.ready_to_accept_workers()

        def check_single_worker_eligibility(worker):
            # Any single worker is eligible.
            return True

        def check_multiple_workers_eligibility(workers):
            # Pick one worker per onboarded role; only succeed when both
            # roles are covered, otherwise report no eligible workers.
            valid_workers = {}
            for worker in workers:
                worker_id = worker.worker_id
                if worker_id not in worker_roles:
                    print('Something went wrong')
                    continue
                role = worker_roles[worker_id]
                if role not in valid_workers:
                    valid_workers[role] = worker
                if len(valid_workers) == 2:
                    break
            return valid_workers.values() if len(valid_workers) == 2 else []

        if not start_opt['human_eval']:
            eligibility_function = {
                'func': check_single_worker_eligibility,
                'multiple': False,
            }
        else:
            eligibility_function = {
                'func': check_multiple_workers_eligibility,
                'multiple': True,
            }

        def assign_worker_roles(workers):
            # Use the roles chosen during onboarding (human-human) or hand
            # out the single id round-robin (model eval).
            if start_opt['human_eval']:
                for worker in workers:
                    worker.id = worker_roles[worker.worker_id]
            else:
                for index, worker in enumerate(workers):
                    worker.id = mturk_agent_ids[index % len(mturk_agent_ids)]

        def run_conversation(mturk_manager, opt, workers):
            # Run one evaluation dialogue to completion and save its data.
            conv_idx = mturk_manager.conversation_index
            world = WizardEval(
                opt=start_opt,
                agents=workers,
                range_turn=[
                    int(s) for s in start_opt['range_turn'].split(',')
                ],
                max_turn=start_opt['max_turns'],
                max_resp_time=start_opt['max_resp_time'],
                model_agent_opt=shared_bot_params,
                world_tag='conversation t_{}'.format(conv_idx),
                # NOTE(review): reads opt (the task-function argument) here
                # while everything else uses start_opt -- confirm intentional.
                agent_timeout_shutdown=opt['ag_shutdown_time'],
            )
            while not world.episode_done():
                world.parley()
            world.save_data()

            world.shutdown()
            gc.collect()

        mturk_manager.start_task(
            eligibility_function=eligibility_function,
            assign_role_function=assign_worker_roles,
            task_function=run_conversation,
        )

    except BaseException:
        raise
    finally:
        # Always release outstanding HITs and tear the server down.
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
# Example #4 (score: 0)
# File: run.py  Project: zwcdp/ParlAI
def main():
    """
    Run the negotiation data-collection task.

    One agent -- a model or an MTurk worker -- talks to an MTurk worker
    to negotiate a deal.
    """
    parser = ParlaiParser(False, False)
    parser.add_parlai_data_path()
    parser.add_mturk_args()
    parser.add_argument(
        '-min_t', '--min_turns',
        default=5,
        type=int,
        help='minimum number of turns',
    )
    parser.add_argument(
        '-mt', '--max_turns',
        default=10,
        type=int,
        help='maximal number of chat turns',
    )
    parser.add_argument(
        '-mx_rsp_time', '--max_resp_time',
        default=150,
        type=int,
        help='time limit for entering a dialog message',
    )
    parser.add_argument(
        '--ag_shutdown_time',
        default=120,
        type=int,
        help='time limit for entering a dialog message',
    )
    parser.add_argument(
        '--persona-type',
        default='both',
        type=str,
        choices=['both', 'self', 'other'],
        help='Which personas to load from personachat',
    )
    opt = parser.parse_args()

    # The task is named after the directory this script lives in.
    task_dir = os.path.dirname(os.path.abspath(__file__))
    opt['task'] = os.path.basename(task_dir)
    opt['extract_personas_path'] = os.path.join(opt['datapath'], opt['task'])
    opt.update(task_config)

    agent_ids = ['PERSON_1']
    manager = MTurkManager(opt=opt, mturk_agent_ids=agent_ids)
    manager.setup_server(task_directory_path=task_dir)

    opt['personas_generator'] = PersonasGenerator(opt)

    try:
        manager.start_new_run()
        manager.create_hits()

        if not opt['is_sandbox']:
            # ADD BLOCKED WORKERS HERE
            blocked_worker_list = []
            for blocked_id in blocked_worker_list:
                manager.block_worker(
                    blocked_id,
                    'We found that you have unexpected behaviors in our '
                    'previous HITs. For more questions please email us.'
                )

        def run_onboard(worker):
            # No onboarding step for this task.
            pass

        manager.set_onboard_function(onboard_function=run_onboard)
        manager.ready_to_accept_workers()

        def check_worker_eligibility(worker):
            # Every worker may participate.
            return True

        def assign_worker_roles(workers):
            # Hand out agent ids round-robin.
            for idx, worker in enumerate(workers):
                worker.id = agent_ids[idx % len(agent_ids)]

        def run_conversation(mturk_manager, opt, workers):
            # Single-worker world: run to completion, save, then review.
            world = RephrasePersonaWorld(opt, workers[0])
            while not world.episode_done():
                world.parley()
            world.save_data()
            world.shutdown()
            world.review_work()

        manager.start_task(
            eligibility_function=check_worker_eligibility,
            assign_role_function=assign_worker_roles,
            task_function=run_conversation
        )

    except BaseException:
        raise
    finally:
        # Always release outstanding HITs and tear the server down.
        manager.expire_all_unassigned_hits()
        manager.shutdown()
# Example #5 (score: 0)
    def setUp(self):
        """
        Build a fully-mocked MTurkManager and launch a two-agent task.

        Every external entry point (server deployment, AWS/MTurk calls) is
        replaced with a MagicMock so nothing leaves the process; the manager
        then runs against a fake socket with the task loop on a background
        thread, and three MockAgents are prepared for the tests to connect.
        """
        # Mock functions that hit external APIs and such
        self.server_utils = MTurkManagerFile.server_utils
        self.mturk_utils = MTurkManagerFile.mturk_utils
        self.server_utils.setup_server = \
            mock.MagicMock(return_value='https://127.0.0.1')
        self.server_utils.setup_legacy_server = \
            mock.MagicMock(return_value='https://127.0.0.1')
        self.server_utils.delete_server = mock.MagicMock()
        self.mturk_utils.setup_aws_credentials = mock.MagicMock()
        self.mturk_utils.calculate_mturk_cost = mock.MagicMock(return_value=1)
        self.mturk_utils.check_mturk_balance = \
            mock.MagicMock(return_value=True)
        self.mturk_utils.create_hit_config = mock.MagicMock()
        self.mturk_utils.setup_sns_topic = mock.MagicMock(
            return_value=TOPIC_ARN)
        self.mturk_utils.delete_sns_topic = mock.MagicMock()
        self.mturk_utils.delete_qualification = mock.MagicMock()
        self.mturk_utils.find_or_create_qualification = mock.MagicMock(
            return_value=QUALIFICATION_ID)
        self.mturk_utils.find_qualification = mock.MagicMock(
            return_value=QUALIFICATION_ID)
        self.mturk_utils.give_worker_qualification = mock.MagicMock()
        self.mturk_utils.remove_worker_qualification = mock.MagicMock()
        self.mturk_utils.create_hit_type = mock.MagicMock(
            return_value=HIT_TYPE_ID)
        self.mturk_utils.subscribe_to_hits = mock.MagicMock()
        self.mturk_utils.create_hit_with_hit_type = mock.MagicMock(
            return_value=(MTURK_PAGE_URL, FAKE_HIT_ID, 'MTURK_HIT_DATA'))
        self.mturk_utils.get_mturk_client = mock.MagicMock(
            return_value=mock.MagicMock())

        # Shared dicts for the test callbacks -- presumably populated by
        # self.onboard_agent / self.run_conversation; confirm in those
        # methods (not visible here).
        self.onboarding_agents = {}
        self.worlds_agents = {}

        # Set up an MTurk Manager and get it ready for accepting workers
        self.fake_socket = MockSocket()
        time.sleep(0.1)  # give the mock socket a moment to start
        argparser = ParlaiParser(False, False)
        argparser.add_parlai_data_path()
        argparser.add_mturk_args()
        self.opt = argparser.parse_args(print_args=False)
        self.opt['task'] = 'unittest'
        self.opt['assignment_duration_in_seconds'] = 1
        self.opt['hit_title'] = 'test_hit_title'
        self.opt['hit_description'] = 'test_hit_description'
        self.opt['task_description'] = 'test_task_description'
        self.opt['hit_keywords'] = 'test_hit_keywords'
        self.opt['reward'] = 0.1
        self.opt['is_debug'] = True
        self.opt['log_level'] = 0
        self.opt['num_conversations'] = 1
        self.mturk_agent_ids = ['mturk_agent_1', 'mturk_agent_2']
        self.mturk_manager = MTurkManager(
            opt=self.opt,
            mturk_agent_ids=self.mturk_agent_ids,
            is_test=True,
        )
        self.mturk_manager.port = self.fake_socket.port
        self.mturk_manager.setup_server()
        self.mturk_manager.start_new_run()
        self.mturk_manager.ready_to_accept_workers()
        self.mturk_manager.set_onboard_function(self.onboard_agent)
        self.mturk_manager.create_hits()

        def assign_worker_roles(workers):
            # Fixed two-worker assignment for the test conversation.
            workers[0].id = 'mturk_agent_1'
            workers[1].id = 'mturk_agent_2'

        def run_task_wait():
            # start_task blocks until the task finishes, so it runs on a
            # background thread.
            self.mturk_manager.start_task(
                lambda w: True, assign_worker_roles, self.run_conversation)

        self.task_thread = threading.Thread(target=run_task_wait)
        self.task_thread.start()

        # Mock agents: two distinct workers, plus a second assignment for
        # worker 1 (same worker id, different assignment id).
        self.agent_1 = MockAgent(TEST_HIT_ID_1, TEST_ASSIGNMENT_ID_1,
                                 TEST_WORKER_ID_1, TASK_GROUP_ID_1)
        self.agent_1_2 = MockAgent(TEST_HIT_ID_1, TEST_ASSIGNMENT_ID_3,
                                   TEST_WORKER_ID_1, TASK_GROUP_ID_1)
        self.agent_2 = MockAgent(TEST_HIT_ID_2, TEST_ASSIGNMENT_ID_2,
                                 TEST_WORKER_ID_2, TASK_GROUP_ID_1)
# Example #6 (score: 0)
def _make_argparse_table(class_):
    """
    Build the reStructuredText/Markdown table of args and descriptions.

    Returns a list of text fragments documenting every visible option of
    every argument group that ``class_`` registers on a ParlaiParser.
    """
    fragments = []
    parser = ParlaiParser(False, False)
    class_.add_cmdline_args(parser, partial_opt=None)
    # One table per ArgumentGroup defined on the parser.
    for group in parser._action_groups:
        rows = []
        for action in group._group_actions:
            # Skip options explicitly marked hidden, and argparse internals.
            if getattr(action, 'hidden', False):
                continue
            if action.dest in (argparse.SUPPRESS, 'help'):
                continue
            flags = ",  ".join(f'`{a}`'
                               for a in action.option_strings)
            pieces = []
            if action.help:
                h = action.help
                if not h[0].isupper():
                    h = h[0].upper() + h[1:]
                pieces.append(h.replace("%(default)s", str(action.default)))
            # list choices if there are any
            if action.choices:
                pieces.append(
                    "Choices: " + ", ".join(f'`{c}`'
                                            for c in action.choices) + "."
                )
            # list default and recommended values.
            defaults = ""
            if action.default is not None and action.default is not argparse.SUPPRESS:
                defaults += f"Default: ``{action.default}``.  "
            if getattr(action, 'recommended', None):
                defaults += f"Recommended: ``{action.recommended}``. "
            if defaults:
                # A few args use a literal newline as their default; escape it.
                pieces.append(defaults.replace("\n", "\\n"))

            # Escape newlines for insertion inside a table cell.
            description = "\n".join(pieces).replace("\n", "\n   \n   ")
            rows.append((flags, description))

        if not rows:
            continue

        fragments.append(f'__{group.title.title()}__\n\n')
        fragments.append("| Argument | Description |\n")
        fragments.append("|----------|----------|\n")
        for row in rows:
            cells = "| " + " | ".join(row) + " |"
            fragments.append(cells.replace("\n", "<br>") + "\n")
        fragments.append("\n\n")
    return fragments
# Example #7 (score: 0)
def main():
    """
    Main function for the DMG pilot data collection task.

    Runs a five-round dialogue game between two MTurk agents (or one MTurk
    agent plus a local agent) and writes one JSON log file per game round.

    :return: Nothing.
    """
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    argparser.add_argument('--two_mturk_agents', dest='two_mturk_agents',
                           action='store_true', help='data collection mode '
                           'with converations between two MTurk agents')

    opt = argparser.parse_args()
    opt['task'] = 'dmg_pilot_dev'
    opt['datatype'] = 'dmg_pilot_data_1'
    opt.update(task_config)

    local_agent_1_id = 'local_1'
    mturk_agent_ids = ['mturk_agent_1']
    if opt['two_mturk_agents']:
        mturk_agent_ids.append('mturk_agent_2')

    mturk_manager = MTurkManager(
        opt=opt,
        mturk_agent_ids=mturk_agent_ids
    )

    mturk_manager.setup_server()

    try:
        mturk_manager.start_new_run()
        mturk_manager.create_hits()

        # No dedicated onboarding world for this task.
        mturk_manager.set_onboard_function(onboard_function=None)
        mturk_manager.ready_to_accept_workers()

        def check_worker_eligibility(worker):
            # Every worker may participate.
            return True

        def assign_worker_roles(workers):
            # Hand out agent ids round-robin.
            for index, worker in enumerate(workers):
                worker.id = mturk_agent_ids[index % len(mturk_agent_ids)]

        def run_conversation(mturk_manager, opt, workers):
            agents = workers[:]

            # Create a local agent (a model if configured, else a human at
            # the terminal) when only one MTurk worker takes part.
            if not opt['two_mturk_agents']:
                if 'model' in opt:
                    local_agent = create_agent(opt)
                else:
                    local_agent = LocalHumanAgent(opt=None)

                local_agent.id = local_agent_1_id
                agents.append(local_agent)

            opt["batchindex"] = mturk_manager.started_conversations

            world = MTurkDMGDialogWorld(
                opt=opt,
                agents=agents
            )

            # Single timestamp shared by all five per-round log files.
            log_timestamp = time.time()

            # Loop over all five rounds of the game
            for r in range(5):
                print("--- Starting round {} ---".format(r+1))
                while not world.episode_done():
                    world.parley()

                # Write the log data to file

                print("Writing log to file")
                if not os.path.exists("logs"):
                    os.makedirs("logs")
                with open('logs/dmg_pilot_data_{}_{}.json'.format(world.game_nr, log_timestamp), 'w') as f:
                    json.dump(world.conversation_log, f)

                if not r == 4:
                    # Reset the world for the next round
                    # NOTE(review): pokes the world's internal round state
                    # directly -- assumes these attribute names match
                    # MTurkDMGDialogWorld's internals; confirm on refactor.
                    world.selections = defaultdict(lambda: dict())
                    world.round_log = world.reset_round_log()
                    world.turn_nr = -1
                    world.round_nr += 1
                    world.doneCounter = 0
                    world.episodeDone = False

                else:
                    # Last round: shut the world down instead of resetting.
                    world.shutdown()

        mturk_manager.start_task(
            eligibility_function=check_worker_eligibility,
            assign_role_function=assign_worker_roles,
            task_function=run_conversation
        )

        print("Game ended.")

    except BaseException:
        raise
    finally:
        # Always release outstanding HITs and tear the server down.
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
# Example #8 (score: 0)
def main():
    """
        Personality-Captions Data Collection Task.

        This is the task setup used when collecting the Personality-Captions
        dataset (https://arxiv.org/abs/1810.10665).
    """
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    PersonalityCaptionsTeacher.add_cmdline_args(argparser)
    argparser.add_argument('-ni',
                           '--num_images',
                           type=int,
                           default=10,
                           help='number of images to show \
                           to turker')
    argparser.add_argument('-mx_rsp_time',
                           '--max_resp_time',
                           default=1800,
                           type=int,
                           help='time limit for entering a dialog message')
    # NOTE(review): the two fragments below concatenate to
    # 'time limit for turkerin onboarding' (missing space); confirm and fix
    # the string in a behavior-changing pass.
    argparser.add_argument('-mx_onb_time',
                           '--max_onboard_time',
                           type=int,
                           default=300,
                           help='time limit for turker'
                           'in onboarding')
    argparser.add_argument('--auto-approve-delay',
                           type=int,
                           default=3600 * 24 * 5,  # five days, in seconds
                           help='how long to wait for  \
                           auto approval')
    argparser.add_argument('--multiple-personality',
                           type='bool',
                           default=False,
                           help='for getting captions with '
                           'multiple personalities for same image')
    argparser.add_argument(
        '--task-type',
        type=str,
        default='personality',
        choices=['personality', 'no_personality', 'caption'],
        help='Task Type - specify `personality` for '
        'original task, `no_personality` for the same task '
        'instructions but with no personality, and '
        '`caption` for the task but asking for a normal '
        'caption.')

    opt = argparser.parse_args()

    # Task name and default data path derive from this script's directory.
    directory_path = os.path.dirname(os.path.abspath(__file__))
    opt['task'] = os.path.basename(directory_path)
    if 'data_path' not in opt:
        opt['data_path'] = os.getcwd() + '/data/' + opt['task']
    opt.update(TASK_TYPE_TO_CONFIG[opt['task_type']])
    build_pc_data(opt)  # make sure the task data exists locally
    mturk_agent_ids = [COMMENTER]
    mturk_manager = MTurkManager(opt=opt,
                                 mturk_agent_ids=mturk_agent_ids,
                                 use_db=True)
    # Shared generators handing each worker a personality and/or image.
    personality_generator = PersonalityGenerator(opt)
    image_generator = ImageGenerator(opt)
    personality_and_image_generator = PersonalityAndImageGenerator(opt)
    mturk_manager.setup_server(task_directory_path=directory_path)

    try:
        mturk_manager.start_new_run()

        def run_onboard(worker):
            # Attach the shared generators to this worker, then run the
            # onboarding world once.
            worker.personality_generator = personality_generator
            worker.image_generator = image_generator
            worker.personality_and_image_generator = personality_and_image_generator
            world = RoleOnboardWorld(opt, worker)
            world.parley()
            world.shutdown()

        mturk_manager.set_onboard_function(onboard_function=run_onboard)
        mturk_manager.ready_to_accept_workers()
        mturk_manager.create_hits()

        def check_worker_eligibility(worker):
            # Every worker may participate.
            return True

        def assign_worker_roles(workers):
            # Single-role task: every worker is the commenter.
            for w in workers:
                w.id = mturk_agent_ids[0]

        def run_conversation(mturk_manager, opt, workers):
            # Run one captioning conversation to completion, save its data,
            # then review the work.
            agents = workers[:]
            conv_idx = mturk_manager.conversation_index
            world = MTurkPersonalityCaptionsWorld(
                opt,
                agents=agents,
                world_tag='conversation t_{}'.format(conv_idx),
            )
            while not world.episode_done():
                world.parley()
            world.save_data()

            world.shutdown()
            world.review_work()

        mturk_manager.start_task(eligibility_function=check_worker_eligibility,
                                 assign_role_function=assign_worker_roles,
                                 task_function=run_conversation)

    except BaseException:
        raise
    finally:
        # Always release outstanding HITs and tear the server down.
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
# Example #9 (score: 0)
def setup_args():
    """Build a default ParlaiParser with task and model args enabled."""
    from parlai.core.params import ParlaiParser

    parser = ParlaiParser(True, True)
    return parser
# Example #10 (score: 0)
def setup_args():
    """Parse and return the messenger options for this task."""
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_messenger_args()
    return argparser.parse_args()
# Example #11 (score: 0)
def main():
    """
    Main function for the DMG pilot data collection task.

    Sets up an MTurk task that pairs two workers per conversation, tracks
    per-worker game/partner history in module-level globals, persists those
    records to disk, and pays or bans workers based on their records.
    :return: Nothing.
    """

    global available_games

    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    argparser.add_argument('--two_mturk_agents',
                           dest='two_mturk_agents',
                           action='store_true',
                           help='data collection mode '
                           'with converations between two MTurk agents')

    opt = argparser.parse_args()
    opt['task'] = 'dmg_pilot_dev'
    opt['datatype'] = 'dmg_pilot_data_1'
    opt.update(task_config)

    is_sandbox = True if opt['is_sandbox'] else False
    if is_sandbox: print("\n- - - Running in Sandbox Mode - - -")
    local_agent_1_id = 'local_1'
    mturk_agent_ids = ['mturk_agent_1']
    if opt['two_mturk_agents']:
        mturk_agent_ids.append('mturk_agent_2')

    mturk_manager = MTurkManager(opt=opt, mturk_agent_ids=mturk_agent_ids)
    mturk_manager.setup_server()

    # Remove qualification blocks
    # mturk_utils.delete_qualification('33DUYNETVPNVNFT4X22Z6F584M1WCM', is_sandbox)

    # Qualification used to stop workers who already played the maximum
    # number of games from previewing or accepting new HITs.
    qual_name = 'DMG Pilot: Max Games Reached v1'
    qual_desc = 'Qualification for a worker who completed the maximum number of games in the DMG Pilot'
    qualification_id = mturk_utils.find_or_create_qualification(
        qual_name, qual_desc, is_sandbox, True)
    print('Created qualification: {}'.format(qualification_id))

    available_games = len(DMGMultiRoundTeacher(opt=opt).episodes)
    print("Available games: {}".format(available_games))

    try:

        def load_records():
            """
            Loads worker records and bans from disk (best-effort; missing
            files are ignored) and re-applies the blocking qualification to
            every previously banned worker.
            :return: Nothing.
            """
            global worker_record
            global worker_bans

            try:
                with open('records/worker_record.dill', 'rb') as infile:
                    worker_record = dill.load(infile)
                    print("Loaded worker records.")
            except FileNotFoundError:
                pass
            try:
                with open('records/worker_bans.json', 'r') as infile:
                    worker_bans = json.load(infile)
            except FileNotFoundError:
                pass

            # Exclude players who are blocked by completing the maximum number of games in a previous HIT
            for worker_id in worker_bans:
                print("Excluded banned worker {}".format(worker_id))
                mturk_utils.give_worker_qualification(worker_id,
                                                      qualification_id)

        def save_records():
            """
            Writes worker records (both as human-readable JSON and as dill,
            which is what load_records reads back) and bans to disk.
            :return: Nothing.
            """
            global worker_record
            global worker_bans

            if VERBOSE: print("Writing worker records to file")
            if not os.path.exists("records"):
                os.makedirs("records")
            with open('records/worker_record.json', 'w') as f:
                json.dump(worker_record, f)
            with open('records/worker_record.dill', 'wb') as f:
                dill.dump(worker_record, f)
            with open('records/worker_bans.json', 'w') as f:
                json.dump(worker_bans, f)

        def select_random_game(exceptions=None):
            """
            Returns a random game ID with exception of those specified in exceptions
            :param exceptions: list of game IDs that cannot be selected
            :return: a random game ID with exception of those specified in exceptions
            """
            global available_games

            while True:
                # randint is inclusive on both ends, and game IDs index the
                # teacher's episodes (0 .. available_games - 1), so the upper
                # bound must be available_games - 1 to avoid an out-of-range
                # game ID.
                game_id = randint(0, available_games - 1)
                # if VERBOSE: print("Game ID is {}. Exceptions are {}".format(game_id, exceptions))
                if exceptions is not None:
                    if game_id not in exceptions:
                        break
                else:
                    break
                # if VERBOSE: print("Selected random game {}".format(game_id))
            return game_id

        def game_is_blocked(game_id, player_id):
            """
            Returns True if the given game ID is blocked for the given player, False otherwise
            :param game_id: ID of a given game
            :param player_id: ID of a given player
            :return: True if the given game ID is blocked for the given player, False otherwise
            """
            # A game becomes blocked for a player once they played it twice.
            if player_id in worker_record:
                if worker_record[player_id]["games"].count(game_id) == 2:
                    return True

            return False

        def update_records(players, played_game_id):
            """
            Updates the HITs worker records
            :param players: players paired for a game
            :param played_game_id: game ID
            :return: Nothing.
            """
            print("UPDATING RECORDS!!!")

            # Add game ID to the worker record and to the blocked list if it is the second occurrence.
            # If a player has played 5 games, he or she gets banned from the game
            # (see update_worker_record).
            update_worker_record(players[0], players[1], played_game_id)
            update_worker_record(players[1], players[0], played_game_id)

        def update_worker_record(worker, partner, played_game_id):
            """
            Updates the record for a specific worker - partner pairing with a given game ID
            :param worker: worker agent
            :param partner: partner agent
            :param played_game_id: assigned game ID
            :return: Nothing.
            """
            global worker_record
            global worker_bans

            player_id = worker.worker_id
            partner_id = partner.worker_id
            worker_record[player_id]["games"].append(played_game_id)
            worker_record[player_id]["partners"].append(partner_id)
            # Ban workers after their 5th completed game: qualification keeps
            # them from accepting future HITs, block_worker bans the account.
            if len(worker_record[player_id]["games"]) == 5:
                mturk_utils.give_worker_qualification(player_id,
                                                      qualification_id)
                worker_bans.append(player_id)
                worker.block_worker(
                    "Reached the maxiumum of 5 games in the DMG Pilot")

        def check_workers_eligibility(workers):
            """
            Checks the list of available workers, pairs the first two players that pass
            the game criteria and sets the corresponding game id
            :param workers: a list of all available workers
            :return: a list of two workers that are paired for the next game
            """
            global worker_record
            global game_id

            players = []
            # Return an empty list if not enough workers are in queue
            if len(workers) < 2:
                return players

            if VERBOSE: print("{} workers available:".format(len(workers)))

            for idx, worker in enumerate(workers):
                worker_id = worker.worker_id
                if VERBOSE: print("Worker: {}".format(worker_id))

                # Worker never played before. Pair him or her with the next queued worker who also never played before
                if worker_id not in worker_record:
                    if VERBOSE: print("Worker has no recorded games")

                    for partner in workers[idx + 1:]:
                        partner_id = partner.worker_id
                        if partner_id not in worker_record:
                            if VERBOSE: print("Partner: {}".format(partner_id))
                            players.append(worker)
                            players.append(partner)
                            next_game_id = select_random_game()
                            if VERBOSE:
                                print(
                                    "Partner has no recorded games. Setting game ID randomly to {}"
                                    .format(next_game_id))
                            game_id = next_game_id
                            return players

                    # Nobody in the queue is new. Continue with the loop
                    print("Nobody in the queue is new. Continue with the loop")
                    continue

                # Worker played before.
                else:
                    last_game_id = worker_record[worker_id]["games"][-1]
                    if not game_is_blocked(last_game_id, worker_id):
                        # Check if anybody in the queue has not played this game yet and didn't play with worker before
                        for partner in workers[idx + 1:]:
                            partner_id = partner.worker_id
                            # If partner also played before, pair them
                            if partner_id in worker_record and last_game_id not in worker_record[
                                    partner_id][
                                        "games"] and worker_id not in worker_record[
                                            partner_id]["partners"]:
                                if VERBOSE:
                                    print(
                                        "Partner has not played this game before."
                                    )
                                players.append(worker)
                                players.append(partner)
                                next_game_id = last_game_id
                                game_id = next_game_id
                                return players

                    # No suitable partner was found to play worker's last game.
                    # So pair worker with the next available player and check their games
                    for partner in workers[idx + 1:]:
                        partner_id = partner.worker_id
                        # If partner also played before, but never with worker, pair them
                        if partner_id in worker_record and worker_id not in worker_record[
                                partner_id]["partners"]:
                            last_game_id = worker_record[partner_id]["games"][
                                -1]
                            players.append(worker)
                            players.append(partner)
                            # Check if the partner's last game is not yet blocked and never played by the worker
                            if not game_is_blocked(
                                    last_game_id, partner_id
                            ) and last_game_id not in worker_record[worker_id][
                                    "games"]:
                                next_game_id = last_game_id
                                if VERBOSE:
                                    print(
                                        "Partner has recorded games. Setting game ID to {}"
                                        .format(next_game_id))
                                game_id = next_game_id
                                return players
                            # Else select a random one that none of the two played before
                            else:
                                blocked = copy(
                                    worker_record[worker_id]["games"])
                                blocked.extend(
                                    worker_record[partner_id]["games"])
                                next_game_id = select_random_game(
                                    exceptions=blocked)
                                if VERBOSE:
                                    print(
                                        "Selected game {} as it was not played by any of the players before"
                                        .format(next_game_id))
                                game_id = next_game_id
                                return players

                    print(
                        "Nobody in the queue played before. Continue with the loop"
                    )
                    continue

            # No match could be made since the only workers available are from different categories
            print(
                "No match could be made since the only workers available are from different categories"
            )
            return players

        def get_worker_names(players):
            """
            Returns gender-neutral nicknames for the players based on how many games they played already
            :param players: List of player IDs
            :return: a list of gender-neutral nicknames for the players based on how many games they played already
            """
            global worker_names

            player_names = []
            for player in players:
                player_id = player.worker_id
                if player_id in worker_record:
                    # NOTE(review): len() of the record dict is always 2
                    # ("games"/"partners" keys), so n is always 1 here. This
                    # looks like it was meant to be
                    # len(worker_record[player_id]["games"]) - 1 — confirm.
                    n = len(worker_record[player_id]) - 1
                    player_names.append(worker_names[n])
                else:
                    return ["Kelsey", "Robin"]

            return player_names

        def assign_worker_roles(workers):
            """
            Assigns indexes to the assigned workers
            :param workers: list of workers
            :return: Nothing.
            """
            for index, worker in enumerate(workers):
                worker.id = mturk_agent_ids[index % len(mturk_agent_ids)]

        def pay_workers(agents, get_pay, time_bonus=None):
            """
            Approves and bonuses agents flagged in get_pay, rejects the rest,
            and logs every payment to records/payments.txt.
            :param agents: list of MTurk agents to process
            :param get_pay: dict mapping worker_id to True (pay) / False (reject)
            :param time_bonus: optional bonus amount for a long HIT, or None
            :return: Nothing.
            """
            if not os.path.exists("records"):
                os.makedirs("records")

            for agent in agents:
                if get_pay[agent.worker_id]:
                    print("Paying worker {}".format(agent.worker_id))
                    if len(worker_record[agent.worker_id]["games"]) > 1:
                        agent.pay_bonus(
                            0.25, reason="DMP Pilot: Bonus for multiple games")
                        print("Paying bonus for multiple games!")
                        with open('records/payments.txt', 'a') as f:
                            f.write("{}; {}; {}; multiple_bonus\n".format(
                                agent.worker_id, 0.25, agent.assignment_id))

                    if time_bonus:
                        agent.pay_bonus(time_bonus,
                                        reason="DMG Pilot: Bonus for long HIT")
                        print("Paying bonus for long HIT!")
                        with open('records/payments.txt', 'a') as f:
                            f.write("{}; {}; {}; long_bonus\n".format(
                                agent.worker_id, time_bonus,
                                agent.assignment_id))

                    agent.approve_work()
                    with open('records/payments.txt', 'a') as f:
                        f.write("{}; {}; {}; payment\n".format(
                            agent.worker_id, 1.75, agent.assignment_id))

                else:
                    print(
                        "Rejecting agent {}'s work as he or she disconnected (too early) or score is too low."
                        .format(agent.worker_id))
                    agent.reject_work(
                        reason=
                        'Disconnected before end of HIT or scored too low')

        def run_conversation(mturk_manager, opt, workers):
            """
            Runs the conversation
            :param mturk_manager: MTurk manager
            :param opt: command line arguments
            :param workers: list of workers
            :return: Nothing.
            """
            # NOTE(review): game_id is a module-level global set by the
            # eligibility function; concurrent conversations could clobber
            # it between pairing and this call — confirm conversations are
            # started one at a time.
            global game_id
            global worker_record

            conversation_start_time = time.time()

            # Copy workers into agents list
            agents = workers[:]
            # Get worker names
            names = get_worker_names(agents)
            print(names)

            # Create a local agent
            if not opt['two_mturk_agents']:
                if 'model' in opt:
                    local_agent = create_agent(opt)
                else:
                    local_agent = LocalHumanAgent(opt=None)

                local_agent.id = local_agent_1_id
                agents.append(local_agent)

            opt["batchindex"] = mturk_manager.started_conversations

            print("Loading game {}".format(game_id))

            print(list(worker_record.keys()))
            print(agents[0].worker_id)
            print(agents[1].worker_id)

            # If the workers never played before, start with the warm-up round
            if (agents[0].worker_id
                    not in worker_record) and (agents[1].worker_id
                                               not in worker_record):
                world = MTurkDMGDialogWarmupWorld(
                    opt=opt,
                    agents=agents,
                )

                print("--- Starting Warming-Up Round ---")
                while not world.episode_done():
                    if world.parley():
                        break

            world = MTurkDMGDialogWorld(opt=opt,
                                        agents=agents,
                                        game_id=game_id,
                                        names=names)

            get_pay = {agents[0].worker_id: False, agents[1].worker_id: False}

            print("--- Starting Game ---")
            while not world.episode_done():
                print("Parley!")
                world.parley()

            print("# # # DONE # # #")

            if world.disconnected:
                print("Game ended due to disconnect.")
                # Past round 1, the worker who did NOT disconnect still gets paid.
                if world.round_nr > 1:
                    for agent in agents:
                        if not agent.disconnected:
                            print("CHECK: Agent {} did NOT disconnect".format(
                                agent.worker_id))
                            get_pay[agent.worker_id] = True
                        else:
                            print("CHECK: Agent {} DID disconnect".format(
                                agent.worker_id))

            else:
                # Only save records when game was complete
                print("Updating records")
                update_records(agents, game_id)
                save_records()

                if world.total_score > 24:
                    print("Total score was above 24, paying both workers.")
                    get_pay = {
                        agents[0].worker_id: True,
                        agents[1].worker_id: True
                    }
                else:
                    print("Score too low!")

            if world.end_time:
                conversation_end_time = world.end_time
            else:
                conversation_end_time = conversation_start_time
            world.shutdown()
            print("# # # Game ended # # #")

            # Time bonus: up to 1.50 for HITs longer than ~10 minutes,
            # capped at 25 minutes.
            duration = conversation_end_time - conversation_start_time
            duration_mins = duration / 60.0
            time_bonus = None

            if duration_mins > 1:
                if duration_mins >= 25:
                    time_bonus = 1.50
                else:
                    time_bonus = int(duration_mins - 10) * 0.10
                    time_bonus = round(time_bonus, 2)

            if time_bonus and time_bonus > 1.5:
                time_bonus = 1.5
            if time_bonus and time_bonus < 0:
                time_bonus = None
            pay_workers(agents, get_pay, time_bonus)
            print("Conversation closed.")

        load_records()
        print("# # # Loaded records # # #")

        def run_onboard(worker):
            """
            Runs the onboarding world, but only for workers with no record.
            :param worker: worker agent to onboard
            :return: Nothing.
            """
            global worker_record

            if worker.worker_id not in worker_record:
                world = MTurkDMGDialogOnboardWorld(opt=opt, mturk_agent=worker)
                while not world.episode_done():
                    world.parley()
                world.shutdown()
                print("Onboarding done.")

        mturk_manager.set_onboard_function(onboard_function=run_onboard)

        mturk_manager.start_new_run()
        # Workers holding the max-games qualification cannot even preview the HIT.
        agent_qualifications = [{
            'QualificationTypeId': qualification_id,
            'Comparator': 'DoesNotExist',
            'RequiredToPreview': True
        }]
        mturk_manager.create_hits(qualifications=agent_qualifications)

        # Increasing restart time
        mturk_manager.ready_to_accept_workers(timeout_seconds=120)

        # 'multiple': True means the eligibility function receives the whole
        # worker list and returns the subset to pair, rather than one worker.
        eligibility_function = {
            'func': check_workers_eligibility,
            'multiple': True,
        }

        mturk_manager.start_task(eligibility_function=eligibility_function,
                                 assign_role_function=assign_worker_roles,
                                 task_function=run_conversation)

        print("HIT ended.")

    except BaseException:
        raise
    finally:
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
Example #12
0
def main():
    """Handles setting up and running a ParlAI-MTurk task by instantiating
    an MTurk manager and configuring it for the qa_data_collection task.

    Workers are rotated through three roles (Asker, Answerer, Evaluator)
    during onboarding and grouped one-per-role into task worlds.
    """
    # Get relevant arguments
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    opt = argparser.parse_args()

    # Set the task name to be the folder name
    opt['task'] = os.path.basename(os.path.dirname(os.path.abspath(__file__)))

    # append the contents of task_config.py to the configuration
    opt.update(task_config)

    # Select an agent_id that worker agents will be assigned in their world
    mturk_agent_roles = ['Asker', 'Answerer', 'Evaluator']

    # Instantiate an MTurkManager with the given options and a maximum number
    # of agents per world of 1 (based on the length of mturk_agent_ids)
    mturk_manager = MTurkManager(
        opt=opt,
        mturk_agent_ids=mturk_agent_roles,
        use_db=True,
    )
    mturk_manager.setup_server(
        task_directory_path=os.path.dirname(os.path.abspath(__file__)))

    # Round-robin counter so consecutive onboarded workers get different roles.
    role_index = 0

    # Create an onboard_function, which will be run for workers who have
    # accepted your task and must be completed before they are put in the
    # queue for a task world.
    def run_onboard(worker):
        nonlocal role_index
        role = mturk_agent_roles[role_index % 3]
        role_index += 1
        worker.update_agent_id('Onboarding {}'.format(role))
        # Remember the assigned role for eligibility and role assignment below.
        worker.demo_role = role
        if role == 'Asker':
            world = AskerOnboardingWorld(opt=opt, mturk_agent=worker)
        elif role == 'Answerer':
            world = AnswererOnboardingWorld(opt=opt, mturk_agent=worker)
        else:
            world = EvaluatorOnboardingWorld(opt=opt, mturk_agent=worker)
        while not world.episode_done():
            world.parley()
        world.shutdown()
        return world.prep_save_data([worker])

    # If we want to use the above onboard function, we can replace the below
    # with set_onboard_function(onboard_function=run_onboard)
    mturk_manager.set_onboard_function(onboard_function=run_onboard)

    try:
        # Initialize run information
        mturk_manager.start_new_run()

        # Set up the sockets and threads to receive workers
        mturk_manager.ready_to_accept_workers()

        # Create the hits as specified by command line arguments
        mturk_manager.create_hits()

        # Check workers eligibility acts as a filter, and should return
        # the list of all workers currently eligible to work on the task.
        # Can be used to pair workers that meet certain criteria; here it
        # keeps at most one worker per distinct onboarded role.
        def check_workers_eligibility(workers):
            filled_roles = []
            use_workers = []
            for worker in workers:
                if worker.demo_role not in filled_roles:
                    use_workers.append(worker)
                    filled_roles.append(worker.demo_role)
            return use_workers

        eligibility_function = {
            'func': check_workers_eligibility,
            'multiple': True,
        }

        # Assign worker roles is used to determine what the role each worker
        # in the given worker list will play. Setting `id` to None will return
        # the worker to the pool rather than putting them in a given task,
        # which is useful for having tasks with different possible worker
        # counts.
        def assign_worker_roles(workers):
            for worker in workers:
                worker.id = worker.demo_role

        # Define the task function, which will be run with workers that are
        # assigned to the main task.
        # NOTE(review): run_conversation is declared global here —
        # presumably so it is resolvable by name outside this function's
        # scope (e.g. for serialization by the manager's db layer); confirm
        # before removing.
        global run_conversation

        def run_conversation(mturk_manager, opt, workers):
            # Create the task world
            world = MultiRoleAgentWorld(opt=opt, mturk_agents=workers)
            # run the world to completion
            while not world.episode_done():
                world.parley()

            # shutdown and review the work
            world.shutdown()
            world.review_work()

            # Return the contents for saving
            return world.prep_save_data(workers)

        # Begin the task, allowing mturk_manager to start running the task
        # world on any workers who connect
        mturk_manager.start_task(eligibility_function=eligibility_function,
                                 assign_role_function=assign_worker_roles,
                                 task_function=run_conversation)
    except BaseException:
        raise
    finally:
        # Any hits that aren't claimed or completed have to be shut down. Must
        # keep the world running until that point.
        mturk_manager.expire_all_unassigned_hits()
        # Shutdown the manager and free all related resources
        mturk_manager.shutdown()
Example #13
0
File: run.py Project: xlrshop/Parl
def main():
    """
    This task consists of one local human agent and two MTurk agents,
    each MTurk agent will go through the onboarding step to provide
    information about themselves, before being put into a conversation.
    You can end the conversation by sending a message ending with
    `[DONE]` from human_1.
    """
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    opt = argparser.parse_args()
    # Task name is taken from the directory this run script lives in.
    opt['task'] = os.path.basename(os.path.dirname(os.path.abspath(__file__)))
    opt.update(task_config)

    mturk_agent_1_id = 'mturk_agent_1'
    mturk_agent_2_id = 'mturk_agent_2'
    human_agent_1_id = 'human_1'
    mturk_agent_ids = [mturk_agent_1_id, mturk_agent_2_id]
    mturk_manager = MTurkManager(
        opt=opt,
        mturk_agent_ids=mturk_agent_ids
    )
    mturk_manager.setup_server()

    try:
        mturk_manager.start_new_run()
        mturk_manager.create_hits()

        def run_onboard(worker):
            # Run the onboarding world to completion for each worker before
            # they enter the task queue.
            world = MTurkMultiAgentDialogOnboardWorld(
                opt=opt,
                mturk_agent=worker
            )
            while not world.episode_done():
                world.parley()
            world.shutdown()

        # You can set onboard_function to None to skip onboarding
        mturk_manager.set_onboard_function(onboard_function=run_onboard)
        mturk_manager.ready_to_accept_workers()

        # Every worker is eligible; the filter is a no-op here.
        def check_worker_eligibility(worker):
            return True

        # 'multiple': False means the function is called per worker, not on
        # the whole worker list.
        eligibility_function = {
            'func': check_worker_eligibility,
            'multiple': False,
        }

        def assign_worker_roles(workers):
            # Alternate the two MTurk agent ids across the paired workers.
            for index, worker in enumerate(workers):
                worker.id = mturk_agent_ids[index % len(mturk_agent_ids)]

        def run_conversation(mturk_manager, opt, workers):
            # Create mturk agents
            mturk_agent_1 = workers[0]
            mturk_agent_2 = workers[1]

            # Create the local human agents
            human_agent_1 = LocalHumanAgent(opt=None)
            human_agent_1.id = human_agent_1_id

            world = MTurkMultiAgentDialogWorld(
                opt=opt,
                agents=[human_agent_1, mturk_agent_1, mturk_agent_2]
            )

            while not world.episode_done():
                world.parley()

            world.shutdown()

        mturk_manager.start_task(
            eligibility_function=eligibility_function,
            assign_role_function=assign_worker_roles,
            task_function=run_conversation
        )

    except BaseException:
        raise
    finally:
        # Expire unclaimed HITs and release all manager resources even on error.
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
Example #14
0
    def test_token_level_loss_logging(self):
        """
        Test functionality of token level probability + ranking logging.

        Regression for all inference types: 'beam', 'greedy', 'topk', 'nucleus',
        'delayedbeam'
        """
        inference_types = ['beam', 'greedy', 'topk', 'nucleus', 'delayedbeam']
        # Per-inference-type expected (token, logprob, rank) triples plus the
        # extra CLI args needed to select that decoding strategy.
        # NOTE(review): the 'topk_multiple_beams' and 'nucleus_multiple_beams'
        # entries below are never exercised — they are absent from
        # inference_types — confirm whether they should be added to the loop.
        gold_data = {
            'beam': {
                'text_token_info': [
                    ('__start__', 0.0, 1.0),
                    ('5', -2.5510462364763953e-05, 0.0),
                    ('__end__', -1.1920922133867862e-06, 0.0),
                ],
                'extra_args': ['--beam-size', '3'],
            },
            'greedy': {
                'text_token_info': [
                    ('__start__', 0.0, 1.0),
                    ('5', -2.5510462364763953e-05, 0.0),
                    ('__end__', -1.1920922133867862e-06, 0.0),
                ],
                'extra_args': [],
            },
            # sampling based token selection will produce non-deterministic output, so we can't do data regression
            'topk': {
                'extra_args': ['--topk', '2']
            },
            'topk_multiple_beams': {
                'extra_args': ['--topk', '2', '--beam-size', '5']
            },
            # sampling based token selection will produce non-deterministic output, so we can't do data regression
            'nucleus': {
                'extra_args': ['--topp', '0.3']
            },
            'nucleus_multiple_beams': {
                'extra_args': ['--topp', '0.3', '--beam-size', '5']
            },
            # sampling based token selection will produce non-deterministic output, so we can't do data regression
            'delayedbeam': {
                'extra_args': ['--topk', '2', '--beam-delay', '2']
            },
        }

        for inference_type in inference_types:
            # '-v' (verbose) is what switches on text_token_info in the act.
            args = [
                '--model-file',
                'zoo:unittest/transformer_generator2/model',
                '--inference',
                inference_type,
                '--truncate',
                '1024',
                '-v',
            ] + gold_data[inference_type]['extra_args']

            pp = ParlaiParser(True, True)
            agent = create_agent(pp.parse_args(args), True)
            obs = {'text': '5', 'episode_done': False}
            agent.observe(obs)
            act = agent.act()

            # Only deterministic strategies carry gold token data; sampling
            # types are exercised for crash-freedom only.
            if 'text_token_info' in gold_data[inference_type]:
                for i, tok_data in enumerate(act['text_token_info']):
                    assert (
                        gold_data[inference_type]['text_token_info'][i][0] ==
                        tok_data[0]
                    ), f"failed token prediction for inference type {inference_type} at token {gold_data[inference_type]['text_token_info'][i][0]}"
                    assert math.isclose(
                        gold_data[inference_type]['text_token_info'][i][1],
                        tok_data[1]
                    ), f"failed token probability prediction for inference type {inference_type} at token {gold_data[inference_type]['text_token_info'][i][0]}"
                    assert math.isclose(
                        gold_data[inference_type]['text_token_info'][i][2],
                        tok_data[2]
                    ), f"failed token rank prediction for inference type {inference_type} at token {gold_data[inference_type]['text_token_info'][i][0]}"
Example #15
0
 def setUp(self):
     """Create a temporary conversations directory under the ParlAI datapath."""
     base = ParlaiParser().parse_args([], print_args=False)['datapath']
     self.datapath = os.path.join(base, 'tmp_conversations')
     os.makedirs(self.datapath, exist_ok=True)
 def _get_args(self):
     """Return default messenger-task options parsed from an empty argv."""
     argparser = ParlaiParser(False, False)
     argparser.add_parlai_data_path()
     argparser.add_messenger_args()
     return argparser.parse_args([])
Example #17
0
def setup_args(parser=None) -> ParlaiParser:
    """
    Build the ParlAI parser, adding command line args if necessary.

    :param ParlaiParser parser:
        Preexisting parser to append options to. Will be created if needed.
    :returns:
        the ParlaiParser with CLI options added.
    """
    if parser is None:
        parser = ParlaiParser(True, True, 'Train a model')
    train = parser.add_argument_group('Training Loop Arguments')
    train.add_argument(
        '-et',
        '--evaltask',
        help='task to use for valid/test (defaults to the one used for training)',
    )
    train.add_argument(
        '--eval-batchsize',
        type=int,
        hidden=True,
        help='Eval time batch size (defaults to same as -bs)',
    )
    train.add_argument('--display-examples',
                       type='bool',
                       default=False,
                       hidden=True)
    train.add_argument('-eps', '--num-epochs', type=float, default=-1)
    train.add_argument('-ttim', '--max-train-time', type=float, default=-1)
    train.add_argument('-ltim', '--log-every-n-secs', type=float, default=10)
    train.add_argument(
        '-vtim',
        '--validation-every-n-secs',
        type=float,
        default=-1,
        help='Validate every n seconds. Saves model to model_file '
        '(if set) whenever best val metric is found',
    )
    train.add_argument(
        '-stim',
        '--save-every-n-secs',
        type=float,
        default=-1,
        help='Saves the model to model_file.checkpoint after '
        'every n seconds (default -1, never).',
    )
    train.add_argument(
        '-sval',
        '--save-after-valid',
        type='bool',
        default=False,
        help='Saves the model to model_file.checkpoint after '
        'every validation (default %(default)s).',
    )
    train.add_argument(
        '-veps',
        '--validation-every-n-epochs',
        type=float,
        default=-1,
        help='Validate every n epochs. Saves model to model_file '
        '(if set) whenever best val metric is found',
    )
    train.add_argument(
        '-vme',
        '--validation-max-exs',
        type=int,
        default=-1,
        hidden=True,
        help='max examples to use during validation (default -1 uses all)',
    )
    train.add_argument(
        '--short-final-eval',
        default=False,
        hidden=True,
        type='bool',
        help='If true, obeys --validation-max-exs in the final '
        'validation and test evaluations.',
    )
    train.add_argument(
        '-vp',
        '--validation-patience',
        type=int,
        default=10,
        help=('number of iterations of validation where result'
              ' does not improve before we stop training'),
    )
    train.add_argument(
        '-vmt',
        '--validation-metric',
        default='accuracy',
        help='key into report table for selecting best validation',
    )
    train.add_argument(
        '-vmm',
        '--validation-metric-mode',
        type=str,
        choices=['max', 'min'],
        help='how to optimize validation metric (max or min)',
    )
    train.add_argument(
        '-vcut',
        '--validation-cutoff',
        type=float,
        default=1.0,
        hidden=True,
        help='value at which training will stop if exceeded by metric',
    )
    train.add_argument(
        '-lfc',
        '--load-from-checkpoint',
        type='bool',
        default=True,
        hidden=True,
        help='load model from checkpoint if available',
    )
    train.add_argument(
        '-vshare',
        '--validation-share-agent',
        # FIX: type='bool' was missing; without it argparse stored the raw
        # CLI string, so any value (even "False") was truthy. Now consistent
        # with the other boolean flags (-sval, -lfc, --display-examples).
        type='bool',
        default=False,
        hidden=True,
        help='use a shared copy of the agent for validation. '
        'this will eventually default to True, but '
        'currently defaults to False.',
    )
    train.add_argument(
        '-mcs',
        '--metrics',
        type=str,
        default='default',
        help='list of metrics to show/compute, e.g. all, default,'
        'or give a list split by , like '
        'ppl,f1,accuracy,hits@1,rouge,bleu'
        'the rouge metrics will be computed as rouge-1, rouge-2 and rouge-l',
    )
    train.add_argument(
        '-micro',
        '--aggregate-micro',
        type='bool',
        default=False,
        help='Report micro-averaged metrics instead of macro averaged metrics.',
        recommended=False,
    )
    TensorboardLogger.add_cmdline_args(parser)

    # Dictionary-building options share the same parser.
    parser = setup_dict_args(parser)
    return parser
Example #18
0
def main():
    """This task consists of an MTurk agent evaluating a chit-chat model. They
    are asked to chat to the model adopting a specific persona. After their
    conversation, they are asked to evaluate their partner on several metrics.
    """
    # ParlaiParser(False, ...) skips adding the default ParlAI args; we only
    # need data-path, MTurk, and model options here.
    argparser = ParlaiParser(False, add_model_args=True)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    argparser.add_argument(
        '-dp',
        '--datapath',
        default='./',
        help='path to datasets, defaults to current directory')

    opt = argparser.parse_args()

    # add additional model args: force CPU, interactive inference and no
    # tensorboard for the evaluated model regardless of its saved opts
    opt['override'] = {
        'no_cuda': True,
        'interactive_mode': True,
        'tensorboard_log': False
    }

    # Set the task name to be the folder name
    opt['task'] = os.path.basename(os.path.dirname(os.path.abspath(__file__)))
    # append the contents of task_config.py to the configuration
    opt.update(task_config)

    mturk_agent_id = 'Tourist'

    mturk_manager = MTurkManager(opt=opt, mturk_agent_ids=[mturk_agent_id])

    # Launch the local server that brokers messages between MTurk and ParlAI.
    mturk_manager.setup_server()

    try:
        # NOTE: call order matters — the run must exist before HITs are
        # created, and workers are accepted with a locale qualification.
        mturk_manager.start_new_run()
        mturk_manager.ready_to_accept_workers()
        mturk_manager.create_hits([LOCALE_QUALIF_SDBOX])

        # No onboarding world for this task.
        mturk_manager.set_onboard_function(onboard_function=None)

        # mturk_manager.ready_to_accept_workers()

        def check_worker_eligibility(worker):
            # All workers are eligible.
            return True

        # def assign_worker_roles(workers):
        #     for index, worker in enumerate(workers):
        #         worker.id = mturk_agent_ids[index % len(mturk_agent_ids)]

        def assign_worker_roles(workers):
            # Single-agent task: the only worker plays the 'Tourist' role.
            workers[0].id = mturk_agent_id

        def run_conversation(mturk_manager, opt, workers):
            agents = workers[:]
            # workers[0].assignment_generator = assignment_generator

            world = MultiWozEvalWorld(opt=opt, agent=workers[0])

            # Run the dialogue until the world signals completion.
            while not world.episode_done():
                print("parley")
                world.parley()

            print("save data")
            world.save_data()

            print("world shutdown")
            world.shutdown()

        mturk_manager.start_task(eligibility_function=check_worker_eligibility,
                                 assign_role_function=assign_worker_roles,
                                 task_function=run_conversation)

    except BaseException:
        raise
    finally:
        # Always clean up outstanding HITs and the server, even on error.
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
Example #19
0
def setup_args(parser=None):
    if parser is None:
        parser = ParlaiParser(True, True)
    parser.set_defaults(datatype='valid', )
    return parser
Example #20
0
def main():
    """
    Train a Seq2seqAgent on a ParlAI task, validating in a loop until
    validation accuracy reaches ~100%.

    Side effects: builds/saves a dictionary, writes model checkpoints,
    and appends to ``<model_file>.validations`` and ``<model_file>.log``.
    """
    # Get command line arguments
    parser = ParlaiParser(add_model_args=True)
    DictionaryAgent.add_cmdline_args(parser)
    Seq2seqAgent.add_cmdline_args(parser)
    parser.add_argument('--dict-maxexs', default=100000, type=int)
    opt = parser.parse_args()

    # set model_file if none set, default is based on task name
    if not opt['model_file']:
        logdir = os.path.join(opt['parlai_home'], 'logs')
        bld.make_dir(logdir)
        task_short = opt['task'].lower()[:30]
        opt['model_file'] = os.path.join(logdir, task_short + '.model')

    # enable CUDA only when available and not explicitly disabled
    opt['cuda'] = not opt['no_cuda'] and torch.cuda.is_available()
    if opt['cuda']:
        print('[ Using CUDA ]')
        torch.cuda.set_device(opt['gpu'])

    # set up dictionary; dict filename is derived from the model filename
    print('Setting up dictionary.')
    if '.model' in opt['model_file']:
        dict_fn = opt['model_file'].replace('.model', '.dict')
    else:
        dict_fn = opt['model_file'] + '.dict'
    if os.path.isfile(dict_fn):
        opt['dict_loadpath'] = dict_fn
    dictionary = DictionaryAgent(opt)
    ordered_opt = copy.deepcopy(opt)
    cnt = 0

    # if dictionary was not loaded, create one
    if not opt.get('dict_loadpath'):
        for datatype in ['train:ordered', 'valid']:
            # we use train and valid sets to build dictionary
            ordered_opt['datatype'] = datatype
            ordered_opt['numthreads'] = 1
            ordered_opt['batchsize'] = 1
            world_dict = create_task(ordered_opt, dictionary)

            # pass examples to dictionary
            # NOTE: cnt is shared across both datatypes, so --dict-maxexs
            # caps the combined total, not each split separately.
            for _ in world_dict:
                cnt += 1
                if cnt > opt['dict_maxexs'] and opt['dict_maxexs'] > 0:
                    print('Processed {} exs, moving on.'.format(
                          opt['dict_maxexs']))
                    # don't wait too long...
                    break
                world_dict.parley()
        dictionary.save(dict_fn, sort=True)

    # create agent
    agent = Seq2seqAgent(opt, {'dictionary': dictionary})

    if os.path.isfile(opt['model_file']):
        print('Loading existing model parameters from ' + opt['model_file'])
        agent.load(opt['model_file'])

    # create train and validation worlds
    opt['datatype'] = 'train'
    world_train = create_task(opt, agent)

    opt['datatype'] = 'valid'
    world_valid = create_task(opt, agent)

    # set up logging; log filenames are derived from the model filename
    start = time.time()
    best_accuracy = 0
    if '.model' in opt['model_file']:
        valid_fn = opt['model_file'].replace('.model', '.validations')
        log_fn = opt['model_file'].replace('.model', '.log')
    else:
        valid_fn = opt['model_file'] + '.validations'
        log_fn = opt['model_file'] + '.log'

    # train / valid loop
    total = 0
    with open(valid_fn, 'w') as validations, open(log_fn, 'w') as log:
        while True:
            # train for a bit (200 parleys between validations)
            print('[ training ]')
            world_train.reset()
            for _ in range(200):
                world_train.parley()
                total += opt['batchsize']
            log.write('[ training example. ]\n')
            log.write(world_train.display() + '\n')

            # log training results
            print('[ training summary. ]')
            log.write('[ training summary. ]\n')
            report_train = world_train.report()
            report_train['cumulative_total'] = total
            print(report_train)
            log.write(str(report_train))
            log.write('\n')
            log.flush()

            # do one epoch of validation
            print('[ validating ]')
            world_valid.reset()
            for _ in world_valid:  # check valid accuracy
                world_valid.parley()
            log.write('[ validation example. ]\n')
            log.write(world_valid.display() + '\n')

            # get validation summary
            print('[ validation summary. ]')
            log.write('[ validation summary. ]\n')
            report_valid = world_valid.report()

            # update best accuracy if applicable; best model is checkpointed
            annotation = ''
            if report_valid['accuracy'] > best_accuracy:
                best_accuracy = report_valid['accuracy']
                agent.save(opt['model_file'])
                annotation = '*'  # mark this validation as a best one
            curr_time = time.strftime('%Y/%m/%d %H:%M:%S', time.localtime())
            validations.write('{}: {} {}\n'.format(
                curr_time, report_valid['accuracy'], annotation))
            validations.flush()
            report_valid['best_accuracy'] = best_accuracy

            # log validation summary
            print(report_valid)
            log.write(str(report_valid))
            log.write('\n')
            log.flush()

            # break if accuracy reaches ~100%
            # (accuracy is presumably reported on a 0-100 scale here — the
            # 99.5 cutoff assumes that; verify against world.report())
            if report_valid['accuracy'] > 99.5:
                break

    print('finished in {} s'.format(round(time.time() - start, 2)))
Example #21
0
def main():
    """
    Launch an MTurk task where workers evaluate an IR baseline dialog model
    on the #MovieDD-Reddit test set.
    """
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()

    # The dialog model we want to evaluate
    from parlai.agents.ir_baseline.ir_baseline import IrBaselineAgent
    IrBaselineAgent.add_cmdline_args(argparser)
    opt = argparser.parse_args()
    # Task name is the directory containing this script.
    opt['task'] = os.path.basename(os.path.dirname(os.path.abspath(__file__)))
    opt.update(task_config)

    # The task that we will evaluate the dialog model on
    task_opt = {}
    task_opt['datatype'] = 'test'
    task_opt['datapath'] = opt['datapath']
    task_opt['task'] = '#MovieDD-Reddit'

    mturk_agent_id = 'Worker'
    mturk_manager = MTurkManager(opt=opt, mturk_agent_ids=[mturk_agent_id])
    mturk_manager.setup_server()

    try:
        mturk_manager.start_new_run()
        mturk_manager.create_hits()

        def run_onboard(worker):
            # Run the onboarding world to completion for each new worker.
            world = ModelEvaluatorOnboardWorld(opt=opt, mturk_agent=worker)
            while not world.episode_done():
                world.parley()
            world.shutdown()

        mturk_manager.set_onboard_function(onboard_function=run_onboard)
        mturk_manager.ready_to_accept_workers()

        def check_worker_eligibility(worker):
            # All workers are eligible.
            return True

        def assign_worker_roles(worker):
            # NOTE(review): despite the singular name, `worker` is the list
            # of matched workers (hence worker[0]).
            worker[0].id = mturk_agent_id

        # exported at module level, presumably so multiprocessing/pickling of
        # the task function works — verify against MTurkManager internals
        global run_conversation

        def run_conversation(mturk_manager, opt, workers):
            mturk_agent = workers[0]

            # Fresh model instance per conversation.
            model_agent = IrBaselineAgent(opt=opt)

            world = ModelEvaluatorWorld(opt=opt,
                                        model_agent=model_agent,
                                        task_opt=task_opt,
                                        mturk_agent=mturk_agent)

            while not world.episode_done():
                world.parley()
            world.shutdown()
            world.review_work()

        mturk_manager.start_task(eligibility_function=check_worker_eligibility,
                                 assign_role_function=assign_worker_roles,
                                 task_function=run_conversation)
    except BaseException:
        raise
    finally:
        # Always clean up outstanding HITs and the server, even on error.
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
Example #22
0
def main():
    """
    Launch an MTurk task where workers evaluate a generative MMI transformer
    model on the #DailyDialog test set.

    Each conversation runs a fixed number of parleys, then the world is shut
    down and the worker's submission is reviewed.
    """
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    # NOTE(review): model/dict files point at a developer's local machine;
    # override these on the command line when running elsewhere.
    argparser.set_defaults(
        model='transformer/generatorMMI',
        model_file='/Users/lexine/Documents/DLforDialog/ParlAI/tmp/transMMI',
        dict_file='/Users/lexine/Documents/DLforDialog/ParlAI/tmp/transMMI.dict',
        inference='beam',
        beam_size=20
    )
    # The dialog model we want to evaluate
    from parlai.agents.transformer.generatorMMI import GeneratorMMIAgent

    GeneratorMMIAgent.add_cmdline_args(argparser)
    opt = argparser.parse_args()
    # Task name is the directory containing this script.
    opt['task'] = os.path.basename(os.path.dirname(os.path.abspath(__file__)))
    # Force CPU, interactive inference and no tensorboard for the eval model.
    opt['override'] = {
        'no_cuda': True,
        'interactive_mode': True,
        'tensorboard_log': False,
    }
    # NOTE: set opt['is_sandbox'] = False here to launch live (non-sandbox) HITs.
    opt.update(task_config)
    # The task that we will evaluate the dialog model on
    task_opt = {}
    task_opt['datatype'] = 'test'
    task_opt['datapath'] = opt['datapath']
    task_opt['task'] = '#DailyDialog'
    mturk_agent_id = 'Worker'
    mturk_manager = MTurkManager(opt=opt, mturk_agent_ids=[mturk_agent_id])
    mturk_manager.setup_server()

    try:
        mturk_manager.start_new_run()
        mturk_manager.ready_to_accept_workers()
        mturk_manager.create_hits()

        def run_onboard(worker):
            # Run the onboarding world to completion for each new worker.
            world = ModelEvaluatorOnboardWorld(opt=opt, mturk_agent=worker)
            while not world.episode_done():
                world.parley()
            world.shutdown()

        mturk_manager.set_onboard_function(onboard_function=run_onboard)

        def check_worker_eligibility(worker):
            # All workers are eligible.
            return True

        def assign_worker_roles(worker):
            # Single-agent task: the first (only) worker is the evaluator.
            worker[0].id = mturk_agent_id

        global run_conversation

        def run_conversation(mturk_manager, opt, workers):
            mturk_agent = workers[0]
            # FIX: dropped the redundant `_agent` alias — create_agent's
            # result was bound twice but only model_agent was ever used.
            model_agent = create_agent(opt)
            world = ModelEvaluatorWorld(
                opt=opt,
                model_agent=model_agent,
                task_opt=task_opt,
                mturk_agent=mturk_agent,
            )
            # Run a fixed number of exchanges instead of waiting for
            # episode_done(); the loop index was unused, so use `_`.
            for _ in range(10):
                world.parley()
            world.shutdown()
            world.review_work()

        mturk_manager.start_task(
            eligibility_function=check_worker_eligibility,
            assign_role_function=assign_worker_roles,
            task_function=run_conversation,
        )
    except BaseException:
        raise
    finally:
        # Always clean up outstanding HITs and the server, even on error.
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
def setup_args():
    """
    Build the support-doc download options and return the parsed opt dict.
    """
    parser = ParlaiParser(False, False)
    parser.add_parlai_data_path()
    group = parser.add_argument_group('Download Support Docs')
    group.add_argument('-nw', '--slsize', default=716, type=int, metavar='N',
                       help='number of wet files in a slice')
    group.add_argument('-ns', '--slnum', default=0, type=int, metavar='N',
                       help='commoncrawl slice number [0, ..., 71520 / args.slsize]')
    group.add_argument('-wf', '--wet_urls', default='pre_computed/wet.paths',
                       type=str,
                       help='path from data folder to file containing WET file URLs')
    group.add_argument('-sr_l', '--subreddit_names',
                       default='["explainlikeimfive"]', type=str,
                       help='subreddit names')
    group.add_argument('-nu', '--n_urls', default=100, type=int, metavar='N',
                       help='number of support documents to gather for each example')
    group.add_argument('-sfq', '--save_freq', default=50, type=int, metavar='N',
                       help='how often are results written to file')
    group.add_argument('-o', '--output_dir', default='eli5', type=str,
                       help='where to save the output in data folder')
    group.add_argument('-u', '--urls', type=str,
                       help='path to a json file of URLs to gather (in a list format)')
    group.add_argument('-ids', '--ccuids', type=str,
                       help='path to a json file of Common Crawl IDs to gather (in a list format)')
    return parser.parse_args()
Example #24
0
def make_flags(from_argv=False):
    """ Add arguments to parser and either parse from commandline or initialize
    to defaults (for overriding in scripts)

    :param from_argv: when True, parse sys.argv; when False, return the
        defaults (parse_args with an empty argument list).
    :returns: the parsed opt dict.
    """
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    # --- input/output file locations ---
    argparser.add_argument(
        '--dialogs_path',
        type=str,
        default=None,
        help='path to folder with conversation log files for evaluation')
    argparser.add_argument('--out_file',
                           type=str,
                           default=None,
                           help='path to file to write worker data')
    argparser.add_argument(
        '--bad_worker_file',
        type=str,
        default=None,
        help='(optional) path to file with workers to exclude')
    argparser.add_argument('--bonus_file',
                           type=str,
                           default=None,
                           help='(optional) path to file with bonuses awarded')

    # --- pairing / annotation setup ---
    argparser.add_argument(
        '--annotations_per_pair',
        type=int,
        default=1,
        help='Number of annotations per conversation comparison pair')
    # NOTE(review): type=list on the next two args means argparse would
    # apply list() to a CLI string (splitting it into characters); these
    # presumably are only meant to be set programmatically — confirm.
    argparser.add_argument(
        '--pair_data',
        type=list,
        default=None,
        help=
        'list of (conv1, conv2, hit, desc) (for pre-chosen pairs, e.g. for replicating previous experiments)'
    )
    argparser.add_argument(
        '--onboard_pair_data',
        type=list,
        default=None,
        help=
        'list of (conv1, conv2, hit, desc) onboarding tasks (for pre-chosen pairs, e.g. for replicating previous experiments)'
    )
    # --- HIT presentation ---
    argparser.add_argument('--s1_choice',
                           type=str,
                           default='',
                           help='text next to speaker 1 radio button')
    argparser.add_argument('--s2_choice',
                           type=str,
                           default='',
                           help='text next to speaker 2 radio button')
    argparser.add_argument('--mode',
                           type=str,
                           choices=['precision', 'recall'],
                           default='precision',
                           help='HIT task type')
    argparser.add_argument(
        '--question',
        type=str,
        default='Is the sentence supported by the article?',
        help=
        'question to present to turker for comparison (e.g. "Which speaker is better?")'
    )
    argparser.add_argument(
        '--correctness_is_flipped',
        default=False,
        action='store_true',
        help=
        'question phrasing flips the better model - e.g. question is "Which speaker is more boring?"'
    )
    argparser.add_argument(
        '--model_comparisons',
        type=str,
        help=
        'list of model pairs to compare, comma separated. E.g. ["transformer,human_eval"] '
    )
    argparser.add_argument(
        '--pairs_per_matchup',
        type=int,
        default=160,
        help='Number of conversation pairs to generate for the comparison')
    # --- onboarding / quality control ---
    argparser.add_argument(
        '--num_onboarding_tasks',
        type=int,
        default=5,
        help='Number of onboarding tasks total to screen workers with')
    # NOTE(review): store_false means passing --block_on_onboarding DISABLES
    # blocking (the default is True) — confirm the flag name is intended.
    argparser.add_argument('--block_on_onboarding',
                           action='store_false',
                           help='whether to block on onboarding failure')
    argparser.add_argument('--block_qualification',
                           type=str,
                           default='testytestytest',
                           help='unique name of block for this job')
    argparser.add_argument(
        '--onboarding_tasks',
        type=list,
        default=None,
        help=
        'onboarding tasks to screen workers with, list of (conv1id, conv2id, matchup) tuples'
    )
    argparser.add_argument(
        '--onboarding_model_comparison',
        type=str,
        default='greedy_model,human_eval',
        help=
        'models to compare for the onboarding task. E.g. "greedy,human_eval" ')
    argparser.add_argument('--comparisons_per_hit',
                           type=int,
                           default=5,
                           help='number of comparisons to do per hit')
    argparser.add_argument(
        '--onboarding_threshold',
        type=float,
        default=.75,
        help='minimum accuracy on onboarding tasks, as a float 0-1.0')
    argparser.add_argument('--seed',
                           type=int,
                           default=42,
                           help='np.random seed')
    argparser.set_defaults(allowed_conversation=1)
    if from_argv:
        return argparser.parse_args()
    else:
        return argparser.parse_args(args=[])
Example #25
0
def setup_args(parser=None):
    """
    Build the ParlAI training parser, adding command line args if necessary.

    :param parser: preexisting parser to append options to; created if needed.
    :returns: the parser with training-loop options added.
    """
    if parser is None:
        parser = ParlaiParser(True, True, 'Train a model')
    train = parser.add_argument_group('Training Loop Arguments')
    train.add_argument('-et',
                       '--evaltask',
                       help=('task to use for valid/test (defaults to the '
                             'one used for training if not set)'))
    train.add_argument('--eval-batchsize',
                       type=int,
                       help='Eval time batch size (defaults to same as -bs)')
    train.add_argument('--display-examples', type='bool', default=False)
    train.add_argument('-eps', '--num-epochs', type=float, default=-1)
    train.add_argument('-ttim', '--max-train-time', type=float, default=-1)
    train.add_argument('-ltim', '--log-every-n-secs', type=float, default=2)
    train.add_argument('-vtim',
                       '--validation-every-n-secs',
                       type=float,
                       default=-1,
                       help='Validate every n seconds. Whenever the the best '
                       'validation metric is found, saves the model to '
                       'the model_file path if set.')
    train.add_argument('-stim',
                       '--save-every-n-secs',
                       type=float,
                       default=-1,
                       help='Saves the model to model_file.checkpoint after '
                       'every n seconds (default -1, never).')
    train.add_argument('-sval',
                       '--save-after-valid',
                       type='bool',
                       default=False,
                       help='Saves the model to model_file.checkpoint after '
                       'every validation (default %(default)s).')
    train.add_argument('-veps',
                       '--validation-every-n-epochs',
                       type=float,
                       default=-1,
                       help='Validate every n epochs. Whenever the the best '
                       'validation metric is found, saves the model to '
                       'the model_file path if set.')
    train.add_argument('-vme',
                       '--validation-max-exs',
                       type=int,
                       default=-1,
                       help='max examples to use during validation (default '
                       '-1 uses all)')
    train.add_argument('-vp',
                       '--validation-patience',
                       type=int,
                       default=10,
                       help=('number of iterations of validation where result'
                             ' does not improve before we stop training'))
    train.add_argument('-vmt',
                       '--validation-metric',
                       default='accuracy',
                       help='key into report table for selecting best '
                       'validation')
    train.add_argument('-vmm',
                       '--validation-metric-mode',
                       default='max',
                       type=str,
                       choices=['max', 'min'],
                       help='how to optimize validation metric (max or min)')
    train.add_argument('-vcut',
                       '--validation-cutoff',
                       type=float,
                       default=1.0,
                       help='value at which training will stop if exceeded by '
                       'training metric')
    train.add_argument('-dbf',
                       '--dict-build-first',
                       type='bool',
                       default=True,
                       help='build dictionary first before training agent')
    train.add_argument('-lfc',
                       '--load-from-checkpoint',
                       type='bool',
                       default=False,
                       help='load model from checkpoint if available')
    # FIX: type='bool' was missing; without it argparse stored the raw CLI
    # string, so any value (even "False") was truthy. Now consistent with
    # the other boolean flags in this group (-sval, -dbf, -lfc).
    train.add_argument('-vshare',
                       '--validation-share-agent',
                       type='bool',
                       default=False,
                       help='use a shared copy of the agent for validation. '
                       'this will eventually default to True, but '
                       'currently defaults to False.')
    TensorboardLogger.add_cmdline_args(parser)
    parser = setup_dict_args(parser)
    return parser
Example #26
0
def setup_args():
    """Create and return a default ParlaiParser with model and ParlAI args."""
    parser = ParlaiParser(True, True)
    return parser
Example #27
0
def superscript_main(args=None):
    """
    Superscript is a loader for all the other scripts.

    Builds a top-level command parser, mounts every registered script as a
    subcommand, then dispatches to the chosen script (or prints help).

    :param args: argument list to parse; defaults to sys.argv when None.
    """
    # Populate SCRIPT_REGISTRY with all discoverable scripts.
    setup_script_registry()

    parser = _SupercommandParser(False,
                                 False,
                                 formatter_class=_SuperscriptHelpFormatter)
    parser.add_argument(
        '--helpall',
        action='helpall',
        help='show all commands, including advanced ones.',
    )
    parser.set_defaults(super_command=None)
    subparsers = parser.add_subparsers(parser_class=_SubcommandParser,
                                       title="Commands",
                                       metavar="COMMAND")
    # Explicit 'help'/'helpall' subcommands (hidden from the help listing).
    hparser = subparsers.add_parser(
        'help',
        aliases=['h'],
        help=argparse.SUPPRESS,
        description="List the main commands",
    )
    hparser.set_defaults(super_command='help')
    hparser = subparsers.add_parser(
        'helpall',
        help=argparse.SUPPRESS,
        description="List all commands, including advanced ones.",
    )
    hparser.set_defaults(super_command='helpall')

    # build the supercommand
    for script_name, registration in SCRIPT_REGISTRY.items():
        logging.verbose(f"Discovered command {script_name}")
        script_parser = registration.klass.setup_args()
        if script_parser is None:
            # user didn't bother defining command line args. let's just fill
            # in for them
            script_parser = ParlaiParser(False, False)
        help_ = argparse.SUPPRESS if registration.hidden else script_parser.description
        subparser = subparsers.add_parser(
            script_name,
            aliases=registration.aliases,
            help=help_,
            description=script_parser.description,
            formatter_class=CustomHelpFormatter,
        )
        subparser.set_defaults(
            # carries the name of the full command so we know what to execute
            super_command=script_name,
            # used in ParlAI parser to find CLI options set by user
            _subparser=subparser,
        )
        # Graft the script's options onto the subparser. NOTE: this reaches
        # into argparse private internals (_defaults, _actions,
        # _action_groups) to share Action objects rather than re-declaring
        # them — fragile across argparse versions.
        subparser.set_defaults(**script_parser._defaults)
        for action in script_parser._actions:
            subparser._add_action(action)
        for action_group in script_parser._action_groups:
            subparser._action_groups.append(action_group)

    # Enable shell tab-completion when argcomplete is installed; optional.
    try:
        import argcomplete

        argcomplete.autocomplete(parser)
    except ModuleNotFoundError:
        pass

    opt = parser.parse_args(args)
    cmd = opt.pop('super_command')
    if cmd == 'helpall':
        parser.print_helpall()
    elif cmd == 'help' or cmd is None:
        parser.print_help()
    elif cmd is not None:
        # cmd is always non-None here, so this branch is effectively 'else':
        # run the selected script with the parsed options.
        SCRIPT_REGISTRY[cmd].klass._run_from_parser_and_opt(opt, parser)
Example #28
0
from download_models import build
from parlai.core.params import ParlaiParser
from examples.interactive import interactive
from projects.personachat.persona_seq2seq import PersonachatSeqseqAgentSplit
'''Interact with pre-trained model
Profile memory model trained on personachat using persona 'self'
Run from ParlAI directory
'''

if __name__ == '__main__':
    # Build a parser that already knows about model arguments, plus a
    # flag for echoing examples during interaction.
    parser = ParlaiParser(add_model_args=True)
    parser.add_argument('-d', '--display-examples', type='bool', default=False)
    # Default to the local-human task driving the profile-memory
    # persona model; callers can still override these on the CLI.
    parser.set_defaults(
        task='parlai.agents.local_human.local_human:LocalHumanAgent',
        model='projects.personachat.persona_seq2seq:PersonachatSeqseqAgentSplit',
        model_file='models:personachat/profile_memory/profilememory_learnreweight_sharelt_encdropout0.4_s2s_usepersona_self_useall_attn_general_lstm_1024_1_1e-3_0.1',
    )
    PersonachatSeqseqAgentSplit.add_cmdline_args(parser)

    opt = parser.parse_args()
    opt['model_type'] = 'profile_memory'  # tells the downloader which model family to fetch
    # Fetch every profile-memory checkpoint plus the shared dictionary.
    model_fnames = [
        'profilememory_mem2_reweight_sharelt_encdropout0.2_selfpersona_useall_attn_general_lstm_1024_1_1e-3_0.1',
        'profilememory_learnreweight_sharelt_encdropout0.4_s2s_usepersona_self_useall_attn_general_lstm_1024_1_1e-3_0.1',
        'fulldict.dict',
    ]
    build(opt, model_fnames)
Example #29
0
    def __init__(self, datapath: "str | None" = None):
        """
        Download the OffensiveLanguage word list (if needed) and build the
        in-memory filter state.

        :param datapath: root ParlAI data directory. When None, it is
            resolved from ``ParlaiParser().parlai_home``/data.
        """
        import parlai.core.build_data as build_data
        from parlai.core.dict import DictionaryAgent

        # Reuse the dictionary agent's whitespace/punctuation tokenizer.
        self.tokenize = DictionaryAgent.split_tokenize

        # NOTE: both closures below read self.datapath, which is assigned
        # further down (before _path() is actually called) — do not reorder.
        def _path():
            # Build the data if it doesn't exist.
            build()
            return os.path.join(
                self.datapath, 'OffensiveLanguage', 'OffensiveLanguage.txt'
            )

        def build():
            # Download and version-stamp the word list under datapath.
            version = 'v1.0'
            dpath = os.path.join(self.datapath, 'OffensiveLanguage')
            if not build_data.built(dpath, version):
                logging.info(f'building data: {dpath}')
                if build_data.built(dpath):
                    # An older version exists, so remove these outdated files.
                    build_data.remove_dir(dpath)
                build_data.make_dir(dpath)

                # Download the data.
                fname = 'OffensiveLanguage.txt'
                url = 'http://parl.ai/downloads/offensive_language/' + fname
                build_data.download(url, dpath, fname)

                # Mark the data as built.
                build_data.mark_done(dpath, version)

        # Resolve the data root, then locate (and if necessary build) the file.
        if datapath is None:
            from parlai.core.params import ParlaiParser

            parser = ParlaiParser(False, False)
            self.datapath = os.path.join(parser.parlai_home, 'data')
        else:
            self.datapath = datapath
        self.datafile = _path()

        # store a token trie: e.g.
        # {'2': {'girls': {'1': {'cup': {'__END__': True}}}}}
        self.END = '__END__'
        # max_len tracks the longest phrase added, in tokens (starts at 1).
        self.max_len = 1
        self.offensive_trie = {}
        # Common affixes: each listed phrase is also added with every
        # prefix/suffix variant so simple derivations are caught too.
        self.word_prefixes = [
            'de',
            'de-',
            'dis',
            'dis-',
            'ex',
            'ex-',
            'mis',
            'mis-',
            'pre',
            'pre-',
            'non',
            'non-',
            'semi',
            'semi-',
            'sub',
            'sub-',
            'un',
            'un-',
        ]
        self.word_suffixes = [
            'a',
            'able',
            'as',
            'dom',
            'ed',
            'er',
            'ers',
            'ery',
            'es',
            'est',
            'ful',
            'fy',
            'ies',
            'ify',
            'in',
            'ing',
            'ish',
            'less',
            'ly',
            's',
            'y',
        ]
        # Benign words that affixing would otherwise generate; never added.
        self.allow_list = [
            'butter',
            'buttery',
            'spicy',
            'spiced',
            'spices',
            'spicier',
            'spicing',
            'twinkies',
        ]

        # Load every phrase plus its affixed variants into the trie,
        # skipping anything on the allow list.
        with open(self.datafile, 'r') as f:
            for p in f.read().splitlines():
                mod_ps = [p]
                mod_ps += [pref + p for pref in self.word_prefixes]
                mod_ps += [p + suff for suff in self.word_suffixes]
                for mod_p in mod_ps:
                    if mod_p not in self.allow_list:
                        self.add_phrase(mod_p)
Example #30
0
def setup_args():
    """
    Create the argument parser for building a pytorch data file.

    Registers the pytorch data-teacher options, then hands the parser to
    ``dict_setup`` and returns its result.
    """
    from parlai.core.params import ParlaiParser

    argparser = ParlaiParser(True, True, 'Builds a pytorch data file.')
    argparser.add_pytorch_datateacher_args()
    return dict_setup(argparser)