Example #1
def setup_args():
    parser = ParlaiParser(True, True)
    train = parser.add_argument_group('Training Loop Arguments')
    train.add_argument('-et', '--evaltask',
                       help=('task to use for valid/test (defaults to the '
                             'one used for training if not set)'))
    train.add_argument('-d', '--display-examples',
                       type='bool', default=False)
    train.add_argument('-e', '--num-epochs', type=float, default=-1)
    train.add_argument('-ttim', '--max-train-time',
                       type=float, default=-1)
    train.add_argument('-ltim', '--log-every-n-secs',
                       type=float, default=2)
    train.add_argument('-vtim', '--validation-every-n-secs',
                       type=float, default=-1)
    train.add_argument('-vme', '--validation-max-exs',
                       type=int, default=-1,
                       help='max examples to use during validation (default '
                            '-1 uses all)')
    train.add_argument('-vp', '--validation-patience',
                       type=int, default=10,
                       help=('number of iterations of validation where result'
                             ' does not improve before we stop training'))
    train.add_argument('-vmt', '--validation-metric', default='accuracy',
                       help='key into report table for selecting best '
                            'validation')
    train.add_argument('-dbf', '--dict-build-first',
                       type='bool', default=True,
                       help='build dictionary first before training agent')
    return parser
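A minimal usage sketch (not part of the original example) showing how a parser built by setup_args() is typically consumed; it assumes ParlAI is installed and that this runs in the same module as setup_args():

def run():
    parser = setup_args()
    opt = parser.parse_args()
    # opt behaves like a dict keyed by the long flag names with hyphens
    # replaced by underscores, e.g.:
    print(opt['num_epochs'], opt['validation_patience'], opt['dict_build_first'])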
Example #2
def main():
    # Get command line arguments
    parser = ParlaiParser()
    parser.add_argument('-n', '--num-examples', default=10)
    opt = parser.parse_args()

    agent = Agent(opt)

    opt['datatype'] = 'train'
    world_train = create_task(opt, agent)

    opt['datatype'] = 'valid'
    world_valid = create_task(opt, agent)

    start = time.time()
    # train / valid loop
    for _ in range(1):
        print('[ training ]')
        for _ in range(10):  # train for a bit
            world_train.parley()

        print('[ training summary. ]')
        print(world_train.report())

        print('[ validating ]')
        for _ in range(1):  # check valid accuracy
            world_valid.parley()

        print('[ validation summary. ]')
        print(world_valid.report())

    print('finished in {} s'.format(round(time.time() - start, 2)))
Example #3
def main():
    random.seed(42)

    # Get command line arguments
    parser = ParlaiParser(True, True)
    RemoteAgentAgent.add_cmdline_args(parser)
    opt = parser.parse_args()

    remote = RemoteAgentAgent(opt)
    if opt.get('task'):
        world = create_task(opt, [remote])
    else:
        if opt.get('model'):
            local = create_agent(opt)
        else:
            local = LocalHumanAgent(opt)
        # the remote-host goes **second**
        agents = [local, remote] if not opt['remote_host'] else [remote, local]
        world = DialogPartnerWorld(opt, agents)


    # Talk to the remote agent
    with world:
        while True:
            world.parley()
            print(world.display())
Example #4
    def test_basic_parse(self):
        """Check that the dictionary is correctly adding and parsing short
        sentence.
        """
        from parlai.core.dict import DictionaryAgent
        from parlai.core.params import ParlaiParser

        argparser = ParlaiParser()
        DictionaryAgent.add_cmdline_args(argparser)
        opt = argparser.parse_args()
        dictionary = DictionaryAgent(opt)
        num_builtin = len(dictionary)

        dictionary.observe({'text': 'hello world'})
        dictionary.act()
        assert len(dictionary) - num_builtin == 2

        vec = dictionary.parse('hello world')
        assert len(vec) == 2
        assert vec[0] == num_builtin
        assert vec[1] == num_builtin + 1

        vec = dictionary.parse('hello world', vec_type=list)
        assert len(vec) == 2
        assert vec[0] == num_builtin
        assert vec[1] == num_builtin + 1

        vec = dictionary.parse('hello world', vec_type=tuple)
        assert len(vec) == 2
        assert vec[0] == num_builtin
        assert vec[1] == num_builtin + 1
Example #5
def main():
    random.seed(42)

    # Get command line arguments
    parser = ParlaiParser()
    parser.add_argument('-n', '--num-examples', default=10, type=int)
    opt = parser.parse_args()

    display_data(opt)
Example #6
def main():
    random.seed(42)

    # Get command line arguments
    parser = ParlaiParser(True, True)
    parser.add_argument('-n', '--num-examples', default=100000000)
    parser.add_argument('-d', '--display-examples', type='bool', default=False)
    parser.add_argument('-ltim', '--log-every-n-secs', type=float, default=2)
    parser.set_defaults(datatype='valid')
    opt = parser.parse_args(print_args=False)

    eval_model(opt, parser)
Example #7
def main():
    parser = ParlaiParser(True, True)
    ConvAIWorld.add_cmdline_args(parser)
    opt = parser.parse_args()

    agent = ConvAISampleAgent(opt)
    world = ConvAIWorld(opt, [agent])

    while True:
        try:
            world.parley()
        except Exception as e:
            print('Exception: {}'.format(e))
Example #8
def main():
    # Get command line arguments
    argparser = ParlaiParser(True, True)
    build = argparser.add_argument_group('Data Building Args')
    build.add_argument('--datafile',
                       help=('The file to be loaded, preprocessed, and saved'))
    build.add_argument('--pytorch-buildteacher', type=str, default='',
        help='Which teacher to use when building the pytorch data')
    build.add_argument('--pytorch-preprocess', type='bool', default=True,
        help='Whether the agent should preprocess the data while building '
             'the pytorch data')
    opt = argparser.parse_args()
    build_data(opt)
Example #9
    def test_fvqa(self):
        from parlai.core.params import ParlaiParser
        parser = ParlaiParser()
        parser.add_task_args(['-t', 'fvqa'])
        opt = parser.parse_args(args=self.args)

        from parlai.tasks.fvqa.agents import DefaultTeacher
        for dt in ['train:ordered', 'test']:
            opt['datatype'] = dt

            teacher = DefaultTeacher(opt)
            reply = teacher.act()
            check(opt, reply)

        shutil.rmtree(self.TMP_PATH)
Example #10
def main():
    random.seed(42)

    # Get command line arguments
    parser = ParlaiParser(True, True)
    parser.add_argument('-n', '--num-examples', default=10)
    opt = parser.parse_args()

    # Create model and assign it to the specified task
    agent = create_agent(opt)
    world = create_task(opt, agent)

    # Show some example dialogs.
    with world:
        for k in range(int(opt['num_examples'])):
            world.parley()
            print(world.display() + "\n~~")
            if world.epoch_done():
                print("EPOCH DONE")
                break
Example #11
def main():
    random.seed(42)

    # Get command line arguments
    parser = ParlaiParser()
    parser.add_argument('-n', '--num-examples', default=10, type=int)
    opt = parser.parse_args()

    # create repeat label agent and assign it to the specified task
    agent = RepeatLabelAgent(opt)
    world = create_task(opt, agent)

    # Show some example dialogs.
    with world:
        for _ in range(opt['num_examples']):
            world.parley()
            print(world.display() + '\n~~')
            if world.epoch_done():
                print('EPOCH DONE')
                break
Example #12
    def __init__(self, args=None, **kwargs):
        """Initializes the predictor, setting up opt automatically if necessary.

        Args is expected to be in the same format as sys.argv: e.g. a list in
        the form ['--model', 'seq2seq', '-hs', 128, '-lr', 0.5].

        kwargs is interpreted by appending '--' to it and replacing underscores
        with hyphens, so 'dict_file=/tmp/dict.tsv' would be interpreted as
        '--dict-file /tmp/dict.tsv'.
        """
        from parlai.core.params import ParlaiParser
        from parlai.core.agents import create_agent

        if args is None:
            args = []
        for k, v in kwargs.items():
            args.append('--' + str(k).replace('_', '-'))
            args.append(str(v))
        parser = ParlaiParser(True, True, model_argv=args)
        self.opt = parser.parse_args(args)
        self.agent = create_agent(self.opt)
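A hedged sketch of how the kwargs described in the docstring above become CLI-style arguments; the model name and file path are illustrative placeholders, and actually building the agent requires a real trained model at that path:

# kwargs are converted to ['--model', 'seq2seq', '--model-file', '/tmp/model']
# and handed to ParlaiParser.parse_args() inside __init__.
predictor = Predictor(model='seq2seq', model_file='/tmp/model')
print(predictor.opt['model'])  # -> 'seq2seq'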
Example #13
def main():
    random.seed(42)

    # Get command line arguments
    parser = ParlaiParser()
    parser.add_argument('-n', '--num-examples', default=10)
    parser.set_defaults(datatype='train:ordered')

    ImageLoader.add_cmdline_args(parser)
    opt = parser.parse_args()

    opt['no_cuda'] = False
    opt['gpu'] = 0
    # create repeat label agent and assign it to the specified task
    agent = RepeatLabelAgent(opt)
    world = create_task(opt, agent)

    # Show some example dialogs.
    with world:
        for k in range(int(opt['num_examples'])):
            world.parley()
            print(world.display() + '\n~~')
            if world.epoch_done():
                print('EPOCH DONE')
                break
Example #14
def main():
    random.seed(42)

    # Get command line arguments
    parser = ParlaiParser(True, True)
    parser.add_argument('-d', '--display-examples', type='bool', default=False)
    opt = parser.parse_args()
    opt['task'] = 'parlai.agents.local_human.local_human:LocalHumanAgent'
    print(opt)
    # Create model and assign it to the specified task
    agent = create_agent(opt)
    world = create_task(opt, agent)

    # Show some example dialogs:
    while True:
        world.parley()
        if opt['display_examples']:
            print("---")
            print(world.display() + "\n~~")
        if world.epoch_done():
            print("EPOCH DONE")
            break
Example #15
def main():
    # Get command line arguments
    parser = ParlaiParser(True, False)
    parser.set_defaults(datatype='train:ordered')

    opt = parser.parse_args()
    bsz = opt.get('batchsize', 1)
    opt['no_cuda'] = False
    opt['gpu'] = 0
    opt['num_epochs'] = 1
    # create repeat label agent and assign it to the specified task
    agent = RepeatLabelAgent(opt)
    world = create_task(opt, agent)

    logger = ProgressLogger(should_humanize=False)
    print("Beginning image extraction...")
    exs_seen = 0
    total_exs = world.num_examples()
    while not world.epoch_done():
        world.parley()
        exs_seen += bsz
        logger.log(exs_seen, total_exs)
    print("Finished extracting images")
Example #16
def main():
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_messenger_args()
    opt = argparser.parse_args()
    opt['task'] = os.path.basename(os.path.dirname(os.path.abspath(__file__)))

    # Initialize a SQuAD teacher agent, which we will get context from
    module_name = 'parlai.tasks.squad.agents'
    class_name = 'DefaultTeacher'
    my_module = importlib.import_module(module_name)
    task_class = getattr(my_module, class_name)
    task_opt = {}
    task_opt['datatype'] = 'train'
    task_opt['datapath'] = opt['datapath']

    messenger_manager = MessengerManager(opt=opt)
    messenger_manager.setup_server()
    messenger_manager.init_new_state()

    def get_overworld(agent):
        return MessengerOverworld(None, agent)

    def assign_agent_role(agent):
        agent[0].disp_id = 'Agent'

    def run_conversation(manager, opt, agents, task_id):
        task = task_class(task_opt)
        agent = agents[0]
        world = QADataCollectionWorld(
            opt=opt,
            task=task,
            agent=agent
        )
        while not world.episode_done():
            world.parley()
        world.shutdown()

    # World with no onboarding
    messenger_manager.set_onboard_functions({'default': None})
    task_functions = {'default': run_conversation}
    assign_agent_roles = {'default': assign_agent_role}
    messenger_manager.set_agents_required({'default': 1})

    messenger_manager.set_overworld_func(get_overworld)
    messenger_manager.setup_socket()
    try:
        messenger_manager.start_new_run()
        messenger_manager.start_task(
            assign_role_functions=assign_agent_roles,
            task_functions=task_functions,
        )
    except BaseException:
        raise
    finally:
        messenger_manager.shutdown()
Example #17
def main():
    random.seed(42)

    # Get command line arguments
    parser = ParlaiParser(True, True)
    parser.add_argument('-n', '--num-examples', default=100000000)
    parser.add_argument('-d', '--display-examples', type='bool', default=False)
    parser.set_defaults(datatype='valid')
    opt = parser.parse_args()
    # Create model and assign it to the specified task
    agent = create_agent(opt)
    world = create_task(opt, agent)

    # Show some example dialogs:
    for k in range(int(opt['num_examples'])):
        world.parley()
        print("---")
        if opt['display_examples']:
            print(world.display() + "\n~~")
        print(world.report())
        if world.epoch_done():
            print("EPOCH DONE")
            break
    world.shutdown()
Example #18
def main():
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()

    # The dialog model we want to evaluate
    from parlai.agents.ir_baseline.ir_baseline import IrBaselineAgent
    IrBaselineAgent.add_cmdline_args(argparser)
    opt = argparser.parse_args()
    opt['task'] = os.path.basename(os.path.dirname(os.path.abspath(__file__)))
    opt.update(task_config)

    # The task that we will evaluate the dialog model on
    task_opt = {}
    task_opt['datatype'] = 'test'
    task_opt['datapath'] = opt['datapath']
    task_opt['task'] = '#MovieDD-Reddit'

    mturk_agent_id = 'Worker'
    mturk_manager = MTurkManager(
        opt=opt,
        mturk_agent_ids=[mturk_agent_id],
        # In speaking order:
        all_agent_ids=[ModelEvaluatorWorld.evaluator_agent_id, mturk_agent_id]
    )
    mturk_manager.init_aws(opt=opt)
    
    global run_hit
    def run_hit(hit_index, assignment_index, opt, task_opt, mturk_manager):
        conversation_id = str(hit_index) + '_' + str(assignment_index)

        model_agent = IrBaselineAgent(opt=opt)
        # Create the MTurk agent which provides a chat interface to the Turker
        mturk_agent = MTurkAgent(id=mturk_agent_id, manager=mturk_manager, conversation_id=conversation_id, opt=opt)
        world = ModelEvaluatorWorld(opt=opt, model_agent=model_agent, task_opt=task_opt, mturk_agent=mturk_agent)

        while not world.episode_done():
            world.parley()
        world.shutdown()
        world.review_work()

    mturk_manager.create_hits(opt=opt)
    results = Parallel(
        n_jobs=opt['num_hits'] * opt['num_assignments'], backend='threading'
    )(
        delayed(run_hit)(hit_index, assignment_index, opt, task_opt, mturk_manager)
        for hit_index, assignment_index in product(
            range(1, opt['num_hits'] + 1), range(1, opt['num_assignments'] + 1)))
    mturk_manager.shutdown()
Example #19
def main():
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    opt = argparser.parse_args()
    opt['task'] = os.path.basename(os.path.dirname(os.path.abspath(__file__)))
    opt.update(task_config)

    mturk_agent_1_id = 'mturk_agent_1'
    mturk_agent_2_id = 'mturk_agent_2'
    human_agent_1_id = 'human_1'
    human_agent_2_id = 'human_2'
    mturk_manager = MTurkManager(
        opt=opt,
        mturk_agent_ids=[mturk_agent_1_id, mturk_agent_2_id],
        # In speaking order:
        all_agent_ids=[human_agent_1_id, human_agent_2_id,
                       mturk_agent_1_id, mturk_agent_2_id]
    )
    mturk_manager.init_aws(opt=opt)

    global run_hit
    def run_hit(hit_index, assignment_index, opt, mturk_manager):
        conversation_id = str(hit_index) + '_' + str(assignment_index)

        # Create mturk agents
        mturk_agent_1 = MTurkAgent(id=mturk_agent_1_id, manager=mturk_manager, conversation_id=conversation_id, opt=opt)
        mturk_agent_2 = MTurkAgent(id=mturk_agent_2_id, manager=mturk_manager, conversation_id=conversation_id, opt=opt)

        # Create the local human agents
        human_agent_1 = LocalHumanAgent(opt=None)
        human_agent_1.id = human_agent_1_id
        human_agent_2 = LocalHumanAgent(opt=None)
        human_agent_2.id = human_agent_2_id

        world = MultiAgentDialogWorld(opt=opt, agents=[human_agent_1, human_agent_2, mturk_agent_1, mturk_agent_2])

        while not world.episode_done():
            world.parley()
        world.shutdown()

    mturk_manager.create_hits(opt=opt)
    results = Parallel(
        n_jobs=opt['num_hits'] * opt['num_assignments'], backend='threading'
    )(
        delayed(run_hit)(hit_index, assignment_index, opt, mturk_manager)
        for hit_index, assignment_index in product(
            range(1, opt['num_hits'] + 1), range(1, opt['num_assignments'] + 1)))
    mturk_manager.shutdown()
Example #20
def main():
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    opt = argparser.parse_args()
    opt['task'] = os.path.basename(os.path.dirname(os.path.abspath(__file__)))
    opt.update(task_config)

    # Initialize a SQuAD teacher agent, which we will get context from
    module_name = 'parlai.tasks.squad.agents'
    class_name = 'DefaultTeacher'
    my_module = importlib.import_module(module_name)
    task_class = getattr(my_module, class_name)
    task_opt = {}
    task_opt['datatype'] = 'train'
    task_opt['datapath'] = opt['datapath']

    mturk_agent_id = 'Worker'
    mturk_manager = MTurkManager(
        opt=opt,
        mturk_agent_ids=[mturk_agent_id],
        # In speaking order:
        all_agent_ids=[QADataCollectionWorld.collector_agent_id, mturk_agent_id]
    )
    mturk_manager.init_aws(opt=opt)

    global run_hit
    def run_hit(hit_index, assignment_index, task_class, task_opt, opt, mturk_manager):
        conversation_id = str(hit_index) + '_' + str(assignment_index)

        task = task_class(task_opt)
        # Create the MTurk agent which provides a chat interface to the Turker
        mturk_agent = MTurkAgent(id=mturk_agent_id, manager=mturk_manager, conversation_id=conversation_id, opt=opt)
        world = QADataCollectionWorld(opt=opt, task=task, mturk_agent=mturk_agent)
        while not world.episode_done():
            world.parley()
        world.shutdown()
        world.review_work()

    mturk_manager.create_hits(opt=opt)
    results = Parallel(
        n_jobs=opt['num_hits'] * opt['num_assignments'], backend='threading'
    )(
        delayed(run_hit)(hit_index, assignment_index, task_class, task_opt, opt, mturk_manager)
        for hit_index, assignment_index in product(
            range(1, opt['num_hits'] + 1), range(1, opt['num_assignments'] + 1)))
    mturk_manager.shutdown()
Example #21
        stddev = np.std(np.array(accs), dtype=np.float64)
        return acc, acc_len, stddev

    start, end = 0, M
    for train_name in NAMES:
        for valid_name in ['INIT', 'ALL']:
            for round_index in range(5):
                sub_perfs = perfs[start: end]
                acc, acc_len, stddev = get_acc_and_acc_len(sub_perfs)
                print_and_log('{} on {} round{}: acc {} stddev {} acc_len {}'.format(train_name, valid_name, round_index, acc, stddev, acc_len))
                log_only('{} on {} round {}: {}'.format(train_name, valid_name, round_index, sub_perfs))
                start = end
                end = start + M

if __name__ == '__main__':
    argparser = ParlaiParser(False, False)

    # ============ below copied from projects/graph_world2/train.py ============
    argparser.add_arg('--vocab_size', type=int, default=1000)
    argparser.add_arg('--terminate', type=bool, default=False)
    argparser.add_arg('--lr', type=float, default=1e-3)
    argparser.add_arg('--max_seq_in', type=int, default=30)
    argparser.add_arg('--embedding_dim', type=int, default=50)
    argparser.add_arg('--rnn_h', type=int, default=350)
    argparser.add_arg('--rnn_layers', type=int, default=1)
    argparser.add_arg('--cuda', type=bool, default=True)
    argparser.add_arg('--eval_period', type=int, default=200)
    argparser.add_arg('--max_seq_out', type=int, default=5)
    argparser.add_arg('--label_ratio', type=float, default=1.0)
    argparser.add_arg('--max_iter', type=int, default=100000)
    argparser.add_arg('--exit_iter', type=int, default=3000)
Example #22
def main():
    # Get command line arguments
    parser = ParlaiParser(True, True)
    train = parser.add_argument_group('Training Loop Arguments')
    train.add_argument('-et',
                       '--evaltask',
                       help=('task to use for valid/test (defaults to the ' +
                             'one used for training if not set)'))
    train.add_argument('-d', '--display-examples', type='bool', default=False)
    train.add_argument('-e', '--num-epochs', type=float, default=-1)
    train.add_argument('-ttim', '--max-train-time', type=float, default=-1)
    train.add_argument('-ltim', '--log-every-n-secs', type=float, default=2)
    train.add_argument('-vtim',
                       '--validation-every-n-secs',
                       type=float,
                       default=-1)
    train.add_argument('-vme',
                       '--validation-max-exs',
                       type=int,
                       default=-1,
                       help='max examples to use during validation (default ' +
                       '-1 uses all)')
    train.add_argument(
        '-vp',
        '--validation-patience',
        type=int,
        default=5,
        help=('number of iterations of validation where result ' +
              'does not improve before we stop training'))
    train.add_argument('-dbf',
                       '--dict-build-first',
                       type='bool',
                       default=True,
                       help='build dictionary first before training agent')
    opt = parser.parse_args()
    # Possibly build a dictionary (not all models do this).
    if opt['dict_build_first'] and 'dict_file' in opt:
        if opt['dict_file'] is None and opt.get('model_file'):
            opt['dict_file'] = opt['model_file'] + '.dict'
        print("[ building dictionary first... ]")
        build_dict.build_dict(opt)
    # Create model and assign it to the specified task
    agent = create_agent(opt)
    world = create_task(opt, agent)

    train_time = Timer()
    validate_time = Timer()
    log_time = Timer()
    print('[ training... ]')
    parleys = 0
    total_exs = 0
    max_exs = opt['num_epochs'] * len(world)
    max_parleys = math.ceil(max_exs / opt['batchsize'])
    best_accuracy = 0
    impatience = 0
    saved = False
    valid_world = None
    while True:
        world.parley()
        parleys += 1

        if opt['num_epochs'] > 0 and parleys >= max_parleys:
            print('[ num_epochs completed: {} ]'.format(opt['num_epochs']))
            break
        if (opt['max_train_time'] > 0
                and train_time.time() > opt['max_train_time']):
            print('[ max_train_time elapsed: {} ]'.format(train_time.time()))
            break
        if (opt['log_every_n_secs'] > 0
                and log_time.time() > opt['log_every_n_secs']):
            if opt['display_examples']:
                print(world.display() + '\n~~')

            logs = []
            # time elapsed
            logs.append('time:{}s'.format(math.floor(train_time.time())))
            logs.append('parleys:{}'.format(parleys))

            # get report and update total examples seen so far
            if hasattr(agent, 'report'):
                train_report = agent.report()
                agent.reset_metrics()
            else:
                train_report = world.report()
                world.reset_metrics()

            if hasattr(train_report, 'get') and train_report.get('total'):
                total_exs += train_report['total']
                logs.append('total_exs:{}'.format(total_exs))

            # check if we should log amount of time remaining
            time_left = None
            if opt['num_epochs'] > 0:
                exs_per_sec = train_time.time() / total_exs
                time_left = (max_exs - total_exs) * exs_per_sec
            if opt['max_train_time'] > 0:
                other_time_left = opt['max_train_time'] - train_time.time()
                if time_left is not None:
                    time_left = min(time_left, other_time_left)
                else:
                    time_left = other_time_left
            if time_left is not None:
                logs.append('time_left:{}s'.format(math.floor(time_left)))

            # join log string and add full metrics report to end of log
            log = '[ {} ] {}'.format(' '.join(logs), train_report)

            print(log)
            log_time.reset()

        if (opt['validation_every_n_secs'] > 0
                and validate_time.time() > opt['validation_every_n_secs']):
            valid_report, valid_world = run_eval(agent,
                                                 opt,
                                                 'valid',
                                                 opt['validation_max_exs'],
                                                 valid_world=valid_world)
            if valid_report['accuracy'] > best_accuracy:
                best_accuracy = valid_report['accuracy']
                impatience = 0
                print('[ new best accuracy: ' + str(best_accuracy) + ' ]')
                world.save_agents()
                saved = True
                if best_accuracy == 1:
                    print('[ task solved! stopping. ]')
                    break
            else:
                impatience += 1
                print(
                    '[ did not beat best accuracy: {} impatience: {} ]'.format(
                        round(best_accuracy, 4), impatience))
            validate_time.reset()
            if (opt['validation_patience'] > 0
                    and impatience >= opt['validation_patience']):
                print('[ ran out of patience! stopping training. ]')
                break
    world.shutdown()
    if not saved:
        world.save_agents()
    else:
        # reload best validation model
        agent = create_agent(opt)

    run_eval(agent, opt, 'valid', write_log=True)
    run_eval(agent, opt, 'test', write_log=True)
Example #23
def main():
    """This task consists of an MTurk agent evaluating a chit-chat model. They
    are asked to chat to the model adopting a specific persona. After their
    conversation, they are asked to evaluate their partner on several metrics.
    """
    argparser = ParlaiParser(False, add_model_args=True)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    argparser.add_argument('-mt',
                           '--max-turns',
                           default=10,
                           type=int,
                           help='maximal number of chat turns')
    argparser.add_argument('--max-resp-time',
                           default=180,
                           type=int,
                           help='time limit for entering a dialog message')
    argparser.add_argument('--max-persona-time',
                           type=int,
                           default=300,
                           help='time limit for turker '
                           'entering the persona')
    argparser.add_argument('--ag-shutdown-time',
                           default=120,
                           type=int,
                           help='time limit for entering a dialog message')
    argparser.add_argument('--persona-type',
                           default='both',
                           type=str,
                           choices=['both', 'self', 'other'],
                           help='Which personas to load from personachat')
    argparser.add_argument('--revised',
                           default=False,
                           type='bool',
                           help='Whether to use revised personas')
    argparser.add_argument('-rt',
                           '--range-turn',
                           default='5,6',
                           help='sample range of number of turns')
    argparser.add_argument('--auto-approve-delay',
                           type=int,
                           default=3600 * 24 * 1,
                           help='how long to wait for auto approval')

    # ADD MODEL ARGS HERE (KVMEMNN ADDED AS AN EXAMPLE)
    argparser.set_defaults(
        model='projects.personachat.kvmemnn.kvmemnn:Kvmemnn',
        model_file='models:convai2/kvmemnn/model',
    )
    opt = argparser.parse_args()

    # add additional model args
    opt['no_cuda'] = True
    opt['override'] = ['interactive_mode']
    opt['interactive_mode'] = True

    bot = create_agent(opt)
    shared_bot_params = bot.share()

    opt['task'] = os.path.basename(os.path.dirname(os.path.abspath(__file__)))
    if 'data_path' not in opt:
        opt['data_path'] = os.getcwd() + '/data/' + opt['task']
    opt.update(task_config)

    mturk_agent_ids = ['PERSON_1']

    mturk_manager = MTurkManager(opt=opt, mturk_agent_ids=mturk_agent_ids)

    persona_generator = PersonasGenerator(opt)
    mturk_manager.setup_server()

    try:
        mturk_manager.start_new_run()
        mturk_manager.create_hits()

        if not opt['is_sandbox']:
            # ADD BLOCKED WORKERS HERE
            blocked_worker_list = []
            for w in blocked_worker_list:
                mturk_manager.block_worker(
                    w, 'We found that you have unexpected behaviors in our '
                    'previous HITs. For more questions please email us.')

        def run_onboard(worker):
            worker.persona_generator = persona_generator
            world = PersonaProfileWorld(opt, worker)
            world.parley()
            world.shutdown()

        mturk_manager.set_onboard_function(onboard_function=run_onboard)
        mturk_manager.ready_to_accept_workers()

        def check_worker_eligibility(worker):
            return True

        def assign_worker_roles(workers):
            for index, worker in enumerate(workers):
                worker.id = mturk_agent_ids[index % len(mturk_agent_ids)]

        def run_conversation(mturk_manager, opt, workers):
            agents = workers[0]
            conv_idx = mturk_manager.conversation_index
            world = Convai2EvalWorld(
                opt=opt,
                agents=[agents],
                range_turn=[int(s) for s in opt['range_turn'].split(',')],
                max_turn=opt['max_turns'],
                max_resp_time=opt['max_resp_time'],
                model_agent_opt=shared_bot_params,
                world_tag='conversation t_{}'.format(conv_idx),
                agent_timeout_shutdown=opt['ag_shutdown_time'],
            )
            world.reset_random()
            while not world.episode_done():
                world.parley()
            world.save_data()

            world.shutdown()
            world.review_work()

        mturk_manager.start_task(eligibility_function=check_worker_eligibility,
                                 assign_role_function=assign_worker_roles,
                                 task_function=run_conversation)

    except BaseException:
        raise
    finally:
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
Example #24
def setup_args(parser=None):
    if parser is None:
        parser = ParlaiParser(True, True, 'Self chat with a model')
    parser.add_argument('--seed', type=int, default=42)
    parser.add_argument('-d', '--display-examples', type='bool', default=True)
    parser.add_argument(
        '--display-ignore-fields',
        type=str,
        default='label_candidates,text_candidates',
        help='Do not display these fields',
    )
    parser.add_argument(
        '-st',
        '--selfchat-task',
        type='bool',
        default=True,
        help='Create a self chat version of the task',
    )
    parser.add_argument(
        '--num-self-chats', type=int, default=1, help='Number of self chats to run'
    )
    parser.add_argument(
        '--selfchat-max-turns',
        type=int,
        default=6,
        help='The number of dialogue turns before self chat ends',
    )
    parser.add_argument(
        '--seed-messages-from-task',
        action='store_true',
        help='Automatically seed conversation with messages from task dataset.',
    )
    parser.add_argument(
        '--outfile', type=str, default=None, help='File to save self chat logs'
    )
    parser.add_argument(
        '--save-format',
        type=str,
        default='conversations',
        choices=['conversations', 'parlai', 'jsonl'],
        help='Format to save logs in',
    )
    parser.set_defaults(interactive_mode=True, task='self_chat')
    WorldLogger.add_cmdline_args(parser)
    return parser
Example #25
def main():
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    opt = argparser.parse_args()
    opt['task'] = os.path.basename(os.path.dirname(os.path.abspath(__file__)))
    opt.update(task_config)

    # Initialize a SQuAD teacher agent, which we will get context from
    module_name = 'parlai.tasks.squad.agents'
    class_name = 'DefaultTeacher'
    my_module = importlib.import_module(module_name)
    task_class = getattr(my_module, class_name)
    task_opt = {}
    task_opt['datatype'] = 'train'
    task_opt['datapath'] = opt['datapath']

    mturk_agent_id = 'Worker'
    mturk_manager = MTurkManager(
        opt=opt,
        mturk_agent_ids=[mturk_agent_id]
    )
    mturk_manager.setup_server()

    def run_onboard(worker):
        world = QADataCollectionOnboardWorld(opt=opt, mturk_agent=worker)
        while not world.episode_done():
            world.parley()
        world.shutdown()

    mturk_manager.set_onboard_function(onboard_function=None)

    try:
        mturk_manager.start_new_run()
        mturk_manager.create_hits()

        mturk_manager.ready_to_accept_workers()

        def check_workers_eligibility(workers):
            return workers

        eligibility_function = {
            'func': check_workers_eligibility,
            'multiple': True,
        }

        def assign_worker_roles(worker):
            worker[0].id = mturk_agent_id

        global run_conversation

        def run_conversation(mturk_manager, opt, workers):
            task = task_class(task_opt)
            mturk_agent = workers[0]
            world = QADataCollectionWorld(
                opt=opt,
                task=task,
                mturk_agent=mturk_agent
            )
            while not world.episode_done():
                world.parley()
            world.shutdown()
            world.review_work()

        mturk_manager.start_task(
            eligibility_function=eligibility_function,
            assign_role_function=assign_worker_roles,
            task_function=run_conversation
        )
    except BaseException:
        raise
    finally:
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
Example #26
def main():
    # Get command line arguments
    argparser = ParlaiParser()
    DictionaryAgent.add_cmdline_args(argparser)
    opt = argparser.parse_args()
    build_dict(opt)
Example #27
def setup_args(parser=None):
    if parser is None:
        parser = ParlaiParser(True, True)
    # Get command line arguments
    parser.add_argument('-ne', '--num-examples', type=int, default=-1)
    parser.add_argument('-d', '--display-examples', type='bool', default=False)
    parser.add_argument('-ltim', '--log-every-n-secs', type=float, default=2)
    parser.add_argument(
        '--metrics',
        type=str,
        default="all",
        help="list of metrics to show/compute, e.g. ppl,f1,accuracy,hits@1."
        "If 'all' is specified [default] all are shown.")
    TensorboardLogger.add_cmdline_args(parser)
    parser.set_defaults(datatype='valid')
    return parser
Example #28
 def add_cmdline_args(
     cls, parser: ParlaiParser, partial_opt: Optional[Opt] = None
 ) -> ParlaiParser:
     """
     Add CLI args.
     """
     super().add_cmdline_args(parser, partial_opt=partial_opt)
     agent = parser.add_argument_group('TorchRankerAgent')
     agent.add_argument(
         '-cands',
         '--candidates',
         type=str,
         default='inline',
         choices=['batch', 'inline', 'fixed', 'batch-all-cands'],
         help='The source of candidates during training '
         '(see TorchRankerAgent._build_candidates() for details).',
     )
     agent.add_argument(
         '-ecands',
         '--eval-candidates',
         type=str,
         default='inline',
         choices=['batch', 'inline', 'fixed', 'vocab', 'batch-all-cands'],
          help='The source of candidates during evaluation (defaults to the '
          'same value as --candidates if no flag is given)',
     )
     agent.add_argument(
         '-icands',
         '--interactive-candidates',
         type=str,
         default='fixed',
         choices=['fixed', 'inline', 'vocab'],
         help='The source of candidates during interactive mode. Since in '
         'interactive mode, batchsize == 1, we cannot use batch candidates.',
     )
     agent.add_argument(
         '--repeat-blocking-heuristic',
         type='bool',
         default=True,
         help='Block repeating previous utterances. '
         'Helpful for many models that score repeats highly, so switched '
         'on by default.',
     )
     agent.add_argument(
         '-fcp',
         '--fixed-candidates-path',
         type=str,
         help='A text file of fixed candidates to use for all examples, one '
         'candidate per line',
     )
     agent.add_argument(
         '--fixed-candidate-vecs',
         type=str,
         default='reuse',
         help='One of "reuse", "replace", or a path to a file with vectors '
         'corresponding to the candidates at --fixed-candidates-path. '
         'The default path is a /path/to/model-file.<cands_name>, where '
         '<cands_name> is the name of the file (not the full path) passed by '
         'the flag --fixed-candidates-path. By default, this file is created '
         'once and reused. To replace it, use the "replace" option.',
     )
     agent.add_argument(
         '--encode-candidate-vecs',
         type='bool',
         default=True,
         help='Cache and save the encoding of the candidate vecs. This '
         'might be used when interacting with the model in real time '
         'or evaluating on fixed candidate set when the encoding of '
         'the candidates is independent of the input.',
     )
     agent.add_argument(
         '--encode-candidate-vecs-batchsize',
         type=int,
         default=256,
         hidden=True,
         help='Batchsize when encoding candidate vecs',
     )
     agent.add_argument(
         '--init-model',
         type=str,
         default=None,
         help='Initialize model with weights from this file.',
     )
     agent.add_argument(
         '--train-predict',
         type='bool',
         default=False,
         help='Get predictions and calculate mean rank during the train '
         'step. Turning this on may slow down training.',
     )
     agent.add_argument(
         '--cap-num-predictions',
         type=int,
         default=100,
         help='Limit to the number of predictions in output.text_candidates',
     )
     agent.add_argument(
         '--ignore-bad-candidates',
         type='bool',
         default=False,
         help='Ignore examples for which the label is not present in the '
         'label candidates. Default behavior results in RuntimeError. ',
     )
     agent.add_argument(
         '--rank-top-k',
         type=int,
         default=-1,
          help='Ranking returns the top k results if k > 0, otherwise sorts every '
          'single candidate according to the ranking.',
     )
     agent.add_argument(
         '--inference',
         choices={'max', 'topk'},
         default='max',
         help='Final response output algorithm',
     )
     agent.add_argument(
         '--topk',
         type=int,
         default=5,
         help='K used in Top K sampling inference, when selected',
     )
     agent.add_argument(
         '--return-cand-scores',
         type='bool',
         default=False,
         help='Return sorted candidate scores from eval_step',
     )
     return parser
Example #29
def main():
    completed_workers = []
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    opt = argparser.parse_args()
    opt['task'] = os.path.basename(os.path.dirname(os.path.abspath(__file__)))
    opt.update(task_config)

    mturk_agent_id = 'Worker'
    mturk_manager = MTurkManager(
        opt=opt,
        mturk_agent_ids=[mturk_agent_id]
    )
    mturk_manager.setup_server()
    qual_name = 'ParlAIExcludeQual{}t{}'.format(
        random.randint(10000, 99999), random.randint(10000, 99999))
    qual_desc = (
        'Qualification for a worker not correctly completing the '
        'first iteration of a task. Used to filter to different task pools.'
    )
    qualification_id = \
        mturk_utils.find_or_create_qualification(qual_name, qual_desc)
    print('Created qualification: ', qualification_id)

    def run_onboard(worker):
        world = QualificationFlowOnboardWorld(opt, worker)
        while not world.episode_done():
            world.parley()
        world.shutdown()

    mturk_manager.set_onboard_function(onboard_function=run_onboard)

    try:
        mturk_manager.start_new_run()
        agent_qualifications = [{
            'QualificationTypeId': qualification_id,
            'Comparator': 'DoesNotExist',
            'RequiredToPreview': True
        }]
        mturk_manager.create_hits(qualifications=agent_qualifications)

        mturk_manager.ready_to_accept_workers()

        def check_worker_eligibility(worker):
            return True

        def assign_worker_roles(worker):
            worker[0].id = mturk_agent_id

        global run_conversation

        def run_conversation(mturk_manager, opt, workers):
            mturk_agent = workers[0]
            world = QualificationFlowSoloWorld(
                opt=opt,
                mturk_agent=mturk_agent,
                qualification_id=qualification_id,
                firstTime=(mturk_agent.worker_id not in completed_workers),
            )
            while not world.episode_done():
                world.parley()
            completed_workers.append(mturk_agent.worker_id)
            world.shutdown()
            world.review_work()

        mturk_manager.start_task(
            eligibility_function=check_worker_eligibility,
            assign_role_function=assign_worker_roles,
            task_function=run_conversation
        )
    except BaseException:
        raise
    finally:
        mturk_utils.delete_qualification(qualification_id)
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
Example #30
def setup_args(parser=None):
    if parser is None:
        parser = ParlaiParser(True, True, 'Self chat with a model')
    parser.add_argument('--seed', type=int, default=42)
    parser.add_argument('-d', '--display-examples', type='bool', default=True)
    parser.add_argument('-n', '-ne', '--num-examples', type=int, default=10)
    parser.add_argument('-ltim', '--log-every-n-secs', type=float, default=2)
    parser.add_argument(
        '--display-ignore-fields',
        type=str,
        default='label_candidates,text_candidates',
        help='Do not display these fields',
    )
    parser.add_argument(
        '-it',
        '--interactive-task',
        type='bool',
        default=True,
        help='Create interactive version of task',
    )
    parser.add_argument(
        '--selfchat-max-turns',
        type=int,
        default=10,
        help="The number of dialogue turns before self chat ends.",
    )
    parser.add_argument(
        '--seed-messages-from-task',
        action='store_true',
        help="Automatically seed conversation with messages from task dataset.",
    )
    parser.add_argument('--outfile', type=str, default='/tmp/selfchat.json')
    parser.add_argument('--format',
                        type=str,
                        default='json',
                        choices={'parlai', 'json'})
    parser.set_defaults(interactive_mode=True, task='self_chat')
    WorldLogger.add_cmdline_args(parser)
    return parser
Example #31
def main():
    '''Handles setting up and running a ParlAI-MTurk task by instantiating
    an MTurk manager and configuring it for the qa_data_collection task
    '''
    # Get relevant arguments
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    opt = argparser.parse_args()

    # Set the task name to be the folder name
    opt['task'] = os.path.basename(os.path.dirname(os.path.abspath(__file__)))

    # append the contents of task_config.py to the configuration
    opt.update(task_config)

    # Initialize a SQuAD teacher agent, which we will get context from
    module_name = 'parlai.tasks.squad.agents'
    class_name = 'DefaultTeacher'
    my_module = importlib.import_module(module_name)
    task_class = getattr(my_module, class_name)
    task_opt = opt.copy()
    task_opt['datatype'] = 'train'
    task_opt['datapath'] = opt['datapath']

    # Select an agent_id that worker agents will be assigned in their world
    mturk_agent_id = 'Worker'

    # Instantiate an MTurkManager with the given options and a maximum number
    # of agents per world of 1 (based on the length of mturk_agent_ids)
    mturk_manager = MTurkManager(opt=opt, mturk_agent_ids=[mturk_agent_id])
    mturk_manager.setup_server()

    # Create an onboard_function, which will be run for workers who have
    # accepted your task and must be completed before they are put in the
    # queue for a task world.
    def run_onboard(worker):
        world = QADataCollectionOnboardWorld(opt=opt, mturk_agent=worker)
        while not world.episode_done():
            world.parley()
        world.shutdown()

    # If we want to use the above onboard function, we can replace the below
    # with set_onboard_function(onboard_function=run_onboard)
    mturk_manager.set_onboard_function(onboard_function=None)

    try:
        # Initialize run information
        mturk_manager.start_new_run()

        # Set up the sockets and threads to receive workers
        mturk_manager.ready_to_accept_workers()

        # Create the hits as specified by command line arguments
        mturk_manager.create_hits()

        # Checking worker eligibility acts as a filter, and should return
        # the list of all workers currently eligible to work on the task
        def check_workers_eligibility(workers):
            return workers

        eligibility_function = {
            'func': check_workers_eligibility,
            'multiple': True,
        }

        # Assign worker roles is used to determine what role each worker
        # in the given worker list will play. Setting `id` to None will return
        # the worker to the pool rather than putting them in a given task,
        # which is useful for having tasks with different possible worker
        # counts.
        def assign_worker_roles(workers):
            workers[0].id = mturk_agent_id

        # Define the task function, which will be run with the workers that
        # are assigned to the main task.
        global run_conversation

        def run_conversation(mturk_manager, opt, workers):
            # create a task agent to ask the questions
            task = task_class(task_opt)
            # Create the task world
            world = QADataCollectionWorld(opt=opt,
                                          task=task,
                                          mturk_agent=workers[0])
            # run the world to completion
            while not world.episode_done():
                world.parley()

            # shutdown and review the work
            world.shutdown()
            world.review_work()
            world.save_data()

        # Begin the task, allowing mturk_manager to start running the task
        # world on any workers who connect
        mturk_manager.start_task(eligibility_function=eligibility_function,
                                 assign_role_function=assign_worker_roles,
                                 task_function=run_conversation)
    except BaseException:
        raise
    finally:
        # Any hits that aren't claimed or completed have to be shut down. Must
        # keep the world running until that point.
        mturk_manager.expire_all_unassigned_hits()
        # Shutdown the manager and free all related resources
        mturk_manager.shutdown()
Example #32
def _make_argparse_table(class_):
    """
    Build the reStructuredText table containing the args and descriptions.
    """
    readme = []
    parser = ParlaiParser(False, False)
    class_.add_cmdline_args(parser, partial_opt=None)
    # group by whatever ArgumentGroups there are
    for ag in parser._action_groups:
        actions = []
        # get options defined within only this group
        for action in ag._group_actions:
            if hasattr(action, 'hidden') and action.hidden:
                # some options are marked hidden
                continue
            if action.dest == argparse.SUPPRESS or action.dest == 'help':
                continue
            action_strings = ",  ".join(f'`{a}`'
                                        for a in action.option_strings)
            description = []
            if action.help:
                h = action.help
                if not h[0].isupper():
                    h = h[0].upper() + h[1:]
                h = h.replace("%(default)s", str(action.default))
                description += [h]
            # list choices if there are any
            if action.choices:
                description += [
                    "Choices: " + ", ".join(f'`{c}`'
                                            for c in action.choices) + "."
                ]
            # list default and recommended values.
            default_value = ""
            if action.default is not None and action.default is not argparse.SUPPRESS:
                default_value += f"Default: ``{action.default}``.  "
            if hasattr(action, 'recommended') and action.recommended:
                default_value += f"Recommended: ``{action.recommended}``. "

            # special escape for a few args which use a literal newline as their default
            if default_value:
                default_value = default_value.replace("\n", "\\n")
                description.append(default_value)

            description = "\n".join(description)
            # escape for the fact that we're inserting this inside a table
            description = description.replace("\n", "\n   \n   ")
            actions.append((action_strings, description))

        if not actions:
            continue

        readme.append(f'__{ag.title.title()}__\n\n')
        readme.append("| Argument | Description |\n")
        readme.append("|----------|----------|\n")
        for row in actions:
            text = "| " + " | ".join(row) + " |"
            text = text.replace("\n", "<br>")
            readme.append(f"{text}\n")
        readme.append("\n\n")
    return readme
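A hedged usage sketch for _make_argparse_table(); TorchRankerAgent is used only because Example #28 shows it exposing the expected add_cmdline_args(parser, partial_opt) signature, and its import path here is an assumption:

from parlai.core.torch_ranker_agent import TorchRankerAgent

# Build the per-argument-group tables and print them as one string.
print(''.join(_make_argparse_table(TorchRankerAgent)))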
Example #33
def setup_args(parser=None) -> ParlaiParser:
    """
    Build the ParlAI parser, adding command line args if necessary.

    :param ParlaiParser parser:
        Preexisting parser to append options to. Will be created if needed.

    :returns:
        the ParlaiParser with CLI options added.
    """
    if parser is None:
        parser = ParlaiParser(True, True, 'Train a model')
    parser.add_pytorch_datateacher_args()
    train = parser.add_argument_group('Training Loop Arguments')
    train.add_argument(
        '-et',
        '--evaltask',
        help='task to use for valid/test '
        '(defaults to the one used for training)',
    )
    train.add_argument(
        '--eval-batchsize',
        type=int,
        hidden=True,
        help='Eval time batch size (defaults to same as -bs)',
    )
    train.add_argument('--display-examples',
                       type='bool',
                       default=False,
                       hidden=True)
    train.add_argument('-eps', '--num-epochs', type=float, default=-1)
    train.add_argument('-ttim', '--max-train-time', type=float, default=-1)
    train.add_argument('-ltim', '--log-every-n-secs', type=float, default=2)
    train.add_argument(
        '-vtim',
        '--validation-every-n-secs',
        type=float,
        default=-1,
        help='Validate every n seconds. Saves model to model_file '
        '(if set) whenever best val metric is found',
    )
    train.add_argument(
        '-stim',
        '--save-every-n-secs',
        type=float,
        default=-1,
        help='Saves the model to model_file.checkpoint after '
        'every n seconds (default -1, never).',
    )
    train.add_argument(
        '-sval',
        '--save-after-valid',
        type='bool',
        default=False,
        help='Saves the model to model_file.checkpoint after '
        'every validation (default %(default)s).',
    )
    train.add_argument(
        '-veps',
        '--validation-every-n-epochs',
        type=float,
        default=-1,
        help='Validate every n epochs. Saves model to model_file '
        '(if set) whenever best val metric is found',
    )
    train.add_argument(
        '-vme',
        '--validation-max-exs',
        type=int,
        default=-1,
        hidden=True,
        help='max examples to use during validation (default -1 uses all)',
    )
    train.add_argument(
        '--short-final-eval',
        default=False,
        hidden=True,
        type='bool',
        help='If true, obeys --validation-max-exs in the final '
        'validation and test evaluations.',
    )
    train.add_argument(
        '-vp',
        '--validation-patience',
        type=int,
        default=10,
        help=('number of iterations of validation where result'
              ' does not improve before we stop training'),
    )
    train.add_argument(
        '-vmt',
        '--validation-metric',
        default='accuracy',
        help='key into report table for selecting best validation',
    )
    train.add_argument(
        '-vmm',
        '--validation-metric-mode',
        type=str,
        choices=['max', 'min'],
        help='how to optimize validation metric (max or min)',
    )
    train.add_argument(
        '-vcut',
        '--validation-cutoff',
        type=float,
        default=1.0,
        hidden=True,
        help='value at which training will stop if exceeded by metric',
    )
    train.add_argument(
        '-lfc',
        '--load-from-checkpoint',
        type='bool',
        default=False,
        hidden=True,
        help='load model from checkpoint if available',
    )
    train.add_argument(
        '-vshare',
        '--validation-share-agent',
        default=False,
        hidden=True,
        help='use a shared copy of the agent for validation. '
        'this will eventually default to True, but '
        'currently defaults to False.',
    )
    train.add_argument(
        '-micro',
        '--aggregate-micro',
        type='bool',
        default=False,
        help='If multitasking, average metrics over the number of examples. '
        'If false, averages over the number of tasks.',
    )
    train.add_argument(
        '-mcs',
        '--metrics',
        type=str,
        default='default',
        help='list of metrics to show/compute, e.g. all, default, '
        'or give a list split by , like '
        'ppl,f1,accuracy,hits@1,rouge,bleu. '
        'The rouge metrics will be computed as rouge-1, rouge-2 and rouge-l',
    )
    TensorboardLogger.add_cmdline_args(parser)
    parser = setup_dict_args(parser)
    return parser
Example #34
def setup_args(parser=None):
    if parser is None:
        parser = ParlaiParser(True, True, 'Train a model')
    parser.add_pytorch_datateacher_args()
    train = parser.add_argument_group('Training Loop Arguments')
    train.add_argument('-et',
                       '--evaltask',
                       help=('task to use for valid/test (defaults to the '
                             'one used for training if not set)'))
    train.add_argument('--eval-batchsize',
                       type=int,
                       hidden=True,
                       help='Eval time batch size (defaults to same as -bs)')
    train.add_argument('--display-examples',
                       type='bool',
                       default=False,
                       hidden=True)
    train.add_argument('-eps', '--num-epochs', type=float, default=-1)
    train.add_argument('-ttim', '--max-train-time', type=float, default=-1)
    train.add_argument('-ltim', '--log-every-n-secs', type=float, default=2)
    train.add_argument(
        '-vtim',
        '--validation-every-n-secs',
        type=float,
        default=-1,
        help='Validate every n seconds. Saves model to model_file '
        '(if set) whenever best val metric is found')
    train.add_argument('-stim',
                       '--save-every-n-secs',
                       type=float,
                       default=-1,
                       help='Saves the model to model_file.checkpoint after '
                       'every n seconds (default -1, never).')
    train.add_argument('-sval',
                       '--save-after-valid',
                       type='bool',
                       default=False,
                       help='Saves the model to model_file.checkpoint after '
                       'every validation (default %(default)s).')
    train.add_argument(
        '-veps',
        '--validation-every-n-epochs',
        type=float,
        default=-1,
        help='Validate every n epochs. Saves model to model_file '
        '(if set) whenever best val metric is found')
    train.add_argument('-vme',
                       '--validation-max-exs',
                       type=int,
                       default=-1,
                       hidden=True,
                       help='max examples to use during validation (default '
                       '-1 uses all)')
    train.add_argument('--short-final-eval',
                       default=False,
                       hidden=True,
                       type='bool',
                       help='If true, obeys --validation-max-exs in the final '
                       'validation and test evaluations.')
    train.add_argument('-vp',
                       '--validation-patience',
                       type=int,
                       default=10,
                       help=('number of iterations of validation where result'
                             ' does not improve before we stop training'))
    train.add_argument('-vmt',
                       '--validation-metric',
                       default='accuracy',
                       help='key into report table for selecting best '
                       'validation')
    train.add_argument('-vmm',
                       '--validation-metric-mode',
                       type=str,
                       choices=['max', 'min'],
                       help='how to optimize validation metric (max or min)')
    train.add_argument('-vcut',
                       '--validation-cutoff',
                       type=float,
                       default=1.0,
                       hidden=True,
                       help='value at which training will stop if exceeded by '
                       'training metric')
    train.add_argument('-dbf',
                       '--dict-build-first',
                       hidden=True,
                       type='bool',
                       default=True,
                       help='build dictionary first before training agent')
    train.add_argument('-lfc',
                       '--load-from-checkpoint',
                       type='bool',
                       default=False,
                       hidden=True,
                       help='load model from checkpoint if available')
    train.add_argument('-vshare',
                       '--validation-share-agent',
                       default=False,
                       hidden=True,
                       help='use a shared copy of the agent for validation. '
                       'this will eventually default to True, but '
                       'currently defaults to False.')
    TensorboardLogger.add_cmdline_args(parser)
    parser = setup_dict_args(parser)
    return parser
Example #35
0
def setup_args(parser=None):
    if parser is None:
        parser = ParlaiParser(True, False, 'Lint for ParlAI tasks')
    parser.add_pytorch_datateacher_args()
    # Get command line arguments
    parser.add_argument('-ltim', '--log-every-n-secs', type=float, default=2)
    parser.add_argument(
        '--agent',
        type=int,
        default=0,
        help='Use teacher (agent 0) or model (agent 1)',
        choices=[0, 1],
    )
    parser.add_argument(
        '--new_line_new_utt',
        type='bool',
        default=False,
        help='New lines treat substrings as separate utterances.',
    )
    parser.add_argument(
        '--ignore_tokens',
        type=str,
        default='',
        help='ignore tokens containing these substrings (comma-separated)',
    )
    parser.set_defaults(datatype='train:ordered')
    DictionaryAgent.add_cmdline_args(parser)
    return parser
Example #36
0
def main():
    """This task consists of one agent, model or MTurk worker, talking to an
    MTurk worker to negotiate a deal.
    """
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    argparser.add_argument('--two_mturk_agents', dest='two_mturk_agents',
                           action='store_true', help='data collection mode '
                           'with conversations between two MTurk agents')

    opt = argparser.parse_args()
    opt['task'] = 'dealnodeal'
    opt['datatype'] = 'valid'
    opt.update(task_config)

    local_agent_1_id = 'local_1'
    mturk_agent_ids = ['mturk_agent_1']
    if opt['two_mturk_agents']:
        mturk_agent_ids.append('mturk_agent_2')

    mturk_manager = MTurkManager(
        opt=opt,
        mturk_agent_ids=mturk_agent_ids
    )

    mturk_manager.setup_server()

    try:
        mturk_manager.start_new_run()
        mturk_manager.create_hits()

        mturk_manager.set_onboard_function(onboard_function=None)
        mturk_manager.ready_to_accept_workers()

        def check_worker_eligibility(worker):
            return True

        def assign_worker_roles(workers):
            for index, worker in enumerate(workers):
                worker.id = mturk_agent_ids[index % len(mturk_agent_ids)]

        def run_conversation(mturk_manager, opt, workers):
            agents = workers[:]

            # Create a local agent
            if not opt['two_mturk_agents']:
                if 'model' in opt:
                    local_agent = create_agent(opt)
                else:
                    local_agent = LocalHumanAgent(opt=None)

                local_agent.id = local_agent_1_id
                agents.append(local_agent)

            opt["batchindex"] = mturk_manager.started_conversations

            world = MTurkDealNoDealDialogWorld(
                opt=opt,
                agents=agents
            )

            while not world.episode_done():
                world.parley()

            world.shutdown()

        mturk_manager.start_task(
            eligibility_function=check_worker_eligibility,
            assign_role_function=assign_worker_roles,
            task_function=run_conversation
        )

    except BaseException:
        raise
    finally:
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
Example #37
0
 def _get_args(self):
     parser = ParlaiParser(False, False)
     parser.add_parlai_data_path()
     parser.add_messenger_args()
     return parser.parse_args([])
Example #38
0
def setup_args():
    # Get command line arguments
    parser = ParlaiParser()
    parser.add_argument(
        '-n',
        '--num-examples',
        default=-1,
        type=int,
        help='Total number of exs to convert, -1 to convert all examples',
    )
    parser.add_argument(
        '-of',
        '--outfile',
        default=None,
        type=str,
        help='Output file where to save, by default will be created in tmp',
    )
    parser.add_argument(
        '-if',
        '--ignore-fields',
        default='id',
        type=str,
        help='Ignore these fields from the message (returned with .act() )',
    )
    parser.add_argument('-ltim', '--log-every-n-secs', type=float, default=2)
    parser.set_defaults(datatype='train:stream')
    return parser
Example #39
0
def setup_args(parser=None):
    if parser is None:
        parser = ParlaiParser(True, True, 'Interactive chat with a model')
    parser.add_argument('-d', '--display-examples', type='bool', default=False)
    # Get command line arguments
    parser.add_argument(
        '-rf',
        '--report-filename',
        type=str,
        default='',
        help='Saves a json file of the evaluation report either as an '
        'extension to the model-file (if begins with a ".") or a whole '
        'file path. Set to the empty string to not save at all.',
    )
    parser.add_argument(
        '--save-world-logs',
        type='bool',
        default=False,
        help='Saves a jsonl file containing all of the task examples and '
        'model replies. Must also specify --report-filename.',
    )
    parser.add_argument(
        '--display-prettify',
        type='bool',
        default=False,
        help='Set to use a prettytable when displaying '
        'examples with text candidates',
    )
    parser.add_argument(
        '--display-ignore-fields',
        type=str,
        default='label_candidates,text_candidates',
        help='Do not display these fields',
    )
    parser.add_argument(
        '-it',
        '--interactive-task',
        type='bool',
        default=True,
        help='Create interactive version of task',
    )
    WorldLogger.add_cmdline_args(parser)
    parser.set_defaults(interactive_mode=True, task='interactive')
    LocalHumanAgent.add_cmdline_args(parser)
    return parser
Example #40
0
def setup_args(model_args=None):
    parser = ParlaiParser(True, True, model_argv=model_args)
    train = parser.add_argument_group('Training Loop Arguments')
    train.add_argument('-et',
                       '--evaltask',
                       help=('task to use for valid/test (defaults to the '
                             'one used for training if not set)'))
    train.add_argument('-d', '--display-examples', type='bool', default=False)
    train.add_argument('-e', '--num-epochs', type=float, default=-1)
    train.add_argument('-ttim', '--max-train-time', type=float, default=-1)
    train.add_argument('-ltim', '--log-every-n-secs', type=float, default=2)
    train.add_argument('-vtim',
                       '--validation-every-n-secs',
                       type=float,
                       default=-1,
                       help='Validate every n seconds. Whenever the best '
                       'validation metric is found, saves the model to '
                       'the model_file path if set.')
    train.add_argument('-stim',
                       '--save-every-n-secs',
                       type=float,
                       default=-1,
                       help='Saves the model to model_file.checkpoint after '
                       'every n seconds (default -1, never).')
    train.add_argument('-sval',
                       '--save-after-valid',
                       type='bool',
                       default=False,
                       help='Saves the model to model_file.checkpoint after '
                       'every validation (default %(default)s).')
    train.add_argument('-vme',
                       '--validation-max-exs',
                       type=int,
                       default=-1,
                       help='max examples to use during validation (default '
                       '-1 uses all)')
    train.add_argument('-vp',
                       '--validation-patience',
                       type=int,
                       default=10,
                       help=('number of iterations of validation where result'
                             ' does not improve before we stop training'))
    train.add_argument('-vmt',
                       '--validation-metric',
                       default='accuracy',
                       help='key into report table for selecting best '
                       'validation')
    train.add_argument('-vmm',
                       '--validation-metric-mode',
                       default='max',
                       type=str,
                       choices=['max', 'min'],
                       help='how to optimize validation metric (max or min)')
    train.add_argument('-vcut',
                       '--validation-cutoff',
                       type=float,
                       default=1.0,
                       help='value at which training will stop if exceeded by '
                       'training metric')
    train.add_argument('-dbf',
                       '--dict-build-first',
                       type='bool',
                       default=True,
                       help='build dictionary first before training agent')
    return parser
Example #41
0
def setup_args(parser=None):
    if parser is None:
        parser = ParlaiParser(True, True, 'Evaluate a model')
    # Get command line arguments
    parser.add_argument(
        '-rf',
        '--report-filename',
        type=str,
        default='',
        help='Saves a json file of the evaluation report either as an '
        'extension to the model-file (if begins with a ".") or a whole '
        'file path. Set to the empty string to not save at all.',
    )
    parser.add_argument(
        '--save-world-logs',
        type='bool',
        default=False,
        help='Saves a jsonl file containing all of the task examples and '
        'model replies. Must also specify --report-filename.',
    )
    parser.add_argument(
        '--save-format',
        type=str,
        default='conversations',
        choices=['conversations', 'parlai'],
    )
    parser.add_argument('-ne', '--num-examples', type=int, default=-1)
    parser.add_argument('-d', '--display-examples', type='bool', default=False)
    parser.add_argument('-ltim', '--log-every-n-secs', type=float, default=10)
    parser.add_argument(
        '-mcs',
        '--metrics',
        type=str,
        default='default',
        help='list of metrics to show/compute, e.g. all, default, '
        'or give a list split by , like '
        'ppl,f1,accuracy,hits@1,rouge,bleu. '
        'The rouge metrics will be computed as rouge-1, rouge-2 and rouge-l',
    )
    parser.add_argument(
        '-micro',
        '--aggregate-micro',
        type='bool',
        default=False,
        help='Report micro-averaged metrics instead of macro averaged metrics.',
        recommended=False,
    )
    WorldLogger.add_cmdline_args(parser)
    TensorboardLogger.add_cmdline_args(parser)
    parser.set_params(datatype='valid')
    return parser
Example #42
0
    def get_parlai_opt(self) -> Opt:
        """
        Parser for converting fairseq argument to ParlAI opt.

        :return opt:
            opt parsed by ParlAI Parser
        """
        # assume encoder/decoder symmetrical except for number of layers
        state = self.state
        fairseq_args = state['args'].__dict__

        transformer_common_config = {}

        # 1. Map transformer params
        for each in TRANSFORMER_PARAMETER_MAPPING:
            transformer_common_config[TRANSFORMER_PARAMETER_MAPPING[
                each]] = fairseq_args[f'encoder_{each}']
        # 2. Map dropout
        for each in TRANSFORMER_DROPOUT:
            transformer_common_config[each] = fairseq_args[each]

        if 'activation_dropout' in fairseq_args:
            transformer_common_config['relu_dropout'] = fairseq_args[
                'activation_dropout']
        else:
            transformer_common_config['relu_dropout'] = fairseq_args[
                'relu_dropout']

        # 3. Map other options
        transformer_common_config.update({
            'model':
            self.opt['model'],
            # number of layers
            'n_encoder_layers':
            fairseq_args['encoder_layers'],
            'n_decoder_layers':
            fairseq_args['decoder_layers'],
            # tokenization args
            'dict_tokenizer':
            self.opt['tokenizer'],
            'bpe_vocab':
            self.opt['vocab'],
            'bpe_merge':
            self.opt['merge'],
            'n_positions':
            fairseq_args['max_source_positions'],
        })

        # 4. Embedding scale
        if 'encoder_embed_scale' in fairseq_args:
            transformer_common_config['embeddings_scale'] = (
                fairseq_args['encoder_embed_scale'] != 1.0)
        else:
            transformer_common_config[
                'embeddings_scale'] = not fairseq_args['no_scale_embedding']

        # 5. Determine variant
        if fairseq_args['encoder_normalize_before']:
            transformer_common_config['variant'] = 'prelayernorm'
        elif fairseq_args['layernorm_embedding']:
            transformer_common_config['variant'] = 'bart'
        else:
            transformer_common_config['variant'] = 'aiayn'

        if self.opt['add_prefix_space']:
            transformer_common_config['bpe_add_prefix_space'] = True
        parser = ParlaiParser()
        parser.set_params(**transformer_common_config)
        opt = parser.parse_args([])

        # 6. Augment opt with additional ParlAI options
        opt['fp16'] = self.opt['fp16']
        opt['activation'] = self.opt['activation']
        opt['delimiter'] = self.opt['delimiter']
        opt['history_add_global_end_token'] = self.opt[
            'history_add_global_end_token']
        # Makes model fp16 ready for fine-tuning, means 4 extra padding tokens.
        opt['force_fp16_tokens'] = True
        opt['converting'] = True

        return opt
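
The conversion above leans on a small ParlaiParser idiom: seed option values with set_params(...) and then call parse_args([]) so that an empty argv, rather than the real command line, gets parsed. Below is a stripped-down sketch of that idiom; the particular keys and values are placeholders, not part of the converter.

from parlai.core.params import ParlaiParser

parser = ParlaiParser()
# placeholder defaults; any option keys can be seeded this way
parser.set_params(task='babi:task10k:1', datatype='valid')
opt = parser.parse_args([])  # empty argv, so only the seeded defaults are used
print(opt['task'], opt['datatype'])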
Example #43
0
def main():
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()

    # The dialog model we want to evaluate
    from parlai.agents.ir_baseline.ir_baseline import IrBaselineAgent
    IrBaselineAgent.add_cmdline_args(argparser)
    opt = argparser.parse_args()
    opt['task'] = os.path.basename(os.path.dirname(os.path.abspath(__file__)))
    opt.update(task_config)

    # The task that we will evaluate the dialog model on
    task_opt = {}
    task_opt['datatype'] = 'test'
    task_opt['datapath'] = opt['datapath']
    task_opt['task'] = '#MovieDD-Reddit'

    mturk_agent_id = 'Worker'
    mturk_manager = MTurkManager(
        opt=opt,
        mturk_agent_ids=[mturk_agent_id]
    )
    mturk_manager.setup_server()

    try:
        mturk_manager.start_new_run()
        mturk_manager.create_hits()

        def run_onboard(worker):
            world = ModelEvaluatorOnboardWorld(opt=opt, mturk_agent=worker)
            while not world.episode_done():
                world.parley()
            world.shutdown()

        mturk_manager.set_onboard_function(onboard_function=run_onboard)
        mturk_manager.ready_to_accept_workers()

        def check_worker_eligibility(worker):
            return True

        def assign_worker_roles(worker):
            worker[0].id = mturk_agent_id

        global run_conversation

        def run_conversation(mturk_manager, opt, workers):
            mturk_agent = workers[0]

            model_agent = IrBaselineAgent(opt=opt)

            world = ModelEvaluatorWorld(
                opt=opt,
                model_agent=model_agent,
                task_opt=task_opt,
                mturk_agent=mturk_agent
            )

            while not world.episode_done():
                world.parley()
            world.shutdown()
            world.review_work()

        mturk_manager.start_task(
            eligibility_function=check_worker_eligibility,
            assign_role_function=assign_worker_roles,
            task_function=run_conversation
        )
    except BaseException:
        raise
    finally:
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
Example #44
0
from projects.wizard_of_wikipedia.wizard_transformer_ranker.wizard_transformer_ranker import (
    WizardTransformerRankerAgent, )
"""Evaluate pre-trained retrieval model on the full Wizard Dialogue task.

NOTE: Metrics here differ slightly to those reported in the paper as a result
of code changes.

Results on seen test set:
Hits@1/100: 86.7

Results on unseen test set (run with flag
`-t wizard_of_wikipedia:WizardDialogKnowledge:topic_split`):
Hits@1/100: 68.96
"""

if __name__ == '__main__':
    parser = ParlaiParser(add_model_args=True)
    parser.add_argument('-n', '--num-examples', default=100000000)
    parser.add_argument('-d', '--display-examples', type='bool', default=False)
    parser.add_argument('-ltim', '--log-every-n-secs', type=float, default=2)
    WizardTransformerRankerAgent.add_cmdline_args(parser)
    parser.set_params(
        task='wizard_of_wikipedia',
        model='projects:wizard_of_wikipedia:wizard_transformer_ranker',
        model_file=
        'models:wizard_of_wikipedia/full_dialogue_retrieval_model/model',
        datatype='test',
        n_heads=6,
        ffn_size=1200,
        embeddings_scale=False,
        delimiter=' __SOC__ ',
        n_positions=1000,
Example #45
0
def setup_args(parser=None):
    if parser is None:
        parser = ParlaiParser(
            True, True, 'Interactive chat with a model on the command line')
    parser.add_argument('-d', '--display-examples', type='bool', default=False)
    parser.add_argument(
        '--display-prettify',
        type='bool',
        default=False,
        help='Set to use a prettytable when displaying '
        'examples with text candidates',
    )
    parser.add_argument(
        '--display-add-fields',
        type=str,
        default='',
        help=
        'Display these fields when verbose is off (e.g., "--display-add-fields label_candidates,beam_texts")',
    )
    parser.add_argument(
        '-it',
        '--interactive-task',
        type='bool',
        default=True,
        help='Create interactive version of task',
    )
    parser.add_argument(
        '--outfile',
        type=str,
        default='',
        help='Saves a jsonl file containing all of the task examples and '
        'model replies. Set to the empty string to not save at all',
    )
    parser.add_argument(
        '--save-format',
        type=str,
        default='conversations',
        choices=['conversations', 'parlai'],
        help=
        'Format to save logs in. conversations is a jsonl format, parlai is a text format.',
    )
    parser.set_defaults(interactive_mode=True, task='interactive')
    LocalHumanAgent.add_cmdline_args(parser)
    WorldLogger.add_cmdline_args(parser)
    return parser
Example #46
0
def main():
    random.seed(42)
    # Get command line arguments
    parser = ParlaiParser()
    parser.add_argument(
        '-n',
        '--num-examples',
        default=-1,
        type=int,
        help='Total number of exs to convert, -1 to convert '
        'all examples',
    )
    parser.add_argument(
        '-of',
        '--outfile',
        default=None,
        type=str,
        help='Output file where to save, by default will be '
        'created in /tmp',
    )
    parser.add_argument('-ltim', '--log-every-n-secs', type=float, default=2)
    parser.set_defaults(datatype='train:ordered')
    opt = parser.parse_args()
    dump_data(opt)
Example #47
0
#!/usr/bin/env python3

# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
from parlai.core.build_data import download_models
from parlai.core.params import ParlaiParser
from parlai.scripts.interactive import interactive
from projects.personachat.persona_seq2seq import PersonachatSeqseqAgentBasic
'''Interact with pre-trained model
Generative model trained on personachat using persona 'self'
Run from ParlAI directory
'''

if __name__ == '__main__':
    parser = ParlaiParser(add_model_args=True)
    parser.add_argument('-d', '--display-examples', type='bool', default=False)
    PersonachatSeqseqAgentBasic.add_cmdline_args(parser)
    parser.set_defaults(
        dict_file='models:personachat/profile_memory/fulldict.dict',
        interactive_mode=True,
        task='parlai.agents.local_human.local_human:LocalHumanAgent',
        model=
        'projects.personachat.persona_seq2seq:PersonachatSeqseqAgentBasic',
        model_file=
        'models:personachat/seq2seq_personachat/seq2seq_no_dropout0.2_lstm_1024_1e-3'
    )

    opt = parser.parse_args()
    opt['model_type'] = 'seq2seq_personachat'  # for builder
    # build all profile memory models
Example #48
0
 def add_cmdline_args(
     cls, parser: ParlaiParser, partial_opt: Optional[Opt] = None
 ) -> ParlaiParser:
     parser = parser.add_argument_group('Retriever Arguments')
     parser.add_argument(
         '--retriever-numworkers',
         type=int,
         default=None,
         help='Number of CPU processes (for tokenizing, etc)',
     )
     parser.add_argument(
         '--retriever-ngram',
         type=int,
         default=2,
         help='Use up to N-size n-grams (e.g. 2 = unigrams + bigrams)',
     )
     parser.add_argument(
         '--retriever-hashsize',
         type=int,
         default=int(math.pow(2, 24)),
         help='Number of buckets to use for hashing ngrams',
     )
     parser.add_argument(
         '--retriever-tokenizer',
         type=str,
         default='simple',
         help='String option specifying tokenizer type to use.',
     )
     parser.add_argument(
         '--retriever-num-retrieved',
         default=5,
         type=int,
         help='How many docs to retrieve.',
     )
     parser.add_argument(
         '--remove-title',
         type='bool',
         default=False,
         help='Whether to remove the title from the retrieved passage',
     )
     parser.add_argument(
         '--retriever-mode',
         choices=['keys', 'values'],
         default='values',
         help='Whether to retrieve the stored key or the stored value. For '
         'example, if you want to return the text of an example, use '
         'keys here; if you want to return the label, use values here.',
     )
     parser.add_argument(
         '--index-by-int-id',
         type='bool',
         default=True,
         help=(
             'Whether to index into database by doc id as an integer. This '
             'defaults to true for DBs built using ParlAI.'
         ),
     )
     parser.add_argument(
         '--tfidf-context-length',
         default=-1,
         type=int,
         help='Number of past utterances to remember when '
         'building flattened batches of data in multi-'
         'example episodes.',
     )
     parser.add_argument(
         '--tfidf-include-labels',
         default=True,
         type='bool',
         help='Specifies whether or not to include labels '
         'as past utterances when building flattened '
         'batches of data in multi-example episodes.',
     )
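
add_cmdline_args above follows the usual ParlAI agent pattern: a classmethod that receives the parser (plus an optional partial_opt in newer releases), registers its own argument group, and leaves parsing to the caller. A minimal sketch of that wiring, using DictionaryAgent only because it is invoked the same way in the other examples here:

from parlai.core.dict import DictionaryAgent
from parlai.core.params import ParlaiParser

parser = ParlaiParser(False, False)
DictionaryAgent.add_cmdline_args(parser)  # the agent registers its own flags
opt = parser.parse_args([])
print(opt['dict_tokenizer'])  # registered flags show up in opt with underscores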
Example #49
0
def main():
    # Get command line arguments
    argparser = ParlaiParser()
    DictionaryAgent.add_cmdline_args(argparser)
    ParsedRemoteAgent.add_cmdline_args(argparser)
    argparser.add_argument('--num-examples', default=1000, type=int)
    argparser.add_argument('--num-its', default=100, type=int)
    argparser.add_argument('--dict-max-exs', default=10000, type=int)
    parlai_home = os.environ['PARLAI_HOME']
    if '--remote-cmd' not in sys.argv:
        if os.system('which luajit') != 0:
            raise RuntimeError('Could not detect torch luajit installed: ' +
                               'please install torch from http://torch.ch ' +
                               'or manually set --remote-cmd for this example.')
        sys.argv.append('--remote-cmd')
        sys.argv.append('luajit {}/parlai/agents/'.format(parlai_home) +
                        'memnn_luatorch_cpu/memnn_zmq_parsed.lua')
    if '--remote-args' not in sys.argv:
        sys.argv.append('--remote-args')
        sys.argv.append('{}/examples/'.format(parlai_home) +
                        'memnn_luatorch_cpu/params_default.lua')

    opt = argparser.parse_args()

    # set up dictionary
    print('Setting up dictionary.')
    dictionary = DictionaryAgent(opt)
    if not opt.get('dict_file'):
        # build dictionary since we didn't load it
        ordered_opt = copy.deepcopy(opt)
        ordered_opt['datatype'] = 'train:ordered'
        ordered_opt['numthreads'] = 1
        world_dict = create_task(ordered_opt, dictionary)

        print('Dictionary building on training data.')
        cnt = 0
        # pass examples to dictionary
        for _ in world_dict:
            cnt += 1
            if cnt > opt['dict_max_exs'] and opt['dict_max_exs'] > 0:
                print('Processed {} exs, moving on.'.format(
                      opt['dict_max_exs']))
                # don't wait too long...
                break

            world_dict.parley()

        # we need to save the dictionary to load it in memnn (sort it by freq)
        dictionary.sort()
        dictionary.save('/tmp/dict.txt', sort=True)

    print('Dictionary ready, moving on to training.')

    opt['datatype'] = 'train'
    agent = ParsedRemoteAgent(opt, {'dictionary_shared': dictionary.share()})
    world_train = create_task(opt, agent)
    opt['datatype'] = 'valid'
    world_valid = create_task(opt, agent)

    start = time.time()
    with world_train:
        for _ in range(opt['num_its']):
            print('[ training ]')
            for _ in range(opt['num_examples'] * opt.get('numthreads', 1)):
                world_train.parley()
            world_train.synchronize()

            print('[ validating ]')
            world_valid.reset()
            for _ in world_valid:  # check valid accuracy
                world_valid.parley()

            print('[ validation summary. ]')
            report_valid = world_valid.report()
            print(report_valid)
            if report_valid['accuracy'] > 0.95:
                break

        # show some example dialogs after training:
        world_valid = create_task(opt, agent)
        for _k in range(3):
            world_valid.parley()
            print(world_valid.display())

    print('finished in {} s'.format(round(time.time() - start, 2)))
Example #50
0
def superscript_main(args=None):
    """
    Superscript is a loader for all the other scripts.
    """
    setup_script_registry()

    parser = _SupercommandParser(False,
                                 False,
                                 formatter_class=_SuperscriptHelpFormatter)
    parser.add_argument(
        '--helpall',
        action='helpall',
        help='show all commands, including advanced ones.',
    )
    parser.set_defaults(super_command=None)
    subparsers = parser.add_subparsers(
        parser_class=_SubcommandParser,
        title="Commands",
        metavar="COMMAND",
    )
    hparser = subparsers.add_parser(
        'help',
        aliases=['h'],
        help=argparse.SUPPRESS,
        description="List the main commands",
    )
    hparser.set_defaults(super_command='help')
    hparser = subparsers.add_parser(
        'helpall',
        help=argparse.SUPPRESS,
        description="List all commands, including advanced ones.",
    )
    hparser.set_defaults(super_command='helpall')

    # build the supercommand
    for script_name, registration in SCRIPT_REGISTRY.items():
        logging.verbose(f"Discovered command {script_name}")
        script_parser = registration.klass.setup_args()
        if script_parser is None:
            # user didn't bother defining command line args. let's just fill
            # in for them
            script_parser = ParlaiParser(False, False)
        help_ = argparse.SUPPRESS if registration.hidden else script_parser.description
        subparser = subparsers.add_parser(
            script_name,
            aliases=registration.aliases,
            help=help_,
            description=script_parser.description,
            formatter_class=CustomHelpFormatter,
        )
        subparser.set_defaults(super_command=script_name)
        for action in script_parser._actions:
            subparser._add_action(action)
        for action_group in script_parser._action_groups:
            subparser._action_groups.append(action_group)

    try:
        import argcomplete

        argcomplete.autocomplete(parser)
    except ModuleNotFoundError:
        pass

    opt = parser.parse_args(args, print_args=False)
    cmd = opt.pop('super_command')
    if cmd == 'helpall':
        parser.print_helpall()
    elif cmd == 'help' or cmd is None:
        parser.print_help()
    elif cmd is not None:
        SCRIPT_REGISTRY[cmd].klass._run_from_parser_and_opt(opt, parser)
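
Because every registered script becomes a subcommand, superscript_main can also be driven programmatically with an argv-style list; per the dispatch at the end of the function, a missing or 'help' command simply prints the help text. A minimal sketch:

superscript_main(['help'])       # prints the list of main commands
# superscript_main(['helpall'])  # would list advanced commands as well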
Example #51
0
def main():
    """
    This task consists of one local human agent and two MTurk agents.
    Each MTurk agent will go through the onboarding step to provide
    information about themselves before being put into a conversation.
    You can end the conversation by sending a message ending with
    `[DONE]` from human_1.
    """
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    opt = argparser.parse_args()
    opt['task'] = os.path.basename(os.path.dirname(os.path.abspath(__file__)))
    opt.update(task_config)

    mturk_agent_1_id = 'mturk_agent_1'
    mturk_agent_2_id = 'mturk_agent_2'
    human_agent_1_id = 'human_1'
    mturk_agent_ids = [mturk_agent_1_id, mturk_agent_2_id]
    mturk_manager = MTurkManager(
        opt=opt,
        mturk_agent_ids=mturk_agent_ids
    )
    mturk_manager.setup_server()

    try:
        mturk_manager.start_new_run()
        mturk_manager.create_hits()

        def run_onboard(worker):
            world = MTurkMultiAgentDialogOnboardWorld(
                opt=opt,
                mturk_agent=worker
            )
            while not world.episode_done():
                world.parley()
            world.shutdown()

        # You can set onboard_function to None to skip onboarding
        mturk_manager.set_onboard_function(onboard_function=run_onboard)
        mturk_manager.ready_to_accept_workers()

        def check_worker_eligibility(worker):
            return True

        eligibility_function = {
            'func': check_worker_eligibility,
            'multiple': False,
        }

        def assign_worker_roles(workers):
            for index, worker in enumerate(workers):
                worker.id = mturk_agent_ids[index % len(mturk_agent_ids)]

        def run_conversation(mturk_manager, opt, workers):
            # Create mturk agents
            mturk_agent_1 = workers[0]
            mturk_agent_2 = workers[1]

            # Create the local human agents
            human_agent_1 = LocalHumanAgent(opt=None)
            human_agent_1.id = human_agent_1_id

            world = MTurkMultiAgentDialogWorld(
                opt=opt,
                agents=[human_agent_1, mturk_agent_1, mturk_agent_2]
            )

            while not world.episode_done():
                world.parley()

            world.shutdown()

        mturk_manager.start_task(
            eligibility_function=eligibility_function,
            assign_role_function=assign_worker_roles,
            task_function=run_conversation
        )

    except BaseException:
        raise
    finally:
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
Example #52
0
def setup_rag_args(parser: ParlaiParser) -> ParlaiParser:
    group = parser.add_argument_group('RAG Model Args')
    # Standard RAG Agent Arguments
    group.add_argument(
        '--generation-model',
        type=str,
        default='bart',
        help='which generation model to use',
        choices=['transformer/generator', 'bart', 't5'],
    )
    group.add_argument(
        '--query-model',
        type=str,
        default='bert',
        help='Which query model to use for DPR.',
        choices=QUERY_MODEL_TYPES,
    )
    group.add_argument(
        '--rag-model-type',
        type=str,
        default='token',
        help='which rag model decoding to use.',
        choices=['token', 'sequence', 'turn'],
    )
    group.add_argument(
        '--thorough',
        type='bool',
        default=False,
        help='whether to use thorough decoding for rag sequence. ',
    )
    modified_group = parser.add_argument_group('Modified RAG Args')
    modified_group.add_argument(
        '--n-extra-positions',
        type=int,
        default=0,
        help='Specify > 0 to include extra positions in the encoder, in which '
        'retrieved knowledge will go. In this setup, knowledge is _appended_ '
        'instead of prepended.',
    )
    modified_group.add_argument(
        '--gold-knowledge-passage-key',
        type=str,
        default='checked_sentence',
        help=
        'key in the observation dict that indicates the gold knowledge passage. '
        'Specify, along with --debug, to compute passage retrieval metrics at train/test time.',
    )
    modified_group.add_argument(
        '--gold-knowledge-title-key',
        type=str,
        default='title',
        help=
        'key in the observation dict that indicates the gold knowledge passage title. '
        'Specify, along with --debug, to compute passage retrieval metrics at train/test time.',
    )
    retriever_group = parser.add_argument_group('RAG Retriever Args')
    retriever_group.add_argument(
        '--rag-retriever-query',
        type=str,
        default='full_history',
        choices=['one_turn', 'full_history'],
        help=
        'What to use as the query for retrieval. `one_turn` retrieves only on the last turn '
        'of dialogue; `full_history` retrieves based on the full dialogue history.',
    )
    retriever_group.add_argument(
        '--rag-retriever-type',
        type=str,
        default=RetrieverType.DPR.value,
        choices=[r.value for r in RetrieverType],
        help='Which retriever to use',
    )
    retriever_group.add_argument(
        '--retriever-debug-index',
        type=str,
        default=None,
        choices=SMALL_INDEX_TYPES,
        help='Load specified small index, for debugging.',
    )
    retriever_group.add_argument('--n-docs',
                                 type=int,
                                 default=5,
                                 help='How many documents to retrieve')
    retriever_group.add_argument(
        '--min-doc-token-length',
        type=int,
        default=64,
        help='minimum amount of information to retain from document. '
        'Useful to define if encoder does not use a lot of BPE token context.',
    )
    retriever_group.add_argument(
        '--max-doc-token-length',
        type=int,
        default=256,
        help='maximum amount of information to retain from document. ',
    )
    retriever_group.add_argument(
        '--rag-query-truncate',
        type=int,
        default=512,
        help='Max token length of query for retrieval.',
    )
    retriever_group.add_argument(
        '--print-docs',
        type='bool',
        default=False,
        help='Whether to print docs; usually useful during interactive mode.',
    )
    dense_retriever_group = parser.add_argument_group(
        'RAG Dense Passage Retriever Args')
    dense_retriever_group.add_argument(
        '--path-to-index',
        type=str,
        default=WIKIPEDIA_COMPRESSED_INDEX,
        help='path to FAISS Index.',
    )
    dense_retriever_group.add_argument(
        '--path-to-dense-embeddings',
        type=str,
        default=None,
        help='path to dense embeddings directory used to build index. '
        'Default None will assume embeddings and index are in the same directory.',
    )
    dense_retriever_group.add_argument('--dpr-model-file',
                                       type=str,
                                       default=DPR_ZOO_MODEL,
                                       help='path to DPR Model.')
    dense_retriever_group.add_argument(
        '--path-to-dpr-passages',
        type=str,
        default=WIKIPEDIA_ZOO_PASSAGES,
        help='Path to DPR passages, used to build index.',
    )
    dense_retriever_group.add_argument(
        '--retriever-embedding-size',
        type=int,
        default=768,
        help='Embedding size of dense retriever',
    )
    tfidf_retriever_group = parser.add_argument_group(
        'RAG TFIDF Retriever Args')
    tfidf_retriever_group.add_argument(
        '--tfidf-max-doc-paragraphs',
        type=int,
        default=-1,
        help='If > 0, limit documents to this many paragraphs',
    )
    tfidf_retriever_group.add_argument(
        '--tfidf-model-path',
        type=str,
        default=TFIDF_ZOO_MODEL,
        help='Optionally override TFIDF model.',
    )
    dpr_poly_retriever_group = parser.add_argument_group(
        'RAG DPR-POLY Retriever Args')
    dpr_poly_retriever_group.add_argument(
        '--dpr-num-docs',
        type=int,
        default=25,
        help='In two stage retrieval, how many DPR documents to retrieve',
    )
    dpr_poly_retriever_group.add_argument(
        '--poly-score-initial-lambda',
        type=float,
        default=0.5,
        help=
        'In two stage retrieval, how much weight to give to the poly scores. '
        'Note: Learned parameter. Specify initial value here',
    )
    dpr_poly_retriever_group.add_argument(
        '--polyencoder-init-model',
        type=str,
        default='wikito',
        help=
        'Which init model to initialize polyencoder with. Specify wikito or reddit to use '
        'models from the ParlAI zoo; otherwise, provide a path to a trained polyencoder',
    )
    poly_faiss_group = parser.add_argument_group(
        'RAG PolyFAISS retriever args')
    poly_faiss_group.add_argument(
        '--poly-faiss-model-file',
        type=str,
        default=None,
        help='path to poly-encoder for use in poly-faiss retrieval.',
    )
    regret_group = parser.add_argument_group("RAG ReGReT args")
    regret_group.add_argument(
        '--regret',
        type='bool',
        default=False,
        help='Retrieve, Generate, Retrieve, Tune. '
        'Retrieve, generate, then retrieve again, and finally tune (refine).',
    )
    regret_group.add_argument(
        '--regret-intermediate-maxlen',
        type=int,
        default=32,
        help='Maximum length in intermediate regret generation',
    )
    regret_group.add_argument(
        '--regret-model-file',
        type=str,
        default=None,
        help='Path to model for initial round of retrieval. ',
    )
    indexer_group = parser.add_argument_group("RAG Indexer Args")
    indexer_group.add_argument(
        '--indexer-type',
        type=str,
        default='compressed',
        choices=['exact', 'compressed'],
        help=
        'Granularity of RAG Indexer. Choose compressed to save on RAM costs, at the '
        'possible expense of accuracy.',
    )
    indexer_group.add_argument(
        '--indexer-buffer-size',
        type=int,
        default=65536,
        help='buffer size for adding vectors to the index',
    )
    indexer_group.add_argument(
        '--compressed-indexer-factory',
        type=str,
        default='IVF4096_HNSW128,PQ128',
        help=
        'If specified, builds compressed indexer from a FAISS Index Factory. '
        'see https://github.com/facebookresearch/faiss/wiki/The-index-factory for details',
    )
    indexer_group.add_argument(
        '--compressed-indexer-gpu-train',
        type='bool',
        default=False,
        hidden=True,
        help='Set False to not train compressed indexer on the gpu.',
    )
    indexer_group.add_argument(
        '--compressed-indexer-nprobe',
        type=int,
        default=64,
        help='How many centroids to search in compressed indexer. See '
        'https://github.com/facebookresearch/faiss/wiki/Faiss-indexes#cell-probe-methods-indexivf-indexes '
        'for details',
    )
    # See https://github.com/facebookresearch/faiss/wiki/Faiss-indexes#indexhnsw-variants for details
    indexer_group.add_argument(
        '--hnsw-indexer-store-n',
        type=int,
        default=128,
        hidden=True,
        help=
         'Granularity of DenseHNSWIndexer. Higher == more accurate, more RAM',
    )
    indexer_group.add_argument(
        '--hnsw-ef-search',
        type=int,
        default=128,
        hidden=True,
        help='Depth of exploration of search for HNSW.',
    )
    indexer_group.add_argument(
        '--hnsw-ef-construction',
        type=int,
        default=200,
        hidden=True,
        help='Depth of exploration at add time for HNSW',
    )
    return parser
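
setup_rag_args only registers argument groups on whatever parser it is given; parsing still happens in the caller. A minimal sketch, assuming the setup_rag_args defined above is in scope:

from parlai.core.params import ParlaiParser

parser = setup_rag_args(ParlaiParser(False, False))
opt = parser.parse_args(['--rag-model-type', 'sequence', '--n-docs', '10'])
print(opt['rag_model_type'], opt['n_docs'], opt['generation_model'])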
Example #53
0
def main():
    # Get command line arguments
    argparser = ParlaiParser()
    DictionaryAgent.add_cmdline_args(argparser)
    opt = argparser.parse_args()
    build_dict(opt)
Example #54
0
def main():
    """This task consists of one agent, model or MTurk worker, talking to an
    MTurk worker to negotiate a deal.
    """
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    argparser.add_argument('-min_t',
                           '--min_turns',
                           default=5,
                           type=int,
                           help='minimum number of turns')
    argparser.add_argument('-mt',
                           '--max_turns',
                           default=10,
                           type=int,
                           help='maximal number of chat turns')
    argparser.add_argument('-mx_rsp_time',
                           '--max_resp_time',
                           default=150,
                           type=int,
                           help='time limit for entering a dialog message')
    argparser.add_argument('-mx_psn_time',
                           '--max_persona_time',
                           type=int,
                           default=300,
                           help='time limit for turker '
                           'entering the persona')
    argparser.add_argument('--ag_shutdown_time',
                           default=120,
                           type=int,
                           help='time limit for entering a dialog message')
    argparser.add_argument('--persona-type',
                           default='both',
                           type=str,
                           choices=['both', 'self', 'other'],
                           help='Which personas to load from personachat')
    argparser.add_argument('--revised',
                           default=True,
                           type='bool',
                           help='Whether to use revised personas')
    argparser.add_argument('-rt',
                           '--range_turn',
                           default='5,7',
                           help='sample range of number of turns')
    opt = argparser.parse_args()
    opt['task'] = os.path.basename(os.path.dirname(os.path.abspath(__file__)))
    if 'data_path' not in opt:
        opt['data_path'] = os.getcwd() + '/data/' + opt['task']
    opt.update(task_config)

    mturk_agent_ids = ['PERSON_1']

    mturk_manager = MTurkManager(opt=opt, mturk_agent_ids=mturk_agent_ids)

    persona_generator = PersonasGenerator(opt)
    mturk_manager.setup_server()

    # SET MODEL AGENT OPT HERE
    model_agent_opt = {}

    try:
        mturk_manager.start_new_run()
        mturk_manager.create_hits()

        if not opt['is_sandbox']:
            blocked_worker_list = []
            for w in blocked_worker_list:
                mturk_manager.block_worker(
                    w,
                    'We found that you have unexpected behaviors in our previous HITs. For more questions please email us.'
                )

        def run_onboard(worker):
            worker.persona_generator = persona_generator
            world = PersonaProfileWorld(opt, worker)
            world.parley()
            world.shutdown()

        mturk_manager.set_onboard_function(onboard_function=run_onboard)
        mturk_manager.ready_to_accept_workers()

        def check_worker_eligibility(worker):
            return True

        def assign_worker_roles(workers):
            for index, worker in enumerate(workers):
                worker.id = mturk_agent_ids[index % len(mturk_agent_ids)]

        def run_conversation(mturk_manager, opt, workers):
            agents = workers[0]
            conv_idx = mturk_manager.conversation_index
            world = PersonaChatEvalWorld(
                opt=opt,
                agents=[agents],
                range_turn=[int(s) for s in opt['range_turn'].split(',')],
                max_turn=opt['max_turns'],
                max_resp_time=opt['max_resp_time'],
                model_agent_opt=model_agent_opt,
                world_tag='conversation t_{}'.format(conv_idx))
            world.reset_random()
            while not world.episode_done():
                world.parley()
            world.save_data()

            world.shutdown()
            world.review_work()

        mturk_manager.start_task(eligibility_function=check_worker_eligibility,
                                 assign_role_function=assign_worker_roles,
                                 task_function=run_conversation)

    except BaseException:
        raise
    finally:
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
Example #55
0
def main():
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    opt = argparser.parse_args()
    opt['task'] = os.path.basename(os.path.dirname(os.path.abspath(__file__)))
    opt.update(task_config)

    mturk_agent_1_id = 'mturk_agent_1'
    mturk_agent_2_id = 'mturk_agent_2'
    human_agent_1_id = 'human_1'
    mturk_agent_ids = [mturk_agent_1_id, mturk_agent_2_id]
    mturk_manager = MTurkManager(
        opt=opt,
        mturk_agent_ids=mturk_agent_ids
    )
    mturk_manager.setup_server()

    try:
        mturk_manager.start_new_run()
        mturk_manager.create_hits()

        def run_onboard(worker):
            world = MTurkMultiAgentDialogOnboardWorld(
                opt=opt,
                mturk_agent=worker
            )
            while not world.episode_done():
                world.parley()
            world.shutdown()

        # You can set onboard_function to None to skip onboarding
        mturk_manager.set_onboard_function(onboard_function=run_onboard)
        mturk_manager.ready_to_accept_workers()

        def check_worker_eligibility(worker):
            return True

        def assign_worker_roles(workers):
            for index, worker in enumerate(workers):
                worker.id = mturk_agent_ids[index % len(mturk_agent_ids)]

        def run_conversation(mturk_manager, opt, workers):
            # Create mturk agents
            mturk_agent_1 = workers[0]
            mturk_agent_2 = workers[1]

            # Create the local human agents
            human_agent_1 = LocalHumanAgent(opt=None)
            human_agent_1.id = human_agent_1_id

            world = MTurkMultiAgentDialogWorld(
                opt=opt,
                agents=[human_agent_1, mturk_agent_1, mturk_agent_2]
            )

            while not world.episode_done():
                world.parley()

            world.shutdown()

        mturk_manager.start_task(
            eligibility_function=check_worker_eligibility,
            assign_role_function=assign_worker_roles,
            task_function=run_conversation
        )

    except BaseException:
        raise
    finally:
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
Example #56
0
def setup_args(parser=None):
    if parser is None:
        parser = ParlaiParser(True, True,
                              'compute statistics from model predictions')
    DictionaryAgent.add_cmdline_args(parser)
    # Get command line arguments
    parser.add_argument('-ne', '--num-examples', type=int, default=-1)
    parser.add_argument('-ltim', '--log-every-n-secs', type=float, default=2)
    parser.add_argument(
        '-ed',
        '--external-dict',
        type=str,
        default=None,
        help='External dictionary for stat computation',
    )
    parser.add_argument(
        '-fb',
        '--freq-bins',
        type=str,
        default='0,100,1000,10000',
        help='Bins boundaries for rare words stat',
    )
    parser.add_argument(
        '-dup',
        '--dump-predictions-path',
        type=str,
        default=None,
        help='Dump predictions into file',
    )
    parser.add_argument(
        '-cun',
        '--compute-unique',
        type='bool',
        default=True,
        help='Compute %% of unique responses from the model',
    )
    parser.set_defaults(datatype='valid')
    TensorboardLogger.add_cmdline_args(parser)
    return parser
Example #57
0
def main():
    # Get command line arguments
    parser = ParlaiParser(True, True)
    train = parser.add_argument_group('Training Loop Arguments')
    train.add_argument('-et', '--evaltask',
                       help=('task to use for valid/test (defaults to the '
                             'one used for training if not set)'))
    train.add_argument('-d', '--display-examples',
                       type='bool', default=False)
    train.add_argument('-e', '--num-epochs', type=float, default=-1)
    train.add_argument('-ttim', '--max-train-time',
                       type=float, default=-1)
    train.add_argument('-ltim', '--log-every-n-secs',
                       type=float, default=2)
    train.add_argument('-vtim', '--validation-every-n-secs',
                       type=float, default=-1)
    train.add_argument('-vme', '--validation-max-exs',
                       type=int, default=-1,
                       help='max examples to use during validation (default '
                            '-1 uses all)')
    train.add_argument('-vp', '--validation-patience',
                       type=int, default=5,
                       help=('number of iterations of validation where result'
                             ' does not improve before we stop training'))
    train.add_argument('-vmt', '--validation-metric', default='accuracy',
                       help='key into report table for selecting best '
                            'validation')
    train.add_argument('-dbf', '--dict-build-first',
                       type='bool', default=True,
                       help='build dictionary first before training agent')
    opt = parser.parse_args()
    # Possibly build a dictionary (not all models do this).
    if opt['dict_build_first'] and 'dict_file' in opt:
        if opt['dict_file'] is None and opt.get('model_file'):
            opt['dict_file'] = opt['model_file'] + '.dict'
        print("[ building dictionary first... ]")
        build_dict.build_dict(opt)
    # Create model and assign it to the specified task
    agent = create_agent(opt)
    world = create_task(opt, agent)

    train_time = Timer()
    validate_time = Timer()
    log_time = Timer()
    print('[ training... ]')
    parleys = 0
    total_exs = 0
    max_exs = opt['num_epochs'] * len(world)
    max_parleys = math.ceil(max_exs / opt['batchsize'])
    best_valid = 0
    impatience = 0
    saved = False
    valid_world = None
    while True:
        world.parley()
        parleys += 1

        if opt['num_epochs'] > 0 and parleys >= max_parleys:
            print('[ num_epochs completed: {} ]'.format(opt['num_epochs']))
            break
        if opt['max_train_time'] > 0 and train_time.time() > opt['max_train_time']:
            print('[ max_train_time elapsed: {} ]'.format(train_time.time()))
            break
        if opt['log_every_n_secs'] > 0 and log_time.time() > opt['log_every_n_secs']:
            if opt['display_examples']:
                print(world.display() + '\n~~')

            logs = []
            # time elapsed
            logs.append('time:{}s'.format(math.floor(train_time.time())))
            logs.append('parleys:{}'.format(parleys))

            # get report and update total examples seen so far
            if hasattr(agent, 'report'):
                train_report = agent.report()
                agent.reset_metrics()
            else:
                train_report = world.report()
                world.reset_metrics()

            if hasattr(train_report, 'get') and train_report.get('total'):
                total_exs += train_report['total']
                logs.append('total_exs:{}'.format(total_exs))

            # check if we should log amount of time remaining
            time_left = None
            if opt['num_epochs'] > 0 and total_exs > 0:
                # seconds per example so far, used to extrapolate the remaining time
                secs_per_ex = train_time.time() / total_exs
                time_left = (max_exs - total_exs) * secs_per_ex
            if opt['max_train_time'] > 0:
                other_time_left = opt['max_train_time'] - train_time.time()
                if time_left is not None:
                    time_left = min(time_left, other_time_left)
                else:
                    time_left = other_time_left
            if time_left is not None:
                logs.append('time_left:{}s'.format(math.floor(time_left)))

            # join log string and add full metrics report to end of log
            log = '[ {} ] {}'.format(' '.join(logs), train_report)

            print(log)
            log_time.reset()

        if (opt['validation_every_n_secs'] > 0 and
                validate_time.time() > opt['validation_every_n_secs']):
            valid_report, valid_world = run_eval(
                agent, opt, 'valid', opt['validation_max_exs'],
                valid_world=valid_world)
            if valid_report[opt['validation_metric']] > best_valid:
                best_valid = valid_report[opt['validation_metric']]
                impatience = 0
                print('[ new best {}: {} ]'.format(
                    opt['validation_metric'], best_valid))
                world.save_agents()
                saved = True
                if opt['validation_metric'] == 'accuracy' and best_valid == 1:
                    print('[ task solved! stopping. ]')
                    break
            else:
                impatience += 1
                print('[ did not beat best {}: {} impatience: {} ]'.format(
                        opt['validation_metric'], round(best_valid, 4),
                        impatience))
            validate_time.reset()
            if opt['validation_patience'] > 0 and impatience >= opt['validation_patience']:
                print('[ ran out of patience! stopping training. ]')
                break
    world.shutdown()
    if not saved:
        world.save_agents()
    else:
        # reload best validation model
        agent = create_agent(opt)

    run_eval(agent, opt, 'valid', write_log=True)
    run_eval(agent, opt, 'test', write_log=True)
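The training loop above calls a run_eval helper that is not included in this snippet. What follows is only a hedged sketch of what such a helper could look like, assuming it builds (or reuses) an evaluation world, loops until the epoch or the example cap is reached, and returns (report, world); the actual ParlAI implementation differs in its details.

from parlai.core.worlds import create_task

def run_eval(agent, opt, datatype, max_exs=-1, write_log=False, valid_world=None):
    """Sketch only: run the agent over the given datatype and report metrics."""
    print('[ running eval: ' + datatype + ' ]')
    opt = dict(opt)                 # shallow copy so the training datatype is untouched
    opt['datatype'] = datatype
    if valid_world is None:
        valid_world = create_task(opt, agent)   # build once, reuse on later calls
    valid_world.reset()
    cnt = 0
    while not valid_world.epoch_done():
        valid_world.parley()
        cnt += opt.get('batchsize', 1)
        if max_exs > 0 and cnt >= max_exs:
            break
    valid_report = valid_world.report()
    print('[ eval completed: {} ]'.format(valid_report))
    if write_log and opt.get('model_file'):
        with open(opt['model_file'] + '.' + datatype, 'a') as f:
            f.write('{}\n'.format(valid_report))
    return valid_report, valid_world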
Ejemplo n.º 58
0
Archivo: run.py Proyecto: zwcdp/ParlAI
def main():
    """This task consists of one agent, model or MTurk worker, talking to an
    MTurk worker to negotiate a deal.
    """
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    argparser.add_argument('--two_mturk_agents',
                           dest='two_mturk_agents',
                           action='store_true',
                           help='data collection mode '
                           'with conversations between two MTurk agents')

    opt = argparser.parse_args()
    opt['task'] = 'dealnodeal'
    opt['datatype'] = 'valid'
    opt.update(task_config)

    local_agent_1_id = 'local_1'
    mturk_agent_ids = ['mturk_agent_1']
    if opt['two_mturk_agents']:
        mturk_agent_ids.append('mturk_agent_2')

    mturk_manager = MTurkManager(opt=opt, mturk_agent_ids=mturk_agent_ids)

    mturk_manager.setup_server()

    try:
        mturk_manager.start_new_run()
        mturk_manager.create_hits()

        mturk_manager.set_onboard_function(onboard_function=None)
        mturk_manager.ready_to_accept_workers()

        def check_worker_eligibility(worker):
            return True

        def assign_worker_roles(workers):
            for index, worker in enumerate(workers):
                worker.id = mturk_agent_ids[index % len(mturk_agent_ids)]

        def run_conversation(mturk_manager, opt, workers):
            agents = workers[:]

            # Create a local agent
            if not opt['two_mturk_agents']:
                if 'model' in opt:
                    local_agent = create_agent(opt)
                else:
                    local_agent = LocalHumanAgent(opt=None)

                local_agent.id = local_agent_1_id
                agents.append(local_agent)

            opt["batchindex"] = mturk_manager.started_conversations

            world = MTurkDealNoDealDialogWorld(opt=opt, agents=agents)

            while not world.episode_done():
                world.parley()

            world.shutdown()

        mturk_manager.start_task(eligibility_function=check_worker_eligibility,
                                 assign_role_function=assign_worker_roles,
                                 task_function=run_conversation)

    except BaseException:
        raise
    finally:
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
Ejemplo n.º 59
0
    def setup_args(cls) -> ParlaiParser:
        parser = ParlaiParser()
        parser.add_argument(
            '--input',
            type=str,
            nargs='+',
            help='The input fairseq model path. Specify multiple to imply a join is necessary',
        )
        parser.add_argument('--output',
                            type=str,
                            help='The output ParlAI model path')
        parser.add_argument(
            '--vocab',
            type=str,
            help='The hugging face vocab file path, if applicable')
        parser.add_argument(
            '--merge',
            type=str,
            help='The hugging face merge file path, if applicable')
        parser.add_argument(
            '--add-prefix-space',
            type='bool',
            default=True,
            help='Add prefix space for hugging face bpe',
        )
        parser.add_argument(
            '--activation',
            type=str,
            help='Activation function',
            choices=['relu', 'gelu'],
            default='gelu',
        )
        parser.add_argument(
            '--tokenizer',
            type=str,
            help='Dict tokenizer',
            choices=['bytelevelbpe', 'gpt2'],
            default='bytelevelbpe',
        )
        parser.add_argument('--delimiter',
                            type=str,
                            default='  ',
                            help='Delimiter')
        parser.add_argument(
            '--retain-bos-emb',
            type='bool',
            default=False,
            help='Retain the BOS embedding.',
        )
        parser.add_argument(
            '--model',
            type=str,
            default='transformer/generator',
            help='Which ParlAI agent to use.',
        )
        parser.add_argument('--fp16',
                            type='bool',
                            default=False,
                            help='Whether to initialize with fp16')
        parser.add_argument(
            '--history-add-global-end-token',
            type='nonestr',
            default='end',
            hidden=True,
            choices=[None, 'end'],
            help='Add special token to the end of history encoding.',
        )
        return parser
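A hypothetical invocation of the parser above; the class name ConversionScript and every file path below are placeholders rather than anything taken from the snippet.

# Placeholder class and paths, purely for illustration; only the flag names come from setup_args above.
parser = ConversionScript.setup_args()
opt = parser.parse_args([
    '--input', 'checkpoint_best.pt',
    '--output', 'model.parlai',
    '--vocab', 'vocab.json',
    '--merge', 'merges.txt',
    '--fp16', 'true',          # ParlAI's type='bool' accepts true/false strings
])
print(opt['tokenizer'], opt['activation'])   # bytelevelbpe gelu (the defaults)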
Ejemplo n.º 60
0
    def add_cmdline_args(
        cls, parser: ParlaiParser, partial_opt: Optional[Opt] = None
    ) -> ParlaiParser:
        """
        Add command-line arguments specifically for this agent.
        """
        TransformerRankerAgent.add_cmdline_args(parser, partial_opt=partial_opt)
        agent = parser.add_argument_group('Polyencoder Arguments')
        agent.add_argument(
            '--polyencoder-type',
            type=str,
            default='codes',
            choices=['codes', 'n_first'],
            help='Type of polyencoder: either we compute '
            'vectors using codes + attention, or we '
            'simply take the first N vectors.',
            recommended='codes',
        )
        agent.add_argument(
            '--poly-n-codes',
            type=int,
            default=64,
            help='number of vectors used to represent the context; '
            'in the case of n_first, this is the number '
            'of vectors that are considered.',
            recommended=64,
        )
        agent.add_argument(
            '--poly-attention-type',
            type=str,
            default='basic',
            choices=['basic', 'sqrt', 'multihead'],
            help='Type of the top aggregation layer of the poly-'
            'encoder (where the candidate representation is '
            'the key)',
            recommended='basic',
        )
        agent.add_argument(
            '--poly-attention-num-heads',
            type=int,
            default=4,
            help='In case poly-attention-type is multihead, '
            'specify the number of heads',
        )

        # These arguments apply when the polyencoder type is 'codes'.
        agent.add_argument(
            '--codes-attention-type',
            type=str,
            default='basic',
            choices=['basic', 'sqrt', 'multihead'],
            help='Type of the codes attention layer',
            recommended='basic',
        )
        agent.add_argument(
            '--codes-attention-num-heads',
            type=int,
            default=4,
            help='In case codes-attention-type is multihead, '
            'specify the number of heads',
        )
        return parser
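As a hedged usage sketch (the import path of the poly-encoder agent is an assumption based on the usual ParlAI layout, not something stated in this snippet), the flags registered above could be exercised like this:

from parlai.core.params import ParlaiParser
# Assumed module path; adjust to wherever the agent actually lives.
from parlai.agents.transformer.polyencoder import PolyencoderAgent

parser = ParlaiParser(True, True)
PolyencoderAgent.add_cmdline_args(parser, partial_opt=None)
opt = parser.parse_args(['--polyencoder-type', 'n_first', '--poly-n-codes', '128'])
print(opt['polyencoder_type'], opt['poly_n_codes'])   # n_first 128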