def setup_args():
    """Create the parser that carries the training-loop options.

    Builds a ``ParlaiParser(True, True)`` and registers every option under
    an argument group titled 'Training Loop Arguments'. Nothing is parsed
    here.

    Returns:
        The configured parser.
    """
    parser = ParlaiParser(True, True)
    loop_args = parser.add_argument_group('Training Loop Arguments')

    # Evaluation task and example display.
    loop_args.add_argument(
        '-et', '--evaltask',
        help='task to use for valid/test (defaults to the one used for '
             'training if not set)',
    )
    loop_args.add_argument(
        '-d', '--display-examples', default=False, type='bool',
    )

    # Stopping / logging cadence (-1 is the "unset" default for these).
    loop_args.add_argument('-e', '--num-epochs', default=-1, type=float)
    loop_args.add_argument('-ttim', '--max-train-time', default=-1, type=float)
    loop_args.add_argument('-ltim', '--log-every-n-secs', default=2, type=float)

    # Validation schedule and model selection.
    loop_args.add_argument(
        '-vtim', '--validation-every-n-secs', default=-1, type=float,
    )
    loop_args.add_argument(
        '-vme', '--validation-max-exs', default=-1, type=int,
        help='max examples to use during validation (default -1 uses all)',
    )
    loop_args.add_argument(
        '-vp', '--validation-patience', default=10, type=int,
        help='number of iterations of validation where result does not '
             'improve before we stop training',
    )
    loop_args.add_argument(
        '-vmt', '--validation-metric', default='accuracy',
        help='key into report table for selecting best validation',
    )

    # Dictionary handling.
    loop_args.add_argument(
        '-dbf', '--dict-build-first', default=True, type='bool',
        help='build dictionary first before training agent',
    )
    return parser
def main():
    """Run a tiny train-then-validate demonstration and print the timing.

    A single agent is attached to a 'train' world and a 'valid' world; the
    train world is stepped ten times, the valid world once, and each
    world's report is printed.
    """
    train_steps = 10
    valid_steps = 1

    # Get command line arguments
    parser = ParlaiParser()
    parser.add_argument('-n', '--num-examples', default=10)
    opt = parser.parse_args()

    agent = Agent(opt)

    # The same opt dict is reused; only the datatype differs per world.
    opt['datatype'] = 'train'
    world_train = create_task(opt, agent)
    opt['datatype'] = 'valid'
    world_valid = create_task(opt, agent)

    began_at = time.time()

    # train / valid loop (a single round)
    for _round in range(1):
        print('[ training ]')
        for _step in range(train_steps):
            # train for a bit
            world_train.parley()
        print('[ training summary. ]')
        print(world_train.report())

        print('[ validating ]')
        for _step in range(valid_steps):
            # check valid accuracy
            world_valid.parley()
        print('[ validation summary. ]')
        print(world_valid.report())

    elapsed = round(time.time() - began_at, 2)
    print('finished in {} s'.format(elapsed))
def main():
    """Connect to a remote agent and run the conversation loop forever.

    If a task is given, the remote agent is attached to that task.
    Otherwise it is paired with a local partner: a model agent when
    ``opt['model']`` is set, else a local human.
    """
    random.seed(42)

    # Get command line arguments
    parser = ParlaiParser(True, True)
    RemoteAgentAgent.add_cmdline_args(parser)
    opt = parser.parse_args()

    remote = RemoteAgentAgent(opt)

    if opt.get('task'):
        world = create_task(opt, [remote])
    else:
        local = create_agent(opt) if opt.get('model') else LocalHumanAgent(opt)
        # the remote-host goes **second**
        if opt['remote_host']:
            agents = [remote, local]
        else:
            agents = [local, remote]
        world = DialogPartnerWorld(opt, agents)

    # Talk to the remote agent
    with world:
        while True:
            world.parley()
            print(world.display())
def test_basic_parse(self):
    """Check that the dictionary correctly adds and parses a short sentence.

    The sentence 'hello world' is observed once, which must add exactly two
    new entries after the built-in ones. Parsing the same sentence must
    then return the two new indices, in order, for the default vector type
    as well as for ``list`` and ``tuple``.
    """
    from parlai.core.dict import DictionaryAgent
    from parlai.core.params import ParlaiParser

    argparser = ParlaiParser()
    DictionaryAgent.add_cmdline_args(argparser)
    # Parse an explicit empty argument list: with no argument the parser
    # falls back to sys.argv, which belongs to the test runner here.
    opt = argparser.parse_args(args=[])
    dictionary = DictionaryAgent(opt)
    num_builtin = len(dictionary)

    dictionary.observe({'text': 'hello world'})
    dictionary.act()
    assert len(dictionary) - num_builtin == 2

    # Default vec_type first, then the two explicit container types.
    for parse_kwargs in ({}, {'vec_type': list}, {'vec_type': tuple}):
        vec = dictionary.parse('hello world', **parse_kwargs)
        assert len(vec) == 2
        assert vec[0] == num_builtin
        assert vec[1] == num_builtin + 1
def main():
    """Parse the command line and hand the options to ``display_data``."""
    random.seed(42)

    # Get command line arguments
    cli = ParlaiParser()
    cli.add_argument('-n', '--num-examples', type=int, default=10)

    display_data(cli.parse_args())
def main():
    """Parse evaluation options (datatype defaults to 'valid') and run
    ``eval_model`` with the options and the parser itself.
    """
    random.seed(42)

    # Get command line arguments
    parser = ParlaiParser(True, True)
    script_args = (
        (('-n', '--num-examples'), {'default': 100000000}),
        (('-d', '--display-examples'), {'type': 'bool', 'default': False}),
        (('-ltim', '--log-every-n-secs'), {'type': float, 'default': 2}),
    )
    for flags, settings in script_args:
        parser.add_argument(*flags, **settings)
    parser.set_defaults(datatype='valid')

    # print_args=False is passed through to the parser unchanged.
    opt = parser.parse_args(print_args=False)
    eval_model(opt, parser)
def main():
    """Run a ConvAI sample agent in a ConvAI world indefinitely.

    Any ``Exception`` raised by a step is printed and the loop continues,
    so one failing exchange does not stop the process.
    """
    cli = ParlaiParser(True, True)
    ConvAIWorld.add_cmdline_args(cli)
    opt = cli.parse_args()

    sample_agent = ConvAISampleAgent(opt)
    world = ConvAIWorld(opt, [sample_agent])

    while True:
        try:
            world.parley()
        except Exception as err:
            # Best effort: report and keep serving.
            print('Exception: {}'.format(err))
def main():
    """Parse the data-building options and run ``build_data``.

    The options are registered under a 'Data Building Args' group.
    """
    # Get command line arguments
    argparser = ParlaiParser(True, True)
    build = argparser.add_argument_group('Data Building Args')
    build.add_argument(
        '--datafile',
        help='The file to be loaded, preprocessed, and saved',
    )
    build.add_argument(
        '--pytorch-buildteacher', type=str, default='',
        help='Which teacher to use when building the pytorch data',
    )
    build.add_argument(
        '--pytorch-preprocess', type='bool', default=True,
        # The two fragments previously joined as "buildingthe"; the
        # separating space is now part of the first fragment.
        help='Whether the agent should preprocess the data while building '
             'the pytorch data',
    )
    opt = argparser.parse_args()
    build_data(opt)
def test_fvqa(self):
    """Exercise the fvqa ``DefaultTeacher`` on 'train:ordered' and 'test'.

    For each datatype a teacher is built, asked to act once, and the reply
    is passed to ``check``. ``self.TMP_PATH`` is removed afterwards, even
    when a teacher or a check fails, so a failing run does not leave the
    temporary directory behind.
    """
    from parlai.core.params import ParlaiParser

    parser = ParlaiParser()
    parser.add_task_args(['-t', 'fvqa'])
    opt = parser.parse_args(args=self.args)

    from parlai.tasks.fvqa.agents import DefaultTeacher

    try:
        for dt in ['train:ordered', 'test']:
            opt['datatype'] = dt
            teacher = DefaultTeacher(opt)
            reply = teacher.act()
            check(opt, reply)
    finally:
        # Clean up regardless of the outcome of the checks above.
        shutil.rmtree(self.TMP_PATH)
def main():
    """Show up to ``--num-examples`` exchanges between a model and a task.

    Stops early once the world reports that the epoch is done.
    """
    random.seed(42)

    # Get command line arguments
    cli = ParlaiParser(True, True)
    cli.add_argument('-n', '--num-examples', default=10)
    opt = cli.parse_args()

    # Create model and assign it to the specified task
    model_agent = create_agent(opt)
    world = create_task(opt, model_agent)

    # Show some example dialogs.
    with world:
        remaining = int(opt['num_examples'])
        while remaining > 0:
            remaining -= 1
            world.parley()
            print(world.display() + "\n~~")
            if not world.epoch_done():
                continue
            print("EPOCH DONE")
            break
def main():
    """Display up to ``--num-examples`` exchanges of a task, using a
    ``RepeatLabelAgent`` as the responder.

    Stops early once the world reports that the epoch is done.
    """
    random.seed(42)

    # Get command line arguments
    cli = ParlaiParser()
    cli.add_argument('-n', '--num-examples', type=int, default=10)
    opt = cli.parse_args()

    # create repeat label agent and assign it to the specified task
    world = create_task(opt, RepeatLabelAgent(opt))

    # Show some example dialogs.
    with world:
        for _shown in range(opt['num_examples']):
            world.parley()
            print(world.display() + '\n~~')
            if not world.epoch_done():
                continue
            print('EPOCH DONE')
            break
def __init__(self, args=None, **kwargs):
    """Initialize the predictor, setting up opt automatically if necessary.

    Args:
        args: optional sequence in the same format as ``sys.argv``, e.g.
            ``['--model', 'seq2seq', '-hs', 128, '-lr', 0.5]``. Items are
            converted with ``str`` before parsing, so numeric values are
            accepted. The sequence passed in is not modified.
        **kwargs: extra options. Each key is turned into a flag by
            prepending ``'--'`` and replacing underscores with hyphens, so
            ``dict_file='/tmp/dict.tsv'`` is interpreted as
            ``--dict-file /tmp/dict.tsv``. Values are converted with
            ``str``.

    Sets ``self.opt`` to the parsed options and ``self.agent`` to the
    agent created from them.
    """
    from parlai.core.params import ParlaiParser
    from parlai.core.agents import create_agent

    # Work on a private, stringified copy: appending to the caller's list
    # would leak the kwargs-derived flags back to the caller, and the
    # argument parser only understands strings.
    argv = [] if args is None else [str(item) for item in args]
    for key, value in kwargs.items():
        argv.append('--' + str(key).replace('_', '-'))
        argv.append(str(value))

    parser = ParlaiParser(True, True, model_argv=argv)
    self.opt = parser.parse_args(argv)
    self.agent = create_agent(self.opt)
def main():
    """Display up to ``--num-examples`` exchanges of a task, with the
    ``ImageLoader`` options registered on the parser.

    The datatype defaults to 'train:ordered'; ``no_cuda`` and ``gpu`` are
    forced to ``False`` and ``0`` after parsing.
    """
    random.seed(42)

    # Get command line arguments
    cli = ParlaiParser()
    cli.add_argument('-n', '--num-examples', default=10)
    cli.set_defaults(datatype='train:ordered')
    ImageLoader.add_cmdline_args(cli)
    opt = cli.parse_args()

    opt['no_cuda'] = False
    opt['gpu'] = 0

    # create repeat label agent and assign it to the specified task
    world = create_task(opt, RepeatLabelAgent(opt))

    # Show some example dialogs.
    with world:
        for _shown in range(int(opt['num_examples'])):
            world.parley()
            print(world.display() + '\n~~')
            if not world.epoch_done():
                continue
            print('EPOCH DONE')
            break
def main():
    """Let a local human talk to a model until the epoch is done.

    The task is forced to the ``LocalHumanAgent`` module path after
    parsing, and the resulting options are printed.
    """
    random.seed(42)

    # Get command line arguments
    cli = ParlaiParser(True, True)
    cli.add_argument('-d', '--display-examples', default=False, type='bool')
    opt = cli.parse_args()

    opt['task'] = 'parlai.agents.local_human.local_human:LocalHumanAgent'
    print(opt)

    # Create model and assign it to the specified task
    model_agent = create_agent(opt)
    world = create_task(opt, model_agent)

    # Show some example dialogs:
    finished = False
    while not finished:
        world.parley()
        if opt['display_examples']:
            print("---")
            print(world.display() + "\n~~")
        finished = world.epoch_done()
        if finished:
            print("EPOCH DONE")
def main():
    """Step through one epoch of a task, logging progress.

    Progress is counted in units of the batch size (1 when the options do
    not carry ``batchsize``) against ``world.num_examples()``.
    """
    # Get command line arguments
    cli = ParlaiParser(True, False)
    cli.set_defaults(datatype='train:ordered')
    opt = cli.parse_args()

    batch_size = opt.get('batchsize', 1)
    opt['no_cuda'] = False
    opt['gpu'] = 0
    opt['num_epochs'] = 1

    # create repeat label agent and assign it to the specified task
    world = create_task(opt, RepeatLabelAgent(opt))

    progress = ProgressLogger(should_humanize=False)
    print("Beginning image extraction...")

    seen = 0
    total = world.num_examples()
    while True:
        if world.epoch_done():
            break
        world.parley()
        seen = seen + batch_size
        progress.log(seen, total)

    print("Finished extracting images")
def main():
    """Run the QA data-collection task through the messenger manager.

    A SQuAD ``DefaultTeacher`` class is loaded dynamically to provide
    context. Each conversation runs a ``QADataCollectionWorld`` with a
    single agent until its episode is done. The manager is always shut
    down, whether or not the run raises.
    """
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_messenger_args()
    opt = argparser.parse_args()

    # The task name is the name of the directory containing this file.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    opt['task'] = os.path.basename(script_dir)

    # Initialize a SQuAD teacher agent, which we will get context from
    task_class = getattr(
        importlib.import_module('parlai.tasks.squad.agents'),
        'DefaultTeacher',
    )
    task_opt = {
        'datatype': 'train',
        'datapath': opt['datapath'],
    }

    messenger_manager = MessengerManager(opt=opt)
    messenger_manager.setup_server()
    messenger_manager.init_new_state()

    def get_overworld(agent):
        return MessengerOverworld(None, agent)

    def assign_agent_role(agent):
        agent[0].disp_id = 'Agent'

    def run_conversation(manager, opt, agents, task_id):
        world = QADataCollectionWorld(
            opt=opt,
            task=task_class(task_opt),
            agent=agents[0],
        )
        while not world.episode_done():
            world.parley()
        world.shutdown()

    # World with no onboarding
    messenger_manager.set_onboard_functions({'default': None})
    messenger_manager.set_agents_required({'default': 1})
    messenger_manager.set_overworld_func(get_overworld)
    messenger_manager.setup_socket()

    try:
        messenger_manager.start_new_run()
        messenger_manager.start_task(
            assign_role_functions={'default': assign_agent_role},
            task_functions={'default': run_conversation},
        )
    finally:
        # Any exception still propagates after the manager is shut down.
        messenger_manager.shutdown()
def main():
    """Step a model through a task, printing the running report each step.

    Runs for at most ``--num-examples`` steps or until the epoch is done,
    then shuts the world down. The datatype defaults to 'valid'.
    """
    random.seed(42)

    # Get command line arguments
    cli = ParlaiParser(True, True)
    cli.add_argument('-n', '--num-examples', default=100000000)
    cli.add_argument('-d', '--display-examples', default=False, type='bool')
    cli.set_defaults(datatype='valid')
    opt = cli.parse_args()

    # Create model and assign it to the specified task
    model_agent = create_agent(opt)
    world = create_task(opt, model_agent)

    # Show some example dialogs:
    for _step in range(int(opt['num_examples'])):
        world.parley()
        print("---")
        if opt['display_examples']:
            print(world.display() + "\n~~")
        print(world.report())
        if not world.epoch_done():
            continue
        print("EPOCH DONE")
        break

    world.shutdown()
def main():
    """Run MTurk HITs in which a worker evaluates an ``IrBaselineAgent``.

    One ``run_hit`` job is started for every (hit, assignment) pair using
    a threading-backed ``Parallel`` pool. ``run_hit`` is published as a
    module-level global.
    """
    global run_hit

    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()

    # The dialog model we want to evaluate
    from parlai.agents.ir_baseline.ir_baseline import IrBaselineAgent

    IrBaselineAgent.add_cmdline_args(argparser)
    opt = argparser.parse_args()

    # The task name is the name of the directory containing this file.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    opt['task'] = os.path.basename(script_dir)
    opt.update(task_config)

    # The task that we will evaluate the dialog model on
    task_opt = {
        'datatype': 'test',
        'datapath': opt['datapath'],
        'task': '#MovieDD-Reddit',
    }

    mturk_agent_id = 'Worker'
    mturk_manager = MTurkManager(
        opt=opt,
        mturk_agent_ids=[mturk_agent_id],
        # In speaking order
        all_agent_ids=[ModelEvaluatorWorld.evaluator_agent_id, mturk_agent_id],
    )
    mturk_manager.init_aws(opt=opt)

    def run_hit(hit_index, assignment_index, opt, task_opt, mturk_manager):
        conversation_id = '{}_{}'.format(hit_index, assignment_index)

        model_agent = IrBaselineAgent(opt=opt)
        # Create the MTurk agent which provides a chat interface to the Turker
        mturk_agent = MTurkAgent(
            id=mturk_agent_id,
            manager=mturk_manager,
            conversation_id=conversation_id,
            opt=opt,
        )
        world = ModelEvaluatorWorld(
            opt=opt,
            model_agent=model_agent,
            task_opt=task_opt,
            mturk_agent=mturk_agent,
        )

        while not world.episode_done():
            world.parley()
        world.shutdown()
        world.review_work()

    mturk_manager.create_hits(opt=opt)

    pool = Parallel(
        n_jobs=opt['num_hits'] * opt['num_assignments'],
        backend='threading',
    )
    hit_numbers = range(1, opt['num_hits'] + 1)
    assignment_numbers = range(1, opt['num_assignments'] + 1)
    results = pool(
        delayed(run_hit)(hit_no, assignment_no, opt, task_opt, mturk_manager)
        for hit_no, assignment_no in product(hit_numbers, assignment_numbers)
    )

    mturk_manager.shutdown()
def main():
    """Run MTurk HITs pairing two local humans with two MTurk agents.

    One ``run_hit`` job is started for every (hit, assignment) pair using
    a threading-backed ``Parallel`` pool. ``run_hit`` is published as a
    module-level global.
    """
    global run_hit

    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    opt = argparser.parse_args()

    # The task name is the name of the directory containing this file.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    opt['task'] = os.path.basename(script_dir)
    opt.update(task_config)

    mturk_agent_1_id, mturk_agent_2_id = 'mturk_agent_1', 'mturk_agent_2'
    human_agent_1_id, human_agent_2_id = 'human_1', 'human_2'

    mturk_manager = MTurkManager(
        opt=opt,
        mturk_agent_ids=[mturk_agent_1_id, mturk_agent_2_id],
        # In speaking order
        all_agent_ids=[
            human_agent_1_id,
            human_agent_2_id,
            mturk_agent_1_id,
            mturk_agent_2_id,
        ],
    )
    mturk_manager.init_aws(opt=opt)

    def run_hit(hit_index, assignment_index, opt, mturk_manager):
        conversation_id = '{}_{}'.format(hit_index, assignment_index)

        # Create mturk agents
        mturk_agent_1 = MTurkAgent(
            id=mturk_agent_1_id,
            manager=mturk_manager,
            conversation_id=conversation_id,
            opt=opt,
        )
        mturk_agent_2 = MTurkAgent(
            id=mturk_agent_2_id,
            manager=mturk_manager,
            conversation_id=conversation_id,
            opt=opt,
        )

        # Create the local human agents
        human_agent_1 = LocalHumanAgent(opt=None)
        human_agent_1.id = human_agent_1_id
        human_agent_2 = LocalHumanAgent(opt=None)
        human_agent_2.id = human_agent_2_id

        participants = [human_agent_1, human_agent_2, mturk_agent_1, mturk_agent_2]
        world = MultiAgentDialogWorld(opt=opt, agents=participants)

        while not world.episode_done():
            world.parley()
        world.shutdown()

    mturk_manager.create_hits(opt=opt)

    pool = Parallel(
        n_jobs=opt['num_hits'] * opt['num_assignments'],
        backend='threading',
    )
    hit_numbers = range(1, opt['num_hits'] + 1)
    assignment_numbers = range(1, opt['num_assignments'] + 1)
    results = pool(
        delayed(run_hit)(hit_no, assignment_no, opt, mturk_manager)
        for hit_no, assignment_no in product(hit_numbers, assignment_numbers)
    )

    mturk_manager.shutdown()
def main():
    """Run MTurk HITs that collect QA data from a worker.

    A SQuAD ``DefaultTeacher`` class is loaded dynamically to provide
    context. One ``run_hit`` job is started for every (hit, assignment)
    pair using a threading-backed ``Parallel`` pool. ``run_hit`` is
    published as a module-level global.
    """
    global run_hit

    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    opt = argparser.parse_args()

    # The task name is the name of the directory containing this file.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    opt['task'] = os.path.basename(script_dir)
    opt.update(task_config)

    # Initialize a SQuAD teacher agent, which we will get context from
    task_class = getattr(
        importlib.import_module('parlai.tasks.squad.agents'),
        'DefaultTeacher',
    )
    task_opt = {
        'datatype': 'train',
        'datapath': opt['datapath'],
    }

    mturk_agent_id = 'Worker'
    mturk_manager = MTurkManager(
        opt=opt,
        mturk_agent_ids=[mturk_agent_id],
        # In speaking order
        all_agent_ids=[QADataCollectionWorld.collector_agent_id, mturk_agent_id],
    )
    mturk_manager.init_aws(opt=opt)

    def run_hit(hit_index, assignment_index, task_class, task_opt, opt, mturk_manager):
        conversation_id = '{}_{}'.format(hit_index, assignment_index)

        task = task_class(task_opt)
        # Create the MTurk agent which provides a chat interface to the Turker
        mturk_agent = MTurkAgent(
            id=mturk_agent_id,
            manager=mturk_manager,
            conversation_id=conversation_id,
            opt=opt,
        )
        world = QADataCollectionWorld(opt=opt, task=task, mturk_agent=mturk_agent)

        while not world.episode_done():
            world.parley()
        world.shutdown()
        world.review_work()

    mturk_manager.create_hits(opt=opt)

    pool = Parallel(
        n_jobs=opt['num_hits'] * opt['num_assignments'],
        backend='threading',
    )
    hit_numbers = range(1, opt['num_hits'] + 1)
    assignment_numbers = range(1, opt['num_assignments'] + 1)
    results = pool(
        delayed(run_hit)(
            hit_no, assignment_no, task_class, task_opt, opt, mturk_manager
        )
        for hit_no, assignment_no in product(hit_numbers, assignment_numbers)
    )

    mturk_manager.shutdown()
stddev = np.std(np.array(accs), dtype=np.float64) return acc, acc_len, stddev start, end = 0, M for train_name in NAMES: for valid_name in ['INIT', 'ALL']: for round_index in range(5): sub_perfs = perfs[start: end] acc, acc_len, stddev = get_acc_and_acc_len(sub_perfs) print_and_log('{} on {} round{}: acc {} stddev {} acc_len {}'.format(train_name, valid_name, round_index, acc, stddev, acc_len)) log_only('{} on {} round {}: {}'.format(train_name, valid_name, round_index, sub_perfs)) start = end end = start + M if __name__ == '__main__': argparser = ParlaiParser(False, False) # ============ below copied from projects/graph_world2/train.py ============ argparser.add_arg('--vocab_size', type=int, default=1000) argparser.add_arg('--terminate', type=bool, default=False) argparser.add_arg('--lr', type=float, default=1e-3) argparser.add_arg('--max_seq_in', type=int, default=30) argparser.add_arg('--embedding_dim', type=int, default=50) argparser.add_arg('--rnn_h', type=int, default=350) argparser.add_arg('--rnn_layers', type=int, default=1) argparser.add_arg('--cuda', type=bool, default=True) argparser.add_arg('--eval_period', type=int, default=200) argparser.add_arg('--max_seq_out', type=int, default=5) argparser.add_arg('--label_ratio', type=float, default=1.0) argparser.add_arg('--max_iter', type=int, default=100000) argparser.add_arg('--exit_iter', type=int, default=3000)
def main():
    """Train an agent on a task, validating periodically, then evaluate.

    Flow:
      1. Parse the training-loop options.
      2. Optionally build a dictionary first (``--dict-build-first``),
         defaulting ``dict_file`` to ``model_file + '.dict'`` when unset.
      3. Loop over ``world.parley()`` until one of: the epoch budget is
         used up, the time budget is exceeded, validation accuracy reaches
         1, or validation patience runs out.
      4. Save the agent if no validation run ever saved it; otherwise
         re-create the agent from ``opt``. Finally run 'valid' and 'test'
         evaluations with ``write_log=True``.
    """
    # Get command line arguments
    parser = ParlaiParser(True, True)
    train = parser.add_argument_group('Training Loop Arguments')
    train.add_argument(
        '-et', '--evaltask',
        help='task to use for valid/test (defaults to the one used for '
             'training if not set)',
    )
    train.add_argument('-d', '--display-examples', type='bool', default=False)
    train.add_argument('-e', '--num-epochs', type=float, default=-1)
    train.add_argument('-ttim', '--max-train-time', type=float, default=-1)
    train.add_argument('-ltim', '--log-every-n-secs', type=float, default=2)
    train.add_argument('-vtim', '--validation-every-n-secs', type=float, default=-1)
    train.add_argument(
        '-vme', '--validation-max-exs', type=int, default=-1,
        help='max examples to use during validation (default -1 uses all)',
    )
    train.add_argument(
        '-vp', '--validation-patience', type=int, default=5,
        help='number of iterations of validation where result does not '
             'improve before we stop training',
    )
    train.add_argument(
        '-dbf', '--dict-build-first', type='bool', default=True,
        help='build dictionary first before training agent',
    )
    opt = parser.parse_args()

    # Possibly build a dictionary (not all models do this).
    if opt['dict_build_first'] and 'dict_file' in opt:
        if opt['dict_file'] is None and opt.get('model_file'):
            opt['dict_file'] = opt['model_file'] + '.dict'
        print("[ building dictionary first... ]")
        build_dict.build_dict(opt)

    # Create model and assign it to the specified task
    agent = create_agent(opt)
    world = create_task(opt, agent)

    train_time = Timer()
    validate_time = Timer()
    log_time = Timer()
    print('[ training... ]')

    parleys = 0
    total_exs = 0
    # Only meaningful when num_epochs > 0; every use below is gated on that.
    max_exs = opt['num_epochs'] * len(world)
    max_parleys = math.ceil(max_exs / opt['batchsize'])
    best_accuracy = 0
    impatience = 0
    saved = False
    valid_world = None

    while True:
        world.parley()
        parleys += 1

        if opt['num_epochs'] > 0 and parleys >= max_parleys:
            print('[ num_epochs completed: {} ]'.format(opt['num_epochs']))
            break
        if opt['max_train_time'] > 0 and train_time.time() > opt['max_train_time']:
            print('[ max_train_time elapsed: {} ]'.format(train_time.time()))
            break

        if opt['log_every_n_secs'] > 0 and log_time.time() > opt['log_every_n_secs']:
            if opt['display_examples']:
                print(world.display() + '\n~~')

            logs = []
            # time elapsed
            logs.append('time:{}s'.format(math.floor(train_time.time())))
            logs.append('parleys:{}'.format(parleys))

            # get report and update total examples seen so far
            if hasattr(agent, 'report'):
                train_report = agent.report()
                agent.reset_metrics()
            else:
                train_report = world.report()
                world.reset_metrics()

            if hasattr(train_report, 'get') and train_report.get('total'):
                total_exs += train_report['total']
                logs.append('total_exs:{}'.format(total_exs))

            # check if we should log amount of time remaining
            time_left = None
            # total_exs stays 0 until a report carries a truthy 'total';
            # skip the epoch-based estimate then instead of dividing by zero.
            if opt['num_epochs'] > 0 and total_exs > 0:
                secs_per_ex = train_time.time() / total_exs
                time_left = (max_exs - total_exs) * secs_per_ex
            if opt['max_train_time'] > 0:
                other_time_left = opt['max_train_time'] - train_time.time()
                if time_left is not None:
                    time_left = min(time_left, other_time_left)
                else:
                    time_left = other_time_left
            if time_left is not None:
                logs.append('time_left:{}s'.format(math.floor(time_left)))

            # join log string and add full metrics report to end of log
            log = '[ {} ] {}'.format(' '.join(logs), train_report)
            print(log)
            log_time.reset()

        if (opt['validation_every_n_secs'] > 0 and
                validate_time.time() > opt['validation_every_n_secs']):
            valid_report, valid_world = run_eval(
                agent, opt, 'valid', opt['validation_max_exs'],
                valid_world=valid_world)
            if valid_report['accuracy'] > best_accuracy:
                best_accuracy = valid_report['accuracy']
                impatience = 0
                print('[ new best accuracy: ' + str(best_accuracy) + ' ]')
                world.save_agents()
                saved = True
                if best_accuracy == 1:
                    print('[ task solved! stopping. ]')
                    break
            else:
                impatience += 1
                print('[ did not beat best accuracy: {} impatience: {} ]'.format(
                    round(best_accuracy, 4), impatience))
            validate_time.reset()
            if (opt['validation_patience'] > 0 and
                    impatience >= opt['validation_patience']):
                print('[ ran out of patience! stopping training. ]')
                break

    world.shutdown()
    if not saved:
        world.save_agents()
    else:
        # reload best validation model
        agent = create_agent(opt)

    run_eval(agent, opt, 'valid', write_log=True)
    run_eval(agent, opt, 'test', write_log=True)
def main():
    """This task consists of an MTurk agent evaluating a chit-chat model.

    They are asked to chat to the model adopting a specific persona. After
    their conversation, they are asked to evaluate their partner on several
    metrics.

    The model agent is created once and its shared parameters are handed
    to every conversation world. Unassigned HITs are expired and the
    manager is shut down whether or not the run raises.
    """
    argparser = ParlaiParser(False, add_model_args=True)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    argparser.add_argument('-mt', '--max-turns', default=10, type=int,
                           help='maximal number of chat turns')
    argparser.add_argument('--max-resp-time', default=180, type=int,
                           help='time limit for entering a dialog message')
    argparser.add_argument('--max-persona-time', type=int, default=300,
                           help='time limit for turker entering the persona')
    # NOTE(review): this help text duplicates --max-resp-time's; the value
    # is passed below as agent_timeout_shutdown. Confirm the intended text.
    argparser.add_argument('--ag-shutdown-time', default=120, type=int,
                           help='time limit for entering a dialog message')
    argparser.add_argument('--persona-type', default='both', type=str,
                           choices=['both', 'self', 'other'],
                           help='Which personas to load from personachat')
    argparser.add_argument('--revised', default=False, type='bool',
                           help='Whether to use revised personas')
    argparser.add_argument('-rt', '--range-turn', default='5,6',
                           help='sample range of number of turns')
    argparser.add_argument('--auto-approve-delay', type=int,
                           default=3600 * 24 * 1,
                           help='how long to wait for auto approval')

    # ADD MODEL ARGS HERE (KVMEMNN ADDED AS AN EXAMPLE)
    argparser.set_defaults(
        model='projects.personachat.kvmemnn.kvmemnn:Kvmemnn',
        model_file='models:convai2/kvmemnn/model',
    )
    opt = argparser.parse_args()

    # add additional model args
    opt['no_cuda'] = True
    opt['override'] = ['interactive_mode']
    opt['interactive_mode'] = True

    bot = create_agent(opt)
    shared_bot_params = bot.share()

    # The task name is the name of the directory containing this file.
    opt['task'] = os.path.basename(os.path.dirname(os.path.abspath(__file__)))
    if 'data_path' not in opt:
        opt['data_path'] = os.getcwd() + '/data/' + opt['task']
    opt.update(task_config)

    mturk_agent_ids = ['PERSON_1']
    mturk_manager = MTurkManager(opt=opt, mturk_agent_ids=mturk_agent_ids)

    persona_generator = PersonasGenerator(opt)
    mturk_manager.setup_server()

    try:
        mturk_manager.start_new_run()
        mturk_manager.create_hits()

        if not opt['is_sandbox']:
            # ADD BLOCKED WORKERS HERE
            blocked_worker_list = []
            for w in blocked_worker_list:
                # Implicit string concatenation keeps source indentation
                # out of the message shown to the worker.
                mturk_manager.block_worker(
                    w,
                    'We found that you have unexpected behaviors in our '
                    'previous HITs. For more questions please email us.')

        def run_onboard(worker):
            worker.persona_generator = persona_generator
            world = PersonaProfileWorld(opt, worker)
            world.parley()
            world.shutdown()

        mturk_manager.set_onboard_function(onboard_function=run_onboard)
        mturk_manager.ready_to_accept_workers()

        def check_worker_eligibility(worker):
            return True

        def assign_worker_roles(workers):
            for index, worker in enumerate(workers):
                worker.id = mturk_agent_ids[index % len(mturk_agent_ids)]

        def run_conversation(mturk_manager, opt, workers):
            agents = workers[0]
            conv_idx = mturk_manager.conversation_index
            world = Convai2EvalWorld(
                opt=opt,
                agents=[agents],
                range_turn=[int(s) for s in opt['range_turn'].split(',')],
                max_turn=opt['max_turns'],
                max_resp_time=opt['max_resp_time'],
                model_agent_opt=shared_bot_params,
                world_tag='conversation t_{}'.format(conv_idx),
                agent_timeout_shutdown=opt['ag_shutdown_time'],
            )
            world.reset_random()
            while not world.episode_done():
                world.parley()
            world.save_data()

            world.shutdown()
            world.review_work()

        mturk_manager.start_task(
            eligibility_function=check_worker_eligibility,
            assign_role_function=assign_worker_roles,
            task_function=run_conversation,
        )
    finally:
        # Runs on success and on any exception; the exception propagates.
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
def setup_args(parser=None):
    """Register the self-chat options on a parser.

    Args:
        parser: parser to extend. When ``None``, a new
            ``ParlaiParser(True, True, 'Self chat with a model')`` is
            created.

    Returns:
        The parser, with ``interactive_mode=True`` and ``task='self_chat'``
        set as defaults and the ``WorldLogger`` options added.
    """
    if parser is None:
        parser = ParlaiParser(True, True, 'Self chat with a model')

    # (flags, keyword settings) for every script-level option, in order.
    option_table = (
        (('--seed',), dict(type=int, default=42)),
        (('-d', '--display-examples'), dict(type='bool', default=True)),
        (
            ('--display-ignore-fields',),
            dict(
                type=str,
                default='label_candidates,text_candidates',
                help='Do not display these fields',
            ),
        ),
        (
            ('-st', '--selfchat-task'),
            dict(
                type='bool',
                default=True,
                help='Create a self chat version of the task',
            ),
        ),
        (
            ('--num-self-chats',),
            dict(type=int, default=1, help='Number of self chats to run'),
        ),
        (
            ('--selfchat-max-turns',),
            dict(
                type=int,
                default=6,
                help='The number of dialogue turns before self chat ends',
            ),
        ),
        (
            ('--seed-messages-from-task',),
            dict(
                action='store_true',
                help='Automatically seed conversation with messages from task dataset.',
            ),
        ),
        (
            ('--outfile',),
            dict(type=str, default=None, help='File to save self chat logs'),
        ),
        (
            ('--save-format',),
            dict(
                type=str,
                default='conversations',
                choices=['conversations', 'parlai', 'jsonl'],
                help='Format to save logs in',
            ),
        ),
    )
    for flags, settings in option_table:
        parser.add_argument(*flags, **settings)

    parser.set_defaults(interactive_mode=True, task='self_chat')
    WorldLogger.add_cmdline_args(parser)
    return parser
def main():
    """Run the MTurk QA data-collection task.

    A SQuAD ``DefaultTeacher`` class is loaded dynamically to provide
    context. An onboarding function is defined here but the manager is
    configured with ``onboard_function=None``. ``run_conversation`` is
    published as a module-level global. Unassigned HITs are expired and the
    manager is shut down whether or not the run raises.
    """
    global run_conversation

    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    opt = argparser.parse_args()

    # The task name is the name of the directory containing this file.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    opt['task'] = os.path.basename(script_dir)
    opt.update(task_config)

    # Initialize a SQuAD teacher agent, which we will get context from
    task_class = getattr(
        importlib.import_module('parlai.tasks.squad.agents'),
        'DefaultTeacher',
    )
    task_opt = {
        'datatype': 'train',
        'datapath': opt['datapath'],
    }

    mturk_agent_id = 'Worker'
    mturk_manager = MTurkManager(opt=opt, mturk_agent_ids=[mturk_agent_id])
    mturk_manager.setup_server()

    def run_onboard(worker):
        world = QADataCollectionOnboardWorld(opt=opt, mturk_agent=worker)
        while not world.episode_done():
            world.parley()
        world.shutdown()

    mturk_manager.set_onboard_function(onboard_function=None)

    try:
        mturk_manager.start_new_run()
        mturk_manager.create_hits()
        mturk_manager.ready_to_accept_workers()

        def check_workers_eligibility(workers):
            return workers

        def assign_worker_roles(worker):
            worker[0].id = mturk_agent_id

        def run_conversation(mturk_manager, opt, workers):
            world = QADataCollectionWorld(
                opt=opt,
                task=task_class(task_opt),
                mturk_agent=workers[0],
            )
            while not world.episode_done():
                world.parley()
            world.shutdown()
            world.review_work()

        mturk_manager.start_task(
            eligibility_function={
                'func': check_workers_eligibility,
                'multiple': True,
            },
            assign_role_function=assign_worker_roles,
            task_function=run_conversation,
        )
    finally:
        # Any exception still propagates after cleanup.
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
def main():
    """Parse the ``DictionaryAgent`` options and run ``build_dict``."""
    # Get command line arguments
    cli = ParlaiParser()
    DictionaryAgent.add_cmdline_args(cli)
    build_dict(cli.parse_args())
def setup_args(parser=None):
    """Register the evaluation options on a parser.

    Args:
        parser: parser to extend. When ``None``, a new
            ``ParlaiParser(True, True)`` is created.

    Returns:
        The parser, with the ``TensorboardLogger`` options added and
        ``datatype`` defaulting to 'valid'.
    """
    if parser is None:
        parser = ParlaiParser(True, True)

    # Get command line arguments
    parser.add_argument('-ne', '--num-examples', type=int, default=-1)
    parser.add_argument('-d', '--display-examples', type='bool', default=False)
    parser.add_argument('-ltim', '--log-every-n-secs', type=float, default=2)
    parser.add_argument(
        '--metrics',
        type=str,
        default="all",
        # The fragments previously joined as "hits@1.If"; a separating
        # space is now included.
        help="list of metrics to show/compute, e.g. ppl,f1,accuracy,hits@1. "
             "If 'all' is specified [default] all are shown.",
    )
    TensorboardLogger.add_cmdline_args(parser)
    parser.set_defaults(datatype='valid')
    return parser
def add_cmdline_args(
    cls, parser: ParlaiParser, partial_opt: Optional[Opt] = None
) -> ParlaiParser:
    """
    Add CLI args.

    Calls the parent implementation first, then registers the ranking
    options under a 'TorchRankerAgent' argument group.

    :param parser: parser to extend.
    :param partial_opt: forwarded unchanged to the parent implementation.
    :return: the same parser that was passed in.
    """
    super().add_cmdline_args(parser, partial_opt=partial_opt)
    agent = parser.add_argument_group('TorchRankerAgent')

    # --- Candidate sources -------------------------------------------------
    agent.add_argument(
        '-cands',
        '--candidates',
        type=str,
        default='inline',
        choices=['batch', 'inline', 'fixed', 'batch-all-cands'],
        help='The source of candidates during training '
        '(see TorchRankerAgent._build_candidates() for details).',
    )
    agent.add_argument(
        '-ecands',
        '--eval-candidates',
        type=str,
        default='inline',
        choices=['batch', 'inline', 'fixed', 'vocab', 'batch-all-cands'],
        # A separating space was missing here ("samevalue").
        help='The source of candidates during evaluation (defaults to the same '
        'value as --candidates if no flag is given)',
    )
    agent.add_argument(
        '-icands',
        '--interactive-candidates',
        type=str,
        default='fixed',
        choices=['fixed', 'inline', 'vocab'],
        help='The source of candidates during interactive mode. Since in '
        'interactive mode, batchsize == 1, we cannot use batch candidates.',
    )
    agent.add_argument(
        '--repeat-blocking-heuristic',
        type='bool',
        default=True,
        help='Block repeating previous utterances. '
        'Helpful for many models that score repeats highly, so switched '
        'on by default.',
    )

    # --- Fixed candidate files and cached encodings ------------------------
    agent.add_argument(
        '-fcp',
        '--fixed-candidates-path',
        type=str,
        help='A text file of fixed candidates to use for all examples, one '
        'candidate per line',
    )
    agent.add_argument(
        '--fixed-candidate-vecs',
        type=str,
        default='reuse',
        help='One of "reuse", "replace", or a path to a file with vectors '
        'corresponding to the candidates at --fixed-candidates-path. '
        'The default path is a /path/to/model-file.<cands_name>, where '
        '<cands_name> is the name of the file (not the full path) passed by '
        'the flag --fixed-candidates-path. By default, this file is created '
        'once and reused. To replace it, use the "replace" option.',
    )
    agent.add_argument(
        '--encode-candidate-vecs',
        type='bool',
        default=True,
        help='Cache and save the encoding of the candidate vecs. This '
        'might be used when interacting with the model in real time '
        'or evaluating on fixed candidate set when the encoding of '
        'the candidates is independent of the input.',
    )
    agent.add_argument(
        '--encode-candidate-vecs-batchsize',
        type=int,
        default=256,
        hidden=True,
        help='Batchsize when encoding candidate vecs',
    )

    # --- Training behaviour -------------------------------------------------
    agent.add_argument(
        '--init-model',
        type=str,
        default=None,
        help='Initialize model with weights from this file.',
    )
    agent.add_argument(
        '--train-predict',
        type='bool',
        default=False,
        help='Get predictions and calculate mean rank during the train '
        'step. Turning this on may slow down training.',
    )
    agent.add_argument(
        '--cap-num-predictions',
        type=int,
        default=100,
        help='Limit to the number of predictions in output.text_candidates',
    )
    agent.add_argument(
        '--ignore-bad-candidates',
        type='bool',
        default=False,
        help='Ignore examples for which the label is not present in the '
        'label candidates. Default behavior results in RuntimeError. ',
    )

    # --- Ranking / inference output -----------------------------------------
    agent.add_argument(
        '--rank-top-k',
        type=int,
        default=-1,
        # "of k > 0" was a typo for "if k > 0".
        help='Ranking returns the top k results if k > 0, otherwise sorts every '
        'single candidate according to the ranking.',
    )
    agent.add_argument(
        '--inference',
        choices={'max', 'topk'},
        default='max',
        help='Final response output algorithm',
    )
    agent.add_argument(
        '--topk',
        type=int,
        default=5,
        help='K used in Top K sampling inference, when selected',
    )
    agent.add_argument(
        '--return-cand-scores',
        type='bool',
        default=False,
        help='Return sorted candidate scores from eval_step',
    )
    return parser
def main():
    """
    Run the qualification-flow MTurk task.

    A fresh qualification with a randomised name is looked up or created and
    HITs are created that require workers NOT to hold it. Each worker is run
    through ``QualificationFlowSoloWorld``; ``firstTime`` is true only while
    the worker id has not yet been recorded in ``completed_workers``. On
    exit the qualification is deleted, unassigned HITs are expired and the
    manager is shut down, even if the run raised.
    """
    completed_workers = []

    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    opt = argparser.parse_args()

    # The task is named after the directory that contains this file.
    this_dir = os.path.dirname(os.path.abspath(__file__))
    opt['task'] = os.path.basename(this_dir)
    opt.update(task_config)

    mturk_agent_id = 'Worker'
    mturk_manager = MTurkManager(opt=opt, mturk_agent_ids=[mturk_agent_id])
    mturk_manager.setup_server()

    # Two random numbers, drawn left to right, make the name unique per run.
    first_tag = random.randint(10000, 99999)
    second_tag = random.randint(10000, 99999)
    qual_name = 'ParlAIExcludeQual{}t{}'.format(first_tag, second_tag)
    qual_desc = (
        'Qualification for a worker not correctly completing the '
        'first iteration of a task. Used to filter to different task pools.'
    )
    qualification_id = mturk_utils.find_or_create_qualification(
        qual_name, qual_desc
    )
    print('Created qualification: ', qualification_id)

    def run_onboard(worker):
        world = QualificationFlowOnboardWorld(opt, worker)
        while not world.episode_done():
            world.parley()
        world.shutdown()

    mturk_manager.set_onboard_function(onboard_function=run_onboard)

    # The task function is published as a module-level name.
    global run_conversation

    try:
        mturk_manager.start_new_run()

        # Only workers that do not hold the qualification may preview the HIT.
        exclusion_rule = {
            'QualificationTypeId': qualification_id,
            'Comparator': 'DoesNotExist',
            'RequiredToPreview': True,
        }
        agent_qualifications = [exclusion_rule]
        mturk_manager.create_hits(qualifications=agent_qualifications)
        mturk_manager.ready_to_accept_workers()

        def check_worker_eligibility(worker):
            return True

        def assign_worker_roles(worker):
            worker[0].id = mturk_agent_id

        def run_conversation(mturk_manager, opt, workers):
            mturk_agent = workers[0]
            is_first_visit = mturk_agent.worker_id not in completed_workers
            world = QualificationFlowSoloWorld(
                opt=opt,
                mturk_agent=mturk_agent,
                qualification_id=qualification_id,
                firstTime=is_first_visit,
            )
            while not world.episode_done():
                world.parley()
            completed_workers.append(mturk_agent.worker_id)
            world.shutdown()
            world.review_work()

        mturk_manager.start_task(
            eligibility_function=check_worker_eligibility,
            assign_role_function=assign_worker_roles,
            task_function=run_conversation,
        )
    finally:
        # Cleanup runs on both success and failure; any exception still
        # propagates to the caller afterwards.
        mturk_utils.delete_qualification(qualification_id)
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
def setup_args(parser=None):
    """
    Build the command line parser for the self-chat script.

    :param parser:
        existing parser to extend; a new ``ParlaiParser`` is created when
        this is ``None``.

    :return:
        the parser with the self-chat and ``WorldLogger`` options added.
    """
    if parser is None:
        parser = ParlaiParser(True, True, 'Self chat with a model')

    add = parser.add_argument
    add('--seed', type=int, default=42)
    add('-d', '--display-examples', type='bool', default=True)
    add('-n', '-ne', '--num-examples', type=int, default=10)
    add('-ltim', '--log-every-n-secs', type=float, default=2)
    add(
        '--display-ignore-fields',
        type=str,
        default='label_candidates,text_candidates',
        help='Do not display these fields',
    )
    add(
        '-it',
        '--interactive-task',
        type='bool',
        default=True,
        help='Create interactive version of task',
    )
    add(
        '--selfchat-max-turns',
        type=int,
        default=10,
        help="The number of dialogue turns before self chat ends.",
    )
    add(
        '--seed-messages-from-task',
        action='store_true',
        help="Automatically seed conversation with messages from task dataset.",
    )
    add('--outfile', type=str, default='/tmp/selfchat.json')
    add('--format', type=str, default='json', choices={'parlai', 'json'})

    parser.set_defaults(interactive_mode=True, task='self_chat')
    WorldLogger.add_cmdline_args(parser)
    return parser
def main():
    """
    Set up and run the QA data collection ParlAI-MTurk task.

    Parses the MTurk command line options, resolves the SQuAD
    ``DefaultTeacher`` class used as the task agent, starts an
    ``MTurkManager`` with a single worker slot and runs one
    ``QADataCollectionWorld`` per worker. Unclaimed HITs are expired and the
    manager is shut down when the run ends, whether or not it raised.
    """
    # Command line options: data path plus the MTurk-specific flags.
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    opt = argparser.parse_args()

    # The task is named after the folder holding this file; the contents of
    # task_config.py are merged into the configuration.
    task_dir = os.path.dirname(os.path.abspath(__file__))
    opt['task'] = os.path.basename(task_dir)
    opt.update(task_config)

    # Resolve the SQuAD teacher class; it is instantiated once per
    # conversation with its own copy of the options.
    task_class = getattr(
        importlib.import_module('parlai.tasks.squad.agents'), 'DefaultTeacher'
    )
    task_opt = opt.copy()
    task_opt['datatype'] = 'train'
    task_opt['datapath'] = opt['datapath']

    # One agent id means at most one worker per world.
    mturk_agent_id = 'Worker'
    mturk_manager = MTurkManager(opt=opt, mturk_agent_ids=[mturk_agent_id])
    mturk_manager.setup_server()

    # Onboarding world runner. It is defined for convenience but not
    # installed: the manager is given onboard_function=None below. Pass
    # run_onboard instead to enable onboarding.
    def run_onboard(worker):
        world = QADataCollectionOnboardWorld(opt=opt, mturk_agent=worker)
        while not world.episode_done():
            world.parley()
        world.shutdown()

    mturk_manager.set_onboard_function(onboard_function=None)

    # The task function is published as a module-level name.
    global run_conversation

    try:
        # Initialize run information, open the worker channels, then create
        # the HITs requested on the command line.
        mturk_manager.start_new_run()
        mturk_manager.ready_to_accept_workers()
        mturk_manager.create_hits()

        # Eligibility filter: receives the current workers and returns the
        # ones allowed to work on the task (here, all of them).
        def check_workers_eligibility(workers):
            return workers

        eligibility_function = {
            'func': check_workers_eligibility,
            'multiple': True,
        }

        # Role assignment: setting `id` to None would return a worker to the
        # pool instead of placing them in a task.
        def assign_worker_roles(workers):
            workers[0].id = mturk_agent_id

        # Main task: one teacher plus one worker in a data collection world.
        def run_conversation(mturk_manager, opt, workers):
            task = task_class(task_opt)
            world = QADataCollectionWorld(
                opt=opt, task=task, mturk_agent=workers[0]
            )
            # Run the world to completion.
            while not world.episode_done():
                world.parley()
            # Shut down, review the work and persist the collected data.
            world.shutdown()
            world.review_work()
            world.save_data()

        # Start running the task world on any workers who connect.
        mturk_manager.start_task(
            eligibility_function=eligibility_function,
            assign_role_function=assign_worker_roles,
            task_function=run_conversation,
        )
    finally:
        # HITs that were never claimed or completed must be expired before
        # the manager releases its resources; exceptions still propagate.
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
def _make_argparse_table(class_):
    """
    Build pipe-delimited tables describing the CLI arguments of ``class_``.

    A bare parser is populated through ``class_.add_cmdline_args``. For each
    argument group with at least one visible option, the output contains a
    title line, a two-column table header and one row per option.

    :param class_:
        object exposing ``add_cmdline_args(parser, partial_opt=None)``.

    :return:
        list of text fragments making up the tables.
    """
    parser = ParlaiParser(False, False)
    class_.add_cmdline_args(parser, partial_opt=None)

    out = []
    # One table per argument group defined on the parser.
    for group in parser._action_groups:
        rows = []
        for action in group._group_actions:
            # Options flagged as hidden are left out of the table.
            if getattr(action, 'hidden', False):
                continue
            if action.dest in (argparse.SUPPRESS, 'help'):
                continue

            flags = ", ".join(f'`{a}`' for a in action.option_strings)
            parts = []

            if action.help:
                help_text = action.help
                # Start the help text with a capital letter.
                if not help_text[0].isupper():
                    help_text = help_text[0].upper() + help_text[1:]
                help_text = help_text.replace("%(default)s", str(action.default))
                parts.append(help_text)

            if action.choices:
                listed = ", ".join(f'`{c}`' for c in action.choices)
                parts.append("Choices: " + listed + ".")

            # Default and recommended values share one line.
            value_note = ""
            has_default = (
                action.default is not None
                and action.default is not argparse.SUPPRESS
            )
            if has_default:
                value_note += f"Default: ``{action.default}``. "
            if getattr(action, 'recommended', None):
                value_note += f"Recommended: ``{action.recommended}``. "
            if value_note:
                # Some defaults are a literal newline; show it escaped.
                parts.append(value_note.replace("\n", "\\n"))

            # Pad line breaks because the text is inserted inside a table.
            cell = "\n".join(parts).replace("\n", "\n \n ")
            rows.append((flags, cell))

        if not rows:
            continue

        out.append(f'__{group.title.title()}__\n\n')
        out.append("| Argument | Description |\n")
        out.append("|----------|----------|\n")
        for row in rows:
            line = ("| " + " | ".join(row) + " |").replace("\n", "<br>")
            out.append(f"{line}\n")
        out.append("\n\n")

    return out
def setup_args(parser=None) -> ParlaiParser:
    """
    Build the ParlAI parser, adding command line args if necessary.

    :param ParlaiParser parser:
        Preexisting parser to append options to. Will be created if needed.

    :returns:
        the ParlaiParser with CLI options added.
    """
    if parser is None:
        parser = ParlaiParser(True, True, 'Train a model')
    parser.add_pytorch_datateacher_args()
    train = parser.add_argument_group('Training Loop Arguments')
    train.add_argument(
        '-et',
        '--evaltask',
        help='task to use for valid/test (defaults to the one used for training)',
    )
    train.add_argument(
        '--eval-batchsize',
        type=int,
        hidden=True,
        help='Eval time batch size (defaults to same as -bs)',
    )
    train.add_argument('--display-examples', type='bool', default=False, hidden=True)
    train.add_argument('-eps', '--num-epochs', type=float, default=-1)
    train.add_argument('-ttim', '--max-train-time', type=float, default=-1)
    train.add_argument('-ltim', '--log-every-n-secs', type=float, default=2)
    train.add_argument(
        '-vtim',
        '--validation-every-n-secs',
        type=float,
        default=-1,
        help='Validate every n seconds. Saves model to model_file '
        '(if set) whenever best val metric is found',
    )
    train.add_argument(
        '-stim',
        '--save-every-n-secs',
        type=float,
        default=-1,
        help='Saves the model to model_file.checkpoint after '
        'every n seconds (default -1, never).',
    )
    train.add_argument(
        '-sval',
        '--save-after-valid',
        type='bool',
        default=False,
        help='Saves the model to model_file.checkpoint after '
        'every validation (default %(default)s).',
    )
    train.add_argument(
        '-veps',
        '--validation-every-n-epochs',
        type=float,
        default=-1,
        help='Validate every n epochs. Saves model to model_file '
        '(if set) whenever best val metric is found',
    )
    train.add_argument(
        '-vme',
        '--validation-max-exs',
        type=int,
        default=-1,
        hidden=True,
        help='max examples to use during validation (default -1 uses all)',
    )
    train.add_argument(
        '--short-final-eval',
        default=False,
        hidden=True,
        type='bool',
        help='If true, obeys --validation-max-exs in the final '
        'validation and test evaluations.',
    )
    train.add_argument(
        '-vp',
        '--validation-patience',
        type=int,
        default=10,
        help=(
            'number of iterations of validation where result'
            ' does not improve before we stop training'
        ),
    )
    train.add_argument(
        '-vmt',
        '--validation-metric',
        default='accuracy',
        help='key into report table for selecting best validation',
    )
    train.add_argument(
        '-vmm',
        '--validation-metric-mode',
        type=str,
        choices=['max', 'min'],
        help='how to optimize validation metric (max or min)',
    )
    train.add_argument(
        '-vcut',
        '--validation-cutoff',
        type=float,
        default=1.0,
        hidden=True,
        help='value at which training will stop if exceeded by metric',
    )
    train.add_argument(
        '-lfc',
        '--load-from-checkpoint',
        type='bool',
        default=False,
        hidden=True,
        help='load model from checkpoint if available',
    )
    train.add_argument(
        '-vshare',
        '--validation-share-agent',
        # Without an explicit type the raw CLI string is stored, and any
        # non-empty string (including "False") is truthy.
        type='bool',
        default=False,
        hidden=True,
        help='use a shared copy of the agent for validation. '
        'this will eventually default to True, but '
        'currently defaults to False.',
    )
    train.add_argument(
        '-micro',
        '--aggregate-micro',
        type='bool',
        default=False,
        help='If multitasking, average metrics over the number of examples. '
        'If false, averages over the number of tasks.',
    )
    train.add_argument(
        '-mcs',
        '--metrics',
        type=str,
        default='default',
        # Adjacent literals are joined verbatim; each fragment carries its
        # own separator so the words do not run together.
        help='list of metrics to show/compute, e.g. all, default, '
        'or give a list split by , like '
        'ppl,f1,accuracy,hits@1,rouge,bleu. '
        'The rouge metrics will be computed as rouge-1, rouge-2 and rouge-l',
    )
    TensorboardLogger.add_cmdline_args(parser)
    parser = setup_dict_args(parser)
    return parser
def setup_args(parser=None):
    """
    Build the parser for the training script.

    :param parser:
        existing parser to extend; a new ``ParlaiParser`` is created when
        this is ``None``.

    :return:
        the parser returned by ``setup_dict_args`` after the training loop
        and ``TensorboardLogger`` options have been added.
    """
    if parser is None:
        parser = ParlaiParser(True, True, 'Train a model')
    parser.add_pytorch_datateacher_args()
    train = parser.add_argument_group('Training Loop Arguments')
    train.add_argument(
        '-et',
        '--evaltask',
        help=(
            'task to use for valid/test (defaults to the '
            'one used for training if not set)'
        ),
    )
    train.add_argument(
        '--eval-batchsize',
        type=int,
        hidden=True,
        help='Eval time batch size (defaults to same as -bs)',
    )
    train.add_argument('--display-examples', type='bool', default=False, hidden=True)
    train.add_argument('-eps', '--num-epochs', type=float, default=-1)
    train.add_argument('-ttim', '--max-train-time', type=float, default=-1)
    train.add_argument('-ltim', '--log-every-n-secs', type=float, default=2)
    train.add_argument(
        '-vtim',
        '--validation-every-n-secs',
        type=float,
        default=-1,
        help='Validate every n seconds. Saves model to model_file '
        '(if set) whenever best val metric is found',
    )
    train.add_argument(
        '-stim',
        '--save-every-n-secs',
        type=float,
        default=-1,
        help='Saves the model to model_file.checkpoint after '
        'every n seconds (default -1, never).',
    )
    train.add_argument(
        '-sval',
        '--save-after-valid',
        type='bool',
        default=False,
        help='Saves the model to model_file.checkpoint after '
        'every validation (default %(default)s).',
    )
    train.add_argument(
        '-veps',
        '--validation-every-n-epochs',
        type=float,
        default=-1,
        help='Validate every n epochs. Saves model to model_file '
        '(if set) whenever best val metric is found',
    )
    train.add_argument(
        '-vme',
        '--validation-max-exs',
        type=int,
        default=-1,
        hidden=True,
        help='max examples to use during validation (default ' '-1 uses all)',
    )
    train.add_argument(
        '--short-final-eval',
        default=False,
        hidden=True,
        type='bool',
        help='If true, obeys --validation-max-exs in the final '
        'validation and test evaluations.',
    )
    train.add_argument(
        '-vp',
        '--validation-patience',
        type=int,
        default=10,
        help=(
            'number of iterations of validation where result'
            ' does not improve before we stop training'
        ),
    )
    train.add_argument(
        '-vmt',
        '--validation-metric',
        default='accuracy',
        help='key into report table for selecting best ' 'validation',
    )
    train.add_argument(
        '-vmm',
        '--validation-metric-mode',
        type=str,
        choices=['max', 'min'],
        help='how to optimize validation metric (max or min)',
    )
    train.add_argument(
        '-vcut',
        '--validation-cutoff',
        type=float,
        default=1.0,
        hidden=True,
        help='value at which training will stop if exceeded by ' 'training metric',
    )
    train.add_argument(
        '-dbf',
        '--dict-build-first',
        hidden=True,
        type='bool',
        default=True,
        help='build dictionary first before training agent',
    )
    train.add_argument(
        '-lfc',
        '--load-from-checkpoint',
        type='bool',
        default=False,
        hidden=True,
        help='load model from checkpoint if available',
    )
    train.add_argument(
        '-vshare',
        '--validation-share-agent',
        # Without an explicit type the raw CLI string is stored, and any
        # non-empty string (including "False") is truthy.
        type='bool',
        default=False,
        hidden=True,
        help='use a shared copy of the agent for validation. '
        'this will eventually default to True, but '
        'currently defaults to False.',
    )
    TensorboardLogger.add_cmdline_args(parser)
    parser = setup_dict_args(parser)
    return parser
def setup_args(parser=None):
    """
    Build the command line parser for the task lint script.

    :param parser:
        existing parser to extend; a new ``ParlaiParser`` is created when
        this is ``None``.

    :return:
        the parser with lint and ``DictionaryAgent`` options added and the
        ``datatype`` default set to ``train:ordered``.
    """
    if parser is None:
        parser = ParlaiParser(True, False, 'Lint for ParlAI tasks')
    parser.add_pytorch_datateacher_args()

    # Script-specific command line arguments.
    add = parser.add_argument
    add('-ltim', '--log-every-n-secs', type=float, default=2)
    add(
        '--agent',
        type=int,
        default=0,
        help='Use teacher (agent 0) or model (agent 1)',
        choices=[0, 1],
    )
    add(
        '--new_line_new_utt',
        type='bool',
        default=False,
        help='New lines treat substrings as separate utterances.',
    )
    add(
        '--ignore_tokens',
        type=str,
        default='',
        help='ignore tokens containings these substrings (comma-separated)',
    )

    parser.set_defaults(datatype='train:ordered')
    DictionaryAgent.add_cmdline_args(parser)
    return parser
def main():
    """
    Run the deal-or-no-deal negotiation MTurk task.

    One MTurk worker negotiates with either a second MTurk worker (when
    ``--two_mturk_agents`` is given) or a local agent. The local agent is
    created with ``create_agent`` if ``'model'`` is a key of the options and
    is a ``LocalHumanAgent`` otherwise. Unassigned HITs are expired and the
    manager is shut down when the run ends, whether or not it raised.
    """
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    argparser.add_argument(
        '--two_mturk_agents',
        dest='two_mturk_agents',
        action='store_true',
        help='data collection mode ' 'with converations between two MTurk agents',
    )
    opt = argparser.parse_args()
    opt['task'] = 'dealnodeal'
    opt['datatype'] = 'valid'
    opt.update(task_config)

    local_agent_1_id = 'local_1'
    mturk_agent_ids = ['mturk_agent_1']
    if opt['two_mturk_agents']:
        mturk_agent_ids.append('mturk_agent_2')

    mturk_manager = MTurkManager(opt=opt, mturk_agent_ids=mturk_agent_ids)
    mturk_manager.setup_server()

    try:
        mturk_manager.start_new_run()
        mturk_manager.create_hits()
        mturk_manager.set_onboard_function(onboard_function=None)
        mturk_manager.ready_to_accept_workers()

        def check_worker_eligibility(worker):
            return True

        def assign_worker_roles(workers):
            # Cycle through the configured ids in worker order.
            id_count = len(mturk_agent_ids)
            for index, worker in enumerate(workers):
                worker.id = mturk_agent_ids[index % id_count]

        def run_conversation(mturk_manager, opt, workers):
            agents = workers[:]

            # With a single MTurk worker the partner is a local agent.
            if not opt['two_mturk_agents']:
                if 'model' in opt:
                    local_agent = create_agent(opt)
                else:
                    local_agent = LocalHumanAgent(opt=None)
                local_agent.id = local_agent_1_id
                agents.append(local_agent)

            opt["batchindex"] = mturk_manager.started_conversations

            world = MTurkDealNoDealDialogWorld(opt=opt, agents=agents)
            while not world.episode_done():
                world.parley()
            world.shutdown()

        mturk_manager.start_task(
            eligibility_function=check_worker_eligibility,
            assign_role_function=assign_worker_roles,
            task_function=run_conversation,
        )
    finally:
        # Cleanup runs on success and failure; exceptions still propagate.
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
def _get_args(self):
    """
    Return the options produced by parsing an empty argument list.

    The parser only carries the data path and messenger arguments.
    """
    arg_parser = ParlaiParser(False, False)
    arg_parser.add_parlai_data_path()
    arg_parser.add_messenger_args()
    # An explicit empty list keeps the real process arguments out of it.
    no_cli_args = []
    return arg_parser.parse_args(no_cli_args)
def setup_args():
    """
    Build the command line parser for the conversion script.

    :return:
        a new ``ParlaiParser`` with the conversion options added and the
        ``datatype`` default set to ``train:stream``.
    """
    parser = ParlaiParser()
    add = parser.add_argument
    add(
        '-n',
        '--num-examples',
        default=-1,
        type=int,
        help='Total number of exs to convert, -1 to convert all examples',
    )
    add(
        '-of',
        '--outfile',
        default=None,
        type=str,
        help='Output file where to save, by default will be created in tmp',
    )
    add(
        '-if',
        '--ignore-fields',
        default='id',
        type=str,
        help='Ignore these fields from the message (returned with .act() )',
    )
    add('-ltim', '--log-every-n-secs', type=float, default=2)
    parser.set_defaults(datatype='train:stream')
    return parser
def setup_args(parser=None):
    """
    Build the command line parser for interactive chat with a model.

    :param parser:
        existing parser to extend; a new ``ParlaiParser`` is created when
        this is ``None``.

    :return:
        the parser with display, report, ``WorldLogger`` and
        ``LocalHumanAgent`` options added.
    """
    if parser is None:
        parser = ParlaiParser(True, True, 'Interactive chat with a model')

    add = parser.add_argument
    add('-d', '--display-examples', type='bool', default=False)
    add(
        '-rf',
        '--report-filename',
        type=str,
        default='',
        help='Saves a json file of the evaluation report either as an '
        'extension to the model-file (if begins with a ".") or a whole '
        'file path. Set to the empty string to not save at all.',
    )
    add(
        '--save-world-logs',
        type='bool',
        default=False,
        help='Saves a jsonl file containing all of the task examples and '
        'model replies. Must also specify --report-filename.',
    )
    add(
        '--display-prettify',
        type='bool',
        default=False,
        help='Set to use a prettytable when displaying '
        'examples with text candidates',
    )
    add(
        '--display-ignore-fields',
        type=str,
        default='label_candidates,text_candidates',
        help='Do not display these fields',
    )
    add(
        '-it',
        '--interactive-task',
        type='bool',
        default=True,
        help='Create interactive version of task',
    )

    WorldLogger.add_cmdline_args(parser)
    parser.set_defaults(interactive_mode=True, task='interactive')
    LocalHumanAgent.add_cmdline_args(parser)
    return parser
def setup_args(model_args=None):
    """
    Build the parser for the training script.

    :param model_args:
        passed to ``ParlaiParser`` as ``model_argv``.

    :return:
        a new ``ParlaiParser`` with the training loop arguments added.
    """
    parser = ParlaiParser(True, True, model_argv=model_args)
    train = parser.add_argument_group('Training Loop Arguments')
    train.add_argument(
        '-et',
        '--evaltask',
        help=(
            'task to use for valid/test (defaults to the '
            'one used for training if not set)'
        ),
    )
    train.add_argument('-d', '--display-examples', type='bool', default=False)
    train.add_argument('-e', '--num-epochs', type=float, default=-1)
    train.add_argument('-ttim', '--max-train-time', type=float, default=-1)
    train.add_argument('-ltim', '--log-every-n-secs', type=float, default=2)
    train.add_argument(
        '-vtim',
        '--validation-every-n-secs',
        type=float,
        default=-1,
        help='Validate every n seconds. Whenever the best '
        'validation metric is found, saves the model to '
        'the model_file path if set.',
    )
    train.add_argument(
        '-stim',
        '--save-every-n-secs',
        type=float,
        default=-1,
        help='Saves the model to model_file.checkpoint after '
        'every n seconds (default -1, never).',
    )
    train.add_argument(
        '-sval',
        '--save-after-valid',
        type='bool',
        default=False,
        # Interpolate the real default rather than hard-coding a value that
        # can drift from the `default=` keyword.
        help='Saves the model to model_file.checkpoint after '
        'every validation (default %(default)s).',
    )
    train.add_argument(
        '-vme',
        '--validation-max-exs',
        type=int,
        default=-1,
        help='max examples to use during validation (default ' '-1 uses all)',
    )
    train.add_argument(
        '-vp',
        '--validation-patience',
        type=int,
        default=10,
        help=(
            'number of iterations of validation where result'
            ' does not improve before we stop training'
        ),
    )
    train.add_argument(
        '-vmt',
        '--validation-metric',
        default='accuracy',
        help='key into report table for selecting best ' 'validation',
    )
    train.add_argument(
        '-vmm',
        '--validation-metric-mode',
        default='max',
        type=str,
        choices=['max', 'min'],
        help='how to optimize validation metric (max or min)',
    )
    train.add_argument(
        '-vcut',
        '--validation-cutoff',
        type=float,
        default=1.0,
        help='value at which training will stop if exceeded by ' 'training metric',
    )
    train.add_argument(
        '-dbf',
        '--dict-build-first',
        type='bool',
        default=True,
        help='build dictionary first before training agent',
    )
    return parser
def setup_args(parser=None):
    """
    Build the command line parser for the model evaluation script.

    :param parser:
        existing parser to extend; a new ``ParlaiParser`` is created when
        this is ``None``.

    :return:
        the parser with evaluation, ``WorldLogger`` and ``TensorboardLogger``
        options added and the ``datatype`` parameter set to ``valid``.
    """
    if parser is None:
        parser = ParlaiParser(True, True, 'Evaluate a model')
    # Get command line arguments
    parser.add_argument(
        '-rf',
        '--report-filename',
        type=str,
        default='',
        help='Saves a json file of the evaluation report either as an '
        'extension to the model-file (if begins with a ".") or a whole '
        'file path. Set to the empty string to not save at all.',
    )
    parser.add_argument(
        '--save-world-logs',
        type='bool',
        default=False,
        help='Saves a jsonl file containing all of the task examples and '
        'model replies. Must also specify --report-filename.',
    )
    parser.add_argument(
        '--save-format',
        type=str,
        default='conversations',
        choices=['conversations', 'parlai'],
    )
    parser.add_argument('-ne', '--num-examples', type=int, default=-1)
    parser.add_argument('-d', '--display-examples', type='bool', default=False)
    parser.add_argument('-ltim', '--log-every-n-secs', type=float, default=10)
    parser.add_argument(
        '-mcs',
        '--metrics',
        type=str,
        default='default',
        # Adjacent literals are joined verbatim; each fragment carries its
        # own separator so the words do not run together.
        help='list of metrics to show/compute, e.g. all, default, '
        'or give a list split by , like '
        'ppl,f1,accuracy,hits@1,rouge,bleu. '
        'The rouge metrics will be computed as rouge-1, rouge-2 and rouge-l',
    )
    parser.add_argument(
        '-micro',
        '--aggregate-micro',
        type='bool',
        default=False,
        help='Report micro-averaged metrics instead of macro averaged metrics.',
        recommended=False,
    )
    WorldLogger.add_cmdline_args(parser)
    TensorboardLogger.add_cmdline_args(parser)
    parser.set_params(datatype='valid')
    return parser
def get_parlai_opt(self) -> Opt:
    """
    Translate the stored fairseq arguments into a ParlAI opt.

    Encoder and decoder are assumed symmetrical except for the number of
    layers, so only the ``encoder_*`` fairseq values are read for the shared
    transformer parameters.

    :return opt:
        opt parsed by the ParlAI parser from an empty argument list, with
        the mapped values applied as parameters.
    """
    fairseq_args = self.state['args'].__dict__
    own_opt = self.opt

    # 1. Transformer parameters, read from the encoder side.
    config = {
        parlai_key: fairseq_args[f'encoder_{fairseq_key}']
        for fairseq_key, parlai_key in TRANSFORMER_PARAMETER_MAPPING.items()
    }

    # 2. Dropout values; newer fairseq names relu dropout
    #    'activation_dropout'.
    for name in TRANSFORMER_DROPOUT:
        config[name] = fairseq_args[name]
    relu_source = (
        'activation_dropout'
        if 'activation_dropout' in fairseq_args
        else 'relu_dropout'
    )
    config['relu_dropout'] = fairseq_args[relu_source]

    # 3. Remaining options: model, layer counts, tokenization, positions.
    config['model'] = own_opt['model']
    config['n_encoder_layers'] = fairseq_args['encoder_layers']
    config['n_decoder_layers'] = fairseq_args['decoder_layers']
    config['dict_tokenizer'] = own_opt['tokenizer']
    config['bpe_vocab'] = own_opt['vocab']
    config['bpe_merge'] = own_opt['merge']
    config['n_positions'] = fairseq_args['max_source_positions']

    # 4. Embedding scale.
    if 'encoder_embed_scale' in fairseq_args:
        scale_embeddings = fairseq_args['encoder_embed_scale'] != 1.0
    else:
        scale_embeddings = not fairseq_args['no_scale_embedding']
    config['embeddings_scale'] = scale_embeddings

    # 5. Transformer variant.
    if fairseq_args['encoder_normalize_before']:
        variant = 'prelayernorm'
    elif fairseq_args['layernorm_embedding']:
        variant = 'bart'
    else:
        variant = 'aiayn'
    config['variant'] = variant

    if own_opt['add_prefix_space']:
        config['bpe_add_prefix_space'] = True

    parser = ParlaiParser()
    parser.set_params(**config)
    opt = parser.parse_args([])

    # 6. Copy the remaining ParlAI-side options across.
    for key in (
        'fp16',
        'activation',
        'delimiter',
        'history_add_global_end_token',
    ):
        opt[key] = own_opt[key]

    # Makes model fp16 ready for fine-tuning, means 4 extra padding tokens.
    opt['force_fp16_tokens'] = True
    opt['converting'] = True

    return opt
def main():
    """
    Run the model evaluator MTurk task.

    An ``IrBaselineAgent`` is the dialog model under evaluation; each MTurk
    worker is onboarded and then placed in a ``ModelEvaluatorWorld`` with a
    fresh model agent and the evaluation task options. Unassigned HITs are
    expired and the manager is shut down when the run ends, whether or not
    it raised.
    """
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()

    # The dialog model we want to evaluate
    from parlai.agents.ir_baseline.ir_baseline import IrBaselineAgent

    IrBaselineAgent.add_cmdline_args(argparser)
    opt = argparser.parse_args()

    # The task is named after the directory that contains this file.
    this_dir = os.path.dirname(os.path.abspath(__file__))
    opt['task'] = os.path.basename(this_dir)
    opt.update(task_config)

    # The task that we will evaluate the dialog model on
    task_opt = {}
    task_opt['datatype'] = 'test'
    task_opt['datapath'] = opt['datapath']
    task_opt['task'] = '#MovieDD-Reddit'

    mturk_agent_id = 'Worker'
    mturk_manager = MTurkManager(opt=opt, mturk_agent_ids=[mturk_agent_id])
    mturk_manager.setup_server()

    # The task function is published as a module-level name.
    global run_conversation

    try:
        mturk_manager.start_new_run()
        mturk_manager.create_hits()

        def run_onboard(worker):
            world = ModelEvaluatorOnboardWorld(opt=opt, mturk_agent=worker)
            while not world.episode_done():
                world.parley()
            world.shutdown()

        mturk_manager.set_onboard_function(onboard_function=run_onboard)
        mturk_manager.ready_to_accept_workers()

        def check_worker_eligibility(worker):
            return True

        def assign_worker_roles(worker):
            worker[0].id = mturk_agent_id

        def run_conversation(mturk_manager, opt, workers):
            mturk_agent = workers[0]
            # Every conversation gets its own model agent instance.
            model_agent = IrBaselineAgent(opt=opt)
            world = ModelEvaluatorWorld(
                opt=opt,
                model_agent=model_agent,
                task_opt=task_opt,
                mturk_agent=mturk_agent,
            )
            while not world.episode_done():
                world.parley()
            world.shutdown()
            world.review_work()

        mturk_manager.start_task(
            eligibility_function=check_worker_eligibility,
            assign_role_function=assign_worker_roles,
            task_function=run_conversation,
        )
    finally:
        # Cleanup runs on success and failure; exceptions still propagate.
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
WizardTransformerRankerAgent, ) """Evaluate pre-trained retrieval model on the full Wizard Dialogue task. NOTE: Metrics here differ slightly to those reported in the paper as a result of code changes. Results on seen test set: Hits@1/100: 86.7 Results on unseen test set (run with flag `-t wizard_of_wikipedia:WizardDialogKnowledge:topic_split`): Hits@1/100: 68.96 """ if __name__ == '__main__': parser = ParlaiParser(add_model_args=True) parser.add_argument('-n', '--num-examples', default=100000000) parser.add_argument('-d', '--display-examples', type='bool', default=False) parser.add_argument('-ltim', '--log-every-n-secs', type=float, default=2) WizardTransformerRankerAgent.add_cmdline_args(parser) parser.set_params( task='wizard_of_wikipedia', model='projects:wizard_of_wikipedia:wizard_transformer_ranker', model_file= 'models:wizard_of_wikipedia/full_dialogue_retrieval_model/model', datatype='test', n_heads=6, ffn_size=1200, embeddings_scale=False, delimiter=' __SOC__ ', n_positions=1000,
def setup_args(parser=None):
    """
    Build the parser for interactive command line chat with a model.

    :param parser:
        existing parser to extend; a new ``ParlaiParser`` is created when
        this is ``None``.

    :return:
        the parser with display, logging, ``LocalHumanAgent`` and
        ``WorldLogger`` options added.
    """
    if parser is None:
        parser = ParlaiParser(
            True, True, 'Interactive chat with a model on the command line'
        )

    add = parser.add_argument
    add('-d', '--display-examples', type='bool', default=False)
    add(
        '--display-prettify',
        type='bool',
        default=False,
        help='Set to use a prettytable when displaying '
        'examples with text candidates',
    )
    add(
        '--display-add-fields',
        type=str,
        default='',
        help='Display these fields when verbose is off (e.g., "--display-add-fields label_candidates,beam_texts")',
    )
    add(
        '-it',
        '--interactive-task',
        type='bool',
        default=True,
        help='Create interactive version of task',
    )
    add(
        '--outfile',
        type=str,
        default='',
        help='Saves a jsonl file containing all of the task examples and '
        'model replies. Set to the empty string to not save at all',
    )
    add(
        '--save-format',
        type=str,
        default='conversations',
        choices=['conversations', 'parlai'],
        help='Format to save logs in. conversations is a jsonl format, parlai is a text format.',
    )

    parser.set_defaults(interactive_mode=True, task='interactive')
    LocalHumanAgent.add_cmdline_args(parser)
    WorldLogger.add_cmdline_args(parser)
    return parser
def main():
    """
    Parse the command line and dump task data via ``dump_data``.

    The random seed is fixed at 42 before anything else runs, and the
    ``datatype`` default is ``train:ordered``.
    """
    random.seed(42)

    # Get command line arguments
    parser = ParlaiParser()
    parser.add_argument(
        '-n',
        '--num-examples',
        default=-1,
        type=int,
        # Adjacent literals instead of a backslash continuation inside the
        # string, which would embed the source indentation in the help text.
        help='Total number of exs to convert, -1 to convert '
        'all examples',
    )
    parser.add_argument(
        '-of',
        '--outfile',
        default=None,
        type=str,
        help='Output file where to save, by default will be '
        'created in /tmp',
    )
    parser.add_argument('-ltim', '--log-every-n-secs', type=float, default=2)
    parser.set_defaults(datatype='train:ordered')
    opt = parser.parse_args()
    dump_data(opt)
#!/usr/bin/env python3 # Copyright (c) Facebook, Inc. and its affiliates. # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. from parlai.core.build_data import download_models from parlai.core.params import ParlaiParser from parlai.scripts.interactive import interactive from projects.personachat.persona_seq2seq import PersonachatSeqseqAgentBasic '''Interact with pre-trained model Generative model trained on personachat using persona 'self' Run from ParlAI directory ''' if __name__ == '__main__': parser = ParlaiParser(add_model_args=True) parser.add_argument('-d', '--display-examples', type='bool', default=False) PersonachatSeqseqAgentBasic.add_cmdline_args(parser) parser.set_defaults( dict_file='models:personachat/profile_memory/fulldict.dict', interactive_mode=True, task='parlai.agents.local_human.local_human:LocalHumanAgent', model= 'projects.personachat.persona_seq2seq:PersonachatSeqseqAgentBasic', model_file= 'models:personachat/seq2seq_personachat/seq2seq_no_dropout0.2_lstm_1024_1e-3' ) opt = parser.parse_args() opt['model_type'] = 'seq2seq_personachat' # for builder # build all profile memory models
def add_cmdline_args(
    cls, parser: ParlaiParser, partial_opt: Optional[Opt] = None
) -> ParlaiParser:
    """
    Add the retriever command line arguments.

    :param parser:
        parser that receives a new 'Retriever Arguments' group.
    :param partial_opt:
        accepted for interface compatibility; not used here.

    :return:
        the ``parser`` that was passed in.
    """
    # Keep the group under its own name so the parser itself can be returned,
    # as the return annotation promises.
    group = parser.add_argument_group('Retriever Arguments')
    group.add_argument(
        '--retriever-numworkers',
        type=int,
        default=None,
        help='Number of CPU processes (for tokenizing, etc)',
    )
    group.add_argument(
        '--retriever-ngram',
        type=int,
        default=2,
        help='Use up to N-size n-grams (e.g. 2 = unigrams + bigrams)',
    )
    group.add_argument(
        '--retriever-hashsize',
        type=int,
        default=int(math.pow(2, 24)),
        help='Number of buckets to use for hashing ngrams',
    )
    group.add_argument(
        '--retriever-tokenizer',
        type=str,
        default='simple',
        help='String option specifying tokenizer type to use.',
    )
    group.add_argument(
        '--retriever-num-retrieved',
        default=5,
        type=int,
        help='How many docs to retrieve.',
    )
    group.add_argument(
        '--remove-title',
        type='bool',
        default=False,
        help='Whether to remove the title from the retrieved passage',
    )
    group.add_argument(
        '--retriever-mode',
        choices=['keys', 'values'],
        default='values',
        help='Whether to retrieve the stored key or the stored value. For '
        'example, if you want to return the text of an example, use '
        'keys here; if you want to return the label, use values here.',
    )
    group.add_argument(
        '--index-by-int-id',
        type='bool',
        default=True,
        help=(
            'Whether to index into database by doc id as an integer. This '
            'defaults to true for DBs built using ParlAI.'
        ),
    )
    group.add_argument(
        '--tfidf-context-length',
        default=-1,
        type=int,
        help='Number of past utterances to remember when '
        'building flattened batches of data in multi-'
        'example episodes.',
    )
    group.add_argument(
        '--tfidf-include-labels',
        default=True,
        type='bool',
        help='Specifies whether or not to include labels '
        'as past utterances when building flattened '
        'batches of data in multi-example episodes.',
    )
    return parser
def main():
    """
    Train a remote luatorch MemNN agent on a task, validating between rounds.

    Unless ``--remote-cmd`` / ``--remote-args`` already appear in ``sys.argv``,
    defaults pointing at the memnn_luatorch_cpu scripts under ``$PARLAI_HOME``
    are appended to ``sys.argv`` before parsing. When no ``dict_file`` option
    is set, a dictionary is built from the ordered training data (at most
    ``--dict-max-exs`` examples if that value is positive). The dictionary is
    then sorted and written to ``/tmp/dict.txt``. Training runs for up to
    ``--num-its`` rounds and stops early once validation accuracy exceeds
    0.95.

    :raises KeyError: if the ``PARLAI_HOME`` environment variable is unset.
    :raises RuntimeError: if ``--remote-cmd`` is not supplied and no
        ``luajit`` executable can be found on the PATH.
    """
    import shutil

    # Get command line arguments
    argparser = ParlaiParser()
    DictionaryAgent.add_cmdline_args(argparser)
    ParsedRemoteAgent.add_cmdline_args(argparser)
    argparser.add_argument('--num-examples', default=1000, type=int)
    argparser.add_argument('--num-its', default=100, type=int)
    argparser.add_argument('--dict-max-exs', default=10000, type=int)
    parlai_home = os.environ['PARLAI_HOME']
    if '--remote-cmd' not in sys.argv:
        # shutil.which avoids spawning a shell and printing the located path,
        # and does not depend on a `which` binary being installed.
        if shutil.which('luajit') is None:
            raise RuntimeError('Could not detect torch luajit installed: ' +
                               'please install torch from http://torch.ch ' +
                               'or manually set --remote-cmd for this example.')
        sys.argv.append('--remote-cmd')
        sys.argv.append('luajit {}/parlai/agents/'.format(parlai_home) +
                        'memnn_luatorch_cpu/memnn_zmq_parsed.lua')
    if '--remote-args' not in sys.argv:
        sys.argv.append('--remote-args')
        sys.argv.append('{}/examples/'.format(parlai_home) +
                        'memnn_luatorch_cpu/params_default.lua')

    opt = argparser.parse_args()

    # set up dictionary
    print('Setting up dictionary.')
    dictionary = DictionaryAgent(opt)
    if not opt.get('dict_file'):
        # build dictionary since we didn't load it
        ordered_opt = copy.deepcopy(opt)
        ordered_opt['datatype'] = 'train:ordered'
        ordered_opt['numthreads'] = 1
        world_dict = create_task(ordered_opt, dictionary)

        print('Dictionary building on training data.')
        cnt = 0
        # pass examples to dictionary
        for _ in world_dict:
            cnt += 1
            if cnt > opt['dict_max_exs'] and opt['dict_max_exs'] > 0:
                print('Processed {} exs, moving on.'.format(
                    opt['dict_max_exs']))
                # don't wait too long...
                break
            world_dict.parley()

    # we need to save the dictionary to load it in memnn (sort it by freq)
    dictionary.sort()
    dictionary.save('/tmp/dict.txt', sort=True)

    print('Dictionary ready, moving on to training.')

    # The same agent instance is shared by the train and valid worlds.
    opt['datatype'] = 'train'
    agent = ParsedRemoteAgent(opt, {'dictionary_shared': dictionary.share()})
    world_train = create_task(opt, agent)
    opt['datatype'] = 'valid'
    world_valid = create_task(opt, agent)

    start = time.time()
    with world_train:
        for _ in range(opt['num_its']):
            print('[ training ]')
            for _ in range(opt['num_examples'] * opt.get('numthreads', 1)):
                world_train.parley()
            world_train.synchronize()

            print('[ validating ]')
            world_valid.reset()
            for _ in world_valid:  # check valid accuracy
                world_valid.parley()

            print('[ validation summary. ]')
            report_valid = world_valid.report()
            print(report_valid)
            if report_valid['accuracy'] > 0.95:
                break

        # show some example dialogs after training:
        world_valid = create_task(opt, agent)
        for _k in range(3):
            world_valid.parley()
            print(world_valid.display())

    print('finished in {} s'.format(round(time.time() - start, 2)))
def superscript_main(args=None):
    """
    Entry point that dispatches to any registered script by sub-command name.

    One sub-command is created for every entry in ``SCRIPT_REGISTRY``, reusing
    the actions and action groups of that script's own parser. The hidden
    ``help`` / ``helpall`` sub-commands, or no sub-command at all, print usage
    instead of running a script.

    :param args:
        argument list handed to ``parse_args``.
    """
    setup_script_registry()

    parser = _SupercommandParser(
        False, False, formatter_class=_SuperscriptHelpFormatter
    )
    parser.add_argument(
        '--helpall',
        action='helpall',
        help='show all commands, including advanced ones.',
    )
    parser.set_defaults(super_command=None)
    subparsers = parser.add_subparsers(
        parser_class=_SubcommandParser, title="Commands", metavar="COMMAND"
    )

    # Built-in help commands; their own help entries are suppressed.
    help_parser = subparsers.add_parser(
        'help',
        aliases=['h'],
        help=argparse.SUPPRESS,
        description="List the main commands",
    )
    help_parser.set_defaults(super_command='help')
    helpall_parser = subparsers.add_parser(
        'helpall',
        help=argparse.SUPPRESS,
        description="List all commands, including advanced ones.",
    )
    helpall_parser.set_defaults(super_command='helpall')

    # build the supercommand
    for script_name, registration in SCRIPT_REGISTRY.items():
        logging.verbose(f"Discovered command {script_name}")
        script_parser = registration.klass.setup_args()
        if script_parser is None:
            # the script defines no command line args, so supply an empty parser
            script_parser = ParlaiParser(False, False)

        if registration.hidden:
            help_ = argparse.SUPPRESS
        else:
            help_ = script_parser.description

        subparser = subparsers.add_parser(
            script_name,
            aliases=registration.aliases,
            help=help_,
            description=script_parser.description,
            formatter_class=CustomHelpFormatter,
        )
        subparser.set_defaults(super_command=script_name)
        # Copy the script's arguments onto the sub-command.
        for action in script_parser._actions:
            subparser._add_action(action)
        subparser._action_groups.extend(script_parser._action_groups)

    # Shell completion is optional: skipped when argcomplete is not installed.
    try:
        import argcomplete

        argcomplete.autocomplete(parser)
    except ModuleNotFoundError:
        pass

    opt = parser.parse_args(args, print_args=False)
    cmd = opt.pop('super_command')
    if cmd == 'helpall':
        parser.print_helpall()
    elif cmd is None or cmd == 'help':
        parser.print_help()
    else:
        SCRIPT_REGISTRY[cmd].klass._run_from_parser_and_opt(opt, parser)
def main():
    """
    Run a three-way dialog between one local human and two MTurk agents.

    Each MTurk agent first goes through the onboarding world, where they
    provide information about themselves, and is then placed into the
    conversation.

    You can end the conversation by sending a message ending with `[DONE]`
    from human_1.
    """
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    opt = argparser.parse_args()

    # The task name is the name of the directory containing this file.
    task_dir = os.path.dirname(os.path.abspath(__file__))
    opt['task'] = os.path.basename(task_dir)
    opt.update(task_config)

    human_agent_1_id = 'human_1'
    mturk_agent_ids = ['mturk_agent_1', 'mturk_agent_2']

    def run_onboard(worker):
        onboard_world = MTurkMultiAgentDialogOnboardWorld(
            opt=opt, mturk_agent=worker
        )
        while not onboard_world.episode_done():
            onboard_world.parley()
        onboard_world.shutdown()

    def check_worker_eligibility(worker):
        return True

    def assign_worker_roles(workers):
        # Role ids are handed out round-robin.
        n_roles = len(mturk_agent_ids)
        for position, worker in enumerate(workers):
            worker.id = mturk_agent_ids[position % n_roles]

    def run_conversation(mturk_manager, opt, workers):
        # Create the local human agent
        local_human = LocalHumanAgent(opt=None)
        local_human.id = human_agent_1_id

        # The local human goes first, followed by the two mturk agents.
        dialog_world = MTurkMultiAgentDialogWorld(
            opt=opt, agents=[local_human, workers[0], workers[1]]
        )
        while not dialog_world.episode_done():
            dialog_world.parley()
        dialog_world.shutdown()

    mturk_manager = MTurkManager(opt=opt, mturk_agent_ids=mturk_agent_ids)
    mturk_manager.setup_server()

    try:
        mturk_manager.start_new_run()
        mturk_manager.create_hits()

        # You can set onboard_function to None to skip onboarding
        mturk_manager.set_onboard_function(onboard_function=run_onboard)
        mturk_manager.ready_to_accept_workers()

        eligibility_function = {
            'func': check_worker_eligibility,
            'multiple': False,
        }

        mturk_manager.start_task(
            eligibility_function=eligibility_function,
            assign_role_function=assign_worker_roles,
            task_function=run_conversation,
        )
    finally:
        # Runs on success and on any exception.
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
def setup_rag_args(parser: ParlaiParser) -> ParlaiParser:
    """
    Add every RAG-related option to ``parser``, one argument group per concern.

    :param parser:
        parser to extend.

    :return:
        the same ``parser``.
    """
    # Standard RAG Agent Arguments
    core = parser.add_argument_group('RAG Model Args')
    core.add_argument(
        '--generation-model',
        type=str,
        choices=['transformer/generator', 'bart', 't5'],
        default='bart',
        help='which generation model to use',
    )
    core.add_argument(
        '--query-model',
        type=str,
        choices=QUERY_MODEL_TYPES,
        default='bert',
        help='Which query model to use for DPR.',
    )
    core.add_argument(
        '--rag-model-type',
        type=str,
        choices=['token', 'sequence', 'turn'],
        default='token',
        help='which rag model decoding to use.',
    )
    core.add_argument(
        '--thorough',
        type='bool',
        default=False,
        help='whether to use thorough decoding for rag sequence. ',
    )

    modified = parser.add_argument_group('Modified RAG Args')
    modified.add_argument(
        '--n-extra-positions',
        type=int,
        default=0,
        help='Specify > 0 to include extra positions in the encoder, in which '
        'retrieved knowledge will go. In this setup, knowledge is _appended_ '
        'instead of prepended.',
    )
    modified.add_argument(
        '--gold-knowledge-passage-key',
        type=str,
        default='checked_sentence',
        help='key in the observation dict that indicates the gold knowledge passage. '
        'Specify, along with --debug, to compute passage retrieval metrics at train/test time.',
    )
    modified.add_argument(
        '--gold-knowledge-title-key',
        type=str,
        default='title',
        help='key in the observation dict that indicates the gold knowledge passage title. '
        'Specify, along with --debug, to compute passage retrieval metrics at train/test time.',
    )

    retriever = parser.add_argument_group('RAG Retriever Args')
    retriever.add_argument(
        '--rag-retriever-query',
        type=str,
        choices=['one_turn', 'full_history'],
        default='full_history',
        help='What to use as the query for retrieval. `one_turn` retrieves only on the last turn '
        'of dialogue; `full_history` retrieves based on the full dialogue history.',
    )
    retriever.add_argument(
        '--rag-retriever-type',
        type=str,
        choices=[r.value for r in RetrieverType],
        default=RetrieverType.DPR.value,
        help='Which retriever to use',
    )
    retriever.add_argument(
        '--retriever-debug-index',
        type=str,
        choices=SMALL_INDEX_TYPES,
        default=None,
        help='Load specified small index, for debugging.',
    )
    retriever.add_argument(
        '--n-docs', type=int, default=5, help='How many documents to retrieve'
    )
    retriever.add_argument(
        '--min-doc-token-length',
        type=int,
        default=64,
        help='minimum amount of information to retain from document. '
        'Useful to define if encoder does not use a lot of BPE token context.',
    )
    retriever.add_argument(
        '--max-doc-token-length',
        type=int,
        default=256,
        help='maximum amount of information to retain from document. ',
    )
    retriever.add_argument(
        '--rag-query-truncate',
        type=int,
        default=512,
        help='Max token length of query for retrieval.',
    )
    retriever.add_argument(
        '--print-docs',
        type='bool',
        default=False,
        help='Whether to print docs; usually useful during interactive mode.',
    )

    dense = parser.add_argument_group('RAG Dense Passage Retriever Args')
    dense.add_argument(
        '--path-to-index',
        type=str,
        default=WIKIPEDIA_COMPRESSED_INDEX,
        help='path to FAISS Index.',
    )
    dense.add_argument(
        '--path-to-dense-embeddings',
        type=str,
        default=None,
        help='path to dense embeddings directory used to build index. '
        'Default None will assume embeddings and index are in the same directory.',
    )
    dense.add_argument(
        '--dpr-model-file', type=str, default=DPR_ZOO_MODEL, help='path to DPR Model.'
    )
    dense.add_argument(
        '--path-to-dpr-passages',
        type=str,
        default=WIKIPEDIA_ZOO_PASSAGES,
        help='Path to DPR passages, used to build index.',
    )
    dense.add_argument(
        '--retriever-embedding-size',
        type=int,
        default=768,
        help='Embedding size of dense retriever',
    )

    tfidf = parser.add_argument_group('RAG TFIDF Retriever Args')
    tfidf.add_argument(
        '--tfidf-max-doc-paragraphs',
        type=int,
        default=-1,
        help='If > 0, limit documents to this many paragraphs',
    )
    tfidf.add_argument(
        '--tfidf-model-path',
        type=str,
        default=TFIDF_ZOO_MODEL,
        help='Optionally override TFIDF model.',
    )

    dpr_poly = parser.add_argument_group('RAG DPR-POLY Retriever Args')
    dpr_poly.add_argument(
        '--dpr-num-docs',
        type=int,
        default=25,
        help='In two stage retrieval, how many DPR documents to retrieve',
    )
    dpr_poly.add_argument(
        '--poly-score-initial-lambda',
        type=float,
        default=0.5,
        help='In two stage retrieval, how much weight to give to the poly scores. '
        'Note: Learned parameter. Specify initial value here',
    )
    dpr_poly.add_argument(
        '--polyencoder-init-model',
        type=str,
        default='wikito',
        help='Which init model to initialize polyencoder with. Specify wikito or reddit to use '
        'models from the ParlAI zoo; otherwise, provide a path to a trained polyencoder',
    )

    poly_faiss = parser.add_argument_group('RAG PolyFAISS retriever args')
    poly_faiss.add_argument(
        '--poly-faiss-model-file',
        type=str,
        default=None,
        help='path to poly-encoder for use in poly-faiss retrieval.',
    )

    regret = parser.add_argument_group("RAG ReGReT args")
    regret.add_argument(
        '--regret',
        type='bool',
        default=False,
        help='Retrieve, Generate, Retrieve, Tune. '
        'Retrieve, generate, then retrieve again, and finally tune (refine).',
    )
    regret.add_argument(
        '--regret-intermediate-maxlen',
        type=int,
        default=32,
        help='Maximum length in intermediate regret generation',
    )
    regret.add_argument(
        '--regret-model-file',
        type=str,
        default=None,
        help='Path to model for initial round of retrieval. ',
    )

    indexer = parser.add_argument_group("RAG Indexer Args")
    indexer.add_argument(
        '--indexer-type',
        type=str,
        choices=['exact', 'compressed'],
        default='compressed',
        help='Granularity of RAG Indexer. Choose compressed to save on RAM costs, at the '
        'possible expense of accuracy.',
    )
    indexer.add_argument(
        '--indexer-buffer-size',
        type=int,
        default=65536,
        help='buffer size for adding vectors to the index',
    )
    indexer.add_argument(
        '--compressed-indexer-factory',
        type=str,
        default='IVF4096_HNSW128,PQ128',
        help='If specified, builds compressed indexer from a FAISS Index Factory. '
        'see https://github.com/facebookresearch/faiss/wiki/The-index-factory for details',
    )
    indexer.add_argument(
        '--compressed-indexer-gpu-train',
        type='bool',
        default=False,
        hidden=True,
        help='Set False to not train compressed indexer on the gpu.',
    )
    indexer.add_argument(
        '--compressed-indexer-nprobe',
        type=int,
        default=64,
        help='How many centroids to search in compressed indexer. See '
        'https://github.com/facebookresearch/faiss/wiki/Faiss-indexes#cell-probe-methods-indexivf-indexes '
        'for details',
    )
    # See https://github.com/facebookresearch/faiss/wiki/Faiss-indexes#indexhnsw-variants for details
    indexer.add_argument(
        '--hnsw-indexer-store-n',
        type=int,
        default=128,
        hidden=True,
        help='Granularity of DenseHNSWIndexer. Higher == more accurate, more RAM',
    )
    indexer.add_argument(
        '--hnsw-ef-search',
        type=int,
        default=128,
        hidden=True,
        help='Depth of exploration of search for HNSW.',
    )
    indexer.add_argument(
        '--hnsw-ef-construction',
        type=int,
        default=200,
        hidden=True,
        help='Depth of exploration at add time for HNSW',
    )

    return parser
def main():
    """
    Run the persona-chat evaluation MTurk task.

    A single MTurk worker (role ``PERSON_1``) is onboarded through
    ``PersonaProfileWorld`` and then takes part in a ``PersonaChatEvalWorld``
    conversation, which is configured with ``model_agent_opt``. After the
    episode the world saves its data, shuts down and reviews the work.

    Unassigned HITs are expired and the manager is shut down on exit, whether
    or not the run raised.
    """
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    argparser.add_argument('-min_t', '--min_turns', default=5, type=int,
                           help='minimum number of turns')
    argparser.add_argument('-mt', '--max_turns', default=10, type=int,
                           help='maximal number of chat turns')
    argparser.add_argument('-mx_rsp_time', '--max_resp_time', default=150,
                           type=int,
                           help='time limit for entering a dialog message')
    # The trailing space keeps the two concatenated literals from fusing
    # into "turkerentering".
    argparser.add_argument('-mx_psn_time', '--max_persona_time', type=int,
                           default=300, help='time limit for turker '
                           'entering the persona')
    argparser.add_argument('--ag_shutdown_time', default=120,
                           type=int,
                           help='time limit for entering a dialog message')
    argparser.add_argument('--persona-type', default='both', type=str,
                           choices=['both', 'self', 'other'],
                           help='Which personas to load from personachat')
    argparser.add_argument('--revised', default=True, type='bool',
                           help='Whether to use revised personas')
    argparser.add_argument('-rt', '--range_turn', default='5,7',
                           help='sample range of number of turns')
    opt = argparser.parse_args()
    opt['task'] = os.path.basename(os.path.dirname(os.path.abspath(__file__)))
    if 'data_path' not in opt:
        opt['data_path'] = os.getcwd() + '/data/' + opt['task']
    opt.update(task_config)

    mturk_agent_ids = ['PERSON_1']

    mturk_manager = MTurkManager(opt=opt, mturk_agent_ids=mturk_agent_ids)

    persona_generator = PersonasGenerator(opt)
    mturk_manager.setup_server()

    # SET MODEL AGENT OPT HERE
    model_agent_opt = {}

    try:
        mturk_manager.start_new_run()
        mturk_manager.create_hits()

        if not opt['is_sandbox']:
            # Fill in worker ids here to block them outside sandbox mode.
            blocked_worker_list = []
            for w in blocked_worker_list:
                mturk_manager.block_worker(
                    w,
                    'We found that you have unexpected behaviors in our previous HITs. For more questions please email us.'
                )

        def run_onboard(worker):
            worker.persona_generator = persona_generator
            world = PersonaProfileWorld(opt, worker)
            world.parley()
            world.shutdown()

        mturk_manager.set_onboard_function(onboard_function=run_onboard)
        mturk_manager.ready_to_accept_workers()

        def check_worker_eligibility(worker):
            return True

        def assign_worker_roles(workers):
            for index, worker in enumerate(workers):
                worker.id = mturk_agent_ids[index % len(mturk_agent_ids)]

        def run_conversation(mturk_manager, opt, workers):
            # Only the first worker takes part in the evaluation world.
            worker = workers[0]
            conv_idx = mturk_manager.conversation_index
            world = PersonaChatEvalWorld(
                opt=opt,
                agents=[worker],
                range_turn=[int(s) for s in opt['range_turn'].split(',')],
                max_turn=opt['max_turns'],
                max_resp_time=opt['max_resp_time'],
                model_agent_opt=model_agent_opt,
                world_tag='conversation t_{}'.format(conv_idx))
            world.reset_random()
            while not world.episode_done():
                world.parley()
            world.save_data()

            world.shutdown()
            world.review_work()

        mturk_manager.start_task(eligibility_function=check_worker_eligibility,
                                 assign_role_function=assign_worker_roles,
                                 task_function=run_conversation)
    finally:
        # Exceptions propagate unchanged; cleanup always runs.
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
def main():
    """
    Run a dialog between one local human agent and two MTurk agents.

    Each MTurk worker passes through ``MTurkMultiAgentDialogOnboardWorld``
    before joining the ``MTurkMultiAgentDialogWorld`` conversation. Unassigned
    HITs are expired and the manager is shut down on exit, whether or not the
    run raised.
    """
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    opt = argparser.parse_args()
    # The task name is the name of the directory containing this file.
    opt['task'] = os.path.basename(os.path.dirname(os.path.abspath(__file__)))
    opt.update(task_config)

    mturk_agent_1_id = 'mturk_agent_1'
    mturk_agent_2_id = 'mturk_agent_2'
    human_agent_1_id = 'human_1'
    mturk_agent_ids = [mturk_agent_1_id, mturk_agent_2_id]
    mturk_manager = MTurkManager(
        opt=opt,
        mturk_agent_ids=mturk_agent_ids
    )
    mturk_manager.setup_server()

    # try/finally alone gives the intended behaviour: exceptions propagate
    # and cleanup always runs, without a bare `except:` clause.
    try:
        mturk_manager.start_new_run()
        mturk_manager.create_hits()

        def run_onboard(worker):
            world = MTurkMultiAgentDialogOnboardWorld(
                opt=opt,
                mturk_agent=worker
            )
            while not world.episode_done():
                world.parley()
            world.shutdown()

        # You can set onboard_function to None to skip onboarding
        mturk_manager.set_onboard_function(onboard_function=run_onboard)
        mturk_manager.ready_to_accept_workers()

        def check_worker_eligibility(worker):
            return True

        def assign_worker_roles(workers):
            # Role ids are handed out round-robin.
            for index, worker in enumerate(workers):
                worker.id = mturk_agent_ids[index % len(mturk_agent_ids)]

        def run_conversation(mturk_manager, opt, workers):
            # Create mturk agents
            mturk_agent_1 = workers[0]
            mturk_agent_2 = workers[1]

            # Create the local human agents
            human_agent_1 = LocalHumanAgent(opt=None)
            human_agent_1.id = human_agent_1_id

            world = MTurkMultiAgentDialogWorld(
                opt=opt,
                agents=[human_agent_1, mturk_agent_1, mturk_agent_2]
            )

            while not world.episode_done():
                world.parley()

            world.shutdown()

        mturk_manager.start_task(
            eligibility_function=check_worker_eligibility,
            assign_role_function=assign_worker_roles,
            task_function=run_conversation
        )
    finally:
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
def setup_args(parser=None):
    """
    Build (or extend) the argument parser for the word-statistics script.

    :param parser:
        existing parser to extend; a new ``ParlaiParser`` is created when
        this is None.

    :return:
        the parser, with ``datatype`` defaulting to ``'valid'``.
    """
    if parser is None:
        parser = ParlaiParser(
            True, True, 'compute statistics from model predictions'
        )
    DictionaryAgent.add_cmdline_args(parser)

    # Script-specific command line arguments
    parser.add_argument('-ne', '--num-examples', default=-1, type=int)
    parser.add_argument('-ltim', '--log-every-n-secs', default=2, type=float)
    parser.add_argument(
        '-ed',
        '--external-dict',
        default=None,
        type=str,
        help='External dictionary for stat computation',
    )
    parser.add_argument(
        '-fb',
        '--freq-bins',
        default='0,100,1000,10000',
        type=str,
        help='Bins boundaries for rare words stat',
    )
    parser.add_argument(
        '-dup',
        '--dump-predictions-path',
        default=None,
        type=str,
        help='Dump predictions into file',
    )
    parser.add_argument(
        '-cun',
        '--compute-unique',
        default=True,
        type='bool',
        help='Compute %% of unique responses from the model',
    )

    parser.set_defaults(datatype='valid')
    TensorboardLogger.add_cmdline_args(parser)
    return parser
def main():
    """
    Train a model on a task, validating periodically, then evaluate it.

    Training stops when any of these happens: the requested number of epochs
    has been run, the maximum training time has elapsed, validation accuracy
    reaches 1, or validation has failed to improve ``--validation-patience``
    times in a row. The agents are saved on every new best validation score.
    If no validation ever improved, they are saved once at the end; otherwise
    the agent is re-created from ``opt`` before the final valid/test
    evaluation.
    """
    # Get command line arguments
    parser = ParlaiParser(True, True)
    train = parser.add_argument_group('Training Loop Arguments')
    train.add_argument('-et', '--evaltask',
                       help=('task to use for valid/test (defaults to the '
                             'one used for training if not set)'))
    train.add_argument('-d', '--display-examples',
                       type='bool', default=False)
    train.add_argument('-e', '--num-epochs', type=float, default=-1)
    train.add_argument('-ttim', '--max-train-time',
                       type=float, default=-1)
    train.add_argument('-ltim', '--log-every-n-secs',
                       type=float, default=2)
    train.add_argument('-vtim', '--validation-every-n-secs',
                       type=float, default=-1)
    train.add_argument('-vme', '--validation-max-exs',
                       type=int, default=-1,
                       help='max examples to use during validation (default '
                            '-1 uses all)')
    train.add_argument('-vp', '--validation-patience',
                       type=int, default=5,
                       help=('number of iterations of validation where result'
                             ' does not improve before we stop training'))
    train.add_argument('-vmt', '--validation-metric', default='accuracy',
                       help='key into report table for selecting best '
                            'validation')
    train.add_argument('-dbf', '--dict-build-first',
                       type='bool', default=True,
                       help='build dictionary first before training agent')
    opt = parser.parse_args()

    # Possibly build a dictionary (not all models do this).
    if opt['dict_build_first'] and 'dict_file' in opt:
        if opt['dict_file'] is None and opt.get('model_file'):
            opt['dict_file'] = opt['model_file'] + '.dict'
        print("[ building dictionary first... ]")
        build_dict.build_dict(opt)

    # Create model and assign it to the specified task
    agent = create_agent(opt)
    world = create_task(opt, agent)

    train_time = Timer()
    validate_time = Timer()
    log_time = Timer()
    print('[ training... ]')
    parleys = 0
    total_exs = 0
    max_exs = opt['num_epochs'] * len(world)
    max_parleys = math.ceil(max_exs / opt['batchsize'])
    best_valid = 0
    impatience = 0
    saved = False
    valid_world = None
    while True:
        world.parley()
        parleys += 1

        if opt['num_epochs'] > 0 and parleys >= max_parleys:
            print('[ num_epochs completed: {} ]'.format(opt['num_epochs']))
            break
        if opt['max_train_time'] > 0 and train_time.time() > opt['max_train_time']:
            print('[ max_train_time elapsed: {} ]'.format(train_time.time()))
            break
        if opt['log_every_n_secs'] > 0 and log_time.time() > opt['log_every_n_secs']:
            if opt['display_examples']:
                print(world.display() + '\n~~')

            logs = []
            # time elapsed
            logs.append('time:{}s'.format(math.floor(train_time.time())))
            logs.append('parleys:{}'.format(parleys))

            # get report and update total examples seen so far
            if hasattr(agent, 'report'):
                train_report = agent.report()
                agent.reset_metrics()
            else:
                train_report = world.report()
                world.reset_metrics()

            if hasattr(train_report, 'get') and train_report.get('total'):
                total_exs += train_report['total']
                logs.append('total_exs:{}'.format(total_exs))

            # check if we should log amount of time remaining
            time_left = None
            # Without any counted examples there is no rate to extrapolate
            # from; skipping the estimate avoids a ZeroDivisionError.
            if opt['num_epochs'] > 0 and total_exs > 0:
                secs_per_ex = train_time.time() / total_exs
                time_left = (max_exs - total_exs) * secs_per_ex
            if opt['max_train_time'] > 0:
                other_time_left = opt['max_train_time'] - train_time.time()
                if time_left is not None:
                    time_left = min(time_left, other_time_left)
                else:
                    time_left = other_time_left
            if time_left is not None:
                logs.append('time_left:{}s'.format(math.floor(time_left)))

            # join log string and add full metrics report to end of log
            log = '[ {} ] {}'.format(' '.join(logs), train_report)

            print(log)
            log_time.reset()

        if (opt['validation_every_n_secs'] > 0 and
                validate_time.time() > opt['validation_every_n_secs']):
            valid_report, valid_world = run_eval(
                agent, opt, 'valid', opt['validation_max_exs'],
                valid_world=valid_world)
            if valid_report[opt['validation_metric']] > best_valid:
                best_valid = valid_report[opt['validation_metric']]
                impatience = 0
                print('[ new best {}: {} ]'.format(
                    opt['validation_metric'], best_valid))
                world.save_agents()
                saved = True
                if opt['validation_metric'] == 'accuracy' and best_valid == 1:
                    print('[ task solved! stopping. ]')
                    break
            else:
                impatience += 1
                print('[ did not beat best {}: {} impatience: {} ]'.format(
                    opt['validation_metric'], round(best_valid, 4),
                    impatience))
            validate_time.reset()
            if opt['validation_patience'] > 0 and impatience >= opt['validation_patience']:
                print('[ ran out of patience! stopping training. ]')
                break
    world.shutdown()
    if not saved:
        world.save_agents()
    else:
        # reload best validation model
        agent = create_agent(opt)

    run_eval(agent, opt, 'valid', write_log=True)
    run_eval(agent, opt, 'test', write_log=True)
def main():
    """
    Run the deal-or-no-deal negotiation task on MTurk.

    By default one MTurk worker negotiates with a local agent (a model when
    ``'model'`` is in ``opt``, otherwise a local human). With
    ``--two_mturk_agents`` the conversation is between two MTurk workers and
    no local agent is added.
    """
    argparser = ParlaiParser(False, False)
    argparser.add_parlai_data_path()
    argparser.add_mturk_args()
    argparser.add_argument(
        '--two_mturk_agents',
        dest='two_mturk_agents',
        action='store_true',
        help='data collection mode '
        'with converations between two MTurk agents',
    )

    opt = argparser.parse_args()
    opt['task'] = 'dealnodeal'
    opt['datatype'] = 'valid'
    opt.update(task_config)

    local_agent_1_id = 'local_1'
    mturk_agent_ids = ['mturk_agent_1']
    if opt['two_mturk_agents']:
        mturk_agent_ids.append('mturk_agent_2')

    def check_worker_eligibility(worker):
        return True

    def assign_worker_roles(workers):
        # Role ids are handed out round-robin.
        n_roles = len(mturk_agent_ids)
        for position, worker in enumerate(workers):
            worker.id = mturk_agent_ids[position % n_roles]

    def run_conversation(mturk_manager, opt, workers):
        agents = workers[:]

        # Create a local agent
        if not opt['two_mturk_agents']:
            local_agent = (
                create_agent(opt) if 'model' in opt else LocalHumanAgent(opt=None)
            )
            local_agent.id = local_agent_1_id
            agents.append(local_agent)

        opt["batchindex"] = mturk_manager.started_conversations

        dialog_world = MTurkDealNoDealDialogWorld(opt=opt, agents=agents)
        while not dialog_world.episode_done():
            dialog_world.parley()
        dialog_world.shutdown()

    mturk_manager = MTurkManager(opt=opt, mturk_agent_ids=mturk_agent_ids)
    mturk_manager.setup_server()

    try:
        mturk_manager.start_new_run()
        mturk_manager.create_hits()

        # No onboarding step for this task.
        mturk_manager.set_onboard_function(onboard_function=None)
        mturk_manager.ready_to_accept_workers()

        mturk_manager.start_task(
            eligibility_function=check_worker_eligibility,
            assign_role_function=assign_worker_roles,
            task_function=run_conversation,
        )
    finally:
        # Runs on success and on any exception.
        mturk_manager.expire_all_unassigned_hits()
        mturk_manager.shutdown()
def setup_args(cls) -> ParlaiParser:
    """
    Build the parser for the fairseq input path(s), the ParlAI output path,
    and the tokenizer and model options.

    :return:
        a freshly created ``ParlaiParser`` with all options registered.
    """
    parser = ParlaiParser()

    # Input / output locations
    parser.add_argument(
        '--input',
        nargs='+',
        type=str,
        help='The input fairseq model path. Specify multiple to imply a join is necessary',
    )
    parser.add_argument(
        '--output', type=str, help='The output ParlAI model path'
    )
    parser.add_argument(
        '--vocab',
        type=str,
        help='The hugging face vocab file path, if applicable',
    )
    parser.add_argument(
        '--merge',
        type=str,
        help='The hugging face merge file path, if applicable',
    )

    # Tokenization and model options
    parser.add_argument(
        '--add-prefix-space',
        default=True,
        type='bool',
        help='Add prefix space for hugging face bpe',
    )
    parser.add_argument(
        '--activation',
        choices=['relu', 'gelu'],
        default='gelu',
        type=str,
        help='Activation function',
    )
    parser.add_argument(
        '--tokenizer',
        choices=['bytelevelbpe', 'gpt2'],
        default='bytelevelbpe',
        type=str,
        help='Dict tokenizer',
    )
    parser.add_argument('--delimiter', default=' ', type=str, help='Delimiter')
    parser.add_argument(
        '--retain-bos-emb',
        default=False,
        type='bool',
        help='Retain the BOS embedding.',
    )
    parser.add_argument(
        '--model',
        default='transformer/generator',
        type=str,
        help='Which ParlAI agent to use.',
    )
    parser.add_argument(
        '--fp16',
        default=False,
        type='bool',
        help='Whether to initialize with fp16',
    )
    parser.add_argument(
        '--history-add-global-end-token',
        choices=[None, 'end'],
        default='end',
        type='nonestr',
        hidden=True,
        help='Add special token to the end of history encoding.',
    )
    return parser
def add_cmdline_args(
    cls, parser: ParlaiParser, partial_opt: Optional[Opt] = None
) -> ParlaiParser:
    """
    Add command-line arguments specifically for this agent.

    The ``TransformerRankerAgent`` arguments are registered first, followed
    by a 'Polyencoder Arguments' group.

    :param parser:
        parser to extend.
    :param partial_opt:
        forwarded to ``TransformerRankerAgent.add_cmdline_args``.

    :return:
        the 'Polyencoder Arguments' group (not the top-level parser), kept
        unchanged for existing callers.
    """
    TransformerRankerAgent.add_cmdline_args(parser, partial_opt=partial_opt)
    agent = parser.add_argument_group('Polyencoder Arguments')
    # Each fragment below ends with a space (or hyphen) so the implicitly
    # concatenated help text keeps its word boundaries.
    agent.add_argument(
        '--polyencoder-type',
        type=str,
        default='codes',
        choices=['codes', 'n_first'],
        help='Type of polyencoder, either we compute '
        'vectors using codes + attention, or we '
        'simply take the first N vectors.',
        recommended='codes',
    )
    agent.add_argument(
        '--poly-n-codes',
        type=int,
        default=64,
        help='number of vectors used to represent the context; '
        'in the case of n_first, those are the number '
        'of vectors that are considered.',
        recommended=64,
    )
    agent.add_argument(
        '--poly-attention-type',
        type=str,
        default='basic',
        choices=['basic', 'sqrt', 'multihead'],
        help='Type of the top aggregation layer of the poly-'
        'encoder (where the candidate representation is '
        'the key)',
        recommended='basic',
    )
    agent.add_argument(
        '--poly-attention-num-heads',
        type=int,
        default=4,
        help='In case poly-attention-type is multihead, '
        'specify the number of heads',
    )

    # Those arguments are here in case where polyencoder type is 'code'
    agent.add_argument(
        '--codes-attention-type',
        type=str,
        default='basic',
        choices=['basic', 'sqrt', 'multihead'],
        help='Type ',
        recommended='basic',
    )
    agent.add_argument(
        '--codes-attention-num-heads',
        type=int,
        default=4,
        help='In case codes-attention-type is multihead, '
        'specify the number of heads',
    )
    return agent