def interactive(opt: 'zoo:tutorial_transformer_generator/model'):
    """
    Run a chat between a local human agent and a model agent.

    :param opt:
        parsed options. If a ``ParlaiParser`` is passed instead, an error is
        logged and its arguments are parsed. Keys read here: ``outfile``,
        ``display_examples`` and, when logging is on, ``save_format``.

    World acts are written to ``opt['outfile']`` once the epoch is over, and
    only when ``opt['outfile']`` is set.
    """
    if isinstance(opt, ParlaiParser):
        logging.error('interactive should be passed opt not Parser')
        opt = opt.parse_args()

    # Build the two participants of the conversation.
    agent = create_agent(opt, requireModelExists=False)
    agent.opt.log()
    human_agent = LocalHumanAgent(opt)

    # A world logger only exists when an output file was requested.
    world_logger = None
    if opt.get('outfile'):
        world_logger = WorldLogger(opt)
    world = create_task(opt, [human_agent, agent])

    while not world.epoch_done():
        world.parley()
        # NOTE(review): these two prints look like leftover debugging output;
        # they are kept because they are part of the current behaviour.
        print("done by me!")
        print(world.display())

        chat_was_reset = world.epoch_done() or world.get_total_parleys() <= 0
        if chat_was_reset:
            # chat was reset with [DONE], [EXIT] or EOF
            if world_logger is not None:
                world_logger.reset()
        else:
            if world_logger is not None:
                world_logger.log(world)
            if opt.get('display_examples'):
                print("---")
                print(world.display())

    if world_logger is None:
        return
    # dump world acts to file
    world_logger.write(opt['outfile'], world, file_format=opt['save_format'])
def interactive(opt, print_parser=None):
    """
    Run a chat between a local human agent and a model agent.

    :param opt:
        parsed options. If a ``ParlaiParser`` is passed instead, an error is
        logged and its arguments are parsed.
    :param print_parser:
        ``True`` reuses ``opt`` as the parser when ``opt`` is a
        ``ParlaiParser``; ``False`` or ``None`` disables argument printing;
        any other truthy value is used as the parser whose arguments are
        printed after the model is loaded.

    When ``opt['save_world_logs']`` is truthy, the logged acts are written to
    ``<report_filename up to the first '.'>_<task>_replies.jsonl``.
    """
    # Normalise the print_parser flag.
    if print_parser is True and isinstance(opt, ParlaiParser):
        print_parser = opt
    elif print_parser is False:
        print_parser = None

    if isinstance(opt, ParlaiParser):
        logging.error('interactive should be passed opt not Parser')
        opt = opt.parse_args()

    # Create model and assign it to the specified task
    agent = create_agent(opt, requireModelExists=True)
    if print_parser:
        # Show arguments after loading model
        print_parser.opt = agent.opt
        print_parser.print_args()
    human_agent = LocalHumanAgent(opt)

    # set up world logger
    world_logger = None
    if opt['save_world_logs']:
        world_logger = WorldLogger(opt)
    world = create_task(opt, [human_agent, agent])

    # Keep talking until the world reports the epoch as done.
    while not world.epoch_done():
        world.parley()
        if world_logger is not None:
            world_logger.log(world)
        if opt.get('display_examples'):
            print("---")
            print(world.display())

    if world_logger is None:
        return

    # dump world acts to file
    world_logger.reset()  # add final acts to logs
    filename_stem = opt['report_filename'].split('.')[0]
    replies_path = f'{filename_stem}_{opt["task"]}_replies.jsonl'
    world_logger.write(replies_path, world, file_format=opt['save_format'])
def self_chat(opt, print_parser=None):
    """
    Let a model talk to a clone of itself and log the conversation.

    :param opt:
        parsed options (a ``ParlaiParser`` is accepted with a deprecation
        message). Keys read here: ``seed``, ``task``, ``num_examples``,
        ``outfile``, ``format`` and optionally ``log_every_n_secs``,
        ``batchsize``, ``display_examples``.
    :param print_parser:
        ``True`` reuses ``opt`` as the parser when ``opt`` is a
        ``ParlaiParser``; ``False`` or ``None`` disables argument printing.

    :return:
        whatever ``WorldLogger.get_logs()`` returns after the run.
    """
    if print_parser is True and isinstance(opt, ParlaiParser):
        print_parser = opt
    elif print_parser is False:
        print_parser = None

    if isinstance(opt, ParlaiParser):
        print('[ Deprecated Warning: self_chat should be passed opt not Parser ]')
        opt = opt.parse_args()

    random.seed(opt['seed'])

    # The second speaker is a clone of the first with a distinguishable id.
    agent1 = create_agent(opt, requireModelExists=True)
    agent2 = agent1.clone()
    if hasattr(agent2, 'id'):
        agent2.id = agent2.id + "2"

    # Check for `selfchat` in the task name
    if 'selfchat' not in opt['task']:
        warn_once(
            'You are using self chat with task {}. '.format(opt['task'])
            + 'If your task has an existing self chat world, then run with '
            '-t {}:selfchat'.format(opt['task'])
        )

    world = create_task(opt, [agent1, agent2])

    if print_parser:
        # Show arguments after loading model
        print_parser.opt = agent1.opt
        print_parser.print_args()

    # A non-positive interval disables the periodic progress line.
    requested_interval = opt.get('log_every_n_secs', -1)
    log_every_n_secs = (
        float('inf') if requested_interval <= 0 else requested_interval
    )
    log_time = TimeLogger()
    logger = WorldLogger(opt)

    # Run some self chats.
    max_cnt = opt['num_examples']
    cnt = 0
    while cnt < max_cnt:
        cnt += opt.get('batchsize', 1)
        world.parley()
        logger.log(world)

        if opt.get('display_examples'):
            print(world.display())
        if log_time.time() > log_every_n_secs:
            print(log_time.log(cnt, max_cnt))

    if opt.get('display_examples'):
        print('-- end of episode --')

    logger.reset_world()  # flush last episode
    logger.write(opt['outfile'], opt['format'])
    return logger.get_logs()
def _eval_single_world(opt, agent, task):
    """
    Evaluate ``agent`` in one world and return that world's report.

    :param opt:
        options dict. Keys read here: ``save_world_logs``, ``num_examples``,
        ``display_examples``, ``report_filename`` and optionally
        ``datatype``, ``log_every_n_secs``, ``batchsize``.
    :param agent:
        agent handed to ``create_task``.
    :param task:
        task name; used in the banner and in the replies file name.

    :return:
        ``world.report()`` taken after the evaluation loop.
    """
    print('[ Evaluating task {} using datatype {}. ] '.format(
        task, opt.get('datatype', 'N/A')))

    # set up world logger
    world_logger = None
    if opt['save_world_logs']:
        world_logger = WorldLogger(opt)

    # NOTE(review): the copy is NOT given ``task`` (that assignment is
    # commented out in the original), so the world is built from the task
    # already stored in ``opt`` -- confirm this is intended.
    task_opt = opt.copy()
    world = create_task(task_opt, agent)  # create worlds for tasks

    # A non-positive interval disables the periodic progress line.
    requested_interval = opt.get('log_every_n_secs', -1)
    log_every_n_secs = (
        float('inf') if requested_interval <= 0 else requested_interval
    )
    log_time = TimeLogger()

    # max number of examples to evaluate
    max_cnt = float('inf')
    if opt['num_examples'] > 0:
        max_cnt = opt['num_examples']

    cnt = 0
    while not world.epoch_done() and cnt < max_cnt:
        cnt += opt.get('batchsize', 1)
        world.parley()
        if world_logger is not None:
            world_logger.log(world)
        if opt['display_examples']:
            # display examples
            print(world.display() + '\n~~')
        if log_time.time() > log_every_n_secs:
            interim = world.report()
            text, _ = log_time.log(
                interim.get('exs', 0), world.num_examples(), interim)
            print(text)

    report = world.report()
    print("Printing Report")
    print(report)
    world.reset()

    if world_logger is not None:
        # dump world acts to file
        world_logger.reset()  # add final acts to logs
        base_outfile = opt['report_filename'].split('.')[0]
        print("filename: ", base_outfile)
        replies_path = base_outfile + f'_{task}_replies.jsonl'
        world_logger.write_parlai_format(replies_path)

    return report
def _eval_single_world(opt, agent, task):
    """
    Evaluate ``agent`` on a single task and return the aggregated report.

    :param opt:
        options dict. Keys read here: ``world_logs``, ``num_examples``,
        ``display_examples``, ``save_format`` (when logging) and optionally
        ``datatype``, ``log_every_n_secs``, ``batchsize``.
    :param agent:
        agent handed to ``create_task``.
    :param task:
        task name written into a copy of ``opt`` before the world is built.

    :return:
        ``aggregate_unnamed_reports(all_gather_list(world.report()))``.
    """
    logging.info(
        f'Evaluating task {task} using datatype {opt.get("datatype")}.')

    # set up world logger
    world_logger = None
    if opt['world_logs']:
        world_logger = WorldLogger(opt)

    # Work on a copy so the caller's opt keeps its original task.
    task_opt = opt.copy()
    task_opt['task'] = task
    world = create_task(task_opt, agent)  # create worlds for tasks

    # A non-positive interval disables the periodic progress line.
    requested_interval = opt.get('log_every_n_secs', -1)
    log_every_n_secs = (
        float('inf') if requested_interval <= 0 else requested_interval
    )
    log_time = TimeLogger()

    # max number of examples to evaluate
    max_cnt = float('inf')
    if opt['num_examples'] > 0:
        max_cnt = opt['num_examples']
    cnt = 0
    total_cnt = world.num_examples()

    if is_distributed():
        logging.warning('Progress bar is approximate in distributed mode.')

    while not world.epoch_done() and cnt < max_cnt:
        cnt += opt.get('batchsize', 1)
        world.parley()
        if world_logger is not None:
            world_logger.log(world)
        if opt['display_examples']:
            # display examples
            print(world.display() + '\n~~')
        if log_time.time() > log_every_n_secs:
            interim = world.report()
            text, _ = log_time.log(
                interim.get('exs', 0), min(max_cnt, total_cnt), interim)
            logging.info(text)

    if world_logger is not None:
        # dump world acts to file
        world_logger.reset()  # add final acts to logs
        if is_distributed():
            # Each rank writes its own file: <base>_<rank><extension>.
            rank = get_rank()
            stem, extension = os.path.splitext(opt['world_logs'])
            outfile = f'{stem}_{rank}{extension}'
        else:
            outfile = opt['world_logs']
        world_logger.write(outfile, world, file_format=opt['save_format'])

    report = aggregate_unnamed_reports(all_gather_list(world.report()))
    world.reset()
    return report
def interactive(opt, print_parser=None):
    """
    Chat with a model until the epoch ends or the user presses Ctrl-C.

    :param opt:
        parsed options (a ``ParlaiParser`` is accepted with a deprecation
        message). Keys read here: ``save_world_logs``, ``display_examples``
        and, on interrupt with logging on, ``report_filename`` and
        ``world_logs_format``.
    :param print_parser:
        ``True`` reuses ``opt`` as the parser when ``opt`` is a
        ``ParlaiParser``; ``False`` or ``None`` disables argument printing.

    The world log is written only from the ``KeyboardInterrupt`` handler;
    leaving the loop because the epoch is done writes nothing.
    """
    if print_parser is True and isinstance(opt, ParlaiParser):
        print_parser = opt
    elif print_parser is False:
        print_parser = None

    if isinstance(opt, ParlaiParser):
        print(
            '[ Deprecated Warning: interactive should be passed opt not Parser ]'
        )
        opt = opt.parse_args()

    # Create model and assign it to the specified task
    agent = create_agent(opt, requireModelExists=True)
    human_agent = LocalHumanAgent(opt)
    world = create_task(opt, [human_agent, agent])

    # set up world logger
    world_logger = None
    if opt['save_world_logs']:
        world_logger = WorldLogger(opt)

    if print_parser:
        # Show arguments after loading model
        print_parser.opt = agent.opt
        print_parser.print_args()

    while True:
        try:
            world.parley()
            if world_logger is not None:
                world_logger.log(world)
            if opt.get('display_examples'):
                print("---")
                print(world.display())
            if world.epoch_done():
                print("EPOCH DONE")
                break
        except KeyboardInterrupt:
            if world_logger is not None:
                print("\nWriting out world log.")
                # The report is requested but its value is not used.
                world.report()
                world.reset()
                # dump world acts to file
                world_logger.reset()  # add final acts to logs
                filename_stem = opt['report_filename'].split('.')[0]
                replies_path = filename_stem + '_interactive_replies.json'
                world_logger.write(
                    replies_path, file_format=opt['world_logs_format'])
            quit()
def self_chat(opt, print_parser=None):
    """
    Let a model talk to a clone of itself and write the logged conversation.

    :param opt:
        parsed options (a ``ParlaiParser`` is accepted with a deprecation
        message). Keys read here: ``seed``, ``num_examples``, ``outfile``,
        ``format`` and optionally ``log_every_n_secs``, ``batchsize``,
        ``display_examples``.
    :param print_parser:
        ``True`` reuses ``opt`` as the parser when ``opt`` is a
        ``ParlaiParser``; ``False`` or ``None`` disables argument printing.
    """
    if print_parser is True and isinstance(opt, ParlaiParser):
        print_parser = opt
    elif print_parser is False:
        print_parser = None

    if isinstance(opt, ParlaiParser):
        print(
            '[ Deprecated Warning: self_chat should be passed opt not Parser ]'
        )
        opt = opt.parse_args()

    random.seed(opt['seed'])

    # The second speaker is a clone of the first with a distinguishable id.
    agent1 = create_agent(opt, requireModelExists=True)
    agent2 = agent1.clone()
    if hasattr(agent2, 'id'):
        agent2.id = agent2.id + "2"

    world = create_task(opt, [agent1, agent2])

    if print_parser:
        # Show arguments after loading model
        print_parser.opt = agent1.opt
        print_parser.print_args()

    # A non-positive interval disables the periodic progress line.
    requested_interval = opt.get('log_every_n_secs', -1)
    log_every_n_secs = (
        float('inf') if requested_interval <= 0 else requested_interval
    )
    log_time = TimeLogger()
    logger = WorldLogger(opt)

    # Run some self chats.
    max_cnt = opt['num_examples']
    cnt = 0
    while cnt < max_cnt:
        cnt += opt.get('batchsize', 1)
        world.parley()
        logger.log(world)

        if opt.get('display_examples'):
            print("---")
            print(world.display())
        if log_time.time() > log_every_n_secs:
            print(log_time.log(cnt, max_cnt))

    logger.write(opt['outfile'], opt['format'])
def interactive(opt, print_parser=None):
    """
    Feed scripted queries from a text file to a model and log the replies.

    The human side is an ``AutoQueryAgent`` built with
    ``query_txt=opt['report_filename'] + ".txt"``. The world is parleyed once
    per newline character found in that same file, and the log is written in
    ``'text'`` format to
    ``<report_filename up to the first '.'>_<task>_replies.jsonl``.

    :param opt:
        parsed options (a ``ParlaiParser`` is accepted with a deprecation
        message). Keys read here: ``report_filename``, ``task`` and
        optionally ``display_examples``.
    :param print_parser:
        ``True`` reuses ``opt`` as the parser when ``opt`` is a
        ``ParlaiParser``; ``False`` or ``None`` disables argument printing.
    """
    if print_parser is True and isinstance(opt, ParlaiParser):
        print_parser = opt
    elif print_parser is False:
        print_parser = None

    if isinstance(opt, ParlaiParser):
        print(
            '[ Deprecated Warning: interactive should be passed opt not Parser ]'
        )
        opt = opt.parse_args()

    # Create model and assign it to the specified task
    agent = create_agent(opt, requireModelExists=True)
    if print_parser:
        # Show arguments after loading model
        print_parser.opt = agent.opt
        print_parser.print_args()

    # The scripted agent stands in for a LocalHumanAgent.
    human_agent = AutoQueryAgent(
        opt, query_txt=opt['report_filename'] + ".txt")
    world_logger = WorldLogger(opt)
    world = create_task(opt, [human_agent, agent])

    # One turn per newline in the query file; a final line without a
    # trailing newline is therefore not counted.
    with open(opt['report_filename'] + ".txt", "r") as query_file:
        num_turns = query_file.read().count("\n")

    for _ in range(num_turns):
        world.parley()
        if world_logger is not None:
            world_logger.log(world)
        if opt.get('display_examples'):
            print("---")
            print(world.display())

    if world_logger is not None:
        world_logger.reset()
        filename_stem = opt['report_filename'].split('.')[0]
        replies_path = f'{filename_stem}_{opt["task"]}_replies.jsonl'
        world_logger.write(replies_path, world, file_format='text')
def _run_single_eval(self, opt, valid_world, max_exs, datatype, is_multitask, task):
    """
    Run evaluation on a single world and return its report.

    :param opt:
        options dict. Keys read here: ``world_logs``, ``display_examples``,
        ``save_format`` (when logging) and optionally
        ``validation_share_agent``.
    :param valid_world:
        world to evaluate; it is reset before the loop starts.
    :param max_exs:
        maximum number of examples; a non-positive value means no limit.
    :param datatype:
        world logs are only recorded when this equals ``'test'``.
    :param is_multitask:
        forwarded to ``get_task_world_logs``.
    :param task:
        forwarded to ``get_task_world_logs``.

    :return:
        ``valid_world.report()`` taken after the loop.
    """
    valid_world.reset()

    task_opt = opt.copy()
    world_logger = None
    # set up world logger for the "test" fold
    if opt['world_logs'] and datatype == 'test':
        task_opt['world_logs'] = get_task_world_logs(
            task, opt['world_logs'], is_multitask
        )
        world_logger = WorldLogger(task_opt)

    max_cnt = float('inf')
    if max_exs > 0:
        max_cnt = max_exs

    cnt = 0
    while not valid_world.epoch_done() and cnt < max_cnt:
        valid_world.parley()
        if world_logger is not None:
            world_logger.log(valid_world)
        # Only the first parley is displayed.
        if cnt == 0 and opt['display_examples']:
            print(valid_world.display() + '\n~~')
            print(valid_world.report())
        # Progress comes from the report's 'exs' entry (0 if missing/falsy).
        cnt = valid_world.report().get('exs') or 0

    if world_logger is not None:
        # dump world acts to file
        world_logger.reset()  # add final acts to logs
        if is_distributed():
            # Each rank writes its own file: <base>_<rank><extension>.
            rank = get_rank()
            stem, extension = os.path.splitext(task_opt['world_logs'])
            outfile = f'{stem}_{rank}{extension}'
        else:
            outfile = task_opt['world_logs']
        world_logger.write(outfile, valid_world, file_format=opt['save_format'])

    valid_report = valid_world.report()
    if opt.get('validation_share_agent', False):
        valid_world.reset()  # make sure world doesn't remember valid data

    return valid_report
def self_chat(opt, print_parser=None):
    """
    Run self-chat between two (possibly different) models and store the logs.

    Agent 1 is created from ``opt['model_file1']`` and agent 2 from
    ``opt['model_file2']``. Dialogues are logged with a ``WorldLogger`` and
    handed to ``store_logger`` together with a MongoDB collection.

    :param opt:
        options. Keys read here: ``mongo_host``, ``mongo_port``,
        ``user_name``, ``password``, ``collection_name``, ``seed``,
        ``model_file1``, ``model_file2``, ``num_dialogues`` and optionally
        ``log_every_n_secs``. ``model_file`` and ``random_order`` are
        overwritten.
    :param print_parser:
        ``True`` reuses ``opt`` as the parser when ``opt`` is a
        ``ParlaiParser``; ``False`` or ``None`` disables argument printing.

    NOTE(review): ``opt`` is indexed for the Mongo settings before the
    ``ParlaiParser`` check below, so passing a parser presumably fails before
    the deprecation branch is reached -- confirm.
    NOTE(review): ``parser`` is not defined inside this function; it has to
    exist as a module-level name -- verify against the rest of the file.
    """
    client = MongoClient(
        opt['mongo_host'],
        opt['mongo_port'],
        username=opt['user_name'],
        password=opt['password'],
        #authSource=DATABASE_NAME
    )
    db = client[DATABASE_NAME]
    collection = db[opt['collection_name']]
    if print_parser is not None:
        if print_parser is True and isinstance(opt, ParlaiParser):
            print_parser = opt
        elif print_parser is False:
            print_parser = None
    if isinstance(opt, ParlaiParser):
        print(
            '[ Deprecated Warning: self_chat should be passed opt not Parser ]'
        )
        opt = opt.parse_args()
    random.seed(opt['seed'])
    # Create models
    # Agent 1: two known checkpoints get model-specific parser defaults; in
    # those branches opt is rebound to a fresh parser.parse_args() result.
    # NOTE(review): the 'model_file' value assigned just below lives on the
    # old opt object; whether the re-parsed opt still points at model_file1
    # depends on the parser -- confirm.
    opt['model_file'] = opt['model_file1']
    if opt['model_file'] == 'tmp/convai2/lost_in_conversation/last_checkpoint':
        parser.set_defaults(
            model=
            'projects.convai2.baselines.transformer_chatbot.agent:TransformerAgent',
            sample=False,
            wild_mode=False,
            replace_repeat=False,
            replace_ngram=False,
            detokenize=False,
            emoji_prob=0,
            add_questions=0,
            clean_emoji=False,
            check_grammar=False,
            correct_generative=False,
            split_into_sentences=False,
            max_seq_len=256,
            beam_size=3,
            annealing_topk=None,
            annealing=0.6,
            length_penalty=0.6)
        opt = parser.parse_args()
        agent1 = create_agent(opt, requireModelExists=True)
    elif opt['model_file'] == 'tmp/convai2/huggingface/model':
        parser.set_params(
            model=
            'projects.convai2.baselines.huggingface.convai_evaluation:TransformerAgent'
        )
        opt = parser.parse_args()
        agent1 = create_agent(opt, requireModelExists=True)
    else:
        agent1 = create_agent(opt, requireModelExists=True)
    # Agent 2: same three-way selection, driven by model_file2.
    opt['model_file'] = opt['model_file2']
    if opt['model_file'] == 'tmp/convai2/lost_in_conversation/last_checkpoint':
        parser.set_defaults(
            model=
            'projects.convai2.baselines.transformer_chatbot.agent:TransformerAgent',
            sample=False,
            wild_mode=False,
            replace_repeat=False,
            replace_ngram=False,
            detokenize=False,
            emoji_prob=0,
            add_questions=0,
            clean_emoji=False,
            check_grammar=False,
            correct_generative=False,
            split_into_sentences=False,
            max_seq_len=256,
            beam_size=3,
            annealing_topk=None,
            annealing=0.6,
            length_penalty=0.6)
        opt = parser.parse_args()
        agent2 = create_agent(opt, requireModelExists=True)
    elif opt['model_file'] == 'tmp/convai2/huggingface/model':
        parser.set_params(
            model=
            'projects.convai2.baselines.huggingface.convai_evaluation:TransformerAgent'
        )
        opt = parser.parse_args()
        agent2 = create_agent(opt, requireModelExists=True)
    else:
        agent2 = create_agent(opt, requireModelExists=True)
    # Give the second agent a distinguishable id.
    if hasattr(agent2, 'id'):
        agent2.id = agent2.id + "2"
    opt['random_order'] = False
    world = create_task(opt, [agent1, agent2])
    if print_parser:
        # Show arguments after loading model
        print_parser.opt = agent1.opt
        print_parser.print_args()
    # set up logging
    # NOTE(review): log_every_n_secs and log_time are computed here but not
    # read again anywhere in this function.
    log_every_n_secs = opt.get('log_every_n_secs', -1)
    if log_every_n_secs <= 0:
        log_every_n_secs = float('inf')
    log_time = TimeLogger()
    logger = WorldLogger(opt)
    # Run some self chats.
    max_dial_cnt = opt['num_dialogues']
    #dial_cnt = 0
    # The episode length is sampled once, before the loop, not per dialogue.
    world.max_turn_cnt = world.sample_episode_length()
    for dial_cnt in tqdm(range(max_dial_cnt)):
        #while dial_cnt < max_dial_cnt:
        #world.max_turn_cnt = world.sample_episode_length()
        #world.turn_cnt = 0
        #print('Dialogue Number: {}, Max Turn: {}\n'.format(dial_cnt, world.max_turn_cnt))
        # Parley until the world reports the current episode as done.
        while True:
            world.parley()
            logger.log(world)
            if world.episode_done():
                break
        #dial_cnt += 1
        # Flush the logger on dialogue indices 0, 20, 40, ... and start a
        # fresh logger afterwards.
        if dial_cnt % 20 == 0:
            store_logger(opt, collection, logger)
            logger = WorldLogger(opt)
    # Store whatever was logged since the last periodic flush.
    store_logger(opt, collection, logger)
def _eval_single_world(opt, agent, task):
    """
    Evaluate ``agent`` on a single task and return the aggregated report.

    When the world holds more than one agent and the agent at index
    ``CLASSIFIER_AGENT`` has a truthy ``calc_auc`` attribute, one
    ``AUC_<class name>`` entry per tracked class is added to the report and
    the AUC state is reset.

    :param opt:
        options dict. Keys read here: ``world_logs``, ``task``,
        ``num_examples``, ``display_examples``, ``save_format`` (when
        logging) and optionally ``datatype``, ``log_every_n_secs``,
        ``batchsize``.
    :param agent:
        agent handed to ``create_task``.
    :param task:
        task name written into a copy of ``opt`` before the world is built.

    :return:
        ``aggregate_unnamed_reports(all_gather_list(world.report()))``,
        possibly extended with the AUC entries described above.
    """
    logging.info(
        f'Evaluating task {task} using datatype {opt.get("datatype")}.')

    # Work on a copy so the caller's opt keeps its original task.
    task_opt = opt.copy()
    task_opt['task'] = task

    # add task suffix in case of multi-tasking
    if opt['world_logs']:
        task_names = opt['task'].split(',')
        task_opt['world_logs'] = get_task_world_logs(
            task, task_opt['world_logs'], is_multitask=len(task_names) > 1)

    # set up world logger
    world_logger = None
    if task_opt['world_logs']:
        world_logger = WorldLogger(task_opt)

    world = create_task(task_opt, agent)  # create worlds for tasks

    # A non-positive interval disables the periodic progress line.
    requested_interval = opt.get('log_every_n_secs', -1)
    log_every_n_secs = (
        float('inf') if requested_interval <= 0 else requested_interval
    )
    log_time = TimeLogger()

    # max number of examples to evaluate
    max_cnt = float('inf')
    if opt['num_examples'] > 0:
        max_cnt = opt['num_examples']
    cnt = 0
    total_cnt = world.num_examples()

    if is_distributed():
        logging.warning('Progress bar is approximate in distributed mode.')

    while not world.epoch_done() and cnt < max_cnt:
        cnt += opt.get('batchsize', 1)
        world.parley()
        if world_logger is not None:
            world_logger.log(world)
        if opt['display_examples']:
            # display examples
            print(world.display() + '\n~~')
        if log_time.time() > log_every_n_secs:
            interim = world.report()
            text, _ = log_time.log(
                interim.get('exs', 0), min(max_cnt, total_cnt), interim)
            logging.info(text)

    if world_logger is not None:
        # dump world acts to file
        world_logger.reset()  # add final acts to logs
        if is_distributed():
            # Each rank writes its own file: <base>_<rank><extension>.
            rank = get_rank()
            stem, extension = os.path.splitext(task_opt['world_logs'])
            outfile = f'{stem}_{rank}{extension}'
        else:
            outfile = task_opt['world_logs']
        world_logger.write(outfile, world, file_format=opt['save_format'])

    report = aggregate_unnamed_reports(all_gather_list(world.report()))

    has_several_agents = (
        isinstance(world.agents, list) and len(world.agents) > 1
    )
    if has_several_agents:
        classifier_agent = world.agents[CLASSIFIER_AGENT]
        if hasattr(classifier_agent, 'calc_auc') and classifier_agent.calc_auc:
            auc_pairs = zip(
                classifier_agent.auc_class_indices, classifier_agent.aucs)
            for class_index, auc_value in auc_pairs:
                class_name = classifier_agent.class_list[class_index]
                report[f'AUC_{class_name}'] = auc_value
            classifier_agent.reset_auc()
            # for safety measures
            agent.reset_auc()

    world.reset()
    return report
def display_data(opt):
    """
    Replay dataset dialogues, sample some of them and store them in MongoDB.

    A ``RepeatLabelAgent`` is run through the task world while a
    ``WorldLogger`` records the episodes. The logged conversations are
    filtered per domain (the part of ``opt['task']`` before the first ``:``),
    a seeded random sample of at most ``opt['num_stored']`` is drawn, and each
    sampled conversation is tokenised with ``nlp``, inserted into the
    collection and printed.

    :param opt:
        options dict. Keys read here: ``mongo_host``, ``mongo_port``,
        ``user_name``, ``password``, ``collection_name``, ``num_examples``,
        ``task``, ``seed``, ``num_stored``.
    """
    client = MongoClient(
        opt['mongo_host'],
        opt['mongo_port'],
        username=opt['user_name'],
        password=opt['password'],
    )
    collection = client[DATABASE_NAME][opt['collection_name']]

    # create repeat label agent and assign it to the specified task
    agent = RepeatLabelAgent(opt)
    world = create_task(opt, agent)
    logger = WorldLogger(opt)

    max_dial_count = opt['num_examples']
    dial_count = 0

    try:
        # print dataset size if available
        print('[ loaded {} episodes with a total of {} examples ]'.format(
            world.num_episodes(), world.num_examples()))
    except Exception:
        pass

    # Replay up to max_dial_count episodes, stopping early at epoch end.
    for _ in tqdm(range(max_dial_count)):
        episode_finished = False
        while not episode_finished:
            world.parley()
            logger.log(world)
            episode_finished = (
                world.episode_done()
                or world.acts[0].get('episode_done', False)
            )
        if world.epoch_done():
            break
        dial_count += 1

    # Per-domain selection of the logged conversations.
    domain = opt['task'].split(':')[0]
    logged = logger._logs
    if domain == 'dailydialog':
        convo_list = [
            convo[1:]
            for convo in logged
            if not convo[0] == '__SILENCE__' and len(convo) > 4
        ]
    elif domain == 'personachat':
        convo_list = [convo[1:] for convo in logged]
    elif domain == 'empathetic_dialogues':
        convo_list = [convo[1:] for convo in logged if len(convo) > 2]
    else:
        # Includes 'wizard_of_wikipedia': conversations are used unchanged.
        convo_list = logged

    random.seed(opt['seed'])
    sample_size = min(len(convo_list), opt['num_stored'])
    sampled_convos = random.sample(convo_list, k=sample_size)

    for did, convo in enumerate(sampled_convos):
        domain_name = opt['task'].split(':')[0]
        # Both sides of every stored conversation are labelled as human.
        convo_data = {
            'domain_name': domain_name,
            'system_name0': "{}/human".format(domain_name),
            'system_name1': "{}/human".format(domain_name),
            'system_type0': 'human',
            'system_type1': 'human',
            'is_human0': True,
            'is_human1': True,
        }

        turn_list = []
        for eid, exchange in enumerate(convo):
            for turn in (exchange[0], exchange[1]):
                tokens = [tok.text for tok in nlp(turn['text'].lower())]
                turn_list.append({
                    'exchange_nr': eid,
                    'id': 'human',
                    'text': ' '.join(tokens),
                })

        if domain == 'wizard_of_wikipedia':
            turn_list = turn_list[1:]  # remove context word

        convo_data['convo'] = turn_list
        collection.insert_one(convo_data)

        print('Dialogue Number: {}\n'.format(did))
        for tid, turn in enumerate(convo_data['convo']):
            print(tid, turn['text'])
        print('\n\n')
def self_chat(opt, print_parser=None):
    """
    Run self-chat with one model and store every dialogue in MongoDB.

    The model is cloned so it can talk to itself. Each dialogue runs until
    ``world.episode_done()``; afterwards the log is written with
    ``logger.write`` and every logged conversation is inserted into the
    ``COLLECTION_NAME`` collection of ``DATABASE_NAME``.

    :param opt:
        options. Keys read here: ``mongo_host``, ``mongo_port``,
        ``user_name``, ``password``, ``num_dialogues``, ``outfile``,
        ``format``, ``model_file``, ``task`` and optionally
        ``display_examples``.
    :param print_parser:
        ``True`` reuses ``opt`` as the parser when ``opt`` is a
        ``ParlaiParser``; ``False`` becomes ``None``. The value is not used
        for printing in this function.

    NOTE(review): ``opt`` is indexed for the Mongo settings before the
    ``ParlaiParser`` check, so passing a parser presumably fails before the
    deprecation branch is reached -- confirm.
    NOTE(review): ``opt['model_file'].split('/')[2]`` requires at least three
    ``/``-separated components in the model path.
    """
    client = MongoClient(
        opt['mongo_host'],
        opt['mongo_port'],
        username=opt['user_name'],
        password=opt['password'],
        #authSource=DATABASE_NAME
    )
    db = client[DATABASE_NAME]
    collection = db[COLLECTION_NAME]

    if print_parser is not None:
        if print_parser is True and isinstance(opt, ParlaiParser):
            print_parser = opt
        elif print_parser is False:
            print_parser = None
    if isinstance(opt, ParlaiParser):
        print(
            '[ Deprecated Warning: self_chat should be passed opt not Parser ]'
        )
        opt = opt.parse_args()

    # Create agents
    agent1 = create_agent(opt, requireModelExists=True)
    agent2 = agent1.clone()

    # Set IDs
    model_id = agent1.id
    agent1.id = model_id + "_1"
    agent2.id = model_id + "_2"

    world = create_task(opt, user_agents=[agent1, agent2])

    # Set up world logging
    logger = WorldLogger(opt)

    # Run some self chats.
    max_dial_cnt = opt['num_dialogues']
    dial_cnt = 0
    while dial_cnt < max_dial_cnt:
        # Every dialogue gets a freshly sampled length and a reset counter.
        world.max_turn_cnt = world.sample_episode_length()
        world.turn_cnt = 0
        print('Dialogue Number: {}, Max Turn: {}\n'.format(
            dial_cnt, world.max_turn_cnt))
        while True:
            world.parley()
            logger.log(world)
            if opt.get('display_examples'):
                print(world.display())
            if world.episode_done():
                break
        print('\n\n')
        dial_cnt += 1

    if opt.get('display_examples'):
        print('-- end of episode --')

    logger.write(opt['outfile'], opt['format'])

    for convo in logger._logs:
        convo_data = {}
        convo_data['system_name0'] = opt['model_file']
        convo_data['system_name1'] = opt['model_file']
        convo_data['system_type0'] = opt['model_file'].split('/')[2]
        convo_data['system_type1'] = opt['model_file'].split('/')[2]
        convo_data['is_human0'] = False
        convo_data['is_human1'] = False
        convo_data['domain_name'] = opt['task'].split(':')[0]

        turn_list = []
        for eid, exchange in enumerate(convo):
            turn0 = exchange[0]
            turn1 = exchange[1]
            turn0['exchange_nr'] = eid
            turn1['exchange_nr'] = eid
            # Coerce 'episode_done' to a plain bool on each turn. The branch
            # is chosen from the type of the turn being modified; the old
            # code tested turn0's type for both turns, so a mixed pair took
            # the wrong branch for turn1.
            for turn in (turn0, turn1):
                done = bool(turn['episode_done'])
                if isinstance(turn, Message):
                    # Overwriting an existing key goes through force_set.
                    turn.force_set('episode_done', done)
                else:
                    turn['episode_done'] = done
            turn_list.append(turn0)
            turn_list.append(turn1)

        convo_data['convo'] = cap_context(turn_list, convo_data['domain_name'])
        collection.insert_one(convo_data)
        print(len(convo_data['convo']))