def write_parlai_format(self, outfile):
    """
    Write every logged episode to ``outfile`` as ParlAI-format text.

    Each episode in ``self._logs`` is passed through
    ``self.convert_to_labeled_data``; every resulting act is rendered with
    ``msg_to_str`` on its own line, and an empty line is written after the
    last act of each episode.

    :param outfile: destination path, opened for writing via ``PathManager``.
    """
    logging.info(f'Saving log to {outfile} in ParlAI format')
    with PathManager.open(outfile, 'w') as out:
        for raw_episode in tqdm(self._logs):
            for message in self.convert_to_labeled_data(raw_episode):
                out.write(msg_to_str(message) + '\n')
            # an empty line separates consecutive episodes
            out.write('\n')
def write_parlai_format(self, outfile):
    """
    Save the logged episodes to ``outfile``, one act per line.

    Episodes come from ``self._logs`` and are converted with
    ``self.convert_to_labeled_data`` before each act is serialized by
    ``msg_to_str``. A blank line follows every episode.

    :param outfile: path of the text file to (over)write.
    """
    print('[ Saving log to {} in ParlAI format ]'.format(outfile))
    with open(outfile, 'w') as handle:
        for logged_episode in tqdm(self._logs):
            labeled_acts = self.convert_to_labeled_data(logged_episode)
            for labeled_act in labeled_acts:
                line = msg_to_str(labeled_act)
                handle.write(line + '\n')
            # blank line marks the end of the episode
            handle.write('\n')
def write_parlai_format(self, outfile):
    """
    Write the logged episodes to ``outfile``, one act per line.

    Every episode in ``self._logs`` is converted with
    ``self.convert_to_labeled_data``; each resulting act is serialized by
    ``msg_to_str`` and written on its own line, followed by a blank line at
    the end of the episode.

    :param outfile: path of the text file to (over)write.
    """
    print('[ saving log to {} ]'.format(outfile))
    # Use a context manager so the handle is closed even when conversion or
    # serialization of an episode raises part-way through.
    with open(outfile, 'w') as fw:
        for episode in self._logs:
            ep = self.convert_to_labeled_data(episode)
            for a in ep:
                txt = msg_to_str(a)
                fw.write(txt + '\n')
            # blank line separates episodes
            fw.write('\n')
def dump_data(opt):
    """
    Run a task with a repeat-label agent and dump each example to a text file.

    The destination is ``opt['outfile']``; when that is ``None`` a temporary
    ``.txt`` file named after ``opt['task']`` and ``opt['datatype']`` is
    created instead. Each example is written as one ``msg_to_str`` line and a
    blank line is written after an act whose ``episode_done`` flag is set.

    :param opt: options object; reads ``outfile``, ``task``, ``datatype``,
        ``num_examples`` (``-1`` means use ``world.num_examples()``),
        ``log_every_n_secs`` and the optional ``ignore_fields``.
    """
    import os

    # create repeat label agent and assign it to the specified task
    agent = RepeatLabelAgent(opt)
    world = create_task(opt, agent)
    opt.log()
    ignorefields = opt.get('ignore_fields', '')
    if opt['outfile'] is None:
        fd, outfile = tempfile.mkstemp(
            prefix='{}_{}_'.format(opt['task'], opt['datatype']), suffix='.txt'
        )
        # mkstemp returns an already-open OS-level descriptor; only the path
        # is needed here, so close the descriptor instead of leaking it.
        os.close(fd)
    else:
        outfile = opt['outfile']

    if opt['num_examples'] == -1:
        num_examples = world.num_examples()
    else:
        num_examples = opt['num_examples']
    log_timer = TimeLogger()

    logging.debug('starting to convert...')
    logging.info(f'saving output to {outfile}')
    # Context manager closes the file even if the world raises mid-dump.
    with open(outfile, 'w') as fw:
        for _ in range(num_examples):
            world.parley()
            acts = world.get_acts()
            # Store the target under 'labels', falling back to (and removing)
            # 'eval_labels' when 'labels' is absent.
            value = acts[0].get('labels', acts[0].pop('eval_labels', None))
            acts[0].force_set('labels', value)
            txt = msg_to_str(acts[0], ignore_fields=ignorefields)
            fw.write(txt + '\n')
            if acts[0].get('episode_done', False):
                # blank line separates episodes
                fw.write('\n')

            if log_timer.time() > opt['log_every_n_secs']:
                text, _log = log_timer.log(world.total_parleys, world.num_examples())
                logging.info(text)

            if world.epoch_done():
                logging.info('epoch done')
                break
def write_dialog(opt, fw, d, label_type, split):
    """
    Convert one dialogue into messages and write them to ``fw``.

    A context header is assembled from ``d`` according to the ``light_use_*``
    switches in ``opt``. The turns are then walked in pairs: index ``i`` is
    emitted with ``_partner_*`` tags, index ``i + 1`` with ``_self_*`` tags,
    and ``d[label_type][i + 1]`` is the label for the pair. Every pair whose
    label is not ``None`` becomes one message. The last message is flagged
    ``episode_done`` and each message is written as one ``msg_to_str`` line.

    :param opt: options mapping; the ``light_use_*`` keys are read here.
    :param fw: writable text handle that receives the serialized messages.
    :param d: dialogue dict; ``speech``, ``action`` and ``emote`` are indexed
        per turn, and ``setting``, ``partner_agent``, ``self_agent`` and
        ``all_descriptions`` are read when the matching options are on.
    :param label_type: key of ``d`` that supplies the labels; compared
        against 'speech', 'action' and 'emote' below.
    :param split: forwarded unchanged to ``add_negs``.

    Side effect: updates the module-level ``mx`` with the largest
    ``label_candidates`` length seen.

    NOTE(review): the nesting of this function was recovered from a source
    whose whitespace had been collapsed -- confirm block boundaries (in
    particular the placement of ``used_current = True`` and ``text = ''``)
    against the upstream file.
    """
    l = len(d['speech'])
    msgs = []
    # Context header: task tag first, then optional sections switched on by opt.
    text = '_task_' + label_type + '\n'
    if opt['light_use_setting']:
        text += ('_setting_name ' + d['setting']['name'] + ", " +
                 d['setting']['category'] + '\n')
        text += '_setting_desc ' + d['setting']['description'] + '\n'
    if opt['light_use_person_names']:
        text += '_partner_name ' + d['partner_agent']['name'] + '\n'
        text += '_self_name ' + d['self_agent']['name'] + '\n'
    if use_feat(opt, 'light_use_persona', 'self'):
        text += '_self_persona ' + d['self_agent']['persona'] + '\n'
    if use_feat(opt, 'light_use_persona', 'other'):
        text += '_other_persona ' + d['partner_agent']['persona'] + '\n'
    if opt['light_use_objects']:
        for o, od in d['all_descriptions'].items():
            text += '_object_desc ' + o + " : " + od + '\n'
        # Disabled: the `if False` guard means none of the per-object
        # room/carrying/wearing/wielding lines below are ever emitted.
        if False:
            for o in d['room_objects'][0]:
                text += '_object_in_room ' + o + '\n'
            for o in d['carrying'][0]:
                text += '_object_carrying ' + o + '\n'
            for o in d['wearing'][0]:
                text += '_object_wearing ' + o + '\n'
            for o in d['wielding'][0]:
                text += '_object_wielding ' + o + '\n'
    # Step by two: index i gets the _partner_* tags, index i + 1 the _self_* tags.
    for i in range(0, l, 2):
        # Guarantees that index i + 1 exists (skips a dangling final turn).
        if i < l - 1:
            if (use_feat(opt, 'light_use_speech', 'partner') and
                    d['speech'][i] is not None):
                if opt['light_use_speech_prefix']:
                    text += '_partner_say '
                text += str(d['speech'][i]) + '\n'
            if (use_feat(opt, 'light_use_action', 'partner') and
                    d['action'][i] is not None):
                text += '_partner_act ' + str(d['action'][i]) + '\n'
            if (use_feat(opt, 'light_use_emote', 'partner') and
                    d['emote'][i] is not None):
                text += '_partner_emote ' + str(d['emote'][i]) + '\n'
            # The 'light_use_repeat' modes throw away everything built so far
            # and replace the context with raw speech from the previous self
            # turn and/or the current partner turn.
            if opt.get('light_use_repeat') == 'self_last':
                if i > 0:
                    text = str(d['speech'][i - 1])
                else:
                    text = 'nothing'
            if opt.get('light_use_repeat') == 'partner_last':
                text = str(d['speech'][i])
            if opt.get('light_use_repeat') == 'both_last':
                text = ''
                if i > 0:
                    text += str(d['speech'][i - 1]) + ' '
                text += str(d['speech'][i])

            label = d[label_type][i + 1]

            # used_current: some non-label field of the current self turn was
            # available. shown: which of those fields were actually added to
            # the context, so they are not appended a second time below.
            used_current = False
            shown = {}
            if (use_feat(opt, 'light_use_current_self_output', 'all') and
                    label_type != 'speech' and
                    use_feat(opt, 'light_use_speech', 'self') and
                    d['speech'][i + 1] is not None):
                if 'remove' not in opt['light_use_current_self_output']:
                    if opt['light_use_speech_prefix']:
                        text += '_self_say '
                    text += str(d['speech'][i + 1]) + '\n'
                    shown['speech'] = True
                used_current = True
            if (use_feat(opt, 'light_use_current_self_output', 'all') and
                    label_type != 'action' and
                    use_feat(opt, 'light_use_action', 'self') and
                    d['action'][i + 1] is not None):
                if 'remove' not in opt['light_use_current_self_output']:
                    text += '_self_act ' + str(d['action'][i + 1]) + '\n'
                    shown['action'] = True
                used_current = True
            if (use_feat(opt, 'light_use_current_self_output', 'all') and
                    label_type != 'emote' and
                    use_feat(opt, 'light_use_emote', 'self') and
                    d['emote'][i + 1] is not None):
                if 'remove' not in opt['light_use_current_self_output']:
                    text += '_self_emote ' + str(d['emote'][i + 1]) + '\n'
                    shown['emote'] = True
                used_current = True
            # With 'all_filtered' in the option value, a pair that had no
            # current self output is dropped by clearing its label.
            if ('all_filtered' in opt['light_use_current_self_output'] and
                    used_current is False):
                label = None
            if label is not None:
                msg = {}
                msg['text'] = text
                msg['labels'] = label
                # presumably fills in negative/label candidates on msg --
                # add_negs is defined elsewhere; verify there.
                add_negs(
                    msg,
                    d,
                    i + 1,
                    label_type,
                    split,
                    int(opt.get('light_use_cands', 100)),
                    opt.get('light_use_affordances', True),
                )
                msgs.append(msg)
                # start a fresh context once a message has been emitted
                text = ''
            # Append the self turn (minus anything already shown) so that it
            # is part of the context for the next pair.
            if (use_feat(opt, 'light_use_speech', 'self') and
                    d['speech'][i + 1] is not None and
                    ('speech' not in shown)):
                if opt['light_use_speech_prefix']:
                    text += '_self_say '
                text += str(d['speech'][i + 1]) + '\n'
            if (use_feat(opt, 'light_use_action', 'self') and
                    d['action'][i + 1] is not None and
                    ('action' not in shown)):
                text += '_self_act ' + str(d['action'][i + 1]) + '\n'
            if (use_feat(opt, 'light_use_emote', 'self') and
                    d['emote'][i + 1] is not None and
                    ('emote' not in shown)):
                text += '_self_emote ' + str(d['emote'][i + 1]) + '\n'
    if len(msgs) > 0:
        # flag the final message as the end of the episode
        msgs[-1]['episode_done'] = True
        for m in msgs:
            # print(m.replace('\n', '\\n'))
            fix_labels(m, opt)
            # mx is module-level state: largest label_candidates list seen so far.
            global mx
            mx = max(len(m['label_candidates']), mx)
            # print(mx)
            txt = msg_to_str(m)
            fw.write(txt + '\n')
def write_dialog(opt, fw, d, label_type, split):
    """
    Convert one scored dialogue into messages and write them to ``fw``.

    A context header is assembled from ``d`` according to the ``light_use_*``
    switches in ``opt``. The turns are then walked in pairs: index ``i`` is
    emitted as the partner turn, index ``i + 1`` as the self turn, and
    ``d[label_type][i + 1]`` is the label for the pair. Every pair whose label
    is not ``None`` becomes one message, which also carries the dialogue's
    ``model_name``, ``did_continue`` and ``score``. The last message is
    flagged ``episode_done``; trailing newlines are stripped from each
    message text before it is written as one ``msg_to_str`` line.

    :param opt: options mapping; the ``light_use_*`` keys are read here.
    :param fw: writable text handle that receives the serialized messages.
    :param d: dialogue dict; ``speech``, ``action`` and ``emote`` are indexed
        per turn, and ``score``, ``model_name``, ``did_continue``,
        ``setting``, ``partner_agent`` and ``self_agent`` are also read.
        A ``'which'`` list is written back into it (see below).
    :param label_type: key of ``d`` that supplies the labels; compared
        against 'speech', 'action' and 'emote' below.
    :param split: forwarded unchanged to ``add_negs``.

    Side effects: mutates ``d`` by setting ``d['which']`` and updates the
    module-level ``mx`` with the largest ``label_candidates`` length seen.

    NOTE(review): the nesting of this function was recovered from a source
    whose whitespace had been collapsed -- confirm block boundaries (in
    particular the placement of ``used_current = True`` and ``text = ''``)
    against the upstream file.
    """
    l = len(d['speech'])
    msgs = []
    text = ''
    score = d['score']
    model_name = d['model_name']
    did_continue = d['did_continue']
    # Record per turn whether it carries an action, an emote, or neither;
    # 'action' overrides 'emote' when both are present.
    d['which'] = []
    for i in range(0, len(d['emote'])):
        lab = 'none'
        if d['emote'][i] is not None:
            lab = 'emote'
        if d['action'][i] is not None:
            lab = 'action'
        d['which'].append(lab)

    # Context header: optional task tag, then sections switched on by opt.
    if opt.get('light_use_taskname', True):
        text = '_task_' + label_type + '\n'
    if opt['light_use_setting']:
        text += ('_setting_name ' + d['setting']['name'] + ", " +
                 d['setting']['category'] + '\n')
        text += '_setting_desc ' + d['setting']['description'] + '\n'
    if opt['light_use_person_names']:
        text += '_partner_name ' + d['partner_agent']['name'] + '\n'
        text += '_self_name ' + d['self_agent']['name'] + '\n'
    if use_feat(opt, 'light_use_persona', 'self'):
        text += '_self_persona ' + d['self_agent']['persona'] + '\n'
    if use_feat(opt, 'light_use_persona', 'other'):
        text += '_other_persona ' + d['partner_agent']['persona'] + '\n'
    # Step by two: index i is the partner turn, index i + 1 the self turn.
    for i in range(0, l, 2):
        # Guarantees that index i + 1 exists (skips a dangling final turn).
        if i < l - 1:
            if (use_feat(opt, 'light_use_speech', 'partner') and
                    d['speech'][i] is not None):
                # the tag prefix takes precedence over the "*name*: " prefix
                if opt['light_use_speech_prefix']:
                    text += '_partner_say '
                elif opt['light_use_person_names_prefix']:
                    text += f"*{d['partner_agent']['name']}*: "
                text += str(d['speech'][i]) + '\n'
            if (use_feat(opt, 'light_use_action', 'partner') and
                    d['action'][i] is not None):
                text += '_partner_act ' + str(d['action'][i]) + '\n'
            if (use_feat(opt, 'light_use_emote', 'partner') and
                    d['emote'][i] is not None):
                text += '_partner_emote ' + str(d['emote'][i]) + '\n'
            # The 'light_use_repeat' modes throw away everything built so far
            # and replace the context with raw speech from the previous self
            # turn and/or the current partner turn.
            if opt.get('light_use_repeat') == 'self_last':
                if i > 0:
                    text = str(d['speech'][i - 1])
                else:
                    text = 'nothing'
            if opt.get('light_use_repeat') == 'partner_last':
                text = str(d['speech'][i])
            if opt.get('light_use_repeat') == 'both_last':
                text = ''
                if i > 0:
                    text += str(d['speech'][i - 1]) + ' '
                text += str(d['speech'][i])

            label = d[label_type][i + 1]
            # NOTE(review): with the name prefix on, a None label is formatted
            # into the string "*<name>*: None", so the `label is not None`
            # check below can then only fail via the 'all_filtered' reset --
            # confirm this is intended.
            if opt['light_use_person_names_prefix']:
                label = f"*{d['self_agent']['name']}*: {label}"

            # used_current: some non-label field of the current self turn was
            # available. shown: which of those fields were actually added to
            # the context, so they are not appended a second time below.
            used_current = False
            shown = {}
            # NOTE(review): this check passes 'speech' to use_feat and does not
            # test 'light_use_speech', whereas the action/emote checks pass
            # 'all' and do test their own switch -- confirm the asymmetry.
            if (use_feat(opt, 'light_use_current_self_output', 'speech') and
                    label_type != 'speech' and
                    d['speech'][i + 1] is not None):
                if 'remove' not in opt['light_use_current_self_output']:
                    if opt['light_use_speech_prefix']:
                        text += '_self_say '
                    text += str(d['speech'][i + 1]) + '\n'
                    shown['speech'] = True
                used_current = True
            if (use_feat(opt, 'light_use_current_self_output', 'all') and
                    label_type != 'action' and
                    use_feat(opt, 'light_use_action', 'self') and
                    d['action'][i + 1] is not None):
                if 'remove' not in opt['light_use_current_self_output']:
                    text += '_self_act ' + str(d['action'][i + 1]) + '\n'
                    shown['action'] = True
                used_current = True
            if (use_feat(opt, 'light_use_current_self_output', 'all') and
                    label_type != 'emote' and
                    use_feat(opt, 'light_use_emote', 'self') and
                    d['emote'][i + 1] is not None):
                if 'remove' not in opt['light_use_current_self_output']:
                    text += '_self_emote ' + str(d['emote'][i + 1]) + '\n'
                    shown['emote'] = True
                used_current = True
            # With 'all_filtered' in the option value, a pair that had no
            # current self output is dropped by clearing its label.
            if ('all_filtered' in opt['light_use_current_self_output'] and
                    used_current is False):
                label = None
            if label is not None:
                msg = {}
                msg['text'] = text
                msg['labels'] = label
                # dialogue-level metadata is copied onto every message
                msg['model_name'] = model_name
                msg['did_continue'] = did_continue
                msg['score'] = score
                # presumably fills in negative/label candidates on msg --
                # add_negs is defined elsewhere; verify there.
                add_negs(
                    msg,
                    d,
                    i + 1,
                    label_type,
                    split,
                    int(opt.get('light_use_cands', 100)),
                    opt.get('light_use_affordances', True),
                )
                msgs.append(msg)
                # start a fresh context once a message has been emitted
                text = ''
            # Append the self turn (minus anything already shown) so that it
            # is part of the context for the next pair.
            if (use_feat(opt, 'light_use_speech', 'self') and
                    d['speech'][i + 1] is not None and
                    ('speech' not in shown)):
                if opt['light_use_speech_prefix']:
                    text += '_self_say '
                elif opt['light_use_person_names_prefix']:
                    text += f"*{d['self_agent']['name']}*: "
                text += str(d['speech'][i + 1]) + '\n'
            if (use_feat(opt, 'light_use_action', 'self') and
                    d['action'][i + 1] is not None and
                    ('action' not in shown)):
                text += '_self_act ' + str(d['action'][i + 1]) + '\n'
            if (use_feat(opt, 'light_use_emote', 'self') and
                    d['emote'][i + 1] is not None and
                    ('emote' not in shown)):
                text += '_self_emote ' + str(d['emote'][i + 1]) + '\n'
    if len(msgs) > 0:
        # flag the final message as the end of the episode
        msgs[-1]['episode_done'] = True
        for m in msgs:
            # drop the trailing newline(s) left by the line-by-line assembly
            m['text'] = m['text'].rstrip('\n')
            # print(m.replace('\n', '\\n'))
            fix_labels(m, opt)
            # mx is module-level state: largest label_candidates list seen so far.
            global mx
            mx = max(len(m['label_candidates']), mx)
            # print(mx)
            txt = msg_to_str(m)
            fw.write(txt + '\n')
def make_dataset(opt):
    """
    Dump a task's examples to ``opt['outfile']`` with control values attached.

    For every example the label is normalised into ``labels``, each control
    named in ``opt['controls']`` is computed with ``eval_attr`` from the
    response and the dialogue history, sanity-checked, and stored on the act
    under the control's name. The act is then written as one ``msg_to_str``
    line, with a blank line after an act whose ``episode_done`` flag is set.

    :param opt: options mapping; reads ``outfile``, ``num_examples`` (``-1``
        means use ``world.num_examples()``), ``controls`` (comma-separated,
        may be empty), ``log_every_n_secs`` and the optional
        ``ignore_fields``.
    :raises Exception: if ``controls`` names anything other than
        ``avg_nidf``, ``question`` or ``lastuttsim``.
    :raises AssertionError: if a computed control value is out of range.
    """
    # Initialize control information so we can compute sentence attributes.
    # Here we set build_task=False so we don't download data/controllable_dialogue
    # (because we're trying to create it instead).
    initialize_control_information(opt, build_task=False)

    # Create repeat label agent and assign it to the specified task
    agent = RepeatLabelAgent(opt)
    world = create_task(opt, agent)
    ignorefields = opt.get('ignore_fields', '')
    outfile = opt['outfile']

    # Number of examples to process
    if opt['num_examples'] == -1:
        num_examples = world.num_examples()
    else:
        num_examples = opt['num_examples']

    # List of controls to include:
    controls = opt['controls'].split(',') if opt['controls'] != '' else []

    print('[ starting to convert.. ]')
    print('[ saving output to {} ]'.format(outfile))
    # Context manager closes the file even when a control check or eval_attr
    # raises part-way through the dump.
    with open(outfile, 'w') as fw:
        log_timer = TimeLogger()
        for _ in range(num_examples):
            world.parley()
            # Store the target under 'labels', falling back to (and removing)
            # 'eval_labels' when 'labels' is absent.
            world.acts[0]['labels'] = world.acts[0].get(
                'labels', world.acts[0].pop('eval_labels', None)
            )

            # Need to get history in order to compute control values
            hist = ConvAI2History(world.acts[0]['text'], assume_persontokens=False)
            response = world.acts[0]['labels'][0]

            # Compute control values
            for ctrl in controls:
                ctrl_val = eval_attr(response, hist, ctrl)
                if ctrl == 'avg_nidf':
                    assert ctrl_val >= 0
                    assert ctrl_val <= 1
                elif ctrl == 'question':
                    assert ctrl_val in [0, 1]
                elif ctrl == 'lastuttsim':
                    # None is accepted here and skips the range check
                    if ctrl_val is not None:
                        assert ctrl_val >= -1
                        assert ctrl_val <= 1
                else:
                    raise Exception('unexpected ctrl name: %s' % ctrl)
                world.acts[0][ctrl] = ctrl_val  # add control value to act

            # Write to file
            txt = msg_to_str(world.acts[0], ignore_fields=ignorefields)
            fw.write(txt + '\n')
            if world.acts[0].get('episode_done', False):
                # blank line separates episodes
                fw.write('\n')

            if log_timer.time() > opt['log_every_n_secs']:
                text, _log = log_timer.log(world.total_parleys, world.num_examples())
                print(text)

            if world.epoch_done():
                print('EPOCH DONE')
                break