Ejemplo n.º 1
0
 def write_parlai_format(self, outfile):
     """
     Dump every logged episode to ``outfile`` as text, one message per line.

     Each entry of ``self._logs`` is run through
     ``self.convert_to_labeled_data``; every act it yields is serialized with
     ``msg_to_str`` and written on its own line. A blank line is written
     after each episode.

     :param outfile: destination path, opened for writing via ``PathManager``.
     """
     logging.info(f'Saving log to {outfile} in ParlAI format')
     with PathManager.open(outfile, 'w') as out:
         for raw_episode in tqdm(self._logs):
             for turn in self.convert_to_labeled_data(raw_episode):
                 out.write(msg_to_str(turn) + '\n')
             # Blank line marks the end of the episode.
             out.write('\n')
Ejemplo n.º 2
0
 def write_parlai_format(self, outfile):
     """
     Save the logged episodes to ``outfile`` as text, one message per line.

     Episodes come from ``self._logs`` and are converted with
     ``self.convert_to_labeled_data``. Each converted act is serialized by
     ``msg_to_str`` onto its own line, and every episode is followed by an
     empty line.

     :param outfile: path of the file to create or overwrite.
     """
     print('[ Saving log to {} in ParlAI format ]'.format(outfile))
     with open(outfile, 'w') as sink:
         for logged in tqdm(self._logs):
             labeled = self.convert_to_labeled_data(logged)
             for message in labeled:
                 line = msg_to_str(message) + '\n'
                 sink.write(line)
             # Episode separator.
             sink.write('\n')
Ejemplo n.º 3
0
 def write_parlai_format(self, outfile):
     """
     Write every logged episode to ``outfile``, one message per line.

     Each episode in ``self._logs`` is passed through
     ``self.convert_to_labeled_data``; every resulting act is serialized with
     ``msg_to_str`` and written on its own line, and a blank line follows
     each episode.

     :param outfile: path of the file to create or overwrite.
     """
     print('[ saving log to {} ]'.format(outfile))
     # The context manager closes the file even when conversion or
     # serialization raises part-way through, so no handle is leaked and
     # buffered output is flushed.
     with open(outfile, 'w') as fw:
         for episode in self._logs:
             for act in self.convert_to_labeled_data(episode):
                 fw.write(msg_to_str(act) + '\n')
             # Blank line terminates the episode.
             fw.write('\n')
Ejemplo n.º 4
0
def dump_data(opt):
    """
    Dump a task's examples to a text file, one serialized message per line.

    A ``RepeatLabelAgent`` is attached to the task described by ``opt`` and
    the resulting world is stepped up to ``num_examples`` times. After each
    step the first act has its ``labels`` field set (falling back to
    ``eval_labels``), is serialized with ``msg_to_str`` and written out. A
    blank line is written after an act whose ``episode_done`` is truthy.

    :param opt: options object. Keys read here: ``ignore_fields`` (optional),
        ``outfile`` (``None`` means "create a temporary ``.txt`` file"),
        ``task`` and ``datatype`` (used only for the temp-file prefix),
        ``num_examples`` (``-1`` means "use ``world.num_examples()``") and
        ``log_every_n_secs``.
    """
    # create repeat label agent and assign it to the specified task
    agent = RepeatLabelAgent(opt)
    world = create_task(opt, agent)
    opt.log()
    ignorefields = opt.get('ignore_fields', '')
    if opt['outfile'] is None:
        # mkstemp() hands back an already-open OS-level descriptor; taking
        # only the path leaked it. NamedTemporaryFile(delete=False) creates
        # the same kind of file, and the ``with`` closes the handle while
        # leaving the file on disk to be reopened below.
        with tempfile.NamedTemporaryFile(
            prefix='{}_{}_'.format(opt['task'], opt['datatype']),
            suffix='.txt',
            delete=False,
        ) as tmp:
            outfile = tmp.name
    else:
        outfile = opt['outfile']

    if opt['num_examples'] == -1:
        num_examples = world.num_examples()
    else:
        num_examples = opt['num_examples']
    log_timer = TimeLogger()

    logging.debug('starting to convert...')
    logging.info(f'saving output to {outfile}')
    # ``with`` guarantees the output file is flushed and closed even if the
    # world or the serializer raises in the middle of the dump.
    with open(outfile, 'w') as fw:
        for _ in range(num_examples):
            world.parley()
            act = world.get_acts()[0]
            # Prefer an existing 'labels' entry; otherwise move 'eval_labels'
            # over. 'eval_labels' is popped either way, as the default
            # argument is evaluated eagerly.
            value = act.get('labels', act.pop('eval_labels', None))
            act.force_set('labels', value)
            txt = msg_to_str(act, ignore_fields=ignorefields)
            fw.write(txt + '\n')
            if act.get('episode_done', False):
                # Blank line separates episodes.
                fw.write('\n')

            if log_timer.time() > opt['log_every_n_secs']:
                text, _log = log_timer.log(world.total_parleys,
                                           world.num_examples())
                logging.info(text)

            if world.epoch_done():
                logging.info('epoch done')
                break
Ejemplo n.º 5
0
def write_dialog(opt, fw, d, label_type, split):
    """
    Turn one dialogue ``d`` into training messages and write them to ``fw``.

    A context header is built first (task tag plus optional setting, names,
    personas and object descriptions). The dialogue is then walked two turns
    at a time: index ``i`` is rendered with ``_partner_*`` prefixes and index
    ``i + 1`` with ``_self_*`` prefixes, and ``d[label_type][i + 1]`` is the
    label for the message built from the context accumulated so far.

    Each emitted message is a dict with ``text`` and ``labels``; ``add_negs``
    and ``fix_labels`` are then applied to it (presumably they attach
    ``label_candidates``, which is read below -- verify against those
    helpers). Messages are serialized with ``msg_to_str`` and written one per
    line. No blank line is written after the dialogue here.

    Side effect: updates the module-level global ``mx`` to the largest
    ``label_candidates`` length seen.

    :param opt: option mapping, read with both ``opt[...]`` and ``opt.get``.
    :param fw: writable object; only ``fw.write`` is used.
    :param d: dialogue dict with (at least) ``speech``, ``action``, ``emote``,
        ``setting``, ``partner_agent`` and ``self_agent`` entries, depending
        on which options are enabled.
    :param label_type: key into ``d`` selecting the label sequence; compared
        against ``'speech'``, ``'action'`` and ``'emote'``.
    :param split: passed through unchanged to ``add_negs``.
    """
    l = len(d['speech'])
    msgs = []
    # --- Context header, prepended to the first message's text. ---
    text = '_task_' + label_type + '\n'
    if opt['light_use_setting']:
        text += ('_setting_name ' + d['setting']['name'] + ", " +
                 d['setting']['category'] + '\n')
        text += '_setting_desc ' + d['setting']['description'] + '\n'
    if opt['light_use_person_names']:
        text += '_partner_name ' + d['partner_agent']['name'] + '\n'
        text += '_self_name ' + d['self_agent']['name'] + '\n'
    if use_feat(opt, 'light_use_persona', 'self'):
        text += '_self_persona ' + d['self_agent']['persona'] + '\n'
    if use_feat(opt, 'light_use_persona', 'other'):
        text += '_other_persona ' + d['partner_agent']['persona'] + '\n'
    if opt['light_use_objects']:
        for o, od in d['all_descriptions'].items():
            text += '_object_desc ' + o + " : " + od + '\n'
        # Disabled: this branch never executes (hard-coded ``if False``).
        if False:
            for o in d['room_objects'][0]:
                text += '_object_in_room ' + o + '\n'
            for o in d['carrying'][0]:
                text += '_object_carrying ' + o + '\n'
            for o in d['wearing'][0]:
                text += '_object_wearing ' + o + '\n'
            for o in d['wielding'][0]:
                text += '_object_wielding ' + o + '\n'
    # --- Walk the dialogue in (partner turn i, self turn i + 1) pairs. ---
    for i in range(0, l, 2):
        # Skip a trailing turn at index i that has no following i + 1 turn.
        if i < l - 1:
            # Partner's speech / action / emote at index i go into context.
            if (use_feat(opt, 'light_use_speech', 'partner')
                    and d['speech'][i] is not None):
                if opt['light_use_speech_prefix']:
                    text += '_partner_say '
                text += str(d['speech'][i]) + '\n'
            if (use_feat(opt, 'light_use_action', 'partner')
                    and d['action'][i] is not None):
                text += '_partner_act ' + str(d['action'][i]) + '\n'
            if (use_feat(opt, 'light_use_emote', 'partner')
                    and d['emote'][i] is not None):
                text += '_partner_emote ' + str(d['emote'][i]) + '\n'
            # 'light_use_repeat' modes REPLACE everything accumulated so far
            # (header included) with only the most recent utterance(s).
            if opt.get('light_use_repeat') == 'self_last':
                if i > 0:
                    text = str(d['speech'][i - 1])
                else:
                    text = 'nothing'
            if opt.get('light_use_repeat') == 'partner_last':
                text = str(d['speech'][i])
            if opt.get('light_use_repeat') == 'both_last':
                text = ''
                if i > 0:
                    text += str(d['speech'][i - 1]) + ' '
                text += str(d['speech'][i])
            label = d[label_type][i + 1]
            # used_current: some non-label modality of the current self turn
            #   exists (whether or not it was added to the context).
            # shown: modalities of the current self turn already written into
            #   ``text``, so they are not appended a second time below.
            used_current = False
            shown = {}
            # Optionally reveal the self turn's other modalities (those that
            # are not the label) in the context. When the option contains
            # 'remove' the modality is only counted, not added to the text.
            if (use_feat(opt, 'light_use_current_self_output', 'all')
                    and label_type != 'speech'
                    and use_feat(opt, 'light_use_speech', 'self')
                    and d['speech'][i + 1] is not None):
                if 'remove' not in opt['light_use_current_self_output']:
                    if opt['light_use_speech_prefix']:
                        text += '_self_say '
                    text += str(d['speech'][i + 1]) + '\n'
                    shown['speech'] = True
                used_current = True
            if (use_feat(opt, 'light_use_current_self_output', 'all')
                    and label_type != 'action'
                    and use_feat(opt, 'light_use_action', 'self')
                    and d['action'][i + 1] is not None):
                if 'remove' not in opt['light_use_current_self_output']:
                    text += '_self_act ' + str(d['action'][i + 1]) + '\n'
                    shown['action'] = True
                used_current = True
            if (use_feat(opt, 'light_use_current_self_output', 'all')
                    and label_type != 'emote'
                    and use_feat(opt, 'light_use_emote', 'self')
                    and d['emote'][i + 1] is not None):
                if 'remove' not in opt['light_use_current_self_output']:
                    text += '_self_emote ' + str(d['emote'][i + 1]) + '\n'
                    shown['emote'] = True
                used_current = True
            # 'all_filtered': drop examples whose self turn had no other
            # modality available.
            if ('all_filtered' in opt['light_use_current_self_output']
                    and used_current is False):
                label = None
            # Emit a message only when there is a label. When there is none,
            # ``text`` is NOT reset, so context carries over to the next pair.
            if label is not None:
                msg = {}
                msg['text'] = text
                msg['labels'] = label
                add_negs(
                    msg,
                    d,
                    i + 1,
                    label_type,
                    split,
                    int(opt.get('light_use_cands', 100)),
                    opt.get('light_use_affordances', True),
                )
                msgs.append(msg)
                text = ''
            # The self turn becomes context for the NEXT message, except for
            # modalities already written into this message's text.
            if (use_feat(opt, 'light_use_speech', 'self')
                    and d['speech'][i + 1] is not None
                    and ('speech' not in shown)):
                if opt['light_use_speech_prefix']:
                    text += '_self_say '
                text += str(d['speech'][i + 1]) + '\n'
            if (use_feat(opt, 'light_use_action', 'self')
                    and d['action'][i + 1] is not None
                    and ('action' not in shown)):
                text += '_self_act ' + str(d['action'][i + 1]) + '\n'
            if (use_feat(opt, 'light_use_emote', 'self')
                    and d['emote'][i + 1] is not None
                    and ('emote' not in shown)):
                text += '_self_emote ' + str(d['emote'][i + 1]) + '\n'
    if len(msgs) > 0:
        # Mark the last message as the end of the episode.
        msgs[-1]['episode_done'] = True
        for m in msgs:
            # print(m.replace('\n', '\\n'))
            fix_labels(m, opt)
            # Track the largest candidate-list size in the module global.
            global mx
            mx = max(len(m['label_candidates']), mx)
            # print(mx)
            txt = msg_to_str(m)
            fw.write(txt + '\n')
Ejemplo n.º 6
0
def write_dialog(opt, fw, d, label_type, split):
    """
    Turn one scored dialogue ``d`` into messages and write them to ``fw``.

    A context header is built first (optional task tag, setting, names and
    personas). The dialogue is then walked two turns at a time: index ``i``
    is rendered as the partner's turn and index ``i + 1`` as the self turn,
    with ``d[label_type][i + 1]`` as the label for the message built from
    the context accumulated so far.

    Each emitted message carries ``text``, ``labels`` and the dialogue-level
    ``model_name``, ``did_continue`` and ``score`` values; ``add_negs`` and
    ``fix_labels`` are then applied to it (presumably they attach
    ``label_candidates``, which is read below -- verify against those
    helpers). Messages are serialized with ``msg_to_str`` and written one per
    line. No blank line is written after the dialogue here.

    Side effects: adds a ``'which'`` list to ``d`` and updates the
    module-level global ``mx`` to the largest ``label_candidates`` length
    seen.

    :param opt: option mapping, read with both ``opt[...]`` and ``opt.get``.
    :param fw: writable object; only ``fw.write`` is used.
    :param d: dialogue dict with (at least) ``speech``, ``action``, ``emote``,
        ``score``, ``model_name``, ``did_continue``, ``partner_agent`` and
        ``self_agent`` entries; ``setting`` is read when enabled.
    :param label_type: key into ``d`` selecting the label sequence; compared
        against ``'speech'``, ``'action'`` and ``'emote'``.
    :param split: passed through unchanged to ``add_negs``.
    """
    l = len(d['speech'])
    msgs = []
    text = ''
    score = d['score']
    model_name = d['model_name']
    did_continue = d['did_continue']

    # Per-turn tag stored back into ``d``: 'action' if the turn has an action,
    # else 'emote' if it has an emote, else 'none'. It is not read again in
    # this function (possibly consumed via ``d[label_type]`` or by a helper).
    d['which'] = []
    for i in range(0, len(d['emote'])):
        lab = 'none'
        if d['emote'][i] is not None:
            lab = 'emote'
        if d['action'][i] is not None:
            lab = 'action'
        d['which'].append(lab)

    # --- Context header, prepended to the first message's text. ---
    if opt.get('light_use_taskname', True):
        text = '_task_' + label_type + '\n'
    if opt['light_use_setting']:
        text += ('_setting_name ' + d['setting']['name'] + ", " +
                 d['setting']['category'] + '\n')
        text += '_setting_desc ' + d['setting']['description'] + '\n'
    if opt['light_use_person_names']:
        text += '_partner_name ' + d['partner_agent']['name'] + '\n'
        text += '_self_name ' + d['self_agent']['name'] + '\n'
    if use_feat(opt, 'light_use_persona', 'self'):
        text += '_self_persona ' + d['self_agent']['persona'] + '\n'
    if use_feat(opt, 'light_use_persona', 'other'):
        text += '_other_persona ' + d['partner_agent']['persona'] + '\n'
    # --- Walk the dialogue in (partner turn i, self turn i + 1) pairs. ---
    for i in range(0, l, 2):
        # Skip a trailing turn at index i that has no following i + 1 turn.
        if i < l - 1:
            # Partner's speech / action / emote at index i go into context.
            # Speech gets either the '_partner_say ' tag or, failing that, a
            # '*name*: ' prefix when the person-names prefix option is set.
            if (use_feat(opt, 'light_use_speech', 'partner')
                    and d['speech'][i] is not None):
                if opt['light_use_speech_prefix']:
                    text += '_partner_say '
                elif opt['light_use_person_names_prefix']:
                    text += f"*{d['partner_agent']['name']}*: "
                text += str(d['speech'][i]) + '\n'
            if (use_feat(opt, 'light_use_action', 'partner')
                    and d['action'][i] is not None):
                text += '_partner_act ' + str(d['action'][i]) + '\n'
            if (use_feat(opt, 'light_use_emote', 'partner')
                    and d['emote'][i] is not None):
                text += '_partner_emote ' + str(d['emote'][i]) + '\n'
            # 'light_use_repeat' modes REPLACE everything accumulated so far
            # (header included) with only the most recent utterance(s).
            if opt.get('light_use_repeat') == 'self_last':
                if i > 0:
                    text = str(d['speech'][i - 1])
                else:
                    text = 'nothing'
            if opt.get('light_use_repeat') == 'partner_last':
                text = str(d['speech'][i])
            if opt.get('light_use_repeat') == 'both_last':
                text = ''
                if i > 0:
                    text += str(d['speech'][i - 1]) + ' '
                text += str(d['speech'][i])

            label = d[label_type][i + 1]
            # NOTE(review): with the names prefix enabled, a missing label
            # (None) is formatted into the string '*name*: None', so the
            # ``label is not None`` check below can only be triggered by the
            # 'all_filtered' rule -- confirm this is intended.
            if opt['light_use_person_names_prefix']:
                label = f"*{d['self_agent']['name']}*: {label}"
            # used_current: some non-label modality of the current self turn
            #   exists (whether or not it was added to the context).
            # shown: modalities of the current self turn already written into
            #   ``text``, so they are not appended a second time below.
            used_current = False
            shown = {}
            # NOTE(review): the speech case is gated on feature 'speech' and
            # does not check 'light_use_speech'/'self', unlike the action and
            # emote cases below which use 'all' -- confirm the asymmetry.
            if (use_feat(opt, 'light_use_current_self_output', 'speech')
                    and label_type != 'speech'
                    and d['speech'][i + 1] is not None):
                if 'remove' not in opt['light_use_current_self_output']:
                    if opt['light_use_speech_prefix']:
                        text += '_self_say '
                    text += str(d['speech'][i + 1]) + '\n'
                    shown['speech'] = True
                used_current = True
            if (use_feat(opt, 'light_use_current_self_output', 'all')
                    and label_type != 'action'
                    and use_feat(opt, 'light_use_action', 'self')
                    and d['action'][i + 1] is not None):
                if 'remove' not in opt['light_use_current_self_output']:
                    text += '_self_act ' + str(d['action'][i + 1]) + '\n'
                    shown['action'] = True
                used_current = True
            if (use_feat(opt, 'light_use_current_self_output', 'all')
                    and label_type != 'emote'
                    and use_feat(opt, 'light_use_emote', 'self')
                    and d['emote'][i + 1] is not None):
                if 'remove' not in opt['light_use_current_self_output']:
                    text += '_self_emote ' + str(d['emote'][i + 1]) + '\n'
                    shown['emote'] = True
                used_current = True
            # 'all_filtered': drop examples whose self turn had no other
            # modality available.
            if ('all_filtered' in opt['light_use_current_self_output']
                    and used_current is False):
                label = None
            # Emit a message only when there is a label. When there is none,
            # ``text`` is NOT reset, so context carries over to the next pair.
            if label is not None:
                msg = {}
                msg['text'] = text
                msg['labels'] = label
                msg['model_name'] = model_name
                msg['did_continue'] = did_continue
                msg['score'] = score
                add_negs(
                    msg,
                    d,
                    i + 1,
                    label_type,
                    split,
                    int(opt.get('light_use_cands', 100)),
                    opt.get('light_use_affordances', True),
                )
                msgs.append(msg)
                text = ''
            # The self turn becomes context for the NEXT message, except for
            # modalities already written into this message's text.
            if (use_feat(opt, 'light_use_speech', 'self')
                    and d['speech'][i + 1] is not None
                    and ('speech' not in shown)):
                if opt['light_use_speech_prefix']:
                    text += '_self_say '
                elif opt['light_use_person_names_prefix']:
                    text += f"*{d['self_agent']['name']}*: "
                text += str(d['speech'][i + 1]) + '\n'
            if (use_feat(opt, 'light_use_action', 'self')
                    and d['action'][i + 1] is not None
                    and ('action' not in shown)):
                text += '_self_act ' + str(d['action'][i + 1]) + '\n'
            if (use_feat(opt, 'light_use_emote', 'self')
                    and d['emote'][i + 1] is not None
                    and ('emote' not in shown)):
                text += '_self_emote ' + str(d['emote'][i + 1]) + '\n'
    if len(msgs) > 0:
        # Mark the last message as the end of the episode.
        msgs[-1]['episode_done'] = True
        for m in msgs:
            # Drop trailing newlines left by the line-by-line accumulation.
            m['text'] = m['text'].rstrip('\n')
            # print(m.replace('\n', '\\n'))
            fix_labels(m, opt)
            # Track the largest candidate-list size in the module global.
            global mx
            mx = max(len(m['label_candidates']), mx)
            # print(mx)
            txt = msg_to_str(m)
            fw.write(txt + '\n')
0
def make_dataset(opt):
    """
    Write a task's examples, annotated with control values, to a text file.

    A ``RepeatLabelAgent`` is attached to the task described by ``opt`` and
    the world is stepped up to ``num_examples`` times. For each step the
    first act gets a ``labels`` field (falling back to ``eval_labels``), and
    for every requested control ``eval_attr`` is evaluated on the first
    label against the conversation history and stored on the act under the
    control's name. The act is then serialized with ``msg_to_str`` and
    written on one line; a blank line follows an act whose ``episode_done``
    is truthy.

    :param opt: options mapping. Keys read here: ``ignore_fields``
        (optional), ``outfile``, ``num_examples`` (``-1`` means "use
        ``world.num_examples()``"), ``controls`` (comma-separated names, or
        ``''`` for none) and ``log_every_n_secs``.
    :raises Exception: if a control name other than ``avg_nidf``,
        ``question`` or ``lastuttsim`` is requested.
    :raises AssertionError: if a computed control value is out of its
        expected range.
    """
    # Initialize control information so we can compute sentence attributes.
    # Here we set build_task=False so we don't download data/controllable_dialogue
    # (because we're trying to create it instead).
    initialize_control_information(opt, build_task=False)

    # Create repeat label agent and assign it to the specified task
    agent = RepeatLabelAgent(opt)
    world = create_task(opt, agent)
    ignorefields = opt.get('ignore_fields', '')
    outfile = opt['outfile']

    # Number of examples to process
    if opt['num_examples'] == -1:
        num_examples = world.num_examples()
    else:
        num_examples = opt['num_examples']

    # List of controls to include:
    controls = opt['controls'].split(',') if opt['controls'] != '' else []

    print('[ starting to convert.. ]')
    print('[ saving output to {} ]'.format(outfile))
    # ``with`` guarantees the output file is flushed and closed even when a
    # sanity check fails or an unexpected control name raises mid-run.
    with open(outfile, 'w') as fw:
        log_timer = TimeLogger()

        for _ in range(num_examples):
            world.parley()
            act = world.acts[0]
            # Prefer an existing 'labels' entry; otherwise move 'eval_labels'
            # over. 'eval_labels' is popped either way, as the default
            # argument is evaluated eagerly.
            act['labels'] = act.get('labels', act.pop('eval_labels', None))

            # Need to get history in order to compute control values
            hist = ConvAI2History(act['text'], assume_persontokens=False)
            response = act['labels'][0]

            # Compute control values
            for ctrl in controls:
                ctrl_val = eval_attr(response, hist, ctrl)
                if ctrl == 'avg_nidf':
                    assert ctrl_val >= 0
                    assert ctrl_val <= 1
                elif ctrl == 'question':
                    assert ctrl_val in [0, 1]
                elif ctrl == 'lastuttsim':
                    # None is allowed here and is written through as-is.
                    if ctrl_val is not None:
                        assert ctrl_val >= -1
                        assert ctrl_val <= 1
                else:
                    raise Exception('unexpected ctrl name: %s' % ctrl)
                act[ctrl] = ctrl_val  # add control value to act

            # Write to file
            txt = msg_to_str(act, ignore_fields=ignorefields)
            fw.write(txt + '\n')
            if act.get('episode_done', False):
                # Blank line separates episodes.
                fw.write('\n')

            if log_timer.time() > opt['log_every_n_secs']:
                text, _log = log_timer.log(world.total_parleys,
                                           world.num_examples())
                print(text)

            if world.epoch_done():
                print('EPOCH DONE')
                break