Ejemplo n.º 1
0
def dump_data(opt):
    """Dump examples of the task configured in ``opt`` to a text file.

    A ``RepeatLabelAgent`` is paired with the task world, and after every
    ``parley`` the first act of the world is serialized with ``msg_to_str``
    and written as one line. An extra blank line is written after an act
    flagged ``episode_done``.

    Args:
        opt: Option mapping. Keys read here: ``'ignore_fields'``
            (comma-separated string), ``'outfile'`` (path to write) and
            ``'num_examples'`` (maximum number of parleys).

    The loop stops early when the world reports ``epoch_done()``.
    """
    # create repeat label agent and assign it to the specified task
    agent = RepeatLabelAgent(opt)
    world = create_task(opt, agent)
    ignorefields = opt['ignore_fields'].split(',')

    print("[ starting to convert.. ]")
    # The context manager guarantees the file is closed even if parley() or
    # the serialization raises part-way through the dump.
    with open(opt['outfile'], 'w') as fw:
        for _ in range(opt['num_examples']):
            world.parley()
            act = world.acts[0]
            # Normalize: expose 'eval_labels' under 'labels' when 'labels' is
            # absent; 'eval_labels' is always removed from the act.
            act['labels'] = act.get('labels', act.pop('eval_labels', None))
            txt = msg_to_str(act, ignore_fields=ignorefields)
            fw.write(txt + "\n")
            if act.get('episode_done', False):
                # blank line separates episodes in the output
                fw.write("\n")

            if world.epoch_done():
                print('EPOCH DONE')
                break
def dump_data(opt):
    """Dump examples of the task configured in ``opt`` to a text file.

    A ``RepeatLabelAgent`` is paired with the task world, and after every
    ``parley`` the first act of the world is serialized with ``msg_to_str``
    and written as one line. An extra blank line is written after an act
    flagged ``episode_done``.

    Args:
        opt: Option mapping. Keys read here: ``'ignore_fields'`` (optional,
            defaults to ``''``), ``'outfile'`` (path, or ``None`` to create a
            temporary ``<task>_<datatype>_*.txt`` file), ``'task'`` and
            ``'datatype'`` (only used to name the temporary file),
            ``'num_examples'`` (``-1`` means ``world.num_examples()``) and
            ``'log_every_n_secs'``.

    The loop stops early when the world reports ``epoch_done()``.
    """
    # Local import: only needed to close the descriptor mkstemp hands back.
    import os

    # create repeat label agent and assign it to the specified task
    agent = RepeatLabelAgent(opt)
    world = create_task(opt, agent)
    ignorefields = opt.get('ignore_fields', '')
    if opt['outfile'] is None:
        fd, outfile = tempfile.mkstemp(
            prefix='{}_{}_'.format(opt['task'], opt['datatype']),
            suffix='.txt',
        )
        # mkstemp returns an already-open OS-level descriptor; close it so it
        # is not leaked, since the file is reopened by path below.
        os.close(fd)
    else:
        outfile = opt['outfile']

    if opt['num_examples'] == -1:
        num_examples = world.num_examples()
    else:
        num_examples = opt['num_examples']
    log_timer = TimeLogger()

    print('[ starting to convert.. ]')
    print('[ saving output to {} ]'.format(outfile))
    # The context manager guarantees the file is closed even if parley() or
    # the serialization raises part-way through the dump.
    with open(outfile, 'w') as fw:
        for _ in range(num_examples):
            world.parley()
            act = world.acts[0]
            # Normalize: expose 'eval_labels' under 'labels' when 'labels' is
            # absent; 'eval_labels' is always removed from the act.
            act['labels'] = act.get('labels', act.pop('eval_labels', None))
            txt = msg_to_str(act, ignore_fields=ignorefields)
            fw.write(txt + '\n')
            if act.get('episode_done', False):
                # blank line separates episodes in the output
                fw.write('\n')

            # Periodic progress report.
            if log_timer.time() > opt['log_every_n_secs']:
                text, _log = log_timer.log(world.total_parleys,
                                           world.num_examples())
                print(text)

            if world.epoch_done():
                print('EPOCH DONE')
                break
Ejemplo n.º 3
0
def make_dataset(opt):
    """Dump the task in ``opt`` to a text file, annotating each act with controls.

    For every parley the first act of the world is taken, the requested
    control attributes are computed with ``eval_attr`` from the act's text
    history and its first label, stored on the act under the control's name,
    and the act is written as one ``msg_to_str`` line. An extra blank line
    is written after an act flagged ``episode_done``.

    Args:
        opt: Option mapping. Keys read here: ``'ignore_fields'`` (optional,
            defaults to ``''``), ``'outfile'``, ``'num_examples'`` (``-1``
            means ``world.num_examples()``), ``'controls'`` (comma-separated
            control names, ``''`` for none) and ``'log_every_n_secs'``.

    Raises:
        Exception: if a control name other than ``'avg_nidf'``,
            ``'question'`` or ``'lastuttsim'`` is requested.
        AssertionError: if a computed control value is outside its expected
            range.
    """

    # Initialize control information so we can compute sentence attributes.
    # Here we set build_task=False so we don't download data/controllable_dialogue
    # (because we're trying to create it instead).
    initialize_control_information(opt, build_task=False)

    # Create repeat label agent and assign it to the specified task
    agent = RepeatLabelAgent(opt)
    world = create_task(opt, agent)
    ignorefields = opt.get('ignore_fields', '')
    outfile = opt['outfile']

    # Number of examples to process
    if opt['num_examples'] == -1:
        num_examples = world.num_examples()
    else:
        num_examples = opt['num_examples']

    # List of controls to include:
    controls = opt['controls'].split(',') if opt['controls'] != '' else []

    print('[ starting to convert.. ]')
    print('[ saving output to {} ]'.format(outfile))
    # The context manager guarantees the file is closed even when one of the
    # sanity checks below raises in the middle of the dump.
    with open(outfile, 'w') as fw:
        log_timer = TimeLogger()

        for _ in range(num_examples):
            world.parley()
            act = world.acts[0]
            # Normalize: expose 'eval_labels' under 'labels' when 'labels' is
            # absent; 'eval_labels' is always removed from the act.
            act['labels'] = act.get('labels', act.pop('eval_labels', None))

            # Need to get history in order to compute control values
            hist = ConvAI2History(act['text'], assume_persontokens=False)
            response = act['labels'][0]

            # Compute control values
            for ctrl in controls:
                ctrl_val = eval_attr(response, hist, ctrl)
                if ctrl == 'avg_nidf':
                    assert ctrl_val >= 0
                    assert ctrl_val <= 1
                elif ctrl == 'question':
                    assert ctrl_val in [0, 1]
                elif ctrl == 'lastuttsim':
                    # None is accepted here and skips the range check.
                    if ctrl_val is not None:
                        assert ctrl_val >= -1
                        assert ctrl_val <= 1
                else:
                    raise Exception('unexpected ctrl name: %s' % ctrl)
                act[ctrl] = ctrl_val  # add control value to act

            # Write to file
            txt = msg_to_str(act, ignore_fields=ignorefields)
            fw.write(txt + '\n')
            if act.get('episode_done', False):
                # blank line separates episodes in the output
                fw.write('\n')

            # Periodic progress report.
            if log_timer.time() > opt['log_every_n_secs']:
                text, _log = log_timer.log(world.total_parleys,
                                           world.num_examples())
                print(text)

            if world.epoch_done():
                print('EPOCH DONE')
                break
Ejemplo n.º 4
0
def write_dialog(opt, fw, d, label_type, split):
    """
    Turn one dialogue ``d`` into a list of messages and write them to ``fw``.

    The dialogue is walked two turns at a time: index ``i`` is emitted with
    ``_partner_*`` tags and index ``i + 1`` with ``_self_*`` tags. For each
    pair, the context accumulated in ``text`` becomes the message text and
    ``d[label_type][i + 1]`` becomes its label. Messages are then passed
    through ``fix_labels`` and written one per line via ``msg_to_str``.

    :param opt: option mapping. Keys read directly here:
        ``light_use_setting``, ``light_use_person_names``,
        ``light_use_objects``, ``light_use_repeat``,
        ``light_use_current_self_output``, ``light_use_cands`` and
        ``light_use_affordances``; further options are consulted through
        ``use_feat``.
    :param fw: writable file-like object; receives one line per message.
    :param d: dialogue dict. ``'speech'``, ``'action'`` and ``'emote'`` are
        indexed per turn; ``'setting'``, ``'partner_agent'``,
        ``'self_agent'`` and ``'all_descriptions'`` are read when the
        matching option is enabled.
    :param label_type: key of ``d`` to draw labels from; compared against
        ``'speech'``, ``'action'`` and ``'emote'``.
    :param split: forwarded unchanged to ``add_negs``.

    Side effect: raises the module-level ``mx`` to the largest
    ``label_candidates`` length seen.
    """
    l = len(d['speech'])
    msgs = []
    # Context header: task tag first, then optional setting / names /
    # personas / object descriptions depending on the options.
    text = '_task_' + label_type + '\n'
    if opt['light_use_setting']:
        text += ('_setting_name ' + d['setting']['name'] + ", " +
                 d['setting']['category'] + '\n')
        text += '_setting_desc ' + d['setting']['description'] + '\n'
    if opt['light_use_person_names']:
        text += '_partner_name ' + d['partner_agent']['name'] + '\n'
        text += '_self_name ' + d['self_agent']['name'] + '\n'
    if use_feat(opt, 'light_use_persona', 'self'):
        text += '_self_persona ' + d['self_agent']['persona'] + '\n'
    if use_feat(opt, 'light_use_persona', 'other'):
        text += '_other_persona ' + d['partner_agent']['persona'] + '\n'
    if opt['light_use_objects']:
        for o, od in d['all_descriptions'].items():
            text += '_object_desc ' + o + " : " + od + '\n'
        # Disabled: the branch below never executes.
        if False:
            for o in d['room_objects'][0]:
                text += '_object_in_room ' + o + '\n'
            for o in d['carrying'][0]:
                text += '_object_carrying ' + o + '\n'
            for o in d['wearing'][0]:
                text += '_object_wearing ' + o + '\n'
            for o in d['wielding'][0]:
                text += '_object_wielding ' + o + '\n'
    # Step over (partner turn i, self turn i + 1) pairs.
    for i in range(0, l, 2):
        # Guard so that index i + 1 exists (skips a trailing unpaired turn).
        if i < l - 1:
            # Append the partner's side of this exchange to the context.
            if (use_feat(opt, 'light_use_speech', 'partner')
                    and d['speech'][i] is not None):
                text += '_partner_say ' + str(d['speech'][i]) + '\n'
            if (use_feat(opt, 'light_use_action', 'partner')
                    and d['action'][i] is not None):
                text += '_partner_act ' + str(d['action'][i]) + '\n'
            if (use_feat(opt, 'light_use_emote', 'partner')
                    and d['emote'][i] is not None):
                text += '_partner_emote ' + str(d['emote'][i]) + '\n'
            # 'light_use_repeat' modes REPLACE everything accumulated in
            # `text` so far with raw previous speech (no tags, no newline).
            if opt.get('light_use_repeat') == 'self_last':
                if i > 0:
                    text = str(d['speech'][i - 1])
                else:
                    text = 'nothing'
            if opt.get('light_use_repeat') == 'partner_last':
                text = str(d['speech'][i])
            if opt.get('light_use_repeat') == 'both_last':
                text = ''
                if i > 0:
                    text += str(d['speech'][i - 1]) + ' '
                text += str(d['speech'][i])
            label = d[label_type][i + 1]
            # used_current: some other self output existed for this turn.
            # shown: which self outputs were already appended to the context,
            # so they are not appended a second time after the message.
            used_current = False
            shown = {}
            # For each output type other than the one being predicted,
            # optionally reveal the self agent's current-turn output in the
            # context (skipped when the option contains 'remove').
            if (use_feat(opt, 'light_use_current_self_output', 'all')
                    and label_type != 'speech'
                    and use_feat(opt, 'light_use_speech', 'self')
                    and d['speech'][i + 1] is not None):
                if 'remove' not in opt['light_use_current_self_output']:
                    text += '_self_say ' + str(d['speech'][i + 1]) + '\n'
                    shown['speech'] = True
                used_current = True
            if (use_feat(opt, 'light_use_current_self_output', 'all')
                    and label_type != 'action'
                    and use_feat(opt, 'light_use_action', 'self')
                    and d['action'][i + 1] is not None):
                if 'remove' not in opt['light_use_current_self_output']:
                    text += '_self_act ' + str(d['action'][i + 1]) + '\n'
                    shown['action'] = True
                used_current = True
            if (use_feat(opt, 'light_use_current_self_output', 'all')
                    and label_type != 'emote'
                    and use_feat(opt, 'light_use_emote', 'self')
                    and d['emote'][i + 1] is not None):
                if 'remove' not in opt['light_use_current_self_output']:
                    text += '_self_emote ' + str(d['emote'][i + 1]) + '\n'
                    shown['emote'] = True
                used_current = True
            # With 'all_filtered', drop examples for which no other self
            # output was available on this turn.
            if ('all_filtered' in opt['light_use_current_self_output']
                    and used_current is False):
                label = None
            if label is not None:
                msg = {}
                msg['text'] = text
                msg['labels'] = label
                # add_negs receives the message, the dialogue, the index of
                # the self turn, and the candidate-related options.
                add_negs(
                    msg,
                    d,
                    i + 1,
                    label_type,
                    split,
                    int(opt.get('light_use_cands', 100)),
                    opt.get('light_use_affordances', True),
                )
                msgs.append(msg)
                # Context was consumed by this message; start afresh. When
                # label is None the context keeps accumulating instead.
                text = ''
            # Carry the self agent's outputs for this turn into the context
            # of the next message, unless they were already shown above.
            if (use_feat(opt, 'light_use_speech', 'self')
                    and d['speech'][i + 1] is not None
                    and ('speech' not in shown)):
                text += '_self_say ' + str(d['speech'][i + 1]) + '\n'
            if (use_feat(opt, 'light_use_action', 'self')
                    and d['action'][i + 1] is not None
                    and ('action' not in shown)):
                text += '_self_act ' + str(d['action'][i + 1]) + '\n'
            if (use_feat(opt, 'light_use_emote', 'self')
                    and d['emote'][i + 1] is not None
                    and ('emote' not in shown)):
                text += '_self_emote ' + str(d['emote'][i + 1]) + '\n'
    if len(msgs) > 0:
        # Mark the final message of this dialogue as the end of the episode.
        msgs[-1]['episode_done'] = True
        for m in msgs:
            # print(m.replace('\n', '\\n'))
            fix_labels(m, opt)
            # Track the largest candidate list seen in the module-level `mx`.
            global mx
            mx = max(len(m['label_candidates']), mx)
            # print(mx)
            txt = msg_to_str(m)
            fw.write(txt + '\n')