Example #1
0
File: target.py Project: jrmyp/zeno
    return tuple('_'.join(pl) for pl in (l[:2], l[-2:]))


# Index the table loaded from fxqap: for each row whose split 'q_id' has a
# first component present in all_anno, file the split question id under the
# split 'a_id' of the same row.
x_t = cs.TabData(fxqap)
ques = defaultdict(list)
for row in x_t:
    gi, si = nsplit(row['q_id'].value)
    if gi not in all_anno:
        # First component of the question id is unknown to all_anno: skip.
        continue
    a_key = nsplit(row['a_id'].value)
    ques[a_key].append((gi, si))

#~ print(ques)
#~ sys.exit()
# Load the table from ffinal and declare 'is_commitment' through new_class
# (presumably making it the class column to predict -- TODO confirm in cs).
c_t = cs.TabData(ffinal)
c_t.new_class('is_commitment')

# Train and evaluate with the Trainer's default learner. The positional
# arguments 10 and 'dialogue' are presumably the fold count and the grouping
# column -- verify against cs.Trainer.
#~ end_c = cs.Trainer(c_t, 10, 'dialogue', learner='logreg')
end_c = cs.Trainer(c_t, 10, 'dialogue')
end_c.evaluate()

# Running totals, all starting at zero.
tot_ok = tot_in = tot_all = 0

with open('res/zog.txt', 'w', encoding='utf-8') as f:
    for pred, row in end_c.pred_rows():
        if pred.value == 'True' and row.getclass().value == 'True':
            #~ for row in c_t:
            #~ if row.getclass().value == 'True':
            gi, si = nsplit(row['id'].value)
            u = all_anno[gi].elements[si]
            f.write(u.text + '\n')
            qe = None
Example #2
0
    'ends_with_bang_DU1', 'ends_with_qmark_DU1', 'has_FOR_np_DU1',
    'is_question_DU1', 'num_tokens_DU2', 'has_player_name_exact_DU2',
    'has_player_name_fuzzy_DU2', 'has_emoticons_DU2', 'is_emoticon_only_DU2',
    'speaker_started_the_dialogue_DU2',
    'speaker_already_spoken_in_dialogue_DU2',
    'speakers_first_turn_in_dialogue_DU2', 'turn_follows_gap_DU2',
    'position_in_dialogue_DU2', 'position_in_game_DU2',
    'edu_position_in_turn_DU2', 'has_correction_star_DU2',
    'ends_with_bang_DU2', 'ends_with_qmark_DU2', 'has_FOR_np_DU2',
    'is_question_DU2'
]
# Non-feature columns passed to sel_col alongside the features: the dialogue
# name and the ids of the two units of each pair.
meta_sel = ['dialogue', 'id_DU1', 'id_DU2']

# Load the pair table and apply the column selection (presumably keeps only
# the listed feature columns, the metadata columns and 'CLASS' -- TODO
# confirm against cs.TabData.sel_col).
t_r = cs.TabData(fpairs)
#~ t_r.sel_row({'CLASS':'UNRELATED'}, negate=1)
t_r.sel_col(feat_sel, meta_sel, 'CLASS')
#~ t_r.save('res/cut.tab')

# Train with the 'logreg' learner, grouping by the 'dialogue' column; the
# commented-out line is the same call with the Trainer's default learner.
#~ c_r = cs.Trainer(t_r, grouper='dialogue')
c_r = cs.Trainer(t_r, learner='logreg', grouper='dialogue')

c_r.evaluate()
# NOTE(review): the script stops here, so the prediction dump that follows is
# never executed. Presumably a debugging leftover -- confirm before removing.
sys.exit()
# Dump one tab-separated line per prediction: predicted value, class value
# returned by row.getclass(), then the 'id_DU1' and 'id_DU2' values.
# Unreachable for as long as the sys.exit() call just above stays in place.
with open('../res/gpred.tab', 'w') as f:
    for pred, row in c_r.pred_rows():
        cells = (pred, row.getclass(), row['id_DU1'], row['id_DU2'])
        line = '\t'.join(cell.value for cell in cells)
        f.write(line + '\n')
Example #3
0
File: gather.py Project: jrmyp/zeno
    lgames = set(na[:-3] for na in all_anno) & set(na[:-3] for na in snames)
    for lg in lgames:
        print(lg)

    sys.exit()

###### QAP #######################################

#~ if True:
if False:
    p_t = cs.TabData(fpairs)
    p_t.sel_row({'CLASS': 'UNRELATED'}, negate=1)
    # Warning : you're supposing ALL EDUs are part of a relation
    #   Check this, or you'll lose some
    print('Pairs loaded')
    p_c = cs.Trainer(p_t, 10, 'dialogue')

    pc_feat = (('is_question_p', 'd', ''), ('is_answer_p', 'd', ''),
               ('id', 'd', 'meta'))

    def pc_data():
        #~ it = ((r['CLASS'], r) for r in p_t)
        #~ for pred, row in it:
        for pred, row in p_c.pred_rows():
            if pred.value == 'Question-answer_pair':
                yield (
                    True,
                    False,
                    #~ gid(row['id_DU1']))
                    row['id_DU1'])
                yield (
Example #4
0
    t_full = cs.TabData(fpairs)
    t_full.sel_col(feat_sel, meta_sel, 'CLASS')
    t_full.save(fsrc)

# Step 2 : distinct dialogue names, in random order
t_master = cs.TabData(fsrc)
# NOTE(review): if the 'dialogue' values are strings, the order in which the
# set is iterated changes between interpreter runs (hash randomisation), so
# seeding `random` alone would not make the shuffled order reproducible.
dials = list({l['dialogue'].value for l in t_master})
random.shuffle(dials)
print('Data loaded')

# Step 3 : the curve loop
# Evaluate on growing prefixes of the shuffled dialogue list, step_size
# dialogues at a time, and collect (training size, score) pairs.
# NOTE(review): when n is not a multiple of step_size, the last n % step_size
# dialogues are never part of any iteration.
all_scores = []
n = len(dials)
n_steps = n // step_size
for m in range(n_steps):
    t_size = (m + 1) * step_size
    d_train = set(dials[:t_size])
    print('= Iteration {0:2}/{1:2}, size {2:3} ='.format(
        m + 1, n_steps, t_size))
    # A fresh table is loaded on every iteration before filtering
    # (presumably because sel_row_by filters in place -- TODO confirm).
    t_cur = cs.TabData(fsrc)
    t_cur.sel_row_by(lambda x: x['dialogue'].value in d_train)
    trainer = cs.Trainer(t_cur, grouper='dialogue')
    scores = trainer.evaluate(quiet=True)
    # Only the entry stored under the 'True' key is reported and recorded.
    score_true = scores['True']
    print('Score : {0:.3}'.format(score_true))
    all_scores.append((t_size, score_true))

# Step 4 : output scores
# Append to the log (mode 'a', so earlier runs are kept): a '# <n>' header
# line with the total number of dialogues, then the list of
# (training size, score) pairs on a single line.
with open('res/curve.log', 'a') as fres:
    fres.writelines([
        '# {0}\n'.format(n),
        str(all_scores) + '\n',
    ])