# NOTE(review): this excerpt arrived with its line breaks collapsed; the
# nesting below is reconstructed from the syntax -- confirm against the file.
    # Last statement of a helper whose `def` line lies outside this excerpt:
    # joins the first two and the last two items of `l` with '_' and returns
    # the two resulting strings as a tuple.
    return tuple('_'.join(pl) for pl in (l[:2], l[-2:]))

# Index the rows of the `fxqap` table: the key is nsplit() applied to the
# row's 'a_id', the value is the list of (gi, si) pairs obtained from the
# row's 'q_id'.
ques = defaultdict(list)
x_t = cs.TabData(fxqap)
for row in x_t:
    gi, si = nsplit(row['q_id'].value)
    # Rows whose gi is not found in all_anno are left out of the index.
    if gi in all_anno:
        ques[nsplit(row['a_id'].value)].append((gi, si))
#~ print(ques)
#~ sys.exit()

# Load the `ffinal` table, call new_class('is_commitment') on it, then build
# and evaluate a trainer over it.
# NOTE(review): the meaning of the positional arguments (10, 'dialogue') is
# defined by cs.Trainer, which is not visible here -- presumably a fold count
# and a grouping column; confirm.
c_t = cs.TabData(ffinal)
c_t.new_class('is_commitment')
#~ end_c = cs.Trainer(c_t, 10, 'dialogue', learner='logreg')
end_c = cs.Trainer(c_t, 10, 'dialogue')
end_c.evaluate()

# Counters start at zero; nothing in the visible part of the excerpt updates
# them.
tot_ok = 0
tot_in = 0
tot_all = 0
with open('res/zog.txt', 'w', encoding='utf-8') as f:
    for pred, row in end_c.pred_rows():
        # Only handle rows where both the prediction and the row's own class
        # carry the value 'True'.
        if pred.value == 'True' and row.getclass().value == 'True':
        #~ for row in c_t:
            #~ if row.getclass().value == 'True':
            gi, si = nsplit(row['id'].value)
            # Look the row up in all_anno and write its text, one per line.
            u = all_anno[gi].elements[si]
            f.write(u.text + '\n')
            # The excerpt stops here; the rest of the loop body is not visible.
            qe = None
# NOTE(review): this excerpt arrived with its line breaks collapsed and starts
# inside a list literal whose opening bracket is outside the excerpt
# (presumably `feat_sel = [` -- confirm). Nesting below is reconstructed.
    'ends_with_bang_DU1', 'ends_with_qmark_DU1', 'has_FOR_np_DU1',
    'is_question_DU1', 'num_tokens_DU2', 'has_player_name_exact_DU2',
    'has_player_name_fuzzy_DU2', 'has_emoticons_DU2', 'is_emoticon_only_DU2',
    'speaker_started_the_dialogue_DU2',
    'speaker_already_spoken_in_dialogue_DU2',
    'speakers_first_turn_in_dialogue_DU2', 'turn_follows_gap_DU2',
    'position_in_dialogue_DU2', 'position_in_game_DU2',
    'edu_position_in_turn_DU2', 'has_correction_star_DU2',
    'ends_with_bang_DU2', 'ends_with_qmark_DU2', 'has_FOR_np_DU2',
    'is_question_DU2'
    ]
# Column names passed to sel_col() below together with feat_sel.
meta_sel = ['dialogue', 'id_DU1', 'id_DU2']

# Load the pair table and restrict it through sel_col() to the names in
# feat_sel and meta_sel plus 'CLASS'.
t_r = cs.TabData(fpairs)
#~ t_r.sel_row({'CLASS':'UNRELATED'}, negate=1)
t_r.sel_col(feat_sel, meta_sel, 'CLASS')
#~ t_r.save('res/cut.tab')

# Build a trainer with learner='logreg' and grouper='dialogue', then evaluate.
#~ c_r = cs.Trainer(t_r, grouper='dialogue')
c_r = cs.Trainer(t_r, learner='logreg', grouper='dialogue')
c_r.evaluate()

# As laid out here the script stops at this call, so the dump below does not
# run until the call is removed.
sys.exit()

# Write one tab-separated line per predicted row: the `.value` of the
# prediction, of the row's class, and of its 'id_DU1' and 'id_DU2' fields.
with open('../res/gpred.tab', 'w') as f:
    for pred, row in c_r.pred_rows():
        line = '\t'.join([
            k.value for k in
            (pred, row.getclass(), row['id_DU1'], row['id_DU2'])
            ])
        f.write(line + '\n')
# NOTE(review): this excerpt arrived with its line breaks collapsed; the
# nesting below is reconstructed from the syntax -- confirm against the file.

# Names (minus their last three characters) that occur both in all_anno and
# in snames; each one is printed.
lgames = set(na[:-3] for na in all_anno) & set(na[:-3] for na in snames)
for lg in lgames:
    print(lg)
# NOTE(review): assumed to sit after the loop, not inside it -- confirm.
sys.exit()

###### QAP #######################################

# The whole section is switched off by the constant condition below; the
# commented-out line is the "on" variant of the same switch.
#~ if True:
if False:
    # Load the pair table and apply sel_row() with {'CLASS': 'UNRELATED'} and
    # negate=1.
    p_t = cs.TabData(fpairs)
    p_t.sel_row({'CLASS': 'UNRELATED'}, negate=1)
    # Warning : you're supposing ALL EDUs are part of a relation
    # Check this, or you'll lose some
    print('Pairs loaded')
    p_c = cs.Trainer(p_t, 10, 'dialogue')

    # Three (name, 'd', tag) triples; the third field is '' or 'meta'.
    # NOTE(review): the consumer of pc_feat is outside this excerpt.
    pc_feat = (('is_question_p', 'd', ''),
               ('is_answer_p', 'd', ''),
               ('id', 'd', 'meta'))

    def pc_data():
        # Generator over p_c.pred_rows(). For each row whose prediction value
        # is 'Question-answer_pair' it yields (True, False, row['id_DU1'])
        # and then a second tuple whose contents are cut off in this excerpt.
        #~ it = ((r['CLASS'], r) for r in p_t)
        #~ for pred, row in it:
        for pred, row in p_c.pred_rows():
            if pred.value == 'Question-answer_pair':
                yield (
                    True, False,
                    #~ gid(row['id_DU1']))
                    row['id_DU1'])
                yield (
# Step 1 : restrict the pair table to the selected columns and save it to
# `fsrc`, which the later steps reload.
t_full = cs.TabData(fpairs)
t_full.sel_col(feat_sel, meta_sel, 'CLASS')
t_full.save(fsrc)

# Step 2 : distinct 'dialogue' values of the saved table, in random order
t_master = cs.TabData(fsrc)
dials = list({entry['dialogue'].value for entry in t_master})
random.shuffle(dials)
print('Data loaded')

# Step 3 : the curve loop -- each pass uses the first `t_size` shuffled
# dialogues, growing by `step_size` per pass.
all_scores = []
n = len(dials)
n_steps = int(n / step_size)
for step_no in range(1, n_steps + 1):
    t_size = step_size * step_no
    d_train = set(dials[:t_size])
    banner = '= Iteration {0:2}/{1:2}, size {2:3} ='.format(
        step_no, n_steps, t_size)
    print(banner)
    # Reload the table from `fsrc` on every pass, then keep only the rows
    # whose dialogue belongs to the current selection.
    t_cur = cs.TabData(fsrc)
    t_cur.sel_row_by(lambda entry: (entry['dialogue'].value in d_train))
    trainer = cs.Trainer(t_cur, grouper='dialogue')
    scores = trainer.evaluate(quiet=True)
    true_score = scores['True']
    print('Score : {0:.3}'.format(true_score))
    all_scores.append((t_size, true_score))

# Step 4 : append the number of dialogues and the (size, score) list to the
# log file.
with open('res/curve.log', 'a') as fres:
    fres.writelines([
        '# {0}\n'.format(n),
        str(all_scores) + '\n',
    ])