# -*- coding: utf-8 -*-
"""
Created on Mon Nov 16 18:35:09 2015

@author: Valou
"""
import os

from extraction import extract2CRFsuite

# Root folder of the dataset; every other location below is built from it.
path = "/Users/Valou/Documents/TELECOM_PARISTECH/Stage_Lucas/Datasets/Semaine/"

ALL_LABELS = {'attitude_positive', 'attitude_negative', 'source', 'target'}

# Names of all the files found in path + "all/dump/", in sorted order.
ALL_FILES = os.listdir(path + "all/dump/")
ALL_FILES.sort()

label = 'attitude'
label_select = 'attitude'

# Only the first entry of ALL_FILES is processed here.
# (Open question from the original author: does i stand for a session?)
for i in range(1):
    filename = ALL_FILES[i]
    # The same file name is looked up in the text, audio and MFCC dump folders.
    text_dump = path + "all/dump/" + filename
    audio_dump = path + "all/dump_audio/" + filename
    mfcc_dump = path + "all/dump_mfcc/" + filename
    X, y = extract2CRFsuite(text_dump, audio_dump, mfcc_dump, label, 'TEXT')
def _session_paths(filename, valence):
    u"""Return the (text, audio, mfcc) dump paths of ``filename`` under ``path``."""
    # With valence the text dump is read from "all/dump_attitudeposneg_only/".
    text_dir = "all/dump" + ("_attitudeposneg_only" if valence else "") + "/"
    return (
        path + text_dir + filename,
        path + "all/dump_audio/" + filename,
        path + "all/dump_mfcc/" + filename,
    )


def _percentage(hits, misses):
    u"""Format hits / (hits + misses) as a percentage string with two decimals."""
    # The 0.01 offset keeps the ratio defined when both counts are zero.
    return "%.2f" % (hits / (hits + misses + 0.01) * 100)


def _label_tag(value):
    u"""Return ``value`` in a form that can be concatenated into a file name."""
    # A list/tuple of labels is joined with "_"; concatenating it directly to
    # a string would raise TypeError.
    if isinstance(value, (list, tuple)):
        return "_".join(str(item) for item in value)
    return value


def cvloo(label, path_results, params, label_select=None, LOOP_TEST=False, valence=False):
    u"""Compute a leave-one-file-out cross-validation for the given label.

    Every file of ``ALL_FILES`` is appended to a single ``pycrfsuite.Trainer``
    with its index as group number. For each file, a model is then trained
    with that group number passed as the hold-out argument, saved under
    ``path_model``, and used to tag the sequences of that same file. Token
    counts returned by ``F1_token`` are accumulated per file and overall.

    Relies on the module-level names ``path``, ``path_model``, ``ALL_FILES``,
    ``extract2CRFsuite``, ``F1_token``, ``dump_resultats`` and
    ``dump_resultats_total``.

    Parameters
    ----------
    label :
        Label forwarded to ``extract2CRFsuite``. A list is accepted; with
        ``valence`` it is reported as "attitud_posneg" in the file names,
        otherwise its items are joined with "_".
    path_results :
        Prefix (directory included) of the result files to write.
    params : dict
        Must provide "opt", "c1", "c2" and "max_it"; it is also forwarded
        as-is to ``extract2CRFsuite`` and ``dump_resultats_total``.
    label_select :
        Label forwarded to ``F1_token``; defaults to ``label``.
    LOOP_TEST : bool
        When true, the results are additionally passed to
        ``dump_resultats_total``.
    valence : bool
        True to distinguish the positive and negative attitudes; the text
        dumps are then read from "all/dump_attitudeposneg_only/".

    Returns
    -------
    str
        "Precision : ..., Recall : ..., F1 : ..." built from the overall
        scores.
    """
    if label_select is None:
        label_select = label
    opt = params["opt"]

    # Feed every file to the trainer, tagged with its index as group number.
    trainer = pycrfsuite.Trainer(verbose=False)
    for group, filename in enumerate(ALL_FILES):
        text_path, audio_path, mfcc_path = _session_paths(filename, valence)
        X, y = extract2CRFsuite(text_path, audio_path, mfcc_path, label, params)
        for x_seq, y_seq in zip(X, y):
            trainer.append(x_seq, y_seq, group)

    trainer.set_params(
        {
            "c1": params["c1"],  # coefficient for L1 penalty
            "c2": params["c2"],  # coefficient for L2 penalty
            "max_iterations": params["max_it"],  # stop earlier
            # include transitions that are possible, but not observed
            "feature.possible_transitions": False,
        }
    )

    truepos_o, falsepos_o, falseneg_o = 0, 0, 0
    precision = {}
    recall = {}
    for group, filename in enumerate(ALL_FILES):
        # The model file is named after the input file without its extension.
        model_file = path_model + "model_%s_" % opt + filename.split(".")[0]

        # Training: `group` is passed as the hold-out group of this fold.
        trainer.train(model_file, group)

        # Testing on the file whose index was given as hold-out group.
        text_path, audio_path, mfcc_path = _session_paths(filename, valence)
        X_test, y_test = extract2CRFsuite(text_path, audio_path, mfcc_path, label, params)

        truepos, falsepos, falseneg = 0, 0, 0
        tagger = pycrfsuite.Tagger(verbose=False)
        tagger.open(model_file)
        try:
            for sent, corr_labels in zip(X_test, y_test):
                pred_labels = tagger.tag(sent)
                tp_add, fp_add, fn_add = F1_token(pred_labels, corr_labels, label_select)
                truepos += tp_add
                falsepos += fp_add
                falseneg += fn_add
        finally:
            # Release the model even if tagging or scoring fails.
            tagger.close()

        precision[filename] = _percentage(truepos, falsepos)
        recall[filename] = _percentage(truepos, falseneg)
        truepos_o += truepos
        falsepos_o += falsepos
        falseneg_o += falseneg

    precision["overall"] = _percentage(truepos_o, falsepos_o)
    recall["overall"] = _percentage(truepos_o, falseneg_o)

    # F1 is deliberately derived from the rounded overall scores so that the
    # reported numbers stay the same as before; 1e-5 avoids a division by zero.
    overall_p = float(precision["overall"])
    overall_r = float(recall["overall"])
    F1 = 2 * overall_p * overall_r / (overall_p + overall_r + 1e-5)

    # If there is pos and neg differentiation for the attitudes
    if valence and isinstance(label, list):
        label = "attitud_posneg"
    # Make both labels safe to concatenate: without this, a list label would
    # raise TypeError here, after all the folds have already been trained.
    label_tag = _label_tag(label)
    select_tag = _label_tag(label_select)

    # Dump the different results
    ext = ".txt"
    dump_resultats(
        precision, recall, F1, path_results + "results_CVLOO_%s_" % (opt) + label_tag + "_" + select_tag + ext
    )
    if LOOP_TEST:
        # In loop-test mode, also dump ALL the results in one file.
        dump_resultats_total(
            precision,
            recall,
            F1,
            path_results + "results_total_%s_" % (opt) + label_tag + "_" + select_tag + ext,
            params,
        )

    return "Precision : %s, Recall : %s, F1 : %.2f" % (precision["overall"], recall["overall"], F1)