Example #1
0
# -*- coding: utf-8 -*-
"""
Created on Mon Nov 16 18:35:09 2015

@author: Valou
"""
import os
from extraction import extract2CRFsuite
# Root directory of the Semaine dataset (absolute, machine-specific path).
path = "/Users/Valou/Documents/TELECOM_PARISTECH/Stage_Lucas/Datasets/Semaine/"
ALL_LABELS = {"attitude_positive", "attitude_negative", "source", "target"}
# Names of all the files contained in path + "all/dump/", in sorted order.
ALL_FILES = sorted(os.listdir("%sall/dump/" % path))

label = label_select = "attitude"

# Only the first file is processed here.
# NOTE(review): the original author was unsure whether each index is a session.
for i in range(1):
    filename = ALL_FILES[i]
    # The same file name is looked up in the dump, dump_audio and dump_mfcc
    # directories.
    X, y = extract2CRFsuite(
        "%sall/dump/%s" % (path, filename),
        "%sall/dump_audio/%s" % (path, filename),
        "%sall/dump_mfcc/%s" % (path, filename),
        label,
        "TEXT",
    )
                            
Example #2
0
def cvloo(label, path_results, params, label_select=None, LOOP_TEST=False, valence=False):
    u"""Run a leave-one-file-out cross-validation for the given label.

    Every file of ``ALL_FILES`` is appended to a single CRFsuite trainer under
    its own group index.  Then, for each file, a model is trained with that
    file's group index passed as the second argument of ``trainer.train``
    (the holdout group in python-crfsuite), saved under ``path_model`` and
    used to tag that same file.  Counts returned by ``F1_token`` are summed
    per file and overall, and the resulting scores are dumped to text files.

    Parameters
    ----------
    label
        Label forwarded to ``extract2CRFsuite``; also part of the result file
        name (replaced by ``"attitud_posneg"`` when ``valence`` is set and
        ``label`` is a list).
    path_results
        Prefix (directory path) of the result files.
    params
        Dict providing at least ``"opt"``, ``"c1"``, ``"c2"`` and ``"max_it"``;
        it is also forwarded to ``extract2CRFsuite``.
    label_select
        Label handed to ``F1_token`` for scoring; defaults to ``label``.
    LOOP_TEST
        When true, the results are additionally written with
        ``dump_resultats_total``.
    valence
        True to distinguish positive and negative attitudes; the features are
        then read from ``all/dump_attitudeposneg_only/`` instead of
        ``all/dump/``.

    Returns
    -------
    str
        ``"Precision : ..., Recall : ..., F1 : ..."`` for the overall scores.
    """
    if label_select is None:
        label_select = label
    opt = params["opt"]

    dump_dir = "all/dump" + ("_attitudeposneg_only" if valence else "") + "/"

    def extract(filename):
        # Features and labels of one file, built from its dump, dump_audio
        # and dump_mfcc counterparts.
        return extract2CRFsuite(
            path + dump_dir + filename,
            path + "all/dump_audio/" + filename,
            path + "all/dump_mfcc/" + filename,
            label,
            params,
        )

    def percent(hits, misses):
        # The 0.01 in the denominator avoids a division by zero when there is
        # nothing to count; the result is kept as a two-decimal string.
        return "%.2f" % (hits / (hits + misses + 0.01) * 100)

    trainer = pycrfsuite.Trainer(verbose=False)

    # Each file gets its own group index so it can be held out later.
    for group, filename in enumerate(ALL_FILES):
        X, y = extract(filename)
        for x_seq, y_seq in zip(X, y):
            trainer.append(x_seq, y_seq, group)

    trainer.set_params(
        {
            "c1": params["c1"],  # coefficient for L1 penalty
            "c2": params["c2"],  # coefficient for L2 penalty
            "max_iterations": params["max_it"],  # stop earlier
            # include transitions that are possible, but not observed
            "feature.possible_transitions": False,
        }
    )

    truepos_o, falsepos_o, falseneg_o = (0, 0, 0)
    precision = {}
    recall = {}

    for group, filename in enumerate(ALL_FILES):
        # Drop everything after the first dot to get rid of the extension.
        model_file = path_model + "model_%s_" % opt + filename.split(".")[0]

        # Training, with this file's group given as the holdout argument.
        trainer.train(model_file, group)

        # Testing on the held-out file.
        X_test, y_test = extract(filename)
        tagger = pycrfsuite.Tagger(verbose=False)
        tagger.open(model_file)
        truepos, falsepos, falseneg = (0, 0, 0)
        try:
            for sent, corr_labels in zip(X_test, y_test):
                pred_labels = tagger.tag(sent)
                tp_add, fp_add, fn_add = F1_token(pred_labels, corr_labels, label_select)
                truepos += tp_add
                falsepos += fp_add
                falseneg += fn_add
        finally:
            # Release the model even if tagging or scoring fails; one tagger
            # is opened per file, so leaving them open accumulates handles.
            tagger.close()

        precision[filename] = percent(truepos, falsepos)
        recall[filename] = percent(truepos, falseneg)
        truepos_o += truepos
        falsepos_o += falsepos
        falseneg_o += falseneg

    precision["overall"] = percent(truepos_o, falsepos_o)
    recall["overall"] = percent(truepos_o, falseneg_o)

    # F1 is derived from the rounded percentages; 1e-5 guards against a zero
    # denominator when both are 0.
    p_overall = float(precision["overall"])
    r_overall = float(recall["overall"])
    F1 = 2 * p_overall * r_overall / (p_overall + r_overall + 1e-5)

    # If there is pos and neg differentiation for the attitudes, the list of
    # labels is replaced by a fixed tag in the result file names.
    label_name = label
    if valence and isinstance(label, list):
        label_name = "attitud_posneg"

    # NOTE(review): label_name and label_select are concatenated into the file
    # name, so both must be strings here; with a list ``label`` the caller has
    # to pass ``label_select`` explicitly (and set ``valence``), otherwise the
    # concatenation below raises TypeError.
    suffix = "%s_" % (opt) + label_name + "_" + label_select + ".txt"

    # Dump the different results.
    dump_resultats(precision, recall, F1, path_results + "results_CVLOO_" + suffix)
    if LOOP_TEST:  # in loop-test mode, also dump ALL the results in 1 file
        dump_resultats_total(precision, recall, F1, path_results + "results_total_" + suffix, params)

    return "Precision : %s, Recall : %s, F1 : %.2f" % (precision["overall"], recall["overall"], F1)