Ejemplo n.º 1
0
def main(ci,gamma,wi,data_dir,p):
    """Run the per-emotion libsvm pipeline (scale / tune / predict / evaluate).

    Args:
        ci: default C value for svm-train; overridden per emotion by the
            hard-coded list `c` below.
        gamma: RBF gamma passed to svm-train when no tuning is done.
        wi: class-weight argument passed to svm-train when no tuning is done.
        data_dir: root directory; each immediate subdirectory is one emotion.
        p: collection of stage names; any of 'scale', 'tune', 'pred',
           'evaluation' enables the corresponding pipeline step.
    """
    # The immediate subdirectories of data_dir are the emotion labels.
    emotions = [x[1] for x in os.walk(data_dir)][0]
    # Hand-picked per-emotion C values, consumed positionally; assumes at
    # most 5 emotions, otherwise c[count] raises IndexError -- TODO confirm.
    c = [30,30,0.1,0.01,10]
    count=0
    for emo in emotions:
        print 80*"*"
        print "Emotion is", emo
        dir = data_dir+emo  # NOTE(review): shadows the builtin `dir`
        trainfile = dir+"/train.scale"
        CVtestfile = dir+"/test.scale.cv"
        testfile = dir+"/test.scale"
        predfile = dir+"/pred"
        truefile = dir+'/y_test'
        cv_trfile = dir+'/cv/train.cv'
        cv_tfile = dir+'/cv/test.cv'
        cv_pfile = dir+'/cv/predresults.cv'
        cv_truey = dir+'/cv/y_test.cv'
        # Override the ci argument with the per-emotion constant.
        ci = c[count]
        count+=1

        if 'scale' in p:
            print "---Feature scaling"
            scaling(dir,emo)
        else:
            pass

        if 'tune' in p:
            print "---Parameter tuning"
            # tunec is assumed to be sorted best-first: element 0 holds the
            # winning parameter tuple with F1 last -- TODO confirm against
            # tuneC's return format.
            tunec=tuneC(trainfile,CVtestfile,cv_trfile,cv_tfile,cv_pfile,cv_truey)
            bestc=tunec[0][0]
            bestgamma=tunec[0][1]
            bestwi=tunec[0][3]
            bestCV=tunec[0][-1]
#                for i in tunec:
#                    print "CV: Learning LibSVM with kernel=rbf and C=%f and gamma=(1/num_feature), its F1 is %f"%(i[0],i[-1])
            print "Tuning: Five-fold CV on %s, the best F1 is %f at c=%f, gamma=%f and wi=%s"%(trainfile,bestCV,bestc,bestgamma,str(bestwi))

        if ('tune' in p) and ('pred' in p):
            print "---Model fitting and prediction"
            # Only reachable when the 'tune' branch ran, so bestc/bestgamma/
            # bestwi and tunec are defined here.
            predict(str(bestc),str(bestgamma),str(tunec[0][2]),str(bestwi),trainfile, testfile, predfile, emo)

        if ('tune' not in p) and ('pred' in p):
            print "---Model fitting and prediction"
            print 'C=%s, gamma=%s and wi=%s'%(str(ci), str(gamma), str(wi))
            # Kernel hard-coded to 2 (RBF in libsvm's -t encoding).
            predict(str(ci), str(gamma), str(2), str(wi), trainfile, testfile, predfile, emo)

        if 'evaluation' in p:
            print "---Evaluation"
            y_test, y_predicted = feval(truefile, predfile)
            print 'Precision score: ', metrics.precision_score(y_test, y_predicted, average='binary')
            print 'Recall score: ', metrics.recall_score(y_test, y_predicted, average='binary')
            print 'F1 score: ', metrics.f1_score(y_test, y_predicted, average='binary')
            print 80*"*"
Ejemplo n.º 2
0
def main(data_dir):
    emotions = [x[1] for x in os.walk(data_dir)][0]
    data_split = "/30-30/"  # define the test data split ratio here and make sure you have the sub folders
    for emo in emotions:
        print 80 * "*"
        print "Emotion is", emo
        dir = data_dir + emo
        datafile = dir + "/test.scale"
        #        predfile = dir+"/pred"
        #        truefile = dir+'/y_test'
        trainfile = dir + data_split + emo + ".train"
        valfile = dir + data_split + emo + ".val"
        valpredfile = dir + data_split + emo + ".val.pred"
        valtruefile = dir + data_split + emo + ".val.true"
        testfile = dir + data_split + emo + ".test"
        predfile = dir + data_split + emo + ".test.pred"
        truefile = dir + data_split + emo + ".test.true"
        cv_trfile = dir + '/cv/train.cv'
        cv_tfile = dir + '/cv/test.cv'
        cv_pfile = dir + '/cv/predresults.cv'
        cv_truey = dir + '/cv/y_test.cv'

        print "---Parameter tuning"
        tune_ = tune(emo, trainfile, valfile, valpredfile, valtruefile,
                     cv_trfile, cv_tfile, cv_pfile, cv_truey)
        bestc = tune_[0][0]
        bestw = tune_[0][1]
        bests1 = tune_[0][2]
        bests2 = tune_[0][3]
        bests3 = tune_[0][4]
        bestr = tune_[0][-3]
        bestp = tune_[0][-2]
        bestf1 = tune_[0][-1]
        print "Tuning aSVM on %s, the best F1 is %f at c=%s, wi=%s and (s1=%s ; s2=%s ; s3=%s)" % (
            valfile, bestf1, str(bestc), str(bestw), str(bests1), str(bests2),
            str(bests3))

        print "---Model fitting and prediction"
        adapt(str(bestc), str(bestw), str(bests1), str(bests2), str(bests3),
              trainfile, testfile, predfile, emo)
        print "---Evaluation"
        y_test, y_predicted = feval(truefile, predfile)
        print 'Precision score: ', metrics.precision_score(y_test,
                                                           y_predicted,
                                                           average='binary')
        print 'Recall score: ', metrics.recall_score(y_test,
                                                     y_predicted,
                                                     average='binary')
        print 'F1 score: ', metrics.f1_score(y_test,
                                             y_predicted,
                                             average='binary')
        print 80 * "*"
Ejemplo n.º 3
0
def main(ci, gamma, wi, data_dir, p):
    emotions = [x[1] for x in os.walk(data_dir)][0]
    for emo in emotions:
        print 80*"*"
        print "Emotion is", emo
        dir = data_dir+emo
        trainfile = dir+"/train.scale"
        testfile = dir+"/test.scale"
        predfile = dir+"/test.pred"
        truefile = dir+'/y_test'
        cv_trfile = dir+'/cv/train.cv'
        cv_tfile = dir+'/cv/test.cv'
        cv_pfile = dir+'/cv/predresults.cv'
        cv_truey = dir+'/cv/y_test.cv'

        if 'scale' in p:
            print "---Feature scaling"
            scaling(dir, emo)
        else:
            pass

        if 'tune' in p:
            print '---Parameter tuning'
            tune_ = tune(trainfile, cv_trfile, cv_tfile, cv_pfile, cv_truey)
            bestc = tune_[0][0]
            bestgamma = tune_[0][1]
            bestwi = tune_[0][2]
            bestF = tune_[0][-1]
            bestP = tune_[0][-2]
            bestR = tune_[0][-3]
            print "Five-fold CV on %s, best Precision is %f"%(trainfile, bestP)
            print "Five-fold CV on %s, best Recall is %f"%(trainfile, bestR)
            print "Five-fold CV on %s, best F1 is %f at c=%s, gamma=%s and wi=%s"%(trainfile, bestF, str(bestc), str(bestgamma), str(bestwi))

        if ('tune' in p) and ('pred' in p):
            print "---Model fitting and prediction"
            predict(str(bestc), str(bestgamma), str(2), str(bestwi), trainfile, testfile, predfile, dir, emo)
        if ('tune' not in p) and ('pred' in p):
            print "---Model fitting and prediction"
#            print 'C=%s, gamma=%s and wi=%s'%(str(ci), str(gamma), str(wi))
            predict2(testfile, predfile, dir, emo)

        if 'evaluation' in p:
            print "---Evaluation"
            y_test, y_predicted = feval(truefile, predfile)
            print 'Precision score: ', metrics.precision_score(y_test, y_predicted, average='binary')
            print 'Recall score: ', metrics.recall_score(y_test, y_predicted, average='binary')
            print 'F1 score: ', metrics.f1_score(y_test, y_predicted, average='binary')
            print 80*"*"
Ejemplo n.º 4
0
def CV(ci, gamma, kernel, wi, trfile, tfile, CV_trfile, CV_tfile, CV_pfile,
       CV_truey):
    trfeat = readfeats(trfile)
    tfeat = readfeats(tfile)
    cv = ShuffleSplit(n=len(tfeat), n_iter=5, test_size=0.2, random_state=0)
    f1_list = []
    p_list = []
    r_list = []
    count = 0
    for train_index, test_index in cv:
        count += 1
        cv_tfile = CV_tfile + str(count)
        cv_pfile = CV_pfile + str(count)
        cv_truey = CV_truey + str(count)
        X_train = trfeat
        X_test = tfeat[test_index]
        y_test = getlabels(X_test)
        writingfile(cv_tfile, X_test)
        writingfile(cv_truey, y_test)
        #        traincmd=["svm-train", "-c", "0.001", "-t", "2", "-g", "1", "-q", trfile]
        traincmd = [
            "svm-train", "-c", "0.001", "-t", "2", "-g", "1", "-w1", "1",
            "-w0", "1", "-q", trfile
        ]
        traincmd[2] = ci
        traincmd[4] = kernel
        traincmd[6] = gamma
        traincmd[10] = wi
        subprocess.call(traincmd)
        model = trfile.split('/')[-1] + '.model'
        predcmd = ["svm-predict", cv_tfile, model, cv_pfile]
        p = subprocess.Popen(predcmd, stdout=subprocess.PIPE)
        output, err = p.communicate()
        y_test, y_predicted = feval(cv_truey, cv_pfile)
        p_list.append(
            metrics.precision_score(y_test, y_predicted, average='binary'))
        r_list.append(
            metrics.recall_score(y_test, y_predicted, average='binary'))
        f1_list.append(metrics.f1_score(y_test, y_predicted, average='binary'))
    recall = np.mean(np.asarray(r_list))
    precision = np.mean(np.asarray(p_list))
    f1 = np.mean(np.asarray(f1_list))
    print "C=%s, gamma=%s and wi=%s, its F1 is %f" % (ci, gamma, wi, f1)
    return [recall, precision, f1]
Ejemplo n.º 5
0
def CV(ci,gamma,kernel,wi,trfile,tfile,CV_trfile,CV_tfile,CV_pfile,CV_truey):
    trfeat = readfeats(trfile)
    tfeat = readfeats(tfile)
    cv = ShuffleSplit(n=len(tfeat), n_iter=5, test_size=0.2, random_state=0)
    f1_list = []
    p_list = []
    r_list = []
    count = 0
    for train_index, test_index in cv:
        count+=1
        cv_tfile = CV_tfile+str(count)
        cv_pfile = CV_pfile+str(count)
        cv_truey = CV_truey+str(count)
        X_train=trfeat
        X_test=tfeat[test_index]
        y_test = getlabels(X_test)
        writingfile(cv_tfile, X_test)
        writingfile(cv_truey, y_test)
#        traincmd=["svm-train", "-c", "0.001", "-t", "2", "-g", "1", "-q", trfile]
        traincmd=["svm-train", "-c", "0.001", "-t", "2", "-g", "1", "-w1", "1", "-w0", "1", "-q", trfile]
        traincmd[2]=ci
        traincmd[4]=kernel
        traincmd[6]=gamma
        traincmd[10]=wi
        subprocess.call(traincmd)
        model=trfile.split('/')[-1]+'.model'
        predcmd=["svm-predict", cv_tfile, model, cv_pfile]
        p = subprocess.Popen(predcmd, stdout=subprocess.PIPE)
        output, err = p.communicate()
        y_test, y_predicted = feval(cv_truey, cv_pfile)
        p_list.append(metrics.precision_score(y_test, y_predicted, average='binary'))
        r_list.append(metrics.recall_score(y_test, y_predicted, average='binary'))
        f1_list.append(metrics.f1_score(y_test, y_predicted, average='binary'))
    recall = np.mean(np.asarray(r_list))
    precision = np.mean(np.asarray(p_list))
    f1 = np.mean(np.asarray(f1_list))
    print "C=%s, gamma=%s and wi=%s, its F1 is %f"%(ci,gamma,wi,f1)
    return [recall, precision, f1]
Ejemplo n.º 6
0
def CV(ci, wi, s1, s2, s3, trfile, CV_trfile, CV_tfile, CV_pfile, CV_truey,
       emo):
    feats = readfeats(trfile)
    cv = StratifiedShuffleSplit(y=getlabels(feats),
                                n_iter=5,
                                test_size=0.2,
                                random_state=0)
    f1_list = []
    p_list = []
    r_list = []
    count = 0
    for train_index, test_index in cv:
        count += 1
        cv_trfile = CV_trfile + str(count)
        cv_tfile = CV_tfile + str(count)
        cv_pfile = CV_pfile + str(count)
        cv_truey = CV_truey + str(count)
        X_train = feats[train_index]
        X_test = feats[test_index]
        y_test = getlabels(X_test)
        writingfile(cv_trfile, X_train)
        writingfile(cv_tfile, X_test)
        writingfile(cv_truey, y_test)
        adapt(str(ci), str(wi), str(s1), str(s2), str(s3), cv_trfile, cv_tfile,
              cv_pfile, emo)
        y_test, y_predicted = feval(cv_truey, cv_pfile)
        p_list.append(
            metrics.precision_score(y_test, y_predicted, average='binary'))
        r_list.append(
            metrics.recall_score(y_test, y_predicted, average='binary'))
        f1_list.append(metrics.f1_score(y_test, y_predicted, average='binary'))
    recall = np.mean(np.asarray(r_list))
    precision = np.mean(np.asarray(p_list))
    f1 = np.mean(np.asarray(f1_list))
    print "C=%s, wi=%s and (s1=%s ; s2=%s ; s3=%s), its F1 is %f" % (
        ci, wi, s1, s2, s3, f1)
    return recall, precision, f1
Ejemplo n.º 7
0
def main(ci, gamma, wi, data_dir, p):
    emotions = [x[1] for x in os.walk(data_dir)][0]
    c = [30, 30, 0.1, 0.01, 10]
    count = 0
    for emo in emotions:
        print 80 * "*"
        print "Emotion is", emo
        dir = data_dir + emo
        trainfile = dir + "/train.scale"
        CVtestfile = dir + "/test.scale.cv"
        testfile = dir + "/test.scale"
        predfile = dir + "/pred"
        truefile = dir + '/y_test'
        cv_trfile = dir + '/cv/train.cv'
        cv_tfile = dir + '/cv/test.cv'
        cv_pfile = dir + '/cv/predresults.cv'
        cv_truey = dir + '/cv/y_test.cv'
        ci = c[count]
        count += 1

        if 'scale' in p:
            print "---Feature scaling"
            scaling(dir, emo)
        else:
            pass

        if 'tune' in p:
            print "---Parameter tuning"
            tunec = tuneC(trainfile, CVtestfile, cv_trfile, cv_tfile, cv_pfile,
                          cv_truey)
            bestc = tunec[0][0]
            bestgamma = tunec[0][1]
            bestwi = tunec[0][3]
            bestCV = tunec[0][-1]
            #                for i in tunec:
            #                    print "CV: Learning LibSVM with kernel=rbf and C=%f and gamma=(1/num_feature), its F1 is %f"%(i[0],i[-1])
            print "Tuning: Five-fold CV on %s, the best F1 is %f at c=%f, gamma=%f and wi=%s" % (
                trainfile, bestCV, bestc, bestgamma, str(bestwi))

        if ('tune' in p) and ('pred' in p):
            print "---Model fitting and prediction"
            predict(str(bestc), str(bestgamma), str(tunec[0][2]), str(bestwi),
                    trainfile, testfile, predfile, emo)

        if ('tune' not in p) and ('pred' in p):
            print "---Model fitting and prediction"
            print 'C=%s, gamma=%s and wi=%s' % (str(ci), str(gamma), str(wi))
            predict(str(ci), str(gamma), str(2), str(wi), trainfile, testfile,
                    predfile, emo)

        if 'evaluation' in p:
            print "---Evaluation"
            y_test, y_predicted = feval(truefile, predfile)
            print 'Precision score: ', metrics.precision_score(
                y_test, y_predicted, average='binary')
            print 'Recall score: ', metrics.recall_score(y_test,
                                                         y_predicted,
                                                         average='binary')
            print 'F1 score: ', metrics.f1_score(y_test,
                                                 y_predicted,
                                                 average='binary')
            print 80 * "*"