def main(ci, gamma, wi, data_dir, p):
    """Run the LibSVM pipeline stages named in ``p`` for every emotion.

    Each immediate sub-directory of ``data_dir`` is treated as one emotion.
    For every emotion the stages are run in this fixed order, each only when
    its name is found in ``p``: ``'scale'``, ``'tune'``, ``'pred'``,
    ``'evaluation'``.

    Args:
        ci: C value used for any emotion beyond the fifth. The first five
            emotions take their C from the built-in list below, which
            overrides this argument (as the original code did).
        gamma: Passed to ``predict`` when predicting without tuning.
        wi: Passed to ``predict`` when predicting without tuning.
        data_dir: Directory whose immediate sub-directories are the emotions.
            A trailing path separator is optional.
        p: Container (or string) of stage names; only tested with ``in``.

    Raises:
        IndexError: If ``data_dir`` cannot be listed (for example it does not
            exist). The type matches what the previous lookup raised.
    """
    # Only the first os.walk() entry is needed; the old list comprehension
    # walked the whole tree just to read the top-level directory names.
    listing = next(os.walk(data_dir), None)
    if listing is None:
        raise IndexError(
            "no emotion directories can be listed under %r" % (data_dir,))
    emotions = listing[1]

    # NOTE(review): these values are matched to emotions purely by listing
    # position, and os.walk() order is filesystem dependent -- confirm the
    # intended emotion-to-C mapping.
    per_emotion_c = [30, 30, 0.1, 0.01, 10]
    fallback_c = ci

    for position, emo in enumerate(emotions):
        # Single-argument print() emits the same text on Python 2 and 3.
        print(80 * "*")
        print("Emotion is" + " " + emo)

        emo_dir = os.path.join(data_dir, emo)
        trainfile = emo_dir + "/train.scale"
        CVtestfile = emo_dir + "/test.scale.cv"
        testfile = emo_dir + "/test.scale"
        predfile = emo_dir + "/pred"
        truefile = emo_dir + '/y_test'
        cv_trfile = emo_dir + '/cv/train.cv'
        cv_tfile = emo_dir + '/cv/test.cv'
        cv_pfile = emo_dir + '/cv/predresults.cv'
        cv_truey = emo_dir + '/cv/y_test.cv'

        # Previously c[count] raised IndexError for a sixth emotion directory;
        # fall back to the caller-supplied value instead.
        if position < len(per_emotion_c):
            ci = per_emotion_c[position]
        else:
            ci = fallback_c

        if 'scale' in p:
            print("---Feature scaling")
            scaling(emo_dir, emo)

        if 'tune' in p:
            print("---Parameter tuning")
            tunec = tuneC(trainfile, CVtestfile, cv_trfile, cv_tfile,
                          cv_pfile, cv_truey)
            best = tunec[0]  # first row is reported as the best setting
            bestc = best[0]
            bestgamma = best[1]
            bestwi = best[3]
            bestCV = best[-1]
            print("Tuning: Five-fold CV on %s, the best F1 is %f at c=%f, gamma=%f and wi=%s"
                  % (trainfile, bestCV, bestc, bestgamma, str(bestwi)))
            if 'pred' in p:
                print("---Model fitting and prediction")
                # best[2] goes in the slot where the untuned path passes 2.
                predict(str(bestc), str(bestgamma), str(best[2]), str(bestwi),
                        trainfile, testfile, predfile, emo)
        elif 'pred' in p:
            print("---Model fitting and prediction")
            print('C=%s, gamma=%s and wi=%s' % (str(ci), str(gamma), str(wi)))
            predict(str(ci), str(gamma), str(2), str(wi),
                    trainfile, testfile, predfile, emo)

        if 'evaluation' in p:
            print("---Evaluation")
            y_test, y_predicted = feval(truefile, predfile)
            precision = metrics.precision_score(y_test, y_predicted,
                                                average='binary')
            print('Precision score: ' + " " + str(precision))
            recall = metrics.recall_score(y_test, y_predicted,
                                          average='binary')
            print('Recall score: ' + " " + str(recall))
            f1 = metrics.f1_score(y_test, y_predicted, average='binary')
            print('F1 score: ' + " " + str(f1))

    # NOTE(review): the original indentation was lost; this closing banner is
    # assumed to belong after the loop -- confirm.
    print(80 * "*")
def main(data_dir):
    """Tune, fit and evaluate the adapted SVM for every emotion.

    Each immediate sub-directory of ``data_dir`` is treated as one emotion.
    For every emotion the function calls ``tune``, feeds the first tuning row
    to ``adapt`` to write predictions for the test split, and prints
    precision, recall and F1 computed from the files read by ``feval``.

    Args:
        data_dir: Directory whose immediate sub-directories are the emotions.
            A trailing path separator is optional.

    Raises:
        IndexError: If ``data_dir`` cannot be listed (for example it does not
            exist). The type matches what the previous lookup raised.
    """
    # Only the first os.walk() entry is needed; the old list comprehension
    # walked the whole tree just to read the top-level directory names.
    listing = next(os.walk(data_dir), None)
    if listing is None:
        raise IndexError(
            "no emotion directories can be listed under %r" % (data_dir,))
    emotions = listing[1]

    # Define the test data split ratio here and make sure the matching
    # sub-folder exists inside every emotion directory.
    data_split = "/30-30/"

    for emo in emotions:
        # Single-argument print() emits the same text on Python 2 and 3.
        print(80 * "*")
        print("Emotion is" + " " + emo)

        emo_dir = os.path.join(data_dir, emo)
        split_prefix = emo_dir + data_split + emo
        trainfile = split_prefix + ".train"
        valfile = split_prefix + ".val"
        valpredfile = split_prefix + ".val.pred"
        valtruefile = split_prefix + ".val.true"
        testfile = split_prefix + ".test"
        predfile = split_prefix + ".test.pred"
        truefile = split_prefix + ".test.true"
        cv_trfile = emo_dir + '/cv/train.cv'
        cv_tfile = emo_dir + '/cv/test.cv'
        cv_pfile = emo_dir + '/cv/predresults.cv'
        cv_truey = emo_dir + '/cv/y_test.cv'

        print("---Parameter tuning")
        tune_ = tune(emo, trainfile, valfile, valpredfile, valtruefile,
                     cv_trfile, cv_tfile, cv_pfile, cv_truey)
        best = tune_[0]  # first row is reported as the best setting
        bestc = best[0]
        bestw = best[1]
        bests1 = best[2]
        bests2 = best[3]
        bests3 = best[4]
        bestf1 = best[-1]
        print("Tuning aSVM on %s, the best F1 is %f at c=%s, wi=%s and (s1=%s ; s2=%s ; s3=%s)"
              % (valfile, bestf1, str(bestc), str(bestw),
                 str(bests1), str(bests2), str(bests3)))

        print("---Model fitting and prediction")
        adapt(str(bestc), str(bestw), str(bests1), str(bests2), str(bests3),
              trainfile, testfile, predfile, emo)

        print("---Evaluation")
        y_test, y_predicted = feval(truefile, predfile)
        precision = metrics.precision_score(y_test, y_predicted,
                                            average='binary')
        print('Precision score: ' + " " + str(precision))
        recall = metrics.recall_score(y_test, y_predicted, average='binary')
        print('Recall score: ' + " " + str(recall))
        f1 = metrics.f1_score(y_test, y_predicted, average='binary')
        print('F1 score: ' + " " + str(f1))

    # NOTE(review): the original indentation was lost; this closing banner is
    # assumed to belong after the loop -- confirm.
    print(80 * "*")
def main(ci, gamma, wi, data_dir, p):
    """Run the LibSVM pipeline stages named in ``p`` for every emotion.

    Each immediate sub-directory of ``data_dir`` is treated as one emotion.
    For every emotion the stages are run in this fixed order, each only when
    its name is found in ``p``: ``'scale'``, ``'tune'``, ``'pred'``,
    ``'evaluation'``. With ``'tune'`` the prediction step calls ``predict``
    with the tuned values; without it, ``predict2`` is called instead.

    Args:
        ci: Not used by this function; kept for call compatibility.
        gamma: Not used by this function; kept for call compatibility.
        wi: Not used by this function; kept for call compatibility.
        data_dir: Directory whose immediate sub-directories are the emotions.
            A trailing path separator is optional.
        p: Container (or string) of stage names; only tested with ``in``.

    Raises:
        IndexError: If ``data_dir`` cannot be listed (for example it does not
            exist). The type matches what the previous lookup raised.
    """
    # Only the first os.walk() entry is needed; the old list comprehension
    # walked the whole tree just to read the top-level directory names.
    listing = next(os.walk(data_dir), None)
    if listing is None:
        raise IndexError(
            "no emotion directories can be listed under %r" % (data_dir,))
    emotions = listing[1]

    for emo in emotions:
        # Single-argument print() emits the same text on Python 2 and 3.
        print(80 * "*")
        print("Emotion is" + " " + emo)

        emo_dir = os.path.join(data_dir, emo)
        trainfile = emo_dir + "/train.scale"
        testfile = emo_dir + "/test.scale"
        predfile = emo_dir + "/test.pred"
        truefile = emo_dir + '/y_test'
        cv_trfile = emo_dir + '/cv/train.cv'
        cv_tfile = emo_dir + '/cv/test.cv'
        cv_pfile = emo_dir + '/cv/predresults.cv'
        cv_truey = emo_dir + '/cv/y_test.cv'

        if 'scale' in p:
            print("---Feature scaling")
            scaling(emo_dir, emo)

        if 'tune' in p:
            print('---Parameter tuning')
            tune_ = tune(trainfile, cv_trfile, cv_tfile, cv_pfile, cv_truey)
            best = tune_[0]  # first row is reported as the best setting
            bestc = best[0]
            bestgamma = best[1]
            bestwi = best[2]
            bestF = best[-1]
            bestP = best[-2]
            bestR = best[-3]
            print("Five-fold CV on %s, best Precision is %f" % (trainfile, bestP))
            print("Five-fold CV on %s, best Recall is %f" % (trainfile, bestR))
            print("Five-fold CV on %s, best F1 is %f at c=%s, gamma=%s and wi=%s"
                  % (trainfile, bestF, str(bestc), str(bestgamma), str(bestwi)))
            if 'pred' in p:
                print("---Model fitting and prediction")
                predict(str(bestc), str(bestgamma), str(2), str(bestwi),
                        trainfile, testfile, predfile, emo_dir, emo)
        elif 'pred' in p:
            print("---Model fitting and prediction")
            predict2(testfile, predfile, emo_dir, emo)

        if 'evaluation' in p:
            print("---Evaluation")
            y_test, y_predicted = feval(truefile, predfile)
            precision = metrics.precision_score(y_test, y_predicted,
                                                average='binary')
            print('Precision score: ' + " " + str(precision))
            recall = metrics.recall_score(y_test, y_predicted,
                                          average='binary')
            print('Recall score: ' + " " + str(recall))
            f1 = metrics.f1_score(y_test, y_predicted, average='binary')
            print('F1 score: ' + " " + str(f1))

    # NOTE(review): the original indentation was lost; this closing banner is
    # assumed to belong after the loop -- confirm.
    print(80 * "*")
def CV(ci, gamma, kernel, wi, trfile, tfile, CV_trfile, CV_tfile, CV_pfile,
       CV_truey):
    """Score one LibSVM parameter setting on five random samples of ``tfile``.

    A model is trained once on ``trfile`` with the ``svm-train`` command and
    then applied with ``svm-predict`` to five shuffled 20% samples of the rows
    read from ``tfile``. Precision, recall and F1 are averaged over the five
    samples.

    Args:
        ci: Value for ``svm-train -c``.
        gamma: Value for ``svm-train -g``.
        kernel: Value for ``svm-train -t``.
        wi: Value for ``svm-train -w0`` (``-w1`` stays at ``1``).
        trfile: Training file handed to ``svm-train``.
        tfile: File whose rows are sampled for evaluation.
        CV_trfile: Not used by this function; kept for call compatibility.
        CV_tfile: Path prefix for the per-sample test files.
        CV_pfile: Path prefix for the per-sample prediction files.
        CV_truey: Path prefix for the per-sample true-label files.

    Returns:
        list: ``[mean recall, mean precision, mean F1]``.

    Raises:
        subprocess.CalledProcessError: If ``svm-train`` or ``svm-predict``
            exits with a non-zero status.
    """
    tfeat = readfeats(tfile)
    cv = ShuffleSplit(n=len(tfeat), n_iter=5, test_size=0.2, random_state=0)

    # str() lets callers pass numbers as well as strings; subprocess needs an
    # argv made of strings only.
    traincmd = [
        "svm-train", "-c", str(ci), "-t", str(kernel), "-g", str(gamma),
        "-w1", "1", "-w0", str(wi), "-q", trfile
    ]
    # The command is the same for every sample, so train once instead of once
    # per iteration. check_call stops a failed training run from being scored
    # against a stale model file.
    subprocess.check_call(traincmd)
    # No model path is passed to svm-train; this assumes it writes
    # "<training file name>.model" into the current working directory.
    model = trfile.split('/')[-1] + '.model'

    f1_list = []
    p_list = []
    r_list = []
    for count, (_train_index, test_index) in enumerate(cv, 1):
        cv_tfile = CV_tfile + str(count)
        cv_pfile = CV_pfile + str(count)
        cv_truey = CV_truey + str(count)

        X_test = tfeat[test_index]
        y_test = getlabels(X_test)
        writingfile(cv_tfile, X_test)
        writingfile(cv_truey, y_test)

        predcmd = ["svm-predict", cv_tfile, model, cv_pfile]
        # stdout is captured only to keep svm-predict's output off the console.
        proc = subprocess.Popen(predcmd, stdout=subprocess.PIPE)
        proc.communicate()
        if proc.returncode != 0:
            raise subprocess.CalledProcessError(proc.returncode, predcmd)

        y_test, y_predicted = feval(cv_truey, cv_pfile)
        p_list.append(
            metrics.precision_score(y_test, y_predicted, average='binary'))
        r_list.append(
            metrics.recall_score(y_test, y_predicted, average='binary'))
        f1_list.append(
            metrics.f1_score(y_test, y_predicted, average='binary'))

    recall = np.mean(np.asarray(r_list))
    precision = np.mean(np.asarray(p_list))
    f1 = np.mean(np.asarray(f1_list))
    # Single-argument print() emits the same text on Python 2 and 3.
    print("C=%s, gamma=%s and wi=%s, its F1 is %f" % (ci, gamma, wi, f1))
    return [recall, precision, f1]
def CV(ci, gamma, kernel, wi, trfile, tfile, CV_trfile, CV_tfile, CV_pfile,
       CV_truey):
    """Score one LibSVM parameter setting on five random samples of ``tfile``.

    A model is trained once on ``trfile`` with the ``svm-train`` command and
    then applied with ``svm-predict`` to five shuffled 20% samples of the rows
    read from ``tfile``. Precision, recall and F1 are averaged over the five
    samples.

    Args:
        ci: Value for ``svm-train -c``.
        gamma: Value for ``svm-train -g``.
        kernel: Value for ``svm-train -t``.
        wi: Value for ``svm-train -w0`` (``-w1`` stays at ``1``).
        trfile: Training file handed to ``svm-train``.
        tfile: File whose rows are sampled for evaluation.
        CV_trfile: Not used by this function; kept for call compatibility.
        CV_tfile: Path prefix for the per-sample test files.
        CV_pfile: Path prefix for the per-sample prediction files.
        CV_truey: Path prefix for the per-sample true-label files.

    Returns:
        list: ``[mean recall, mean precision, mean F1]``.

    Raises:
        subprocess.CalledProcessError: If ``svm-train`` or ``svm-predict``
            exits with a non-zero status.
    """
    tfeat = readfeats(tfile)
    cv = ShuffleSplit(n=len(tfeat), n_iter=5, test_size=0.2, random_state=0)

    # str() lets callers pass numbers as well as strings; subprocess needs an
    # argv made of strings only.
    traincmd = [
        "svm-train", "-c", str(ci), "-t", str(kernel), "-g", str(gamma),
        "-w1", "1", "-w0", str(wi), "-q", trfile
    ]
    # The command is the same for every sample, so train once instead of once
    # per iteration. check_call stops a failed training run from being scored
    # against a stale model file.
    subprocess.check_call(traincmd)
    # No model path is passed to svm-train; this assumes it writes
    # "<training file name>.model" into the current working directory.
    model = trfile.split('/')[-1] + '.model'

    f1_list = []
    p_list = []
    r_list = []
    for count, (_train_index, test_index) in enumerate(cv, 1):
        cv_tfile = CV_tfile + str(count)
        cv_pfile = CV_pfile + str(count)
        cv_truey = CV_truey + str(count)

        X_test = tfeat[test_index]
        y_test = getlabels(X_test)
        writingfile(cv_tfile, X_test)
        writingfile(cv_truey, y_test)

        predcmd = ["svm-predict", cv_tfile, model, cv_pfile]
        # stdout is captured only to keep svm-predict's output off the console.
        proc = subprocess.Popen(predcmd, stdout=subprocess.PIPE)
        proc.communicate()
        if proc.returncode != 0:
            raise subprocess.CalledProcessError(proc.returncode, predcmd)

        y_test, y_predicted = feval(cv_truey, cv_pfile)
        p_list.append(
            metrics.precision_score(y_test, y_predicted, average='binary'))
        r_list.append(
            metrics.recall_score(y_test, y_predicted, average='binary'))
        f1_list.append(
            metrics.f1_score(y_test, y_predicted, average='binary'))

    recall = np.mean(np.asarray(r_list))
    precision = np.mean(np.asarray(p_list))
    f1 = np.mean(np.asarray(f1_list))
    # Single-argument print() emits the same text on Python 2 and 3.
    print("C=%s, gamma=%s and wi=%s, its F1 is %f" % (ci, gamma, wi, f1))
    return [recall, precision, f1]
def CV(ci, wi, s1, s2, s3, trfile, CV_trfile, CV_tfile, CV_pfile, CV_truey,
       emo):
    """Average recall, precision and F1 over five stratified splits of ``trfile``.

    The rows read from ``trfile`` are split five times (20% held out,
    stratified on the labels returned by ``getlabels``). For each split the
    train rows, held-out rows and held-out labels are written to numbered
    files, ``adapt`` is called on them, and the scores are computed from the
    label and prediction files read back by ``feval``.

    Args:
        ci, wi, s1, s2, s3: Parameter values, stringified and forwarded to
            ``adapt``.
        trfile: File whose rows are split.
        CV_trfile: Path prefix for the per-split training files.
        CV_tfile: Path prefix for the per-split test files.
        CV_pfile: Path prefix for the per-split prediction files.
        CV_truey: Path prefix for the per-split true-label files.
        emo: Emotion name, forwarded to ``adapt``.

    Returns:
        tuple: ``(mean recall, mean precision, mean F1)``.
    """
    samples = readfeats(trfile)
    splitter = StratifiedShuffleSplit(y=getlabels(samples), n_iter=5,
                                      test_size=0.2, random_state=0)

    recalls, precisions, f1_scores = [], [], []
    for fold, (train_rows, test_rows) in enumerate(splitter, 1):
        suffix = str(fold)
        fold_train = CV_trfile + suffix
        fold_test = CV_tfile + suffix
        fold_pred = CV_pfile + suffix
        fold_truth = CV_truey + suffix

        train_part = samples[train_rows]
        held_out = samples[test_rows]
        held_out_labels = getlabels(held_out)
        writingfile(fold_train, train_part)
        writingfile(fold_test, held_out)
        writingfile(fold_truth, held_out_labels)

        # adapt() is expected to leave its predictions in fold_pred, which
        # feval() reads back below.
        adapt(str(ci), str(wi), str(s1), str(s2), str(s3),
              fold_train, fold_test, fold_pred, emo)

        truth, guessed = feval(fold_truth, fold_pred)
        precisions.append(
            metrics.precision_score(truth, guessed, average='binary'))
        recalls.append(
            metrics.recall_score(truth, guessed, average='binary'))
        f1_scores.append(metrics.f1_score(truth, guessed, average='binary'))

    recall = np.mean(np.asarray(recalls))
    precision = np.mean(np.asarray(precisions))
    f1 = np.mean(np.asarray(f1_scores))
    summary = "C=%s, wi=%s and (s1=%s ; s2=%s ; s3=%s), its F1 is %f" % (
        ci, wi, s1, s2, s3, f1)
    print(summary)
    return recall, precision, f1
def main(ci, gamma, wi, data_dir, p):
    """Run the LibSVM pipeline stages named in ``p`` for every emotion.

    Each immediate sub-directory of ``data_dir`` is treated as one emotion.
    For every emotion the stages are run in this fixed order, each only when
    its name is found in ``p``: ``'scale'``, ``'tune'``, ``'pred'``,
    ``'evaluation'``.

    Args:
        ci: C value used for any emotion beyond the fifth. The first five
            emotions take their C from the built-in list below, which
            overrides this argument (as the original code did).
        gamma: Passed to ``predict`` when predicting without tuning.
        wi: Passed to ``predict`` when predicting without tuning.
        data_dir: Directory whose immediate sub-directories are the emotions.
            A trailing path separator is optional.
        p: Container (or string) of stage names; only tested with ``in``.

    Raises:
        IndexError: If ``data_dir`` cannot be listed (for example it does not
            exist). The type matches what the previous lookup raised.
    """
    # Only the first os.walk() entry is needed; the old list comprehension
    # walked the whole tree just to read the top-level directory names.
    listing = next(os.walk(data_dir), None)
    if listing is None:
        raise IndexError(
            "no emotion directories can be listed under %r" % (data_dir,))
    emotions = listing[1]

    # NOTE(review): these values are matched to emotions purely by listing
    # position, and os.walk() order is filesystem dependent -- confirm the
    # intended emotion-to-C mapping.
    per_emotion_c = [30, 30, 0.1, 0.01, 10]
    fallback_c = ci

    for position, emo in enumerate(emotions):
        # Single-argument print() emits the same text on Python 2 and 3.
        print(80 * "*")
        print("Emotion is" + " " + emo)

        emo_dir = os.path.join(data_dir, emo)
        trainfile = emo_dir + "/train.scale"
        CVtestfile = emo_dir + "/test.scale.cv"
        testfile = emo_dir + "/test.scale"
        predfile = emo_dir + "/pred"
        truefile = emo_dir + '/y_test'
        cv_trfile = emo_dir + '/cv/train.cv'
        cv_tfile = emo_dir + '/cv/test.cv'
        cv_pfile = emo_dir + '/cv/predresults.cv'
        cv_truey = emo_dir + '/cv/y_test.cv'

        # Previously c[count] raised IndexError for a sixth emotion directory;
        # fall back to the caller-supplied value instead.
        if position < len(per_emotion_c):
            ci = per_emotion_c[position]
        else:
            ci = fallback_c

        if 'scale' in p:
            print("---Feature scaling")
            scaling(emo_dir, emo)

        if 'tune' in p:
            print("---Parameter tuning")
            tunec = tuneC(trainfile, CVtestfile, cv_trfile, cv_tfile,
                          cv_pfile, cv_truey)
            best = tunec[0]  # first row is reported as the best setting
            bestc = best[0]
            bestgamma = best[1]
            bestwi = best[3]
            bestCV = best[-1]
            print("Tuning: Five-fold CV on %s, the best F1 is %f at c=%f, gamma=%f and wi=%s"
                  % (trainfile, bestCV, bestc, bestgamma, str(bestwi)))
            if 'pred' in p:
                print("---Model fitting and prediction")
                # best[2] goes in the slot where the untuned path passes 2.
                predict(str(bestc), str(bestgamma), str(best[2]), str(bestwi),
                        trainfile, testfile, predfile, emo)
        elif 'pred' in p:
            print("---Model fitting and prediction")
            print('C=%s, gamma=%s and wi=%s' % (str(ci), str(gamma), str(wi)))
            predict(str(ci), str(gamma), str(2), str(wi),
                    trainfile, testfile, predfile, emo)

        if 'evaluation' in p:
            print("---Evaluation")
            y_test, y_predicted = feval(truefile, predfile)
            precision = metrics.precision_score(y_test, y_predicted,
                                                average='binary')
            print('Precision score: ' + " " + str(precision))
            recall = metrics.recall_score(y_test, y_predicted,
                                          average='binary')
            print('Recall score: ' + " " + str(recall))
            f1 = metrics.f1_score(y_test, y_predicted, average='binary')
            print('F1 score: ' + " " + str(f1))

    # NOTE(review): the original indentation was lost; this closing banner is
    # assumed to belong after the loop -- confirm.
    print(80 * "*")