def split(datafile, trfile, vfile, tfile, vtruefile, truefile):
    """Split ``datafile`` into train, validation and test files.

    The rows read from ``datafile`` are first divided with ``test_size=1/3``
    into an "adapt" part and a test part.  The adapt part is then divided
    again with ``test_size=3/7`` into a training part and a validation part.
    Both splits are stratified on the labels returned by ``getlabels``.

    Args:
        datafile: Path handed to ``readfeats`` to load every row.
        trfile: Output path for the training rows.
        vfile: Output path for the validation rows.
        tfile: Output path for the test rows.
        vtruefile: Output path for the validation labels.
        truefile: Output path for the test labels.

    The training labels and the adapt-part labels are not written anywhere.
    """
    everything = readfeats(datafile)

    # First cut: hold out the test portion.
    adapt_rows, test_rows, _, test_labels = train_test_split(
        everything,
        getlabels(everything),
        test_size=1.0 / 3,
        stratify=getlabels(everything))
    writingfile(tfile, test_rows)
    writingfile(truefile, test_labels)

    # Second cut: divide what is left into training and validation.
    train_rows, valid_rows, _, valid_labels = train_test_split(
        adapt_rows,
        getlabels(adapt_rows),
        test_size=3.0 / 7,
        stratify=getlabels(adapt_rows))
    writingfile(trfile, train_rows)
    writingfile(vfile, valid_rows)
    writingfile(vtruefile, valid_labels)
def CV(ci, gamma, kernel, wi, trfile, tfile, CV_trfile, CV_tfile, CV_pfile,
       CV_truey):
    """Score one ``svm-train`` parameter setting on five samples of ``tfile``.

    A model is trained once with the external ``svm-train`` program on
    ``trfile``.  Five random 20% samples (``ShuffleSplit`` with
    ``random_state=0``) of the rows read from ``tfile`` are then written to
    disk, predicted with ``svm-predict`` and scored.

    Args:
        ci: Value for the ``-c`` option of ``svm-train``.
        gamma: Value for the ``-g`` option of ``svm-train``.
        kernel: Value for the ``-t`` option of ``svm-train``.
        wi: Value for the ``-w0`` option of ``svm-train`` (``-w1`` is fixed
            at ``"1"``).
        trfile: Training file passed to ``svm-train``.
        tfile: File whose rows are sampled for evaluation.
        CV_trfile: Unused; kept so existing callers keep working.
        CV_tfile: Path prefix for the per-fold evaluation rows; the 1-based
            fold number is appended.
        CV_pfile: Path prefix for the per-fold prediction output.
        CV_truey: Path prefix for the per-fold true labels.

    Returns:
        ``[recall, precision, f1]``, each the mean over the five folds.

    Raises:
        subprocess.CalledProcessError: If ``svm-train`` or ``svm-predict``
            exits with a non-zero status.
    """
    tfeat = readfeats(tfile)
    cv = ShuffleSplit(n=len(tfeat), n_iter=5, test_size=0.2, random_state=0)

    # argv entries must be strings; str() lets callers pass numbers too.
    traincmd = [
        "svm-train",
        "-c", str(ci),
        "-t", str(kernel),
        "-g", str(gamma),
        "-w1", "1",
        "-w0", str(wi),
        "-q",
        trfile
    ]
    # The training inputs do not depend on the fold, so train a single time.
    # check_call stops here on failure instead of scoring with a stale or
    # missing model file.
    subprocess.check_call(traincmd)
    # No model path is given to svm-train, so the model is looked up under
    # the training file's base name in the current directory.
    model = trfile.split('/')[-1] + '.model'

    f1_list = []
    p_list = []
    r_list = []
    for count, (_, test_index) in enumerate(cv, 1):
        cv_tfile = CV_tfile + str(count)
        cv_pfile = CV_pfile + str(count)
        cv_truey = CV_truey + str(count)

        X_test = tfeat[test_index]
        writingfile(cv_tfile, X_test)
        writingfile(cv_truey, getlabels(X_test))

        predcmd = ["svm-predict", cv_tfile, model, cv_pfile]
        # stdout is captured and discarded to keep the console quiet.
        p = subprocess.Popen(predcmd, stdout=subprocess.PIPE)
        p.communicate()
        if p.returncode != 0:
            raise subprocess.CalledProcessError(p.returncode, predcmd)

        y_test, y_predicted = feval(cv_truey, cv_pfile)
        p_list.append(
            metrics.precision_score(y_test, y_predicted, average='binary'))
        r_list.append(
            metrics.recall_score(y_test, y_predicted, average='binary'))
        f1_list.append(
            metrics.f1_score(y_test, y_predicted, average='binary'))

    recall = np.mean(np.asarray(r_list))
    precision = np.mean(np.asarray(p_list))
    f1 = np.mean(np.asarray(f1_list))
    # Single-argument parenthesised form prints the same on Python 2 and 3.
    print("C=%s, gamma=%s and wi=%s, its F1 is %f" % (ci, gamma, wi, f1))
    return [recall, precision, f1]
def CV(ci, gamma, kernel, wi, trfile, tfile, CV_trfile, CV_tfile, CV_pfile,
       CV_truey):
    """Score one ``svm-train`` parameter setting on five samples of ``tfile``.

    A model is trained once with the external ``svm-train`` program on
    ``trfile``.  Five random 20% samples (``ShuffleSplit`` with
    ``random_state=0``) of the rows read from ``tfile`` are then written to
    disk, predicted with ``svm-predict`` and scored.

    Args:
        ci: Value for the ``-c`` option of ``svm-train``.
        gamma: Value for the ``-g`` option of ``svm-train``.
        kernel: Value for the ``-t`` option of ``svm-train``.
        wi: Value for the ``-w0`` option of ``svm-train`` (``-w1`` is fixed
            at ``"1"``).
        trfile: Training file passed to ``svm-train``.
        tfile: File whose rows are sampled for evaluation.
        CV_trfile: Unused; kept so existing callers keep working.
        CV_tfile: Path prefix for the per-fold evaluation rows; the 1-based
            fold number is appended.
        CV_pfile: Path prefix for the per-fold prediction output.
        CV_truey: Path prefix for the per-fold true labels.

    Returns:
        ``[recall, precision, f1]``, each the mean over the five folds.

    Raises:
        subprocess.CalledProcessError: If ``svm-train`` or ``svm-predict``
            exits with a non-zero status.
    """
    tfeat = readfeats(tfile)
    cv = ShuffleSplit(n=len(tfeat), n_iter=5, test_size=0.2, random_state=0)

    # argv entries must be strings; str() lets callers pass numbers too.
    traincmd = [
        "svm-train",
        "-c", str(ci),
        "-t", str(kernel),
        "-g", str(gamma),
        "-w1", "1",
        "-w0", str(wi),
        "-q",
        trfile
    ]
    # The training inputs do not depend on the fold, so train a single time.
    # check_call stops here on failure instead of scoring with a stale or
    # missing model file.
    subprocess.check_call(traincmd)
    # No model path is given to svm-train, so the model is looked up under
    # the training file's base name in the current directory.
    model = trfile.split('/')[-1] + '.model'

    f1_list = []
    p_list = []
    r_list = []
    for count, (_, test_index) in enumerate(cv, 1):
        cv_tfile = CV_tfile + str(count)
        cv_pfile = CV_pfile + str(count)
        cv_truey = CV_truey + str(count)

        X_test = tfeat[test_index]
        writingfile(cv_tfile, X_test)
        writingfile(cv_truey, getlabels(X_test))

        predcmd = ["svm-predict", cv_tfile, model, cv_pfile]
        # stdout is captured and discarded to keep the console quiet.
        p = subprocess.Popen(predcmd, stdout=subprocess.PIPE)
        p.communicate()
        if p.returncode != 0:
            raise subprocess.CalledProcessError(p.returncode, predcmd)

        y_test, y_predicted = feval(cv_truey, cv_pfile)
        p_list.append(
            metrics.precision_score(y_test, y_predicted, average='binary'))
        r_list.append(
            metrics.recall_score(y_test, y_predicted, average='binary'))
        f1_list.append(
            metrics.f1_score(y_test, y_predicted, average='binary'))

    recall = np.mean(np.asarray(r_list))
    precision = np.mean(np.asarray(p_list))
    f1 = np.mean(np.asarray(f1_list))
    # Single-argument parenthesised form prints the same on Python 2 and 3.
    print("C=%s, gamma=%s and wi=%s, its F1 is %f" % (ci, gamma, wi, f1))
    return [recall, precision, f1]
def split2(datafile, trfile, tfile, truefile):
    """Split ``datafile`` into a training file and a test file.

    The rows read from ``datafile`` are divided with ``test_size=1/7``,
    stratified on the labels returned by ``getlabels``.

    Args:
        datafile: Path handed to ``readfeats`` to load every row.
        trfile: Output path for the training rows.
        tfile: Output path for the test rows.
        truefile: Output path for the test labels.

    The training labels are not written anywhere.
    """
    everything = readfeats(datafile)
    train_rows, test_rows, _, test_labels = train_test_split(
        everything,
        getlabels(everything),
        test_size=1.0 / 7,
        stratify=getlabels(everything))

    # Persist the three outputs in the same order as before.
    outputs = (
        (trfile, train_rows),
        (tfile, test_rows),
        (truefile, test_labels),
    )
    for path, payload in outputs:
        writingfile(path, payload)
def CV(ci, wi, s1, s2, s3, trfile, CV_trfile, CV_tfile, CV_pfile, CV_truey,
       emo):
    """Score one ``adapt`` parameter setting over five stratified splits.

    The rows read from ``trfile`` are split five times
    (``StratifiedShuffleSplit`` with ``test_size=0.2`` and
    ``random_state=0``).  For every split the train rows, test rows and test
    labels are written to disk, ``adapt`` is run on those files, and the
    predictions are scored against the true labels.

    Args:
        ci, wi, s1, s2, s3: Parameters forwarded to ``adapt`` as strings.
        trfile: Path handed to ``readfeats`` to load the rows to split.
        CV_trfile: Path prefix for the per-fold training rows; the 1-based
            fold number is appended.
        CV_tfile: Path prefix for the per-fold test rows.
        CV_pfile: Path prefix for the per-fold prediction output.
        CV_truey: Path prefix for the per-fold true labels.
        emo: Forwarded unchanged as the last argument of ``adapt``.

    Returns:
        Tuple ``(recall, precision, f1)``, each the mean over the five folds.
    """
    feats = readfeats(trfile)
    splitter = StratifiedShuffleSplit(
        y=getlabels(feats), n_iter=5, test_size=0.2, random_state=0)

    fold_precision = []
    fold_recall = []
    fold_f1 = []
    for fold, (train_index, test_index) in enumerate(splitter, 1):
        fold_train_path = CV_trfile + str(fold)
        fold_test_path = CV_tfile + str(fold)
        fold_pred_path = CV_pfile + str(fold)
        fold_truth_path = CV_truey + str(fold)

        train_rows = feats[train_index]
        test_rows = feats[test_index]
        truth = getlabels(test_rows)
        writingfile(fold_train_path, train_rows)
        writingfile(fold_test_path, test_rows)
        writingfile(fold_truth_path, truth)

        # adapt receives the five tuning parameters as strings, followed by
        # the fold's file paths and emo.
        tuning = [str(value) for value in (ci, wi, s1, s2, s3)]
        adapt(*(tuning + [fold_train_path, fold_test_path, fold_pred_path,
                          emo]))

        expected, predicted = feval(fold_truth_path, fold_pred_path)
        fold_precision.append(
            metrics.precision_score(expected, predicted, average='binary'))
        fold_recall.append(
            metrics.recall_score(expected, predicted, average='binary'))
        fold_f1.append(
            metrics.f1_score(expected, predicted, average='binary'))

    recall = np.mean(np.asarray(fold_recall))
    precision = np.mean(np.asarray(fold_precision))
    f1 = np.mean(np.asarray(fold_f1))
    summary = "C=%s, wi=%s and (s1=%s ; s2=%s ; s3=%s), its F1 is %f" % (
        ci, wi, s1, s2, s3, f1)
    print(summary)
    return recall, precision, f1