def train_test_model(train_datafile, test_datafile):
    """Train a libsvm model on one data file and evaluate it on another.

    :param train_datafile: path of the training data in libsvm format
    :param test_datafile: path of the test data in libsvm format
    :return: the ``(p_labs, p_acc, p_vals)`` triple produced by ``svm_predict``
    """
    from svmutil import svm_read_problem, svm_train, svm_predict

    train_labels, train_features = svm_read_problem(train_datafile)
    # The option string is handed to libsvm unchanged.
    model = svm_train(train_labels, train_features,
                      '-t 0 -e .01 -m 1000 -h 0')
    test_labels, test_features = svm_read_problem(test_datafile)
    predicted, accuracy, values = svm_predict(test_labels, test_features, model)
    return predicted, accuracy, values
def _write_rows(path, rows):
    """Write every row of string tokens to *path* as one space-joined line."""
    with open(path, 'w') as f:
        f.writelines(' '.join(row) + '\n' for row in rows)


def main():
    """Build representation from files, then train and score both SVM models.

    With ``-r``/``--regen`` the RAD and custom representations are recomputed
    from ``dataset/train`` and ``dataset/test`` and written to the ``*_d1`` /
    ``*_d2`` files (``.t`` suffix for the test split).  Two models are then
    trained from ``rad_d2`` and ``cust_d2`` and their accuracy on the
    matching ``.t`` files is printed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-r", "--regen",
                        help="regenerate the representation files from the "
                             "raw dataset before training",
                        action="store_true")
    args = parser.parse_args()

    if args.regen:
        train_class, train_frames = read_dir('dataset/train/*.txt', ' ')
        test_class, test_frames = read_dir('dataset/test/*.txt', ' ')

        rad_d1 = [get_rad(action) for action in train_frames]
        rad_d1_t = [get_rad(action) for action in test_frames]
        cust_d1 = [get_custom(action) for action in train_frames]
        cust_d1_t = [get_custom(action) for action in test_frames]

        _write_rows('rad_d1', rad_d1)
        _write_rows('rad_d1.t', rad_d1_t)
        _write_rows('cust_d1', cust_d1)
        _write_rows('cust_d1.t', cust_d1_t)

        rad_d2 = reformat(rad_d1, train_class)
        rad_d2_t = reformat(rad_d1_t, test_class)
        cust_d2 = reformat(cust_d1, train_class)
        cust_d2_t = reformat(cust_d1_t, test_class)

        _write_rows('rad_d2', rad_d2)
        _write_rows('rad_d2.t', rad_d2_t)
        _write_rows('cust_d2', cust_d2)
        _write_rows('cust_d2.t', cust_d2_t)

    # NOTE(review): the original indentation was lost; training is assumed to
    # run whether or not --regen was given (it only reads the files) - confirm.
    # Train the models and test with them
    y, x = svm.svm_read_problem('rad_d2')
    y_t, x_t = svm.svm_read_problem('rad_d2.t')
    rad_model = svm.svm_train(y, x, '-s 0 -t 2 -c 2 -g 0.0005')
    rad_labels, (rad_acc, *_), _ = svm.svm_predict(y_t, x_t, rad_model)

    y, x = svm.svm_read_problem('cust_d2')
    y_t, x_t = svm.svm_read_problem('cust_d2.t')
    cust_model = svm.svm_train(y, x, '-s 0 -t 2 -c 8 -g 0.0005')
    cust_labels, (cust_acc, *_), _ = svm.svm_predict(y_t, x_t, cust_model)

    print(f'RAD accuracy: {rad_acc}')
    print(f'Custom accuracy: {cust_acc}')
def simulate_skin_with_svm(data_size=None, train_params='-s 0 -t 0'):
    """Convert the skin data set, train on it with libsvm and run prediction.

    ``data_size`` is forwarded to ``convert_skin_to_svm``; ``train_params``
    is the libsvm option string used for training.  Nothing is returned.
    """
    convert_skin_to_svm(data_size)

    labels, features = svm.svm_read_problem('skin_train.svm')
    model = svm.svm_train(labels, features, train_params)

    held_out_labels, held_out_features = svm.svm_read_problem('skin_test.svm')
    # The prediction triple is unpacked but not used afterwards.
    _labels, _accuracy, _values = svm.svm_predict(
        held_out_labels, held_out_features, model)
def train_grasp(grasp_type, side):
    """Train (or load) the SVM classifier for one grasp type and score the test set.

    grasp_type: hand grasping type
    side: left hand or right hand

    The libsvm training/test files and the model are cached under ``model/``
    and only regenerated when missing.  One line per test sample (info, true
    label, predicted label, first probability value) is written to
    ``result/grasp_<grasp_type>_<side>.csv``.
    """
    tag = grasp_type + "_" + side

    # train
    datafile = "model/traindata_grasp_" + tag
    if not os.path.isfile(datafile):
        srcfile = "data/feature_grasp_train.csv"
        write_svmdata_grasp(srcfile, datafile, grasp_type, side, 0)
    label_train, data_train = svmutil.svm_read_problem(datafile)

    modelfile = "model/model_grasp_" + tag
    if not os.path.isfile(modelfile):
        print("train model: " + tag)
        # Count the samples of each label (dict.has_key no longer exists in
        # Python 3, so use dict.get).
        label_count = {}
        for v in label_train:
            label_count[v] = label_count.get(v, 0) + 1
        # Per-label weight: total samples / number of labels / samples of label.
        param_weight = ' '
        for v in sorted(label_count):
            weight = float(len(label_train)) / len(label_count) / label_count[v]
            param_weight += '-w%d %f ' % (v, weight)
        options = '-t 0 -b 1 -q' + param_weight
        prob = svmutil.svm_problem(label_train, data_train)
        param = svmutil.svm_parameter(options)
        print(options)
        m = svmutil.svm_train(prob, param)
        svmutil.svm_save_model(modelfile, m)
    else:
        print("load model: " + tag)
        m = svmutil.svm_load_model(modelfile)

    # test
    grasp_info = read_info("data/feature_grasp_test.csv", side)
    datafile = "model/testdata_grasp_" + tag
    if not os.path.isfile(datafile):
        srcfile = "data/feature_grasp_test.csv"
        write_svmdata_grasp(srcfile, datafile, grasp_type, side, 1)
    label_test, data_test = svmutil.svm_read_problem(datafile)
    p_label, p_acc, p_val = svmutil.svm_predict(label_test, data_test, m, '-b 1')

    # `with` closes the result file even if a write fails part-way.
    with open("result/grasp_" + tag + ".csv", "w") as f_result:
        for i, predicted in enumerate(p_label):
            f_result.write(grasp_info[i] + ", " + str(int(label_test[i])) +
                           ", " + str(int(predicted)) + ", ")
            f_result.write("[%.4f]\n" % p_val[i][0])
def train_test_model(train_datafile, test_datafile):
    """Train a libsvm model on ``train_datafile`` and test it on ``test_datafile``.

    :param train_datafile: relative path of the training data
    :param test_datafile: relative path of the test data
    :return: the ``(p_labs, p_acc, p_vals)`` triple from ``svm_predict``
    """
    from svmutil import svm_read_problem, svm_predict, svm_train

    # The test file is read before the training file.
    held_out_labels, held_out_features = svm_read_problem(test_datafile)
    fit_labels, fit_features = svm_read_problem(train_datafile)

    classifier = svm_train(fit_labels, fit_features,
                           '-t 0 -e .01 -m 1000 -h 0')
    labels, accuracy, values = svm_predict(
        held_out_labels, held_out_features, classifier)
    return labels, accuracy, values
def trainmodel(self, train, cv, test, modelsavepath):
    """Train an SVM on ``train``, predict on that same file and save the model.

    ``cv`` and ``test`` are accepted but not used by this method.  The model
    is written to ``modelsavepath``; nothing is returned.
    """
    labels, features = svmutil.svm_read_problem(train)  # read the training data
    problem = svm.svm_problem(labels, features)
    options = svm.svm_parameter('-t 2 -c 0.5 -g 0.125 -b 1')
    model = svmutil.svm_train(problem, options)

    # The training file is read a second time and used as prediction input.
    eval_labels, eval_features = svmutil.svm_read_problem(train)
    p_labs, p_acc, p_vals = svmutil.svm_predict(
        eval_labels, eval_features, model, '-b 1')

    svmutil.svm_save_model(modelsavepath, model)  # save model
def _shuffled_order(count):
    """Return the indices ``0..count-1`` in a random order."""
    order = list(range(count))
    random.shuffle(order)
    return order


def getmodelandaccuary(line):
    """Train a model and predict the results.

    :param line: the feature directories on HDFS to read
    :return: the accuracy value returned by ``svm_predict``
    """
    train_y, train_x, test_y, test_x = [], [], [], []
    # The final entry of `line` is not visited (the range stops at len - 1).
    for position in range(len(line) - 1):
        labels, features = svmutil.svm_read_problem(line[position])
        # First 90 samples of each file go to training, the next 10 to testing.
        train_y.extend(labels[0:90])
        train_x.extend(features[0:90])
        test_y.extend(labels[90:100])
        test_x.extend(features[90:100])

    # Training order is shuffled first, then the test order.
    train_order = _shuffled_order(len(train_y))
    test_order = _shuffled_order(len(test_y))

    random_train_y = [train_y[i] for i in train_order]
    random_train_x = [train_x[i] for i in train_order]
    random_test_y = [test_y[i] for i in test_order]
    random_test_x = [test_x[i] for i in test_order]

    m = svmutil.svm_train(random_train_y, random_train_x,
                          "-s 0 -t 2 -c 32 -g 8 -b 1")
    predict_label, accuary, prob_estimates = svmutil.svm_predict(
        random_test_y, random_test_x, m, '-b 1')
    svmutil.svm_save_model(
        '/home/sunbite/Co_KNN_SVM_TMP/CoKNNSVM2.model', m)
    return accuary
def test(self):
    """Predict the news test set and save/show the 10x10 confusion matrix.

    Loads the model from ``./news/svmmodel`` and the test data from
    ``./news/temp/svmtest.txt``, stores them on ``self.model``, ``self.yt``
    and ``self.xt``, writes the matrix to ``confusion_matrix_svm.csv`` and
    passes it to ``self.show_test_result``.  Rows are the true labels and
    columns the predicted labels, both 1..10.
    """
    self.model = svmutil.svm_load_model('./news/svmmodel')
    self.yt, self.xt = svmutil.svm_read_problem('./news/temp/svmtest.txt')
    print('Start to predict...')
    p_label, p_acc, p_val = svmutil.svm_predict(self.yt, self.xt, self.model)

    classes = list(range(1, 11))
    confusion_matrix = pd.DataFrame(0, index=classes, columns=classes,
                                    dtype='int')
    for i in range(len(p_label)):
        # Use a single .loc access: chained indexing (df[col][row] += 1) may
        # update a temporary copy instead of the frame itself.
        confusion_matrix.loc[int(self.yt[i]), int(p_label[i])] += 1

    confusion_matrix.to_csv('confusion_matrix_svm.csv')
    self.show_test_result(confusion_matrix)
def main():
    """Run ``Problem12`` on the training set and save its result.

    Reads ``../features-libsvm.train`` and stores whatever ``Problem12``
    returns as the array ``sSV`` in ``../sSV.npz``.
    """
    labels, features = svmutil.svm_read_problem('../features-libsvm.train')
    outcome = Problem12(features, labels)
    np.savez('../sSV.npz', sSV=np.array(outcome))
def getmodelandaccuary(line):
    """Train a model and predict the results.

    :param line: the feature directories on HDFS to read
    :return: None; the shuffled splits are handed to ``Co_KNN_SVM.co_knn_svm``
    """
    train_y, train_x = [], []
    test_y, test_x = [], []
    # The final entry of `line` is not visited (the range stops at len - 1).
    for position in range(len(line) - 1):
        labels, features = svmutil.svm_read_problem(line[position])
        # Samples 0-59 of each file are for training, 60-299 for testing.
        train_y += labels[0:60]
        train_x += features[0:60]
        test_y += labels[60:300]
        test_x += features[60:300]

    train_order = list(range(len(train_y)))
    test_order = list(range(len(test_y)))
    # Shuffle the training order first, then the test order.
    random.shuffle(train_order)
    random.shuffle(test_order)

    Co_KNN_SVM.co_knn_svm(
        [train_y[i] for i in train_order],
        [train_x[i] for i in train_order],
        [test_y[i] for i in test_order],
        [test_x[i] for i in test_order])
def Train_SVM_model(PathToFeatureFile):
    """Train a model from a feature file and save it beside that file.

    The model file is named ``model_<rest>``, where ``<rest>`` is everything
    after the first underscore of ``PathToFeatureFile``.  The model path is
    printed once the model has been saved.
    """
    labels, features = svmutil.svm_read_problem(PathToFeatureFile)
    trained = svmutil.svm_train(labels, features)

    directory = os.path.split(PathToFeatureFile)[0]
    remainder = PathToFeatureFile.split('_', 1)[1]
    modelFilePath = os.path.join(directory, f"model_{remainder}")

    svmutil.svm_save_model(modelFilePath, trained)
    print(modelFilePath)
def classify(filename, classLabel=0):
    """Classify one text file and return its category name.

    The text is vectorised with ``VSM.TextToVector2``, written as a single
    libsvm record and predicted with ``../SVMTrainFile250.model``.

    :param filename: text file to classify
    :param classLabel: label written into the temporary libsvm record
    :return: the category name for predicted labels 1-4, ``None`` for any
        other label, or ``"Can't be classified ! "`` when the text yields an
        empty vector
    """
    svm_file = "/Thu_Life/CS/SVM/data/trainData/Test_SVMFile/singleSVM_TestFile"
    # `with` closes the file on every path; the early return used to leak it.
    with open(svm_file, "wb") as f:
        t = VSM.TextToVector2(filename)
        if len(t) > 0:
            record = ("%d " % classLabel)
            record += "".join("%d:%d " % (t[k][0], t[k][1])
                              for k in range(len(t)))
            record += "\r\n"
            # The file is binary, so encode explicitly (works on 2 and 3).
            f.write(record.encode("ascii"))
    if not len(t) > 0:
        print("The text can't be classified to the Four Labels!")
        return "Can't be classified ! "

    y, x = svmutil.svm_read_problem(svm_file)
    model = svmutil.svm_load_model("../SVMTrainFile250.model")
    label, b, c = svmutil.svm_predict(y, x, model)
    print("label %s" % (label,))

    categories = {1: "财经", 2: "IT", 3: "旅游", 4: "体育"}
    category = categories.get(label[0])
    if category is not None:
        print("类别:" + category)
    return category
def __init__(self,train_feature_file = TRAIN_FEATURE_FILE):
    """Load the saved model when it exists, otherwise train and save one.

    ``train_feature_file`` is only read when ``SAVED_MODEL`` does not exist.
    """
    if not os.path.exists(SAVED_MODEL):
        labels, features = svmutil.svm_read_problem(train_feature_file)
        self.model = svmutil.svm_train(labels, features, '-c 4')
        svmutil.svm_save_model(SAVED_MODEL, self.model)
        return
    self.model = svmutil.svm_load_model(SAVED_MODEL)
def testing_fold(self, k, m):
    """Predict fold ``k``'s test file with model ``m`` and return the accuracy.

    The test data is read from ``fold_<k>/<self.filename>.test.svm``; the
    return value is the first element of ``svm_predict``'s accuracy tuple
    as a float.
    """
    test_path = 'fold_' + str(k) + '/' + self.filename + '.test.svm'
    y, x = svmutil.svm_read_problem(test_path)
    print("Y %s X %s" % (len(y), len(x)))
    accuracy = svmutil.svm_predict(y, x, m)[1]
    return float(accuracy[0])
def __init__(self, train_feature_file=TRAIN_FEATURE_FILE):
    """Set ``self.model`` from the cached model file, training one if needed.

    When ``SAVED_MODEL`` is missing, a model is trained from
    ``train_feature_file`` with ``'-c 4'`` and written to ``SAVED_MODEL``.
    """
    model_is_cached = os.path.exists(SAVED_MODEL)
    if model_is_cached:
        self.model = svmutil.svm_load_model(SAVED_MODEL)
        return

    y, x = svmutil.svm_read_problem(train_feature_file)
    trained = svmutil.svm_train(y, x, '-c 4')
    self.model = trained
    svmutil.svm_save_model(SAVED_MODEL, trained)
def main():
    """Cross-validate the (gamma, C) grid and log each score to ``gamma_c``.

    The first 1800 samples of ``char_recon_shuffled.db`` are used for
    training and the rest for validation.  Each output line holds
    ``log_sigma log_C score`` for one evaluated grid point.
    """
    y, x = svmutil.svm_read_problem("char_recon_shuffled.db")
    x_train, y_train = x[:1800], y[:1800]
    x_val, y_val = x[1800:], y[1800:]

    # One grid definition drives both the evaluation and the log, so the
    # logged exponents always belong to the score on the same line.
    # (Previously the log read its exponents from a separate
    # np.arange(6, 10, 0.5) x np.arange(0, 5, 0.5) grid that was never
    # evaluated.)
    log_log_pairs = [[log_sigma, log_C]
                     for log_sigma in [7]
                     for log_C in [3]]
    gamma_c_pairs = [GammaCPair(1.0 / (2.0 * (3.0 ** log_sigma) ** 2),
                                3.0 ** log_C)
                     for log_sigma, log_C in log_log_pairs]

    cross_val = [get_cross_val(x_train, y_train, x_val, y_val, gamma_c)
                 for gamma_c in gamma_c_pairs]

    with open("gamma_c", "w") as f:
        for (log_sigma, log_C), score in zip(log_log_pairs, cross_val):
            f.write("{0} {1} {2}\n".format(log_sigma, log_C, score))
def train_manipulation(mnp_type):
    """Train (or load) the SVM classifier for one manipulation type and test it.

    mnp_type: manipulation type used to name the data, model and result files

    The libsvm data files and the model are cached under
    ``manipulate/model/`` and only regenerated when missing.  One line per
    test sample (info, true label, predicted label, first probability value)
    is written to ``manipulate/result/mnp_<mnp_type>.csv``.
    """
    # train
    datafile = "manipulate/model/traindata_mnp_" + mnp_type
    if not os.path.isfile(datafile):
        srcfile = "manipulate/data/feature_mnp_train.csv"
        write_svmdata_mnp(srcfile, datafile, mnp_type, 0)
    label_train, data_train = svmutil.svm_read_problem(datafile)

    modelfile = "manipulate/model/model_mnp_" + mnp_type
    if not os.path.isfile(modelfile):
        print("train model: " + mnp_type)
        # Count the samples of each label (dict.has_key no longer exists in
        # Python 3, so use dict.get).
        label_count = {}
        for v in label_train:
            label_count[v] = label_count.get(v, 0) + 1
        # Per-label weight: total samples / number of labels / samples of label.
        param_weight = ' '
        for v in sorted(label_count):
            weight = float(len(label_train)) / len(label_count) / label_count[v]
            param_weight += '-w%d %f ' % (v, weight)
        options = '-t 0 -b 1 -q' + param_weight
        prob = svmutil.svm_problem(label_train, data_train)
        param = svmutil.svm_parameter(options)
        print(options)
        m = svmutil.svm_train(prob, param)
        svmutil.svm_save_model(modelfile, m)
    else:
        print("load model: " + mnp_type)
        m = svmutil.svm_load_model(modelfile)

    # test
    mnp_info = read_info("manipulate/data/feature_mnp_test.csv")
    datafile = "manipulate/model/testdata_mnp_" + mnp_type
    if not os.path.isfile(datafile):
        srcfile = "manipulate/data/feature_mnp_test.csv"
        write_svmdata_mnp(srcfile, datafile, mnp_type, 1)
    label_test, data_test = svmutil.svm_read_problem(datafile)
    p_label, p_acc, p_val = svmutil.svm_predict(label_test, data_test, m, '-b 1')

    # `with` closes the result file even if a write fails part-way.
    with open("manipulate/result/mnp_" + mnp_type + ".csv", "w") as f_result:
        for i, predicted in enumerate(p_label):
            f_result.write(mnp_info[i] + ", " + str(int(label_test[i])) +
                           ", " + str(int(predicted)) + ", ")
            f_result.write("[%.4f]\n" % p_val[i][0])
def main():
    """Plot the test error against log10(gamma) for C = 0.1.

    Labels equal to 0 become +1 and all others -1.  One model is trained for
    each gamma in 10**-3 .. 10**1 and ``100 - accuracy`` on the test set is
    plotted.
    """
    y_train, X_train = svmutil.svm_read_problem('../features-libsvm.train')
    y_test, X_test = svmutil.svm_read_problem('../features-libsvm.test')
    c = 0.1

    # "label is 0" versus the rest, encoded as +1 / -1.
    y_train = list(2 * (np.array(y_train) == 0) - 1)
    y_test = list(2 * (np.array(y_test) == 0) - 1)

    exponents = range(-3, 2)
    eout = []
    for exponent in exponents:
        options = '-g %s -c %s' % (10 ** exponent, c)
        model = svmutil.svm_train(y_train, X_train, options)
        _, (accuracy, _, _), _ = svmutil.svm_predict(y_test, X_test, model)
        eout.append(100 - accuracy)

    plt.plot(exponents, eout)
    plt.xlabel("log10(gamma)")
    plt.ylabel("Eout on test set")
    plt.title("C=0.1")
    plt.show()
def doRecognize():
    """Predict the samples in ``testTxt`` and print the labels as one string.

    The model is loaded from ``cdir + "model"``; every predicted label is
    converted to an integer and the digits are concatenated.
    """
    yt, xt = svm_read_problem(testTxt)
    model = svm_load_model(cdir + "model")
    # p_label is the recognition result
    p_label, p_acc, p_val = svm_predict(yt, xt, model)
    code = ''.join(str(int(item)) for item in p_label)
    print(code)
def predict(trainFileName, testFileName, cgp=None):
    """Train on one libsvm file and predict another.

    :param trainFileName: path of the training data
    :param testFileName: path of the data to predict
    :param cgp: optional dict with any of the keys ``'c'``, ``'g'`` and
        ``'p'``; missing keys (or ``None``) fall back to
        ``c=1024, g=16, p=0.015625``
    :return: the ``(p_label, p_acc, p_val)`` tuple from ``svm_predict``
    """
    from svmutil import svm_read_problem, svm_train, svm_predict

    # Build the settings per call instead of sharing one mutable default dict.
    settings = {'c': 1024, 'g': 16, 'p': 0.015625}
    if cgp is not None:
        settings.update(cgp)

    y, x = svm_read_problem(trainFileName)
    yt, xt = svm_read_problem(testFileName)
    model = svm_train(
        y, x, '-s 4 -t 2 -c %f -g %f -p %f' %
        (settings['c'], settings['g'], settings['p']))
    p_label, p_acc, p_val = svm_predict(yt, xt, model)
    return (p_label, p_acc, p_val)
def train_fold(self, k, c):
    """Train on fold ``k``'s data file with cost ``c`` and return the model.

    The data is read from ``fold_<k>/<self.filename>.data.svm``.
    """
    self.print_debug('train_fold', k, c)
    data_path = 'fold_' + str(k) + '/' + self.filename + '.data.svm'
    labels, features = svmutil.svm_read_problem(data_path)
    problem = svmutil.svm_problem(labels, features)
    options = svmutil.svm_parameter('-s 0 -t 0 -c ' + str(c))
    return svmutil.svm_train(problem, options)
def train_fold_polynomial(self, k, c, g, d):
    """Train on fold ``k``'s data file with options ``c``, ``g``, ``d``.

    The data is read from ``fold_<k>/<self.filename>.data.svm`` and the
    problem is built with ``isKernel=True``.  Returns the trained model.
    """
    self.print_debug('train_fold_polynomial', k, c, g, d)
    data_path = 'fold_' + str(k) + '/' + self.filename + '.data.svm'
    labels, features = svmutil.svm_read_problem(data_path)
    problem = svmutil.svm_problem(labels, features, isKernel=True)
    option_string = ('-s 0 -t 1 -c ' + str(c) +
                     ' -g ' + str(g) +
                     ' -d ' + str(d))
    options = svmutil.svm_parameter(option_string)
    return svmutil.svm_train(problem, options)
def svm_model_test(svm_root, model_path):
    """
    Test the model using the test set.

    The visible body only puts the libsvm Python bindings on ``sys.path``,
    imports ``svmutil`` and reads ``./total_feature.txt``; ``svm_root`` and
    ``model_path`` are not used in it.
    :return:
    """
    # Hard-coded location of the libsvm checkout whose python/ folder holds svmutil.
    svm_path = r"C:\Python36\risk_down\libsvm"
    sys.path.append(svm_path + r"\python")
    import svmutil
    # Train the overall model
    y, x = svmutil.svm_read_problem('./' + 'total_feature.txt')
def fit(self, document, keep_on_doc=False):
    """Return the ``(y, x)`` pair for ``document``.

    A ``str`` is treated as the path of a libsvm data file and read with
    ``svm_read_problem``.  Any other object is passed through
    ``self.get_svm_data`` and ``self.extract_xy``, whose pairs are collected
    into two parallel lists.
    """
    if isinstance(document, str):
        data_file = str(document)
        y, x = svmutil.svm_read_problem(data_file)
        return y, x

    # Every remaining value is an `object`, so no further type test is needed.
    sections = self.extract_xy(self.get_svm_data(document, keep_on_doc))
    pairs = [(y_section, x_section) for y_section, x_section in sections]
    y = [y_section for y_section, _ in pairs]
    x = [x_section for _, x_section in pairs]
    return y, x
def main():
    """Run ``validate`` 1000 times on the training set and plot a histogram.

    Labels equal to 0 become +1 and all others -1 before validation.
    """
    y_train, X_train = svmutil.svm_read_problem('../features-libsvm.train')
    y_train = list(2 * (np.array(y_train) == 0) - 1)

    R = 1000
    outcomes = []
    for round_number in range(R):
        print("round %s" % round_number)
        outcomes.append(validate(X_train, y_train))

    pd.Series(outcomes).hist()
def predict_main(file, model, out, now_path):
    """Predict a libsvm file and write true/predicted labels to a CSV.

    :param file: data file name, relative to ``now_path``
    :param model: path of the saved libsvm model
    :param out: output name prefix, relative to ``now_path``
    :param now_path: base directory for ``file`` and ``out``

    The CSV is written to ``<now_path>/<out>_predict.csv`` with the header
    ``True,Predict``.
    """
    file_path = os.path.join(now_path, file)
    out_path = os.path.join(now_path, out)

    y_p, x_p = svm_read_problem(file_path)
    model = svm_load_model(model)
    p_label, p_acc, p_val = svm_predict(y_p, x_p, model)

    rows = ['True,Predict\n']
    for i, actual in enumerate(y_p):
        rows.append(str(actual) + ',' + str(p_label[i]) + '\n')

    with open(out_path + '_predict.csv', 'w', encoding="utf-8") as f:
        f.write(''.join(rows))
def GetCode_SKB_123(picpath, types='path'):
    """Recognise one character image and return it as a one-character string.

    :param picpath: an image path when ``types == 'path'``, or an already
        prepared image when ``types == 'img'``
    :param types: ``'path'`` or ``'img'``
    :return: ``chr`` of the first predicted label
    """
    model = svmutil.svm_load_model(modelpath_SKB_123)

    if types == 'path':
        # Grey-scale, then map pixel values below 165 to 0 and the rest to 1.
        threshold_table = [0] * 165 + [1] * (256 - 165)
        img = Image.open(picpath).convert('L').point(threshold_table, '1')
    elif types == 'img':
        img = picpath

    pixel_cnt_list = GetFeature(img, 'img')

    # The features go through a temporary file in the current directory.
    tempath = os.path.join(os.getcwd(), 'temp.txt')
    with open(tempath, 'w') as handle:
        handle.writelines(GetFeatureStr(pixel_cnt_list, 0))
    y0, x0 = svmutil.svm_read_problem(tempath)
    os.remove(tempath)

    p_label, p_acc, p_val = svmutil.svm_predict(y0, x0, model, '-q')
    first_label = int(p_label[0])
    return chr(first_label)
def solve(im):
    """Recognise the characters in image ``im`` and return them as a string.

    The image is denoised and cropped into pieces; one libsvm record per
    piece is written to a temporary file named after the current time in
    milliseconds, predicted with the model in ``MODEL_FILE``, and each
    predicted label is converted with ``chr``.
    """
    pieces = crop(denoise(im))
    tmp = str(round(time.time() * 1000))
    try:
        with open(tmp, "w") as f:
            for piece in pieces:
                f.write("0" + feature(piece))
        y, x = svmutil.svm_read_problem(tmp)
        model = svmutil.svm_load_model(MODEL_FILE)
        p_label, p_acc, p_val = svmutil.svm_predict(y, x, model)
    finally:
        # Remove the temporary file even when reading or predicting fails.
        if os.path.exists(tmp):
            os.remove(tmp)
    return "".join([chr(round(label)) for label in p_label])
def train():
    """Train a model from ``__FEATURE_FILE`` and save it to ``Function.MODEL_FILE``.

    Progress messages are printed before and after training.
    """
    print("Starting train process.")
    labels, features = svmutil.svm_read_problem(__FEATURE_FILE)
    trained = svmutil.svm_train(labels, features)
    svmutil.svm_save_model(Function.MODEL_FILE, trained)
    print("train process done.")
def train_model_main2(model_path, file):
    """Train a LibSVM model on the first 50 samples of ``file`` and save it.

    ``file`` is a libsvm-format vector file (labels plus feature values).
    The model is saved as ``./<model_path>/<c>_feature.model`` where ``<c>``
    is the first character of the third ``/``-separated component of
    ``file``.
    """
    # Make the libsvm Python bindings importable from their hard-coded location.
    svm_path = r"C:\Python36\risk_down\libsvm"
    sys.path.append(svm_path + r"\python")
    import svmutil

    labels, features = svmutil.svm_read_problem(file)
    trained = svmutil.svm_train(labels[:50], features[:50], '-c 4')

    prefix = file.split('/')[2][0]
    target = './' + model_path + '/' + prefix + "_feature.model"
    svmutil.svm_save_model(target, trained)
def train(train_file, fold_num, mapping=None, parameters=None, multilabel=None, output_folder=None):
    '''
    Given a training instance file and (optionally) a label mapping, adapt
    the training vectors to fit the mapping and build an SVM model.

    train_file: path of the training instances
    fold_num: zero-based fold number; the model goes to the sub-folder
        ``fold-NN`` (one-based, two digits)
    mapping: label mapping, applied through ``reMap`` or, for multilabel
        data, by direct lookup
    parameters: dict of training options; defaults to ``{'-c': 1}``
    multilabel: when set, ``multilabel[0]`` names the model file and
        ``multilabel[1]`` is the label that wins whenever it is present
    output_folder: base folder for models; defaults to ``models``

    Returns ``(model_file, distribution)``, where ``distribution`` maps
    every remaining label to its share of the training instances.
    Raises ValueError when no instance with a non-zero label is left.
    '''
    global classifier
    from collections import Counter

    # A fresh dict per call avoids sharing one mutable default.
    if parameters is None:
        parameters = {'-c': 1}

    if not output_folder:
        output_folder = 'models'
    output_folder = os.path.join(output_folder, 'fold-{0:02d}'.format(fold_num+1))
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    if multilabel:
        temp_labels, instances = alt_read_problem(train_file)
        temp_labels = [[mapping[l] for l in label] for label in temp_labels]
        labels = []
        for temp_labs in temp_labels:
            if multilabel[1] in temp_labs:
                labels.append(multilabel[1])
            else:
                assert len(set(temp_labs)) == 1, "Something appears to be wrong with the intermediate mapping. There's still more than one label present for an instance: {0}".format(temp_labs)
                labels.append([l for l in temp_labs if l != multilabel[1]][0])
    else:
        labels, instances = svm_read_problem(train_file)
        labels = reMap(labels, mapping)

    # NOTE(review): original indentation was lost; the zero-label filter is
    # assumed to apply to both branches - confirm.
    # Exclude instances which have 0 as their label
    kept = [(label, instance) for label, instance in zip(labels, instances) if label != 0]
    if not kept:
        raise ValueError(
            'no training instances with a non-zero label in {0}'.format(train_file))
    labels, instances = zip(*kept)

    # One counting pass instead of labels.count() for every distinct label.
    distribution = {}
    for label, count in Counter(labels).items():
        distribution[label] = float(count)/len(labels)

    paramstring = ''
    for param, value in parameters.items():
        paramstring += ' {0} {1}'.format(param, value)
    if classifier == 'libsvm' and '-b' not in parameters:
        paramstring += ' -b 1'
    # NOTE(review): '-q' is assumed to be added for every classifier - confirm.
    paramstring += ' -q'

    if multilabel:
        model_file = os.path.join(output_folder, os.path.basename(train_file) + '.{0}.model'.format(multilabel[0]))
    else:
        model_file = os.path.join(output_folder, os.path.basename(train_file) + '.model')

    print('---training')
    model = svm_train(labels, instances, paramstring)
    svm_save_model(model_file, model)
    return model_file, distribution
def train_attribute(attribute, side):
    """Train (or load) the SVM classifier for one attribute and score the test set.

    attribute: should be one from ["prismatic", "sphere", "flat", "rigid"]
    side: used to name the data, model and result files and passed to the
        data helpers

    The libsvm data files and the model are cached under ``model/`` and only
    regenerated when missing.  Precision, recall, F1 and accuracy are
    printed, and one line per test sample is written to
    ``result/attribute_<attribute>_<side>.csv``.
    """
    tag = attribute + "_" + side

    # train
    datafile = "model/traindata_attribute_" + tag
    if not os.path.isfile(datafile):
        srcfile = "data/feature_attribute_train.csv"
        write_svmdata_attribute(srcfile, datafile, attribute, side, 0)
    label_train, data_train = svmutil.svm_read_problem(datafile)

    modelfile = "model/model_attribute_" + tag
    if not os.path.isfile(modelfile):
        print("train model: " + tag)
        prob = svmutil.svm_problem(label_train, data_train)
        param = svmutil.svm_parameter('-t 0 -c 4 -b 1 -q')
        m = svmutil.svm_train(prob, param)
        svmutil.svm_save_model(modelfile, m)
    else:
        print("load model: " + tag)
        m = svmutil.svm_load_model(modelfile)

    # test
    attribute_info = read_info("data/feature_attribute_test.csv", side)
    datafile = "model/testdata_attribute_" + tag
    if not os.path.isfile(datafile):
        srcfile = "data/feature_attribute_test.csv"
        write_svmdata_attribute(srcfile, datafile, attribute, side, 1)
    label_test, data_test = svmutil.svm_read_problem(datafile)
    p_label, p_acc, p_val = svmutil.svm_predict(label_test, data_test, m, '-b 1')

    [precision, recall, f1, accuracy] = getF1(label_test, p_label)
    print("F1: [%.4f, %.4f, %.4f] Accuracy: %.4f" % (precision, recall, f1, accuracy))

    # `with` closes the result file even if a write fails part-way.
    with open("result/attribute_" + tag + ".csv", "w") as f_result:
        for i, predicted in enumerate(p_label):
            f_result.write(attribute_info[i] + ", " + str(int(label_test[i])) +
                           ", " + str(int(predicted)) + ", ")
            f_result.write("[%.4f]\n" % p_val[i][0])
def loadTraces(self, fname, reLoad=False):
    """Read a libsvm file and append its traces, labels and record names.

    :param fname: file to read; remembered in ``self._fnames`` unless
        ``reLoad`` is true
    :param reLoad: when true, ``fname`` is not added to ``self._fnames``

    Each sample contributes the list of its feature values to
    ``self._traces``, its label to ``self._labels`` and the name
    ``<fname>_<index>`` to ``self._records``.
    """
    if not reLoad:
        self._fnames.append(fname)

    y, x = svm_read_problem(fname)
    vectors = [list(features.values()) for features in x]
    record = [str(fname) + '_' + str(position) for position in range(len(x))]

    self._traces += vectors
    self._labels += y
    self._records += record
def GetCode_register(picpath):
    """Recognise the registration captcha at ``picpath`` and return its text.

    The image is pre-processed, cut vertically into pieces, and each piece
    is predicted separately; every predicted label is converted with ``chr``
    and the characters are joined in order.
    """
    model = svmutil.svm_load_model(modelpath_login_register)  # open the trained model
    im = Pre_process_register(picpath)
    pieces = Crop_Vertical(im, th=3)  # list of images after cutting

    labels = []
    for piece in pieces:
        pixel_cnt_list = GetFeature(piece, 'img')
        # Temporary file holding the features of the image being recognised.
        tempath = os.path.join(os.getcwd(), 'temp.txt')
        with open(tempath, 'w') as handle:
            handle.writelines(GetFeatureStr(pixel_cnt_list, 0))
        y0, x0 = svmutil.svm_read_problem(tempath)
        os.remove(tempath)
        p_label, p_acc, p_val = svmutil.svm_predict(y0, x0, model, '-q')
        labels.append(int(p_label[0]))

    return ''.join(chr(label) for label in labels)
def over_sample( sample_df, dump_svmlight_file ):
    """Over-sample the positive rows of a data set and shuffle it.

    :param sample_df: data frame whose rows line up with the lines of
        ``dump_svmlight_file``; a ``label`` column is added to it in place
    :param dump_svmlight_file: libsvm-format file; rewritten in place with
        the over-sampled, shuffled lines
    :return: ``(features, dump_svmlight_file)`` where ``features`` is the
        over-sampled, shuffled frame without the ``label`` column

    Rows with label >= 0.99 are positive.  They are appended
    ``(total - positives) // positives`` times; with no positive rows the
    data is only shuffled.
    """
    train_y, train_x = svm_read_problem(dump_svmlight_file)
    with open(dump_svmlight_file, "rb") as f:
        ans_lines = np.array(f.readlines())

    sample_df['label'] = pd.Series(train_y)
    positive = sample_df[sample_df['label'] >= 0.99].index

    # Integer division keeps range() valid on Python 3; the guard avoids a
    # ZeroDivisionError when there are no positive rows.
    if len(positive):
        balence = (len(train_y) - len(positive)) // len(positive)
    else:
        balence = 0

    for _ in range(balence):
        ans_lines = np.hstack((ans_lines, ans_lines[positive]))
        sample_df = pd.concat([sample_df, sample_df.iloc[positive, :]], axis=0)

    # Apply one permutation to both the file lines and the frame rows.
    new_idx = np.random.permutation(len(ans_lines))
    ans_lines = ans_lines[new_idx]
    sample_df = sample_df.reset_index(drop=True)
    sample_df = sample_df.iloc[new_idx, :].reset_index(drop=True)

    with open(dump_svmlight_file, "wb") as f:
        f.writelines(list(ans_lines))

    return sample_df[np.setdiff1d(sample_df.columns, ['label'])], dump_svmlight_file
def classification():
    """Predict the images in ``./show_data/test_data`` and print each grade.

    The model is loaded from ``./train_data/model_weight``.  Labels 0, 1 and
    2 have their own message; every other label prints the fourth message.
    """
    print('test image classification...')
    y, x = svmutil.svm_read_problem('./show_data/test_data')
    print('model loading...')
    model = svmutil.svm_load_model('./train_data/model_weight')
    p_label, p_acc, p_val = svmutil.svm_predict(y, x, model)

    messages = {
        0: '正常 眼底图像',
        1: '轻度白内障 眼底图像',
        2: '中度白内障 眼底图像',
    }
    fallback = '重度白内障 眼底图像'
    for label in p_label:
        print(messages.get(int(label), fallback))
def run(self):
    """Train with this worker's (c, g) exponents and return the result.

    ``self.get_cmd(c, g)`` supplies a command line whose first token is the
    directory containing ``svmutil``, whose last token is the data file and
    whose middle tokens are the training options.

    Returns ``(self.cexp, self.gexp, cv_acc)`` where ``cv_acc`` is whatever
    ``svm_train`` returned.
    """
    c = 2.0**self.cexp if self.cexp is not None else None
    g = 2.0**self.gexp if self.gexp is not None else None

    tokens = shlex.split(self.get_cmd(c, g))
    module_dir, data_file = tokens[0], tokens[-1]
    options = ' '.join(tokens[1:-1])

    sys.path.append(module_dir)
    from svmutil import svm_read_problem, svm_train

    y, x = svm_read_problem(data_file)
    cv_acc = svm_train(y, x, options)
    return (self.cexp, self.gexp, cv_acc)
def run(self):
    """Run one training job for this worker's exponents.

    ``c`` and ``g`` are ``2.0 ** exponent`` when the matching exponent is
    set, otherwise ``None``.  The command line from ``self.get_cmd`` is
    split into the ``svmutil`` directory (first token), the training
    options (middle tokens) and the data file (last token).

    Returns ``(self.cexp, self.gexp, cv_acc)``.
    """
    c = None
    g = None
    if self.cexp is not None:
        c = 2.0**self.cexp
    if self.gexp is not None:
        g = 2.0**self.gexp

    parts = shlex.split(self.get_cmd(c, g))
    # The svmutil module is imported from the directory named first.
    sys.path.append(parts[0])
    from svmutil import svm_read_problem, svm_train

    labels, features = svm_read_problem(parts[-1])
    cv_acc = svm_train(labels, features, ' '.join(parts[1:-1]))
    return (self.cexp, self.gexp, cv_acc)
def test(test_file, model_file, fold_num, mapping=None, multilabel=None, debug=False):
    '''
    Classify the instances of a test file against a saved model.

    The labels of the test file are remapped (through ``mapping``),
    instances labelled 0 are dropped, and the rest are predicted with the
    model in ``model_file``.  With ``debug`` set, the remapped instances are
    also written to ``<basename of test_file>.remap``.

    Returns ``(pred_labels, pred_values, label_order, labels)``: predicted
    labels, prediction values, the label order and the (possibly remapped)
    test labels.
    '''
    if multilabel:
        raw_labels, instances = alt_read_problem(test_file)
        mapped = [[mapping[l] for l in label] for label in raw_labels]
        dominant = multilabel[1]
        labels = []
        for temp_labs in mapped:
            if dominant in temp_labs:
                labels.append(dominant)
                continue
            assert len(set(temp_labs)) == 1, "Something appears to be wrong with the intermediate mapping. There's still more than one label present for an instance: {0}".format(temp_labs)
            labels.append([l for l in temp_labs if l != multilabel[1]][0])
    else:
        labels, instances = svm_read_problem(test_file)
        labels = reMap(labels, mapping)

    # NOTE(review): original indentation was lost; the zero-label filter is
    # assumed to apply to both branches - confirm.
    # Exclude instances which have 0 as their label
    kept = [(label, instance) for label, instance in zip(labels, instances) if label != 0]
    labels, instances = zip(*kept)

    if debug:
        with open(os.path.basename(test_file) + '.remap', 'w') as fout:
            for label, instance in zip(labels, instances):
                fields = [str(label)]
                for idx, val in instance.items():
                    fields.append('{0}:{1}'.format(str(idx), str(val)))
                fout.write(' '.join(fields).strip() + '\n')

    model = svm_load_model(model_file)
    print('---testing')
    if classifier == 'liblinear':
        pred_labels, ACC, pred_values, label_order = svm_predict(labels, instances, model)
    elif classifier == 'libsvm':
        pred_labels, (ACC, MSC, SCC), pred_values = svm_predict(labels, instances, model, options='-b 1')
        label_order = model.get_labels()
    return pred_labels, pred_values, label_order, labels
def classify2(filename, classLabel=0):
    """Classify one text file and return the predicted label.

    The text is vectorised with ``VSM.TextToVector2``, written as a single
    libsvm record and predicted with ``../SVMTrainFile250.model``.

    :param filename: text file to classify
    :param classLabel: label written into the temporary libsvm record
    :return: the first predicted label, or ``0`` when the text yields an
        empty vector
    """
    svm_file = "/Thu_Life/CS/SVM/data/trainData/Test_SVMFile/singleSVM_TestFile"
    # `with` closes the file on every path; the early return used to leak it.
    with open(svm_file, "wb") as f:
        t = VSM.TextToVector2(filename)
        if len(t) > 0:
            record = ("%d " % classLabel)
            record += "".join("%d:%d " % (t[k][0], t[k][1])
                              for k in range(len(t)))
            record += "\r\n"
            # The file is binary, so encode explicitly (works on 2 and 3).
            f.write(record.encode("ascii"))
    if not len(t) > 0:
        return 0

    y, x = svmutil.svm_read_problem(svm_file)
    model = svmutil.svm_load_model("../SVMTrainFile250.model")
    label, b, c = svmutil.svm_predict(y, x, model)
    return label[0]
def cSvmTrainSet(self):
    """Build the training set from ``data/`` and train ``self.model``.

    Files whose name matches ``^<self.legalName>-\\d.rec`` are loaded with
    label 1, all other files in ``data/`` with label -1.  The combined set
    is saved to ``data_format/<self.legalName>.mat``, read back with
    libsvm and used for training; the resulting model is printed.
    """
    dataMat = []
    labelMat = []
    # Raw string: '\d' in a plain literal is an invalid escape sequence.
    file_pattern = re.compile(r'^%s-\d.rec' % self.legalName)
    for fdata in os.listdir('data'):
        label_value = 1 if file_pattern.match(fdata) else -1
        data, label = loadDataSet('data/' + fdata, label_value)
        dataMat += data
        labelMat += label

    formatted_path = 'data_format/%s.mat' % self.legalName
    libSvmFormatSaveInFile(dataMat, labelMat, formatted_path)
    # TODO: multithreading
    y, x = svmutil.svm_read_problem(formatted_path)
    prob = svmutil.svm_problem(y, x, isKernel=True)
    param = svmutil.svm_parameter('-t 0 ')
    self.model = svmutil.svm_train(prob, param)
    print(self.model)
def easy_predict(train_name, test_name):
    """Scale a test file with a saved range file and predict it.

    ``<train_name>.range`` and ``<train_name>.model`` must already exist.
    The scaled data is written to ``<basename of test_name>.scale`` in the
    current directory by running ``svmscale_exe`` through the shell.

    Returns ``(pred_values, MSE, SCC)`` from ``svm_predict``.
    """
    range_file = train_name + ".range"
    model_file = train_name + ".model"
    assert os.path.exists(test_name),"testing file not found"
    assert os.path.exists(model_file),"model file not found"
    assert os.path.exists(range_file),"range file not found"

    file_name = os.path.basename(test_name)
    scaled_test_file = file_name + ".scale"
    predict_test_file = file_name + ".predict"

    print('Scaling testing data...')
    scale_command = '{0} -r "{1}" "{2}" > "{3}"'.format(
        svmscale_exe, range_file, test_name, scaled_test_file)
    Popen(scale_command, shell = True, stdout = PIPE).communicate()

    prob_y, prob_x = svmutil.svm_read_problem(scaled_test_file)
    model = svmutil.svm_load_model(model_file)
    outcome = svmutil.svm_predict(prob_y, prob_x, model, "-b 1")
    pred_labels, (ACC, MSE, SCC), pred_values = outcome
    return pred_values,MSE,SCC
def svm_predict(problem_filepath, model_filepath):
    """
    Using LibSVM to predict result of a problem

    The first column of the problem file is returned as ``ids``; every
    sample is given the placeholder label -2 for the prediction call.

    Returns
    -------
    (ids, labels)
    """
    # Reading a problem
    ids, features = svmutil.svm_read_problem(problem_filepath)
    print("len(x) =  %s" % len(features))

    # Preparing a model
    model = svmutil.svm_load_model(model_filepath)

    # Predicting
    placeholder_labels = [-2] * len(features)
    p_label, p_acc, p_val = svmutil.svm_predict(
        placeholder_labels, features, model)
    return (ids, p_label)
__author__ = 'yunlinz' import svm import svmutil import sklearn.cross_validation import numpy import matplotlib.pyplot as plt import time import ctypes # load data y_train, x_train = svmutil.svm_read_problem('splice_noise_train.txt') y_test, x_test = svmutil.svm_read_problem('splice_noise_test.txt') # scale x_train to [-1,1] x_train_sum = {} x_train_min = {} x_train_max = {} x_train_rng = {} x_train_scaled = [] x_test_scaled = [] for d in x_train: for i in range(1, 60, 1): if i in x_train_min: if d[i] < x_train_min[i]: x_train_min[i] = d[i] else: x_train_min[i] = d[i] if i in x_train_max: if d[i] > x_train_max[i]:
all_features = extractSift(fname) all_files_labels[fname] = 4 # label is unknown print "---------------------" print "## loading codebook from " + codebook_file with open(codebook_file, 'rb') as f: codebook = load(f) print "---------------------" print "## computing visual word histograms" word_histgram = computeHistograms(codebook, all_features[fname]) print "---------------------" print "## write the histograms to file to pass it to the svm" nclusters = codebook.shape[0] writeHistogramsToFile(nclusters, all_files_labels, fname, word_histgram, HISTOGRAMS_FILE) print "---------------------" print "## test data with svm" y,x=svmutil.svm_read_problem(HISTOGRAMS_FILE) model=svmutil.svm_load_model(model_file) result = svmutil.svm_predict(y,x,model) cat_label = load(open("cat.txt", "rb" )) print cat_label print result[0][0]
#coding=utf-8
# Predict the samples in ../thuboy with a saved model and print each part of
# the prediction result (labels, accuracy tuple, values) in turn.
import svmutil

y, x = svmutil.svm_read_problem('../thuboy')
m = svmutil.svm_load_model('../SVMTrain250.model')
p1, p2, p3 = svmutil.svm_predict(y, x, m)
for part in (p1, p2, p3):
    print(part)
import svmutil as svm
import glob
import csv
import os

# Train on ./trainning.ml, predict ./predict.ml and write one integer label
# per line to ./predict.ml.predict.
y, x = svm.svm_read_problem('./trainning.ml')
yt, xt = svm.svm_read_problem('./predict.ml')

m = svm.svm_train(y, x, '-c 32768 -g 0.125')
p_label, p_acc, p_val = svm.svm_predict(yt, xt, m)
print(p_label)

with open('./predict.ml.predict', 'wb') as f:
    for label in p_label:
        f.write(str(int(label)) + "\n")
# print(len(vecWords)) #................................................................................ #添加tf-idf特征值到input向量中 for j in range(len(vecWords)): vecInput[j].append(tfidf(totalList,vecWords[j])) #.................................................................................. #生成libsvm训练使用的文本文件 print('#####################################') print('生成libsvm训练文档..................') print('#####################################') svmfile=open(svmtrainfile,'w') for k in range(len(vecOut)): svmfile.write(str(vecOut[k])+' 1:'+str(vecInput[k][0])+' 2:'+str(vecInput[k][1])+' 3:'+str(vecInput[k][2])+' 4:'+str(vecInput[k][3])+'\r') svmfile.close() #.................................................................................. y,x=svmutil.svm_read_problem(svmtrainfile) print('#####################################') print('svm训练中..................') print('#####################################') # print(x) # print('#####################################') # print(y) m=svmutil.svm_train(y,x) print('#####################################') print('svm测试中..................') print('#####################################') fsvmR=open(svmResultFile,'w') for each in svmutil.svm_predict(y,x,m): fsvmR.writelines(str(each)) fsvmR.close() # print(m)
def main(argv):
    """Train one exemplar SVM per training sample and classify by neighbour vote.

    The data set is read from 'echo_liveness.01.libsvm'.  Samples labelled 1.0
    and 0.0 are shuffled independently; from each class the first
    ``num_train`` shuffled samples form the training set (labelled +1 / -1)
    and the next ``num_test`` samples form the test set.  For every training
    sample an SVM is trained (through Dpark) with that sample as the only
    positive, and its first probability column on the test set is collected.
    Each test sample is then labelled by a majority vote over the ``k``
    training samples whose exemplar models scored it highest, and the result
    is passed to ``evaluation``.

    Args:
        argv: command-line arguments; not used by this function.
    """
    # Dpark initialize
    dpark = DparkContext()

    # number of samples taken per class for the training and testing set
    num_train = 6000
    num_test = 6000

    # Loading the dataset
    y, x = svm_read_problem('echo_liveness.01.libsvm')

    # Preparing training and testing data
    if len(x) != len(y):
        print("The labels and features are not accorded!")
        sys.exit()

    x_live = [x[i] for i in find(y, 1.0)]
    x_stu = [x[i] for i in find(y, 0.0)]

    # list(...) so that shuffle also works where range() is not a list.
    ind_live = list(range(len(x_live)))
    ind_stu = list(range(len(x_stu)))
    random.shuffle(ind_live)
    random.shuffle(ind_stu)

    live_train = ind_live[:num_train]
    stu_train = ind_stu[:num_train]
    live_test = ind_live[num_train:num_test + num_train]
    stu_test = ind_stu[num_train:num_test + num_train]

    x_te = [x_live[i] for i in live_test] + [x_stu[i] for i in stu_test]
    y_te = [1.0] * len(live_test) + [-1.0] * len(stu_test)
    x_tr = [x_live[i] for i in live_train] + [x_stu[i] for i in stu_train]
    # Label counts follow the samples actually drawn, so x_tr and y_tr stay
    # aligned even when a class has fewer than num_train samples.
    y_tr = [1.0] * len(live_train) + [-1.0] * len(stu_train)

    # dpark version
    def map_iter(i):
        # Exemplar labelling: sample i is the single positive.
        y_tr_examplar = [-1.0] * len(y_tr)
        y_tr_examplar[i] = 1.0
        # opt = '-t 0 -w1 ' + str(len(y_tr)) + ' -w-1 1 -b 1 -q'
        # It is suggested in Efros' paper that:
        # C1 0.5, C2 0.01
        opt = '-t 0 -w1 0.5 -w-1 0.01 -b 1 -q'
        m = svm_train(y_tr_examplar, list(x_tr), opt)
        p_label, p_acc, p_val = svm_predict(y_te, x_te, m, '-b 1 -q')
        p_val = np.array(p_val)
        # keep only the first column -> shape = (N, )
        return p_val[:, 0]

    p_vals = dpark.makeRDD(
        list(range(len(y_tr)))
    ).map(
        map_iter
    ).collect()

    # Rows = test samples, columns = exemplar models.
    val = np.array(p_vals).T

    # KNN
    # Floor division keeps k an integer so it can be used as a slice bound.
    k = num_train // 8
    # Column order reversed so the highest-scoring exemplars come first.
    sorted_index = val.argsort(axis=1)
    sorted_index = sorted_index.T[::-1].T

    p_label = []
    for index in sorted_index:
        nearest_samples = [y_tr[sample_index] for sample_index in index[:k]]
        # Count the votes directly.  The earlier two-bin histogram put a
        # unanimous -1 neighbourhood into the upper bin (the bin range is
        # widened around the single value) and therefore labelled it +1.
        negatives = nearest_samples.count(-1.0)
        positives = len(nearest_samples) - negatives
        # Ties go to the positive class, as before.
        p_label.append(-1.0 if negatives > positives else 1.0)

    # evaluation
    rate, pos_rate, neg_rate = evaluation(y_te, p_label)

    print("The Examplar SVM framework achieves a precision of %f" % rate)
def multipule_eval_for_logistic(test_corpus_dir, feature_map_character, feature_map_numeric, feature_show, args):
    """
    Evaluate the models built with liblinear logistic regression (which judge
    a label sentence by sentence).
    If +1 occurs even once, the document is regarded as carrying that label.

    For every file returned by load_files(test_corpus_dir) the sentences are
    written in libsvm format, read back from "test.data", and scored by every
    model file found under the model directory.  Per-file results are printed,
    and Hamming loss, subset accuracy, example-based precision/recall/F and
    accuracy are averaged over the files, printed, and (when
    args.save_performance is set) written to a performance_result file.

    Attributes read from args: save_performance, experiment_no, threshold,
    stop, tfidf, persian_test, dutch_test.
    """
    env = "pine"
    # NOTE(review): the nesting of this if/elif was reconstructed; confirm
    # which of the statements below belong to the "local" branch.
    if env == "pine":
        # change below by an environment
        libsvm_wrapper_path = "/home/kensuke-mi/opt/libsvm-3.17/python/"
    elif env == "local":
        libsvm_wrapper_path = "/Users/kensuke-mi/opt/libsvm-3.17/python/"
        liblinear_wrapper_path = "/Users/kensuke-mi/opt/liblinear-1.94/python/"
        sys.path.append(liblinear_wrapper_path)
    sys.path.append(libsvm_wrapper_path)
    # Imported here because the wrapper directories are only on sys.path now.
    import liblinearutil
    import svmutil

    if args.save_performance == True:
        performance_out = codecs.open("./performance_result." + args.experiment_no, "w", "utf-8")
        performance_out.write(args.experiment_no + u"\n")
        performance_out.write(u"-" * 30 + u"\n")

    # Confidence threshold
    threshold = float(args.threshold)
    # Option: whether to display the confidence (not read again in this function)
    show_confidence = False
    # Average confidence (never updated below)
    average_confidence = 0
    # Number of +1 instances (never updated below)
    times_plus_1_ins = 0

    num_docs_having_motif = {}
    stop = args.stop
    tfidf_flag = args.tfidf
    exno = args.experiment_no

    model_dir_path = "../get_thompson_motif/classifier/logistic_2nd/"
    model_path_list = load_files(model_dir_path, "logistic." + exno)
    # Stores how many times the classifier decided correctly, i.e. the
    # intersection (CAP) of gold motif tags and classifier candidates.
    num_of_correct_decision = {}
    # precision_sum, recall_sum and F_sum are never updated below.
    precision_sum = 0
    recall_sum = 0
    F_sum = 0
    h_loss_sum = 0
    subset_acc_sum = 0
    ex_p_sum = 0
    ex_r_sum = 0
    ex_f_sum = 0
    acc_sum = 0
    classifier_return_1_sum = 0

    for test_file in load_files(test_corpus_dir):
        # ============================================================
        result_map = {}
        gold_map = {}
        # ------------------------------------------------------------
        # If neither flag is set, sentences_in_document and motif_stack are
        # not assigned in this iteration.
        if args.persian_test == True:
            # Create the instances sentence by sentence
            sentences_in_document, motif_stack = file_loader_sentence(test_file, stop)
        elif args.dutch_test == True:
            sentences_in_document, motif_stack = file_loader_dutch_sentence(test_file, stop)
        # ------------------------------------------------------------
        # presumably writes the "test.data" file read on the next line -- TODO confirm
        out_libsvm_format_sentence(
            sentences_in_document, feature_map_character, feature_map_numeric, feature_show, tfidf_flag
        )
        test_y, test_x = svmutil.svm_read_problem("test.data")
        # ------------------------------------------------------------
        for model_file in model_path_list:
            decision_flag = False
            # The first character of the model file name is used as the label.
            alphabet_label = unicode(os.path.basename(model_file)[0], "utf-8")
            result_map[alphabet_label] = 0

            model = liblinearutil.load_model(model_file)
            p_label, p_acc, p_val = liblinearutil.predict(test_y, test_x, model, "-b 1")

            # NOTE(review): the nesting of the two ifs below was reconstructed.
            # As written, decision_flag stays True once any sentence is
            # predicted +1, so later sentences are checked against the
            # threshold regardless of their own label -- confirm.
            for index, result_label in enumerate(p_label):
                if result_label == 1.0:
                    decision_flag = True
                # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                if decision_flag == True and p_val[index][0] > threshold:
                    result_map[alphabet_label] = 1
                # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # ------------------------------------------------------------
        # The first element of each gold motif is used as its label.
        for gold_motif in motif_stack:
            alphabet_label = gold_motif[0]
            gold_map[alphabet_label] = 1
        # ------------------------------------------------------------
        # Labels that were both estimated (== 1) and present in the gold set.
        gold_cap_result = {}
        for label in result_map:
            if result_map[label] == 1 and label in gold_map:
                gold_cap_result[label] = 1
        # ------------------------------------------------------------
        # times_plus_1_ins is still 0 here, so this always takes the except
        # branch and average is 0.
        try:
            average = average_confidence / times_plus_1_ins
        except ZeroDivisionError:
            average = 0

        print "-" * 30
        print "Filename:{}\nEstimated:{}\nGold:{}\nCorrect Estimation:{}".format(
            test_file, result_map, gold_map, gold_cap_result
        )
        print "average confidence is {}".format(average)
        print "-" * 30
        # ------------------------------------------------------------
        # Each helper receives the running sum(s) and returns the updated value(s).
        h_loss_sum = calc_h_loss(result_map, gold_map, h_loss_sum)
        subset_acc_sum = calc_subset_acc(result_map, gold_map, subset_acc_sum)
        ex_p_sum, ex_r_sum, ex_f_sum, acc_sum = calc_p_r_f(result_map, gold_map, ex_p_sum, ex_r_sum, ex_f_sum, acc_sum)
        classifier_return_1_sum += get_the_num_of_1_classifier(result_map)
        # ============================================================

    # Averages over the number of test files; load_files(test_corpus_dir) is
    # called again for every count below.
    num_of_files = len(load_files(test_corpus_dir))
    h_loss = h_loss_sum / num_of_files
    subset_acc = float(subset_acc_sum) / num_of_files
    ex_p = ex_p_sum / num_of_files
    ex_r = ex_r_sum / num_of_files
    ex_f = ex_f_sum / num_of_files
    acc = acc_sum / num_of_files
    classifier_return_1 = float(classifier_return_1_sum) / num_of_files

    # The three sums are still 0, and the averages are not used afterwards.
    precision_ave = precision_sum / len(load_files(test_corpus_dir))
    recall_ave = recall_sum / len(load_files(test_corpus_dir))
    F_ave = F_sum / len(load_files(test_corpus_dir))

    print "-" * 30
    print "RESULT for {} files classification".format(len(load_files(test_corpus_dir)))

    hamming_format = u"Hamming Loss:{}".format(h_loss)
    subset_format = u"Subset Accuracy(classification accuracy):{}".format(subset_acc)
    else_format = u"example-based precision:{} example-based recall:{} example-based F:{} accuracy:{}".format(
        ex_p, ex_r, ex_f, acc
    )
    classifier_format = u"Ave. number of classifier which returns 1:{}".format(classifier_return_1)

    print hamming_format
    print subset_format
    print else_format
    print classifier_format

    # performance_out only exists when it was opened at the top of the function.
    if args.save_performance == True:
        performance_out.write(hamming_format + u"\n")
        performance_out.write(subset_format + u"\n")
        performance_out.write(else_format + u"\n")
        performance_out.write(classifier_format + u"\n")
        performance_out.close()
p_label_train, p_acc_train, p_val_train = svmutil.svm_predict(y[:i], x[:i], m) p_label_validation, p_acc_validation, p_val_validation = svmutil.svm_predict(y[i:], x[i:], m) print p_acc_train[0], "\t", p_acc_validation[0], "\n" training_examples.append(i) train_accuracy.append(p_acc_train[0]) validation_accuracy.append(p_acc_validation[0]) return training_examples, train_accuracy, validation_accuracy def get_cross_val(x, y, x_val, y_val, gamma_c): prob = svmutil.svm_problem(y, x) param = svmutil.svm_parameter('-t 2 -q -c {0} -g {1}'.format(gamma_c.C, gamma_c.gamma)) m = svmutil.svm_train(prob, param) svmutil.svm_save_model("model", m) p_label_validation, p_acc_validation, p_val_validation = svmutil.svm_predict(y_val, x_val, m) return p_acc_validation[0] if __name__ == '__main__': y, x = svmutil.svm_read_problem("char_recon_shuffled.db") gamma = 1.0 / (2.0 * (3.0 ** 7) ** 2) C = 3.0 ** 3.0 prob = svmutil.svm_problem(y, x) param = svmutil.svm_parameter('-t 2 -q -c {0} -g {1}'.format(C, gamma)) m = svmutil.svm_train(prob, param) svmutil.svm_save_model("model", m)
thresh=K_THRESH) with open(DATASETPATH + CODEBOOK_FILE, 'wb') as f: dump(codebook, f, protocol=HIGHEST_PROTOCOL) print "---------------------" print "## compute the visual words histograms for each image" all_word_histgrams = {} for imagefname in all_features: word_histgram = computeHistograms(codebook, all_features[imagefname]) all_word_histgrams[imagefname] = word_histgram print "---------------------" print "## write the histograms to file to pass it to the svm" writeHistogramsToFile(nclusters,all_files_labels,all_files,all_word_histgrams,DATASETPATH + HISTOGRAMS_FILE) print "---------------------" print "## train svm" y,x=svmutil.svm_read_problem(DATASETPATH + HISTOGRAMS_FILE) model_file=svmutil.svm_train(y,x) svmutil.svm_save_model('trainingdata.svm.model', model_file) print "--------------------" print "## outputting results" print "codebook file: " + DATASETPATH + CODEBOOK_FILE print "category ==> label" for cat in cat_label: print '{0:13} ==> {1:6d}'.format(cat, cat_label[cat]) dump(cat_label, open("cat.txt", "wb" ))
#coding: utf-8
# Train a linear-kernel SVM on the training file, then run it on the test file.
import svmutil
import outputLIBSVMformat

TRAIN_PATH = "./train_libsvmFormat.txt"
TEST_PATH = "./test_libsvmFormat.txt"
# "-t 0" selects the linear kernel.
LINEAR_KERNEL_OPTIONS = "-t 0"

train_label, train_data = svmutil.svm_read_problem(TRAIN_PATH)
model = svmutil.svm_train(train_label, train_data, LINEAR_KERNEL_OPTIONS)

test_label, test_data = svmutil.svm_read_problem(TEST_PATH)
p_label, p_acc, p_val = svmutil.svm_predict(test_label, test_data, model)