def __init__(self, train_feature_file=TRAIN_FEATURE_FILE):
    """Load the cached SVM model, or train and cache one if none exists.

    :param train_feature_file: path to a libsvm-format problem file; it is
        read only when no file exists at ``SAVED_MODEL``.
    """
    if not os.path.exists(SAVED_MODEL):
        # No cached model yet: train with cost 4 and persist the result.
        labels, features = svmutil.svm_read_problem(train_feature_file)
        self.model = svmutil.svm_train(labels, features, '-c 4')
        svmutil.svm_save_model(SAVED_MODEL, self.model)
        return
    self.model = svmutil.svm_load_model(SAVED_MODEL)
def main(path, k):
    """Run hold-out SVM rounds over the pickled ``*WC`` files in *path*.

    Files whose name ends in ``'WC'`` are numbered in ``os.listdir`` order.
    In round ``kk`` every file whose running count satisfies
    ``count % k == kk`` is held out for testing; the rest train a
    linear-kernel SVM (``-t 0 -c 4 -b 1 -q``). The class label is the second
    character of the file name. Each round overwrites ``n.model``.

    :param path: directory prefix; it is concatenated directly with each
        file name, so it must end with a path separator.
    :param k: modulus used to assign files to the held-out set.
    """
    prabs = []
    lns = []
    # NOTE(review): range(0, k - 1) runs k - 1 rounds, so remainder k - 1 is
    # never held out -- confirm whether that is intended.
    for kk in range(0, k - 1):
        testLabel = []
        trainPoint = []
        trainLabel = []
        testPoint = []
        wcCount = 0
        for u in os.listdir(path):
            if u[-2:] == 'WC':
                wcCount += 1
                filePath = path + u
                # NOTE: pickle executes arbitrary code on load; only use it
                # on trusted feature files.
                with open(filePath, 'rb') as handle:
                    WC = pickle.load(handle)
                if wcCount % k == kk:
                    testLabel.append(int(u[1]))
                    testPoint.append(WC)
                else:
                    trainLabel.append(int(u[1]))
                    trainPoint.append(WC)
        lns.append(len(testLabel))
        prob = svmutil.svm_problem(trainLabel, trainPoint)
        param = svmutil.svm_parameter('-t 0 -c 4 -b 1 -q')
        m = svmutil.svm_train(prob, param)
        svmutil.svm_save_model('n.model', m)
        p_label, p_acc, p_val = svmutil.svm_predict(testLabel, testPoint, m, '-b 1')
        prabs.append(p_acc[0])
def kfold(data, labels, k):
    """Estimate SVM accuracy with 10 rounds of random hold-out validation.

    Each round draws ``len(data) // k`` samples without replacement as the
    test set, trains a sigmoid-kernel SVM (``-t 3 -c 4 -b 1 -q``) on the
    remaining samples, saves it to ``n.model`` and records the test accuracy.
    The mean and standard deviation of the accuracies are printed.

    :param data: NumPy array of samples, indexable by an index array.
    :param labels: NumPy array of labels aligned with *data*.
    :param k: divisor that sets the test-set size.
    :return: mean accuracy over the 10 rounds, or ``0`` when ``svmutil``
        cannot be imported.
    """
    try:
        import svmutil
    except ImportError:
        return 0

    n_samples = len(data)
    prabs = []
    for _ in range(0, 10):
        # Floor division keeps the sample count an integer on Python 3 too.
        picks = np.random.choice(n_samples, n_samples // k, replace=False)
        rest = np.setdiff1d(range(0, n_samples), picks)

        testLabel = labels[picks].tolist()
        testPoint = data[picks].tolist()
        trainLabel = labels[rest].tolist()
        trainPoint = data[rest].tolist()

        prob = svmutil.svm_problem(trainLabel, trainPoint)
        param = svmutil.svm_parameter('-t 3 -c 4 -b 1 -q')
        m = svmutil.svm_train(prob, param)
        svmutil.svm_save_model('n.model', m)
        p_label, p_acc, p_val = svmutil.svm_predict(testLabel, testPoint, m, '-b 1')
        prabs.append(p_acc[0])

    mean_accuracy = sum(prabs) / float(len(prabs))
    print(mean_accuracy)
    print('std' + str(np.std(prabs)))
    return mean_accuracy
def train_list(self):
    """Train an SVM on the training data and save it to ``./news/svmmodel``.

    The labels and features returned by ``self.load_data_list(train=True)``
    are kept on ``self.y`` / ``self.x`` and the trained model on
    ``self.model``.
    """
    print('Start to train.')
    self.y, self.x = self.load_data_list(train=True)
    # cost 4, linear kernel, shrinking off, 1024 MB kernel cache
    options = '-c 4 -t 0 -h 0 -m 1024'
    self.model = svmutil.svm_train(self.y, self.x, options)
    svmutil.svm_save_model('./news/svmmodel', self.model)
    print('Train finished.')
def __init__(self, train_feature_file=TRAIN_FEATURE_FILE):
    """Initialise ``self.model`` from disk, training it first if needed.

    :param train_feature_file: libsvm-format training file, used only when
        ``SAVED_MODEL`` does not exist yet.
    """
    model_is_cached = os.path.exists(SAVED_MODEL)
    if model_is_cached:
        self.model = svmutil.svm_load_model(SAVED_MODEL)
        return

    targets, samples = svmutil.svm_read_problem(train_feature_file)
    trained = svmutil.svm_train(targets, samples, '-c 4')
    self.model = trained
    # Cache the freshly trained model so later instances can just load it.
    svmutil.svm_save_model(SAVED_MODEL, trained)
def Train_SVM_model(PathToFeatureFile):
    """Train an SVM from a libsvm feature file and save the model beside it.

    The model is written into the feature file's directory as
    ``model_<suffix>``, where ``<suffix>`` is everything after the first
    underscore in the feature file's *name*. The model path is printed.

    :param PathToFeatureFile: path to a libsvm-format feature file whose
        file name contains an underscore.
    :raises IndexError: if the file name contains no underscore.
    """
    feature_dir, feature_name = os.path.split(PathToFeatureFile)
    y, x = svmutil.svm_read_problem(PathToFeatureFile)
    model = svmutil.svm_train(y, x)
    # Split the file name only: splitting the whole path picked up directory
    # components whenever a parent directory contained an underscore.
    suffix = feature_name.split('_', 1)[1]
    modelFilePath = os.path.join(feature_dir, f"model_{suffix}")
    svmutil.svm_save_model(modelFilePath, model)
    print(modelFilePath)
def trainSVMAndSave(modelLoc, kernel, labels):
    """Return the SVM model stored at *modelLoc*, training it if absent.

    :param modelLoc: path of the model file to load from or save to.
    :param kernel: passed through to ``trainSVM`` when training is needed.
    :param labels: passed through to ``trainSVM`` when training is needed.
    :return: the loaded or newly trained model.
    """
    if not os.path.exists(modelLoc):
        trained = trainSVM(kernel, labels)
        svm_save_model(modelLoc, trained)
        return trained
    return svm_load_model(modelLoc)
def write_compact_nonlinear_svm(file_compact_svm, target_class, file_svm_model,
                                svm_model=None, file_SVs=None, SVs=None,
                                str_kernel=None):
    """
    Write the text manifest that locates the parts of a compact libSVM model.

    Component files are written into the directory of 'file_compact_svm'.

    @param file_compact_svm: manifest file to write
    @param target_class: integer target class, e.g., 0 or 30
    @param file_svm_model: file name of the compact svm model
    @param svm_model: model object; when truthy it is saved at file_svm_model
    @param file_SVs: file name of the support vectors; listed in the manifest
                     only when truthy
    @param SVs: support vectors in numpy format; saved when not None
    @param str_kernel: kernel function string (e.g., kernels.ngd); listed in
                       the manifest only when truthy
    @return: None
    """
    out_dir = os.path.dirname(file_compact_svm)

    if svm_model:
        model_path = os.path.join(out_dir, file_svm_model)
        svmutil.svm_save_model(model_path, svm_model)

    if SVs is not None:
        np.save(os.path.join(out_dir, file_SVs), SVs)

    with open(file_compact_svm, "wb") as manifest:
        emit = manifest.write
        emit("file_svm_model=%s\n" % file_svm_model)
        emit("target_class=%d\n" % target_class)
        if file_SVs:
            emit("file_SVs=%s\n" % file_SVs)
        if str_kernel:
            emit("str_kernel=%s\n" % str_kernel)
        manifest.flush()
def getmodelandaccuary(line):
    """
    Train a model and predict on held-out samples.

    Every entry of *line* except the last is read as a libsvm problem file.
    Its first 90 samples go to the training set and samples 90-99 to the
    test set. Both sets are shuffled, a C-SVC with RBF kernel is trained
    (``-s 0 -t 2 -c 32 -g 8 -b 1``) and the model is saved to a fixed path.

    :param line: paths of the feature files to read (the original note
        describes them as feature directories on HDFS)
    :return: the accuracy value returned by ``svmutil.svm_predict``
    """
    train_y, train_x = [], []
    test_y, test_x = [], []
    for idx in range(len(line) - 1):
        labels, samples = svmutil.svm_read_problem(line[idx])
        train_y.extend(labels[0:90])
        train_x.extend(samples[0:90])
        test_y.extend(labels[90:100])
        test_x.extend(samples[90:100])

    # Shuffle index lists so labels and samples stay aligned.
    train_order = list(range(len(train_y)))
    test_order = list(range(len(test_y)))
    random.shuffle(train_order)
    random.shuffle(test_order)

    random_train_y = [train_y[pos] for pos in train_order]
    random_train_x = [train_x[pos] for pos in train_order]
    random_test_y = [test_y[pos] for pos in test_order]
    random_test_x = [test_x[pos] for pos in test_order]

    m = svmutil.svm_train(random_train_y, random_train_x,
                          "-s 0 -t 2 -c 32 -g 8 -b 1")
    prediction = svmutil.svm_predict(random_test_y, random_test_x, m, '-b 1')
    predict_label, accuary, prob_estimates = prediction
    svmutil.svm_save_model('/home/sunbite/Co_KNN_SVM_TMP/CoKNNSVM2.model', m)
    return accuary
def train_svm(self, C, gamma, model_output):
    """Train an RBF-kernel SVM on this object's data and save it.

    :param C: base-2 exponent of the cost; the SVM is trained with ``2**C``.
    :param gamma: base-2 exponent of the kernel gamma (``2**gamma``).
    :param model_output: path the trained model is saved to.
    """
    labels, features = self.labels, self.features
    cost, kernel_gamma = 2**C, 2**gamma
    options = "-h 0 -t 2 -c %f -g %f" % (cost, kernel_gamma)
    model = lib.svm_train(labels, features, options)
    lib.svm_save_model(model_output, model)
def saveModels(self, Dir):
    """Save the first ``self.inputDim`` models as ``Dir/model_<index>``.

    :param Dir: target directory; created with ``os.mkdir`` when missing.
    """
    print("Saving models...")
    if not os.path.isdir(Dir):
        os.mkdir(Dir)
    for index in xrange(self.inputDim):
        target = Dir + "/model_%s" % index
        svmutil.svm_save_model(target, self.models[index])
    print("Saving models...Done.")
def _train(self):
    """
    Train the SVM model and save it under ``MODEL_DIR``.

    Features are computed for each item returned by ``self._load_data()``
    and every item is labelled ``1``. The original note describes this as a
    one-class SVM whose data is split every ``self._duration`` before
    features are generated; that splitting is not visible in this method.

    :return: None
    """
    samples = self._load_data()
    features = [self._get_feature(sample) for sample in samples]
    labels = [1] * len(samples)
    svm_input = self._get_svm_format_data(features)
    model = svmutil.svm_train(labels, svm_input, self.model_parameter)
    target = os.path.join(MODEL_DIR, self.model_name)
    svmutil.svm_save_model(target, model)
def write_model(self, filename):
    """Store the model and the names.

    The model is written to *filename*. ``self.names`` is saved with
    ``np.save`` under the part of *filename* before ``.svm`` (NumPy appends
    ``.npy``).

    :param filename: model file path, expected to contain ``.svm``.
    :return: ``True`` when the model was written, ``False`` when
        ``self.model`` is ``None``.
    """
    if self.model is None:
        return False
    su.svm_save_model(filename, self.model)
    match = re.search(r"(.*)\.(svm)", filename)
    # Without a ".svm" part the old code crashed here, after the model had
    # already been written; fall back to the full file name instead.
    basename = match.group(1) if match else filename
    # write the names to npy
    np.save(basename, self.names)
    return True
def TrainWordClassifier(alpha, temp_char_dir='cache/chars'):
    """Train and save the linear and polynomial word-level SVM classifiers.

    Steps: run the word detector over the training images (word NMS off),
    evaluate the detections, compute one feature vector per detection
    (label ``1`` when the detection's element 1 is truthy, else ``-1``),
    min-max scale the features, pickle ``(alpha, min_vals, max_vals)`` to
    ``settings.word_clf_meta_name``, then train and save both SVMs.

    :param alpha: passed to ``WordDetectorBatch`` and stored in the metadata.
    :param temp_char_dir: character cache directory given to the detector.
    """
    img_dir = settings.img_train_dir
    lex_dir = settings.lex0_train_dir
    gt_dir = settings.img_train_gt_dir
    num_procs = settings.n_procs
    temp_word_dir = 'cache/words'

    WordDetectorBatch(img_dir, temp_char_dir, temp_word_dir, alpha,
                      settings.max_locations, settings.overlap_thr,
                      num_procs, lex_dir, apply_word_nms=False)
    eval_results = EvaluateWordDetection(gt_dir, temp_word_dir,
                                         img_dir=img_dir,
                                         create_visualization=False)
    gt_results = eval_results[0]
    dt_results = eval_results[1]
    precision_before = eval_results[2]
    recall_before = eval_results[3]
    thrs = eval_results[4]

    Y = []
    X_list = []
    n_features = -1
    for image_detections in dt_results:
        for detection in image_detections:
            word_score, char_bbs = detection[3], detection[4]
            features = ComputeWordFeatures(char_bbs, word_score)
            if n_features < 0:
                # Feature length is taken from the first detection seen.
                n_features = len(features)
            X_list.append(features)
            Y.append(1 if detection[1] else -1)
    assert n_features > 0

    # scale features
    # NOTE(review): a feature that is constant over all detections has a
    # zero range here -- confirm the division is safe for the real data.
    X_mat = np.vstack(X_list)
    min_vals = np.min(X_mat, axis=0)
    X_mat = X_mat - min_vals
    max_vals = np.max(X_mat, axis=0)
    X_mat = X_mat / max_vals

    alpha_min_max = (alpha, min_vals, max_vals)
    with open(settings.word_clf_meta_name, 'wb') as fid:
        cPickle.dump(alpha_min_max, fid)

    # libsvm sparse format: one {feature_index: value} dict per sample
    feature_ids = range(n_features)
    X = [dict(zip(feature_ids, x_i)) for x_i in X_mat.tolist()]

    svm_model = TrainSvmLinear2(Y, X)
    svm.svm_save_model(settings.word_clf_name, svm_model)
    svm_model_poly = TrainSvmPoly2(Y, X)
    svm.svm_save_model(settings.word_clf_poly_name, svm_model_poly)
def get_cross_val(x, y, x_val, y_val, gamma_c):
    """Train an RBF SVM on ``(x, y)`` and return its validation accuracy.

    The trained model is also saved to a file named ``model``.

    :param x: training samples.
    :param y: training labels.
    :param x_val: validation samples.
    :param y_val: validation labels.
    :param gamma_c: object providing the ``C`` and ``gamma`` attributes.
    :return: first element of the accuracy tuple from ``svm_predict``.
    """
    problem = svmutil.svm_problem(y, x)
    options = '-t 2 -q -c {0} -g {1}'.format(gamma_c.C, gamma_c.gamma)
    parameter = svmutil.svm_parameter(options)
    model = svmutil.svm_train(problem, parameter)
    svmutil.svm_save_model("model", model)
    _labels, accuracy, _values = svmutil.svm_predict(y_val, x_val, model)
    return accuracy[0]
def get_svm_bytes(self):
    """Return the session's SVM model serialised as a ``bytearray``.

    libsvm can only write a model to a file, so the model obtained from
    ``self.iqr_session.rel_index.get_model()`` is saved to a temporary file
    and read back. The file name is passed to libsvm as bytes.

    :return: contents of the saved model file.
    """
    import tempfile

    svm_model = self.iqr_session.rel_index.get_model()
    # A unique temp file avoids clashes between concurrent callers that the
    # old fixed "tmp_svm.model" in the working directory allowed.
    fd, tmp_file_name = tempfile.mkstemp(suffix=".model")
    os.close(fd)
    try:
        svmutil.svm_save_model(tmp_file_name.encode(), svm_model)
        with open(tmp_file_name, "rb") as f:
            return bytearray(f.read())
    finally:
        # Remove the file even when saving or reading fails.
        os.remove(tmp_file_name)
def get_svm_bytes(self):
    """Return the session's SVM model serialised as a ``bytearray``.

    libsvm can only write a model to a file, so the model obtained from
    ``self.iqr_session.rel_index.get_model()`` is saved to a temporary file
    and read back.

    :return: contents of the saved model file.
    """
    import tempfile

    svm_model = self.iqr_session.rel_index.get_model()
    # A unique temp file avoids clashes between concurrent callers that the
    # old fixed "tmp_svm.model" in the working directory allowed.
    fd, tmp_file_name = tempfile.mkstemp(suffix=".model")
    os.close(fd)
    try:
        svmutil.svm_save_model(tmp_file_name, svm_model)
        with open(tmp_file_name, "rb") as f:
            return bytearray(f.read())
    finally:
        # Remove the file even when saving or reading fails.
        os.remove(tmp_file_name)
def iqr_model_train(filepath_model, matrix_kernel_train, labels_train,
                    idx2clipid, svm_para='-w1 50 -t 4 -b 1 -c 1'):
    """
    Light-weight SVM learning module for online IQR.

    @param filepath_model: full path where the learned SVM model is saved
    @param matrix_kernel_train: n-by-n square numpy array with kernel values
                                between training data
    @param labels_train: numpy array of row-wise training labels (1 or True
                         indicates positive, 0 or False otherwise)
    @param idx2clipid: indexable; idx2clipid[row_idx] gives the clipid for
                       the 0-based row of the matrix
    @param svm_para: (optional) SVM learning parameter string
    @rtype: dictionary
    @return: dict with 'model' (the trained model) and 'clipids_SVs' (list
             of clipids of the support vectors)
    @raise Exception: if the learned model has no support vectors
    """
    # set training inputs: put a 1-based serial number in front of every row,
    # as the precomputed-kernel option (-t 4) requires
    n_train = len(matrix_kernel_train)
    serials = np.arange(1, n_train + 1)
    matrix_kernel_train = np.vstack((serials, matrix_kernel_train)).T
    print("Done matrix_kernel_train")

    problem = svm.svm_problem(labels_train.tolist(),
                              matrix_kernel_train.tolist(),
                              isKernel=True)
    print("Done problem")
    svm_param = svm.svm_parameter(svm_para)
    print("Done svm_param")

    # train model
    model = svmutil.svm_train(problem, svm_param)
    print("Done train model")

    # release memory
    del problem
    del svm_param
    print("Done release memory")

    # check learning failure
    if model.l == 0:
        raise Exception('svm model learning failure')

    n_SVs = model.l
    idxs_train_SVs = svmtools.get_SV_idxs_nonlinear_svm(model)
    clipids_SVs = []
    for sv_pos in range(n_SVs):
        # the helper reports 1-based training indices
        train_row = idxs_train_SVs[sv_pos] - 1
        clipids_SVs.append(idx2clipid[train_row])
        # within the SVM model the serial number must be 1-based
        model.SV[sv_pos][0].value = sv_pos + 1
    print("Done checking learning failure")

    svmutil.svm_save_model(filepath_model, model)
    return {'model': model, 'clipids_SVs': clipids_SVs}
def learn_compact_nonlinear_svm(file_libsvm_model0, file_SVs, file_libsvm_model1, file_svm_compact, str_kernel,
                                options_train, target_class, labels,
                                data, file_data,
                                kernel_matrix, file_kernel_matrix, kernel_matrix_type, flag_kernel_compute,
                                splits, func_sort, logfile):
    """
    Save the initial SVM model, derive its compact form and write both out.

    NOTE: the training step is still a placeholder (model0 is None). Only
    file_libsvm_model0, file_SVs, file_libsvm_model1, file_svm_compact,
    str_kernel, target_class, data and logfile are read by the current body.

    @param file_libsvm_model0: file path for the learned SVM model to be saved in libsvm format
    @param file_SVs: filename of support vectors to be saved in numpy format
    @param file_libsvm_model1: file path for compact SVM, still stored in libsvm format
    @param file_svm_compact: file path to the full compact svm model to be written (with many other info)
    @param str_kernel: string of kernel function to be used (e.g., kernels.ngd etc)
    @param options_train: list of libsvm training strings to be tried, e.g., ['-b 1 -c 1','-b 1 -c 1000']
    @param target_class: target positive class
    @param labels: ground truth labels in integers.
                   Positive integers for event kit positives, negatives for event kit negs, zero for None.
    @param data: training data, numpy row-wise matrix. If None and kernel_matrix does not exist, then read from file_data
    @param file_data: file path to the input training 'data'. If data is None, then read from this file
    @param kernel_matrix: kernel matrix
    @param file_kernel_matrix: if kernel matrix is None, and this path is not, then loaded from this file.
                               if flag_kernel_compute==True, then computed kernel is saved to this file.
    @param kernel_matrix_type: 'numpy' (square numpy matrix) or 'libsvm' (2dim list-type ready for libsvm)
    @param flag_kernel_compute: if True, re-compute kernel matrix
    @param splits: integer-based splits in numpy vector, e.g., [1 1 2 2 3 3] for 6 data in three splits
    @param func_sort: currently unused
    @param logfile: log file where info will be written; logging is skipped when falsy
    """
    _log = open(logfile, 'wb') if logfile else None

    def _note(message):
        # Logging is optional: without a log file the message is dropped
        # instead of failing on a None handle.
        if _log is not None:
            _log.write(message)

    try:
        # Learn nonlinear SVM model & save this initial model (before compactization) in libsvm format
        model0 = None
        # TODO: add training code with full data training
        svmutil.svm_save_model(file_libsvm_model0, model0)
        _note('Saved initial nonlinear SVM (model0) at: %s\n' % file_libsvm_model0)

        # compute compact SVM model 'model1'
        (model1, SVs) = get_compact_nonlinear_svm(model0, data)

        # write compact SVM model, with all the required information
        write_compact_nonlinear_svm(file_svm_compact, target_class,
                                    file_libsvm_model1, svm_model=model1,
                                    file_SVs=file_SVs, SVs=SVs,
                                    str_kernel=str_kernel)
        _note('Saved compact nonlinear SVM at: %s\n' % file_svm_compact)
    finally:
        # Close the log even when one of the steps above raises.
        if _log is not None:
            _log.close()
def run(history):
    """Train an SVM, save it to ``temp.svm``, predict and plot the result.

    Training and prediction both use the names ``x``, ``y`` and ``options``
    resolved from the enclosing scope; elapsed times are printed for the
    two stages.

    :param history: not read by the active code.
    """
    # NOTE(review): the original carries `w, x, y = history.grab(nsample,
    # ntracks, 0)` and a second `timer = timer_c()` as disabled lines --
    # confirm they are meant to stay disabled.
    timer = timer_c()
    lsvm = svmutil.svm_train(y, x, options)
    svmutil.svm_save_model('temp.svm', lsvm)
    train_seconds = float('{0:.3f}'.format(timer.lag()))
    print('svm trained successfully in %s sec.' % (str(train_seconds)))

    p_labels, p_acc, p_vals = svmutil.svm_predict(y, x, lsvm, '')
    predict_seconds = float('{0:.3f}'.format(timer.lag()))
    print('svm predicted successfully in %s sec.' % (str(predict_seconds)))
    plotdraw(p_labels, y)
def train_grasp(grasp_type, side):
    """
    Train (or load) the linear SVM for one grasp type and record test results.

    The libsvm training file and the model are cached under ``model/`` and
    only regenerated when missing. Training weights each class by
    ``n_samples / n_classes / class_count``. Predictions for the test set
    are written to ``result/grasp_<grasp_type>_<side>.csv``.

    grasp_type: hand grasping type (string)
    side: left hand or right hand (string)
    """
    tag = grasp_type + "_" + side

    # train
    datafile = "model/traindata_grasp_" + tag
    if not os.path.isfile(datafile):
        srcfile = "data/feature_grasp_train.csv"
        write_svmdata_grasp(srcfile, datafile, grasp_type, side, 0)
    label_train, data_train = svmutil.svm_read_problem(datafile)

    modelfile = "model/model_grasp_" + tag
    if not os.path.isfile(modelfile):
        print("train model: " + tag)
        # Count the samples of each class.
        label_count = {}
        for v in label_train:
            label_count[v] = label_count.get(v, 0) + 1
        sorted_label = sorted(label_count)
        # One "-w<label> <weight>" option per class, inversely proportional
        # to the class frequency.
        param_weight = ' '
        for v in sorted_label:
            weight = float(len(label_train)) / len(sorted_label) / label_count[v]
            param_weight += '-w%d %f ' % (v, weight)
        options = '-t 0 -b 1 -q' + param_weight
        prob = svmutil.svm_problem(label_train, data_train)
        param = svmutil.svm_parameter(options)
        print(options)
        m = svmutil.svm_train(prob, param)
        svmutil.svm_save_model(modelfile, m)
    else:
        print("load model: " + tag)
        m = svmutil.svm_load_model(modelfile)

    # test
    grasp_info = read_info("data/feature_grasp_test.csv", side)
    datafile = "model/testdata_grasp_" + tag
    if not os.path.isfile(datafile):
        srcfile = "data/feature_grasp_test.csv"
        write_svmdata_grasp(srcfile, datafile, grasp_type, side, 1)
    label_test, data_test = svmutil.svm_read_problem(datafile)
    p_label, p_acc, p_val = svmutil.svm_predict(label_test, data_test, m, '-b 1')

    # The context manager closes the result file even if a row fails.
    with open("result/grasp_" + tag + ".csv", "w") as f_result:
        for i in range(len(p_label)):
            f_result.write(grasp_info[i] + ", " + str(int(label_test[i])) +
                           ", " + str(int(p_label[i])) + ", ")
            f_result.write("[%.4f]\n" % p_val[i][0])
def learn_compact_nonlinear_svm(
        file_libsvm_model0, file_SVs, file_libsvm_model1, file_svm_compact,
        str_kernel, options_train, target_class, labels, data, file_data,
        kernel_matrix, file_kernel_matrix, kernel_matrix_type,
        flag_kernel_compute, splits, func_sort, logfile):
    """
    Save the initial SVM model, derive its compact form and write both out.

    NOTE: the training step is still a placeholder (model0 is None). Only
    file_libsvm_model0, file_SVs, file_libsvm_model1, file_svm_compact,
    str_kernel, target_class, data and logfile are read by the current body.

    @param file_libsvm_model0: file path for the learned SVM model to be saved in libsvm format
    @param file_SVs: filename of support vectors to be saved in numpy format
    @param file_libsvm_model1: file path for compact SVM, still stored in libsvm format
    @param file_svm_compact: file path to the full compact svm model to be written (with many other info)
    @param str_kernel: string of kernel function to be used (e.g., kernels.ngd etc)
    @param options_train: list of libsvm training strings to be tried, e.g., ['-b 1 -c 1','-b 1 -c 1000']
    @param target_class: target positive class
    @param labels: ground truth labels in integers.
                   Positive integers for event kit positives, negatives for event kit negs, zero for None.
    @param data: training data, numpy row-wise matrix. If None and kernel_matrix does not exist, then read from file_data
    @param file_data: file path to the input training 'data'. If data is None, then read from this file
    @param kernel_matrix: kernel matrix
    @param file_kernel_matrix: if kernel matrix is None, and this path is not, then loaded from this file.
                               if flag_kernel_compute==True, then computed kernel is saved to this file.
    @param kernel_matrix_type: 'numpy' (square numpy matrix) or 'libsvm' (2dim list-type ready for libsvm)
    @param flag_kernel_compute: if True, re-compute kernel matrix
    @param splits: integer-based splits in numpy vector, e.g., [1 1 2 2 3 3] for 6 data in three splits
    @param func_sort: currently unused
    @param logfile: log file where info will be written; logging is skipped when falsy
    """
    _log = None
    if logfile:
        _log = open(logfile, 'wb')

    try:
        # Learn nonlinear SVM model & save this initial model (before compactization) in libsvm format
        model0 = None
        # TODO: add training code with full data training
        svmutil.svm_save_model(file_libsvm_model0, model0)
        # Logging is optional: skip the write instead of failing on None.
        if _log is not None:
            _log.write('Saved initial nonlinear SVM (model0) at: %s\n' % file_libsvm_model0)

        # compute compact SVM model 'model1'
        (model1, SVs) = get_compact_nonlinear_svm(model0, data)

        # write compact SVM model, with all the required information
        write_compact_nonlinear_svm(file_svm_compact, target_class,
                                    file_libsvm_model1, svm_model=model1,
                                    file_SVs=file_SVs, SVs=SVs,
                                    str_kernel=str_kernel)
        if _log is not None:
            _log.write('Saved compact nonlinear SVM at: %s\n' % file_svm_compact)
    finally:
        # Close the log even when one of the steps above raises.
        if _log is not None:
            _log.close()
def trainmodel(self, train, cv, test, modelsavepath):
    """Train an RBF SVM on a libsvm file, predict on it and save the model.

    :param train: path of the libsvm-format training file.
    :param cv: validation-set path; not used by the current code.
    :param test: test-set path; not used by the current code.
    :param modelsavepath: path the trained model is saved to.
    """
    # Read the training data.
    y, x = svmutil.svm_read_problem(train)
    problem = svm.svm_problem(y, x)
    options = svm.svm_parameter('-t 2 -c 0.5 -g 0.125 -b 1')
    model = svmutil.svm_train(problem, options)

    # The training file is read a second time and used as the prediction
    # input; the validation and test sets are not evaluated here.
    eval_y, eval_x = svmutil.svm_read_problem(train)
    svmutil.svm_predict(eval_y, eval_x, model, '-b 1')

    # Save the model.
    svmutil.svm_save_model(modelsavepath, model)
def train(self, document):
    """
    Train the SVM model and persist it together with its vocabulary.

    The model goes to ``self.model_file`` and the vocabulary to
    ``self.vocab_file``.

    :param document: document object
    """
    # Training options; only the positional part is used.
    train_args, _train_kwargs = svm_datas.svm_opts()
    self._vocab = self.generate_vocab_dict(document)
    labels, samples = self.fit(document, keep_on_doc=True)
    model = svmutil.svm_train(labels, samples, *train_args)
    svmutil.svm_save_model(self.model_file, model)
    generatePickle(self._vocab, self.vocab_file)
def train_model_main2(model_path, file):
    """Train a libsvm model on the first 50 samples of *file* and save it.

    The model is written to
    ``./<model_path>/<c>_feature.model`` where ``<c>`` is the first
    character of the third ``/``-separated component of *file*.

    :param model_path: directory name (relative to ``.``) for the model.
    :param file: path of a libsvm-format file holding feature values and
        labels; it must have at least three ``/``-separated components.
    """
    # LibSVM's Python bindings live in a fixed local checkout.
    svm_path = r"C:\Python36\risk_down\libsvm"
    svm_python_dir = svm_path + r"\python"
    # Add the directory once; appending on every call grew sys.path forever.
    if svm_python_dir not in sys.path:
        sys.path.append(svm_python_dir)
    import svmutil

    y, x = svmutil.svm_read_problem(file)
    model = svmutil.svm_train(y[:50], x[:50], '-c 4')
    model_path = './' + model_path + '/' + file.split('/')[2][0] + "_feature.model"
    svmutil.svm_save_model(model_path, model)
def example_make_model(img_kind, svm_params):
    """Build, train and save the SVM model for one image kind.

    The model is saved as ``data/<img_kind>.model``; progress is printed
    after each stage.

    :param img_kind: passed to ``build_problem`` and used in the file name.
    :param svm_params: libsvm option string for ``svm_parameter``.
    """
    subdir = "data/"

    problem = build_problem(img_kind)
    print("Prob built")

    param = svm.svm_parameter(svm_params)
    print("Params Set")

    problem_model = svmutil.svm_train(problem, param)
    print("Model built")

    model_path = subdir + img_kind + '.model'
    svmutil.svm_save_model(model_path, problem_model)
    print("Done")
def train():
    """Train an SVM with default options and save it.

    Reads the libsvm problem from ``__FEATURE_FILE`` and writes the model to
    ``Function.MODEL_FILE``.
    """
    print("Starting train process.")
    labels, samples = svmutil.svm_read_problem(__FEATURE_FILE)
    trained = svmutil.svm_train(labels, samples)
    svmutil.svm_save_model(Function.MODEL_FILE, trained)
    print("train process done.")
def train(train_file, fold_num, mapping=None, parameters={'-c': 1}, multilabel=None, output_folder=None):
    '''
    Given a training instance file and (optionally) a label mapping, adapt the
    training vectors to fit the mapping and build an SVM model.

    :param train_file: path of the training instance file.
    :param fold_num: 0-based fold index; the model goes to ``fold-NN`` with
        ``NN = fold_num + 1``.
    :param mapping: label mapping applied to the labels read from the file.
    :param parameters: option/value pairs for the training option string.
        The shared default dict is only read here, never modified.
    :param multilabel: when truthy, an indexable whose element 0 is used in
        the model file name and whose element 1 is the label that wins
        whenever it occurs among an instance's mapped labels.
    :param output_folder: base folder for models (default ``'models'``).
    :return: ``(model_file, distribution)`` where ``distribution`` maps each
        label to its share of the training instances.
    :raises ValueError: if every instance has label 0 after mapping.
    '''
    global classifier

    if not output_folder:
        output_folder = 'models'
    output_folder = os.path.join(output_folder, 'fold-{0:02d}'.format(fold_num+1))
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    if multilabel:
        temp_labels, instances = alt_read_problem(train_file)
        temp_labels = [[mapping[l] for l in label] for label in temp_labels]
        labels = []
        for temp_labs in temp_labels:
            if multilabel[1] in temp_labs:
                labels.append(multilabel[1])
            else:
                assert len(set(temp_labs)) == 1, "Something appears to be wrong with the intermediate mapping. There's still more than one label present for an instance: {0}".format(temp_labs)
                # multilabel[1] is known to be absent in this branch, so the
                # first label is the one to keep.
                labels.append(temp_labs[0])
    else:
        labels, instances = svm_read_problem(train_file)
        labels = reMap(labels, mapping)

    # Exclude instances which have 0 as their label
    kept = [(label, instance) for label, instance in zip(labels, instances) if label != 0]
    if not kept:
        raise ValueError('No training instances with a non-zero label in {0}'.format(train_file))
    labels, instances = zip(*kept)

    # Count every label in one pass instead of one full scan per label.
    counts = {}
    for label in labels:
        counts[label] = counts.get(label, 0) + 1
    distribution = {}
    for label, count in counts.items():
        distribution[label] = float(count) / len(labels)

    paramstring = ''.join(' {0} {1}'.format(param, value) for param, value in parameters.items())
    if classifier == 'libsvm' and '-b' not in parameters:
        paramstring += ' -b 1'
    paramstring += ' -q'

    if multilabel:
        model_file = os.path.join(output_folder, os.path.basename(train_file) + '.{0}.model'.format(multilabel[0]))
    else:
        model_file = os.path.join(output_folder, os.path.basename(train_file) + '.model')

    print('---training')
    model = svm_train(labels, instances, paramstring)
    svm_save_model(model_file, model)

    return model_file, distribution
def create_model(self, datasets, opt, opp, part_ids=None):
    """Train an SVM on the chosen partitions and save it.

    The model is saved to ``<self._dir>/model/<self._name>.model``.

    :param datasets: object providing ``pids`` and ``mkTrain(part_ids)``.
    :param opt: training options handed to ``svmutil.svm_train``.
    :param opp: not used by this method.
    :param part_ids: partitions to train on; defaults to ``datasets.pids``.
    """
    # Should groups and ngroups be idch ?
    selected = datasets.pids if part_ids is None else part_ids
    training_set = datasets.mkTrain(selected)
    ptrn, _itrain = train2svm_prob(training_set)
    print("create model ...")
    model = svmutil.svm_train(ptrn, opt)
    model_name = "%s/model/%s.model" % (self._dir, self._name)
    svmutil.svm_save_model(model_name, model)
def create_model(self, datasets, opt, opp, part_ids=None):
    """Build and persist the SVM model for the selected partitions.

    :param datasets: source of the training data (``pids``, ``mkTrain``).
    :param opt: options passed straight to ``svmutil.svm_train``.
    :param opp: accepted for interface compatibility; not read here.
    :param part_ids: partition ids to train on; ``None`` means
        ``datasets.pids``.
    """
    # Should groups and ngroups be idch ?
    if part_ids is None:
        part_ids = datasets.pids

    problem_and_index = train2svm_prob(datasets.mkTrain(part_ids))
    ptrn, itrain = problem_and_index

    print("create model ...")
    trained = svmutil.svm_train(ptrn, opt)

    # Models are stored under "<dir>/model/<name>.model".
    destination = "%s/model/%s.model" % (self._dir, self._name)
    svmutil.svm_save_model(destination, trained)
def main():
    """Train or load the SVM, classify every image and save the misses.

    Up to 600 positive and 600 negative images are read and described. The
    model is loaded from ``../modelo_svm.model``; when loading yields
    ``None`` it is obtained from 10-fold ``cross_validation`` and saved.
    Each image copy gets its last five rows painted green (correct) or red
    (wrong); wrong ones are written to ``../salidas/``. The process then
    exits with status 0.
    """
    positive_dir = '../pedestres/'
    # The original first set '../no_pedestres/' and immediately replaced it.
    negative_dir = '../floresRecortadas/'
    output_dir = '../salidas/'
    model_file = '../modelo_svm.model'
    max_positive = 600
    max_negative = 600
    k_val = 10

    print('leyendo imagenes')
    positive_images = obtener_imagenes(positive_dir)
    negative_images = obtener_imagenes(negative_dir)
    positive_images = positive_images[:max_positive]
    negative_images = negative_images[:max_negative]

    print('obteniendo_descriptores')
    positive_descriptors = obtener_descriptores(positive_images)
    negative_descriptors = obtener_descriptores(negative_images)

    images = positive_images + negative_images
    inputs = positive_descriptors + negative_descriptors
    expected_outputs = ([1] * len(positive_descriptors)
                        + [0] * len(negative_descriptors))

    print('intentando cargar modelo desde archivo')
    model = svmutil.svm_load_model(model_file)
    if model is None:
        print('ejecutando cross validation')
        (mean, std_dev, model) = cross_validation(inputs, expected_outputs, k_val)
        print('Promedio:', mean)
        print('Desviación estándar:', std_dev)
        print('Guardando modelo en archivo')
        svmutil.svm_save_model(model_file, model)

    # Image tests start here.
    print('Clasificando imagenes')
    for i, source_image in enumerate(images):
        image = source_image.copy()
        expected = expected_outputs[i]
        predicted = clasificar_imagen(model, inputs[i], expected)
        is_correct = predicted == expected
        pixel_value = [0, 255, 0] if is_correct else [0, 0, 255]

        # Paint the last five rows with the verdict colour.
        height = len(image)
        for row in range(height - 5, height):
            for col in range(len(image[row])):
                image[row][col][:] = pixel_value

        if not is_correct:
            file_name = output_dir + str(i + 1).zfill(3) + '.jpg'
            cv2.imwrite(file_name, image)
    sys.exit(0)
def g_value_fun(g):
    """Train or load the RBF SVM for gamma *g* (cost 0.25) and score it.

    The model is cached in ``c_0.25_g_<g>.model`` in the working directory.
    Training uses ``x_svm`` / ``y_svm`` and the ROC curve is computed on
    ``x_svm_test`` / ``y_svm_test``, all taken from the enclosing scope.

    :param g: kernel gamma.
    :return: ``[model, [], roc_curve, auc]``.
    """
    fmodel = 'c_{c:.2f}_g_{g:.3f}.model'.format(c=0.25, g=g)
    if not os.path.exists(fmodel):
        print("model %s not found, training ..." % fmodel)
        opt_str = '-c {c} -t 2 -g {g} -b 1'.format(c=0.25, g=g)
        m = svmutil.svm_train(y_svm, x_svm, opt_str)
        print("saving model %s ..." % fmodel)
        svmutil.svm_save_model(fmodel, m)
    else:
        print("model %s found, loading ..." % fmodel)
        m = svmutil.svm_load_model(fmodel)
        print("model %s loaded" % fmodel)

    clsfr = classifier.SVM_Classifier(m)
    roc_curve = clsfr.CalROC(x_svm_test, y_svm_test)
    auc = clsfr.CalAUC()
    return [m, [], roc_curve, auc]
def train_manipulation(mnp_type):
    """
    Train (or load) the linear SVM for one manipulation type and record
    the test predictions.

    The libsvm training file and the model are cached under
    ``manipulate/model/`` and only regenerated when missing. Training
    weights each class by ``n_samples / n_classes / class_count``.
    Predictions for the test set are written to
    ``manipulate/result/mnp_<mnp_type>.csv``.

    mnp_type: manipulation type (string)
    """
    # train
    datafile = "manipulate/model/traindata_mnp_" + mnp_type
    if not os.path.isfile(datafile):
        srcfile = "manipulate/data/feature_mnp_train.csv"
        write_svmdata_mnp(srcfile, datafile, mnp_type, 0)
    label_train, data_train = svmutil.svm_read_problem(datafile)

    modelfile = "manipulate/model/model_mnp_" + mnp_type
    if not os.path.isfile(modelfile):
        print("train model: " + mnp_type)
        # Count the samples of each class.
        label_count = {}
        for v in label_train:
            label_count[v] = label_count.get(v, 0) + 1
        sorted_label = sorted(label_count)
        # One "-w<label> <weight>" option per class, inversely proportional
        # to the class frequency.
        param_weight = ' '
        for v in sorted_label:
            weight = float(len(label_train)) / len(sorted_label) / label_count[v]
            param_weight += '-w%d %f ' % (v, weight)
        options = '-t 0 -b 1 -q' + param_weight
        prob = svmutil.svm_problem(label_train, data_train)
        param = svmutil.svm_parameter(options)
        print(options)
        m = svmutil.svm_train(prob, param)
        svmutil.svm_save_model(modelfile, m)
    else:
        print("load model: " + mnp_type)
        m = svmutil.svm_load_model(modelfile)

    # test
    mnp_info = read_info("manipulate/data/feature_mnp_test.csv")
    datafile = "manipulate/model/testdata_mnp_" + mnp_type
    if not os.path.isfile(datafile):
        srcfile = "manipulate/data/feature_mnp_test.csv"
        write_svmdata_mnp(srcfile, datafile, mnp_type, 1)
    label_test, data_test = svmutil.svm_read_problem(datafile)
    p_label, p_acc, p_val = svmutil.svm_predict(label_test, data_test, m, '-b 1')

    # The context manager closes the result file even if a row fails.
    with open("manipulate/result/mnp_" + mnp_type + ".csv", "w") as f_result:
        for i in range(len(p_label)):
            f_result.write(mnp_info[i] + ", " + str(int(label_test[i])) +
                           ", " + str(int(p_label[i])) + ", ")
            f_result.write("[%.4f]\n" % p_val[i][0])
def main():
    """Build samples from ``../data/dbf/2017.dbf``, train an SVM and predict.

    The JSON file provides ``layout`` (column names) and ``record`` (one
    table per code). Tables with fewer than 180 rows are skipped, as are
    tables whose row 89 does not carry the largest first-column value seen
    among the remaining tables. For each kept table and each ``i`` in
    ``range(nsample)`` the feature row is the ``key`` columns of rows
    ``i+1 .. i+ntracks`` and the label is the ``PCHG`` value of row ``i``,
    shifted by 10, clamped to ``[0, 19.99]`` and bucketed into ``nlabels``
    classes. The model is saved to ``temp.svm``.

    ``key``, ``ntracks``, ``nsample`` and ``nlabels`` come from the
    enclosing scope.
    """
    timer = timer_c()
    with open('../data/dbf/2017.dbf', 'r') as dbf:
        history = json.load(dbf)
    seven = history['layout'].index('PCHG')
    print('seven at %d.' % (seven))

    first = None
    for code, table in history['record'].iteritems():
        if len(table) < 180:
            continue
        if first is None or first < table[89][0]:
            first = table[89][0]

    # Collect rows in plain lists and build the arrays once; numpy.append
    # inside the loop re-copied the whole array for every sample.
    rows = []
    labels = []
    for code, table in history['record'].iteritems():
        # skip the stock has less than 180 rows of record
        if len(table) < 180:
            continue
        # skip the stock has suspended in 90 trading days
        if table[89][0] != first:
            continue
        # form mat & res
        for i in xrange(nsample):
            # mat++
            row = []
            for r in xrange(i + 1, i + ntracks + 1):
                for offset in key:
                    row.append(table[r][offset])
            rows.append(row)
            # res++
            hg = table[i][seven] + 10.0
            hg = min(hg, 19.99)
            hg = max(hg, 0.0)
            labels.append(int(hg / (20.0 / nlabels)))

    if rows:
        # float32, as before, so the values handed to libsvm are unchanged
        mat = numpy.array(rows, dtype='f')
    else:
        mat = numpy.empty((0, len(key) * ntracks), dtype='f')
    res = numpy.array(labels, dtype='i')
    print('mat & res loaded successfully in %s sec.' % (str(
        float('{0:.3f}'.format(timer.lag())))))

    # NOTE(review): the original carries `timer.reset()` as a disabled line
    # here -- confirm it is meant to stay disabled.
    lsvm = svmutil.svm_train(res.tolist(), mat.tolist(),
                             '-s 0 -t 0 -g 1.00 -c 1000000.00 -b 0 -q')
    svmutil.svm_save_model('temp.svm', lsvm)
    print('svm trained successfully in %s sec.' % (str(
        float('{0:.3f}'.format(timer.lag())))))
    p_labels, p_acc, p_vals = svmutil.svm_predict(res.tolist(), mat.tolist(),
                                                  lsvm, '')
def test_model_from_byte(self):
    """Check that an SVM model survives a round trip through bytes.

    The original model and the model rebuilt from ``get_svm_bytes()`` are
    both saved to files, which must compare equal.
    """
    # The original model
    svm_model_1 = self.iqr_session.rel_index.get_model()
    model_1_file, model_2_file = "tmp_svm_1.model", "tmp_svm_2.model"
    try:
        svmutil.svm_save_model(model_1_file, svm_model_1)
        # Get the bytes for the model first.
        model_bytes = self.get_svm_bytes()
        # Use the bytes to create a model
        svm_model_2 = self.get_model_from_bytes(model_bytes)
        # Save the model created using the bytes
        svmutil.svm_save_model(model_2_file, svm_model_2)
        # Check that the model created using the bytes is the same as the
        # original model.
        assert filecmp.cmp(model_1_file, model_2_file) is True
    finally:
        # Clean up even when a step above or the assertion fails.
        for path in (model_1_file, model_2_file):
            if os.path.exists(path):
                os.remove(path)
def test_model_from_byte(self):
    """Check that an SVM model survives a round trip through bytes.

    The original model and the model rebuilt from ``get_svm_bytes()`` are
    both saved to files (names passed to libsvm as bytes), which must
    compare equal.
    """
    # The original model
    svm_model_1 = self.iqr_session.rel_index.get_model()
    model_1_file, model_2_file = "tmp_svm_1.model", "tmp_svm_2.model"
    try:
        svmutil.svm_save_model(model_1_file.encode(), svm_model_1)
        # Get the bytes for the model first.
        model_bytes = self.get_svm_bytes()
        # Use the bytes to create a model
        svm_model_2 = self.get_model_from_bytes(model_bytes)
        # Save the model created using the bytes
        svmutil.svm_save_model(model_2_file.encode(), svm_model_2)
        # Check that the model created using the bytes is the same as the
        # original model.
        assert filecmp.cmp(model_1_file, model_2_file) is True
    finally:
        # Clean up even when a step above or the assertion fails.
        for path in (model_1_file, model_2_file):
            if os.path.exists(path):
                os.remove(path)
def main():
    """Train an SVR on per-item feature files and print its predictions.

    ``<resdir>/scores.txt`` holds one score per line; item ``i`` (1-based)
    has its features in ``<resdir>/features/<feature>/<i>.txt``. The model
    (``-s 4 -t 2``) is saved to ``<resdir>/svr.model`` and the result of
    predicting on the training data is printed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-r', '--resdir', type=str, required=True,
                        help="Results directory")
    parser.add_argument('-f', '--feature', type=str, required=True,
                        help='feature to use to learn')
    args = parser.parse_args()

    scores_path = os.path.join(args.resdir, 'scores.txt')
    feature_dir = os.path.join(args.resdir, 'features', args.feature)

    scores = np.fromfile(scores_path, sep='\n')
    feats = [
        np.fromfile(os.path.join(feature_dir, str(item) + '.txt'), sep='\n').tolist()
        for item in range(1, len(scores) + 1)
    ]
    print('Read all features')

    params = svmutil.svm_parameter('-s 4 -t 2')
    problem = svmutil.svm_problem(scores, feats)
    model = svmutil.svm_train(problem, params)
    svmutil.svm_save_model(os.path.join(args.resdir, 'svr.model'), model)
    print(svmutil.svm_predict(scores, feats, model))
def main(path):
    """Train a linear SVM on all pickled ``*WC`` files in *path*.

    The class label is the second character of each file name. The model
    (``-t 0 -c 4 -b 1``) is saved to ``n.model`` and then evaluated on the
    training data itself.

    :param path: directory prefix; it is concatenated directly with each
        file name, so it must end with a path separator.
    :return: the accuracy tuple returned by ``svmutil.svm_predict``.
    """
    label = []
    points = []
    for u in os.listdir(path):
        if u[-2:] == 'WC':
            filePath = path + u
            # NOTE: pickle executes arbitrary code on load; only use it on
            # trusted feature files.
            with open(filePath, 'rb') as handle:
                WC = pickle.load(handle)
            label.append(int(u[1]))
            points.append(WC)

    prob = svmutil.svm_problem(label, points)
    param = svmutil.svm_parameter('-t 0 -c 4 -b 1')
    m = svmutil.svm_train(prob, param)
    svmutil.svm_save_model('n.model', m)
    p_label, p_acc, p_val = svmutil.svm_predict(label, points, m, '-b 1')
    return p_acc
def train_attribute(attribute, side):
    """
    Train (or load) the linear SVM for one attribute and record test results.

    The libsvm training file and the model are cached under ``model/`` and
    only regenerated when missing. Precision, recall, F1 and accuracy on
    the test set are printed and the per-sample predictions are written to
    ``result/attribute_<attribute>_<side>.csv``.

    attribute: should be one from ["prismatic", "sphere", "flat", "rigid"]
    side: string that is appended to the file names and passed to the helpers
    """
    tag = attribute + "_" + side

    # train
    datafile = "model/traindata_attribute_" + tag
    if not os.path.isfile(datafile):
        srcfile = "data/feature_attribute_train.csv"
        write_svmdata_attribute(srcfile, datafile, attribute, side, 0)
    label_train, data_train = svmutil.svm_read_problem(datafile)

    modelfile = "model/model_attribute_" + tag
    if not os.path.isfile(modelfile):
        print("train model: " + tag)
        prob = svmutil.svm_problem(label_train, data_train)
        param = svmutil.svm_parameter('-t 0 -c 4 -b 1 -q')
        m = svmutil.svm_train(prob, param)
        svmutil.svm_save_model(modelfile, m)
    else:
        print("load model: " + tag)
        m = svmutil.svm_load_model(modelfile)

    # test
    attribute_info = read_info("data/feature_attribute_test.csv", side)
    datafile = "model/testdata_attribute_" + tag
    if not os.path.isfile(datafile):
        srcfile = "data/feature_attribute_test.csv"
        write_svmdata_attribute(srcfile, datafile, attribute, side, 1)
    label_test, data_test = svmutil.svm_read_problem(datafile)
    p_label, p_acc, p_val = svmutil.svm_predict(label_test, data_test, m, '-b 1')
    [precision, recall, f1, accuracy] = getF1(label_test, p_label)
    print("F1: [%.4f, %.4f, %.4f] Accuracy: %.4f" % (precision, recall, f1, accuracy))

    # The context manager closes the result file even if a row fails.
    with open("result/attribute_" + tag + ".csv", "w") as f_result:
        for i in range(len(p_label)):
            f_result.write(attribute_info[i] + ", " + str(int(label_test[i])) +
                           ", " + str(int(p_label[i])) + ", ")
            f_result.write("[%.4f]\n" % p_val[i][0])
def __getstate__(self):
    """
    Build the picklable state of this classifier.

    When there is no model, or when both the model and the label-map file
    paths are configured, the plain configuration dictionary is returned.
    Otherwise the in-memory model is serialized through a temporary file and
    embedded in the state together with the label map.
    """
    # Nothing to embed when no model has been trained/loaded.
    if not self.has_model():
        return self.get_config()
    # Nothing to embed either when both artifacts are file-backed.
    if self.svm_model_fp is not None and self.svm_label_map_fp is not None:
        return self.get_config()

    self._log.debug("Saving model to temp file for pickling")
    handle, tmp_path = tempfile.mkstemp()
    try:
        # libSVM writes by path, so the descriptor itself is not needed.
        os.close(handle)
        state = self.get_config()
        state.update(__LOCAL__=True, __LOCAL_LABELS__=self.svm_label_map)
        svmutil.svm_save_model(tmp_path, self.svm_model)
        with open(tmp_path, 'rb') as model_file:
            model_bytes = model_file.read()
        state['__LOCAL_MODEL__'] = model_bytes
        return state
    finally:
        os.remove(tmp_path)
def OnUpdate(self,event):
    """
    Merge the two selected sample files, train a libSVM model and save it.

    The files named by ``self.wTxt.Value`` and ``self.bTxt.Value`` are
    concatenated into ``train/t.train``, which is then read as a libSVM
    problem, trained with ``-c 5`` and saved as ``model_file.model``.
    Progress messages are appended to ``self.infoTxt``.

    :param event: the GUI event that triggered the handler (unused).
    """
    import sys
    libsvm_dir = '/home/guojian/Workspaces/FindFakeWeb/libsvm/python'
    # Avoid growing sys.path by one entry on every invocation.
    if libsvm_dir not in sys.path:
        sys.path.append(libsvm_dir)
    import svmutil
    self.infoTxt.AppendText('模块导入成功' + os.linesep)

    # Context managers close the handles even if a read/write fails.
    with open(self.wTxt.Value,'r') as wtf:
        wf = wtf.read()
    with open(self.bTxt.Value,'r') as btf:
        bf = btf.read()
    with open('train/t.train','w') as tf:
        tf.write(wf+bf)

    self.infoTxt.AppendText(wf+bf+os.linesep)
    self.infoTxt.AppendText('文件合并完成' + os.linesep)
    self.infoTxt.ShowPosition(self.infoTxt.GetLastPosition())

    y, x = svmutil.svm_read_problem('train/t.train')
    model = svmutil.svm_train(y, x, '-c 5')
    svmutil.svm_save_model('model_file.model',model)
    self.infoTxt.AppendText('训练模型构造完成,并且保存为文件model_file.model' + os.linesep)
    self.infoTxt.ShowPosition(self.infoTxt.GetLastPosition())
def write_compact_nonlinear_svm(file_compact_svm, target_class, file_svm_model,
                                svm_model=None, file_SVs=None, SVs=None,
                                str_kernel=None):
    """
    Writes a textfile with all the necessary file locations for (nonlinear) libSVM agent
    All the component files of 'file_compact_svm' will be written in the same directory

    @param file_compact_svm: file to be written with all the information below
    @param target_class: integer target class, e.g., 0 or 30.
    @param file_svm_model: filename to the compact svm model to be written
    @param svm_model: actual svm_model from get_compact_nonlinear_svm, which will be saved at file_svm_model (if not already)
    @param file_SVs: filename to the support vectors (only applicable if nonlinear SVM)
    @param SVs: actual support vectors in numpy format to be saved at file_SVs (if not already)
    @param str_kernel: string of kernel function to be used (e.g., kernels.ngd etc)
    @return: 1 if success
    """
    dir_compact = os.path.dirname(file_compact_svm)

    # Component files live next to the descriptor file.
    if svm_model:
        svmutil.svm_save_model(os.path.join(dir_compact, file_svm_model), svm_model)
    if SVs is not None:
        np.save(os.path.join(dir_compact, file_SVs), SVs)

    # The context manager flushes and closes the descriptor file on exit.
    with open(file_compact_svm, 'wb') as fout:
        fout.write('file_svm_model=%s\n' % file_svm_model)
        fout.write('target_class=%d\n' % target_class)
        if file_SVs:
            fout.write('file_SVs=%s\n' % file_SVs)
        if str_kernel:
            fout.write('str_kernel=%s\n' % str_kernel)

    # Honour the documented contract ("1 if success").
    return 1
def do_training(classifier_name, train_x, train_y, test_x, test_y):
    """
    Train the classifier selected by ``classifier_name`` and persist it.

    ``'LIBSVM'`` is handled with the libsvm API directly: the model is trained
    with ``-s 1 -t 1 -q -d 3``, saved under ``./models/`` and run once on the
    test set.  Any other name is looked up in the table of classifier
    factories; the fitted model is scored on the test set, the accuracy is
    printed, and a ``{name: model}`` dictionary is dumped with ``jl.dump``.

    :return: the trained model.
    """
    model_save_file = './models/' + classifier_name + '.model'

    if classifier_name == 'LIBSVM':
        labels = np.array(train_y).tolist()
        features = np.array(train_x).tolist()
        problem = svm_problem(labels, features)
        options = svm_parameter('-s 1 -t 1 -q -d 3')
        # options = svm_parameter('-t 2 -q')
        svm_model = svm_train(problem, options)
        svm_save_model('./models/{}.model'.format(classifier_name), svm_model)
        svm_predict(np.array(test_y).tolist(), np.array(test_x).tolist(), svm_model)
        return svm_model

    factories = {
        'NB': naive_bayes_classifier,
        'KNN': knn_classifier,
        'LR': logistic_regression_classifier,
        'RF': random_forest_classifier,
        'DT': decision_tree_classifier,
        'SVM': svm_classifier,
        'SVMCV': svm_cross_validation,
        'GBDT': gradient_boosting_classifier,
        'ADA': ada_boosting_classifier,
        'MLP': mlp_classifier,
        'XGBOOST': xgboost_classifier,
    }
    build = factories[classifier_name]
    fitted = build(train_x, train_y)

    predicted = fitted.predict(test_x)
    score = metrics.accuracy_score(test_y, predicted)
    print('accuracy: %.2f%%' % (100 * score))

    jl.dump({classifier_name: fitted}, model_save_file)
    return fitted
def train(request): points = models.Point2d.objects.all() # Storing the information to be presented to SVM labels = [] inputs = [] # For each point, store the information into arrays for p in points: labels.append(p.label) inputs.append([p.x, p.y]) prob = svm.svm_problem(labels, inputs) param = svm.svm_parameter('-t 2 -c 100') model = svmutil.svm_train(prob, param) try: svmutil.svm_save_model('libsvm.model', model) except Exception as e: print "error: ", e, "\n" data = {"status": "trained"} return json(data)
def train(request): points = models.Point2d.objects.all() # Storing the information to be presented to SVM labels = [] inputs = [] # For each point, store the information into arrays for p in points: labels.append( p.label ) inputs.append([p.x, p.y]) prob = svm.svm_problem(labels, inputs) param = svm.svm_parameter('-t 2 -c 100') model = svmutil.svm_train(prob, param) try: svmutil.svm_save_model('libsvm.model', model) except Exception as e: print "error: ", e, "\n" data = {"status": "trained"} return json(data)
def save_model(self, filename):
    """Write the libSVM model held in ``self.model`` to ``filename``."""
    model = self.model
    svmutil.svm_save_model(filename, model)
def test_conversion_from_filesystem(self):
    """
    Check that a libSVM model saved to disk can be converted from its path.

    The model in ``self.libsvm_model`` is written to a temporary file and the
    file path is handed to ``libsvm.convert``; the test passes when the
    conversion does not raise.
    """
    import os

    # mkstemp creates the file atomically (tempfile.mktemp is race-prone and
    # deprecated); libSVM only needs the path, so the descriptor is closed.
    fd, libsvm_model_path = tempfile.mkstemp(suffix='model.libsvm')
    os.close(fd)
    try:
        svmutil.svm_save_model(libsvm_model_path, self.libsvm_model)
        spec = libsvm.convert(libsvm_model_path, 'data', 'target')
    finally:
        # Do not leave the temporary model behind.
        os.remove(libsvm_model_path)
def _train(self, class_examples, **extra_params):
    """
    Internal method that trains the classifier implementation.

    This method is called after checking that there is not already a model
    trained, thus it can be assumed that no model currently exists.

    The class labels will have already been checked before entering this
    method, so it can be assumed that the ``class_examples`` will contain
    at least two classes.

    :param class_examples: Dictionary mapping class labels to iterables of
        DescriptorElement training examples.
    :type class_examples: dict[collections.abc.Hashable,
             collections.abc.Iterable[smqtk.representation.DescriptorElement]]

    :param extra_params: Dictionary with extra parameters for training.
        This is not used by this implementation.
    :type extra_params: None | dict[basestring, object]

    """
    # Offset from 0 for positive class labels to use
    # - not using label of 0 because we think libSVM wants positive labels
    CLASS_LABEL_OFFSET = 1

    # Stuff for debug reporting: libSVM is run quietly ("-q") unless this
    # logger is at DEBUG level.
    param_debug = {'-q': ''}
    if self._log.getEffectiveLevel() <= logging.DEBUG:
        param_debug = {}

    # Form libSVM problem input values
    self._log.debug("Formatting problem input")
    train_labels = []
    train_vectors = []
    train_group_sizes = []  # number of examples per class
    self.svm_label_map = {}
    # Making SVM label assignment deterministic to alphabetic order
    for i, l in enumerate(sorted(class_examples), CLASS_LABEL_OFFSET):
        # Map integer SVM label to semantic label
        self.svm_label_map[i] = l

        self._log.debug('-- class %d (%s)', i, l)
        # requires a sequence, so making the iterable ``g`` a tuple
        g = class_examples[l]
        if not isinstance(g, collections.abc.Sequence):
            self._log.debug(' (expanding iterable into sequence)')
            g = tuple(g)

        train_group_sizes.append(float(len(g)))
        x = numpy.array(DescriptorElement.get_many_vectors(g))
        x = self._norm_vector(x)
        train_labels.extend([i] * x.shape[0])
        train_vectors.extend(x.tolist())
        del g, x

    # Note the trailing space after "vectors": the two literals are
    # concatenated into a single message.
    assert len(train_labels) == len(train_vectors), \
        "Count mismatch between parallel labels and descriptor vectors " \
        "being sent to libSVM (%d != %d)" \
        % (len(train_labels), len(train_vectors))

    self._log.debug("Forming train params")
    #: :type: dict
    params = deepcopy(self.train_params)
    params.update(param_debug)
    # Calculating class weights if set to C-SVC type SVM
    if '-s' not in params or int(params['-s']) == 0:
        # (john.moeller): The weighting should probably be the geometric
        # mean of the number of examples over the classes divided by the
        # number of examples for the current class.
        gmean = scipy.stats.gmean(train_group_sizes)
        for i, n in enumerate(train_group_sizes, CLASS_LABEL_OFFSET):
            w = gmean / n
            params['-w' + str(i)] = w
            self._log.debug("-- class '%s' weight: %s",
                            self.svm_label_map[i], w)

    self._log.debug("Making parameters obj")
    svm_params = svmutil.svm_parameter(self._gen_param_string(params))
    self._log.debug("Creating SVM problem")
    svm_problem = svm.svm_problem(train_labels, train_vectors)
    del train_vectors
    self._log.debug("Training SVM model")
    self.svm_model = svmutil.svm_train(svm_problem, svm_params)
    self._log.debug("Training SVM model -- Done")

    if self.svm_label_map_elem and self.svm_label_map_elem.writable():
        self._log.debug("saving labels to element (%s)",
                        self.svm_label_map_elem)
        self.svm_label_map_elem.set_bytes(
            pickle.dumps(self.svm_label_map, -1)
        )
    if self.svm_model_elem and self.svm_model_elem.writable():
        self._log.debug("saving model to element (%s)",
                        self.svm_model_elem)
        # LibSvm I/O only works with filepaths, thus the need for an
        # intermediate temporary file.
        fd, fp = tempfile.mkstemp()
        try:
            # Wrap the descriptor before saving so that it is closed even
            # when svm_save_model raises (it used to leak in that case).
            # fdopen() is required because in Python 2 open() does not
            # accept a file descriptor.
            with os.fdopen(fd, 'rb') as f:
                svmutil.svm_save_model(fp, self.svm_model)
                self.svm_model_elem.set_bytes(f.read())
        finally:
            os.remove(fp)
def _train(self, class_examples, **extra_params):
    """
    Internal method that trains the classifier implementation.

    This method is called after checking that there is not already a model
    trained, thus it can be assumed that no model currently exists.

    The class labels will have already been checked before entering this
    method, so it can be assumed that the ``class_examples`` will contain
    at least two classes.

    :param class_examples: Dictionary mapping class labels to iterables of
        DescriptorElement training examples.
    :type class_examples: dict[collections.Hashable,
             collections.Iterable[smqtk.representation.DescriptorElement]]

    :param extra_params: Dictionary with extra parameters for training.
        This is not used by this implementation.
    :type extra_params: None | dict[basestring, object]

    """
    # Offset from 0 for positive class labels to use
    # - not using label of 0 because we think libSVM wants positive labels
    CLASS_LABEL_OFFSET = 1

    # Stuff for debug reporting: at DEBUG level libSVM is not silenced and
    # elements_to_matrix is given a report interval.
    etm_ri = None
    param_debug = {'-q': ''}
    if self._log.getEffectiveLevel() <= logging.DEBUG:
        etm_ri = 1.0
        param_debug = {}

    # Form libSVM problem input values
    self._log.debug("Formatting problem input")
    train_labels = []
    train_vectors = []
    train_group_sizes = []  # number of examples per class
    self.svm_label_map = {}
    # Making SVM label assignment deterministic to alphabetic order
    for i, l in enumerate(sorted(class_examples), CLASS_LABEL_OFFSET):
        # Map integer SVM label to semantic label
        self.svm_label_map[i] = l

        self._log.debug('-- class %d (%s)', i, l)
        # requires a sequence, so making the iterable ``g`` a tuple
        g = class_examples[l]
        if not isinstance(g, collections.Sequence):
            self._log.debug(' (expanding iterable into sequence)')
            g = tuple(g)

        train_group_sizes.append(float(len(g)))
        x = elements_to_matrix(g, report_interval=etm_ri)
        x = self._norm_vector(x)
        train_labels.extend([i] * x.shape[0])
        train_vectors.extend(x.tolist())
        del g, x

    # Note the trailing space after "vectors": the two literals are
    # concatenated into a single message.
    assert len(train_labels) == len(train_vectors), \
        "Count mismatch between parallel labels and descriptor vectors " \
        "being sent to libSVM (%d != %d)" \
        % (len(train_labels), len(train_vectors))

    self._log.debug("Forming train params")
    #: :type: dict
    params = deepcopy(self.train_params)
    params.update(param_debug)
    # Calculating class weights if set to C-SVC type SVM
    if '-s' not in params or int(params['-s']) == 0:
        # (john.moeller): The weighting should probably be the geometric
        # mean of the number of examples over the classes divided by the
        # number of examples for the current class.
        gmean = scipy.stats.gmean(train_group_sizes)
        for i, n in enumerate(train_group_sizes, CLASS_LABEL_OFFSET):
            w = gmean / n
            params['-w' + str(i)] = w
            self._log.debug("-- class '%s' weight: %s",
                            self.svm_label_map[i], w)

    self._log.debug("Making parameters obj")
    svm_params = svmutil.svm_parameter(self._gen_param_string(params))
    self._log.debug("Creating SVM problem")
    svm_problem = svm.svm_problem(train_labels, train_vectors)
    del train_vectors
    self._log.debug("Training SVM model")
    self.svm_model = svmutil.svm_train(svm_problem, svm_params)
    self._log.debug("Training SVM model -- Done")

    if self.svm_label_map_elem and self.svm_label_map_elem.writable():
        self._log.debug("saving labels to element (%s)",
                        self.svm_label_map_elem)
        self.svm_label_map_elem.set_bytes(
            cPickle.dumps(self.svm_label_map, -1)
        )
    if self.svm_model_elem and self.svm_model_elem.writable():
        self._log.debug("saving model to element (%s)",
                        self.svm_model_elem)
        # LibSvm I/O only works with filepaths, thus the need for an
        # intermediate temporary file.
        fd, fp = tempfile.mkstemp()
        try:
            # Wrap the descriptor before saving so that it is closed even
            # when svm_save_model raises (it used to leak in that case).
            # fdopen() is required because in Python 2 open() does not
            # accept a file descriptor.
            with os.fdopen(fd, 'rb') as f:
                svmutil.svm_save_model(fp, self.svm_model)
                self.svm_model_elem.set_bytes(f.read())
        finally:
            os.remove(fp)
svms = [] ws = [] bs = [] werrs = 0 for n in range(hl.n_out): print "Hidden neuron: %d" % n, print " Problem...", if n == 0: probs.append(svmutil.svm_problem(svm_targets[n], svm_inputs)) else: probs.append(svmutil.svm_problem(svm_targets[n], None, tmpl=probs[0])) params.append(svmutil.svm_parameter("-q -s 0 -t 0 -c 100")) print " Training...", svms.append(svmutil.svm_train(probs[n], params[n])) print " Saving...", svmutil.svm_save_model("hidden%04d.svm" % n, svms[n]) print " Testing..." # get weights from SVM w, b = get_svm_weights(svms[n], hl.n_in) ws.append(w) bs.append(b) # test model predv = numpy.dot(w, trsx.T) + b pred = numpy.sign(predv) pos = 0 neg = 0 for i in range(pred.size): if svm_targets[n][i] > 0: pos += 1
def train(self, class_examples=None, **kwds):
    """
    Train the supervised classifier model.

    If a model is already loaded, we will raise an exception in order to
    prevent accidental overwrite.

    If the same label is provided to both ``class_examples`` and ``kwds``,
    the examples given to the reference in ``kwds`` will prevail.

    :param class_examples: Dictionary mapping class labels to iterables of
        DescriptorElement training examples.
    :type class_examples: dict[collections.Hashable,
             collections.Iterable[smqtk.representation.DescriptorElement]]

    :param kwds: Keyword assignment of labels to iterables of
        DescriptorElement training examples.
    :type kwds: dict[str,
             collections.Iterable[smqtk.representation.DescriptorElement]]

    :raises ValueError: There were no class examples provided.
    :raises ValueError: Less than 2 classes were given.
    :raises RuntimeError: A model already exists in this instance.
        Following through with training would overwrite this model.
        Throwing an exception for information protection.

    """
    class_examples = \
        super(LibSvmClassifier, self).train(class_examples, **kwds)

    # Offset from 0 for positive class labels to use
    # - not using label of 0 because we think libSVM wants positive labels
    CLASS_LABEL_OFFSET = 1

    # Stuff for debug reporting: at DEBUG level libSVM is not silenced and
    # elements_to_matrix is given a report interval.
    etm_ri = None
    param_debug = {'-q': ''}
    if self._log.getEffectiveLevel() <= logging.DEBUG:
        etm_ri = 1.0
        param_debug = {}

    # Form libSVM problem input values
    self._log.debug("Formatting problem input")
    train_labels = []
    train_vectors = []
    train_group_sizes = []  # number of examples per class
    self.svm_label_map = {}
    # Making SVM label assignment deterministic to alphabetic order
    for i, l in enumerate(sorted(class_examples), CLASS_LABEL_OFFSET):
        # Map integer SVM label to semantic label
        self.svm_label_map[i] = l

        self._log.debug('-- class %d (%s)', i, l)
        # requires a sequence, so making the iterable ``g`` a tuple
        g = class_examples[l]
        if not isinstance(g, collections.Sequence):
            g = tuple(g)

        train_group_sizes.append(float(len(g)))
        x = elements_to_matrix(g, report_interval=etm_ri)
        x = self._norm_vector(x)
        train_labels.extend([i] * x.shape[0])
        train_vectors.extend(x.tolist())
        del g, x

    # Note the trailing space after "vectors": the two literals are
    # concatenated into a single message.
    assert len(train_labels) == len(train_vectors), \
        "Count mismatch between parallel labels and descriptor vectors " \
        "being sent to libSVM (%d != %d)" \
        % (len(train_labels), len(train_vectors))

    self._log.debug("Forming train params")
    #: :type: dict
    params = deepcopy(self.train_params)
    params.update(param_debug)
    # Calculating class weights for C-SVC SVM
    if '-s' not in params or int(params['-s']) == 0:
        total_examples = sum(train_group_sizes)
        for i, n in enumerate(train_group_sizes, CLASS_LABEL_OFFSET):
            # weight is the ratio of the number of other-class examples
            # to the number of examples in this class (never below 1).
            other_class_examples = total_examples - n
            w = max(1.0, other_class_examples / float(n))
            params['-w' + str(i)] = w
            self._log.debug("-- class '%s' weight: %s",
                            self.svm_label_map[i], w)

    self._log.debug("Making parameters obj")
    svm_params = svmutil.svm_parameter(self._gen_param_string(params))
    self._log.debug("Creating SVM problem")
    svm_problem = svm.svm_problem(train_labels, train_vectors)
    self._log.debug("Training SVM model")
    self.svm_model = svmutil.svm_train(svm_problem, svm_params)
    self._log.debug("Training SVM model -- Done")

    if self.svm_label_map_fp:
        self._log.debug("saving file -- labels -- %s",
                        self.svm_label_map_fp)
        with open(self.svm_label_map_fp, 'wb') as f:
            cPickle.dump(self.svm_label_map, f, -1)
    if self.svm_model_fp:
        self._log.debug("saving file -- model -- %s", self.svm_model_fp)
        svmutil.svm_save_model(self.svm_model_fp, self.svm_model)
def saveModel(self, fname):
    """Write the libSVM model held in ``self._model`` to the file ``fname``."""
    model = self._model
    svm_save_model(fname, model)
p_label_train, p_acc_train, p_val_train = svmutil.svm_predict(y[:i], x[:i], m) p_label_validation, p_acc_validation, p_val_validation = svmutil.svm_predict(y[i:], x[i:], m) print p_acc_train[0], "\t", p_acc_validation[0], "\n" training_examples.append(i) train_accuracy.append(p_acc_train[0]) validation_accuracy.append(p_acc_validation[0]) return training_examples, train_accuracy, validation_accuracy def get_cross_val(x, y, x_val, y_val, gamma_c): prob = svmutil.svm_problem(y, x) param = svmutil.svm_parameter('-t 2 -q -c {0} -g {1}'.format(gamma_c.C, gamma_c.gamma)) m = svmutil.svm_train(prob, param) svmutil.svm_save_model("model", m) p_label_validation, p_acc_validation, p_val_validation = svmutil.svm_predict(y_val, x_val, m) return p_acc_validation[0] if __name__ == '__main__': y, x = svmutil.svm_read_problem("char_recon_shuffled.db") gamma = 1.0 / (2.0 * (3.0 ** 7) ** 2) C = 3.0 ** 3.0 prob = svmutil.svm_problem(y, x) param = svmutil.svm_parameter('-t 2 -q -c {0} -g {1}'.format(C, gamma)) m = svmutil.svm_train(prob, param) svmutil.svm_save_model("model", m)
def train(self, positive_classes, negatives):
    """
    Train the supervised SVM classifier model.

    The class label ``negative`` is reserved for the negative class.

    If a model is already loaded, we will raise an exception in order to
    prevent accidental overwrite.

    NOTE:
        This abstract method provides generalized error checking and
        should be called via ``super`` in implementing methods.

    :param positive_classes: Dictionary mapping positive class labels to
        iterables of DescriptorElement training examples.
    :type positive_classes:
        dict[collections.Hashable,
             collections.Iterable[smqtk.representation.DescriptorElement]]

    :param negatives: Iterable of negative DescriptorElement examples.
    :type negatives:
        collections.Iterable[smqtk.representation.DescriptorElement]

    :raises ValueError: The ``negative`` label was found in the
        ``positive_classes`` dictionary. This is reserved for the negative
        example class.
    :raises ValueError: There were no positive or negative examples.
    :raises RuntimeError: A model already exists in this instance.
        Following through with training would overwrite this model.
        Throwing an exception for information protection.

    """
    super(LibSvmClassifier, self).train(positive_classes, negatives)

    # Positive classes are numbered from 1 upwards; 0 is avoided because
    # libSVM is believed to want positive labels.
    CLASS_LABEL_OFFSET = 1

    # At DEBUG level libSVM is not silenced and matrix building reports
    # progress.
    debugging = self._log.getEffectiveLevel() <= logging.DEBUG
    etm_ri = 1.0 if debugging else None
    param_debug = {} if debugging else {"-q": ""}

    # Assemble the parallel label / vector lists for libSVM.
    self._log.debug("Formatting problem input")
    train_labels = []
    train_vectors = []
    train_group_sizes = []
    self.svm_label_map = {}
    # Sorting makes the integer label assignment deterministic.
    for svm_label, class_name in enumerate(sorted(positive_classes),
                                           CLASS_LABEL_OFFSET):
        # integer SVM label -> semantic label
        self.svm_label_map[svm_label] = class_name

        self._log.debug("-- class %d (%s)", svm_label, class_name)
        # a sequence is needed, so expand plain iterables
        examples = positive_classes[class_name]
        if not isinstance(examples, collections.Sequence):
            examples = tuple(examples)

        train_group_sizes.append(float(len(examples)))
        matrix = self._norm_vector(
            elements_to_matrix(examples, report_interval=etm_ri)
        )
        train_labels.extend([svm_label] * matrix.shape[0])
        train_vectors.extend(matrix.tolist())
        del examples, matrix

    self._log.debug("-- negatives (-1)")
    # integer SVM label -> semantic label
    self.svm_label_map[-1] = self.NEGATIVE_LABEL
    # a sequence is needed, so expand a plain iterable
    if not isinstance(negatives, collections.Sequence):
        negatives = tuple(negatives)
    negatives_size = float(len(negatives))
    matrix = self._norm_vector(
        elements_to_matrix(negatives, report_interval=etm_ri)
    )
    train_labels.extend([-1] * matrix.shape[0])
    train_vectors.extend(matrix.tolist())
    del negatives, matrix

    self._log.debug(
        "Training elements: %d labels, %d vectors "
        "(should be the same)",
        len(train_labels), len(train_vectors)
    )

    self._log.debug("Forming train params")
    #: :type: dict
    params = deepcopy(self.train_params)
    params.update(param_debug)
    # Positive class weights are only needed for the C-SVC type.
    if "-s" not in params or int(params["-s"]) == 0:
        for svm_label, group_size in enumerate(train_group_sizes,
                                               CLASS_LABEL_OFFSET):
            weight = max(1.0, negatives_size / float(group_size))
            params["-w" + str(svm_label)] = weight

    self._log.debug("Making parameters obj")
    svm_params = svmutil.svm_parameter(self._gen_param_string(params))
    self._log.debug("Creating SVM problem")
    svm_problem = svm.svm_problem(train_labels, train_vectors)
    self._log.debug("Training SVM model")
    self.svm_model = svmutil.svm_train(svm_problem, svm_params)
    self._log.debug("Training SVM model -- Done")

    if self.svm_label_map_fp:
        self._log.debug("saving file -- labels -- %s",
                        self.svm_label_map_fp)
        with open(self.svm_label_map_fp, "wb") as label_file:
            cPickle.dump(self.svm_label_map, label_file)
    if self.svm_model_fp:
        self._log.debug("saving file -- model -- %s", self.svm_model_fp)
        svmutil.svm_save_model(self.svm_model_fp, self.svm_model)
def save_new_model(filename):
    """
    Train a classifier on the match-vote data and persist it.

    Three files are written: ``<filename>.model`` (the libSVM model),
    ``<filename>.range`` and ``<filename>.mapping`` (pickles of the range and
    mapping returned by ``training_data_from_matchvotes``).

    :param filename: path prefix shared by the three output files.
    """
    # ``value_range`` instead of ``range`` so the builtin is not shadowed.
    mapped, labels, value_range, mapping = training_data_from_matchvotes()
    model = train_classifier(mapped, labels)
    svmutil.svm_save_model(filename + ".model", model)

    # open() replaces the Python-2-only file() builtin; the context managers
    # close the handles deterministically and binary mode keeps the pickles
    # free of newline translation.
    with open(filename + ".range", "wb") as range_file:
        pickle.dump(value_range, range_file)
    with open(filename + ".mapping", "wb") as mapping_file:
        pickle.dump(mapping, mapping_file)
#!/usr/bin/env python
# Train (or load a cached) libSVM model on HOG features and write the
# predicted label of every test sample to result.txt, one per line.
if __name__ == '__main__':
    import numpy as np
    # svm 3.20
    import svmutil as SVM
    from os.path import isfile
    from itertools import izip
    from final_utils import read_hwfile

    save_model_name = 'svm_train.model'
    if isfile(save_model_name):
        # A cached model exists: the training set is not needed at all.
        model = SVM.svm_load_model(save_model_name)
    else:
        # The training data is parsed only when a model has to be trained.
        datTrn, labTrn, nTrn = read_hwfile('ml14fall_train_align.dat.hog.dat', 169)
        param = SVM.svm_parameter('-t 2 -c 5 -g 2 -h 0')
        problem = SVM.svm_problem(labTrn.tolist(), datTrn.tolist())
        model = SVM.svm_train(problem, param)
        SVM.svm_save_model(save_model_name, model)

    datTst, labTst, nTst = read_hwfile('ml14fall_test1_no_answer_align.dat.hog.dat', 169)
    p_label, p_acc, p_val = SVM.svm_predict(labTst.tolist(), datTst.tolist(), model)

    with open("result.txt", 'w') as fp:
        for label in p_label:
            fp.write('{:d}\n'.format(int(label)))
def main(): dpark_ctx = dpark.DparkContext('mesos') assert os.path.isdir(BASE_PATH) and os.path.isdir(MODEL_PATH) # Read the weights and bias of SDAE from MODEL_PATH W, b = load_ae(MODEL_PATH) # SVM print 'Will adopt layer No. %i' % FEA_LAYER lyr = W.keys() lyr.sort() lyr_last = lyr[FEA_LAYER] lyr = lyr[:FEA_LAYER] lyr.append(lyr_last) # SVM training and validating data svm_data_tr, svm_label_tr, svm_data_va, svm_label_va = \ load_svm_tr_va_data(TRAIN_DATA_PATH, dpark_ctx, (lyr, W, b)) # SVM testing data svm_data_te, svm_label_te = load_svm_te_data(TEST_DATA_PATH, dpark_ctx, (lyr, W, b)) # Process data, view the GID distribution in tr or te sets print 'Processing data here.' GID_adjust = range(len(GID)) # GID adjust to 0-13 print '=' * 100 print 'Training data distributions:' tr_hist = data_dist(GID_adjust, svm_label_tr) print '=' * 100 print 'Testing data distributions:' te_hist = data_dist(GID_adjust, svm_label_te) print '=' * 100 print 'Validation data distributions:' va_hist = data_dist(GID_adjust, svm_label_va) # Binary Test on two classes in validation set if BINARY_TEST: print 'Doing a binary class test using validation set' lbl1 = 7 lbl2 = 11 svm_data_va_bin = [] svm_label_va_bin = [] for lbl_elem, data_elem in zip(svm_label_va, svm_data_va): if lbl_elem == lbl1: svm_label_va_bin.append(-1) svm_data_va_bin.append(data_elem) elif lbl_elem == lbl2: svm_label_va_bin.append(1) svm_data_va_bin.append(data_elem) else: pass print 'Binary classes data was prepared.' for svm_c in [0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000, 100000, 1e6, 1e7, 1e8]: svm_opt = '-c ' + str(svm_c) + ' -w-1 3 -w1 2 -v 5 -q' svm_model = svm.svm_train(svm_label_va_bin, svm_data_va_bin, svm_opt) # Cross Validation on whole validation set elif CROSS_VALIDATION: print 'SVM model starts cross validating.' 
for svm_c in [0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000]: svm_opt = '-c ' + str(svm_c) + ' ' for gid_elem, va_hist_elem in zip(GID_adjust, va_hist): wgt_tmp = max(va_hist) / float(va_hist_elem) ''' if wgt_tmp < 3.0: wgt = 1 elif wgt_tmp < 10: wgt = 4 elif wgt_tmp < 40: wgt = 16 else: wgt = 32 ''' if wgt_tmp < 10.0: wgt = int(wgt_tmp) elif wgt_tmp < 40: wgt = 16 else: wgt = 32 svm_opt += ('-w' + str(gid_elem) + ' ' + str(wgt) + ' ') svm_opt += '-v 5 -q' print svm_opt svm_model = svm.svm_train(svm_label_va, svm_data_va, svm_opt) # SVM running on whole Training / Testing sets else: fn_svm = 'svm_model_c1_wgt' if SAVE_OR_LOAD: # True print 'SVM model starts training.' svm_opt = '-c 1 ' for gid_elem, tr_hist_elem in zip(GID_adjust, tr_hist): wgt_tmp = max(tr_hist) / float(tr_hist_elem) if wgt_tmp < 3.0: wgt = 1 elif wgt_tmp < 10: wgt = 2 elif wgt_tmp < 40: wgt = 4 else: wgt = 8 svm_opt += ('-w' + str(gid_elem) + ' ' + str(wgt) + ' ') print svm_opt svm_model = svm.svm_train(svm_label_tr, svm_data_tr, svm_opt) # save SVM model svm.svm_save_model(fn_svm, svm_model) else: # False print 'SVM model loading.' # load SVM model svm_model = svm.svm_load_model(fn_svm) print 'SVM model training or loading done' p_label, p_acc, p_val = svm.svm_predict(svm_label_te, svm_data_te, svm_model) fid = open('res_tmp.pkl', 'wb') pickle.dump((p_label, p_acc, p_val), fid) fid.close()