def self_training2(X, y, X_unLabeled, param, th):
    """Self-train an SVM by repeatedly pseudo-labelling the unlabeled samples.

    A first model is fitted on ``(X, y)``. Each round then predicts labels for
    ``X_unLabeled`` with the current model, refits on the labelled samples plus
    the pseudo-labelled ones, and stops once the model's objective value moves
    by less than ``th`` between two consecutive fits.

    Args:
        X: labelled samples (needs ``tolist()`` and must work with ``np.append``).
        y: labels for ``X``.
        X_unLabeled: samples without labels.
        param: training parameters forwarded to ``svmutil.svm_train``.
        th: threshold on the absolute change of the objective value.

    Returns:
        ``(model, y_unlabeled, obj_new, itr_num)`` where ``y_unlabeled`` is a
        masked array of the final model's predictions for ``X_unLabeled`` with
        every entry masked, ``obj_new`` is the last objective value and
        ``itr_num`` is the number of refits performed.
    """

    def predict_unlabeled(current_model):
        # svm_predict wants a label per row; a constant placeholder is passed
        # and only the predicted labels (element 0 of the result) are kept.
        prediction = svmutil.svm_predict(
            x=X_unLabeled.tolist(),
            y=[1] * len(X_unLabeled),
            m=current_model,
            options="-q",
        )
        return np.array(prediction[0])

    initial_problem = svmutil.svm_problem(x=X.tolist(), y=y.tolist())
    model = svmutil.svm_train(initial_problem, param)
    obj = model.get_objective_value()[0]

    itr_num = 0
    converged = False
    while not converged:
        predicted_labels = predict_unlabeled(model)
        augmented_problem = svmutil.svm_problem(
            x=np.append(X, X_unLabeled, axis=0).tolist(),
            y=np.append(y, predicted_labels).tolist(),
        )
        model = svmutil.svm_train(augmented_problem, param)
        obj_new = model.get_objective_value()[0]
        itr_num += 1
        converged = abs(obj_new - obj) < th
        obj = obj_new

    y_unlabeled = ma.array(
        data=predict_unlabeled(model),
        mask=[True] * len(X_unLabeled),
    )
    return model, y_unlabeled, obj_new, itr_num
def gridSearch(x, y, kernel='rbf'):
    """Grid-search SVM hyper-parameters, scoring each candidate with ``-v 5``.

    Every candidate is scored by the value ``svm_train`` returns when run with
    the ``-v 5`` option (cross-validation mode); the first candidate with the
    strictly highest score wins.

    Args:
        x: training instances, passed to ``svm_train`` unchanged.
        y: training labels, passed to ``svm_train`` unchanged.
        kernel: ``'linear'``, ``'polynomial'`` or ``'rbf'``.

    Returns:
        The best parameters found: a bare ``C`` value for ``'linear'``,
        ``(C, gamma, degree, coef0)`` for ``'polynomial'`` and ``(C, gamma)``
        for ``'rbf'``. If no candidate scores above 0.0 the initial value is
        returned; note that for ``'polynomial'`` this initial tuple is ordered
        ``(C, gamma, coef0, degree)``, as in the original code.

    Raises:
        ValueError: if ``kernel`` has no search grid. Previously this fell
            through every branch and failed with ``UnboundLocalError``.
    """
    kernel_type = {
        'linear': '0',
        'polynomial': '1',
        'rbf': '2',
        'self-defined': '4'
    }
    searchable = ('linear', 'polynomial', 'rbf')
    if kernel not in searchable:
        raise ValueError(
            'unsupported kernel {!r}; expected one of {}'.format(
                kernel, ', '.join(searchable)))

    C = [1, 10, 20, 30]
    gamma = [0.2, 0.1, 0.05, 0.01]
    degree = [2, 3, 4, 5]
    coef0 = [0, 5, 10]
    max_acc = 0.0
    opt = '-q -s 0 -v 5 '
    t = kernel_type[kernel]

    if kernel == 'linear':
        best_param = C[0]
        for c in C:
            acc = svm_train(y, x, opt + '-t {} -c {}'.format(t, c))
            if acc > max_acc:
                max_acc = acc
                best_param = c
    elif kernel == 'polynomial':
        best_param = (C[0], gamma[0], coef0[0], degree[0])
        for c in C:
            for g in gamma:
                for d in degree:
                    for r in coef0:
                        acc = svm_train(
                            y, x,
                            opt + '-t {} -c {} -g {} -d {} -r {}'.format(
                                t, c, g, d, r))
                        if acc > max_acc:
                            max_acc = acc
                            best_param = (c, g, d, r)
    else:  # 'rbf'
        best_param = (C[0], gamma[0])
        for c in C:
            for g in gamma:
                acc = svm_train(
                    y, x, opt + '-t {} -c {} -g {}'.format(t, c, g))
                if acc > max_acc:
                    max_acc = acc
                    best_param = (c, g)
    return best_param
def setUpClass(self):
    """
    Prepare the fixture: load the Boston data, train a linear C-SVC on it and
    convert the trained model to a spec.

    Does nothing when scikit-learn or libsvm is unavailable.
    """
    if not (_HAS_SKLEARN and _HAS_LIBSVM):
        return

    boston = load_boston()

    # Labels are booleans: whether each target lies above the mean target.
    above_mean = boston["target"] > boston["target"].mean()
    problem = svmutil.svm_problem(above_mean, boston["data"].tolist())

    settings = svmutil.svm_parameter()
    settings.svm_type = svmutil.C_SVC
    settings.kernel_type = svmutil.LINEAR
    settings.eps = 1

    trained = svmutil.svm_train(problem, settings)
    converted = libsvm_converter.convert(trained, boston.feature_names, "target")

    # Keep the data and the converted spec for the tests.
    self.scikit_data = boston
    self.libsvm_spec = converted.get_spec()
def setUpClass(self):
    """
    Prepare the fixture: build a seeded random two-feature data set and train
    a probability-enabled linear NU-SVC on it.
    """
    if not _HAS_LIBSVM:
        # setUpClass still runs when the class itself is skipped.
        return

    # The data is generated locally so this test does not depend on
    # scikit-learn for its inputs.
    random.seed(42)
    samples, labels = [], []
    for _ in range(50):
        samples.append([random.gauss(200, 30), random.gauss(-100, 22)])
        labels.append(random.choice([1, 2]))
    # Force label 1 to be the first one seen, followed by label 2.
    labels[0], labels[1] = 1, 2

    self.x, self.y = samples, labels
    self.column_names = ["x1", "x2"]
    self.prob = svmutil.svm_problem(self.y, self.x)

    settings = svmutil.svm_parameter()
    settings.svm_type = svmutil.NU_SVC
    settings.kernel_type = svmutil.LINEAR
    settings.eps = 1
    settings.probability = 1

    # Keep the trained model and a DataFrame view of the features.
    self.libsvm_model = svmutil.svm_train(self.prob, settings)
    self.df = pd.DataFrame(self.x, columns=self.column_names)
def train_svm(train_features, train_labels, arguments='-s 0 -t 0'):
    """Train an SVM and report progress on stdout.

    Args:
        train_features: training instances, passed to ``svm_train`` as ``x``.
        train_labels: training labels, passed to ``svm_train`` as ``y``.
        arguments: LIBSVM option string (the original author noted trying
            ``-c`` over 10^-4 .. 10^+4).

    Returns:
        Whatever ``svm_train`` returns for the given options.
    """
    # print() with one string argument prints the same text on Python 2 and 3;
    # the former print statements were a SyntaxError on Python 3.
    print("SVM is being trained")
    svm = svm_train(train_labels, train_features, arguments)
    print("SVM is trained")
    return svm
def test_convert_svmc(self):
    """Convert an RBF SVC with probability estimates and dump data and model."""
    iris = load_iris()
    features = iris.data[:, :2]
    labels = iris.target
    # Relabel class 2 as class 1 (in place on iris.target).
    labels[labels == 2] = 1

    settings = svmutil.svm_parameter()
    settings.svm_type = SVC
    settings.kernel_type = svmutil.RBF
    settings.eps = 1
    settings.probability = 1
    if noprint:
        settings.print_func = noprint

    problem = svmutil.svm_problem(labels, features.tolist())
    libsvm_model = svmutil.svm_train(problem, settings)

    node = convert(libsvm_model, "LibSvmSvmc",
                   [('input', FloatTensorType())])
    self.assertTrue(node is not None)

    sample = features[:5].astype(numpy.float32)
    dump_data_and_model(sample, SkAPIClProba2(libsvm_model), node,
                        basename="LibSvmSvmc-Dec2")
def test_libsvm_trains_correctly(heart_scale):
    """Train on the first 200 heart_scale rows and check predictions on the rest."""
    labels, samples = heart_scale
    split = 200

    model = svm_train(labels[:split], samples[:split], '-c 4')
    predicted, accuracy, _values = svm_predict(
        labels[split:], samples[split:], model)

    expected_accuracy = [
        84.28571428571429, 0.6285714285714286, 0.463744141163496]
    assert accuracy == pytest.approx(expected_accuracy)
    assert predicted[:6] == [-1.0, 1.0, 1.0, -1.0, 1.0, -1.0]
def test_convert_nusvmc(self):
    """Convert an RBF NuSVC with probability estimates and dump data and model."""
    iris = load_iris()
    features = iris.data[:, :2]
    labels = iris.target
    # Relabel class 2 as class 1 (in place on iris.target).
    labels[labels == 2] = 1

    settings = svmutil.svm_parameter()
    settings.svm_type = NuSVC
    settings.kernel_type = svmutil.RBF
    settings.eps = 1
    settings.probability = 1
    if noprint:
        settings.print_func = noprint

    problem = svmutil.svm_problem(labels, features.tolist())
    libsvm_model = svmutil.svm_train(problem, settings)

    input_type = FloatTensorType(shape=['None', 'None'])
    node = convert(libsvm_model, "LibSvmNuSvmc", [('input', input_type)])
    self.assertTrue(node is not None)

    dump_data_and_model(
        features[:5].astype(numpy.float32),
        SkAPIClProba2(libsvm_model),
        node,
        basename="LibSvmNuSvmc-Dec2",
        allow_failure=
        "StrictVersion(onnxruntime.__version__) <= StrictVersion('0.1.3')")
def train(data, path):
    """Train a linear C-SVC on features extracted from ``data`` and save it.

    Does nothing (apart from a message) when ``path`` already exists.
    Otherwise it extracts one feature vector per sentence position, trains
    the model, creates the directory ``path`` and writes the model to
    ``path/scr`` and the integerizer to ``path/int``.

    Args:
        data: input handed to ``io.getsent`` to iterate over sentences.
        path: directory to create for the saved model; must not exist yet.

    Raises:
        AssertionError: if an element's third field is neither 0 nor 1.
    """
    # Single-argument print() calls behave the same on Python 2 and 3; the
    # former print statements were a SyntaxError on Python 3.
    if os.path.exists(path):
        print('Model path exists, do nothing.')
        return

    print("Loading features.")
    integerizer = tools.integerization.CIntegerization()
    labels = []
    samples = []
    for sent in io.getsent(data):
        for index, element in enumerate(sent):
            extracted = feature.extractFeatures(sent, index, integerizer)
            raw_label = int(element[2])
            assert raw_label == 0 or raw_label == 1
            # Raw labels are 0/1; the model is trained on -1/+1.
            labels.append(1 if raw_label == 1 else -1)
            samples.append(extracted)

    print("Training SVM.")
    problem = svm.svm_problem(labels, samples)
    param = svm.svm_parameter()
    param.svm_type = svm.C_SVC
    param.kernel_type = svm.LINEAR
    # NOTE(review): the original carried `param.C = 1` next to a trailing
    # '#', so it may have been commented out. 1 is LIBSVM's documented
    # default cost, so setting it explicitly is a no-op either way.
    param.C = 1
    param.eps = 1.0
    param.probability = 1
    param.cache_size = 1000
    param.shrinking = 0
    model = svmutil.svm_train(problem, param)

    print("Saving model.")
    os.mkdir(path)
    svmutil.svm_save_model(os.path.join(path, "scr"), model)
    integerizer.write(os.path.join(path, "int"))
def test_convert_svmc_raw(self):
    """Convert an RBF SVC without probability estimates and dump data and model."""
    iris = load_iris()
    features = iris.data[:, :2]
    labels = iris.target
    # Relabel class 2 as class 1 (in place on iris.target).
    labels[labels == 2] = 1

    settings = svmutil.svm_parameter()
    settings.svm_type = SVC
    settings.kernel_type = svmutil.RBF
    settings.eps = 1
    settings.probability = 0
    if noprint:
        settings.print_func = noprint

    problem = svmutil.svm_problem(labels, features.tolist())
    libsvm_model = svmutil.svm_train(problem, settings)

    # known svm runtime dimension error in ONNX Runtime
    input_type = FloatTensorType(shape=['None', 'None'])
    node = convert(libsvm_model, "LibSvmSvmcRaw", [('input', input_type)])
    self.assertTrue(node is not None)

    dump_data_and_model(
        features[:5].astype(numpy.float32),
        SkAPICl(libsvm_model),
        node,
        basename="LibSvmSvmcRaw",
        allow_failure=
        "StrictVersion(onnxruntime.__version__) < StrictVersion('0.5.0')")
def get_SVM_trained_classifer(self, training_datafile, classifier_dumpfile):
    """Train a linear SVM on the tweets in ``training_datafile`` and save it.

    Stores the feature vectors and labels on ``self.feature_vectors`` and
    ``self.labels``, writes the trained model to ``classifier_dumpfile`` and
    returns it.
    """
    # Read every (text, sentiment) pair, keeping only lower-cased ASCII words.
    tweets = [
        ([word.lower() for word in text.split() if common.is_ascii(word)],
         sentiment)
        for (text, sentiment) in
        common.get_filtered_training_data(training_datafile)
    ]

    extracted = helper.get_SVM_feature_vector_and_labels(
        self.feature_list, tweets)
    self.feature_vectors = extracted['feature_vector']
    self.labels = extracted['labels']

    problem = svm_problem(self.labels, self.feature_vectors)
    # '-q' suppresses console output.
    settings = svm_parameter('-q')
    settings.kernel_type = LINEAR

    classifier = svm_train(problem, settings)
    svm_save_model(classifier_dumpfile, classifier)
    return classifier
def test_convert_svmc_linear_raw_multi(self):
    """Convert a linear multi-class SVC without probabilities and dump it."""
    iris = load_iris()
    features = iris.data[:, :2]
    labels = iris.target
    # Give the last five samples an extra label, 3 (in place on iris.target).
    labels[-5:] = 3

    settings = svmutil.svm_parameter()
    settings.svm_type = SVC
    settings.kernel_type = svmutil.LINEAR
    settings.eps = 1
    settings.probability = 0
    if noprint:
        settings.print_func = noprint

    problem = svmutil.svm_problem(labels, features.tolist())
    libsvm_model = svmutil.svm_train(problem, settings)

    input_type = FloatTensorType(shape=['None', 2])
    node = convert(libsvm_model, "LibSvmNuSvmcMultiRaw",
                   [('input', input_type)])
    self.assertTrue(node is not None)

    # Two rows from each of four regions of the data set.
    picked = [features[:2], features[60:62], features[110:112],
              features[147:149]]
    X2 = numpy.vstack(picked)  # 5x0, 5x1
    dump_data_and_model(
        X2.astype(numpy.float32),
        SkAPICl(libsvm_model),
        node,
        basename="LibSvmSvmcRaw-Dec3",
        verbose=False,
        allow_failure=
        "StrictVersion(onnxruntime.__version__) <= StrictVersion('0.1.3')")
def train_impl(self):
    """Train the SVM on this object's labeled features; store it in ``_model``."""
    # Read label then feature of each item, in iteration order.
    pairs = [(item.label, item.feature)
             for item in self.get_labeled_features()]
    labels = [label for label, _ in pairs]
    features = [feature for _, feature in pairs]

    options = self.get_train_options()
    self._model = svm_train(labels, features, options)
def train_impl( self ):
    """Fit ``self._model`` from the labeled features using the train options."""
    # Collect (label, feature) per item so each item is read exactly once.
    collected = [(entry.label, entry.feature)
                 for entry in self.get_labeled_features( )]
    labels = [pair[0] for pair in collected]
    features = [pair[1] for pair in collected]

    options = self.get_train_options()
    self._model = svm_train( labels, features, options )
def generate_svm_model(rows, param, kmers=None):
    """Train an epsilon-SVR model on the feature matrix built from ``rows``.

    Args:
        rows: input rows handed to ``libsvm_generate_matrix``.
        param: dict mapping a LIBSVM option letter to its value; each entry
            is appended to the option string as ``-<key> <value>``.
        kmers: list of k-mer sizes for ``libsvm_generate_matrix``; defaults to
            ``[1, 2, 3]``.

    Returns:
        The model returned by ``svmutil.svm_train``.
    """
    # A fresh list per call: the former mutable default ``[1,2,3]`` was one
    # shared object that a callee could modify for every later call.
    if kmers is None:
        kmers = [1, 2, 3]

    y, x = libsvm_generate_matrix(rows, kmers, dense=True)
    prob = svmutil.svm_problem(y, x)

    # -s 3: epsilon-SVR, -b 1: probability estimates, -q: quiet mode.
    param_str = "-s 3 -b 1 -q "
    param_str += " ".join(["-{} {}".format(k, v) for k, v in param.items()])
    params = svmutil.svm_parameter(param_str)
    return svmutil.svm_train(prob, params)
def _stop_training(self):
    """Finish training: normalize the labels and fit the libsvm model."""
    super(LibSVMClassifier, self)._stop_training()

    self.normalizer = _LabelNormalizer(self.labels)
    normalized_labels = self.normalizer.normalize(self.labels.tolist())

    # Build the libsvm problem from plain lists, then train.
    problem = libsvmutil.svm_problem(normalized_labels, self.data.tolist())
    self.model = libsvmutil.svm_train(problem, self.parameter)
def finishLearning(self):
    """
    Method should be called to indicate that SVM learning is finished.
    When this happens we take all the learned values and feed them into
    a new SVM Model to train it for later inference.

    Training only happens while no model exists yet; later calls are no-ops.
    """
    # Identity test for None, and print() calls that work on both Python 2
    # and 3 (the former print statements were a SyntaxError on Python 3).
    if self.__svmModel is None:
        # model = svm_train(y, x [, 'training_options'])
        if HMAX.DEBUG:
            print("creating svmModel...")
        self.__svmModel = svmutil.svm_train(self.classes, self.learned, "-q -b 1")
        if HMAX.DEBUG:
            print("svmModel successfully trained")
def finishLearning(self):
    """
    Method should be called to indicate that SVM learning is finished.
    When this happens we take all the learned values and feed them into
    a new SVM Model to train it for later inference.

    Training only happens while no model exists yet; later calls are no-ops.
    """
    # Identity test for None, and print() calls that work on both Python 2
    # and 3 (the former print statements were a SyntaxError on Python 3).
    if self.__svmModel is None:
        # model = svm_train(y, x [, 'training_options'])
        if HMAX.DEBUG:
            print("creating svmModel...")
        self.__svmModel = svmutil.svm_train(self.classes, self.learned,
                                            "-q -b 1")
        if HMAX.DEBUG:
            print("svmModel successfully trained")
def train_and_predict(x, y, kernel, option=''):
    """Train an SVM with the named kernel and run it on the test split.

    Args:
        x: pair ``(x_train, x_test)``.
        y: pair ``(y_train, y_test)``.
        kernel: one of ``'linear'``, ``'polynomial'``, ``'rbf'``,
            ``'sigmoid'``, ``'self-defined'``.
        option: extra LIBSVM options appended directly after the kernel code,
            so it has to begin with a space when non-empty.

    Returns:
        None. The prediction results are computed and then discarded.
    """
    kernel_codes = {
        'linear': '0',
        'polynomial': '1',
        'rbf': '2',
        'sigmoid': '3',
        'self-defined': '4'
    }
    train_samples, test_samples = x
    train_labels, test_labels = y

    # -q: suppress the output in libsvm
    options = '-q -t ' + kernel_codes[kernel] + option
    model = svm_train(train_labels, train_samples, options)
    _labels, _accuracy, _values = svm_predict(test_labels, test_samples, model)
def train(self, input_data_path, params="-t 0 -c 4 -b 1", is_eval=True):
    """Train on ``<input_data_path>/Train.txt`` and optionally log the fit.

    Args:
        input_data_path: directory holding ``Train.txt`` in LIBSVM format.
        params: LIBSVM option string used to build the training parameters.
        is_eval: when exactly ``True``, predict on the training data and log
            the accuracy and mean squared error.
    """
    train_file = input_data_path + "/Train.txt"
    with commons.PhaseLogger("LIBSVM.train.read_problem"):
        Y, X = svmutil.svm_read_problem(train_file)
        problem = svmutil.svm_problem(Y, X)

    self._params = svmutil.svm_parameter(params)
    with commons.PhaseLogger("LIBSVM.train.svm_train"):
        self._model = svmutil.svm_train(problem, self._params)
    self._init = True

    if is_eval is not True:
        return

    # Only the accuracy tuple of the prediction result is needed here.
    _labels, metrics, _values = svmutil.svm_predict(Y, X, self._model)
    acc, mse, _ = metrics
    logging.info("[%s]: train with Acc[%.4f] Mse[%.4f]" %
                 (self._get_class_name(), acc, mse))
def train(self, labeled_featuresets):
    """
    Fit the classifier on ``labeled_featuresets``.

    Each call resets the feature and label bookkeeping first, so a new call
    invalidates whatever was trained before.
    The labeled_featuresets parameter must have the format [([feature], label)]
    """
    # Reset the id tables before adapting the new training set.
    self.__features_ids = {}
    self.__last_feature_id = 0
    self.__labels_ids = {}
    self.__labels = []

    targets, instances = self.__adapt_labeled_featuresets(labeled_featuresets)
    problem = svm_problem(targets, instances)
    settings = svm_parameter('-c 1000 -q')
    self.__model = svm_train(problem, settings)
def predict(tr_data_arr, tr_label_arr, pred_data_arr, pred_label_arr):
    """Train an SVM on the training set and score it sample by sample.

    Args:
        tr_data_arr: training data, converted with ``data_format``.
        tr_label_arr: training labels.
        pred_data_arr: evaluation data, converted with ``data_format``.
        pred_label_arr: expected labels for the evaluation data.

    Returns:
        ``(count, wrong, accuracy)``: the number of expected labels, the
        number of misclassified samples and the accuracy in percent.
    """
    training_rows = data_format(tr_data_arr)
    problem = svm_problem(tr_label_arr, training_rows)
    # The c and g values below were obtained through cross-validation.
    settings = svm_parameter('-c 2048.0 -g 0.001953125')
    svm_model = svm_train(problem, settings)

    evaluation_rows = data_format(pred_data_arr)
    total = len(pred_label_arr)

    wrong = 0
    for position, row in enumerate(evaluation_rows):
        expected = pred_label_arr[position]
        predicted, _accuracy, _values = svm_predict(
            [expected], [row], svm_model)
        if int(predicted[0]) != int(expected):
            wrong += 1

    accuracy = (total - wrong) * 100.0 / total
    return total, wrong, accuracy
def setUpClass(self):
    """
    Prepare the fixture: load the Boston data and train a linear NU-SVR on it.

    Does nothing when scikit-learn or libsvm is unavailable.
    """
    if not (_HAS_SKLEARN and _HAS_LIBSVM):
        return

    boston = load_boston()
    problem = svmutil.svm_problem(boston["target"], boston["data"].tolist())

    settings = svmutil.svm_parameter()
    settings.svm_type = svmutil.NU_SVR
    settings.kernel_type = svmutil.LINEAR
    settings.eps = 1

    self.libsvm_model = svmutil.svm_train(problem, settings)
def process(training_file, test_file, check, draw):
    """Train an RBF SVM on pickled 2-class data; optionally score and plot it.

    Both files must contain three consecutive pickles: the class-1 points,
    the class-2 points and the labels.

    Args:
        training_file: path of the pickled training data.
        test_file: path of the pickled test data.
        check: when true, run ``svm_predict`` on the test data (which prints
            the accuracy to stdout).
        draw: when true, plot the decision boundary over the test points.
    """

    def load_classes(path):
        # Pickles are binary data, so the file is opened in 'rb' mode.
        # NOTE: pickle.load can execute arbitrary code; only load trusted files.
        with open(path, 'rb') as f:
            class_1 = pickle.load(f)
            class_2 = pickle.load(f)
            labels = pickle.load(f)
        # libsvm wants plain lists. Real lists (not map objects) are needed
        # so that the two classes can be concatenated with '+'.
        return ([list(point) for point in class_1],
                [list(point) for point in class_2],
                list(labels))

    # Load training data and train.
    class_1, class_2, labels = load_classes(training_file)
    problem = svmutil.svm_problem(labels, class_1 + class_2)
    # Don't print to stdout, use radial basis functions.
    param = svmutil.svm_parameter('-q -t 2')
    model = svmutil.svm_train(problem, param)

    # Load test data.
    class_1, class_2, labels = load_classes(test_file)

    if check:
        # Sadly, this prints to stdout too :-/
        svmutil.svm_predict(labels, class_1 + class_2, model)  # Prints accuracy.

    if draw:
        def classify(x, y, model=model):
            points = [list(point) for point in zip(x, y)]
            return array(
                svmutil.svm_predict([0] * len(x), points, model)[0])

        imtools.plot_2d_boundary(
            [-6, 6, -6, 6], [array(class_1), array(class_2)], classify,
            [1, -1])
        show()
def svr_training(X_features,Y_classes,classes=(),output='svr_',training_options = '-s 3 -t 0 -b 1'):
    """
    Configure multiple SV Machines based on a one-against-all (1AA) approach

    For every class in ``classes`` a model is trained on the samples picked
    by ``sample_selection`` (targets +1 for the class, -1 for the rest),
    saved as ``models/<output>class<i>.model`` and its training matrix is
    written to ``<output>class<i>_svr.dat`` in the working directory.

    Input:
     - X_features  ndarray(float) : Array of instance features (instances on rows)
     - Y_classes   ndarray(int)   : Array of classes identification
     - classes     ['int']        : Classes to be used for the 1AA approach
     - output      str            : Prefix of the files written
     - training_options  str      : LIBSVM option string used for every model

    Output:
     - model_classes  [] : SVM models for classes given in 'classes'

    ---
    """
    model_classes = []
    # The caller's training_options is now honoured; it used to be
    # overwritten here by a hard-coded copy of the default value.

    diro = 'models/'
    try:
        os.mkdir(diro)
    except OSError:
        # Best effort, as before: typically the directory already exists.
        pass

    for i_class in classes:
        classe = 'class'+str(i_class)
        this_class_indx, other_class_indx = sample_selection(Y_classes,i_class)
        X = X_features[np.concatenate((this_class_indx,other_class_indx))]
        # Samples of the class come first (+1), the others after them (-1).
        Y = np.zeros((len(X),1))
        Y[:len(this_class_indx)] = 1
        Y[len(this_class_indx):] = -1
        Y_list,X_list = _convert_arrays2lists(Y,X)
        model_classes.append(svm_train(Y_list,X_list,training_options))
        svm_save_model(diro+output+classe+'.model',model_classes[-1])
        np.savetxt(output+classe+'_svr.dat',np.concatenate((Y,X),axis=1),fmt='%f')

    return model_classes
def computeLayer(self, layer):
    """
    Collect training vectors while learning, otherwise classify the layer.

    While ``self.isLearning`` is set, the layer's vector is appended to
    ``self.learned`` (with class label 0) until 100 vectors exist and no
    model has been trained; at that point the SVM is trained instead. When
    not learning and a model exists, the vector is classified and the
    result printed.

    @param layer: the output HmaxLayer to store results in.
    """
    # NOTE(review): called without `self`; confirm LevelFilter.computeLayer
    # is meant to be called this way.
    LevelFilter.computeLayer(layer)

    # need to enable learning mode from UI (after S2 trained)
    # during training, need to pass in class labels
    vec = layer.array[:,0,0].tolist()

    if self.isLearning:
        if len(self.learned) >= 100 and self.svmModel is None:
            # model = svm_train(y, x [, 'training_options'])
            self.svmModel = svmutil.svm_train(self.classes, self.learned)
            return
        self.classes.append(0)
        self.learned.append(vec)
    elif self.svmModel is not None:
        # p_labs, p_acc, p_vals = svm_predict(y, x, model [,'predicting_options'])
        # x must be a list of instances. Training stores each `vec` as one
        # instance, so the single vector is wrapped in a list here (it used
        # to be passed bare, i.e. treated as a list of scalar "instances").
        pLabs, pAcc, pVals = svmutil.svm_predict([0], [vec], self.svmModel)
        # Same text as the Python 2 statement `print "SVM Result: ", a, b, c`.
        print("SVM Result:  %s %s %s" % (pLabs, pAcc, pVals))
def process(training_file, test_file, check, draw):
    """Train an RBF SVM on pickled 2-class data; optionally score and plot it.

    Both files must contain three consecutive pickles: the class-1 points,
    the class-2 points and the labels.

    Args:
        training_file: path of the pickled training data.
        test_file: path of the pickled test data.
        check: when true, run ``svm_predict`` on the test data (which prints
            the accuracy to stdout).
        draw: when true, plot the decision boundary over the test points.
    """

    def read_data(path):
        # Pickles are binary data, so the file is opened in 'rb' mode.
        # NOTE: pickle.load can execute arbitrary code; only load trusted files.
        with open(path, 'rb') as f:
            first = pickle.load(f)
            second = pickle.load(f)
            targets = pickle.load(f)
        # libsvm wants plain lists. Real lists (not map objects) are needed
        # so that the two classes can be concatenated with '+'.
        return ([list(row) for row in first],
                [list(row) for row in second],
                list(targets))

    # Load training data and train.
    class_1, class_2, labels = read_data(training_file)
    samples = class_1 + class_2
    problem = svmutil.svm_problem(labels, samples)
    # Don't print to stdout, use radial basis functions.
    param = svmutil.svm_parameter('-q -t 2')
    model = svmutil.svm_train(problem, param)

    # Load test data.
    class_1, class_2, labels = read_data(test_file)

    if check:
        # Sadly, this prints to stdout too :-/
        svmutil.svm_predict(labels, class_1 + class_2, model)  # Prints accuracy.

    if draw:
        def classify(x, y, model=model):
            pairs = [list(pair) for pair in zip(x, y)]
            return array(svmutil.svm_predict([0] * len(x), pairs, model)[0])

        imtools.plot_2d_boundary(
            [-6, 6, -6, 6], [array(class_1), array(class_2)], classify,
            [1, -1])
        show()
def test_convert_svmr_linear(self):
    """Convert a linear SVR trained on two iris features and dump data and model."""
    iris = load_iris()
    features = iris.data[:, :2]
    targets = iris.target

    settings = svmutil.svm_parameter()
    settings.svm_type = SVR
    settings.kernel_type = svmutil.LINEAR
    settings.eps = 1
    if noprint:
        settings.print_func = noprint

    problem = svmutil.svm_problem(targets, features.tolist())
    libsvm_model = svmutil.svm_train(problem, settings)

    node = convert(libsvm_model, "LibSvmSvmrLinear",
                   [('input', FloatTensorType())])
    self.assertTrue(node is not None)

    sample = features[:5].astype(numpy.float32)
    dump_data_and_model(sample, SkAPIReg(libsvm_model), node,
                        basename="LibSvmSvmrLinear-Dec3")
def run_kfold(param_dict, rows, numfold, kmers=None):
    """
    Run shuffled k-fold cross validation with an epsilon-SVR model.

    Args:
        param_dict: dictionary mapping a LIBSVM option letter to its value;
            each entry is appended to the options as ``-<key> <value>``
        rows: input rows
        numfold: k for cross validation
        kmers: list of kmers, default [1,2,3]
    Return:
        a list with one dict per fold: ``"test"`` holds the fold's test rows
        and ``"svmpred"`` the value returned by ``svm_predict`` for them
    """
    # A fresh list per call: the former mutable default ``[1,2,3]`` was one
    # shared object that a callee could modify for every later call.
    if kmers is None:
        kmers = [1, 2, 3]

    kf = KFold(numfold, shuffle=True)

    # -s 3: epsilon-SVR, -b 1: probability estimates, -q: quiet mode.
    param_str = "-s 3 -b 1 -q "
    param_str += " ".join(["-{} {}".format(k, v) for k, v in param_dict.items()])
    params = svmutil.svm_parameter(param_str)

    fold_results = []
    for train_idx, test_idx in kf.split(rows):
        train_list = [rows[i] for i in train_idx]
        test_list = [rows[i] for i in test_idx]

        y_train, x_train = libsvm_generate_matrix(train_list, kmers)
        y_test, x_test = libsvm_generate_matrix(test_list, kmers)

        train_prob = svmutil.svm_problem(y_train, x_train)
        model = svmutil.svm_train(train_prob, params)

        # y is only needed when we need the model performance
        svmpred = svmutil.svm_predict(y_test, x_test, model, options="-q")
        fold_results.append({"test": test_list, "svmpred": svmpred})
    return fold_results
def train(data, path):
    """Train a linear SVM on features extracted from ``data`` and save it.

    Does nothing (apart from a message) when ``path`` already exists.
    Otherwise it collects ``(classid, feature)`` pairs from
    ``extractfeat.extractFeat``, trains the model, creates the directory
    ``path`` and writes the model to ``path/scr`` and the integerizer to
    ``path/int``.

    Args:
        data: input handed to ``extractfeat.extractFeat``.
        path: directory to create for the saved model; must not exist yet.
    """
    # Single-argument print() calls behave the same on Python 2 and 3; the
    # former print statements were a SyntaxError on Python 3.
    if os.path.exists(path):
        print('Model path exists, do nothing.')
        return

    print("Loading features.")
    integerizer = tools.integerization.CIntegerization()
    labels = []
    samples = []
    for classid, feature in extractfeat.extractFeat(data, integerizer):
        labels.append(classid)
        samples.append(feature)

    print("Training SVM.")
    problem = svm.svm_problem(labels, samples)
    param = svm.svm_parameter()
    param.kernel_type = svm.LINEAR
    # NOTE(review): the original carried `param.C=1` next to a trailing '#',
    # so it may have been commented out. 1 is LIBSVM's documented default
    # cost, so setting it explicitly is a no-op either way.
    param.C = 1
    param.eps = 1
    param.probability = 1
    model = svmutil.svm_train(problem, param)

    print("Saving model.")
    os.mkdir(path)
    svmutil.svm_save_model(os.path.join(path, "scr"), model)
    integerizer.write(os.path.join(path, "int"))
def train(c, train_x, train_y):
    """Train a LIBSVM model with cost ``c`` and return it.

    The option string is ``-h 0 -s 0 -t 4 -c <c> -q``.
    """
    options = ' '.join(['-h 0', '-s 0', '-t 4', f'-c {c}', '-q'])
    return svmutil.svm_train(train_y, train_x, options)
def Training(self, kernel, target, data, IsKernel=False):
    """Train and return an SVM model.

    ``kernel`` is passed to ``svm.svm_parameter`` as its options argument;
    ``target`` and ``data`` form the problem, with ``IsKernel`` forwarded as
    ``isKernel``.
    """
    return svmutil.svm_train(
        svm.svm_problem(target, data, isKernel=IsKernel),
        svm.svm_parameter(kernel),
    )
sum1+=model_summary.wv[tok[0]] # print(sum1) sum1 = sum1/len(headlines[h]) # print(sum1) sum1 = np.concatenate((sum1,cat_vec[combine_f["Category"][h]],cat_vec[combine_f["Post_Type"][h]], arrays[h])) head_vec.append(sum1) head_vec = np.array(head_vec) feat_len = int(0.8*len(head_vec)) train = head_vec[:feat_len] res_train = combine_f["class"][:feat_len].tolist() test = head_vec[feat_len:] res_test = combine_f["class"][feat_len:].tolist() print(len(test), len(res_test)) model_svm = svmutil.svm_train(res_train, train,'-t 2 -c 2') y = svmutil.svm_predict(res_test, test, model_svm) confusion_matrix(res_test, y[0], labels=[0,1]) tn, fp, fn, tp = confusion_matrix(res_test, y[0], labels=[0,1]).ravel() recall = tp/(tp+fn) recall precision = tp/(tp+fp) precision f1 = precision*recall*2/(precision+recall) f1
from train import *
from tools import *
from libsvm.svmutil import svm_train, svm_save_model
from ml_ops import search, train, bootstrap
import argparse
import sys
import datetime

# Command-line driver: train an SVM (optionally after bootstrapping the
# training set) and save it under a timestamped name, or run the search.
modes = ['train', 'bootstrap', 'search']

parser = argparse.ArgumentParser()
parser.add_argument(
    "-m", "--mode",
    help="specify the mode [-m train|bootstrap|search]",
    required=True,
    choices=modes)
args = parser.parse_args()

# File-name prefix for each mode that produces a model.
model_prefixes = {'train': 'svm_trained_', 'bootstrap': 'svm_bootstrapped_'}

if args.mode in model_prefixes:
    hists, labels = train()
    if args.mode == 'bootstrap':
        hists, labels = bootstrap(hists, labels)
    svm = svm_train(labels, hists, '-s 0 -t 0 -c 1')
    # The timestamp is taken after training, with its space replaced by '_'.
    timestamp = str(datetime.datetime.now()).replace(' ', '_')
    model_name = model_prefixes[args.mode] + timestamp + '.dat'
    svm_save_model(model_name, svm)
elif args.mode == 'search':
    search()
f.close()

# Training set: genuine samples followed by fake ones. Each sample is
# turned into an {index: value} dict for libsvm.
train_data = train_true[::]
train_data.extend(train_fake)
train_data = [dict(enumerate(li)) for li in train_data]
# Labels follow the same order: +1 for genuine, -1 for fake.
train_label = [1] * len(train_true) + [-1] * len(train_fake)

# Evaluation set, built the same way.
eva_data = eva_true[::]
eva_data.extend(eva_fake)
eva_data = [dict(enumerate(li)) for li in eva_data]
eva_label = [1] * len(eva_true) + [-1] * len(eva_fake)

# The label lists built above are reused instead of being rebuilt inline.
model = svmutil.svm_train(train_label, train_data, '-c 0.03125 -g 0.25')
# Single-argument print() calls work on both Python 2 and 3; the former
# print statements were a SyntaxError on Python 3.
print(type(model))
p_label, p_acc, p_val = svmutil.svm_predict(eva_label, eva_data, model)
print(p_acc)
def train(y, x): train_len = int(1.0 * len(y)) prob = svmutil.svm_problem(y[:train_len], x[:train_len]) param = svmutil.svm_parameter('-t 2 -c 4 -b 1 -e 1e-12') m = svmutil.svm_train(prob, param) return m
acc = numpy.sum(1.0 * (res == test_labels)) / len(test_labels)
# '%s' formatting prints the same text as the Python 2 statement
# `print 'Bayes Accuracy:', acc` and also works on Python 3.
print('Bayes Accuracy: %s' % (acc,))
print_confusion(res, test_labels, classnames)
# FIXME: Bayes accuracy gets very bad if the input dimensions aren't reduced
# enough. Probably some float underflow due to things not using log
# probabilities?

# Test SVM.
# Real lists are required: on Python 3 map() returns a one-shot iterator.
features = [list(row) for row in features]
test_features = [list(row) for row in test_features]

# libSVM needs int labels. The map works both ways: class name -> index and
# index -> class name.
str_int_map = {}
for i, c in enumerate(classnames):
    str_int_map[c], str_int_map[i] = i, c


def convert_labels(labels, str_int_map):
    """Translate each label through ``str_int_map`` (names <-> indices)."""
    return [str_int_map[l] for l in labels]


problem = svmutil.svm_problem(convert_labels(labels, str_int_map), features)
# Use a linear kernel, radial basis functions have horrible results (~20% acc)
param = svmutil.svm_parameter('-q -t 0')
model = svmutil.svm_train(problem, param)
res = svmutil.svm_predict(convert_labels(test_labels, str_int_map),
                          test_features, model)[0]
res = convert_labels(res, str_int_map)
# `res` is a plain list here. Comparing through arrays keeps the comparison
# element-wise even if test_labels is a list too (list == list gives a
# single bool). NOTE(review): presumably test_labels is an ndarray already.
acc = numpy.sum(
    1.0 * (numpy.asarray(res) == numpy.asarray(test_labels))) / len(test_labels)
print('SVM Accuracy: %s' % (acc,))
print_confusion(res, test_labels, classnames)
################################################################################
## Train a SVM classification model
#
print("Fitting the classifier to the training set")
# Candidate values kept from the grid-search version of this script; no
# search runs below (the GridSearchCV call was disabled).
param_grid = {'C': [1e3, 5e3, 1e4, 5e4, 1e5],
              'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1], }
prob = svm_problem(y_train, X_train_pca)
# '-t 2' selects the RBF kernel. The former `param.kernel='rbf'` set an
# attribute that is not one of svm_parameter's fields, so it never reached
# LIBSVM (NOTE(review): RBF is also LIBSVM's default, so results should not
# change — confirm).
param = svm_parameter("-q -t 2")
param.C = 32
param.gamma = 0.0001
print("Pass")
model = svm_train(prob, param)
y_pred, pred_acc, pred_val = svm_predict(y_test, X_test_pca, model)

################################################################################
## Quantitative evaluation of the model quality on the test set
print(classification_report(y_test, y_pred, target_names=target_names))
print(confusion_matrix(y_test, y_pred, labels=range(n_classes)))
print("Predicting people's names on the test set")
def train():
    """Train an SVM on the loaded chord data and save it to ``model_file``.

    Prints the number of loaded samples before training.
    """
    (data, chords) = load_data()
    # print() with one argument prints the same text on Python 2 and 3; the
    # former print statement was a SyntaxError on Python 3.
    print(len(data))
    model = svmutil.svm_train(chords, data)
    svmutil.svm_save_model(model_file, model)
import os

from libsvm import svmutil

import ocr

# Train a linear SVM on the OCR training set and report how well it fits
# both the training and the testing data (svm_predict prints the accuracy).
OCR_PATH = '/Users/thakis/Downloads/data/sudoku_images/ocr_data/'

features, labels = ocr.load_ocr_data(os.path.join(OCR_PATH, 'training'))
test_features, test_labels = \
    ocr.load_ocr_data(os.path.join(OCR_PATH, 'testing'))

# Real lists are required: on Python 3 map() returns a one-shot iterator,
# which would be exhausted by training and empty when reused for prediction.
features = [list(row) for row in features]
test_features = [list(row) for row in test_features]

problem = svmutil.svm_problem(labels, features)
# -q: quiet, -t 0: linear kernel.
param = svmutil.svm_parameter('-q -t 0')
model = svmutil.svm_train(problem, param)

# Single-argument print() calls work on both Python 2 and 3; the former
# print statements were a SyntaxError on Python 3.
print('Training data fit:')
svmutil.svm_predict(labels, features, model)
print('Testing data fit:')
svmutil.svm_predict(test_labels, test_features, model)