def main() -> int:
    """Train each classifier using all the training data and run on test data."""
    y_train, x_train = libsvm_commonutil.svm_read_problem(TRAIN_DATA_PATH)
    y_test, x_test = libsvm_commonutil.svm_read_problem(TEST_DATA_PATH)

    trained_linear_kernel = libsvm_svmutil.svm_train(y_train, x_train, "-t 0")
    _, (train_linear_accuracy, _, _), _ = libsvm_svmutil.svm_predict(y_train, x_train, trained_linear_kernel)
    _, (test_linear_accuracy, _, _), _ = libsvm_svmutil.svm_predict(y_test, x_test, trained_linear_kernel)

    trained_polynomial_kernel = libsvm_svmutil.svm_train(y_train, x_train, "-t 1 -d 5")
    _, (train_polynomial_accuracy, _, _), _ = libsvm_svmutil.svm_predict(y_train, x_train, trained_polynomial_kernel)
    _, (test_polynomial_accuracy, _, _), _ = libsvm_svmutil.svm_predict(y_test, x_test, trained_polynomial_kernel)

    print(f"Linear Training Accuracy: {train_linear_accuracy}%")
    print(f"Linear Test Accuracy: {test_linear_accuracy}%")
    print(f"Polynomial Training Accuracy: {train_polynomial_accuracy}%")
    print(f"Polynomial Test Accuracy: {test_polynomial_accuracy}%")
    return EXIT_CODE_SUCCESS
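# A minimal sketch of how the main() above might be wired up as a script entry point.
# It assumes the official libsvm Python package layout (libsvm.commonutil / libsvm.svmutil)
# behind the import aliases used in the function, and that TRAIN_DATA_PATH, TEST_DATA_PATH
# and EXIT_CODE_SUCCESS are module-level constants; the concrete paths here are placeholders.
import sys
from libsvm import commonutil as libsvm_commonutil
from libsvm import svmutil as libsvm_svmutil

TRAIN_DATA_PATH = "train.libsvm"   # placeholder: training data in LIBSVM format
TEST_DATA_PATH = "test.libsvm"     # placeholder: test data in LIBSVM format
EXIT_CODE_SUCCESS = 0

if __name__ == "__main__":
    sys.exit(main())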
def TrainSvmLinear(Y, X, sweep_c=range(-2, 8)):
    num_positives = float(Y.count(1))
    num_negatives = float(Y.count(-1))
    best_c = -1
    best_acc = -1
    prob = svm.svm_problem(Y, X)
    for c_pow in sweep_c:
        current_c = np.power(2.0, c_pow)
        # 5-fold cross-validation with class weights inversely proportional to class counts.
        param = svm.svm_parameter('-v 5 -t 0 -c %f -w-1 %f -w1 %f -q' % (current_c, 100 / num_negatives, 100 / num_positives))
        current_acc = svm.svm_train(prob, param)
        print '%f, %f' % (current_c, current_acc)
        if best_acc < current_acc:
            best_acc = current_acc
            best_c = current_c
    # Retrain on the full data with the best C and report training accuracy.
    param = svm.svm_parameter('-t 0 -c %f -w-1 %f -w1 %f -q' % (best_c, 100 / num_negatives, 100 / num_positives))
    svm_model = svm.svm_train(prob, param)
    p_labs, p_acc, p_vals = svm.svm_predict(Y, X, svm_model, '-q')
    return svm_model
def part_d(): print("\n--- Part D ---\n") print("Reading Data") train_y, train_x = read_data("train") test_y, test_x = read_data("test") print("Normalizing") train_x = normalize(train_x) test_x = normalize(test_x) problem = svm_problem(train_y, train_x) params = "-q -s 0 -t 2 -g 0.05" results = [] for c in [10**-5, 10**-3, 1, 5, 10]: c = " -c %f " % c print("10-fold CV using" + c) cv_acc = svm_train(problem, params + c + "-v 10") print("On test data using" + c) model = svm_train(problem, params + c) _, test_acc, _ = svm_predict(test_y, test_x, model) print("C, Accuracy: ", c, cv_acc, test_acc) results.append((c, cv_acc, test_acc[0]))
def part_c(): print("\n--- Part C ---\n") print("Reading Data") train_y, train_x = read_data("train") test_y, test_x = read_data("test") print("Normalizing") train_x = normalize(train_x).tolist() test_x = normalize(test_x).tolist() problem = svm_problem(train_y, train_x) params = svm_parameter("-q -s 0 -c 1") # Timing calculations print("Training SVM (linear kernel)") params.parse_options("-t 0") model = svm_train(problem, params) _, p_acc, _ = svm_predict(test_y, test_x, model) print("Accuracy: ", p_acc) print("Training SVM (gaussian kernel)") params.parse_options("-t 2 -g 0.05") model = svm_train(problem, params) _, p_acc, _ = svm_predict(test_y, test_x, model) print("Accuracy: ", p_acc)
def calculate_race(): correct = 0 answers = [] input = [] count = 0 for d in data: answers.append(question2b_race_truth.truth[count]) input.append(d) if count == 49: break count += 1 prob = svmutil.svm_problem(answers, input) param = svmutil.svm_parameter('-t 2 -c 4') param.cross_validation = 1 param.nr_fold = 10 cv = svmutil.svm_train(prob, param) param = svmutil.svm_parameter('-t 2 -c 4') m = svmutil.svm_train(prob, param) count = 0 for d in data: if count < 50: count += 1 continue else: x0, max_idx = gen_svm_nodearray(d) p = libsvm.svm_predict(m, x0) if p == question2b_race_truth.truth[count]: correct += 1 count += 1 return cv, correct / float(50) * 100
def main() -> int: """Use 10-fold cross-validation to predict test accuracy for each kernel.""" y, x = libsvm_commonutil.svm_read_problem(TRAIN_DATA_PATH) ten_fold_cross_validator = KFold(n_splits=10, shuffle=True) linear_accuracies = [] polynomial_accuracies = [] for train_index, test_index in ten_fold_cross_validator.split(x): x_test = np.array(x)[test_index] x_train = np.array(x)[train_index] y_test = np.array(y)[test_index] y_train = np.array(y)[train_index] # For reference on options used in training, refer to: https://www.csie.ntu.edu.tw/~cjlin/libsvm/ trained_linear_kernel = libsvm_svmutil.svm_train(y_train, x_train, "-t 0") _, (linear_accuracy, _, _), _ = libsvm_svmutil.svm_predict(y_test, x_test, trained_linear_kernel) linear_accuracies.append(linear_accuracy) trained_polynomial_kernel = libsvm_svmutil.svm_train(y_train, x_train, "-t 1 -d 5") _, (polynomial_accuracy, _, _), _ = libsvm_svmutil.svm_predict(y_test, x_test, trained_polynomial_kernel) polynomial_accuracies.append(polynomial_accuracy) print(f"Linear Accuracies: {linear_accuracies}") print(f"Avg. Linear Accuracy: {sum(linear_accuracies) / len(linear_accuracies)}") print(f"Polynomial Accuracies: {polynomial_accuracies}") print(f"Avg. Polynomial Accuracy: {sum(polynomial_accuracies) / len(polynomial_accuracies)}") return EXIT_CODE_SUCCESS
def TrainSvmRbf(Y, X, sweep_c=range(-5, 5), sweep_g=range(-5, 5)):
    num_negatives = float(Y.count(-1))
    num_positives = float(Y.count(1))
    best_c = -1
    best_g = -1
    best_acc = -1
    prob = svm.svm_problem(Y, X)
    for c_pow in sweep_c:
        for g_pow in sweep_g:
            current_c = np.power(2.0, c_pow)
            current_g = np.power(2.0, g_pow)
            # 5-fold cross-validation over the (C, gamma) grid with class-balanced weights.
            param = svm.svm_parameter('-v 5 -t 2 -c %f -g %f -w-1 %f -w1 %f -q' % (current_c, current_g, 100 / num_negatives, 100 / num_positives))
            current_acc = svm.svm_train(prob, param)
            print 'c = %f, g = %f, cv acc = %f' % (current_c, current_g, current_acc)
            if best_acc < current_acc:
                best_acc = current_acc
                best_c = current_c
                best_g = current_g
    # Retrain on the full data with the best (C, gamma) and report training accuracy.
    param = svm.svm_parameter('-t 2 -c %f -g %f -w-1 %f -w1 %f -q' % (best_c, best_g, 100 / num_negatives, 100 / num_positives))
    svm_model = svm.svm_train(prob, param)
    p_labs, p_acc, p_vals = svm.svm_predict(Y, X, svm_model, '-q')
    return svm_model
def main(): """Build representation from files.""" parser = argparse.ArgumentParser() parser.add_argument("-r", "--regen", help="increase output verbosity", action="store_true") args = parser.parse_args() if args.regen: train_class, train_frames = read_dir('dataset/train/*.txt', ' ') test_class, test_frames = read_dir('dataset/test/*.txt', ' ') rad_d1 = [get_rad(action) for action in train_frames] rad_d1_t = [get_rad(action) for action in test_frames] cust_d1 = [get_custom(action) for action in train_frames] cust_d1_t = [get_custom(action) for action in test_frames] with open('rad_d1', 'w') as f: f.writelines([' '.join(line) + '\n' for line in rad_d1]) with open('rad_d1.t', 'w') as f: f.writelines([' '.join(line) + '\n' for line in rad_d1_t]) with open('cust_d1', 'w') as f: f.writelines([' '.join(line) + '\n' for line in cust_d1]) with open('cust_d1.t', 'w') as f: f.writelines([' '.join(line) + '\n' for line in cust_d1_t]) rad_d2 = reformat(rad_d1, train_class) rad_d2_t = reformat(rad_d1_t, test_class) cust_d2 = reformat(cust_d1, train_class) cust_d2_t = reformat(cust_d1_t, test_class) with open('rad_d2', 'w') as f: f.writelines([' '.join(line) + '\n' for line in rad_d2]) with open('rad_d2.t', 'w') as f: f.writelines([' '.join(line) + '\n' for line in rad_d2_t]) with open('cust_d2', 'w') as f: f.writelines([' '.join(line) + '\n' for line in cust_d2]) with open('cust_d2.t', 'w') as f: f.writelines([' '.join(line) + '\n' for line in cust_d2_t]) # Train the models and test with them y, x = svm.svm_read_problem('rad_d2') y_t, x_t = svm.svm_read_problem('rad_d2.t') rad_model = svm.svm_train(y, x, '-s 0 -t 2 -c 2 -g 0.0005') rad_labels, (rad_acc, *_), _ = svm.svm_predict(y_t, x_t, rad_model) y, x = svm.svm_read_problem('cust_d2') y_t, x_t = svm.svm_read_problem('cust_d2.t') cust_model = svm.svm_train(y, x, '-s 0 -t 2 -c 8 -g 0.0005') cust_labels, (cust_acc, *_), _ = svm.svm_predict(y_t, x_t, cust_model) print(f'RAD accuracy: {rad_acc}') print(f'Custom accuracy: {cust_acc}')
def train(self): for i in range(4): self.convert() #rbf param1 = svmutil.svm_parameter("-t 2 -b 1 -c 1 -g 0.001") param2 = svmutil.svm_parameter("-t 2 -b 1 -c 0.1 -g 0.001") self.mr.append(svmutil.svm_train(self.problem[0], param1))#hist self.mr.append(svmutil.svm_train(self.problem[1], param2))#vector #linear param3 = svmutil.svm_parameter("-t 0 -b 1 -c 0.1") param4 = svmutil.svm_parameter("-t 0 -b 1 -c 0.01") self.ml.append(svmutil.svm_train(self.problem[0], param3))#hist self.ml.append(svmutil.svm_train(self.problem[1], param4))#vector self.images = self.images[1:]+self.images[:1]
def valid(self,datasets,opt,opp,method = fold,part_ids = None,seed = None,test_data = None): if seed is None: # If seed is not set. UNIX time is used as seed. seed = time.time() saving_seed = "%s/log/%s.log.seed" % (self._dir,self._name) with open(saving_seed,"w") as fp: # Save used seed value. fp.write("seed:%f\n" % seed) if part_ids is None: part_ids = datasets.pids groups = [(test,train) for test,train in method(part_ids,seed = seed)] for cnt,pdtsts in enumerate(groups): # cnt is number of cluster. if test_data is None: test = False ltest,dtest,itest = test2svm_prob(datasets.mkTest(pdtsts[0])) else: test = True ltest,dtest,itest = test2svm_prob(test_data.mkTest(test_data.pids)) print "start %s validation" % (cnt) ptrn,itrain = train2svm_prob(datasets.mkTrain(pdtsts[1])) #opt = svm.svm_parameter(opt) model = svmutil.svm_train(ptrn,opt) plbl,pacc,pval = svmutil.svm_predict(ltest,dtest,model,opp) # create saving direcotry #self._mkdir(cnt) # create log files self._save_log(itest,plbl,pval,cnt,test) model_name = "%s/model/%s.model.%s" % (self._dir,self._name,cnt)
def problem14(X_train, y_train):
    # Relabel: class 0 -> +1, every other class -> -1 (one-vs-rest).
    label = list(2 * (np.array(y_train) == 0) - 1)
    train = np.array(pd.DataFrame(X_train))
    for t in range(-3, 2):
        c = 10 ** t
        model = svmutil.svm_train(label, X_train, '-g 80 -c %s' % c)
        sv_indices = model.get_sv_indices()
        # (Assumed intent: report how many support vectors each C produces.)
        print('C = %g: %d support vectors' % (c, len(sv_indices)))
def train_list(self): print('Start to train.') paras = '-c 4 -t 0 -h 0 -m 1024' self.y, self.x = self.load_data_list(train=True) self.model = svmutil.svm_train(self.y, self.x, paras) svmutil.svm_save_model('./news/svmmodel', self.model) print('Train finished.')
def test_convert_svmc_linear_raw_multi(self): iris = load_iris() X = iris.data[:, :2] y = iris.target y[-5:] = 3 prob = svmutil.svm_problem(y, X.tolist()) param = svmutil.svm_parameter() param.svm_type = SVC param.kernel_type = svmutil.LINEAR param.eps = 1 param.probability = 0 if noprint: param.print_func = noprint libsvm_model = svmutil.svm_train(prob, param) node = convert(libsvm_model, "LibSvmNuSvmcMultiRaw", [('input', FloatTensorType(shape=['None', 2]))]) self.assertTrue(node is not None) X2 = numpy.vstack([X[:2], X[60:62], X[110:112], X[147:149]]) # 5x0, 5x1 dump_data_and_model( X2.astype(numpy.float32), SkAPICl(libsvm_model), node, basename="LibSvmSvmcRaw-Dec3", verbose=False, allow_failure= "StrictVersion(onnxruntime.__version__) <= StrictVersion('0.1.3')")
def test_convert_nusvmc(self): iris = load_iris() X = iris.data[:, :2] y = iris.target y[y == 2] = 1 prob = svmutil.svm_problem(y, X.tolist()) param = svmutil.svm_parameter() param.svm_type = NuSVC param.kernel_type = svmutil.RBF param.eps = 1 param.probability = 1 if noprint: param.print_func = noprint libsvm_model = svmutil.svm_train(prob, param) node = convert(libsvm_model, "LibSvmNuSvmc", [('input', FloatTensorType(shape=['None', 'None']))]) self.assertTrue(node is not None) dump_data_and_model( X[:5].astype(numpy.float32), SkAPIClProba2(libsvm_model), node, basename="LibSvmNuSvmc-Dec2", allow_failure= "StrictVersion(onnxruntime.__version__) <= StrictVersion('0.1.3')")
def kfold(data, labels, k): try: import svmutil except: return 0 prabs = [] for xxx in range(0, 10): picks = np.random.choice(len(data), len(data) / k, replace=False) testLabel = labels[picks] testPoint = data[picks] trainPoint = data[np.setdiff1d(range(0, len(data)), picks)] trainLabel = labels[np.setdiff1d(range(0, len(data)), picks)] trainLabel = trainLabel.tolist() trainPoint = trainPoint.tolist() prob = svmutil.svm_problem(trainLabel, trainPoint) param = svmutil.svm_parameter('-t 3 -c 4 -b 1 -q') testLabel = testLabel.tolist() testPoint = testPoint.tolist() m = svmutil.svm_train(prob, param) svmutil.svm_save_model('n.model', m) p_label, p_acc, p_val = svmutil.svm_predict(testLabel, testPoint, m, '-b 1') prabs.append(p_acc[0]) print sum(prabs) / float(len(prabs)) print 'std' + str(np.std(prabs)) return sum(prabs) / float(len(prabs))
def svm_predizer(X, Y, X_teste, Y_teste, C, G, K):
    """Train an SVM with the given cost, gamma and kernel, then classify the test sample(s).

    Parameters
    ----------
    X : training data matrix
    Y : vector with the class of each training sample
    X_teste : test sample(s)
    Y_teste : class of the test sample(s)
    C : cost value
    G : gamma value
    K : integer selecting the kernel to use

    Returns
    -------
    resultado : SVM classification of the test sample(s).
    """
    model = svm_train(Y, X, '-c %f -t %d -g %f -q' % (C, K, G))
    resultado = svm_predict(Y_teste, X_teste, model)
    return resultado
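# A small usage sketch for svm_predizer above. The data are made up, the import line is an
# assumption about where svm_train/svm_predict come from in the original module, and K=2
# selects LIBSVM's RBF kernel (-t 2).
from libsvm.svmutil import svm_train, svm_predict  # assumed import

X_treino = [[0.0, 0.0], [0.1, 0.2], [1.0, 1.0], [0.9, 1.1]]
Y_treino = [-1, -1, 1, 1]
X_novo = [[0.95, 1.05]]
Y_novo = [1]
resultado = svm_predizer(X_treino, Y_treino, X_novo, Y_novo, C=1.0, G=0.5, K=2)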
def TrainSvmLinear2(Y, X, sweep_c=range(-2,18)): num_positives = float(Y.count(1)) num_negatives = float(Y.count(-1)) best_c = -1 best_acc = -1 for c_pow in sweep_c: current_c = np.power(2.0,c_pow) param = svm.svm_parameter('-t 0 -c %f -w-1 %f -w1 %f -q' % (current_c, 100/num_negatives, 100/num_positives)) current_pos_acc, current_neg_acc = CrossValidate(Y, X, param) current_acc = current_pos_acc print '%f, %f, %f' % (current_c, current_acc, current_neg_acc) if best_acc < current_acc: best_acc = current_acc best_c = current_c prob = svm.svm_problem(Y,X) param = svm.svm_parameter('-t 0 -c %f -w-1 %f -w1 %f -q' % (best_c, 100/num_negatives, 100/num_positives)) svm_model = svm.svm_train(prob, param) p_labs, p_acc, p_vals = svm.svm_predict(Y, X, svm_model, '-q') return svm_model
def test_default_names(self): df = pd.DataFrame({'input': self.x}) # Test with probabilities spec = libsvm.convert(self.libsvm_model).get_spec() (_, _, probability_lists) = svm_predict(self.y, self.x, self.libsvm_model, '-b 1 -q') probability_dicts = [ dict(zip([1, 2], cur_vals)) for cur_vals in probability_lists ] df['classProbability'] = probability_dicts metrics = evaluate_classifier_with_probabilities( spec, df, verbose=False, probabilities='classProbability') self.assertLess(metrics['max_probability_error'], 0.00001) # Test model without probabilities no_probability_model = svmutil.svm_train(self.prob, svmutil.svm_parameter()) spec = libsvm.convert(no_probability_model).get_spec() self.assertEqual(len(spec.description.output), 1) self.assertEqual(spec.description.output[0].name, u'target') (df['prediction'], _, _) = svm_predict(self.y, self.x, no_probability_model, ' -q') metrics = evaluate_classifier(spec, df, verbose=False) self.assertEquals(metrics['num_errors'], 0)
def _evaluation_test_helper_with_probability(self, labels, allow_slow): import copy df = pd.DataFrame(self.x, columns=self.column_names) y = copy.copy(self.y) for i, val in enumerate(labels): y[i] = val probability_param = '-b 1' for param1 in self.non_kernel_parameters: for param2 in self.kernel_parameters: param_str = ' '.join([self.base_param, param1, param2, probability_param]) # print("PARAMS: ", param_str) param = svm_parameter(param_str) model = svm_train(self.prob, param) # Get predictions with probabilities as dictionaries (df['prediction'], _, probability_lists) = svm_predict(y, self.x, model, probability_param + ' -q') probability_dicts = [dict(zip([1, 2], cur_vals)) for cur_vals in probability_lists] df['probabilities'] = probability_dicts spec = libsvm.convert(model, self.column_names, 'target', 'probabilities') if macos_version() >= (10, 13): metrics = evaluate_classifier_with_probabilities(spec, df, verbose=False) self.assertEquals(metrics['num_key_mismatch'], 0) self.assertLess(metrics['max_probability_error'], 0.00001) if not allow_slow: break if not allow_slow: break
def printSvmValidationAccuracy(input, output):
    prob = svmutil.svm_problem(output, input)
    param = getSvmParam(True)
    accuracy = svmutil.svm_train(prob, param)
    return accuracy
def train(self,x,y): """ training using y=list,x=dict parameter = string of parameters """ prob=su.svm_problem(y,x) para="" para+= "-s %d -t %d -d %d -g %f -r %f -c %f -n %f -p %f -e %f -b %d" %\ ( self.type, self.kernel, self.degree, self.gamma, self.coef0, self.c, self.nu, self.p, self.eps, self.prob ) if(self.v!=0): para+=" -v %d" % self.v if(self.q!=0): para+= " -q" print para para1=su.svm_parameter(para) self.model=su.svm_train(prob,para1) return True
def test_multi_class_without_probability(self): # Generate some random data. # This unit test should not rely on scikit learn for test data. x, y = [], [] for _ in range(50): x.append([ random.gauss(200, 30), random.gauss(-100, 22), random.gauss(100, 42) ]) y.append(random.choice([1, 2, 10, 12])) y[0], y[1], y[2], y[3] = 1, 2, 10, 12 column_names = ['x1', 'x2', 'x3'] prob = svmutil.svm_problem(y, x) df = pd.DataFrame(x, columns=column_names) for param1 in self.non_kernel_parameters: for param2 in self.kernel_parameters: param_str = ' '.join([self.base_param, param1, param2]) param = svm_parameter(param_str) model = svm_train(prob, param) # Get predictions with probabilities as dictionaries (df['prediction'], _, _) = svm_predict(y, x, model, ' -q') spec = libsvm.convert(model, column_names, 'target') metrics = evaluate_classifier(spec, df, verbose=False) self.assertEquals(metrics['num_errors'], 0)
def train_test_model(train_datafile, test_datafile):
    from svmutil import svm_read_problem, svm_train, svm_predict
    y, x = svm_read_problem(train_datafile)
    m = svm_train(y, x, '-t 0 -e .01 -m 1000 -h 0')
    y_test, x_test = svm_read_problem(test_datafile)
    p_labs, p_acc, p_vals = svm_predict(y_test, x_test, m)
    return p_labs, p_acc, p_vals
def setUpClass(self): """ Set up the unit test by loading the dataset and training a model. """ if not HAS_SKLEARN: return if not HAS_LIBSVM: return scikit_data = load_boston() prob = svmutil.svm_problem( scikit_data['target'] > scikit_data['target'].mean(), scikit_data['data'].tolist()) param = svmutil.svm_parameter() param.svm_type = svmutil.C_SVC param.kernel_type = svmutil.LINEAR param.eps = 1 libsvm_model = svmutil.svm_train(prob, param) libsvm_spec = libsvm_converter.convert(libsvm_model, scikit_data.feature_names, 'target').get_spec() # Save the data and the model self.scikit_data = scikit_data self.libsvm_spec = libsvm_spec
def iqr_model_train(matrix_kernel_train, labels_train, idx2clipid, svm_para = '-w1 50 -t 4 -b 1 -c 1'): """ Light-weighted SVM learning module for online IQR @param matrix_kernel_train: n-by-n square numpy array with kernel values between training data @param labels_train: row-wise labels of training data (1 or True indicates positive, 0 or False otherwise @param idx2clipid: idx2clipid(row_idx) returns the clipid for the 0-base row in matrix @param svm_para: (optional) SVM learning parameter @rtype: dictionary with 'clipids_SV': list of clipids for support vectors @return: output as a dictionary with 'clipids_SV' """ log = logging.getLogger('iqr_model_train') # set training inputs matrix_kernel_train = np.vstack((np.arange(1, len(matrix_kernel_train)+1), matrix_kernel_train)).T log.debug("Done matrix_kernel_train") problem = svm.svm_problem(labels_train.tolist(), matrix_kernel_train.tolist(), isKernel=True) log.debug("Done problem") svm_param = svm.svm_parameter(svm_para) log.debug("Done svm_param") # train model model = svmutil.svm_train(problem, svm_param) log.debug("Done train model") # release memory del problem del svm_param log.debug("Done release memory") # check learning failure if model.l == 0: raise Exception('svm model learning failure') log.debug("Done checking learning failure (no failure)") n_SVs = model.l clipids_SVs = [] idxs_train_SVs = svmtools.get_SV_idxs_nonlinear_svm(model) for i in range(n_SVs): _idx_1base = idxs_train_SVs[i] _idx_0base = _idx_1base - 1 clipids_SVs.append(idx2clipid[_idx_0base]) model.SV[i][0].value = i+1 # within SVM model, index needs to be 1-base log.debug("Done collecting support vector IDs") #svmutil.svm_save_model(filepath_model, model) output = dict() output['model'] = model output['clipids_SVs'] = clipids_SVs return output
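# A hedged sketch of calling iqr_model_train with a tiny precomputed kernel. The linear
# kernel X.dot(X.T), the labels, and the clip ids are made-up stand-ins; the real pipeline
# builds matrix_kernel_train and idx2clipid elsewhere.
import numpy as np

np.random.seed(0)
X = np.random.rand(6, 4)
matrix_kernel_train = X.dot(X.T)                      # n-by-n kernel values
labels_train = np.array([1, 1, 1, 0, 0, 0])           # 1 = positive, 0 = negative
idx2clipid = {i: 'clip%03d' % i for i in range(6)}    # 0-based row -> clip id

out = iqr_model_train(matrix_kernel_train, labels_train, idx2clipid)
print(out['clipids_SVs'])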
def classify(train_inputs, train_outputs, test_inputs, test_outputs): kernel_train, kernel_test = \ compute_kernel_matrices(train_inputs, test_inputs) counter = 0 average_ap = 0 average_acc = 0 for label in range(train_outputs.shape[1]): n_tot = train_inputs.shape[0] n_pos = train_outputs[:, label].sum() n_neg = n_tot - n_pos w_pos = np.float32(n_tot)/(2*n_pos) w_neg = np.float32(n_tot)/(2*n_neg) option_string = '-t 4 -q -s 0 -b 1 -c %f -w1 %f -w0 %f' % (100, w_pos, w_neg) model = svmutil.svm_train( train_outputs[:, label].tolist(), kernel_train.tolist(), option_string) _, accuracy, prob_estimates = svmutil.svm_predict( test_outputs[:, label].tolist(), kernel_test.tolist(), model, '-b 1') ap = compute_aps(np.array(prob_estimates)[:,np.where(np.asarray(model.get_labels())==1)], test_outputs[:,label]) average_ap += ap average_acc += accuracy[0] counter += 1 print 'label = %d, ap = %f, w_neg = %f, w_pos = %f\n' % ( label, ap, w_neg, w_pos) mean_ap = np.float32(average_ap) / train_outputs.shape[1] mean_acc = np.float32(average_acc) / train_outputs.shape[1] print 'mean_ap = %f, mean_acc = %f\n' % (mean_ap, mean_acc)
def main(path, k):
    prabs = []
    lns = []
    for kk in range(0, k - 1):
        testLabel = []
        trainPoint = []
        trainLabel = []
        testPoint = []
        wcCount = 0
        for u in os.listdir(path):
            if u[-2:] == 'WC':
                wcCount += 1
                filePath = path + u
                WC = pickle.load(open(filePath, 'rb'))
                if wcCount % k == 0 + kk:
                    testLabel.append(int(u[1]))
                    testPoint.append(WC)
                else:
                    trainLabel.append(int(u[1]))
                    trainPoint.append(WC)
        lns.append(len(testLabel))
        prob = svmutil.svm_problem(trainLabel, trainPoint)
        param = svmutil.svm_parameter('-t 0 -c 4 -b 1 -q')
        m = svmutil.svm_train(prob, param)
        svmutil.svm_save_model('n.model', m)
        p_label, p_acc, p_val = svmutil.svm_predict(testLabel, testPoint, m, '-b 1')
        prabs.append(p_acc[0])
def CrossValidate(Y, X, param, k_folds=5): rand_idx = range(len(Y)) random.shuffle(rand_idx) idx_groups = SplitIntoK(k_folds, rand_idx) pos_acc = 0 neg_acc = 0 for i in range(k_folds): test_idx = idx_groups[i] exclude_test = [idx_groups[j] for j in range(len(idx_groups)) if i != j] train_idx = list(chain(*exclude_test)) Y_test = [Y[test_i] for test_i in test_idx] X_test = [X[test_i] for test_i in test_idx] Y_train = [Y[train_i] for train_i in train_idx] X_train = [X[train_i] for train_i in train_idx] # recompute accuracy prob = svm.svm_problem(Y_train,X_train) svm_model = svm.svm_train(prob, param) p_labs, p_acc, p_vals = svm.svm_predict(Y_test, X_test, svm_model, '-q') tps = sum([1 for j in range(len(p_labs)) if (p_labs[j]==1 and Y_test[j]==1)]) fns = sum([1 for j in range(len(p_labs)) if (p_labs[j]==-1 and Y_test[j]==1)]) tns = sum([1 for j in range(len(p_labs)) if (p_labs[j]==-1 and Y_test[j]==-1)]) fps = sum([1 for j in range(len(p_labs)) if (p_labs[j]==1 and Y_test[j]==-1)]) pos_acc += tps / float(tps + fns) neg_acc += tns / float(tns + fps) pos_acc = pos_acc / k_folds neg_acc = neg_acc / k_folds return (pos_acc, neg_acc)
def setUpClass(self): """ Set up the unit test by loading the dataset and training a model. """ if not HAS_LIBSVM: # setUpClass is still called even if class is skipped. return # Generate some random data. # This unit test should not rely on scikit learn for test data. self.x, self.y = [], [] random.seed(42) for _ in range(50): self.x.append([random.gauss(200, 30), random.gauss(-100, 22)]) self.y.append(random.choice([1, 2])) self.y[0] = 1 # Make sure 1 is always the first label it sees self.y[1] = 2 self.column_names = ['x1', 'x2'] self.prob = svmutil.svm_problem(self.y, self.x) param = svmutil.svm_parameter() param.svm_type = svmutil.C_SVC param.kernel_type = svmutil.LINEAR param.eps = 1 param.probability = 1 self.libsvm_model = svmutil.svm_train(self.prob, param)
def test_convert_svmc(self): iris = load_iris() X = iris.data[:, :2] y = iris.target y[y == 2] = 1 prob = svmutil.svm_problem(y, X.tolist()) param = svmutil.svm_parameter() param.svm_type = SVC param.kernel_type = svmutil.RBF param.eps = 1 param.probability = 1 if noprint: param.print_func = noprint libsvm_model = svmutil.svm_train(prob, param) node = convert(libsvm_model, "LibSvmSvmc", [('input', FloatTensorType())]) self.assertTrue(node is not None) dump_data_and_model(X[:5].astype(numpy.float32), SkAPIClProba2(libsvm_model), node, basename="LibSvmSvmc-Dec2")
def run_cross_validation(dTrain,dTest): 'Work with Polynomal kernel with cross validation' print '--run_cross_validation--' print '-- 1 versus 5 with Q = 2 and Cross Validation--' Cs = [0.0001,0.001,0.01,0.1,1] Ecvs = [[],[],[],[],[]] print '-- Train and Test --' dTrain_shuffle = dTrain # Try 100 runs with different partitions for j in range(100): # roll those dices shuffle(dTrain_shuffle) # Get data and formated vectors dTrain_1vs5 = getDataOneVsOne(dTrain_shuffle,1,5) X_train_1vs5,Y_train_1vs5 = get_svm_vector_format(dTrain_1vs5) # Try all Cs with cross validation for i in range(len(Cs)): # Type = Polynomial. Degree = 2. Gamma 1. # Coef = 1. C = Cs[i].Cross Validation at 10. be quiet options = '-t 1 -d 2 -g 1 -r 1 -c '+str(Cs[i])+ ' -v 10 -q' m = svm_train(Y_train_1vs5,X_train_1vs5,options) Ecvs[i].append(100 - m) # display print for i in range(len(Ecvs)): print 'Ecv = %s \tfor C = %s'%(sum(Ecvs[i])/100.,Cs[i]) print
def train_test(): train_subdir = "data/train/" test_subdir = "data/test/" img_kinds = ["happy", "anger", "neutral", "surprise"] models = {} params = "-t 0 -c 3" svm_params = { "happy": params, "anger": params, "neutral": params, "surprise": params} #train the models print 'BUILDING TRAIN MODELS' for img_kind in img_kinds: print "\t" + img_kind problem = build_problem(img_kind, train_subdir) param = svm.svm_parameter(svm_params[img_kind]) models[img_kind] = svmutil.svm_train(problem, param) print '================================' #for each image in test set let's see what is the answe total_count = 0 correct_count = 0 wrong_count = 0 print 'TESTING MODELS' for img_kind in img_kinds: images = glob.glob(test_subdir + "f_" + img_kind + "*.jpg") for image in images: print "\t" + image image_data = cv.LoadImage(image) # Let's see what are the results from the models results = {} for kind in img_kinds: test_data = get_image_features(image_data, True, kind) predict_input_data = [] predict_input_data.append(test_data) # do svm query (val, val_2, label) = svmutil.svm_predict([1] ,predict_input_data, models[kind]) results[kind] = label[0][0] sorted_results = sorted(results.iteritems(), key=operator.itemgetter(1)) result = sorted_results[len(sorted_results)-1][0] total_count += 1 if result == img_kind: print 'YES :' + result correct_count += 1 else: print 'NO :' + result print sorted_results wrong_count += 1 print '-----------------------' print '================================' print "Total Pictures: " + str(total_count) print "Correct: " + str(correct_count) print "Wrong: " + str(wrong_count) print "Accuracy: " + str(correct_count/float(total_count) * 100)
def train(cls, featuresets, params="-t 0 -q"): """Train a classifier using the given featuresets. Args: featuresets: List of featuresets. params: Parameter string to pass to svmutil.svm_parameter. Returns: SvmClassifier object. """ all_features = set() all_labels = set() for featuredict, label in featuresets: all_features.update(set(featuredict.keys())) all_labels.add(label) all_labels = sorted(all_labels) all_features = sorted(all_features) featureindex = dict(zip(all_features, range(1, len(all_features) + 1))) labelindex = dict(zip(all_labels, range(1, len(all_labels) + 1))) vectors, labels = cls.featuresets_to_svm(featureindex, labelindex, featuresets) prob = svmutil.svm_problem(labels, vectors) param = svmutil.svm_parameter(params) model = svmutil.svm_train(prob, param) return cls(featureindex, labelindex, model)
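# A usage sketch for the classmethod above. The docstring says it returns an SvmClassifier,
# so that class name is used here; the featuresets follow the (feature_dict, label) convention
# it documents, and the feature values are assumed to be numeric.
featuresets = [
    ({'contains_free': 1, 'length': 12}, 'spam'),
    ({'contains_free': 0, 'length': 48}, 'ham'),
    ({'contains_free': 1, 'length': 7}, 'spam'),
    ({'contains_free': 0, 'length': 33}, 'ham'),
]
classifier = SvmClassifier.train(featuresets, params='-t 0 -q')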
def __init__(self,train_feature_file = TRAIN_FEATURE_FILE): if os.path.exists(SAVED_MODEL): self.model = svmutil.svm_load_model(SAVED_MODEL) else: y, x = svmutil.svm_read_problem(train_feature_file) self.model = svmutil.svm_train(y, x, '-c 4') svmutil.svm_save_model(SAVED_MODEL,self.model)
def run(history, tweek, pref, selections): logstr = '' # w, x, y = history.grab(nsample, ntracks, tweek + 1, pref, 180, 90, 80000) x = xfilter(x) timer = timer_c() lsvm = svmutil.svm_train(y, x, options) #svmutil.svm_save_model('benchmark.svm', lsvm) #lsvm = svmutil.svm_load_model('benchmark.svm') print 'svm trained successfully in %s sec with %d samples.' % (str( float('{0:.3f}'.format(timer.lag()))), len(w)) # timer = timer_c() w, x, y = history.grab(1, ntracks, tweek, pref, 180, 90, 80000) x = xfilter(x) p_labels, p_acc, p_vals = svmutil.svm_predict(y, x, lsvm, '') print 'svm predicted successfully in %s sec with %d samples.' % (str( float('{0:.3f}'.format(timer.lag()))), len(w)) foo = [] for i in xrange(len(y)): foo.append((p_labels[i], y[i], w[i])) foo.sort(key=lambda tup: (-tup[0])) selections['code'].append(foo[0][2]) selections['tweek'].append(tweek) return [row[1] for row in foo]
def prob20(): import random gamma = [1, 10, 100, 1000, 10000] chosen = {1:0, 10:0, 100:0, 1000:0, 10000:0} for _ in range(100): Eout = [] for g in gamma: trainX, testX, trainy, testy = readdat() mul_label_2_bin(trainy, testy, 0) trainX = zip(trainX, trainy) random.shuffle(trainX) trainX, trainy = zip(*trainX) valX = trainX[:1000] valy = trainy[:1000] trainX = trainX[1000:] trainy = trainy[1000:] m = svmutil.svm_train(trainy, trainX, '-s 0 -t 2 -c 0.1 -g %f -h 0'%(g)) p_label, p_acc, p_val = svmutil.svm_predict(valy, valX, m) Eout.append(100.0 - p_acc[0]) chosen[gamma[Eout.index(min(Eout))]] += 1 print "prob20: ", for k in chosen.keys(): print "gamma=%d:%d, " % (k, chosen[k]), print ""
def prob2_to_4(): x = np.array([[1, 0], [0, 1], [0, -1], [-1, 0], [0, 2], [0, -2], [-2, 0]]) y = np.array([-1, -1, -1, 1, 1, 1, 1]) print "===prob 2===" xf = np.fliplr(x.copy()) print xf * xf - 2 * x + np.array([[3, -3]]) print "===prob 3===" prob = SVM.svm_problem(y.tolist(), x.tolist()) param = SVM.svm_parameter( '-t 1 -c 100 -d 2 -r 1 -g 1') # very large C for hard margin m = SVM.svm_train(prob, param) sumA = 0 poly = [0] * 6 # xx, xy, yy, x, y, 1 for i in xrange(m.l): idx = m.sv_indices[i] alphay = m.sv_coef[0][i] alpha = abs(m.sv_coef[0][i]) print "{:d} {:+1.2f}".format(idx, alpha) sumA += alpha v = x[idx - 1] poly[0] += alphay * v[0] * v[0] poly[1] += alphay * v[1] * v[0] * 2 poly[2] += alphay * v[1] * v[1] poly[3] += alphay * v[0] * 2 poly[4] += alphay * v[1] * 2 # poly[5] += alphay*1 # no need because Sum(alphay) = 0 poly[5] -= m.rho[0] print "Sum of alpha is {:1.3f}\nb = {}".format(sumA, m.rho[0]) print "{:+2.2f}xx {:+2.2f}xy {:+2.2f}yy {:+2.2f}x {:+2.2f}y {:+2.2f}".format( *poly)
def test_convert_svmc_raw(self): iris = load_iris() X = iris.data[:, :2] y = iris.target y[y == 2] = 1 prob = svmutil.svm_problem(y, X.tolist()) param = svmutil.svm_parameter() param.svm_type = SVC param.kernel_type = svmutil.RBF param.eps = 1 param.probability = 0 if noprint: param.print_func = noprint libsvm_model = svmutil.svm_train(prob, param) # known svm runtime dimension error in ONNX Runtime node = convert(libsvm_model, "LibSvmSvmcRaw", [('input', FloatTensorType(shape=['None', 'None']))]) self.assertTrue(node is not None) dump_data_and_model( X[:5].astype(numpy.float32), SkAPICl(libsvm_model), node, basename="LibSvmSvmcRaw", allow_failure= "StrictVersion(onnxruntime.__version__) < StrictVersion('0.5.0')")
def n_gram_svm(class_size, take_size): cost = np.array([2.0, 2.0, 2.0, 2.0, 2.0]) gamma = np.array([0.0078125, 0.0078125, 0.0078125, 0.0078125, 0.0078125]) tst = time() preset = np.load(get_feature_file('small')) vocab_size = int(np.max(preset[:, :-class_size])) + 1 np.random.shuffle(preset) train_set = preset[take_size:] train_txt = train_set[:, :-class_size].astype(np.int64) train_cls = train_set[:, -class_size:].astype(np.float32) train_dict = [{ gram: 1 if gram in txt else 0 for gram in np.arange(1, vocab_size) } for txt in train_txt] train_major: List[Set[int]] = [set() for _ in np.arange(class_size)] for i, cls in enumerate(train_cls): for k in np.nonzero(np.abs(cls - np.max(cls)) < 1e-4)[0]: train_major[k].add(i) models = [] for k in np.arange(class_size): problem = svm_problem([ 1 if i in train_major[k] else -1 for i in np.arange(len(train_cls)) ], train_dict) param = svm_parameter('-t 0 -c %f -g %f -b 1 -q' % (cost[k], gamma[k])) models.append(svm_train(problem, param)) train_time = time() - tst tst = time() test_set = preset[:take_size] test_txt = test_set[:, :-5].astype(np.int64) test_cls = test_set[:, -5:].astype(np.float32) res = np.array([]) test_dict = [{ gram: 1 if gram in txt else 0 for gram in np.arange(1, vocab_size) } for txt in test_txt] for dic, cls in zip(test_dict, test_cls): prob = np.zeros(class_size) for k in np.arange(class_size): _, _, p = svm_predict([], [dic], models[k], '-b 1 -q') prob[k] = p[0][0] prob /= np.sum(prob) res = np.append( res, cls @ prob / (np.linalg.norm(cls) * np.linalg.norm(prob))) test_time = time() - tst test_acc = np.mean(res) return train_time, test_time, test_acc
def test_default_names(self): df = pd.DataFrame({"input": self.x}) df["input"] = df["input"].apply(np.array) # Test with probabilities spec = libsvm.convert(self.libsvm_model).get_spec() if _is_macos() and _macos_version() >= (10, 13): (_, _, probability_lists) = svm_predict(self.y, self.x, self.libsvm_model, "-b 1 -q") probability_dicts = [ dict(zip([1, 2], cur_vals)) for cur_vals in probability_lists ] df["classProbability"] = probability_dicts metrics = evaluate_classifier_with_probabilities( spec, df, verbose=False, probabilities="classProbability") self.assertLess(metrics["max_probability_error"], 0.00001) # Test model without probabilities no_probability_model = svmutil.svm_train(self.prob, svmutil.svm_parameter()) spec = libsvm.convert(no_probability_model).get_spec() self.assertEqual(len(spec.description.output), 1) self.assertEqual(spec.description.output[0].name, u"target") if _is_macos() and _macos_version() >= (10, 13): (df["prediction"], _, _) = svm_predict(self.y, self.x, no_probability_model, " -q") metrics = evaluate_classifier(spec, df, verbose=False) self.assertEquals(metrics["num_errors"], 0)
def _test_prob_model(self, param1, param2): probability_param = '-b 1' df = self.df param_str = ' '.join( [self.base_param, param1, param2, probability_param]) param = svm_parameter(param_str) model = svm_train(self.prob, param) # Get predictions with probabilities as dictionaries (df['prediction'], _, probability_lists) = svm_predict(self.y, self.x, model, probability_param + ' -q') probability_dicts = [ dict(zip([1, 2], cur_vals)) for cur_vals in probability_lists ] df['probabilities'] = probability_dicts spec = libsvm.convert(model, self.column_names, 'target', 'probabilities') if macos_version() >= (10, 13): metrics = evaluate_classifier_with_probabilities(spec, df, verbose=False) self.assertEquals(metrics['num_key_mismatch'], 0) self.assertLess(metrics['max_probability_error'], 0.00001)
def Train_SVM_model(PathToFeatureFile):
    # Generate the trained model file, e.g. model.txt
    y, x = svmutil.svm_read_problem(PathToFeatureFile)
    model = svmutil.svm_train(y, x)
    modelFilePath = os.path.join(os.path.split(PathToFeatureFile)[0], f"model_{PathToFeatureFile.split('_', 1)[1]}")
    svmutil.svm_save_model(modelFilePath, model)
    print(modelFilePath)
def svm_training_function(vltr, vftr, parameters): assert isinstance(vltr, list) assert isinstance(vftr, list) assert isinstance(parameters, str) all_parameters = ' -q -s 0 -t 2 ' + parameters yellow_err('svm_training_function(): parameters = {0}'.format(all_parameters)) return svmutil.svm_train(vltr, vftr, all_parameters)
def _lib_train_libsvm(user_tfidf, num_pos, num_neg, ignore): sparse_user_tfidf, num_pos, num_neg = _convert_to_sparse_matrix(user_tfidf, num_pos, num_neg, ignore) labels = ([1] * num_pos) + ([-1] * num_neg) param = svm_parameter("-t %d" % KERNEL_NUMBER) prob = svm_problem(labels, sparse_user_tfidf) modellog = svm_train(prob, param) return modellog
def trainSVM(kernel, labels):
    # need to add a 1-based id number as the first column for precomputed kernels
    svmKernel = column_stack((arange(1, len(kernel.tolist()) + 1), kernel))
    prob = svm_problem(labels.tolist(), svmKernel.tolist(), isKernel=True)
    param = svm_parameter('-t 4')
    model = svm_train(prob, param)
    return model
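# A minimal sketch of feeding trainSVM a precomputed kernel (-t 4). A plain linear kernel
# X.dot(X.T) stands in here for whatever kernel the caller actually computes; the data are
# made up for illustration.
import numpy as np

np.random.seed(0)
X = np.random.rand(8, 3)
labels = np.array([1, 1, 1, 1, -1, -1, -1, -1])
gram = X.dot(X.T)              # 8x8 kernel matrix between training points
model = trainSVM(gram, labels)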
def simulate_skin_with_svm(data_size=None, train_params='-s 0 -t 0'): """Simulate learning skin data set with libsvm.""" convert_skin_to_svm(data_size) train_y, train_x = svm.svm_read_problem('skin_train.svm') model = svm.svm_train(train_y, train_x, train_params) test_y, test_x = svm.svm_read_problem('skin_test.svm') p_label, p_acc, p_val = svm.svm_predict(test_y, test_x, model)
def get_cross_val(x, y, x_val, y_val, gamma_c):
    prob = svmutil.svm_problem(y, x)
    param = svmutil.svm_parameter('-t 2 -q -c {0} -g {1}'.format(gamma_c.C, gamma_c.gamma))
    m = svmutil.svm_train(prob, param)
    svmutil.svm_save_model("model", m)
    p_label_validation, p_acc_validation, p_val_validation = svmutil.svm_predict(y_val, x_val, m)
    return p_acc_validation[0]
def trainSVM(trainMatrix, trainCategory): svm.svm_model.predict = lambda self, x: svm.svm_predict([0], [x], self)[0][0] prob = svm.svm_problem(trainCategory, trainMatrix) param = svm.svm_parameter() param.kernel_type = svm.LINEAR param.C = 10 model = svm.svm_train(prob, param) return model
def leave_one_out(y, x, param, n='DUMMY'):
    results = []
    for i, test in enumerate(zip(y, x)):
        # Hold out sample i, train on the rest. Note: param must enable probability
        # estimates ('-b 1') because svm_predict is called with '-b 1' below.
        training_y = y[:i] + y[i+1:]
        training_x = x[:i] + x[i+1:]
        problem = svm.svm_problem(training_y, training_x)
        model = svmutil.svm_train(problem, param)
        result = svmutil.svm_predict(y[i:i+1], x[i:i+1], model, '-b 1')
        results.append(result + (test[0], make_d.decode(x[i], make_d.decode_dic)))
    return results
def train_and_get(c, lr): c.reset_weight() c.compile_lr(lr) print '..building the cnn model: %r with lr: %r' % (c.nkerns, lr) c.fit_lr(n_epochs = 200, slient = True) features = test_features.tran2libsvm(c.get_feature(x)) m = svm_train(y[:l], features[:l], '-q') p_label, p_acc, p_val = svm_predict(y[l:], features[l:], m) ans = p_acc[0] return ans
def getSVMAccuracy(trainingData): numOutputs = len(trainingData[0][1]) for outputIndex in range(numOutputs): inputs = [input for (input, output) in trainingData] outputs = [output[outputIndex] for (input, output) in trainingData] prob = svmutil.svm_problem(outputs, inputs) param = svmAccuracy.getSvmParam(cross_validation_only = True) model = svmutil.svm_train(prob, param) print 'output index: %d - %s\n' % (outputIndex, {0 : "Index", 1: "Middle"}[outputIndex])
def _stop_training(self): super(LibSVMClassifier, self)._stop_training() self.normalizer = _LabelNormalizer(self.labels) labels = self.normalizer.normalize(self.labels.tolist()) features = self.data # Call svm training method. prob = libsvmutil.svm_problem(labels, features.tolist()) # Train self.model = libsvmutil.svm_train(prob, self.parameter)
def hiksvm_train(labels, features, beta):
    # calculate class prior
    np = len([1 for lab in labels if 1 == lab])
    nn = len([1 for lab in labels if -1 == lab])
    wp = float(beta) / np
    wn = (1.0 - beta) / nn
    wp *= (np + nn)
    wn *= (np + nn)
    parameters = "-s 0 -c 1 -t %d -w-1 %g -w1 %g" % (KERNEL_TYPE.index("HI"), wn, wp)
    model = svm_train(labels, features, parameters)
    return model
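# A call sketch for hiksvm_train. It assumes a LIBSVM build patched with a histogram
# intersection kernel and a module-level KERNEL_TYPE list containing "HI"; stock LIBSVM only
# ships kernel types 0-4, so this will not run unmodified. With beta = 0.5 the two classes
# receive equal total weight, since wp*np == beta*(np+nn) and wn*nn == (1-beta)*(np+nn).
labels = [1, 1, 1, -1, -1, -1]
features = [[0.2, 0.8], [0.3, 0.7], [0.25, 0.75],
            [0.9, 0.1], [0.8, 0.2], [0.85, 0.15]]
model = hiksvm_train(labels, features, beta=0.5)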
def train_test_model(train_datafile, test_datafile):
    """
    :param train_datafile: relative path
    :param test_datafile: relative path
    :return: trains a libsvm model using the training data in train_datafile
        and tests it on the data in test_datafile
    """
    from svmutil import svm_read_problem, svm_predict, svm_train
    y_test, x_test = svm_read_problem(test_datafile)
    y_train, x_train = svm_read_problem(train_datafile)
    model = svm_train(y_train, x_train, '-t 0 -e .01 -m 1000 -h 0')
    p_labs, p_acc, p_vals = svm_predict(y_test, x_test, model)
    return p_labs, p_acc, p_vals
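# Example invocation of train_test_model above; the file names are placeholders for
# LIBSVM-format data files on disk.
p_labs, p_acc, p_vals = train_test_model('train.libsvm', 'test.libsvm')
print('test accuracy: %.2f%%' % p_acc[0])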
def multiclass_train(valid_labels, labels, data, svm_parameters=None): if svm_parameters == None: # make default empty parameters svm_parameters = [] for i in valid_labels: svm_parameters.append(svmutil.svm_parameter()) models = [] for i in valid_labels: oaa_labels = relabel_one_against_all(labels, i) prob = svmutil.svm_problem(oaa_labels, data) model = svmutil.svm_train(prob, svm_parameters[i]) models.append(model) return models