def train_search_user_defined_kernel():
    """Grid-search cost/gamma for the user-defined linear+RBF kernel.

    Trains a precomputed-kernel SVM (libsvm ``-t 4``) for every
    (cost, gamma) combination, logging accuracy and elapsed time to
    'udefined_search1.txt' and echoing progress to stdout.  Relies on the
    module-level Y_train/Y_test labels and the
    linear_RBF_kernel_train/linear_RBF_kernel_test helpers.
    """
    print('Start searching user-defined linear+RBF kernel...')
    with open('%s_search1.txt' % 'udefined', 'w') as f:
        cost_list = [1e-6, 1e-5, 1e-4, 1e-3, 1e-2]
        gamma_list = [1e-3, 1e-6, 1e-9]
        print('Cost: %s\ngamma: %s\n' % (cost_list, gamma_list), file=f)
        for cost in cost_list:
            for gamma in gamma_list:
                try:
                    start = time.time()
                    prob = svm_problem(Y_train, linear_RBF_kernel_train(gamma), isKernel=True)
                    param = svm_parameter('-q -s 0 -t 4 -c %.14f' % cost)
                    model = svm_train(prob, param)
                    pred_label, pred_acc, pred_val = svm_predict(
                        Y_test, linear_RBF_kernel_test(gamma), model)
                    elapsed_time = time.time() - start
                    print('%f,%f,%f,%f' % (cost, gamma, pred_acc[0], elapsed_time), file=f)
                    print([cost, gamma, pred_acc[0], elapsed_time])
                except Exception:
                    # BUG FIX: was a bare `except:`, which also swallowed
                    # KeyboardInterrupt/SystemExit.  Training failures are
                    # logged and the search continues.
                    print('%f,%f(FAIL)' % (cost, gamma), file=f)
                # Flush after every combination so partial results survive
                # a long (possibly interrupted) search.
                f.flush()
def train_predict(y_train: np.array, x_train: np.array, y_test: np.array, x_test: np.array, degree: int = 3, c: int = 2):
    """Train a polynomial-kernel SVM (coef0=2) and score it on the test split.

    Returns a ``(accuracy, mean_squared_error, n_support_vectors)`` tuple.
    """
    training_options = f'-r 2 -t 1 -d {degree} -c {c} -q'
    fitted = svmutil.svm_train(y_train, x_train, training_options)
    _, metrics, _ = svmutil.svm_predict(y_test, x_test, fitted, '-q')
    accuracy, mean_sq_err = metrics[0], metrics[1]
    # model.l is libsvm's count of support vectors.
    return accuracy, mean_sq_err, fitted.l
def train_svm(kernel, parameter_for_training=''):
    """Train an SVM using the named kernel ('linear', 'polynomial' or 'RBF').

    Returns a ``(test_accuracy, elapsed_seconds)`` pair; the timing covers
    both training and prediction.  Reads the module-level train/test splits.
    """
    kernel_codes = {'linear': 0, 'polynomial': 1, 'RBF': 2}
    t0 = time.time()
    # Build the libsvm argument string (classification, chosen kernel).
    svm_args = '%s -q -s 0 -t %d' % (parameter_for_training, kernel_codes[kernel])
    fitted = svm_train(Y_train, X_train, svm_args)
    _, accuracy_stats, _ = svm_predict(Y_test, X_test, fitted)
    return accuracy_stats[0], time.time() - t0
def fit_validate(self, X_tr, y_tr, X_va, y_va, kernel_type, c, extra_options=None):
    """Fit an SVM on the training split and score it on the validation split.

    Stores the option string and the trained model on the instance, then
    delegates scoring to ``self.predict``.
    """
    opts = '-q -t %d -c %f ' % (kernel_type, c)
    if extra_options is not None:
        opts += extra_options
    self.options = opts
    self.model = svmutil.svm_train(y_tr.tolist(), X_tr.tolist(), opts.rstrip())
    return self.predict(X_va, y_va)
def libsvm(lb_tr, ins_tr, lb_te, ins_te):
    """libsvm classifier.

    Grid-searches c/gamma on the normalized libsvm-format training file
    under ./dataset, then trains on (lb_tr, ins_tr) and predicts on
    (lb_te, ins_te).

    Returns:
        accu -- classification accuracy
        co   -- confusion matrix
    """
    _, best = grid.find_parameters(
        'dataset/YaleB.scale.tr', '-log2c -1,1,1 -log2g -1,1,1')
    problem = svmutil.svm_problem(lb_tr, ins_tr)
    svm_params = svmutil.svm_parameter('-c %f -g %f' % (best['c'], best['g']))
    classifier = svmutil.svm_train(problem, svm_params)
    predicted, stats, _ = svmutil.svm_predict(lb_te, ins_te, classifier)
    confusion = met.confusion_matrix(lb_te, predicted)
    return stats[0], confusion
def cross_validation(self, X, y, kernel_type, c, extra_options=None):
    """Run libsvm 5-fold cross-validation and return the CV accuracy.

    For non-precomputed kernels the feature count is cached on the
    instance and X is converted to a plain list for libsvm.  The option
    string and the training data are also recorded on the instance.
    """
    if kernel_type != PRECOMPUTED:
        self.features = X.shape[1]
        data = X.tolist()
    else:
        data = X
    opts = '-v 5 -q -t %d -c %f ' % (kernel_type, c)
    if extra_options is not None:
        opts += extra_options
    self.options = opts
    # With -v, svm_train returns the cross-validation accuracy, not a model.
    accuracy = svmutil.svm_train(y.tolist(), data, opts.rstrip())
    self.X_train = X
    self.y_train = y
    return accuracy
def train(y, X):
    """Fit and return a libsvm model on (y, X) using the module-level `option` string."""
    return svmutil.svm_train(y, X, option)
# -*- coding: utf-8 -*-
"""Exercise 6.2"""
import numpy as np
from libsvm.python import svm, svmutil
import os

# Resolve the data file relative to this script so the current working
# directory does not matter.  (Renamed from `dir`, which shadowed the
# builtin of the same name.)
script_dir = os.path.dirname(__file__)
abspath = os.path.join(script_dir, 'data.txt')
prob = svmutil.svm_read_problem(abspath)

print('Linear\n')
svmutil.svm_train(prob[0], prob[1], '-t 0')  # -t 0: linear kernel
print('RBF\n')
svmutil.svm_train(prob[0], prob[1], '-t 2')  # -t 2: RBF kernel
# Keep only training .npy files (validation files contain 'Val').
files = [el for el in files if '.npy' in el]
files = [el for el in files if 'Val' not in el]
dat_list = [np.load(base_dir + el, allow_pickle=True) for el in files]
k = 20
holdout = 0
y, x, y_test, x_test = form_svm_input(dat_list, k, holdout)

# train SVM
# BUG FIX: scipy.transpose/scipy.asarray were deprecated NumPy aliases and
# have been removed from modern SciPy; use numpy directly.
x = np.transpose(x)
prob = svmutil.svm_problem(y, x)
# -t 1 (polynomial kernel) works best; cost of 100 seems good
# (.000001 for multiclass).
param = svmutil.svm_parameter('-t 1 -c .000001 -b 0')
m = svmutil.svm_train(prob, param)

# Evaluate on the held-out validation files.
files = os.listdir(base_dir)
files = [el for el in files if '.npy' in el]
files = [el for el in files if 'Val' in el]
dat_list = [np.load(base_dir + el, allow_pickle=True) for el in files]
y_test, x_test, dc1, dc2 = form_svm_input(dat_list, k, 0)
x_test = np.transpose(x_test)
label = svmutil.svm_predict(y_test, x_test, m)
# svm_predict returns (labels, accuracy_stats, values); labels come first.
print(confusion_matrix(label[0], y_test))
print >>fw, features rate, param = find_parameters(svm_data_file, '-s 0 -t 2 -log2c -1,10,1 -log2g 3,-15,-2 -v 5 -gnuplot null') os.remove(svm_data_file) C_g_dict = {'C':param['c'], 'gamma':param['g'], 'Rate': rate} except Exception, e: print e # use the default one C_g_dict = {'C':DEFAULT_SVM_C, 'gamma':DEFAULT_SVM_g} else: C_g_dict = {'C':SVM_C, 'gamma':SVM_g} # -------------------------------------------------------------------------- # SVM train (using LIBSVM) train_param = svm_parameter('-t 2 -s 0 -b 1 -c %f -g %f -q' % (C_g_dict['C'], C_g_dict['gamma'])) train_prob = svm_problem(seed_docs_cls, seed_docs_theta) seed_docs_svm_mdl = svm_train(train_prob, train_param) # # SVM prediction for all the documents in the corpus # num_corpus_docs = len(features) # svm_labels, _, svm_decision_values = svm_predict([0]*num_corpus_docs, # features, seed_docs_svm_mdl, # '-b 0 -q') # svm_prediction = [[doc_id, svm_label, svm_decision_values[doc_id][0]] # for doc_id, svm_label in enumerate(svm_labels)] # SVM prediction only for the test documents num_test_docs = len(test_doc_ids) svm_labels, _, svm_decision_values = svm_predict([0]*num_test_docs, test_docs_features, seed_docs_svm_mdl,
def find_parameters(problem, options):
    """Grid-search SVM hyper-parameters (c, g, r, d) by cross-validation.

    ``options`` is a string or token list of grid.py-style flags:
      -t <kernel>             kernel type (default 2)
      -log2c b,e,s | null     log2 range for c (searched by default)
      -log2g b,e,s | null     log2 range for g
      -log2r b,e,s | null     log2 range for r
      -d <max>    | null      polynomial degrees 1..max

    Returns ``(ret, tables, col)``: ``ret`` lists the best parameter values
    followed by the best CV rate, ``tables`` holds one row per tried
    combination, and ``col`` names ``ret``'s entries.
    """
    if type(options) == str:
        options = options.split()
    i = 0
    fold = 5
    kernel_type = 2
    c_begin, c_end, c_step = -5, 15, 2
    g_begin, g_end, g_step = 3, -15, -2
    r_begin, r_end, r_step = -2, 15, 2
    d_end = 3
    grid_with_c, grid_with_g, grid_with_r, grid_with_d = True, False, False, False
    while i < len(options):
        if options[i] == '-t':
            i = i + 1
            kernel_type = options[i]
        elif options[i] == '-log2c':
            i = i + 1
            if options[i] == 'null':
                grid_with_c = False
            else:
                c_begin, c_end, c_step = map(float, options[i].split(','))
        elif options[i] == '-log2g':
            grid_with_g = True
            i = i + 1
            if options[i] == 'null':
                grid_with_g = False
            else:
                g_begin, g_end, g_step = map(float, options[i].split(','))
        elif options[i] == '-d':
            grid_with_d = True
            i = i + 1
            if options[i] == 'null':
                # BUG FIX: '-d null' previously disabled grid_with_g
                # (copy-paste error), not grid_with_d.
                grid_with_d = False
            else:
                d_end = int(options[i])
        elif options[i] == '-log2r':
            grid_with_r = True
            i = i + 1
            if options[i] == 'null':
                grid_with_r = False
            else:
                r_begin, r_end, r_step = map(float, options[i].split(','))
        i = i + 1
    c_seq = range_f(c_begin, c_end, c_step)
    g_seq = range_f(g_begin, g_end, g_step)
    r_seq = range_f(r_begin, r_end, r_step)
    d_seq = range_f(1, d_end, 1)
    tables = []
    col = []
    # Disabled dimensions collapse to a single None entry so the product
    # below still enumerates every active combination exactly once.
    if not grid_with_c:
        c_seq = [None]
    if not grid_with_g:
        g_seq = [None]
    if not grid_with_r:
        r_seq = [None]
    if not grid_with_d:
        d_seq = [None]
    job = []
    for c_exp in c_seq:
        for g_exp in g_seq:
            for r_exp in r_seq:
                for d_val in d_seq:
                    job.append((c_exp, g_exp, r_exp, d_val))
    best_rate = 0
    best_c = 0
    # BUG FIX: was misspelled 'bext_g', which left best_g unbound (NameError
    # at ret.append) when no job improved on the initial rate.
    best_g = 0
    best_r = 0
    best_d = 0
    for i in range(len(job)):
        table = []
        cmd_line = '-q -t {0} -v {1}'.format(kernel_type, fold)
        if grid_with_c:
            table.append(2**job[i][0])
            cmd_line += ' -c {0}'.format(2**job[i][0])
        if grid_with_g:
            table.append(2**job[i][1])
            cmd_line += ' -g {0}'.format(2**job[i][1])
        if grid_with_r:
            table.append(2**job[i][2])
            cmd_line += ' -r {0}'.format(2**job[i][2])
        if grid_with_d:
            table.append(job[i][3])
            cmd_line += ' -d {0}'.format(job[i][3])
        table.append(cmd_line)
        print('[{:5d}/{:5d}]'.format(i + 1, len(job)), end=' ')
        # With -v, svm_train returns the cross-validation accuracy.
        rate = svmutil.svm_train(problem, cmd_line)
        table.append(rate)
        tables.append(table)
        if best_rate < rate:
            best_rate = rate
            # BUG FIX: guard on grid_with_c (job[i][0] is None when c is not
            # searched) and drop the int() truncation so the reported best
            # values match the values actually cross-validated.
            if grid_with_c:
                best_c = 2**job[i][0]
            if grid_with_g:
                best_g = 2**job[i][1]
            if grid_with_r:
                best_r = 2**job[i][2]
            if grid_with_d:
                best_d = job[i][3]
    ret = []
    ret.append(best_c)
    col.append('c')
    if grid_with_g:
        col.append('g')
        ret.append(best_g)
    if grid_with_r:
        col.append('r')
        ret.append(best_r)
    if grid_with_d:
        col.append('d')
        ret.append(best_d)
    ret.append(best_rate)
    col.append('options')
    col.append('rate')
    return ret, tables, col
# Per-kernel bookkeeping: trained models, predicted labels, accuracy stats,
# decision values, and train/test wall-clock timings.
m = {}
p_label = {}
p_acc = {}
p_val = {}
train_time = {}
test_time = {}
# When False, previously saved models are loaded from model/ instead of
# being retrained.
train_flag = True
y_test, x_test = svmutil.svm_read_problem('test.csv')
# Test features for the precomputed-kernel models; NOTE(review): y1 appears
# unused in this excerpt — presumably identical to y_test, verify upstream.
y1, x_test_precomputed = svmutil.svm_read_problem('test_precomputed.csv')
# Train (or load) and evaluate one model per standard-kernel option set.
for kernel_type, opts in options.items():
    print('\tkernel type: {0}\n\t'.format(kernel_type), end='')
    tic()
    if (train_flag):
        m[kernel_type] = svmutil.svm_train(prob, opts)
        svmutil.svm_save_model('model/' + kernel_type + '.model', m[kernel_type])
    else:
        m[kernel_type] = svmutil.svm_load_model('model/' + kernel_type + '.model')
    train_time[kernel_type] = toc()
    tic()
    p_label[kernel_type], p_acc[kernel_type], p_val[kernel_type] = \
        svmutil.svm_predict(y_test, x_test, m[kernel_type])
    test_time[kernel_type] = toc()
    print('\tresult(acc, mse, scc): {0}\n'.format(p_acc[kernel_type]))
# Same benchmark for precomputed-kernel option sets (loop body continues
# beyond this excerpt).
for kernel_type, opts in options_precomputed.items():
    print('\tkernel type: {0}\n\t'.format(kernel_type), end='')
    tic()
# NOTE(review): this first append likely belongs to a loop that begins
# before this excerpt; 'actual', 'articles' and 'myread' are defined
# earlier in the file — verify against the surrounding code.
categories.append(actual.rstrip())
data = myread(articles, './feeds')
# One-vs-rest evaluation: for each category, label matching articles +1 and
# everything else -1, then report 10-fold CV accuracy of an RBF-kernel SVM.
for cat in categories:
    vectors = []
    answers = []
    for title, answer in articles.items():
        if answer == cat:
            answers.append(1)
        else:
            answers.append(-1)
        vectors.append(data[title])
    prob = svm.svm_problem(answers, vectors)
    param = svm.svm_parameter('-t 2 -v 10')  # RBF kernel, 10-fold CV
    # With -v, svm_train returns the cross-validation accuracy.
    correct = svmu.svm_train(prob, param)
    print('Category: %s \n\t Percent Correct: %d' % (cat, correct))
#||||||||||||||||||||||||||||||||||||||||||||||
#Question 3
# Take the first tab-separated field of each line of the old data set and
# pair it with the corresponding line of twitter_links.txt (zip pairs them
# positionally; presumably the two files are line-aligned — verify).
old_data = []
with open("old_data_set.txt", 'r') as olddata:
    for line in olddata:
        old_data.append(line.split('\t')[0].rstrip())
links = []
with open('twitter_links.txt', 'r') as linkfile:
    for line in linkfile:
        links.append(line.rstrip())
data = dict(zip(links, old_data))
def train(y, X):
    """Train a libsvm model on (y, X) with the module-level `option` string and return it."""
    fitted_model = svmutil.svm_train(y, X, option)
    return fitted_model