Example #1
0
def train_search_user_defined_kernel():
    """
    Grid-search cost/gamma for the user-defined linear+RBF kernel.

    Trains a precomputed-kernel SVM (libsvm ``-t 4``) for every
    (cost, gamma) pair, evaluates it on the test split, and appends one
    CSV line per pair to ``udefined_search1.txt``.

    Relies on module-level names: ``Y_train``/``Y_test`` labels and the
    ``linear_RBF_kernel_train``/``linear_RBF_kernel_test`` helpers that
    produce the precomputed kernel matrices.
    """
    print('Start searching user-defined linear+RBF kernel...')
    with open('%s_search1.txt' % 'udefined', 'w') as f:
        cost_list = [1e-6, 1e-5, 1e-4, 1e-3, 1e-2]
        gamma_list = [1e-3, 1e-6, 1e-9]
        print('Cost: %s\ngamma: %s\n' % (cost_list, gamma_list), file=f)
        for cost in cost_list:
            for gamma in gamma_list:
                try:
                    start = time.time()

                    # isKernel=True: the "features" are precomputed
                    # kernel rows, not raw feature vectors.
                    prob = svm_problem(Y_train,
                                       linear_RBF_kernel_train(gamma),
                                       isKernel=True)
                    param = svm_parameter('-q -s 0 -t 4 -c %.14f' % cost)
                    model = svm_train(prob, param)
                    pred_label, pred_acc, pred_val = svm_predict(
                        Y_test, linear_RBF_kernel_test(gamma), model)

                    elapsed_time = time.time() - start
                    print('%f,%f,%f,%f' %
                          (cost, gamma, pred_acc[0], elapsed_time),
                          file=f)
                    print([cost, gamma, pred_acc[0], elapsed_time])
                except Exception:
                    # BUGFIX: was a bare ``except:`` which also swallowed
                    # KeyboardInterrupt/SystemExit.  A failed combination
                    # is still logged and the search continues.
                    print('%f,%f(FAIL)' % (cost, gamma), file=f)
                # Flush after every combination so partial results
                # survive if the run is killed.
                f.flush()
Example #2
0
def train_predict(y_train: np.array,
                  x_train: np.array,
                  y_test: np.array,
                  x_test: np.array,
                  degree: int = 3,
                  c: int = 2):
    """Train a polynomial-kernel SVM and score it on the test split.

    Uses libsvm flags ``-t 1`` (polynomial kernel) with ``-r 2`` (coef0),
    the given ``degree`` and cost ``c``, quiet mode.

    Returns a triple ``(accuracy, mse, n_support_vectors)``.
    """
    train_options = f'-r 2 -t 1 -d {degree} -c {c} -q'
    model = svmutil.svm_train(y_train, x_train, train_options)
    # svm_predict returns (labels, (acc, mse, scc), decision values).
    _, (accuracy, mean_sq_err, _), _ = svmutil.svm_predict(
        y_test, x_test, model, '-q')
    # model.l is libsvm's count of support vectors.
    return accuracy, mean_sq_err, model.l
Example #3
0
def train_svm(kernel, parameter_for_training=''):
    """
    Fit an SVM with the named kernel and report test accuracy.

    ``kernel`` is one of 'linear', 'polynomial', or 'RBF'; extra libsvm
    flags may be passed via ``parameter_for_training``.  Uses the
    module-level ``X_train``/``Y_train``/``X_test``/``Y_test`` splits.
    Returns ``(test_accuracy, elapsed_seconds)``.
    """
    kernel_codes = {'linear': 0, 'polynomial': 1, 'RBF': 2}

    t0 = time.time()
    # Assemble training flags: quiet, C-SVC, selected kernel type.
    opts = parameter_for_training + ' -q -s 0 -t %d' % (kernel_codes[kernel])
    model = svm_train(Y_train, X_train, opts)
    # Evaluate on the held-out split; accuracy is the first acc element.
    _, accuracy, _ = svm_predict(Y_test, X_test, model)
    return accuracy[0], time.time() - t0
Example #4
0
    def fit_validate(self,
                     X_tr,
                     y_tr,
                     X_va,
                     y_va,
                     kernel_type,
                     c,
                     extra_options=None):
        """Train on (X_tr, y_tr), then score on (X_va, y_va).

        Stores the libsvm option string on ``self.options`` and the
        fitted model on ``self.model``; returns whatever
        ``self.predict`` reports for the validation split.
        """
        opts = '-q -t %d -c %f ' % (kernel_type, c)
        if extra_options is not None:
            opts += extra_options
        self.options = opts
        # tolist(): libsvm's python wrapper expects plain lists.
        self.model = svmutil.svm_train(y_tr.tolist(), X_tr.tolist(),
                                       opts.rstrip())
        return self.predict(X_va, y_va)
Example #5
0
def libsvm(lb_tr, ins_tr, lb_te, ins_te,
           grid_file='dataset/YaleB.scale.tr',
           grid_options='-log2c -1,1,1 -log2g -1,1,1'):
    """
    libsvm classifier: grid-search C/gamma, train, and evaluate.

    Parameters
    ----------
    lb_tr, ins_tr : training labels and instances
    lb_te, ins_te : test labels and instances
    grid_file : str
        libsvm-format file the C/gamma grid search reads.
        NOTE(review): the search runs on this file, NOT on the
        (lb_tr, ins_tr) passed in — the two are presumably the same
        data; confirm with the caller.  Previously hard-coded; now a
        parameter with the original value as default.
    grid_options : str
        ``grid.find_parameters`` search-range flags.

    Returns
    -------
    accu : float
        classification accuracy on the test set
    co : confusion matrix (sklearn.metrics via ``met``)
    """
    # find_parameters returns (best_cv_rate, best_params); the rate is
    # not needed here.
    _, best = grid.find_parameters(grid_file, grid_options)
    prob = svmutil.svm_problem(lb_tr, ins_tr)
    param = svmutil.svm_parameter('-c %f -g %f' % (best['c'], best['g']))
    m = svmutil.svm_train(prob, param)
    p_label, p_acc, p_val = svmutil.svm_predict(lb_te, ins_te, m)
    co = met.confusion_matrix(lb_te, p_label)
    accu = p_acc[0]
    return accu, co
Example #6
0
 def cross_validation(self, X, y, kernel_type, c, extra_options=None):
     """Run 5-fold cross-validation and return the CV accuracy.

     For non-precomputed kernels the feature count is cached on
     ``self.features`` and ``X`` is converted to plain lists for the
     libsvm wrapper.  The option string is stored on ``self.options``
     and the raw training data on ``self.X_train``/``self.y_train``.
     With ``-v``, ``svm_train`` returns an accuracy, not a model.
     """
     if kernel_type == PRECOMPUTED:
         samples = X
     else:
         self.features = X.shape[1]
         samples = X.tolist()
     opts = '-v 5 -q -t %d -c %f ' % (kernel_type, c)
     if extra_options is not None:
         opts += extra_options
     self.options = opts
     accuracy = svmutil.svm_train(y.tolist(), samples, opts.rstrip())
     # Keep the raw training data around for later inspection.
     self.X_train = X
     self.y_train = y
     return accuracy
Example #7
0
def train(y, X):
    """Fit a libsvm model on (y, X) using the module-level ``option`` string."""
    return svmutil.svm_train(y, X, option)
Example #8
0
# -*- coding: utf-8 -*-
"""Exercise 6.2 — compare linear and RBF SVM kernels on data.txt."""
import numpy as np
from libsvm.python import svm, svmutil

import os

# Resolve data.txt relative to this file so the script works from any CWD.
dir = os.path.dirname(__file__)
abspath = os.path.join(dir, 'data.txt')
prob = svmutil.svm_read_problem(abspath)  # -> (labels, instances)
print('Linear\n')
svmutil.svm_train(prob[0], prob[1], '-t 0')  # -t 0: linear kernel
print('RBF\n')
svmutil.svm_train(prob[0], prob[1], '-t 2')  # -t 2: RBF kernel
# NOTE(review): this fragment uses ``files``/``base_dir`` before they are
# defined in the visible code — an ``os.listdir(base_dir)`` line (like the
# one used for the validation set below) appears to be missing above; confirm.
files = [el for el in files if '.npy' in el]
files = [el for el in files if 'Val' not in el]  # training set: exclude validation files
dat_list = [np.load(base_dir + el, allow_pickle=True) for el in files]

k = 20
holdout = 0
# form_svm_input is defined elsewhere in the original project.
y, x, y_test, x_test = form_svm_input(dat_list, k, holdout)

# train SVM
#y, x = scipy.asarray(Y), scipy.asarray(X)
#y_test, x_test = scipy.asarray(Y_test), scipy.asarray(X_test)
x = scipy.transpose(x)
#x_test = scipy.transpose(x_test)
prob = svmutil.svm_problem(y, x)
#param = svmutil.svm_parameter('-s 1 -t 1 -c 1 -n .05 -b 0 -m 10240') #1 works best, cost of 100 seems good (.000001 multiclass)
param = svmutil.svm_parameter('-t 1 -c .000001 -b 0')  # -t 1: polynomial kernel, very small C
m = svmutil.svm_train(prob, param)

#label = svmutil.svm_predict(y_test, x_test, m)

# Reload the held-out validation files and evaluate the trained model.
files = os.listdir(base_dir)
files = [el for el in files if '.npy' in el]
files = [el for el in files if 'Val' in el]
dat_list = [np.load(base_dir + el, allow_pickle=True) for el in files]

y_test, x_test, dc1, dc2 = form_svm_input(dat_list, k, 0)
x_test = scipy.transpose(x_test)
label = svmutil.svm_predict(y_test, x_test, m)  # -> (labels, acc, values)

print(confusion_matrix(label[0], y_test))
                    print >>fw, features
            rate, param = find_parameters(svm_data_file, '-s 0 -t 2 -log2c -1,10,1 -log2g 3,-15,-2 -v 5 -gnuplot null')
            os.remove(svm_data_file)
            C_g_dict = {'C':param['c'], 'gamma':param['g'], 'Rate': rate}
        except Exception, e:
            print e
            # use the default one 
            C_g_dict = {'C':DEFAULT_SVM_C, 'gamma':DEFAULT_SVM_g}
    else:
        C_g_dict = {'C':SVM_C, 'gamma':SVM_g}
    
    # --------------------------------------------------------------------------
    # SVM train (using LIBSVM) 
    train_param = svm_parameter('-t 2 -s 0 -b 1 -c %f -g %f -q' % (C_g_dict['C'], C_g_dict['gamma']))
    train_prob  = svm_problem(seed_docs_cls, seed_docs_theta)
    seed_docs_svm_mdl = svm_train(train_prob, train_param)
     
#     # SVM prediction for all the documents in the corpus 
#     num_corpus_docs = len(features) 
#     svm_labels, _, svm_decision_values = svm_predict([0]*num_corpus_docs, 
#                                                      features, seed_docs_svm_mdl, 
#                                                      '-b 0 -q')
#     svm_prediction = [[doc_id, svm_label, svm_decision_values[doc_id][0]] 
#                       for doc_id, svm_label in enumerate(svm_labels)] 
    

    # SVM prediction only for the test documents
    num_test_docs = len(test_doc_ids)
    svm_labels, _, svm_decision_values = svm_predict([0]*num_test_docs, 
                                                     test_docs_features, 
                                                     seed_docs_svm_mdl, 
Example #11
0
def find_parameters(problem, options):
    """
    Grid-search SVM hyper-parameters by cross-validated accuracy.

    Parameters
    ----------
    problem :
        An ``svm_problem`` (or data accepted by ``svmutil.svm_train``).
    options : str or list
        grid.py-style flags: ``-t`` kernel type,
        ``-log2c begin,end,step`` (or ``null`` to disable the C grid),
        ``-log2g ...``, ``-log2r ...``, and ``-d max_degree`` (or
        ``null``).

    Returns
    -------
    (ret, tables, col) :
        best parameter values followed by the best CV rate, the per-job
        result table, and column labels for ``ret``.
    """
    if type(options) == str:
        options = options.split()

    fold = 5
    kernel_type = 2
    c_begin, c_end, c_step = -5, 15, 2
    g_begin, g_end, g_step = 3, -15, -2
    r_begin, r_end, r_step = -2, 15, 2
    d_end = 3
    grid_with_c, grid_with_g, grid_with_r, grid_with_d = True, False, False, False

    i = 0
    while i < len(options):
        if options[i] == '-t':
            i = i + 1
            kernel_type = options[i]
        elif options[i] == '-log2c':
            i = i + 1
            if options[i] == 'null':
                grid_with_c = False
            else:
                c_begin, c_end, c_step = map(float, options[i].split(','))
        elif options[i] == '-log2g':
            grid_with_g = True
            i = i + 1
            if options[i] == 'null':
                grid_with_g = False
            else:
                g_begin, g_end, g_step = map(float, options[i].split(','))
        elif options[i] == '-d':
            grid_with_d = True
            i = i + 1
            if options[i] == 'null':
                # BUGFIX: previously cleared grid_with_g here, leaving the
                # degree grid enabled with d_seq = [1..3].
                grid_with_d = False
            else:
                d_end = int(options[i])
        elif options[i] == '-log2r':
            grid_with_r = True
            i = i + 1
            if options[i] == 'null':
                grid_with_r = False
            else:
                r_begin, r_end, r_step = map(float, options[i].split(','))
        i = i + 1

    c_seq = range_f(c_begin, c_end, c_step)
    g_seq = range_f(g_begin, g_end, g_step)
    r_seq = range_f(r_begin, r_end, r_step)
    d_seq = range_f(1, d_end, 1)

    tables = []
    col = []

    # A disabled dimension collapses to a single placeholder entry.
    if not grid_with_c:
        c_seq = [None]
    if not grid_with_g:
        g_seq = [None]
    if not grid_with_r:
        r_seq = [None]
    if not grid_with_d:
        d_seq = [None]

    # Cartesian product of all enabled grid dimensions.
    job = []
    for ci in range(len(c_seq)):
        for gi in range(len(g_seq)):
            for ri in range(len(r_seq)):
                for di in range(len(d_seq)):
                    job.append((c_seq[ci], g_seq[gi], r_seq[ri], d_seq[di]))

    best_rate = 0
    best_c = 0
    # BUGFIX: was misspelled ``bext_g``, so ``best_g`` could be unbound
    # when appended to the result below.
    best_g = 0
    best_r = 0
    best_d = 0

    for i in range(len(job)):
        table = []
        cmd_line = '-q -t {0} -v {1}'.format(kernel_type, fold)
        if grid_with_c:
            table.append(2**job[i][0])
            cmd_line += ' -c {0}'.format(2**job[i][0])
        if grid_with_g:
            table.append(2**job[i][1])
            cmd_line += ' -g {0}'.format(2**job[i][1])
        if grid_with_r:
            table.append(2**job[i][2])
            cmd_line += ' -r {0}'.format(2**job[i][2])
        if grid_with_d:
            table.append(job[i][3])
            cmd_line += ' -d {0}'.format(job[i][3])
        table.append(cmd_line)
        print('[{:5d}/{:5d}]'.format(i + 1, len(job)), end=' ')
        # With -v, svm_train returns the cross-validation accuracy.
        rate = svmutil.svm_train(problem, cmd_line)
        table.append(rate)
        tables.append(table)
        if best_rate < rate:
            best_rate = rate
            # BUGFIX: guard on grid_with_c (job[i][0] is None when the C
            # grid is disabled, so 2**int(None) raised TypeError) and drop
            # the int() truncation so the reported best values match the
            # values that were actually cross-validated.
            if grid_with_c:
                best_c = 2**job[i][0]
            if grid_with_g:
                best_g = 2**job[i][1]
            if grid_with_r:
                best_r = 2**job[i][2]
            if grid_with_d:
                best_d = job[i][3]
    ret = []
    ret.append(best_c)
    col.append('c')
    if grid_with_g:
        col.append('g')
        ret.append(best_g)
    if grid_with_r:
        col.append('r')
        ret.append(best_r)
    if grid_with_d:
        col.append('d')
        ret.append(best_d)
    ret.append(best_rate)
    col.append('options')
    col.append('rate')
    return ret, tables, col
Example #12
0
# Per-kernel results, keyed by kernel-type name.
m = {}
p_label = {}
p_acc = {}
p_val = {}
train_time = {}
test_time = {}
# True: train fresh models and save them; False: load previously saved models.
train_flag = True

y_test, x_test = svmutil.svm_read_problem('test.csv')
# Precomputed-kernel variant of the same test set.
y1, x_test_precomputed = svmutil.svm_read_problem('test_precomputed.csv')

# NOTE(review): ``prob``, ``options``, ``options_precomputed`` and the
# ``tic``/``toc`` timers come from earlier in the original file and are
# not visible in this fragment.
for kernel_type, opts in options.items():
    print('\tkernel type: {0}\n\t'.format(kernel_type), end='')
    tic()
    if (train_flag):
        m[kernel_type] = svmutil.svm_train(prob, opts)
        svmutil.svm_save_model('model/' + kernel_type + '.model',
                               m[kernel_type])
    else:
        m[kernel_type] = svmutil.svm_load_model('model/' + kernel_type +
                                                '.model')
    train_time[kernel_type] = toc()
    tic()
    p_label[kernel_type], p_acc[kernel_type], p_val[kernel_type] = \
    svmutil.svm_predict(y_test, x_test, m[kernel_type])
    test_time[kernel_type] = toc()
    print('\tresult(acc, mse, scc): {0}\n'.format(p_acc[kernel_type]))

for kernel_type, opts in options_precomputed.items():
    print('\tkernel type: {0}\n\t'.format(kernel_type), end='')
    tic()
Example #13
0
        categories.append(actual.rstrip())

data = myread(articles, './feeds')
# One-vs-rest: for each category, label matching articles +1 and the rest
# -1, then report cross-validation accuracy for an RBF-kernel SVM.
for cat in categories:
    vectors = []
    answers = []
    for title, answer in articles.items():
        if answer == cat:
            answers.append(1)
        else:
            answers.append(-1)
        vectors.append(data[title])
    prob = svm.svm_problem(answers, vectors)
    param = svm.svm_parameter('-t 2 -v 10')  # RBF kernel, 10-fold CV

    # With -v, svm_train returns the cross-validation accuracy, not a model.
    correct = svmu.svm_train(prob, param)
    print('Category: %s \n\t Percent Correct: %d' % (cat, correct))

#||||||||||||||||||||||||||||||||||||||||||||||
#Question 3

# Read the first tab-separated field of each line of the old data set.
with open("old_data_set.txt", 'r') as olddata:
    old_data = [record.split('\t')[0].rstrip() for record in olddata]

# Read one link per line from the twitter links file.
with open('twitter_links.txt', 'r') as linkfile:
    links = [entry.rstrip() for entry in linkfile]

# Pair links with their corresponding old-data entries, in file order.
data = dict(zip(links, old_data))
Example #14
0
def train(y, X):
    """Wrap ``svmutil.svm_train`` with the module-level ``option`` string."""
    fitted_model = svmutil.svm_train(y, X, option)
    return fitted_model