def train_test_model(train_datafile, test_datafile):
    from svmutil import svm_read_problem, svm_train, svm_predict
    y,x = svm_read_problem(train_datafile)
    m = svm_train(y,x,'-t 0 -e .01 -m 1000 -h 0')
    y_test,x_test = svm_read_problem(test_datafile)
    p_labs,p_acc,p_vals = svm_predict(y_test,x_test,m)
    return p_labs, p_acc, p_vals
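A minimal usage sketch for the helper above; the file names are placeholders for LIBSVM-format data files:
p_labs, p_acc, p_vals = train_test_model('train.libsvm', 'test.libsvm')
accuracy, mse, scc = p_acc  # svm_predict reports accuracy, mean squared error, squared correlation
print('test accuracy: %.2f%%' % accuracy)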
Example #2
def main():
    """Build representation from files."""
    parser = argparse.ArgumentParser()
    parser.add_argument("-r",
                        "--regen",
                        help="regenerate the feature files from the dataset",
                        action="store_true")
    args = parser.parse_args()

    if args.regen:
        train_class, train_frames = read_dir('dataset/train/*.txt', ' ')
        test_class, test_frames = read_dir('dataset/test/*.txt', ' ')

        rad_d1 = [get_rad(action) for action in train_frames]
        rad_d1_t = [get_rad(action) for action in test_frames]

        cust_d1 = [get_custom(action) for action in train_frames]
        cust_d1_t = [get_custom(action) for action in test_frames]

        with open('rad_d1', 'w') as f:
            f.writelines([' '.join(line) + '\n' for line in rad_d1])
        with open('rad_d1.t', 'w') as f:
            f.writelines([' '.join(line) + '\n' for line in rad_d1_t])

        with open('cust_d1', 'w') as f:
            f.writelines([' '.join(line) + '\n' for line in cust_d1])
        with open('cust_d1.t', 'w') as f:
            f.writelines([' '.join(line) + '\n' for line in cust_d1_t])

        rad_d2 = reformat(rad_d1, train_class)
        rad_d2_t = reformat(rad_d1_t, test_class)

        cust_d2 = reformat(cust_d1, train_class)
        cust_d2_t = reformat(cust_d1_t, test_class)

        with open('rad_d2', 'w') as f:
            f.writelines([' '.join(line) + '\n' for line in rad_d2])
        with open('rad_d2.t', 'w') as f:
            f.writelines([' '.join(line) + '\n' for line in rad_d2_t])

        with open('cust_d2', 'w') as f:
            f.writelines([' '.join(line) + '\n' for line in cust_d2])
        with open('cust_d2.t', 'w') as f:
            f.writelines([' '.join(line) + '\n' for line in cust_d2_t])

    # Train the models and test with them
    y, x = svm.svm_read_problem('rad_d2')
    y_t, x_t = svm.svm_read_problem('rad_d2.t')
    rad_model = svm.svm_train(y, x, '-s 0 -t 2 -c 2 -g 0.0005')
    rad_labels, (rad_acc, *_), _ = svm.svm_predict(y_t, x_t, rad_model)

    y, x = svm.svm_read_problem('cust_d2')
    y_t, x_t = svm.svm_read_problem('cust_d2.t')
    cust_model = svm.svm_train(y, x, '-s 0 -t 2 -c 8 -g 0.0005')
    cust_labels, (cust_acc, *_), _ = svm.svm_predict(y_t, x_t, cust_model)

    print(f'RAD accuracy: {rad_acc}')
    print(f'Custom accuracy: {cust_acc}')
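The snippet above calls svm.svm_read_problem and several project-specific helpers (read_dir, get_rad, get_custom, reformat); the LIBSVM imports it assumes are roughly:
import argparse
import svmutil as svm                  # classic LIBSVM source-tree layout, or:
# from libsvm import svmutil as svm    # layout of the pip-installed LIBSVM package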
Example #3
def simulate_skin_with_svm(data_size=None, train_params='-s 0 -t 0'):
    """Simulate learning skin data set with libsvm."""
    convert_skin_to_svm(data_size)

    train_y, train_x = svm.svm_read_problem('skin_train.svm')
    model = svm.svm_train(train_y, train_x, train_params)

    test_y, test_x = svm.svm_read_problem('skin_test.svm')
    p_label, p_acc, p_val = svm.svm_predict(test_y, test_x, model)
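For reference, svm_read_problem parses the plain-text LIBSVM format (one "<label> <index>:<value> ..." line per instance) into a list of labels and a list of {index: value} dicts. A tiny illustrative round trip with made-up values:
with open('tiny.svm', 'w') as f:
    f.write('1 1:0.2 3:0.7\n')
    f.write('-1 2:0.5\n')
y, x = svm.svm_read_problem('tiny.svm')
# y == [1.0, -1.0]
# x == [{1: 0.2, 3: 0.7}, {2: 0.5}]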
Example #4
def train_grasp(grasp_type, side):
    """
    train_grasp(grasp_type, side): 
    train linear svm classifier for specific grasp type\n
    grasp_type: hand grasping type\n
    side: left hand or right hand\n
    """
    #train
    datafile = "model/traindata_grasp_" + grasp_type + "_" + side
    if not os.path.isfile(datafile):
        srcfile = "data/feature_grasp_train.csv"
        write_svmdata_grasp(srcfile, datafile, grasp_type, side, 0)
    label_train, data_train = svmutil.svm_read_problem(datafile)
    modelfile = "model/model_grasp_" + grasp_type + "_" + side
    m = []
    if not os.path.isfile(modelfile):
        print("train model: " + grasp_type + "_" + side)
        label_weight = {}
        for v in label_train:
            if v in label_weight:
                label_weight[v] += 1
            else:
                label_weight[v] = 1
        sorted_label = sorted(label_weight)
        param_weight = ' '
        for v in sorted_label:
            label_weight[v] = float(
                len(label_train)) / len(sorted_label) / label_weight[v]
            param_weight += '-w%d %f ' % (v, label_weight[v])
        prob = svmutil.svm_problem(label_train, data_train)
        param = svmutil.svm_parameter('-t 0 -b 1 -q' + param_weight)
        print('-t 0 -b 1 -q' + param_weight)
        #        param = svmutil.svm_parameter('-t 0 -c 4 -b 1 -q')
        m = svmutil.svm_train(prob, param)
        svmutil.svm_save_model(modelfile, m)
    else:
        print("load model: " + grasp_type + "_" + side)
        m = svmutil.svm_load_model(modelfile)
    #test
    grasp_info = read_info("data/feature_grasp_test.csv", side)
    datafile = "model/testdata_grasp_" + grasp_type + "_" + side
    if not os.path.isfile(datafile):
        srcfile = "data/feature_grasp_test.csv"
        write_svmdata_grasp(srcfile, datafile, grasp_type, side, 1)
    label_test, data_test = svmutil.svm_read_problem(datafile)
    p_label, p_acc, p_val = svmutil.svm_predict(label_test, data_test, m,
                                                '-b 1')
    f_result = open("result/grasp_" + grasp_type + "_" + side + ".csv", "w")
    for i in range(len(p_label)):
        f_result.write(grasp_info[i] + ", " + str(int(label_test[i])) + ", " +
                       str(int(p_label[i])) + ", ")
        f_result.write("[%.4f]\n" % p_val[i][0])
    f_result.close()
def train_test_model(train_datafile, test_datafile):
    """
    :param train_datafile: relative path
    :param test_datafile: relative path
    :return: trains a libsvm model using the training data in train_datafile and tests it on the data in test_datafile
    """
    from svmutil import svm_read_problem, svm_predict, svm_train
    y_test, x_test = svm_read_problem(test_datafile)
    y_train, x_train = svm_read_problem(train_datafile)
    model = svm_train(y_train, x_train, '-t 0 -e .01 -m 1000 -h 0')
    p_labs, p_acc, p_vals = svm_predict(y_test, x_test, model)
    return p_labs, p_acc, p_vals
Example #6
	def trainmodel(self, train, cv, test, modelsavepath):
		y, x = svmutil.svm_read_problem(train)  # read the training data
		# ycv, xcv = svm_read_problem(cv)  # read the validation set
		# ytest, xtest = svm_read_problem(test)  # read the test set
		prob = svm.svm_problem(y, x)
		param = svm.svm_parameter('-t 2 -c 0.5 -g 0.125 -b 1')
		model = svmutil.svm_train(prob, param)
		yt, xt = svmutil.svm_read_problem(train)  # re-reads the training file, so the prediction below is on the training data
		p_labs, p_acc, p_vals = svmutil.svm_predict(yt, xt, model, '-b 1')
		svmutil.svm_save_model(modelsavepath, model)  # save the model
		# model = svmutil.svm_load_model('model_file')  # load a saved model
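The commented-out reads suggest the validation and test splits were meant to be scored as well; a hedged sketch of that evaluation as a separate, hypothetical method reusing the cv/test paths passed to trainmodel:
	def evaluatemodel(self, model, cv, test):
		# hypothetical helper: score the trained model on the held-out files
		ycv, xcv = svmutil.svm_read_problem(cv)
		ytest, xtest = svmutil.svm_read_problem(test)
		cv_labs, cv_acc, cv_vals = svmutil.svm_predict(ycv, xcv, model, '-b 1')
		test_labs, test_acc, test_vals = svmutil.svm_predict(ytest, xtest, model, '-b 1')
		return cv_acc[0], test_acc[0]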
 def getmodelandaccuary(line):
     """
     Train the model and predict the results.
     :param line: the feature directories/files on HDFS to read
     :return: accuracy
     """
     train_y = []
     train_x = []
     test_y = []
     test_x = []
     for i in range(0, len(line) - 1):
         y, x = svmutil.svm_read_problem(line[i])
         train_y.extend(y[0:90])
         train_x.extend(x[0:90])
         test_y.extend(y[90:100])
         test_x.extend(x[90:100])
     train_random_index = [i for i in range(len(train_y))]
     test_random_index = [i for i in range(len(test_y))]
     random.shuffle(train_random_index)
     random.shuffle(test_random_index)
     random_train_y = [train_y[x] for x in train_random_index]
     random_train_x = [train_x[x] for x in train_random_index]
     random_test_y = [test_y[x] for x in test_random_index]
     random_test_x = [test_x[x] for x in test_random_index]
     m = svmutil.svm_train(random_train_y, random_train_x,
                           "-s 0 -t 2 -c 32 -g 8 -b 1")
     predict_label, accuary, prob_estimates = svmutil.svm_predict(
         random_test_y, random_test_x, m, '-b 1')
     svmutil.svm_save_model(
         '/home/sunbite/Co_KNN_SVM_TMP/CoKNNSVM2.model', m)
     return accuary
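An equivalent way to shuffle labels and features together, shown only as an illustration of the index-based shuffle above:
paired_train = list(zip(train_y, train_x))
random.shuffle(paired_train)
random_train_y, random_train_x = (list(t) for t in zip(*paired_train))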
Example #8
 def test(self):
     self.model = svmutil.svm_load_model('./news/svmmodel')
     self.yt, self.xt = svmutil.svm_read_problem('./news/temp/svmtest.txt')
     print('Start to predict...')
     p_label, p_acc, p_val = svmutil.svm_predict(self.yt, self.xt,
                                                 self.model)
     self.yt, self.xt = svmutil.svm_read_problem('./news/temp/svmtest.txt')
     confusion_matrix = pd.DataFrame([[0 for i in range(10)]
                                      for j in range(10)],
                                     index=list(range(1, 11)),
                                     columns=list(range(1, 11)),
                                     dtype='int')
     for i in range(len(p_label)):
         confusion_matrix[p_label[i]][self.yt[i]] += 1
     confusion_matrix.to_csv('confusion_matrix_svm.csv')
     self.show_test_result(confusion_matrix)
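The nested tally above can also be computed directly with pandas; a sketch using the same p_label and self.yt (note that crosstab only keeps labels that actually occur):
confusion = pd.crosstab(pd.Series(self.yt, name='actual'),
                        pd.Series(p_label, name='predicted'))
confusion.to_csv('confusion_matrix_svm.csv')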
Example #9
def main():
    #transform('features.train','features-libsvm.train')
    #transform('features.test','features-libsvm.test')
    y_train, X_train = svmutil.svm_read_problem('../features-libsvm.train')
    #w_len=Problem11(X_train,y_train)
    nSV = Problem12(X_train, y_train)
    np.savez('../sSV.npz', sSV=np.array(nSV))
        def getmodelandaccuary(line):
            """
            Train the model and predict the results.
            :param line: the feature directories/files on HDFS to read
            :return: accuracy
            """
            train_y = []
            train_x = []
            test_y = []
            test_x = []
            for i in range(0, len(line) - 1):
                y, x = svmutil.svm_read_problem(line[i])
                train_y.extend(y[0:60])
                train_x.extend(x[0:60])
                test_y.extend(y[60:300])
                test_x.extend(x[60:300])
            train_random_index = [i for i in range(len(train_y))]
            test_random_index = [i for i in range(len(test_y))]
            random.shuffle(train_random_index)
            random.shuffle(test_random_index)
            random_train_y = [train_y[x] for x in train_random_index]
            random_train_x = [train_x[x] for x in train_random_index]
            random_test_y = [test_y[x] for x in test_random_index]
            random_test_x = [test_x[x] for x in test_random_index]

            Co_KNN_SVM.co_knn_svm(random_train_y, random_train_x,
                                  random_test_y, random_test_x)
Example #11
def Train_SVM_model(PathToFeatureFile):  # generate the trained model file (model.txt)
    #print(PathToFeatureFile)
    y,x=svmutil.svm_read_problem(PathToFeatureFile)
    model=svmutil.svm_train(y,x)
    modelFilePath=os.path.join(os.path.split(PathToFeatureFile)[0],f"model_{PathToFeatureFile.split('_',1)[1]}")
    svmutil.svm_save_model(modelFilePath,model)
    print(modelFilePath)
Example #12
def classify(filename, classLabel=0):
    str = "/Thu_Life/CS/SVM/data/trainData/Test_SVMFile/singleSVM_TestFile"
    f = open(str, "w")
    t = VSM.TextToVector2(filename)
    slabel = ("%d ") % classLabel
    if len(t) > 0:
        f.write(slabel)
        for k in range(len(t)):
            str1 = ("%d:%d ") % (t[k][0], t[k][1])
            f.write(str1)
        f.write("\r\n")
    else:
        print("The text can't be classified to the Four Labels!")
        return "Can't be classified ! "
    f.close()
    y, x = svmutil.svm_read_problem(str)
    model = svmutil.svm_load_model("../SVMTrainFile250.model")
    label, b, c = svmutil.svm_predict(y, x, model)
    print("label", label)
    if label[0] == 1:
        print("类别:财经")
        return "财经"
    elif label[0] == 2:
        print("类别:IT")
        return "IT"
    elif label[0] == 3:
        print("类别:旅游")
        return "旅游"
    elif label[0] == 4:
        print("类别:体育")
        return "体育"
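A more compact variant of the label-to-category branch above, purely illustrative and using the same label/category mapping:
CATEGORY_NAMES = {1: "财经", 2: "IT", 3: "旅游", 4: "体育"}

def label_to_category(label_value):
    # falls back to the same "can't be classified" message used above
    return CATEGORY_NAMES.get(int(label_value), "Can't be classified ! ")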
Example #13
 def __init__(self,train_feature_file = TRAIN_FEATURE_FILE):
     if os.path.exists(SAVED_MODEL):
         self.model = svmutil.svm_load_model(SAVED_MODEL)
     else:
         y, x = svmutil.svm_read_problem(train_feature_file)
         self.model = svmutil.svm_train(y, x, '-c 4')
         svmutil.svm_save_model(SAVED_MODEL,self.model)
Example #14
 def testing_fold(self, k, m):
     folder_name = 'fold_' + str(k) + '/'
     file_name = self.filename + '.test.svm'
     y, x = svmutil.svm_read_problem(folder_name + file_name)
     print("Y", len(y), "X", len(x))
     p_label, p_acc, p_val = svmutil.svm_predict(y, x, m)
     return float(p_acc[0])
Example #15
 def __init__(self, train_feature_file=TRAIN_FEATURE_FILE):
     if os.path.exists(SAVED_MODEL):
         self.model = svmutil.svm_load_model(SAVED_MODEL)
     else:
         y, x = svmutil.svm_read_problem(train_feature_file)
         self.model = svmutil.svm_train(y, x, '-c 4')
         svmutil.svm_save_model(SAVED_MODEL, self.model)
def main():
    y, x = svmutil.svm_read_problem("char_recon_shuffled.db")
    x_train = x[:1800]
    y_train = y[:1800]
    x_val = x[1800:]
    y_val = y[1800:]

    gamma_c_pairs = [GammaCPair(1.0 / (2.0 * (3.0 ** log_sigma) ** 2), 3.0 ** log_C)
                     for log_sigma in [7]
                     for log_C     in [3]
                    ]

    log_log_pairs = [[log_sigma, log_C]
                     for log_sigma in np.arange(6, 10, 0.5)
                     for log_C     in np.arange(0, 5, 0.5)
                    ]

    def cv(gamma_c):
        return get_cross_val(x_train, y_train, x_val, y_val, gamma_c)

    cross_val = []

    for gamma_c in gamma_c_pairs:
        cross_val.append(cv(gamma_c))

    f = open("gamma_c", "w")
    for i in range(len(gamma_c_pairs)):
        f.write("{0}   {1}   {2}\n".format(log_log_pairs[i][0], log_log_pairs[i][1], cross_val[i]))
    f.close()
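As written, gamma_c_pairs holds a single grid point while the output file indexes into log_log_pairs; a sketch of how the pairs could be kept aligned if the full grid were swept, using the same GammaCPair construction as above:
    gamma_c_pairs = [GammaCPair(1.0 / (2.0 * (3.0 ** log_sigma) ** 2), 3.0 ** log_C)
                     for log_sigma, log_C in log_log_pairs]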
Example #17
def train_manipulation(mnp_type):
    """
    train linear svm classifier for a specific manipulation type
    """
    #train
    datafile = "manipulate/model/traindata_mnp_"+mnp_type
    if not os.path.isfile(datafile):
        srcfile = "manipulate/data/feature_mnp_train.csv"
        write_svmdata_mnp(srcfile, datafile, mnp_type, 0)    
    label_train,data_train = svmutil.svm_read_problem(datafile)
    modelfile = "manipulate/model/model_mnp_"+mnp_type
    m = []
    if not os.path.isfile(modelfile):
        print("train model: " + mnp_type)
        label_weight = {}
        for v in label_train:
            if v in label_weight:
                label_weight[v]+=1
            else:
                label_weight[v]=1
        sorted_label = sorted(label_weight)
        param_weight = ' '
        for v in sorted_label:
            label_weight[v] = float(len(label_train))/len(sorted_label)/label_weight[v]
            param_weight += '-w%d %f ' % (v, label_weight[v])
        prob = svmutil.svm_problem(label_train, data_train)        
        param = svmutil.svm_parameter('-t 0 -b 1 -q'+param_weight)
        print('-t 0 -b 1 -q' + param_weight)
        m = svmutil.svm_train(prob, param)        
        svmutil.svm_save_model(modelfile, m)
    else:
        print("load model: " + mnp_type)
        m = svmutil.svm_load_model(modelfile)
#    weight = read_model_linearSVM(modelfile, len(data_train[0]))
#    print weight
    #test    
    mnp_info = read_info("manipulate/data/feature_mnp_test.csv")
    datafile = "manipulate/model/testdata_mnp_"+mnp_type
    if not os.path.isfile(datafile):
        srcfile = "manipulate/data/feature_mnp_test.csv"
        write_svmdata_mnp(srcfile, datafile, mnp_type, 1)    
    label_test,data_test = svmutil.svm_read_problem(datafile)
    p_label, p_acc, p_val = svmutil.svm_predict(label_test, data_test, m, '-b 1')
    f_result = open("manipulate/result/mnp_" + mnp_type + ".csv", "w")
    for i in range(len(p_label)):
        f_result.write(mnp_info[i]+", "+str(int(label_test[i]))+", "+str(int(p_label[i]))+", ")
        f_result.write("[%.4f]\n" % p_val[i][0])
    f_result.close()
Example #18
def main():
    y_train, X_train = svmutil.svm_read_problem('../features-libsvm.train')
    y_test, X_test = svmutil.svm_read_problem('../features-libsvm.test')
    c = 0.1
    y_train = list(2 * (np.array(y_train) == 0) - 1)
    y_test = list(2 * (np.array(y_test) == 0) - 1)
    eout = list()
    for t in range(-3, 2):
        gamma = 10**t
        model = svmutil.svm_train(y_train, X_train, '-g %s -c %s' % (gamma, c))
        _, (accuracy, _, _), _ = svmutil.svm_predict(y_test, X_test, model)
        eout.append(100 - accuracy)
    plt.plot(range(-3, 2), eout)
    plt.xlabel("log10(gamma)")
    plt.ylabel("Eout on test set")
    plt.title("C=0.1")
    plt.show()
Example #19
def doRecognize():
    yt, xt = svm_read_problem(testTxt)
    model = svm_load_model(cdir + "model")
    p_label, p_acc, p_val = svm_predict(yt, xt, model)  # p_label is the recognition result
    code = ''
    for item in p_label:
        code = code + str(int(item))
    print(code)
Example #20
def predict(trainFileName,
            testFileName,
            cgp={
                'c': 1024,
                'g': 16,
                'p': 0.015625
            }):

    from svmutil import svm_read_problem, svm_train, svm_predict

    y, x = svm_read_problem(trainFileName)
    yt, xt = svm_read_problem(testFileName)

    model = svm_train(
        y, x, '-s 4 -t 2 -c %f -g %f -p %f' % (cgp['c'], cgp['g'], cgp['p']))

    p_label, p_acc, p_val = svm_predict(yt, xt, model)
    return (p_label, p_acc, p_val)
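A usage sketch; the file names are placeholders and the cgp values are simply the defaults shown above:
p_label, p_acc, p_val = predict('train.scaled', 'test.scaled',
                                cgp={'c': 1024, 'g': 16, 'p': 0.015625})
# -s 4 selects nu-SVR, so p_acc reports (accuracy, mean squared error, squared correlation)
print('MSE: %f  SCC: %f' % (p_acc[1], p_acc[2]))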
Example #21
 def train_fold(self, k, c):
     self.print_debug('train_fold', k, c)
     folder_name = 'fold_' + str(k) + '/'
     file_name = self.filename + '.data.svm'
     y, x = svmutil.svm_read_problem(folder_name + file_name)
     prob = svmutil.svm_problem(y, x)
     param = svmutil.svm_parameter('-s 0 -t 0 -c ' + str(c))
     m = svmutil.svm_train(prob, param)
     return m
Example #22
 def train_fold_polynomial(self, k, c, g, d):
     self.print_debug('train_fold_polynomial', k, c, g, d)
     folder_name = 'fold_' + str(k) + '/'
     file_name = self.filename + '.data.svm'
     y, x = svmutil.svm_read_problem(folder_name + file_name)
     prob = svmutil.svm_problem(y, x, isKernel=True)
     param = svmutil.svm_parameter('-s 0 -t 1 -c ' + str(c)
                                   + ' -g ' + str(g) + ' -d ' + str(d))
     m = svmutil.svm_train(prob, param)
     return m
Example #23
def svm_model_test(svm_root, model_path):
    """
    Test the model with the test set.
    :return:
    """
    svm_path = r"C:\Python36\risk_down\libsvm"
    sys.path.append(svm_path + r"\python")
    import svmutil
    # train the overall model
    y, x = svmutil.svm_read_problem('./' + 'total_feature.txt')
Example #24
 def fit(self, document, keep_on_doc=False):
     if isinstance(document, str):
         data_file = str(document)
         y, x = svmutil.svm_read_problem(data_file)
     elif isinstance(document, object):
         y = []
         x =[]
         for y_section,x_section in self.extract_xy(self.get_svm_data(document, keep_on_doc)):
             y.append(y_section)
             x.append(x_section)
     return y,x
Example #25
def main():
    y_train, X_train = svmutil.svm_read_problem('../features-libsvm.train')
    # y_test, X_test = svmutil.svm_read_problem('../features-libsvm.test')
    y_train = list(2 * (np.array(y_train) == 0) - 1)
    # y_test = list(2 * (np.array(y_test) == 0) - 1)
    R=1000
    res=list()
    for r in range(R):
        print("round %s"%r)
        res.append(validate(X_train,y_train))
    pd.Series(res).hist()
Example #26
def predict_main(file, model, out, now_path):
    file_path = os.path.join(now_path, file)
    out_path = os.path.join(now_path, out)
    y_p, x_p = svm_read_problem(file_path)
    model = svm_load_model(model)
    p_label, p_acc, p_val = svm_predict(y_p, x_p, model)
    result = 'True,Predict\n'
    for i in range(len(y_p)):
        result += str(y_p[i]) + ',' + str(p_label[i]) + '\n'
    with open(out_path + '_predict.csv', 'w', encoding="utf-8") as f:
        f.write(result)
Example #27
def GetCode_SKB_123(picpath, types='path'):
    model = svmutil.svm_load_model(modelpath_SKB_123)
    if types == 'path':
        img = Image.open(picpath).convert('L').point([0] * 165 + [1] * (256 - 165), '1')
    elif types == 'img':
        img = picpath
    pixel_cnt_list = GetFeature(img, 'img')
    tempath = os.path.join(os.getcwd(), 'temp.txt')
    with open(tempath, 'w') as f:
        f.writelines(GetFeatureStr(pixel_cnt_list, 0))
    y0, x0 = svmutil.svm_read_problem(tempath)
    os.remove(tempath)
    p_label, p_acc, p_val = svmutil.svm_predict(y0, x0, model, '-q')
    return chr(int(p_label[0]))
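The temp-file round trip above can be skipped when the features are already numeric; a hedged sketch, assuming pixel_cnt_list is a flat list of numbers (the 1-based indices mirror the LIBSVM convention):
def predict_from_features(pixel_cnt_list, model):
    # build the sparse {index: value} dict that svm_predict expects
    x0 = [{i + 1: float(v) for i, v in enumerate(pixel_cnt_list)}]
    p_label, p_acc, p_val = svmutil.svm_predict([0], x0, model, '-q')
    return chr(int(p_label[0]))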
def solve(im):
    im = denoise(im)
    im_list = crop(im)
    tmp = str(round(time.time() * 1000))
    with open(tmp, "w") as f:
        for im in im_list:
            f.write("0" + feature(im))
    y, x = svmutil.svm_read_problem(tmp)
    model = svmutil.svm_load_model(MODEL_FILE)
    p_label, p_acc, p_val = svmutil.svm_predict(y, x, model)
    os.remove(tmp)
    return "".join([chr(round(x)) for x in p_label])
def train():
    print("Starting train process.")
    '''
    for line in open(__FEATURE_FILE):
        line = line.split(None, 1)
        if len(line) == 1: line += ['']
        print(line)
    '''
    y, x = svmutil.svm_read_problem(__FEATURE_FILE)
    model = svmutil.svm_train(y, x)
    svmutil.svm_save_model(Function.MODEL_FILE, model)
    print("train process done.")
    return
Example #30
def train_model_main2(model_path,file):
	# LibSVM
	# generate a set of vectors with feature values and labels in the format libSVM expects
	svm_path = r"C:\Python36\risk_down\libsvm"
	sys.path.append(svm_path + r"\python")
	import svmutil
	# y, x = svmutil.svm_read_problem('./' + str(n) + '_feature.txt')
	y, x = svmutil.svm_read_problem(file)
	# ---------------------------------------------------
	model = svmutil.svm_train(y[:50], x[:50], '-c 4')
	# print(model_path,file.split('/')[2][0])
	model_path = './'+model_path+'/'+file.split('/')[2][0]+"_feature.model"
	svmutil.svm_save_model(model_path, model)
def train(train_file, fold_num, mapping=None, parameters={'-c': 1}, multilabel=None, output_folder=None):
	'''
	Given a training instance file and (optionally) a label mapping, adapt the
	training vectors to fit the mapping and build an SVM model.

	'''
	global classifier
	if not output_folder:
		output_folder = 'models'
	output_folder = os.path.join(output_folder, 'fold-{0:02d}'.format(fold_num+1))
	if not os.path.exists(output_folder):
		os.makedirs(output_folder)

	if multilabel:
		temp_labels, instances = alt_read_problem(train_file)
		temp_labels = [[mapping[l] for l in label] for label in temp_labels]

		labels = []
		for temp_labs in temp_labels:
			if multilabel[1] in temp_labs:
				labels.append(multilabel[1])
			else:
				assert len(set(temp_labs)) == 1, "Something appears to be wrong with the intermediate mapping. There's still more than one label present for an instance: {0}".format(temp_labs)
				labels.append([l for l in temp_labs if l != multilabel[1]][0])
	else:
		labels, instances = svm_read_problem(train_file)
	labels = reMap(labels, mapping)

	# Exclude instances which have 0 as their label
	labels, instances = zip(*[(label, instance) for label, instance in zip(labels, instances) if label != 0])

	distribution = {}
	for label in set(labels):
		distribution[label] = float(labels.count(label))/len(labels)

	paramstring = ''
	for param, value in parameters.items():
		paramstring += ' {0} {1}'.format(param, value)
	if classifier == 'libsvm' and '-b' not in parameters.keys():
		paramstring += ' -b 1'
	paramstring += ' -q'

	if multilabel:
		model_file = os.path.join(output_folder, os.path.basename(train_file) + '.{0}.model'.format(multilabel[0]))
	else:
		model_file = os.path.join(output_folder, os.path.basename(train_file) + '.model')
	print('---training')
	model = svm_train(labels, instances, paramstring)
	svm_save_model(model_file, model)

	return model_file, distribution
def train_attribute(attribute, side):
    """
    train_attribute(attribute, side): 
    train linear svm classifier for specific attribute\n
    attribute: should be one from ["prismatic", "sphere", "flat", "rigid"]\n
    side: left hand or right hand\n
    """
    #train
    datafile = "model/traindata_attribute_"+attribute+"_"+side
    if not os.path.isfile(datafile):
        srcfile = "data/feature_attribute_train.csv"
        write_svmdata_attribute(srcfile, datafile, attribute, side, 0)    
    label_train,data_train = svmutil.svm_read_problem(datafile)    
    modelfile = "model/model_attribute_"+attribute+"_"+side
    m = []
    if not os.path.isfile(modelfile):
        print("train model: " + attribute+"_"+side)
        prob = svmutil.svm_problem(label_train, data_train)
        param = svmutil.svm_parameter('-t 0 -c 4 -b 1 -q')
        m = svmutil.svm_train(prob, param)        
        svmutil.svm_save_model(modelfile, m)
    else:
        print("load model: " + attribute+"_"+side)
        m = svmutil.svm_load_model(modelfile)
    #test
    attribute_info = read_info("data/feature_attribute_test.csv", side)
    datafile = "model/testdata_attribute_"+attribute+"_"+side
    if not os.path.isfile(datafile):
        srcfile = "data/feature_attribute_test.csv"
        write_svmdata_attribute(srcfile, datafile, attribute, side, 1)    
    label_test,data_test = svmutil.svm_read_problem(datafile)
    p_label, p_acc, p_val = svmutil.svm_predict(label_test, data_test, m, '-b 1')
    [precision, recall, f1, accuracy] = getF1(label_test, p_label)
    print("F1: [%.4f, %.4f, %.4f] Accuracy: %.4f" % (precision, recall, f1, accuracy))
    f_result = open("result/attribute_"+attribute+"_"+side+".csv", "w")
    for i in range(len(p_label)):
        f_result.write(attribute_info[i]+", "+str(int(label_test[i]))+", "+str(int(p_label[i]))+", ")
        f_result.write("[%.4f]\n" % p_val[i][0])
    f_result.close()
Example #33
    def loadTraces(self, fname, reLoad=False):
        if not reLoad:
            self._fnames.append(fname)

        y, x = svm_read_problem(fname)
        vectors = []
        record = []
        for t, i in zip( x, range(len(x)) ):
            vectors.append( [ v for k, v in t.items() ] )
            record.append( str(fname)+'_'+str(i) )

        self._traces += vectors
        self._labels += y
        self._records += record
Example #34
def GetCode_register(picpath):  # recognize the registration captcha
    code = []
    model = svmutil.svm_load_model(modelpath_login_register)  # load the trained model
    im = Pre_process_register(picpath)
    Imgs = Crop_Vertical(im, th=3)  # list of segmented character images
    for img in Imgs:
        pixel_cnt_list = GetFeature(img, 'img')
        tempath = os.path.join(os.getcwd(), 'temp.txt')  # temp file holding the features of the image to recognize
        with open(tempath, 'w') as f:
            f.writelines(GetFeatureStr(pixel_cnt_list, 0))
        y0, x0 = svmutil.svm_read_problem(tempath)
        os.remove(tempath)
        p_label, p_acc, p_val = svmutil.svm_predict(y0, x0, model, '-q')
        code.append(int(p_label[0]))
    code = ''.join(list(map(chr, code)))
    return code
Example #35
def over_sample(sample_df, dump_svmlight_file):
    train_y, train_x = svm_read_problem(dump_svmlight_file)
    with open(dump_svmlight_file, "rb") as f:
        ans_lines = np.array(f.readlines())
    sample_df['label'] = pd.Series(train_y)
    positive = sample_df[sample_df['label'] >= 0.99].index
    balance = (len(train_y) - len(positive)) // len(positive)
    for i in range(balance):
        ans_lines = np.hstack((ans_lines, ans_lines[positive]))
        sample_df = pd.concat([sample_df, sample_df.iloc[positive, :]], axis=0)
    new_idx = np.random.permutation(len(ans_lines))
    ans_lines = ans_lines[new_idx]
    sample_df = sample_df.reset_index(drop=True)
    sample_df = sample_df.iloc[new_idx, :].reset_index(drop=True)
    with open(dump_svmlight_file, "wb") as f:
        f.writelines(list(ans_lines))
    return sample_df[np.setdiff1d(sample_df.columns, ['label'])], dump_svmlight_file
Example #36
def classification():
    print('test image classification...')
    y, x = svmutil.svm_read_problem('./show_data/test_data')
    print('model loading...')
    model = svmutil.svm_load_model('./train_data/model_weight')
    p_label, p_acc, p_val = svmutil.svm_predict(y, x, model)
    for label in p_label:
        if int(label) == 0:
            print('normal fundus image')
        elif int(label) == 1:
            print('mild cataract fundus image')
        elif int(label) == 2:
            print('moderate cataract fundus image')
        else:
            print('severe cataract fundus image')
 def run(self):
     c, g = None, None
     if self.cexp != None:
         c = 2.0**self.cexp
     if self.gexp != None:
         g = 2.0**self.gexp
     cmdline = self.get_cmd(c,g)
     cmd = shlex.split(cmdline)
     path = cmd[0]
     input = cmd[-1]
     param = ' '.join(cmd[1:-1])
     sys.path.append(path)
     from svmutil import svm_read_problem, svm_train
     y, x = svm_read_problem(input)
     cv_acc = svm_train(y, x, param)
     return (self.cexp, self.gexp, cv_acc)
Example #38
 def run(self):
     c, g = None, None
     if self.cexp != None:
         c = 2.0**self.cexp
     if self.gexp != None:
         g = 2.0**self.gexp
     cmdline = self.get_cmd(c, g)
     cmd = shlex.split(cmdline)
     path = cmd[0]
     input = cmd[-1]
     param = ' '.join(cmd[1:-1])
     sys.path.append(path)
     from svmutil import svm_read_problem, svm_train
     y, x = svm_read_problem(input)
     cv_acc = svm_train(y, x, param)
     return (self.cexp, self.gexp, cv_acc)
def test(test_file, model_file, fold_num, mapping=None, multilabel=None, debug=False):
	'''
	Returns the predicted labels, prediction values, label order and the test
	labels (potentially remapped).

	Requires a test instance file and a corresponding model
	file. Remaps the labels in the test file (optional), classifies the test
	instances against the model.

	'''
	if multilabel:
		temp_labels, instances = alt_read_problem(test_file)
		temp_labels = [[mapping[l] for l in label] for label in temp_labels]

		labels = []
		for temp_labs in temp_labels:
			if multilabel[1] in temp_labs:
				labels.append(multilabel[1])
			else:
				assert len(set(temp_labs)) == 1, "Something appears to be wrong with the intermediate mapping. There's still more than one label present for an instance: {0}".format(temp_labs)
				labels.append([l for l in temp_labs if l != multilabel[1]][0])
	else:
		labels, instances = svm_read_problem(test_file)
	labels = reMap(labels, mapping)

	# Exclude instances which have 0 as their label
	labels, instances = zip(*[(label, instance) for label, instance in zip(labels, instances) if label != 0])

	if debug:
		with open(os.path.basename(test_file) + '.remap', 'w') as fout:
			for label, instance in zip(labels, instances):
				output = '{0} '.format(str(label))
				for idx, val in instance.items():
					output += '{0}:{1} '.format(str(idx), str(val))
				output = output.strip() + '\n'
				fout.write(output)

	model = svm_load_model(model_file)

	print('---testing')
	if classifier == 'liblinear':
		pred_labels, ACC, pred_values, label_order = svm_predict(labels, instances, model)
	elif classifier == 'libsvm':
		pred_labels, (ACC, MSC, SCC), pred_values = svm_predict(labels, instances, model, options='-b 1')
		label_order = model.get_labels()

	return pred_labels, pred_values, label_order, labels
Example #40
def classify2(filename, classLabel=0):
    str = "/Thu_Life/CS/SVM/data/trainData/Test_SVMFile/singleSVM_TestFile"
    f = open(str, "wb")
    t = VSM.TextToVector2(filename)
    slabel = ("%d ") % classLabel
    if len(t) > 0:
        f.write(slabel)
        for k in range(len(t)):
            str1 = ("%d:%d ") % (t[k][0], t[k][1])
            f.write(str1)
        f.write("\r\n")
    else:
        return 0
    f.close()
    y, x = svmutil.svm_read_problem(str)
    model = svmutil.svm_load_model("../SVMTrainFile250.model")
    label, b, c = svmutil.svm_predict(y, x, model)
    return label[0]
	def cSvmTrainSet(self):
		dataMat = []
		labelMat = []
		file_pattern = re.compile(r'^%s-\d\.rec' % self.legalName)
		for fdata in os.listdir('data'):
			if file_pattern.match(fdata):
				data, label = loadDataSet('data/' + fdata, 1)
			else:
				data, label = loadDataSet('data/' + fdata, -1)
			dataMat += data
			labelMat += label
		libSvmFormatSaveInFile(dataMat, labelMat, 'data_format/%s.mat' % self.legalName)  # TODO: multithreading
		y, x = svmutil.svm_read_problem('data_format/%s.mat' % self.legalName)
		prob = svmutil.svm_problem(y, x, isKernel=True)
		param = svmutil.svm_parameter('-t 0 ')
		self.model = svmutil.svm_train(prob, param)

		print(self.model)
Example #42
def easy_predict(train_name, test_name):
    range_file = train_name + ".range"
    model_file = train_name + ".model"
    assert os.path.exists(test_name),"testing file not found"
    assert os.path.exists(model_file),"model file not found"
    assert os.path.exists(range_file),"range file not found"
    
    file_name = os.path.split(test_name)[1]
    scaled_test_file = file_name + ".scale"
    predict_test_file = file_name + ".predict"

    cmd = '{0} -r "{1}" "{2}" > "{3}"'.format(svmscale_exe, range_file, test_name, scaled_test_file)
    print('Scaling testing data...')
    Popen(cmd, shell = True, stdout = PIPE).communicate()
    
    (prob_y, prob_x) = svmutil.svm_read_problem(scaled_test_file)
    model = svmutil.svm_load_model(model_file)
    pred_labels, (ACC, MSE, SCC), pred_values = svmutil.svm_predict(prob_y, prob_x, model, "-b 1")
    
    return pred_values,MSE,SCC
Example #43
def svm_predict(problem_filepath, model_filepath):
    """
    Using LibSVM to predict result of a problem

    Returns
    -------
        (ids, labels)
    """

    # Reading a problem
    ids, x = svmutil.svm_read_problem(problem_filepath)

    print("len(x) =", len(x))

    # Preparing a model
    model = svmutil.svm_load_model(model_filepath)

    # Predicting
    y = [-2] * len(x)
    p_label, p_acc, p_val = svmutil.svm_predict(y, x, model)

    return (ids, p_label)
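Since the true labels are unknown here, the y passed to svmutil.svm_predict is just a placeholder and the reported accuracy is meaningless; a usage sketch with hypothetical file names:
ids, labels = svm_predict('unlabeled.svm', 'trained.model')
for sample_id, label in zip(ids, labels):
    print('%s -> %d' % (sample_id, int(label)))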
Example #44
__author__ = 'yunlinz'

import svm
import svmutil
import sklearn.cross_validation
import numpy
import matplotlib.pyplot as plt
import time
import ctypes

# load data
y_train, x_train = svmutil.svm_read_problem('splice_noise_train.txt')
y_test, x_test = svmutil.svm_read_problem('splice_noise_test.txt')

# scale x_train to [-1,1]
x_train_sum = {}
x_train_min = {}
x_train_max = {}
x_train_rng = {}
x_train_scaled = []
x_test_scaled = []

for d in x_train:
    for i in range(1, 60, 1):
        if i in x_train_min:
            if d[i] < x_train_min[i]:
                x_train_min[i] = d[i]
        else:
            x_train_min[i] = d[i]
        if i in x_train_max:
            if d[i] > x_train_max[i]:
                x_train_max[i] = d[i]
        else:
            x_train_max[i] = d[i]
all_features = extractSift(fname)
all_files_labels[fname] = 4  # label is unknown

print("---------------------")
print("## loading codebook from " + codebook_file)
with open(codebook_file, 'rb') as f:
    codebook = load(f)

print("---------------------")
print("## computing visual word histograms")
word_histgram = computeHistograms(codebook, all_features[fname])

print("---------------------")
print("## write the histograms to file to pass it to the svm")
nclusters = codebook.shape[0]
writeHistogramsToFile(nclusters,
                      all_files_labels,
                      fname,
                      word_histgram,
                      HISTOGRAMS_FILE)

print("---------------------")
print("## test data with svm")

y, x = svmutil.svm_read_problem(HISTOGRAMS_FILE)
model = svmutil.svm_load_model(model_file)
result = svmutil.svm_predict(y, x, model)
cat_label = load(open("cat.txt", "rb"))
print(cat_label)
print(result[0][0])
Example #46
#coding=utf-8

import svmutil
y, x = svmutil.svm_read_problem('../thuboy')
m = svmutil.svm_load_model('../SVMTrain250.model')
# m = svmutil.svm_train(y, x, '-c 5')
p1, p2, p3 = svmutil.svm_predict(y, x, m)
print(p1)
print(p2)
print(p3)
import svmutil as svm
import glob
import csv
import os

# files = glob.glob('./*.ml')
# predictfiles = glob.glob('./*.predict')

# for deletefile in predictfiles:
# 	os.remove(deletefile)
	

# for currentFile in files:
	# if "trainning" in currentFile:
y, x = svm.svm_read_problem('./trainning.ml')

yt, xt = svm.svm_read_problem('./predict.ml') 
# print yt, xt
m = svm.svm_train(y, x, '-c 32768 -g 0.125')
p_label, p_acc, p_val = svm.svm_predict(yt, xt, m)
print(p_label)
# csv.writer(csvfile, delimiter=',')


with open('./predict.ml.predict', 'w') as f:
	# writer = csv.writer(f,quoting=csv.QUOTE_NONE)
	for label in p_label:
		f.write(str(int(label)))
		f.write("\n")

		
	# print(len(vecWords))
	#................................................................................
	# append the tf-idf feature value to each input vector
	for j in range(len(vecWords)):
		vecInput[j].append(tfidf(totalList,vecWords[j]))
	#..................................................................................
	# generate the text file used for libsvm training
	print('#####################################')
	print('Generating the libsvm training file...')
	print('#####################################')
	svmfile=open(svmtrainfile,'w')
	for k in range(len(vecOut)):
		svmfile.write(str(vecOut[k])+' 1:'+str(vecInput[k][0])+' 2:'+str(vecInput[k][1])+' 3:'+str(vecInput[k][2])+' 4:'+str(vecInput[k][3])+'\r')
	svmfile.close()
	#..................................................................................
	y,x=svmutil.svm_read_problem(svmtrainfile)
	print('#####################################')
	print('Training the SVM...')
	print('#####################################')
	# print(x)
	# print('#####################################')
	# print(y)
	m=svmutil.svm_train(y,x)
	print('#####################################')
	print('Testing the SVM...')
	print('#####################################')
	fsvmR=open(svmResultFile,'w')
	for each in svmutil.svm_predict(y,x,m):
		fsvmR.writelines(str(each))
	fsvmR.close()
	# print(m)
def main(argv):
    # Dpark initialize
    dpark = DparkContext()

    # number of the training and testing set
    num_train = 6000
    num_test = 6000

    # Loading the dataset
    data = svm_read_problem('echo_liveness.01.libsvm')
    y, x = data

    # Preparing training and testing data
    if len(x) != len(y):
        print("The labels and features are not accorded!")
        sys.exit()
    
    x_live = [x[i] for i in find(y, 1.0)]
    x_stu = [x[i] for i in find(y, 0.0)]
    n_live = len(x_live)
    n_stu = len(x_stu)
    ind_live = list(range(n_live))
    ind_stu = list(range(n_stu))
    random.shuffle(ind_live)
    random.shuffle(ind_stu)

    x_te = [x_live[i] for i in ind_live[num_train : num_test + num_train]] + \
        [x_stu[i] for i in ind_stu[num_train : num_test + num_train]]
    y_te = [1.0] * len(ind_live[num_train : num_test + num_train]) + \
        [-1.0]*len(ind_stu[num_train : num_test + num_train])
    x_tr = [x_live[i] for i in ind_live[:num_train]] + \
        [x_stu[i] for i in ind_stu[:num_train]]
    y_tr = [1.0]*num_train + [-1.0]*num_train

    # dpark version
    def map_iter(i):
        y_tr_examplar = [-1.0] * len(y_tr)
        y_tr_examplar[i] = 1.0
        # opt = '-t 0 -w1 ' + str(len(y_tr)) + ' -w-1 1 -b 1 -q'
        # It is suggested in Efros' paper that:
        # C1 0.5, C2 0.01
        opt = '-t 0 -w1 0.5 -w-1 0.01 -b 1 -q'
        m = svm_train(y_tr_examplar, list(x_tr), opt)
        p_label, p_acc, p_val = svm_predict(y_te, x_te, m, '-b 1 -q')
        p_val = np.array(p_val)
        # p_val = np.delete(p_val,1,1)  # shape = (N, 1)
        p_val = p_val[:, 0]  # shape = (N, )
        return p_val

    p_vals = dpark.makeRDD(
        range(len(y_tr))
    ).map(
        map_iter
    ).collect()

    val = np.array(p_vals).T

    # for-loop version
    '''
    # Examplar SVM Training
    ensemble_model = []
    # DPark

    for i in range(len(y_tr)):
        y_tr_examplar = [-1.0] * len(y_tr)
        y_tr_examplar[i] = 1.0;
        #opt = '-t 0 -w1 ' + str(len(y_tr)) + ' -w-1 1 -b 1 -q'
        # It is suggested in Efros' paper that:
        # C1 0.5, C2 0.01
        opt = '-t 0 -w1 0.5 -w-1 0.01 -b 1 -q'
        m = svm_train(y_tr_examplar, x_tr, opt)
        ensemble_model.append(m)
        print("The %s-th examplar SVM has been trained" %i)

    # Calibaration, to be updated
    # Since we adopt the probability estimation model of LIB_SVM, Calibrating seems unnecessary

    # Ensembly Classify
    val = np.zeros((len(y_te),1))
    for m in ensemble_model:
        p_label, p_acc, p_val = svm_predict(y_te, x_te, m, '-b 1 -q')
        p_val = np.array(p_val)
        p_val = np.delete(p_val,1, 1)
        val = np.hstack((val, p_val))
    if val.shape[1] != len(y_tr) + 1:
        print "Chaos!"
    val = np.delete(val,0,1)
    print 'val.shape =', val.shape
    '''
    
    # KNN
    k = num_train // 8
    sorted_index = val.argsort(axis=1)
    sorted_index = sorted_index.T[::-1].T
    p_label = []
    for index in sorted_index:
        nearest_samples = []
        for sample_index in index[:k]:
            nearest_samples.append(y_tr[sample_index])
        n,bins,dummy = plt.hist(nearest_samples, 2, normed=1, 
                                facecolor='r', alpha=0.75)
        if n[0] > n[1]:
            p_label.append(-1.0)
        else:
            p_label.append(1.0)

    # evaluation
    rate, pos_rate, neg_rate = evaluation(y_te, p_label)

    print("The Examplar SVM framework achieves a precision of %f" % rate)
def multipule_eval_for_logistic(test_corpus_dir, feature_map_character, feature_map_numeric, feature_show, args):
    """
    Evaluate the models built with liblinear's logistic regression (the label is judged per sentence).
    If +1 occurs at least once, the document is regarded as having been assigned that label.
    """
    env = "pine"
    if env == "pine":
        # change below by an environment
        libsvm_wrapper_path = "/home/kensuke-mi/opt/libsvm-3.17/python/"
    elif env == "local":
        libsvm_wrapper_path = "/Users/kensuke-mi/opt/libsvm-3.17/python/"
        liblinear_wrapper_path = "/Users/kensuke-mi/opt/liblinear-1.94/python/"
        sys.path.append(liblinear_wrapper_path)
    sys.path.append(libsvm_wrapper_path)
    import liblinearutil
    import svmutil

    if args.save_performance == True:
        performance_out = codecs.open("./performance_result." + args.experiment_no, "w", "utf-8")
        performance_out.write(args.experiment_no + u"\n")
        performance_out.write(u"-" * 30 + u"\n")

    # confidence threshold
    threshold = float(args.threshold)
    # option: whether to show the confidence
    show_confidence = False
    # average confidence
    average_confidence = 0
    # number of +1 instances
    times_plus_1_ins = 0

    num_docs_having_motif = {}
    stop = args.stop
    tfidf_flag = args.tfidf
    exno = args.experiment_no

    model_dir_path = "../get_thompson_motif/classifier/logistic_2nd/"
    model_path_list = load_files(model_dir_path, "logistic." + exno)
    # store how many times the classifier made the correct decision, i.e. CAP(gold motif tag, candidate by classifier)
    num_of_correct_decision = {}
    precision_sum = 0
    recall_sum = 0
    F_sum = 0
    h_loss_sum = 0
    subset_acc_sum = 0
    ex_p_sum = 0
    ex_r_sum = 0
    ex_f_sum = 0
    acc_sum = 0
    classifier_return_1_sum = 0
    for test_file in load_files(test_corpus_dir):
        # ============================================================
        result_map = {}
        gold_map = {}
        # ------------------------------------------------------------
        if args.persian_test == True:
            # create one instance per sentence
            sentences_in_document, motif_stack = file_loader_sentence(test_file, stop)
        elif args.dutch_test == True:
            sentences_in_document, motif_stack = file_loader_dutch_sentence(test_file, stop)
        # ------------------------------------------------------------
        out_libsvm_format_sentence(
            sentences_in_document, feature_map_character, feature_map_numeric, feature_show, tfidf_flag
        )
        test_y, test_x = svmutil.svm_read_problem("test.data")
        # ------------------------------------------------------------
        for model_file in model_path_list:
            decision_flag = False
            alphabet_label = unicode(os.path.basename(model_file)[0], "utf-8")
            result_map[alphabet_label] = 0

            model = liblinearutil.load_model(model_file)
            p_label, p_acc, p_val = liblinearutil.predict(test_y, test_x, model, "-b 1")

            for index, result_label in enumerate(p_label):
                if result_label == 1.0:
                    decision_flag = True
                # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                if decision_flag == True and p_val[index][0] > threshold:
                    result_map[alphabet_label] = 1
                # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # ------------------------------------------------------------
        for gold_motif in motif_stack:
            alphabet_label = gold_motif[0]
            gold_map[alphabet_label] = 1
        # ------------------------------------------------------------
        gold_cap_result = {}
        for label in result_map:
            if result_map[label] == 1 and label in gold_map:
                gold_cap_result[label] = 1
        # ------------------------------------------------------------
        try:
            average = average_confidence / times_plus_1_ins
        except ZeroDivisionError:
            average = 0

        print("-" * 30)
        print("Filename:{}\nEstimated:{}\nGold:{}\nCorrect Estimation:{}".format(
            test_file, result_map, gold_map, gold_cap_result
        ))
        print("average confidence is {}".format(average))
        print("-" * 30)
        # ------------------------------------------------------------
        h_loss_sum = calc_h_loss(result_map, gold_map, h_loss_sum)
        subset_acc_sum = calc_subset_acc(result_map, gold_map, subset_acc_sum)
        ex_p_sum, ex_r_sum, ex_f_sum, acc_sum = calc_p_r_f(result_map, gold_map, ex_p_sum, ex_r_sum, ex_f_sum, acc_sum)
        classifier_return_1_sum += get_the_num_of_1_classifier(result_map)
    # ============================================================
    num_of_files = len(load_files(test_corpus_dir))
    h_loss = h_loss_sum / num_of_files
    subset_acc = float(subset_acc_sum) / num_of_files
    ex_p = ex_p_sum / num_of_files
    ex_r = ex_r_sum / num_of_files
    ex_f = ex_f_sum / num_of_files
    acc = acc_sum / num_of_files
    classifier_return_1 = float(classifier_return_1_sum) / num_of_files
    precision_ave = precision_sum / len(load_files(test_corpus_dir))
    recall_ave = recall_sum / len(load_files(test_corpus_dir))
    F_ave = F_sum / len(load_files(test_corpus_dir))
    print("-" * 30)
    print("RESULT for {} files classification".format(len(load_files(test_corpus_dir))))

    hamming_format = u"Hamming Loss:{}".format(h_loss)
    subset_format = u"Subset Accuracy(classification accuracy):{}".format(subset_acc)
    else_format = u"example-based precision:{} example-based recall:{} example-based F:{} accuracy:{}".format(
        ex_p, ex_r, ex_f, acc
    )
    classifier_format = u"Ave. number of classifier which returns 1:{}".format(classifier_return_1)
    print(hamming_format)
    print(subset_format)
    print(else_format)
    print(classifier_format)

    if args.save_performance == True:
        performance_out.write(hamming_format + u"\n")
        performance_out.write(subset_format + u"\n")
        performance_out.write(else_format + u"\n")
        performance_out.write(classifier_format + u"\n")
        performance_out.close()
        
        p_label_train, p_acc_train, p_val_train = svmutil.svm_predict(y[:i], x[:i], m)
        p_label_validation, p_acc_validation, p_val_validation = svmutil.svm_predict(y[i:], x[i:], m)
        print(p_acc_train[0], "\t", p_acc_validation[0], "\n")

        training_examples.append(i)
        train_accuracy.append(p_acc_train[0])
        validation_accuracy.append(p_acc_validation[0])

    return training_examples, train_accuracy, validation_accuracy

def get_cross_val(x, y, x_val, y_val, gamma_c):
    prob  = svmutil.svm_problem(y, x)
    param = svmutil.svm_parameter('-t 2 -q -c {0} -g {1}'.format(gamma_c.C, gamma_c.gamma))
    m = svmutil.svm_train(prob, param)

    svmutil.svm_save_model("model", m)

    p_label_validation, p_acc_validation, p_val_validation = svmutil.svm_predict(y_val, x_val, m)

    return p_acc_validation[0]


if __name__ == '__main__':
    y, x = svmutil.svm_read_problem("char_recon_shuffled.db")
    gamma = 1.0 / (2.0 * (3.0 ** 7) ** 2)
    C = 3.0 ** 3.0
    prob  = svmutil.svm_problem(y, x)
    param = svmutil.svm_parameter('-t 2 -q -c {0} -g {1}'.format(C, gamma))
    m = svmutil.svm_train(prob, param)
    svmutil.svm_save_model("model", m)
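A follow-up sketch of how the model saved above might be reloaded and applied to the same data (illustrative only):
    m = svmutil.svm_load_model("model")
    p_label, p_acc, p_val = svmutil.svm_predict(y, x, m)
    print("training-set accuracy: %.2f%%" % p_acc[0])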
                                             thresh=K_THRESH)

    with open(DATASETPATH + CODEBOOK_FILE, 'wb') as f:

        dump(codebook, f, protocol=HIGHEST_PROTOCOL)

    print("---------------------")
    print("## compute the visual words histograms for each image")
    all_word_histgrams = {}
    for imagefname in all_features:
        word_histgram = computeHistograms(codebook, all_features[imagefname])
        all_word_histgrams[imagefname] = word_histgram

    print("---------------------")
    print("## write the histograms to file to pass it to the svm")
    writeHistogramsToFile(nclusters, all_files_labels, all_files, all_word_histgrams, DATASETPATH + HISTOGRAMS_FILE)

    print("---------------------")
    print("## train svm")
    y, x = svmutil.svm_read_problem(DATASETPATH + HISTOGRAMS_FILE)
    model_file = svmutil.svm_train(y, x)
    svmutil.svm_save_model('trainingdata.svm.model', model_file)

    print("--------------------")
    print("## outputting results")
    print("codebook file: " + DATASETPATH + CODEBOOK_FILE)
    print("category      ==>  label")
    for cat in cat_label:
        print('{0:13} ==> {1:6d}'.format(cat, cat_label[cat]))
    dump(cat_label, open("cat.txt", "wb"))
#coding: utf-8
import svmutil
import outputLIBSVMformat

train_label, train_data = svmutil.svm_read_problem("./train_libsvmFormat.txt")
# linear kernel
model = svmutil.svm_train(train_label, train_data, "-t 0")

test_label, test_data = svmutil.svm_read_problem("./test_libsvmFormat.txt")
p_label, p_acc, p_val = svmutil.svm_predict(test_label, test_data, model)