Example #1
 def __init__(self, train_feature_file=TRAIN_FEATURE_FILE):
     if os.path.exists(SAVED_MODEL):
         self.model = svmutil.svm_load_model(SAVED_MODEL)
     else:
         y, x = svmutil.svm_read_problem(train_feature_file)
         self.model = svmutil.svm_train(y, x, '-c 4')
         svmutil.svm_save_model(SAVED_MODEL, self.model)
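Once the model has been cached this way, later runs skip training entirely. A minimal companion sketch for scoring, assuming the same svmutil import and a held-out feature file in libSVM format (the method name and the test file are assumptions, not part of the example above):

 def predict(self, test_feature_file):
     # Hypothetical companion method: score held-out data with the cached model.
     y, x = svmutil.svm_read_problem(test_feature_file)
     p_labels, p_acc, p_vals = svmutil.svm_predict(y, x, self.model)
     return p_labels, p_acc[0]  # p_acc[0] is the accuracy in percent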
Example #2
def main(path, k):
	
	prabs = []
	lns = []
	for kk in range(0,k-1):
		testLabel = []
		trainPoint = []
		trainLabel = []
		testPoint = []
		wcCount = 0
		for u in os.listdir(path): 
			if u[-2:] == 'WC':
				wcCount += 1
				filePath = path+u
				WC = pickle.load(open(filePath, 'rb'))
				if wcCount % k == 0 + kk:
					testLabel.append(int(u[1]))
					testPoint.append(WC)
					
				else:
					trainLabel.append(int(u[1]))
					trainPoint.append(WC)

		lns.append(len(testLabel))
		prob = svmutil.svm_problem(trainLabel, trainPoint)
		param = svmutil.svm_parameter('-t 0 -c 4 -b 1 -q')


		m = svmutil.svm_train(prob, param)
		svmutil.svm_save_model('n.model', m)
		p_label, p_acc, p_val = svmutil.svm_predict(testLabel, testPoint, m, '-b 1')
		prabs.append(p_acc[0])
Example #3
def kfold(data, labels, k):
	try:
		import svmutil
	except:
		return 0
	prabs = []

	for xxx in range(0, 10):
		picks = np.random.choice(len(data), len(data) / k, replace=False)
		testLabel = labels[picks]
		testPoint = data[picks]
		trainPoint = data[np.setdiff1d(range(0, len(data)), picks)]
		trainLabel = labels[np.setdiff1d(range(0, len(data)), picks)]

		trainLabel = trainLabel.tolist()
		trainPoint = trainPoint.tolist()

		prob = svmutil.svm_problem(trainLabel, trainPoint)
		param = svmutil.svm_parameter('-t 3 -c 4 -b 1 -q')
		testLabel = testLabel.tolist()
		testPoint = testPoint.tolist()

		m = svmutil.svm_train(prob, param)
		svmutil.svm_save_model('n.model', m)

		p_label, p_acc, p_val = svmutil.svm_predict(testLabel, testPoint, m, '-b 1')

		prabs.append(p_acc[0])

	print sum(prabs) / float(len(prabs))
	print 'std' + str(np.std(prabs))
	return sum(prabs) / float(len(prabs))
Example #4
 def train_list(self):
     print('Start to train.')
     paras = '-c 4 -t 0 -h 0 -m 1024'
     self.y, self.x = self.load_data_list(train=True)
     self.model = svmutil.svm_train(self.y, self.x, paras)
     svmutil.svm_save_model('./news/svmmodel', self.model)
     print('Train finished.')
Example #5
 def __init__(self,train_feature_file = TRAIN_FEATURE_FILE):
     if os.path.exists(SAVED_MODEL):
         self.model = svmutil.svm_load_model(SAVED_MODEL)
     else:
         y, x = svmutil.svm_read_problem(train_feature_file)
         self.model = svmutil.svm_train(y, x, '-c 4')
         svmutil.svm_save_model(SAVED_MODEL,self.model)
Example #6
def Train_SVM_model(PathToFeatureFile):  # generate the training model file, model.txt
    #print(PathToFeatureFile)
    y,x=svmutil.svm_read_problem(PathToFeatureFile)
    model=svmutil.svm_train(y,x)
    modelFilePath=os.path.join(os.path.split(PathToFeatureFile)[0],f"model_{PathToFeatureFile.split('_',1)[1]}")
    svmutil.svm_save_model(modelFilePath,model)
    print(modelFilePath)
Example #7
def trainSVMAndSave(modelLoc, kernel, labels):
    if os.path.exists(modelLoc):
        return svm_load_model(modelLoc)
    else:
        model = trainSVM(kernel, labels)
        svm_save_model(modelLoc, model)
        return model
Example #8
def write_compact_nonlinear_svm(
    file_compact_svm, target_class, file_svm_model, svm_model=None, file_SVs=None, SVs=None, str_kernel=None
):
    """
    Writes a textfile with all the necessary file locations for (nonlinear) libSVM agent
    All the component files of 'file_compact_svm' will be written in the same directory

    @param file_compact_svm: file to be written with all the information below
    @param target_class: integer target class, e.g., 0 or 30.
    @param file_svm_model: filename to the compact svm model to be written
    @param file_SVs: filename to the support vectors (only applicable if nonlinear SVM)
    @param str_kernel: string of kernel function to be used (e.g., kernels.ngd etc)
    @param svm_model: actual svm_model from get_compact_nonlinear_svm, which will be saved at file_svm_model (if not already)
    @param SVs: actual support vectors in numpy format to be saved (if not already), generated by get_compact_linear_svm
    @return: 1 if success
    """

    dir_compact = os.path.dirname(file_compact_svm)

    if svm_model:
        svmutil.svm_save_model(os.path.join(dir_compact, file_svm_model), svm_model)
    if SVs is not None:
        np.save(os.path.join(dir_compact, file_SVs), SVs)

    with open(file_compact_svm, "wb") as fin:
        fin.write("file_svm_model=%s\n" % file_svm_model)
        fin.write("target_class=%d\n" % target_class)
        if file_SVs:
            fin.write("file_SVs=%s\n" % file_SVs)
        if str_kernel:
            fin.write("str_kernel=%s\n" % str_kernel)
        fin.flush()
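For reference, a reader for the compact file written above might look like the sketch below, assuming the same os / numpy (np) / svmutil imports; the helper name and the '.npy' suffix handling (np.save appends '.npy' when it is missing) are assumptions, not part of the original example:

def read_compact_nonlinear_svm(file_compact_svm):
    # Hypothetical counterpart: parse the key=value lines written above and load
    # the referenced model / support vectors from the same directory.
    info = {}
    with open(file_compact_svm, 'r') as fin:
        for line in fin:
            key, _, value = line.strip().partition('=')
            info[key] = value
    dir_compact = os.path.dirname(file_compact_svm)
    model = svmutil.svm_load_model(os.path.join(dir_compact, info['file_svm_model']))
    SVs = None
    if 'file_SVs' in info:
        SVs = np.load(os.path.join(dir_compact, info['file_SVs'] + '.npy'))
    return model, SVs, info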
Example #9
 def getmodelandaccuary(line):
     """
     Train the model and predict the results.
     :param line: HDFS paths of the feature directories to read
     :return: accuracy
     """
     train_y = []
     train_x = []
     test_y = []
     test_x = []
     for i in range(0, len(line) - 1):
         y, x = svmutil.svm_read_problem(line[i])
         train_y.extend(y[0:90])
         train_x.extend(x[0:90])
         test_y.extend(y[90:100])
         test_x.extend(x[90:100])
     train_random_index = [i for i in range(len(train_y))]
     test_random_index = [i for i in range(len(test_y))]
     random.shuffle(train_random_index)
     random.shuffle(test_random_index)
     random_train_y = [train_y[x] for x in train_random_index]
     random_train_x = [train_x[x] for x in train_random_index]
     random_test_y = [test_y[x] for x in test_random_index]
     random_test_x = [test_x[x] for x in test_random_index]
     m = svmutil.svm_train(random_train_y, random_train_x,
                           "-s 0 -t 2 -c 32 -g 8 -b 1")
     predict_label, accuary, prob_estimates = svmutil.svm_predict(
         random_test_y, random_test_x, m, '-b 1')
     svmutil.svm_save_model(
         '/home/sunbite/Co_KNN_SVM_TMP/CoKNNSVM2.model', m)
     return accuary
Example #10
    def train_svm(self, C, gamma, model_output):

        labels = self.labels
        features = self.features

        model = lib.svm_train(
            labels, features, "-h 0 -t 2 -c %f -g %f" % (2**C, 2**gamma))
        lib.svm_save_model(model_output, model)
Example #11
 def saveModels(self, Dir):
     print "Saving models..."
     if not os.path.isdir(Dir):
         os.mkdir(Dir)
     for index in xrange(self.inputDim):
         filename = Dir + "/model_" + str(index)
         svmutil.svm_save_model(filename, self.models[index])
     print "Saving models...Done."
Example #12
 def _train(self):
     """
     Train the one_class_svm model: split the data once every self._duration interval, then generate features.
     :return:
     """
     data = self._load_data()
     features = [self._get_feature(data_i) for data_i in data]
     model = svmutil.svm_train([1] * len(data), self._get_svm_format_data(features), self.model_parameter)
     svmutil.svm_save_model(os.path.join(MODEL_DIR, self.model_name), model)
Example #13
 def write_model(self,filename):
     """ store a model and the names """
     if(self.model!=None):
         su.svm_save_model(filename,self.model)
         m=re.search("(.*)\.(svm)",filename)   # write the names to npy
         basename=m.group(1)
         np.save(basename,self.names)
         return True
     return False
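A matching loader for the model/names pair could be sketched as follows; the method name is hypothetical, and su, re and np refer to the same aliases used by write_model:

 def read_model(self, filename):
     """ hypothetical counterpart: load a model and the stored names """
     self.model = su.svm_load_model(filename)
     m = re.search(r"(.*)\.(svm)", filename)
     if m is not None:
         self.names = np.load(m.group(1) + ".npy")   # np.save stored the names as <basename>.npy
         return True
     return False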
Example #14
def TrainWordClassifier(alpha, temp_char_dir='cache/chars'):
    img_dir = settings.img_train_dir
    lex_dir = settings.lex0_train_dir
    gt_dir = settings.img_train_gt_dir

    num_procs=settings.n_procs
    
    temp_word_dir = 'cache/words'

    WordDetectorBatch(img_dir, temp_char_dir, temp_word_dir,
                      alpha, settings.max_locations, settings.overlap_thr,
                      num_procs, lex_dir, apply_word_nms=False)

    eval_results=EvaluateWordDetection(gt_dir, temp_word_dir, img_dir=img_dir,
                                       create_visualization=False)
    gt_results = eval_results[0]
    dt_results = eval_results[1]
    precision_before = eval_results[2]
    recall_before = eval_results[3]
    thrs = eval_results[4]

    Y = []
    X_list = []
    n_features = -1
    for dt_result1 in dt_results:
        for dt_item in dt_result1:
            word_score = dt_item[3]
            char_bbs = dt_item[4]
            features = ComputeWordFeatures(char_bbs, word_score)
            if n_features < 0:
                n_features = len(features)
            X_list.append(features)
            if dt_item[1]:
                Y.append(1)
            else:
                Y.append(-1)

    assert n_features > 0
    
    # scale features
    X_mat = np.vstack(X_list)
    min_vals = np.min(X_mat,axis=0)
    X_mat = X_mat - min_vals
    max_vals = np.max(X_mat,axis=0)    
    X_mat = X_mat / max_vals
    alpha_min_max = (alpha, min_vals, max_vals)
    with open(settings.word_clf_meta_name,'wb') as fid:
        cPickle.dump(alpha_min_max,fid)

    X = [dict(zip(range(n_features), x_i)) for x_i in X_mat.tolist()]

    svm_model = TrainSvmLinear2(Y, X)
    svm.svm_save_model(settings.word_clf_name, svm_model)

    svm_model_poly = TrainSvmPoly2(Y, X)
    svm.svm_save_model(settings.word_clf_poly_name, svm_model_poly)
Example #15
def get_cross_val(x, y, x_val, y_val, gamma_c):
    prob  = svmutil.svm_problem(y, x)
    param = svmutil.svm_parameter('-t 2 -q -c {0} -g {1}'.format(gamma_c.C, gamma_c.gamma))
    m = svmutil.svm_train(prob, param)

    svmutil.svm_save_model("model", m)

    p_label_validation, p_acc_validation, p_val_validation = svmutil.svm_predict(y_val, x_val, m)

    return p_acc_validation[0]
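get_cross_val is presumably driven by a grid search over C and gamma; below is a hedged sketch of such a driver, assuming a simple namedtuple for the gamma_c argument (the actual container type is not shown in the original):

import itertools
from collections import namedtuple

GammaC = namedtuple('GammaC', ['C', 'gamma'])   # assumed shape of gamma_c (.C and .gamma)

def grid_search(x, y, x_val, y_val):
    # Coarse log2 grid, as commonly used with RBF-kernel libSVM.
    best_params, best_acc = None, -1.0
    for C, gamma in itertools.product([2 ** p for p in range(-5, 16, 2)],
                                      [2 ** p for p in range(-15, 4, 2)]):
        acc = get_cross_val(x, y, x_val, y_val, GammaC(C, gamma))
        if acc > best_acc:
            best_params, best_acc = GammaC(C, gamma), acc
    return best_params, best_acc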
Example #16
    def get_svm_bytes(self):
        svm_model = self.iqr_session.rel_index.get_model()
        tmp_file_name = "tmp_svm.model"

        svmutil.svm_save_model(tmp_file_name.encode(), svm_model)
        with open(tmp_file_name, "rb") as f:
            model_file = f.read()
            b = bytearray(model_file)
        os.remove(tmp_file_name)
        return b
Example #17
    def get_svm_bytes(self):
        svm_model = self.iqr_session.rel_index.get_model()
        tmp_file_name = "tmp_svm.model"

        svmutil.svm_save_model(tmp_file_name, svm_model)
        with open(tmp_file_name, "rb") as f:
            model_file = f.read()
            b = bytearray(model_file)
        os.remove(tmp_file_name)
        return b
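The reverse direction (the get_model_from_bytes helper referenced by the tests in Examples #35 and #36) can be sketched the same way, by round-tripping through a temporary file; this is an assumed implementation, not the project's own:

    def get_model_from_bytes(self, model_bytes):
        # Hypothetical counterpart: write the bytes to a temp file and load with libSVM.
        tmp_file_name = "tmp_svm.model"
        with open(tmp_file_name, "wb") as f:
            f.write(model_bytes)
        model = svmutil.svm_load_model(tmp_file_name)
        os.remove(tmp_file_name)
        return model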
Example #18
def iqr_model_train(filepath_model,
                    matrix_kernel_train, labels_train, idx2clipid,
                    svm_para = '-w1 50 -t 4 -b 1 -c 1'):
    """
    Light-weighted SVM learning module for online IQR

    @param filepath_model: a full path to save the learned SVM model
    @param matrix_kernel_train: n-by-n square numpy array with kernel values between training data
    @param labels_train: row-wise labels of training data (1 or True indicates positive, 0 or False otherwise)
    @param idx2clipid: idx2clipid(row_idx) returns the clipid for the 0-base row in matrix
    @param svm_para: (optional) SVM learning parameter
    @rtype: dictionary with 'clipids_SV': list of clipids for support vectors
    @return: output as a dictionary with 'clipids_SV'
    """

    # set training inputs
    matrix_kernel_train = np.vstack((np.arange(1, len(matrix_kernel_train)+1), matrix_kernel_train)).T
    print "Done matrix_kernel_train"
    problem = svm.svm_problem(labels_train.tolist(), matrix_kernel_train.tolist(), isKernel=True)
    print "Done problem"
    svm_param = svm.svm_parameter(svm_para)
    print "Done svm_param"

    # train model
    model = svmutil.svm_train(problem, svm_param)
    print "Done train model"

    # release memory
    del problem
    del svm_param
    print "Done release memory"

    # check learning failure
    if model.l == 0:
        raise Exception('svm model learning failure')

    n_SVs = model.l
    clipids_SVs = []
    idxs_train_SVs = svmtools.get_SV_idxs_nonlinear_svm(model)
    for i in range(n_SVs):
        _idx_1base = idxs_train_SVs[i]
        _idx_0base = _idx_1base - 1
        clipids_SVs.append(idx2clipid[_idx_0base])
        model.SV[i][0].value = i+1 # within SVM model, index needs to be 1-base

    print "Done checking learning failure"

    svmutil.svm_save_model(filepath_model, model)

    output = dict()
    output['model'] = model
    output['clipids_SVs'] = clipids_SVs

    return output
Example #19
def learn_compact_nonlinear_svm(file_libsvm_model0,
                                file_SVs,
                                file_libsvm_model1,
                                file_svm_compact,
                                str_kernel, options_train,
                                target_class,
                                labels,
                                data, file_data,
                                kernel_matrix, file_kernel_matrix, kernel_matrix_type, flag_kernel_compute,
                                splits, func_sort, logfile):
    """
    @param file_libsvm_model0: file path for the learned SVM model to be saved in libsvm format
    @param file_SVs: filename of support vectors to be saved in numpy format
    @param file_libsvm_model1: file path for compact SVM, still stored in libsvm format
    @param file_svm_compact: file path to the full compact svm model to be written (with many other info)
    @param str_kernel: string of kernel function to be used (e.g., kernels.ngd etc)
    @param options_train: list of libsvm training strings to be tried, e.g., ['-b 1 -c 1','-b 1 -c 1000']
    @param options_test: libsvm test option string to be used during cross-validation, e.g., '-b 1'
    @param target_class: target positive class
    @param labels: ground truth labels in integers.
                   Positive integers for event kit positives, Negatives for event kit negs, zero for None.
    @param data: training data, numpy row-wise matrix. If None and kernel_matrix does not exist, then, read from file_data
    @param file_data: file path to the input training 'data'. If data is None, then read from this file
    @param kernel_matrix: kernel matrix
    @param file_kernel_matrix: if kernel matrix is None, and this path is not, then, loaded from this file.
                               if flag_kernel_compute==True, then, computed kernel is saved to this file.
    @param kernel_matrix_type: 'numpy' (square numpy matrix) or 'libsvm' (2dim list-type ready for libsvm)
    @param flag_kernel_compute: if True, re-compute kernel matrix
    @param splits: integer-based splits in numpy vector, e.g., [1 1 2 2 3 3] for 6 data in three splits
    @param file_scores: if not None, save the scores generated by SVM during cross validation
    @param func_error: func_error(labels, scores, target_label) outputs error to sort svm parameters
    @param logfile: log file where info will be written, e.g., the pairs of options_train & func_error outputs
    """

    _log = None
    if logfile:
        _log = open(logfile, 'wb')

    # Learn nonlinear SVM model & save this initial model (before compactization) in libsvm format
    model0 = None
    # add training code with full data training
    svmutil.svm_save_model(file_libsvm_model0, model0)
    _log.write('Saved initial nonlinear SVM (model0) at: %s\n'%file_libsvm_model0)

    # computed compact SVM model 'model1'
    (model1, SVs) = get_compact_nonlinear_svm(model0, data)

    # write compact SVM model, with all the required information
    write_compact_nonlinear_svm(file_svm_compact, target_class,
                                file_libsvm_model1, svm_model=model1,
                                file_SVs=file_SVs, SVs=SVs,
                                str_kernel=str_kernel)
    _log.write('Saved compact nonlinear SVM at: %s\n'%file_svm_compact)
    _log.close()
Example #20
def run(history):
    #
    w, x, y = history.grab(nsample, ntracks, 0)
    timer = timer_c()
    lsvm = svmutil.svm_train(y, x, options)
    svmutil.svm_save_model('temp.svm', lsvm)
    print 'svm trained successfully in %s sec.' % (str(float('{0:.3f}'.format(timer.lag()))))
    #
    timer = timer_c()
    p_labels, p_acc, p_vals = svmutil.svm_predict(y, x, lsvm, '')
    print 'svm predicted successfully in %s sec.' % (str(float('{0:.3f}'.format(timer.lag()))))
    plotdraw(p_labels, y)
Example #21
def train_grasp(grasp_type, side):
    """
    train_grasp(grasp_type): 
    train linear svm classifier for specific grasp type\n
    grasp_type: hand grasping type\n
    side: left hand or right hand\n
    """
    #train
    datafile = "model/traindata_grasp_" + grasp_type + "_" + side
    if not os.path.isfile(datafile):
        srcfile = "data/feature_grasp_train.csv"
        write_svmdata_grasp(srcfile, datafile, grasp_type, side, 0)
    label_train, data_train = svmutil.svm_read_problem(datafile)
    modelfile = "model/model_grasp_" + grasp_type + "_" + side
    m = []
    if not os.path.isfile(modelfile):
        print("train model: " + grasp_type + "_" + side)
        label_weight = {}
        for v in label_train:
            if label_weight.has_key(v):
                label_weight[v] += 1
            else:
                label_weight[v] = 1
        sorted_label = sorted(label_weight)
        param_weight = ' '
        for v in sorted_label:
            label_weight[v] = float(
                len(label_train)) / len(sorted_label) / label_weight[v]
            param_weight += '-w%d %f ' % (v, label_weight[v])
        prob = svmutil.svm_problem(label_train, data_train)
        param = svmutil.svm_parameter('-t 0 -b 1 -q' + param_weight)
        print '-t 0 -b 1 -q' + param_weight
        #        param = svmutil.svm_parameter('-t 0 -c 4 -b 1 -q')
        m = svmutil.svm_train(prob, param)
        svmutil.svm_save_model(modelfile, m)
    else:
        print("load model: " + grasp_type + "_" + side)
        m = svmutil.svm_load_model(modelfile)
    #test
    grasp_info = read_info("data/feature_grasp_test.csv", side)
    datafile = "model/testdata_grasp_" + grasp_type + "_" + side
    if not os.path.isfile(datafile):
        srcfile = "data/feature_grasp_test.csv"
        write_svmdata_grasp(srcfile, datafile, grasp_type, side, 1)
    label_test, data_test = svmutil.svm_read_problem(datafile)
    p_label, p_acc, p_val = svmutil.svm_predict(label_test, data_test, m,
                                                '-b 1')
    f_result = open("result/grasp_" + grasp_type + "_" + side + ".csv", "w")
    for i in range(len(p_label)):
        f_result.write(grasp_info[i] + ", " + str(int(label_test[i])) + ", " +
                       str(int(p_label[i])) + ", ")
        f_result.write("[%.4f]\n" % p_val[i][0])
    f_result.close()
Example #22
def learn_compact_nonlinear_svm(
        file_libsvm_model0, file_SVs, file_libsvm_model1, file_svm_compact,
        str_kernel, options_train, target_class, labels, data, file_data,
        kernel_matrix, file_kernel_matrix, kernel_matrix_type,
        flag_kernel_compute, splits, func_sort, logfile):
    """
    @param file_libsvm_model0: file path for the learned SVM model to be saved in libsvm format
    @param file_SVs: filename of support vectors to be saved in numpy format
    @param file_libsvm_model1: file path for compact SVM, still stored in libsvm format
    @param file_svm_compact: file path to the full compact svm model to be written (with many other info)
    @param str_kernel: string of kernel function to be used (e.g., kernels.ngd etc)
    @param options_train: list of libsvm training strings to be tried, e.g., ['-b 1 -c 1','-b 1 -c 1000']
    @param options_test: libsvm test option string to be used during cross-validation, e.g., '-b 1'
    @param target_class: target positive class
    @param labels: ground truth labels in integers.
                   Positive integers for event kit positives, Negatives for event kit negs, zero for None.
    @param data: training data, numpy row-wise matrix. If None and kernel_matrix does not exist, then, read from file_data
    @param file_data: file path to the input training 'data'. If data is None, then read from this file
    @param kernel_matrix: kernel matrix
    @param file_kernel_matrix: if kernel matrix is None, and this path is not, then, loaded from this file.
                               if flag_kernel_compute==True, then, computed kernel is saved to this file.
    @param kernel_matrix_type: 'numpy' (square numpy matrix) or 'libsvm' (2dim list-type ready for libsvm)
    @param flag_kernel_compute: if True, re-compute kernel matrix
    @param splits: integer-based splits in numpy vector, e.g., [1 1 2 2 3 3] for 6 data in three splits
    @param file_scores: if not None, save the scores generated by SVM during cross validation
    @param func_error: func_error(labels, scores, target_label) outputs error to sort svm parameters
    @param logfile: log file where info will be written, e.g., the pairs of options_train & func_error outputs
    """

    _log = None
    if logfile:
        _log = open(logfile, 'wb')

    # Learn nonlinear SVM model & save this initial model (before compactization) in libsvm format
    model0 = None
    # add training code with full data training
    svmutil.svm_save_model(file_libsvm_model0, model0)
    _log.write('Saved initial nonlinear SVM (model0) at: %s\n' %
               file_libsvm_model0)

    # computed compact SVM model 'model1'
    (model1, SVs) = get_compact_nonlinear_svm(model0, data)

    # write compact SVM model, with all the required information
    write_compact_nonlinear_svm(file_svm_compact,
                                target_class,
                                file_libsvm_model1,
                                svm_model=model1,
                                file_SVs=file_SVs,
                                SVs=SVs,
                                str_kernel=str_kernel)
    _log.write('Saved compact nonlinear SVM at: %s\n' % file_svm_compact)
    _log.close()
Example #23
	def trainmodel(self,train,cv,test,modelsavepath):
		y,x = svmutil.svm_read_problem(train)  # read the training data
		# ycv,xcv = svm_read_problem(cv)  # read the validation set
		# ytest,xtest=svm_read_problem(test)  # read the test set
		prob  = svm.svm_problem(y, x)
		param = svm.svm_parameter('-t 2 -c 0.5 -g 0.125 -b 1')		
		model = svmutil.svm_train(prob, param)				
		yt,xt = svmutil.svm_read_problem(train)  # re-reads the training file; presumably the test set was intended here
		p_labs, p_acc, p_vals = svmutil.svm_predict(yt, xt, model,'-b 1')
		svmutil.svm_save_model(modelsavepath, model)#save model
		# model = svmutil.svm_load_model('model_file')  # load the model
		pass
Example #24
    def train(self, document):
        """
        Train the SVM model

        :param document: document object
        """
        targs, tkwargs = svm_datas.svm_opts() # model training arguments
        self._vocab = self.generate_vocab_dict(document)
        y, x  =  self.fit(document, keep_on_doc=True)
        m = svmutil.svm_train(y, x, *targs)
        svmutil.svm_save_model(self.model_file, m)
        generatePickle(self._vocab, self.vocab_file)
Example #25
def train_model_main2(model_path,file):
	# LibSVM
	# generate a set of feature/label vector files in the format libSVM expects
	svm_path = r"C:\Python36\risk_down\libsvm"
	sys.path.append(svm_path + r"\python")
	import svmutil
	# y, x = svmutil.svm_read_problem('./' + str(n) + '_feature.txt')
	y, x = svmutil.svm_read_problem(file)
	# ---------------------------------------------------
	model = svmutil.svm_train(y[:50], x[:50], '-c 4')
	# print(model_path,file.split('/')[2][0])
	model_path = './'+model_path+'/'+file.split('/')[2][0]+"_feature.model"
	svmutil.svm_save_model(model_path, model)
Example #26
def example_make_model(img_kind, svm_params):
	subdir = "data/"
	problem = build_problem(img_kind)
	print "Prob built"

	param = svm.svm_parameter(svm_params)
	print "Params Set"

	problem_model = svmutil.svm_train(problem, param)
	print "Model built"

	svmutil.svm_save_model(subdir + img_kind + '.model', problem_model)
	print "Done"
Example #27
def train():
    print("Starting train process.")
    '''
    for line in open(__FEATURE_FILE):
        line = line.split(None, 1)
        if len(line) == 1: line += ['']
        print(line)
    '''
    y, x = svmutil.svm_read_problem(__FEATURE_FILE)
    model = svmutil.svm_train(y, x)
    svmutil.svm_save_model(Function.MODEL_FILE, model)
    print("train process done.")
    return
Example #28
def train(train_file, fold_num, mapping=None, parameters={'-c': 1}, multilabel=None, output_folder=None):
	'''
	Given a training instance file and (optionally) a label mapping, adapt the
	training vectors to fit the mapping and build an SVM model.

	'''
	global classifier
	if not output_folder:
		output_folder = 'models'
	output_folder = os.path.join(output_folder, 'fold-{0:02d}'.format(fold_num+1))
	if not os.path.exists(output_folder):
		os.makedirs(output_folder)

	if multilabel:
		temp_labels, instances = alt_read_problem(train_file)
		temp_labels = [[mapping[l] for l in label] for label in temp_labels]

		labels = []
		for temp_labs in temp_labels:
			if multilabel[1] in temp_labs:
				labels.append(multilabel[1])
			else:
				assert len(set(temp_labs)) == 1, "Something appears to be wrong with the intermediate mapping. There's still more than one label present for an instance: {0}".format(temp_labs)
				labels.append([l for l in temp_labs if l != multilabel[1]][0])
	else:
		labels, instances = svm_read_problem(train_file)
	labels = reMap(labels, mapping)

	# Exclude instances which have 0 as their label
	labels, instances = zip(*[(label, instance) for label, instance in zip(labels, instances) if label != 0])

	distribution = {}
	for label in set(labels):
		distribution[label] = float(labels.count(label))/len(labels)

	paramstring = ''
	for param, value in parameters.items():
		paramstring += ' {0} {1}'.format(param, value)
	if classifier == 'libsvm' and '-b' not in parameters.keys():
		paramstring += ' -b 1'
	paramstring += ' -q'

	if multilabel:
		model_file = os.path.join(output_folder, os.path.basename(train_file) + '.{0}.model'.format(multilabel[0]))
	else:
		model_file = os.path.join(output_folder, os.path.basename(train_file) + '.model')
	print '---training'
	model = svm_train(labels, instances, paramstring)
	svm_save_model(model_file, model)

	return model_file, distribution
Example #29
	def create_model(self,datasets,opt,opp,part_ids = None):
		# Should groups and ngroups be idch ?
		if part_ids is None:
			part_ids = datasets.pids
		ptrn,itrain = train2svm_prob(datasets.mkTrain(part_ids))
		print "create model ..."
		#opt = svm.svm_parameter(opt)
		model = svmutil.svm_train(ptrn,opt)
		# create saving directory
		#self._mkdir(cnt)
		# create log files
		#self._save_log(itest,plbl,pval,cnt)
		model_name = "%s/model/%s.model" % (self._dir,self._name)
		svmutil.svm_save_model(model_name, model)
Example #30
 def create_model(self, datasets, opt, opp, part_ids=None):
     # Should groups and ngroups be idch ?
     if part_ids is None:
         part_ids = datasets.pids
     ptrn, itrain = train2svm_prob(datasets.mkTrain(part_ids))
     print "create model ..."
     #opt = svm.svm_parameter(opt)
     model = svmutil.svm_train(ptrn, opt)
     # create saving directory
     #self._mkdir(cnt)
     # create log files
     #self._save_log(itest,plbl,pval,cnt)
     model_name = "%s/model/%s.model" % (self._dir, self._name)
     svmutil.svm_save_model(model_name, model)
Example #31
def main():
    carpeta_pedestres = '../pedestres/'
    carpeta_no_pedestres = '../no_pedestres/'
    carpeta_no_pedestres = '../floresRecortadas/'
    carpeta_salida = '../salidas/'
    archivo_modelo_svm = '../modelo_svm.model'
    num_positivas = 600
    num_negativas = 600
    k_val = 10
    print('leyendo imagenes')
    imagenes_positivas = obtener_imagenes(carpeta_pedestres)
    imagenes_negativas = obtener_imagenes(carpeta_no_pedestres)
    imagenes_positivas = imagenes_positivas[:num_positivas]
    imagenes_negativas = imagenes_negativas[:num_negativas]
    print('obteniendo_descriptores')
    descriptores_positivos = obtener_descriptores(imagenes_positivas)
    descriptores_negativos = obtener_descriptores(imagenes_negativas)
    imagenes = imagenes_positivas + imagenes_negativas
    entradas = descriptores_positivos + descriptores_negativos
    salidas  = [1]*len(descriptores_positivos) + [0]*len(descriptores_negativos)
    print('intentando cargar modelo desde archivo')
    modelo_svm = svmutil.svm_load_model(archivo_modelo_svm)
    if modelo_svm is None:
        print('ejecutando cross validation')
        (promedio, desviacion_estandar, modelo_svm) =\
                cross_validation(entradas, salidas, k_val)
        print('Promedio:', promedio)
        print('Desviación estándar:', desviacion_estandar)
        print('Guardando modelo en archivo')
        svmutil.svm_save_model(archivo_modelo_svm, modelo_svm)
    # here are the image tests
    print('Clasificando imagenes')
    for i in range(len(imagenes)):
        #pos = random.randint(0, len(imagenes))
        pos = i
        imagen = imagenes[pos].copy()
        entrada = entradas[pos]
        salida_esperada = salidas[pos]
        salida = clasificar_imagen(modelo_svm, entrada, salida_esperada)
        val_pixel = [0,255,0] if salida == salida_esperada else [0,0,255]
        for j in range(len(imagen)-5, len(imagen)):
            for k in range(len(imagen[j])):
                imagen[j][k][:] = val_pixel;
        if salida != salida_esperada:
            nombre_archivo = carpeta_salida + str(i+1).zfill(3) + '.jpg'
            cv2.imwrite(nombre_archivo, imagen)
#        cv2.imshow('imagen', imagen)
#        cv2.waitKey()
    sys.exit(0)
Example #32
def g_value_fun(g):
    fmodel = 'c_{c:.2f}_g_{g:.3f}.model'.format(c=0.25, g=g)
    if os.path.exists(fmodel):
        print("model %s found, loading ..." % fmodel)
        m = svmutil.svm_load_model(fmodel)
        print("model %s loaded" % fmodel)
    else:
        print("model %s not found, training ..." % fmodel)
        opt_str = '-c {c} -t 2 -g {g} -b 1'.format(c=0.25, g=g)
        m = svmutil.svm_train(y_svm, x_svm, opt_str)
        print("saving model %s ..." % fmodel)
        svmutil.svm_save_model(fmodel, m)
    clsfr = classifier.SVM_Classifier(m)
    roc_curve = clsfr.CalROC(x_svm_test, y_svm_test)
    auc = clsfr.CalAUC()
    return [m, [], roc_curve, auc]
Example #33
def train_manipulation(mnp_type):
    """  
    """
    #train
    datafile = "manipulate/model/traindata_mnp_"+mnp_type
    if not os.path.isfile(datafile):
        srcfile = "manipulate/data/feature_mnp_train.csv"
        write_svmdata_mnp(srcfile, datafile, mnp_type, 0)    
    label_train,data_train = svmutil.svm_read_problem(datafile)
    modelfile = "manipulate/model/model_mnp_"+mnp_type
    m = []
    if not os.path.isfile(modelfile):
        print("train model: " + mnp_type)
        label_weight = {}
        for v in label_train:
            if label_weight.has_key(v):
                label_weight[v]+=1
            else:
                label_weight[v]=1
        sorted_label = sorted(label_weight)
        param_weight = ' '
        for v in sorted_label:
            label_weight[v] = float(len(label_train))/len(sorted_label)/label_weight[v]
            param_weight += '-w%d %f ' % (v, label_weight[v])
        prob = svmutil.svm_problem(label_train, data_train)        
        param = svmutil.svm_parameter('-t 0 -b 1 -q'+param_weight)
        print '-t 0 -b 1 -q'+param_weight
        m = svmutil.svm_train(prob, param)        
        svmutil.svm_save_model(modelfile, m)
    else:
        print("load model: " + mnp_type)
        m = svmutil.svm_load_model(modelfile)
#    weight = read_model_linearSVM(modelfile, len(data_train[0]))
#    print weight
    #test    
    mnp_info = read_info("manipulate/data/feature_mnp_test.csv")
    datafile = "manipulate/model/testdata_mnp_"+mnp_type
    if not os.path.isfile(datafile):
        srcfile = "manipulate/data/feature_mnp_test.csv"
        write_svmdata_mnp(srcfile, datafile, mnp_type, 1)    
    label_test,data_test = svmutil.svm_read_problem(datafile)
    p_label, p_acc, p_val = svmutil.svm_predict(label_test, data_test, m, '-b 1')
    f_result = open("manipulate/result/mnp_" + mnp_type + ".csv", "w")
    for i in range(len(p_label)):
        f_result.write(mnp_info[i]+", "+str(int(label_test[i]))+", "+str(int(p_label[i]))+", ")
        f_result.write("[%.4f]\n" % p_val[i][0])
    f_result.close()
Example #34
def main():
    timer = timer_c()
    res = numpy.array([], dtype='i')
    mat = numpy.empty((0, len(key) * ntracks), dtype='f')
    with open('../data/dbf/2017.dbf', 'r') as dbf:
        history = json.load(dbf)
        seven = history['layout'].index('PCHG')
        print 'seven at %d.' % (seven)
        first = None
        for code, table in history['record'].iteritems():
            if len(table) < 180:
                continue
            if first is None or first < table[89][0]:
                first = table[89][0]
        for code, table in history['record'].iteritems():
            # skip the stock has less than 180 rows of record
            if len(table) < 180:
                continue
            # skip the stock has suspended in 90 trading days
            if table[89][0] != first:
                continue
            # form mat & res
            for i in xrange(nsample):
                # mat++
                row = []
                for r in xrange(i + 1, i + ntracks + 1):
                    for offset in key:
                        row.append(table[r][offset])
                mat = numpy.append(mat, numpy.array([row], dtype='f'), axis=0)
                # res++
                hg = table[i][seven] + 10.0
                hg = min(hg, 19.99)
                hg = max(hg, 0.0)
                res = numpy.append(res, int(hg / (20.0 / nlabels)))
    print 'mat & res loaded successfully in %s sec.' % (str(
        float('{0:.3f}'.format(timer.lag()))))
    #
    timer.reset()
    lsvm = svmutil.svm_train(res.tolist(), mat.tolist(),
                             '-s 0 -t 0 -g 1.00 -c 1000000.00 -b 0 -q')
    svmutil.svm_save_model('temp.svm', lsvm)
    print 'svm trained successfully in %s sec.' % (str(
        float('{0:.3f}'.format(timer.lag()))))
    p_labels, p_acc, p_vals = svmutil.svm_predict(res.tolist(), mat.tolist(),
                                                  lsvm, '')
Example #35
    def test_model_from_byte(self):
        # The original model
        svm_model_1 = self.iqr_session.rel_index.get_model()
        model_1_file, model_2_file = "tmp_svm_1.model", "tmp_svm_2.model"
        svmutil.svm_save_model(model_1_file, svm_model_1)

        # Get the bytes for the model first.
        bytes = self.get_svm_bytes()
        # Use the bytes to created a model
        svm_model_2 = self.get_model_from_bytes(bytes)
        # Save the model created using the bytes
        svmutil.svm_save_model(model_2_file, svm_model_2)

        # Check that the model created using the bytes is the same as the
        # original model.
        assert (filecmp.cmp(model_1_file, model_2_file) is True)
        os.remove(model_1_file)
        os.remove(model_2_file)
Example #36
    def test_model_from_byte(self):
        # The original model
        svm_model_1 = self.iqr_session.rel_index.get_model()
        model_1_file, model_2_file = "tmp_svm_1.model", "tmp_svm_2.model"
        svmutil.svm_save_model(model_1_file.encode(), svm_model_1)

        # Get the bytes for the model first.
        bytes = self.get_svm_bytes()
        # Use the bytes to created a model
        svm_model_2 = self.get_model_from_bytes(bytes)
        # Save the model created using the bytes
        svmutil.svm_save_model(model_2_file.encode(), svm_model_2)

        # Check that the model created using the bytes is the same as the
        # original model.
        assert(filecmp.cmp(model_1_file, model_2_file) is True)
        os.remove(model_1_file)
        os.remove(model_2_file)
Example #37
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-r', '--resdir', type=str, required=True,
            help="Results directory")
    parser.add_argument('-f', '--feature', type=str, required=True,
            help='feature to use to learn')
    args = parser.parse_args()
    SCORES_FPATH = os.path.join(args.resdir, 'scores.txt')
    FEAT_DIR = os.path.join(args.resdir, 'features', args.feature)

    scores = np.fromfile(SCORES_FPATH, sep='\n')
    feats = []
    for i in range(1, len(scores) + 1):
        feats.append(np.fromfile(os.path.join(FEAT_DIR, str(i) + '.txt'), sep='\n').tolist())
#   feats = np.array(feats)
    print('Read all features')
    params = svmutil.svm_parameter('-s 4 -t 2')
    model = svmutil.svm_train(svmutil.svm_problem(scores, feats), params)
    svmutil.svm_save_model(os.path.join(args.resdir, 'svr.model'), model)
    print svmutil.svm_predict(scores, feats, model)
Example #38
def main(path):
	
	label = []
	points = []
	for u in os.listdir(path): 
		if u[-2:] == 'WC':
			
			filePath = path+u
			WC = pickle.load(open(filePath, 'rb'))
			label.append(u[1])
			points.append(WC)
	label = [int(i) for i in label]
	
	prob = svmutil.svm_problem(label, points)
	param = svmutil.svm_parameter('-t 0 -c 4 -b 1')
	
	m = svmutil.svm_train(prob, param)
	svmutil.svm_save_model('n.model', m)
	
	p_label, p_acc, p_val = svmutil.svm_predict(label, points, m, '-b 1')
	
	return p_acc
Example #39
def train_attribute(attribute, side):
    """
    train_attribute(str, float): 
    train linear svm classifier for specific attribute\n
    attribute: should be one from ["prismatic", "sphere", "flat", "rigid"]
    """
    #train
    datafile = "model/traindata_attribute_"+attribute+"_"+side
    if not os.path.isfile(datafile):
        srcfile = "data/feature_attribute_train.csv"
        write_svmdata_attribute(srcfile, datafile, attribute, side, 0)    
    label_train,data_train = svmutil.svm_read_problem(datafile)    
    modelfile = "model/model_attribute_"+attribute+"_"+side
    m = []
    if not os.path.isfile(modelfile):
        print("train model: " + attribute+"_"+side)
        prob = svmutil.svm_problem(label_train, data_train)
        param = svmutil.svm_parameter('-t 0 -c 4 -b 1 -q')
        m = svmutil.svm_train(prob, param)        
        svmutil.svm_save_model(modelfile, m)
    else:
        print("load model: " + attribute+"_"+side)
        m = svmutil.svm_load_model(modelfile)
    #test
    attribute_info = read_info("data/feature_attribute_test.csv", side)
    datafile = "model/testdata_attribute_"+attribute+"_"+side
    if not os.path.isfile(datafile):
        srcfile = "data/feature_attribute_test.csv"
        write_svmdata_attribute(srcfile, datafile, attribute, side, 1)    
    label_test,data_test = svmutil.svm_read_problem(datafile)
    p_label, p_acc, p_val = svmutil.svm_predict(label_test, data_test, m, '-b 1')
    [precision, recall, f1, accuracy] = getF1(label_test, p_label)
    print "F1: [%.4f, %.4f, %.4f] Accuracy: %.4f" % (precision, recall, f1, accuracy)
    f_result = open("result/attribute_"+attribute+"_"+side+".csv", "w")
    for i in range(len(p_label)):
        f_result.write(attribute_info[i]+", "+str(int(label_test[i]))+", "+str(int(p_label[i]))+", ")
        f_result.write("[%.4f]\n" % p_val[i][0])
    f_result.close()
Example #40
    def __getstate__(self):
        # If we don't have a model, or if we have one but it's not being saved
        # to files.
        if not self.has_model() or (self.svm_model_fp is not None and
                                    self.svm_label_map_fp is not None):
            return self.get_config()
        else:
            self._log.debug("Saving model to temp file for pickling")
            fd, fp = tempfile.mkstemp()
            try:
                os.close(fd)

                state = self.get_config()
                state['__LOCAL__'] = True
                state['__LOCAL_LABELS__'] = self.svm_label_map

                svmutil.svm_save_model(fp, self.svm_model)
                with open(fp, 'rb') as model_f:
                    state['__LOCAL_MODEL__'] = model_f.read()

                return state
            finally:
                os.remove(fp)
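A __setstate__ counterpart would reverse this by writing the pickled model bytes back to a temporary file and reloading it with libSVM; a minimal sketch under that assumption (restoring the rest of the configuration is elided):

    def __setstate__(self, state):
        # Hypothetical counterpart to __getstate__ above: reload the pickled
        # model bytes through a temp file, since libSVM I/O only takes paths.
        if state.get('__LOCAL__'):
            self.svm_label_map = state['__LOCAL_LABELS__']
            fd, fp = tempfile.mkstemp()
            try:
                os.close(fd)
                with open(fp, 'wb') as model_f:
                    model_f.write(state['__LOCAL_MODEL__'])
                self.svm_model = svmutil.svm_load_model(fp)
            finally:
                os.remove(fp)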
Example #41
 def OnUpdate(self,event):
     import sys
     import string
     sys.path.append('/home/guojian/Workspaces/FindFakeWeb/libsvm/python')
     import svmutil
     self.infoTxt.AppendText('模块导入成功' + os.linesep)
     wtf = open(self.wTxt.Value,'r')
     btf = open(self.bTxt.Value,'r')
     wf = wtf.read()
     bf = btf.read()
     wtf.close()
     btf.close()
     tf = open('train/t.train','w')
     tf.write(wf+bf)
     tf.close()
     self.infoTxt.AppendText(wf+bf+os.linesep)
     self.infoTxt.AppendText('文件合并完成' + os.linesep)
     self.infoTxt.ShowPosition(self.infoTxt.GetLastPosition())
     y, x = svmutil.svm_read_problem('train/t.train')
     model = svmutil.svm_train(y, x, '-c 5')
     svmutil.svm_save_model('model_file.model',model)
     self.infoTxt.AppendText('训练模型构造完成,并且保存为文件model_file.model' + os.linesep)
     self.infoTxt.ShowPosition(self.infoTxt.GetLastPosition())
Example #42
    def __getstate__(self):
        # If we don't have a model, or if we have one but it's not being saved
        # to files.
        if not self.has_model() or (self.svm_model_fp is not None
                                    and self.svm_label_map_fp is not None):
            return self.get_config()
        else:
            self._log.debug("Saving model to temp file for pickling")
            fd, fp = tempfile.mkstemp()
            try:
                os.close(fd)

                state = self.get_config()
                state['__LOCAL__'] = True
                state['__LOCAL_LABELS__'] = self.svm_label_map

                svmutil.svm_save_model(fp, self.svm_model)
                with open(fp, 'rb') as model_f:
                    state['__LOCAL_MODEL__'] = model_f.read()

                return state
            finally:
                os.remove(fp)
Example #43
def write_compact_nonlinear_svm(file_compact_svm,
                                target_class,
                                file_svm_model,
                                svm_model=None,
                                file_SVs=None,
                                SVs=None,
                                str_kernel=None):
    """
    Writes a textfile with all the necessary file locations for (nonlinear) libSVM agent
    All the component files of 'file_compact_svm' will be written in the same directory

    @param file_compact_svm: file to be written with all the information below
    @param target_class: integer target class, e.g., 0 or 30.
    @param file_svm_model: filename to the compact svm model to be written
    @param file_SVs: filename to the support vectors (only applicable if nonlinear SVM)
    @param str_kernel: string of kernel function to be used (e.g., kernels.ngd etc)
    @param svm_model: actual svm_model from get_compact_nonlinear_svm, which will be saved at file_svm_model (if not already)
    @param SVs: actual support vectors in numpy format to be saved (if not already), generated by get_compact_linear_svm
    @return: 1 if success
    """

    dir_compact = os.path.dirname(file_compact_svm)

    if svm_model:
        svmutil.svm_save_model(os.path.join(dir_compact, file_svm_model),
                               svm_model)
    if SVs is not None:
        np.save(os.path.join(dir_compact, file_SVs), SVs)

    with open(file_compact_svm, 'wb') as fin:
        fin.write('file_svm_model=%s\n' % file_svm_model)
        fin.write('target_class=%d\n' % target_class)
        if file_SVs:
            fin.write('file_SVs=%s\n' % file_SVs)
        if str_kernel:
            fin.write('str_kernel=%s\n' % str_kernel)
        fin.flush()
Example #44
def do_training(classifier_name, train_x, train_y, test_x, test_y):
    model_save_file = str('./models/') + classifier_name + str('.model')
    if classifier_name == 'LIBSVM':
        prob = svm_problem(
            np.array(train_y).tolist(),
            np.array(train_x).tolist())
        param = svm_parameter('-s 1 -t 1 -q -d 3')
        # param = svm_parameter('-t 2 -q')
        model = svm_train(prob, param)
        svm_save_model('./models/{}.model'.format(classifier_name), model)
        svm_predict(
            np.array(test_y).tolist(),
            np.array(test_x).tolist(), model)
        return model

    model_save = {}
    classifiers = {
        'NB': naive_bayes_classifier,
        'KNN': knn_classifier,
        'LR': logistic_regression_classifier,
        'RF': random_forest_classifier,
        'DT': decision_tree_classifier,
        'SVM': svm_classifier,
        'SVMCV': svm_cross_validation,
        'GBDT': gradient_boosting_classifier,
        'ADA': ada_boosting_classifier,
        'MLP': mlp_classifier,
        'XGBOOST': xgboost_classifier
    }
    model = classifiers[classifier_name](train_x, train_y)
    model_save[classifier_name] = model
    predict = model.predict(test_x)
    accuracy = metrics.accuracy_score(test_y, predict)
    print('accuracy: %.2f%%' % (100 * accuracy))
    jl.dump(model_save, model_save_file)
    return model
Example #45
def train(request):

    points = models.Point2d.objects.all()

    # Storing the information to be presented to SVM
    labels = []
    inputs = []

    # For each point, store the information into arrays
    for p in points:
        labels.append(p.label)
        inputs.append([p.x, p.y])

    prob = svm.svm_problem(labels, inputs)
    param = svm.svm_parameter('-t 2 -c 100')
    model = svmutil.svm_train(prob, param)

    try:
        svmutil.svm_save_model('libsvm.model', model)
    except Exception as e:
        print "error: ", e, "\n"

    data = {"status": "trained"}
    return json(data)
Example #46
def train(request):
    
    points = models.Point2d.objects.all()
    
    # Storing the information to be presented to SVM
    labels = []
    inputs = []
    
    # For each point, store the information into arrays
    for p in points:
        labels.append( p.label )
        inputs.append([p.x, p.y])
    
    prob = svm.svm_problem(labels, inputs)
    param = svm.svm_parameter('-t 2 -c 100')
    model = svmutil.svm_train(prob, param)
    
    try:
        svmutil.svm_save_model('libsvm.model', model)
    except Exception as e:
        print "error: ", e, "\n"
    
    data = {"status": "trained"}
    return json(data)
Example #47
 def save_model(self, filename):
     svmutil.svm_save_model(filename, self.model)
Example #48
 def test_conversion_from_filesystem(self):
     libsvm_model_path = tempfile.mktemp(suffix = 'model.libsvm')
     svmutil.svm_save_model(libsvm_model_path, self.libsvm_model)
     spec = libsvm.convert(libsvm_model_path, 'data', 'target')
Example #49
    def _train(self, class_examples, **extra_params):
        """
        Internal method that trains the classifier implementation.

        This method is called after checking that there is not already a model
        trained, thus it can be assumed that no model currently exists.

        The class labels will have already been checked before entering this
        method, so it can be assumed that the ``class_examples`` will contain
        at least two classes.

        :param class_examples: Dictionary mapping class labels to iterables of
            DescriptorElement training examples.
        :type class_examples: dict[collections.abc.Hashable,
                 collections.abc.Iterable[smqtk.representation.DescriptorElement]]

        :param extra_params: Dictionary with extra parameters for training.
            This is not used by this implementation.
        :type extra_params: None | dict[basestring, object]

        """

        # Offset from 0 for positive class labels to use
        # - not using label of 0 because we think libSVM wants positive labels
        CLASS_LABEL_OFFSET = 1

        # Stuff for debug reporting
        param_debug = {'-q': ''}
        if self._log.getEffectiveLevel() <= logging.DEBUG:
            param_debug = {}

        # Form libSVM problem input values
        self._log.debug("Formatting problem input")
        train_labels = []
        train_vectors = []
        train_group_sizes = []  # number of examples per class
        self.svm_label_map = {}
        # Making SVM label assignment deterministic to alphabetic order
        for i, l in enumerate(sorted(class_examples), CLASS_LABEL_OFFSET):
            # Map integer SVM label to semantic label
            self.svm_label_map[i] = l

            self._log.debug('-- class %d (%s)', i, l)
            # requires a sequence, so making the iterable ``g`` a tuple
            g = class_examples[l]
            if not isinstance(g, collections.abc.Sequence):
                self._log.debug('   (expanding iterable into sequence)')
                g = tuple(g)

            train_group_sizes.append(float(len(g)))
            x = numpy.array(DescriptorElement.get_many_vectors(g))
            x = self._norm_vector(x)
            train_labels.extend([i] * x.shape[0])
            train_vectors.extend(x.tolist())
            del g, x

        assert len(train_labels) == len(train_vectors), \
            "Count mismatch between parallel labels and descriptor vectors" \
            "being sent to libSVM (%d != %d)" \
            % (len(train_labels), len(train_vectors))

        self._log.debug("Forming train params")
        #: :type: dict
        params = deepcopy(self.train_params)
        params.update(param_debug)
        # Calculating class weights if set to C-SVC type SVM
        if '-s' not in params or int(params['-s']) == 0:
            # (john.moeller): The weighting should probably be the geometric
            # mean of the number of examples over the classes divided by the
            # number of examples for the current class.
            gmean = scipy.stats.gmean(train_group_sizes)
            for i, n in enumerate(train_group_sizes, CLASS_LABEL_OFFSET):
                w = gmean / n
                params['-w' + str(i)] = w
                self._log.debug("-- class '%s' weight: %s",
                                self.svm_label_map[i], w)

        self._log.debug("Making parameters obj")
        svm_params = svmutil.svm_parameter(self._gen_param_string(params))
        self._log.debug("Creating SVM problem")
        svm_problem = svm.svm_problem(train_labels, train_vectors)
        del train_vectors
        self._log.debug("Training SVM model")
        self.svm_model = svmutil.svm_train(svm_problem, svm_params)
        self._log.debug("Training SVM model -- Done")

        if self.svm_label_map_elem and self.svm_label_map_elem.writable():
            self._log.debug("saving labels to element (%s)",
                            self.svm_label_map_elem)
            self.svm_label_map_elem.set_bytes(
                pickle.dumps(self.svm_label_map, -1)
            )
        if self.svm_model_elem and self.svm_model_elem.writable():
            self._log.debug("saving model to element (%s)",
                            self.svm_model_elem)
            # LibSvm I/O only works with filepaths, thus the need for an
            # intermediate temporary file.
            fd, fp = tempfile.mkstemp()
            try:
                svmutil.svm_save_model(fp, self.svm_model)
                # Use the file descriptor to create the file object.
                # This avoids reopening the file and will automatically
                # close the file descriptor on exiting the with block.
                # fdopen() is required because in Python 2 open() does
                # not accept a file descriptor.
                with os.fdopen(fd, 'rb') as f:
                    self.svm_model_elem.set_bytes(f.read())
            finally:
                os.remove(fp)
Example #50
    def _train(self, class_examples, **extra_params):
        """
        Internal method that trains the classifier implementation.

        This method is called after checking that there is not already a model
        trained, thus it can be assumed that no model currently exists.

        The class labels will have already been checked before entering this
        method, so it can be assumed that the ``class_examples`` will contain
        at least two classes.

        :param class_examples: Dictionary mapping class labels to iterables of
            DescriptorElement training examples.
        :type class_examples: dict[collections.Hashable,
                 collections.Iterable[smqtk.representation.DescriptorElement]]

        :param extra_params: Dictionary with extra parameters for training.
            This is not used by this implementation.
        :type extra_params: None | dict[basestring, object]

        """

        # Offset from 0 for positive class labels to use
        # - not using label of 0 because we think libSVM wants positive labels
        CLASS_LABEL_OFFSET = 1

        # Stuff for debug reporting
        etm_ri = None
        param_debug = {'-q': ''}
        if self._log.getEffectiveLevel() <= logging.DEBUG:
            etm_ri = 1.0
            param_debug = {}

        # Form libSVM problem input values
        self._log.debug("Formatting problem input")
        train_labels = []
        train_vectors = []
        train_group_sizes = []  # number of examples per class
        self.svm_label_map = {}
        # Making SVM label assignment deterministic to alphabetic order
        for i, l in enumerate(sorted(class_examples), CLASS_LABEL_OFFSET):
            # Map integer SVM label to semantic label
            self.svm_label_map[i] = l

            self._log.debug('-- class %d (%s)', i, l)
            # requires a sequence, so making the iterable ``g`` a tuple
            g = class_examples[l]
            if not isinstance(g, collections.Sequence):
                self._log.debug('   (expanding iterable into sequence)')
                g = tuple(g)

            train_group_sizes.append(float(len(g)))
            x = elements_to_matrix(g, report_interval=etm_ri)
            x = self._norm_vector(x)
            train_labels.extend([i] * x.shape[0])
            train_vectors.extend(x.tolist())
            del g, x

        assert len(train_labels) == len(train_vectors), \
            "Count mismatch between parallel labels and descriptor vectors" \
            "being sent to libSVM (%d != %d)" \
            % (len(train_labels), len(train_vectors))

        self._log.debug("Forming train params")
        #: :type: dict
        params = deepcopy(self.train_params)
        params.update(param_debug)
        # Calculating class weights if set to C-SVC type SVM
        if '-s' not in params or int(params['-s']) == 0:
            # (john.moeller): The weighting should probably be the geometric
            # mean of the number of examples over the classes divided by the
            # number of examples for the current class.
            gmean = scipy.stats.gmean(train_group_sizes)
            for i, n in enumerate(train_group_sizes, CLASS_LABEL_OFFSET):
                w = gmean / n
                params['-w' + str(i)] = w
                self._log.debug("-- class '%s' weight: %s",
                                self.svm_label_map[i], w)

        self._log.debug("Making parameters obj")
        svm_params = svmutil.svm_parameter(self._gen_param_string(params))
        self._log.debug("Creating SVM problem")
        svm_problem = svm.svm_problem(train_labels, train_vectors)
        del train_vectors
        self._log.debug("Training SVM model")
        self.svm_model = svmutil.svm_train(svm_problem, svm_params)
        self._log.debug("Training SVM model -- Done")

        if self.svm_label_map_elem and self.svm_label_map_elem.writable():
            self._log.debug("saving labels to element (%s)",
                            self.svm_label_map_elem)
            self.svm_label_map_elem.set_bytes(
                cPickle.dumps(self.svm_label_map, -1)
            )
        if self.svm_model_elem and self.svm_model_elem.writable():
            self._log.debug("saving model to element (%s)",
                            self.svm_model_elem)
            # LibSvm I/O only works with filepaths, thus the need for an
            # intermediate temporary file.
            fd, fp = tempfile.mkstemp()
            try:
                svmutil.svm_save_model(fp, self.svm_model)
                # Use the file descriptor to create the file object.
                # This avoids reopening the file and will automatically
                # close the file descriptor on exiting the with block.
                # fdopen() is required because in Python 2 open() does
                # not accept a file descriptor.
                with os.fdopen(fd, 'rb') as f:
                    self.svm_model_elem.set_bytes(f.read())
            finally:
                os.remove(fp)
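
A minimal standalone sketch of the geometric-mean class weighting described in the comment above, using hypothetical class sizes (the real code derives them from the training descriptors): each class gets the weight gmean(sizes) / size, so under-represented classes receive proportionally larger '-w<label>' penalties.

import scipy.stats

train_group_sizes = [10.0, 100.0, 1000.0]     # hypothetical per-class example counts
gmean = scipy.stats.gmean(train_group_sizes)  # 100.0 for these sizes
params = {}
for i, n in enumerate(train_group_sizes, 1):  # labels start at CLASS_LABEL_OFFSET (1)
    params['-w%d' % i] = gmean / n
print(params)  # e.g. {'-w1': 10.0, '-w2': 1.0, '-w3': 0.1}
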
Beispiel #51
0
    svms = []
    ws = []
    bs = []
    werrs = 0
    for n in range(hl.n_out):
        print "Hidden neuron: %d" % n,
        print " Problem...",
        if n == 0:
            probs.append(svmutil.svm_problem(svm_targets[n], svm_inputs))
        else:
            probs.append(svmutil.svm_problem(svm_targets[n], None, tmpl=probs[0]))
        params.append(svmutil.svm_parameter("-q -s 0 -t 0 -c 100"))
        print " Training...",
        svms.append(svmutil.svm_train(probs[n], params[n]))
        print " Saving...",
        svmutil.svm_save_model("hidden%04d.svm" % n, svms[n])

        print " Testing..."
        # get weights from SVM
        w, b = get_svm_weights(svms[n], hl.n_in)
        ws.append(w)
        bs.append(b)

        # test model
        predv = numpy.dot(w, trsx.T) + b
        pred = numpy.sign(predv)
        pos = 0
        neg = 0        
        for i in range(pred.size):
            if svm_targets[n][i] > 0:
                pos += 1
Beispiel #52
0
    def train(self, class_examples=None, **kwds):
        """
        Train the supervised classifier model.

        If a model is already loaded, we will raise an exception in order to
        prevent accidental overwrite.

        If the same label is provided to both ``class_examples`` and ``kwds``,
        the examples given to the reference in ``kwds`` will prevail.

        :param class_examples: Dictionary mapping class labels to iterables of
            DescriptorElement training examples.
        :type class_examples: dict[collections.Hashable,
                 collections.Iterable[smqtk.representation.DescriptorElement]]

        :param kwds: Keyword assignment of labels to iterables of
            DescriptorElement training examples.
        :type kwds: dict[str,
                 collections.Iterable[smqtk.representation.DescriptorElement]]

        :raises ValueError: There were no class examples provided.
        :raises ValueError: Less than 2 classes were given.
        :raises RuntimeError: A model already exists in this instance. Following
            through with training would overwrite this model, so an exception is
            raised to protect it.

        """
        class_examples = \
            super(LibSvmClassifier, self).train(class_examples, **kwds)

        # Offset from 0 for positive class labels to use
        # - not using label of 0 because we think libSVM wants positive labels
        CLASS_LABEL_OFFSET = 1

        # Stuff for debug reporting
        etm_ri = None
        param_debug = {'-q': ''}
        if self._log.getEffectiveLevel() <= logging.DEBUG:
            etm_ri = 1.0
            param_debug = {}

        # Form libSVM problem input values
        self._log.debug("Formatting problem input")
        train_labels = []
        train_vectors = []
        train_group_sizes = []  # number of examples per class
        self.svm_label_map = {}
        # Making SVM label assignment deterministic to alphabetic order
        for i, l in enumerate(sorted(class_examples), CLASS_LABEL_OFFSET):
            # Map integer SVM label to semantic label
            self.svm_label_map[i] = l

            self._log.debug('-- class %d (%s)', i, l)
            # requires a sequence, so making the iterable ``g`` a tuple
            g = class_examples[l]
            if not isinstance(g, collections.Sequence):
                g = tuple(g)

            train_group_sizes.append(float(len(g)))
            x = elements_to_matrix(g, report_interval=etm_ri)
            x = self._norm_vector(x)
            train_labels.extend([i] * x.shape[0])
            train_vectors.extend(x.tolist())
            del g, x

        assert len(train_labels) == len(train_vectors), \
            "Count miss-match between parallel labels and descriptor vectors" \
            "being sent to libSVM (%d != %d)" \
            % (len(train_labels), len(train_vectors))

        self._log.debug("Forming train params")
        #: :type: dict
        params = deepcopy(self.train_params)
        params.update(param_debug)
        # Calculating class weights for C-SVC SVM
        if '-s' not in params or int(params['-s']) == 0:
            total_examples = sum(train_group_sizes)
            for i, n in enumerate(train_group_sizes, CLASS_LABEL_OFFSET):
                # weight is the ratio of between number of other-class examples
                # to the number of examples in this class.
                other_class_examples = total_examples - n
                w = max(1.0, other_class_examples / float(n))
                params['-w' + str(i)] = w
                self._log.debug("-- class '%s' weight: %s",
                                self.svm_label_map[i], w)

        self._log.debug("Making parameters obj")
        svm_params = svmutil.svm_parameter(self._gen_param_string(params))
        self._log.debug("Creating SVM problem")
        svm_problem = svm.svm_problem(train_labels, train_vectors)
        self._log.debug("Training SVM model")
        self.svm_model = svmutil.svm_train(svm_problem, svm_params)
        self._log.debug("Training SVM model -- Done")

        if self.svm_label_map_fp:
            self._log.debug("saving file -- labels -- %s",
                            self.svm_label_map_fp)
            with open(self.svm_label_map_fp, 'wb') as f:
                cPickle.dump(self.svm_label_map, f, -1)
        if self.svm_model_fp:
            self._log.debug("saving file -- model -- %s", self.svm_model_fp)
            svmutil.svm_save_model(self.svm_model_fp, self.svm_model)
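
A small standalone sketch of the persistence step above, with hypothetical file paths: the semantic label map is pickled next to the libSVM model file so both can be restored together when the classifier is reloaded.

import pickle
import svmutil

def save_classifier(svm_model, svm_label_map,
                    model_fp='classifier.svm', labels_fp='classifier.labels.pkl'):
    svmutil.svm_save_model(model_fp, svm_model)
    with open(labels_fp, 'wb') as f:
        pickle.dump(svm_label_map, f, -1)

def load_classifier(model_fp='classifier.svm', labels_fp='classifier.labels.pkl'):
    svm_model = svmutil.svm_load_model(model_fp)
    with open(labels_fp, 'rb') as f:
        svm_label_map = pickle.load(f)
    return svm_model, svm_label_map
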
Beispiel #53
0
    def saveModel(self, fname):
        svm_save_model(fname, self._model)
        p_label_train, p_acc_train, p_val_train = svmutil.svm_predict(y[:i], x[:i], m)
        p_label_validation, p_acc_validation, p_val_validation = svmutil.svm_predict(y[i:], x[i:], m)
        print p_acc_train[0], "\t", p_acc_validation[0], "\n"

        training_examples.append(i)
        train_accuracy.append(p_acc_train[0])
        validation_accuracy.append(p_acc_validation[0])

    return training_examples, train_accuracy, validation_accuracy

def get_cross_val(x, y, x_val, y_val, gamma_c):
    prob  = svmutil.svm_problem(y, x)
    param = svmutil.svm_parameter('-t 2 -q -c {0} -g {1}'.format(gamma_c.C, gamma_c.gamma))
    m = svmutil.svm_train(prob, param)

    svmutil.svm_save_model("model", m)

    p_label_validation, p_acc_validation, p_val_validation = svmutil.svm_predict(y_val, x_val, m)

    return p_acc_validation[0]


if __name__ == '__main__':
    y, x = svmutil.svm_read_problem("char_recon_shuffled.db")
    gamma = 1.0 / (2.0 * (3.0 ** 7) ** 2)
    C = 3.0 ** 3.0
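    # i.e. an RBF kernel with sigma = 3**7 (gamma = 1/(2*sigma**2) ~= 1.05e-7) and C = 3**3 = 27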
    prob  = svmutil.svm_problem(y, x)
    param = svmutil.svm_parameter('-t 2 -q -c {0} -g {1}'.format(C, gamma))
    m = svmutil.svm_train(prob, param)
    svmutil.svm_save_model("model", m)
Beispiel #55
0
    def train(self, positive_classes, negatives):
        """
        Train the supervised SVM classifier model.

        The class label ``negative`` is reserved for the negative class.

        If a model is already loaded, we will raise an exception in order to
        prevent accidental overwrite.

        NOTE:
            This abstract method provides generalized error checking and
            should be called via ``super`` in implementing methods.

        :param positive_classes: Dictionary mapping positive class labels to
            iterables of DescriptorElement training examples.
        :type positive_classes:
            dict[collections.Hashable,
                 collections.Iterable[smqtk.representation.DescriptorElement]]

        :param negatives: Iterable of negative DescriptorElement examples.
        :type negatives: collections.Iterable[smqtk.representation.DescriptorElement]

        :raises ValueError: The ``negative`` label was found in the
            ``positive_classes`` dictionary. This is reserved for the negative
            example class.
        :raises ValueError: There were no positive or negative examples.
        :raises RuntimeError: A model already exists in this instance. Following
            through with training would overwrite this model, so an exception is
            raised to protect it.


        """
        super(LibSvmClassifier, self).train(positive_classes, negatives)

        # Offset from 0 for positive class labels to use
        # - not using label of 0 because we think libSVM wants positive labels
        CLASS_LABEL_OFFSET = 1

        # Stuff for debug reporting
        etm_ri = None
        param_debug = {"-q": ""}
        if self._log.getEffectiveLevel() <= logging.DEBUG:
            etm_ri = 1.0
            param_debug = {}

        # Form libSVM problem input values
        self._log.debug("Formatting problem input")
        train_labels = []
        train_vectors = []
        train_group_sizes = []
        self.svm_label_map = {}
        # Making SVM label assignment deterministic to alphabetic order
        for i, l in enumerate(sorted(positive_classes), CLASS_LABEL_OFFSET):
            # Map integer SVM label to semantic label
            self.svm_label_map[i] = l

            self._log.debug("-- class %d (%s)", i, l)
            # requires a sequence, so making the iterable ``g`` a tuple
            g = positive_classes[l]
            if not isinstance(g, collections.Sequence):
                g = tuple(g)

            train_group_sizes.append(float(len(g)))
            x = elements_to_matrix(g, report_interval=etm_ri)
            x = self._norm_vector(x)
            train_labels.extend([i] * x.shape[0])
            train_vectors.extend(x.tolist())
            del g, x

        self._log.debug("-- negatives (-1)")
        # Map integer SVM label to semantic label
        self.svm_label_map[-1] = self.NEGATIVE_LABEL
        # requires a sequence, so making the iterable ``negatives`` a tuple
        if not isinstance(negatives, collections.Sequence):
            negatives = tuple(negatives)
        negatives_size = float(len(negatives))
        x = elements_to_matrix(negatives, report_interval=etm_ri)
        x = self._norm_vector(x)
        train_labels.extend([-1] * x.shape[0])
        train_vectors.extend(x.tolist())
        del negatives, x

        self._log.debug(
            "Training elements: %d labels, %d vectors " "(should be the same)", len(train_labels), len(train_vectors)
        )

        self._log.debug("Forming train params")
        #: :type: dict
        params = deepcopy(self.train_params)
        params.update(param_debug)
        # Only need to calculate positive class weights when C-SVC type
        if "-s" not in params or int(params["-s"]) == 0:
            for i, n in enumerate(train_group_sizes, CLASS_LABEL_OFFSET):
                params["-w" + str(i)] = max(1.0, negatives_size / float(n))

        self._log.debug("Making parameters obj")
        svm_params = svmutil.svm_parameter(self._gen_param_string(params))
        self._log.debug("Creating SVM problem")
        svm_problem = svm.svm_problem(train_labels, train_vectors)
        self._log.debug("Training SVM model")
        self.svm_model = svmutil.svm_train(svm_problem, svm_params)
        self._log.debug("Training SVM model -- Done")

        if self.svm_label_map_fp:
            self._log.debug("saving file -- labels -- %s", self.svm_label_map_fp)
            with open(self.svm_label_map_fp, "wb") as f:
                cPickle.dump(self.svm_label_map, f)
        if self.svm_model_fp:
            self._log.debug("saving file -- model -- %s", self.svm_model_fp)
            svmutil.svm_save_model(self.svm_model_fp, self.svm_model)
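
A minimal sketch of applying a classifier trained this way, with a hypothetical label map, model file, and descriptor vector: libSVM predicts integer labels, which are then mapped back to semantic labels through svm_label_map (-1 being the reserved negative class).

import svmutil

svm_label_map = {-1: 'negative', 1: 'cat', 2: 'dog'}  # hypothetical mapping
model = svmutil.svm_load_model('classifier.svm')      # hypothetical saved model file
vectors = [[0.1, 0.2, 0.3]]                           # hypothetical normalized descriptor
dummy_y = [0] * len(vectors)                          # ground truth unknown at prediction time
p_labels, p_acc, p_vals = svmutil.svm_predict(dummy_y, vectors, model)
print([svm_label_map[int(lbl)] for lbl in p_labels])
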
Beispiel #56
0
def save_new_model(filename):
	mapped,labels,range,mapping = training_data_from_matchvotes()
	model = train_classifier(mapped,labels)
	svmutil.svm_save_model(filename+".model",model)
	pickle.dump(range,file(filename+".range","w"))
	pickle.dump(mapping,file(filename+".mapping","w"))
#!/usr/bin/env python
if __name__ == '__main__':
    import numpy as np
    # svm 3.20
    import svmutil as SVM
    from os.path import isfile
    from itertools import izip
    from final_utils import read_hwfile

    # initialize data
    datTrn, labTrn, nTrn = read_hwfile('ml14fall_train_align.dat.hog.dat', 169)
    datTst, labTst, nTst = read_hwfile('ml14fall_test1_no_answer_align.dat.hog.dat', 169)

    save_model_name = 'svm_train.model'
    if isfile(save_model_name):
        model = SVM.svm_load_model(save_model_name)
    else:
        param = SVM.svm_parameter('-t 2 -c 5 -g 2 -h 0')
        problem = SVM.svm_problem(labTrn.tolist(), datTrn.tolist())
        model = SVM.svm_train(problem, param)
        SVM.svm_save_model(save_model_name, model)

    p_label, p_acc, p_val = SVM.svm_predict(labTst.tolist(), datTst.tolist(), model)

    with open("result.txt", 'w') as fp:
        for label in p_label:
            fp.write('{:d}\n'.format(int(label)))
def main():
    dpark_ctx = dpark.DparkContext('mesos')
    assert os.path.isdir(BASE_PATH) and os.path.isdir(MODEL_PATH)
    
    # Read the weights and bias of SDAE from MODEL_PATH
    W, b = load_ae(MODEL_PATH)

    # SVM
    print 'Will adopt layer No. %i' % FEA_LAYER
    lyr = W.keys()
    lyr.sort()
    lyr_last = lyr[FEA_LAYER]
    lyr = lyr[:FEA_LAYER]
    lyr.append(lyr_last)

    # SVM training and validating data
    svm_data_tr, svm_label_tr, svm_data_va, svm_label_va = \
            load_svm_tr_va_data(TRAIN_DATA_PATH, dpark_ctx, (lyr, W, b))
    # SVM testing data
    svm_data_te, svm_label_te = load_svm_te_data(TEST_DATA_PATH, dpark_ctx, (lyr, W, b))

    # Process data, view the GID distribution in tr or te sets
    print 'Processing data here.'
    GID_adjust = range(len(GID)) # GID adjust to 0-13
    print '=' * 100
    print 'Training data distributions:'
    tr_hist = data_dist(GID_adjust, svm_label_tr)
    print '=' * 100
    print 'Testing data distributions:'
    te_hist = data_dist(GID_adjust, svm_label_te)
    print '=' * 100
    print 'Validation data distributions:'
    va_hist = data_dist(GID_adjust, svm_label_va)

    # Binary Test on two classes in validation set
    if BINARY_TEST:
        print 'Doing a binary class test using validation set'
        lbl1 = 7
        lbl2 = 11
        svm_data_va_bin = []
        svm_label_va_bin = []
        for lbl_elem, data_elem in zip(svm_label_va, svm_data_va):
            if lbl_elem == lbl1:
                svm_label_va_bin.append(-1)
                svm_data_va_bin.append(data_elem)
            elif lbl_elem == lbl2:
                svm_label_va_bin.append(1)
                svm_data_va_bin.append(data_elem)
            else:
                pass
        print 'Binary classes data was prepared.'
        for svm_c in [0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000, 100000, 1e6, 1e7, 1e8]:
            svm_opt = '-c ' + str(svm_c) + ' -w-1 3 -w1 2 -v 5 -q'
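            # NOTE: with '-v 5' in the options string, svm_train runs 5-fold
            # cross-validation and returns the accuracy (a float), not a model object.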
            svm_model = svm.svm_train(svm_label_va_bin, svm_data_va_bin, svm_opt)

    # Cross Validation on whole validation set
    elif CROSS_VALIDATION:
        print 'SVM model starts cross validating.'
        for svm_c in [0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000]:
            svm_opt = '-c ' + str(svm_c) + ' '
            for gid_elem, va_hist_elem in zip(GID_adjust, va_hist):
                wgt_tmp = max(va_hist) / float(va_hist_elem)
                '''
                if wgt_tmp < 3.0:
                    wgt = 1
                elif wgt_tmp < 10:
                    wgt = 4
                elif wgt_tmp < 40:
                    wgt = 16
                else:
                    wgt = 32
                '''
                if wgt_tmp < 10.0:
                    wgt = int(wgt_tmp)
                elif wgt_tmp < 40:
                    wgt = 16
                else:
                    wgt = 32
                svm_opt += ('-w' + str(gid_elem) + ' ' + str(wgt) + ' ')
            svm_opt += '-v 5 -q'
            print svm_opt
            svm_model = svm.svm_train(svm_label_va, svm_data_va, svm_opt)

    # SVM running on whole Training / Testing sets
    else:
        fn_svm = 'svm_model_c1_wgt'
        if SAVE_OR_LOAD: # True
            print 'SVM model starts training.'
            svm_opt = '-c 1 '
            for gid_elem, tr_hist_elem in zip(GID_adjust, tr_hist):
                wgt_tmp = max(tr_hist) / float(tr_hist_elem)
                if wgt_tmp < 3.0:
                    wgt = 1
                elif wgt_tmp < 10:
                    wgt = 2
                elif wgt_tmp < 40:
                    wgt = 4
                else:
                    wgt = 8
                svm_opt += ('-w' + str(gid_elem) + ' ' + str(wgt) + ' ')
            print svm_opt
            svm_model = svm.svm_train(svm_label_tr, svm_data_tr, svm_opt)
            # save SVM model
            svm.svm_save_model(fn_svm, svm_model)
        else: # False
            print 'SVM model loading.'
            # load SVM model
            svm_model = svm.svm_load_model(fn_svm)
        print 'SVM model training or loading done'
        p_label, p_acc, p_val = svm.svm_predict(svm_label_te, svm_data_te, svm_model)
        fid = open('res_tmp.pkl', 'wb')
        pickle.dump((p_label, p_acc, p_val), fid)
        fid.close()