def ctm_train_model(sample_save_path,svm_type,param,model_save_path):
    '''Train a model from a sample file, save it, and report training-set scores.

    The backend is selected with ``tms_svm.set_svm_type(svm_type)``, the
    problem is read from ``sample_save_path``, trained with ``param`` and the
    resulting model is written to ``model_save_path``.

    The model is then run on the very same training samples and the three
    metrics returned by ``tms_svm.predict`` are printed: labelled
    (Micro, Macro, ACC) when the samples contain more than two distinct
    labels, otherwise labelled (f_score, recall, presion).

    Returns the trained model.
    '''
    tms_svm.set_svm_type(svm_type)
    targets, samples = tms_svm.read_problem(sample_save_path)
    model = tms_svm.train(targets, samples, param)
    tms_svm.save_model(model_save_path, model)

    # More than two distinct labels selects the multi-class report format.
    multiclass = len(set(targets)) > 2

    _pred_labels, scores, _pred_values = tms_svm.predict(targets, samples, model)
    first, second, third = scores
    if multiclass:
        template = "(Micro=%g, Macro=%g, ACC=%g)"
    else:
        template = "(f_score=%g,recall=%g,presion=%g)"
    print(template % (first, second, third))
    return model
Example #2
0
def predict_rsl(config_file, data_file_name, result_save_path, param):
    '''Predict every sample of a LIBSVM-format file and save one row per sample.

    The model is element 3 of ``load_tms_model(config_file)``. Each non-blank
    line of ``data_file_name`` is parsed as ``<label> <index>:<value> ...``
    and predicted on its own with the option string ``'-b ' + str(param)``.

    For every sample a tab-separated row is written to ``result_save_path``:
    predicted label, score, original label (followed by a trailing tab).

    The score depends on ``param``:
      * truthy -- ``p_sc[0][1]`` when the predicted label prints as ``'1'``,
        otherwise ``p_sc[0][0]``;
      * falsy  -- ``tms_svm.classer_value(p_sc[0])``.

    The config file name, the model and every score are also printed.
    Returns None.
    '''
    print("-----------------正在加载训练模型-------------------\n")
    option = '-b ' + str(param)
    print(config_file)
    model = load_tms_model(config_file)[3]
    print(model)
    print("-----------------正在对样本进行预测-------------------\n")

    # The input is read completely and closed before the output is opened,
    # keeping the original read-then-write ordering.
    with open(data_file_name, 'r') as f_data:
        lines = f_data.readlines()

    # The context manager guarantees the result file is closed even when a
    # malformed line or the predictor raises part-way through.
    with open(result_save_path, 'w') as fs:
        for line in lines:
            fields = line.split(None, 1)
            if not fields:
                # Blank line (e.g. trailing newline at end of file): no sample.
                continue
            if len(fields) == 1:
                # Instance with a label but all-zero features.
                fields.append('')
            label, features = fields
            y = [float(label)]
            xi = {}
            for e in features.split():
                ind, val = e.split(":")
                xi[int(ind)] = float(val)

            p_lab, p_acc, p_sc = tms_svm.predict(y, [xi], model, option)
            label = p_lab[0]
            if param:
                # NOTE(review): the comparison is done on the string form, so
                # a float label 1.0 ('1.0') does not match '1' -- confirm the
                # label type returned by tms_svm.predict.
                if str(label) == str(1):
                    sc = p_sc[0][1]
                else:
                    sc = p_sc[0][0]
            else:
                sc = tms_svm.classer_value(p_sc[0])
            print(sc)
            fs.write(str(label) + "\t" + str(sc) + "\t" + str(y[0]) + "\t\n")
    print("-----------------预测完毕-------------------")
Example #3
0
def cal_sc_optim(lab,m,text,dic_list,local_fun,global_weight,str_splitTag):
    '''Predict the label and score of a single text.

    Takes the label, the model, the text to predict, the dictionary and the
    token separator used to split the text. ``local_fun`` is resolved through
    ``measure.local_f`` before the SVM problem is built with
    ``cons_pro_for_svm``.

    Returns a tuple ``(predicted label, score)`` where the score is
    ``tms_svm.classer_value`` applied to the first decision-value entry.
    '''
    weight_fun = measure.local_f(local_fun)
    tokens = text.strip().split(str_splitTag)
    targets, samples = cons_pro_for_svm(lab, tokens, dic_list, weight_fun, global_weight)
    pred_labels, _pred_acc, pred_scores = tms_svm.predict(targets, samples, m)
    best_label = pred_labels[0]
    return best_label, tms_svm.classer_value(pred_scores[0])
def predict_rsl(config_file,data_file_name,result_save_path,param):
    '''Predict every sample of a LIBSVM-format file and save one row per sample.

    The model is element 3 of ``load_tms_model(config_file)``. Each non-blank
    line of ``data_file_name`` is parsed as ``<label> <index>:<value> ...``
    and predicted on its own with the option string ``'-b ' + str(param)``.

    For every sample a tab-separated row is written to ``result_save_path``:
    predicted label, score, original label (followed by a trailing tab).

    The score depends on ``param``:
      * truthy -- ``p_sc[0][1]`` when the predicted label prints as ``'1'``,
        otherwise ``p_sc[0][0]``;
      * falsy  -- ``tms_svm.classer_value(p_sc[0])``.

    The config file name, the model and every score are also printed.
    Returns None.
    '''
    print("-----------------正在加载训练模型-------------------\n")
    option = '-b ' + str(param)
    print(config_file)
    model = load_tms_model(config_file)[3]
    print(model)
    print("-----------------正在对样本进行预测-------------------\n")

    # The input is read completely and closed before the output is opened,
    # keeping the original read-then-write ordering.
    with open(data_file_name, 'r') as f_data:
        lines = f_data.readlines()

    # The context manager guarantees the result file is closed even when a
    # malformed line or the predictor raises part-way through.
    with open(result_save_path, 'w') as fs:
        for line in lines:
            fields = line.split(None, 1)
            if not fields:
                # Blank line (e.g. trailing newline at end of file): no sample.
                continue
            if len(fields) == 1:
                # Instance with a label but all-zero features.
                fields.append('')
            label, features = fields
            y = [float(label)]
            xi = {}
            for e in features.split():
                ind, val = e.split(":")
                xi[int(ind)] = float(val)

            p_lab, p_acc, p_sc = tms_svm.predict(y, [xi], model, option)
            label = p_lab[0]
            if param:
                # NOTE(review): the comparison is done on the string form, so
                # a float label 1.0 ('1.0') does not match '1' -- confirm the
                # label type returned by tms_svm.predict.
                if str(label) == str(1):
                    sc = p_sc[0][1]
                else:
                    sc = p_sc[0][0]
            else:
                sc = tms_svm.classer_value(p_sc[0])
            print(sc)
            fs.write(str(label) + "\t" + str(sc) + "\t" + str(y[0]) + "\t\n")
    print("-----------------预测完毕-------------------")
def cal_sc_optim(lab,m,text,dic_list,local_fun,global_weight,str_splitTag):
    '''Predict the label and score of a single text.

    Takes the label, the model, the text to predict, the dictionary and the
    token separator used to split the text. ``local_fun`` is resolved through
    ``measure.local_f`` before the SVM problem is built with
    ``ctmutil.cons_pro_for_svm``.

    Returns a tuple ``(predicted label, score)``. The score is whatever
    ``tms_svm.classer_value`` computes from the first decision-value entry;
    the original note describes it as the direct score for binary
    classification and a combined score for multi-class.
    '''
    weight_fun = measure.local_f(local_fun)
    tokens = text.strip().split(str_splitTag)
    targets, samples = ctmutil.cons_pro_for_svm(lab, tokens, dic_list, weight_fun, global_weight)
    pred_labels, _pred_acc, pred_scores = tms_svm.predict(targets, samples, m)
    # Binary vs. multi-class handling is delegated to classer_value.
    best_label = pred_labels[0]
    return best_label, tms_svm.classer_value(pred_scores[0])
def extract_im_feature(filename,content_indexs,feature_indexs,dic_path,svm_model,delete,str_splitTag,tc_splitTag):
    '''Walk the lines of ``filename`` and assemble the text of selected columns.

    The model is loaded from ``svm_model``. Every line is stripped and split
    on ``tc_splitTag``; the columns listed in ``content_indexs`` are
    concatenated, each prefixed with ``str_splitTag``.

    NOTE(review): this function looks unfinished -- ``feature_indexs``,
    ``dic_path`` and ``delete`` are never used, the loaded model and the
    assembled text are never passed to the predictor, and nothing is
    returned. Confirm the intended behaviour before relying on it.
    '''
    m = tms_svm.load_model(svm_model)
    # Context manager so the input file is closed even if a line is malformed
    # or the predictor raises.
    with open(filename, 'r') as f:
        for line in f:
            text = line.strip().split(tc_splitTag)
            text_temp = ""
            for i in content_indexs:
                text_temp += str_splitTag + text[i]
                # NOTE(review): predict() is called without arguments exactly
                # as in the original; presumably it should receive the model
                # and features built from text_temp -- TODO confirm.
                p_lab, p_acc, p_sc = tms_svm.predict()
Example #7
0
def cal_sc_optim(lab, m, text, dic_list, local_fun, global_weight,
                 str_splitTag):
    '''Predict the label and score of a single text.

    Takes the label, the model, the text to predict, the dictionary and the
    token separator used to split the text. ``local_fun`` is resolved through
    ``measure.local_f`` before the SVM problem is built with
    ``cons_pro_for_svm``.

    Returns a tuple ``(predicted label, score)`` where the score is
    ``tms_svm.classer_value`` applied to the first decision-value entry.
    '''
    weight_fun = measure.local_f(local_fun)
    tokens = text.strip().split(str_splitTag)
    targets, samples = cons_pro_for_svm(lab, tokens, dic_list, weight_fun,
                                        global_weight)
    pred_labels, _pred_acc, pred_scores = tms_svm.predict(targets, samples, m)
    best_label = pred_labels[0]
    return best_label, tms_svm.classer_value(pred_scores[0])
Example #8
0
def save_train_for_lsa(test_path,model_save_path,lsa_train_save_path):
    '''Predict a sample set with a saved classifier and dump it in LSA format.

    Reads the problem from ``test_path``, loads the model from
    ``model_save_path`` and predicts all samples. One line per sample is
    written to ``lsa_train_save_path`` in the form
    ``label <TAB> score <TAB> index:value <TAB> ...`` where ``label`` is the
    original label as an int, ``score`` is the first decision value of the
    sample and the features are ordered by ascending index. Every line ends
    with a trailing tab.

    Returns None.
    '''
    y, x = tms_svm.read_problem(test_path)
    m = tms_svm.load_model(model_save_path)
    p_lab, p_acc, p_sc = tms_svm.predict(y, x, m)
    # Context manager so the output is flushed and closed even if a sample
    # fails to format part-way through.
    with open(lsa_train_save_path, 'w') as f:
        for i, label in enumerate(y):
            fields = [str(int(label)), str(p_sc[i][0])]
            ordered = sorted(x[i].items(), key=lambda item: item[0])
            fields.extend(str(index) + ":" + str(value) for index, value in ordered)
            f.write("\t".join(fields) + "\t\n")
def save_train_for_lsa(test_path, model_save_path, lsa_train_save_path):
    '''Predict a sample set with a saved classifier and dump it in LSA format.

    Reads the problem from ``test_path``, loads the model from
    ``model_save_path`` and predicts all samples. One line per sample is
    written to ``lsa_train_save_path`` in the form
    ``label <TAB> score <TAB> index:value <TAB> ...`` where ``label`` is the
    original label as an int, ``score`` is the first decision value of the
    sample and the features are ordered by ascending index. Every line ends
    with a trailing tab.

    Returns None.
    '''
    y, x = tms_svm.read_problem(test_path)
    m = tms_svm.load_model(model_save_path)
    p_lab, p_acc, p_sc = tms_svm.predict(y, x, m)
    # Context manager so the output is flushed and closed even if a sample
    # fails to format part-way through.
    with open(lsa_train_save_path, 'w') as f:
        for i, label in enumerate(y):
            fields = [str(int(label)), str(p_sc[i][0])]
            ordered = sorted(x[i].items(), key=lambda item: item[0])
            fields.extend(str(index) + ":" + str(value) for index, value in ordered)
            f.write("\t".join(fields) + "\t\n")
def cal_sc_optim(model_file_name, data_file_name, result_save_path):
    '''Predict every sample of a LIBSVM-format file with a saved LIBSVM model.

    The model is loaded with ``libsvm.svm_load_model(model_file_name)`` and
    converted with ``toPyModel``. If it cannot be loaded a message is printed
    and the function returns without writing anything.

    Each non-blank line of ``data_file_name`` is parsed as
    ``<label> <index>:<value> ...`` and predicted once, after all of its
    features have been collected. One tab-separated row per sample is written
    to ``result_save_path``: predicted label, then
    ``tms_svm.classer_value`` of the first decision-value entry (followed by
    a trailing tab).

    Returns None.
    '''
    print("-----------------正在加载训练模型-------------------\n")
    model = libsvm.svm_load_model(model_file_name)
    if not model:
        print("can't open model file %s" % model_file_name)
        # Nothing can be predicted without a model.
        return
    model = toPyModel(model)

    print("-----------------正在对样本进行预测-------------------\n")
    with open(data_file_name) as f_data:
        lines = f_data.readlines()

    # The result file is opened exactly once; reopening it in 'w' mode per
    # sample would truncate it and keep only the last row.
    with open(result_save_path, 'w') as fs:
        for line in lines:
            fields = line.split(None, 1)
            if not fields:
                # Blank line: no sample.
                continue
            if len(fields) == 1:
                # Instance with a label but all-zero features.
                fields.append('')
            label, features = fields
            y = [float(label)]
            xi = {}
            for e in features.split():
                ind, val = e.split(":")
                xi[int(ind)] = float(val)

            # Predict once per sample, with the complete feature dict wrapped
            # in a list to match the one-element label list.
            p_lab, p_acc, p_sc = tms_svm.predict(y, [xi], model)
            sc = tms_svm.classer_value(p_sc[0])
            fs.write(str(p_lab[0]) + "\t" + str(sc) + "\t\n")
    print("-----------------预测完毕-------------------")
def cal_sc_optim(model_file_name,data_file_name,result_save_path):
    '''Predict every sample of a LIBSVM-format file with a saved LIBSVM model.

    The model is loaded with ``libsvm.svm_load_model(model_file_name)`` and
    converted with ``toPyModel``. If it cannot be loaded a message is printed
    and the function returns without writing anything.

    Each non-blank line of ``data_file_name`` is parsed as
    ``<label> <index>:<value> ...`` and predicted once, after all of its
    features have been collected. One tab-separated row per sample is written
    to ``result_save_path``: predicted label, then
    ``tms_svm.classer_value`` of the first decision-value entry (followed by
    a trailing tab).

    Returns None.
    '''
    print("-----------------正在加载训练模型-------------------\n")
    model = libsvm.svm_load_model(model_file_name)
    if not model:
        print("can't open model file %s" % model_file_name)
        # Nothing can be predicted without a model.
        return
    model = toPyModel(model)

    print("-----------------正在对样本进行预测-------------------\n")
    with open(data_file_name) as f_data:
        lines = f_data.readlines()

    # The result file is opened exactly once; reopening it in 'w' mode per
    # sample would truncate it and keep only the last row.
    with open(result_save_path, 'w') as fs:
        for line in lines:
            fields = line.split(None, 1)
            if not fields:
                # Blank line: no sample.
                continue
            if len(fields) == 1:
                # Instance with a label but all-zero features.
                fields.append('')
            label, features = fields
            y = [float(label)]
            xi = {}
            for e in features.split():
                ind, val = e.split(":")
                xi[int(ind)] = float(val)

            # Predict once per sample, with the complete feature dict wrapped
            # in a list to match the one-element label list.
            p_lab, p_acc, p_sc = tms_svm.predict(y, [xi], model)
            sc = tms_svm.classer_value(p_sc[0])
            fs.write(str(p_lab[0]) + "\t" + str(sc) + "\t\n")
    print("-----------------预测完毕-------------------")
Example #12
0
def ctm_model_predict(test_path, m):
    '''Read the test samples from ``test_path`` and predict them with model ``m``.

    Returns the result of ``tms_svm.predict`` for the parsed labels and
    features, unchanged.
    '''
    labels, features = tms_svm.read_problem(test_path)
    outcome = tms_svm.predict(labels, features, m)
    return outcome
def ctm_model_predict(test_path,m):
    '''Read the test samples from ``test_path`` and predict them with model ``m``.

    Returns the result of ``tms_svm.predict`` for the parsed labels and
    features, unchanged.
    '''
    labels, features = tms_svm.read_problem(test_path)
    outcome = tms_svm.predict(labels, features, m)
    return outcome