Beispiel #1
0
def predict_rsl(config_file, data_file_name, result_save_path, param):
    '''Predict every sample of a LIBSVM-format file and save the results.

    Each line of ``data_file_name`` is parsed as ``label index:value ...``
    and handed to ``tms_svm.predict`` together with the model taken from
    ``load_tms_model(config_file)[3]``.  For every sample one line is written
    to ``result_save_path``: the predicted label, the score and the label
    read from the input, each followed by a tab.

    Args:
        config_file: forwarded to ``load_tms_model``.
        data_file_name: path of the file holding the samples to predict.
        result_save_path: path of the result file (opened with ``'w'``, so
            an existing file is overwritten).
        param: appended to the ``-b`` prediction option.  When truthy the
            score is picked straight from the values returned by the
            predictor; otherwise it is computed by ``tms_svm.classer_value``.

    Returns:
        None.  Results are written to ``result_save_path`` and every score
        is also printed.
    '''
    print("-----------------正在加载训练模型-------------------\n")
    option = '-b ' + str(param)
    print(config_file)
    model = load_tms_model(config_file)[3]
    print(model)
    print("-----------------正在对样本进行预测-------------------\n")

    # Read every sample before the result file is opened, and let the
    # context managers close both files even when a line fails to parse or
    # the prediction raises.
    with open(data_file_name, 'r') as f_data:
        lines = f_data.readlines()

    with open(result_save_path, 'w') as fs:
        for line in lines:
            line = line.split(None, 1)
            # In case an instance with all zero features
            if len(line) == 1:
                line += ['']
            label, features = line
            y = [float(label)]
            xi = {}
            for e in features.split():
                ind, val = e.split(":")
                xi[int(ind)] = float(val)

            p_lab, p_acc, p_sc = tms_svm.predict(y, [xi], model, option)
            label = p_lab[0]
            if param:
                # NOTE(review): if the predictor returns float labels,
                # str(label) is '1.0' and never equals '1', so the first
                # branch would never be taken -- confirm against tms_svm.
                if str(label) == str(1):
                    sc = p_sc[0][1]
                else:
                    sc = p_sc[0][0]
            else:
                sc = tms_svm.classer_value(p_sc[0])
            print(sc)
            # predicted label, score, label read from the input
            fs.write(str(label) + "\t" + str(sc) + "\t" + str(y[0]) + "\t\n")
    print("-----------------预测完毕-------------------")
Beispiel #2
0
def cal_sc_optim(lab,m,text,dic_list,local_fun,global_weight,str_splitTag):
    '''Predict the label and score of a single piece of text.

    ``text`` is stripped and split on ``str_splitTag``; the resulting tokens
    are turned into an SVM problem by ``cons_pro_for_svm`` and predicted
    with model ``m`` through ``tms_svm.predict``.

    Args:
        lab: label forwarded to ``cons_pro_for_svm``.
        m: model forwarded to ``tms_svm.predict``.
        text: the text to predict.
        dic_list: dictionary forwarded to ``cons_pro_for_svm``.
        local_fun: converted with ``measure.local_f`` before use.
        global_weight: forwarded to ``cons_pro_for_svm``.
        str_splitTag: separator used to split ``text`` into tokens.

    Returns:
        A tuple of the first predicted label and the value computed by
        ``tms_svm.classer_value`` from the first score entry.
    '''
    weight_fn = measure.local_f(local_fun)
    tokens = text.strip().split(str_splitTag)
    y, x = cons_pro_for_svm(lab, tokens, dic_list, weight_fn, global_weight)
    labels, _accuracy, scores = tms_svm.predict(y, x, m)
    best_label = labels[0]
    score = tms_svm.classer_value(scores[0])
    return best_label, score
def predict_rsl(config_file,data_file_name,result_save_path,param):
    '''Predict every sample of a LIBSVM-format file and save the results.

    Each line of ``data_file_name`` is parsed as ``label index:value ...``
    and handed to ``tms_svm.predict`` together with the model taken from
    ``load_tms_model(config_file)[3]``.  For every sample one line is written
    to ``result_save_path``: the predicted label, the score and the label
    read from the input, each followed by a tab.

    Args:
        config_file: forwarded to ``load_tms_model``.
        data_file_name: path of the file holding the samples to predict.
        result_save_path: path of the result file (opened with ``'w'``, so
            an existing file is overwritten).
        param: appended to the ``-b`` prediction option.  When truthy the
            score is picked straight from the values returned by the
            predictor; otherwise it is computed by ``tms_svm.classer_value``.

    Returns:
        None.  Results are written to ``result_save_path`` and every score
        is also printed.
    '''
    print("-----------------正在加载训练模型-------------------\n")
    option = '-b ' + str(param)
    print(config_file)
    model = load_tms_model(config_file)[3]
    print(model)
    print("-----------------正在对样本进行预测-------------------\n")

    # Read every sample before the result file is opened, and let the
    # context managers close both files even when a line fails to parse or
    # the prediction raises.
    with open(data_file_name, 'r') as f_data:
        lines = f_data.readlines()

    with open(result_save_path, 'w') as fs:
        for line in lines:
            line = line.split(None, 1)
            # In case an instance with all zero features
            if len(line) == 1:
                line += ['']
            label, features = line
            y = [float(label)]
            xi = {}
            for e in features.split():
                ind, val = e.split(":")
                xi[int(ind)] = float(val)

            p_lab, p_acc, p_sc = tms_svm.predict(y, [xi], model, option)
            label = p_lab[0]
            if param:
                # NOTE(review): if the predictor returns float labels,
                # str(label) is '1.0' and never equals '1', so the first
                # branch would never be taken -- confirm against tms_svm.
                if str(label) == str(1):
                    sc = p_sc[0][1]
                else:
                    sc = p_sc[0][0]
            else:
                sc = tms_svm.classer_value(p_sc[0])
            print(sc)
            # predicted label, score, label read from the input
            fs.write(str(label) + "\t" + str(sc) + "\t" + str(y[0]) + "\t\n")
    print("-----------------预测完毕-------------------")
def cal_sc_optim(lab,m,text,dic_list,local_fun,global_weight,str_splitTag):
    '''Predict the label and score of a single piece of text.

    ``text`` is stripped and split on ``str_splitTag``; the resulting tokens
    are turned into an SVM problem by ``ctmutil.cons_pro_for_svm`` and
    predicted with model ``m`` through ``tms_svm.predict``.

    Args:
        lab: label forwarded to ``ctmutil.cons_pro_for_svm``.
        m: model forwarded to ``tms_svm.predict``.
        text: the text to predict.
        dic_list: dictionary forwarded to ``ctmutil.cons_pro_for_svm``.
        local_fun: converted with ``measure.local_f`` before use.
        global_weight: forwarded to ``ctmutil.cons_pro_for_svm``.
        str_splitTag: separator used to split ``text`` into tokens.

    Returns:
        A tuple of the first predicted label and the value computed by
        ``tms_svm.classer_value`` from the first score entry.
    '''
    weight_fn = measure.local_f(local_fun)
    tokens = text.strip().split(str_splitTag)
    y, x = ctmutil.cons_pro_for_svm(lab, tokens, dic_list, weight_fn, global_weight)
    labels, _accuracy, scores = tms_svm.predict(y, x, m)
    best_label = labels[0]
    # The same score computation is applied whatever the number of classes.
    score = tms_svm.classer_value(scores[0])
    return best_label, score
Beispiel #5
0
def cal_sc_optim(lab, m, text, dic_list, local_fun, global_weight,
                 str_splitTag):
    '''Predict the label and score of a single piece of text.

    ``text`` is stripped and split on ``str_splitTag``; the resulting tokens
    are turned into an SVM problem by ``cons_pro_for_svm`` and predicted
    with model ``m`` through ``tms_svm.predict``.

    Args:
        lab: label forwarded to ``cons_pro_for_svm``.
        m: model forwarded to ``tms_svm.predict``.
        text: the text to predict.
        dic_list: dictionary forwarded to ``cons_pro_for_svm``.
        local_fun: converted with ``measure.local_f`` before use.
        global_weight: forwarded to ``cons_pro_for_svm``.
        str_splitTag: separator used to split ``text`` into tokens.

    Returns:
        A tuple of the first predicted label and the value computed by
        ``tms_svm.classer_value`` from the first score entry.
    '''
    weight_fn = measure.local_f(local_fun)
    tokens = text.strip().split(str_splitTag)
    y, x = cons_pro_for_svm(lab, tokens, dic_list, weight_fn, global_weight)
    labels, _accuracy, scores = tms_svm.predict(y, x, m)
    best_label = labels[0]
    score = tms_svm.classer_value(scores[0])
    return best_label, score
def cal_sc_optim(model_file_name, data_file_name, result_save_path):
    '''Predict every sample of a LIBSVM-format file with a saved model.

    The model is loaded with ``libsvm.svm_load_model`` and converted with
    ``toPyModel``.  Each line of ``data_file_name`` is parsed as
    ``label index:value ...`` and predicted once with ``tms_svm.predict``.
    For every sample one line is written to ``result_save_path``: the
    predicted label and the score computed by ``tms_svm.classer_value``,
    each followed by a tab.

    Args:
        model_file_name: path of the model file to load.
        data_file_name: path of the file holding the samples to predict.
        result_save_path: path of the result file (opened with ``'w'``, so
            an existing file is overwritten).

    Returns:
        None.  Results are written to ``result_save_path``.

    Raises:
        IOError: if ``libsvm.svm_load_model`` does not return a model.
    '''
    print("-----------------正在加载训练模型-------------------\n")
    model = libsvm.svm_load_model(model_file_name)
    if not model:
        # Predicting without a model cannot succeed, so fail right away
        # instead of continuing with ``None``.
        raise IOError("can't open model file %s" % model_file_name)
    model = toPyModel(model)
    print("-----------------正在对样本进行预测-------------------\n")

    # Read every sample before the result file is opened; both files are
    # closed by their context managers even if a prediction raises.
    with open(data_file_name) as f_data:
        lines = f_data.readlines()

    # The result file is opened once so that every sample is kept, rather
    # than being truncated on each write.
    with open(result_save_path, 'w') as fs:
        for line in lines:
            line = line.split(None, 1)
            # In case an instance with all zero features
            if len(line) == 1:
                line += ['']
            label, features = line
            y = [float(label)]
            xi = {}
            for e in features.split():
                ind, val = e.split(":")
                xi[int(ind)] = float(val)

            # One prediction per sample, once all its features are parsed.
            p_lab, p_acc, p_sc = tms_svm.predict(y, [xi], model)
            label = p_lab[0]
            sc = tms_svm.classer_value(p_sc[0])
            fs.write(str(label) + "\t" + str(sc) + "\t\n")
    print("-----------------预测完毕-------------------")
def cal_sc_optim(model_file_name,data_file_name,result_save_path):
    '''Predict every sample of a LIBSVM-format file with a saved model.

    The model is loaded with ``libsvm.svm_load_model`` and converted with
    ``toPyModel``.  Each line of ``data_file_name`` is parsed as
    ``label index:value ...`` and predicted once with ``tms_svm.predict``.
    For every sample one line is written to ``result_save_path``: the
    predicted label and the score computed by ``tms_svm.classer_value``,
    each followed by a tab.

    Args:
        model_file_name: path of the model file to load.
        data_file_name: path of the file holding the samples to predict.
        result_save_path: path of the result file (opened with ``'w'``, so
            an existing file is overwritten).

    Returns:
        None.  Results are written to ``result_save_path``.

    Raises:
        IOError: if ``libsvm.svm_load_model`` does not return a model.
    '''
    print("-----------------正在加载训练模型-------------------\n")
    model = libsvm.svm_load_model(model_file_name)
    if not model:
        # Predicting without a model cannot succeed, so fail right away
        # instead of continuing with ``None``.
        raise IOError("can't open model file %s" % model_file_name)
    model = toPyModel(model)
    print("-----------------正在对样本进行预测-------------------\n")

    # Read every sample before the result file is opened; both files are
    # closed by their context managers even if a prediction raises.
    with open(data_file_name) as f_data:
        lines = f_data.readlines()

    # The result file is opened once so that every sample is kept, rather
    # than being truncated on each write.
    with open(result_save_path, 'w') as fs:
        for line in lines:
            line = line.split(None, 1)
            # In case an instance with all zero features
            if len(line) == 1:
                line += ['']
            label, features = line
            y = [float(label)]
            xi = {}
            for e in features.split():
                ind, val = e.split(":")
                xi[int(ind)] = float(val)

            # One prediction per sample, once all its features are parsed.
            p_lab, p_acc, p_sc = tms_svm.predict(y, [xi], model)
            label = p_lab[0]
            sc = tms_svm.classer_value(p_sc[0])
            fs.write(str(label) + "\t" + str(sc) + "\t\n")
    print("-----------------预测完毕-------------------")