def main():
    """Command-line entry point: parse the options and run a prediction.

    Two positional arguments are required, ``filename`` and ``config_path``.
    They are forwarded, together with the parsed options, to
    ``predict_model.ctm_predict``.

    Exits through ``parser.error`` (usage message, status 2) when the
    positional arguments are missing or an option value is invalid.

    NOTE(review): this file defines main() more than once; the last
    definition is the one in effect after import.
    """
    usage = "usage: %prog [options] filename config_path version= %prog 1.0"
    parser = OptionParser(usage=usage)
    parser.add_option(
        "-i", "--indexes", dest="indexes", action="callback",
        type="string", default=[0], callback=list_callback)
    parser.add_option(
        "-r", "--result_indexes", dest="result_indexes", action="callback",
        type="string", default=[0], callback=list_callback,
        help="specify the content indexes that output with the predicted score")
    parser.add_option("-R", "--result_save", dest="result_save")
    # optparse checks the raw command-line *string* against ``choices``, so
    # integer choices reject every value given with -e. Use string choices
    # and convert to int once parsing is done.
    parser.add_option(
        "-e", "--segment", type="choice", dest="segment",
        default="0", choices=["0", "1", "2"])
    parser.add_option(
        "-T", "--tc_splitTag", dest="tc_splitTag", type="string", default="\t")
    parser.add_option(
        "-S", "--str_splitTag", dest="str_splitTag", type="string", default="^")

    options, args = parser.parse_args()

    if len(args) < 2:
        parser.error("expected two arguments: filename config_path")
    filename, config_path = args[0], args[1]

    # Fall back to the declared default [0] when an option ends up empty, so
    # both lists are always bound before the call below.
    indexes = [int(i) for i in (options.indexes or [0])]
    result_indexes = [int(i) for i in (options.result_indexes or [0])]
    segment = int(options.segment)

    # NOTE(review): -R/--result_save has no default, so None is forwarded
    # when the option is omitted - confirm ctm_predict accepts that.
    predict_model.ctm_predict(
        filename, config_path, indexes, options.result_save, result_indexes,
        options.str_splitTag, options.tc_splitTag, segment)
def tms_predict(filename, config_file, result_save_path="../tms.result", indexes=[1], result_indexes=[0], str_splitTag="^", tc_splitTag="\t", seg=0, delete=False, change_decode=False, in_decode="GBK", out_encode="UTF-8"):
    """Predict a file with an already-trained model.

    Given the file to predict and the configuration file of a trained model,
    run the prediction by delegating to ``predict_model.ctm_predict``.

    Required arguments:
        filename: path and name of the file to predict.
        config_file: configuration file of the trained model.

    Optional arguments:
        result_save_path: path and name of the file the prediction result is
            saved to. Defaults to "../tms.result".
        indexes: fields of the input file to predict on. Defaults to [1].
        result_indexes: fields of the input file that are written out
            together with the prediction result. Defaults to [0].
        str_splitTag: separator used for word segmentation. Defaults to "^".
        tc_splitTag: forwarded unchanged to ``ctm_predict``; defaults to a
            tab character (presumably the field separator - TODO confirm).
        seg: word segmentation mode. 0 = do not segment the input,
            1 = segment with pymmseg, 2 = segment with aliws.
        delete: whether samples whose features are all 0 are removed.
            Not removed by default when predicting.
        change_decode: whether to convert the text encoding; the samples to
            predict must use the same encoding as the training samples.
            No conversion by default.
        in_decode: source encoding when converting. Defaults to "GBK".
        out_encode: target encoding when converting. Defaults to "UTF-8".
    """
    # NOTE(review): the list defaults are shared between calls; that is only
    # safe as long as ctm_predict does not mutate them - confirm.
    #
    # Forward the caller's values. Hard-coding delete=False,
    # change_decode=False, in_decode="UTF-8" and out_encode="GBK" here would
    # silently ignore these four parameters and reverse the documented
    # encoding defaults.
    predict_model.ctm_predict(
        filename, config_file, indexes, result_save_path, result_indexes,
        str_splitTag, tc_splitTag, seg,
        delete=delete, change_decode=change_decode,
        in_decode=in_decode, out_encode=out_encode)
def main():
    """Command-line entry point: parse the options and run a prediction.

    Two positional arguments are required, ``filename`` and ``config_path``.
    They are forwarded, together with the parsed options, to
    ``predict_model.ctm_predict``.

    Exits through ``parser.error`` (usage message, status 2) when the
    positional arguments are missing or an option value is invalid.

    NOTE(review): this file defines main() more than once; the last
    definition is the one in effect after import.
    """
    usage = "usage: %prog [options] filename config_path version= %prog 1.0"
    parser = OptionParser(usage=usage)
    parser.add_option(
        "-i", "--indexes", dest="indexes", action="callback",
        type="string", default=[0], callback=list_callback)
    parser.add_option(
        "-r", "--result_indexes", dest="result_indexes", action="callback",
        type="string", default=[0], callback=list_callback,
        help="specify the content indexes that output with the predicted score")
    parser.add_option("-R", "--result_save", dest="result_save")
    # optparse checks the raw command-line *string* against ``choices``, so
    # integer choices reject every value given with -e. Use string choices
    # and convert to int once parsing is done.
    parser.add_option(
        "-e", "--segment", type="choice", dest="segment",
        default="0", choices=["0", "1", "2"])
    parser.add_option(
        "-T", "--tc_splitTag", dest="tc_splitTag", type="string", default="\t")
    parser.add_option(
        "-S", "--str_splitTag", dest="str_splitTag", type="string", default="^")

    options, args = parser.parse_args()

    if len(args) < 2:
        parser.error("expected two arguments: filename config_path")
    filename, config_path = args[0], args[1]

    # Fall back to the declared default [0] when an option ends up empty, so
    # both lists are always bound before the call below.
    indexes = [int(i) for i in (options.indexes or [0])]
    result_indexes = [int(i) for i in (options.result_indexes or [0])]
    segment = int(options.segment)

    # NOTE(review): -R/--result_save has no default, so None is forwarded
    # when the option is omitted - confirm ctm_predict accepts that.
    predict_model.ctm_predict(
        filename, config_path, indexes, options.result_save, result_indexes,
        options.str_splitTag, options.tc_splitTag, segment)