def process(parse_result):
    """Segment the input file and record the segmentation method in the config.

    Args:
        parse_result: Object exposing ``source_file``, ``dest_file`` and
            ``p_flag`` attributes (originally documented as the instance
            returned by ``parser()``).

    Returns:
        None. Results are written to the output file and progress messages
        are printed to stdout.

    Date: 2013-10-30
    """
    config_path = '../data/model/post.config'
    default_output = "segmented.temp"

    input_file = parse_result.source_file.strip()
    p_flag = parse_result.p_flag

    # Fall back to the default temporary file when no destination is given.
    output_file = default_output
    if parse_result.dest_file is not None:
        output_file = parse_result.dest_file.strip()

    # Guard clause: nothing to do when the input file is missing.
    if not os.path.isfile(input_file):
        print("输入文件不存在!\n")
        return

    print("Start......\n")
    # The actual word segmentation is delegated to the tms package.
    tms.tms_segment(input_file, out_filename=output_file)
    print("第一步处理结果:\n")
    time.sleep(1)

    # When p_flag is set, start a fresh configuration file for the
    # training parameters; the ``with`` block closes it on exit.
    if p_flag is not None:
        with open(config_path, 'w') as header_file:
            header_file.write("-----Configuration-----" + "\n")

    # Record the word segmentation method exactly once.
    with open(config_path, 'a+') as config_file:
        # An 'a+' stream may start positioned at end-of-file, so rewind
        # before reading; writes in append mode still go to the end.
        config_file.seek(0)
        lines = config_file.readlines()
        # Decide from the whole file instead of comparing each line with
        # the last one by value: that comparison misfired on duplicated
        # lines and never wrote anything when the file was empty.
        if not any("WordSeg:" in line for line in lines):
            config_file.write("WordSeg:1\n")

    if output_file == default_output:
        # No destination was requested: dump the temporary result file.
        print("分词结果为:\n")
        time.sleep(1)
        with open(output_file, 'r') as result_file:
            print(result_file.read())
    else:
        print("分词结果保存为: %s\n" % output_file)
def process(parse_result):
    """Segment the input file and record the segmentation method in the config.

    Args:
        parse_result: Object exposing ``source_file``, ``dest_file`` and
            ``p_flag`` attributes (originally documented as the instance
            returned by ``parser()``).

    Returns:
        None. Results are written to the output file and progress messages
        are printed to stdout.

    Date: 2013-10-30
    """
    config_path = '../data/model/post.config'
    default_output = "segmented.temp"

    input_file = parse_result.source_file.strip()
    p_flag = parse_result.p_flag

    # Fall back to the default temporary file when no destination is given.
    output_file = default_output
    if parse_result.dest_file is not None:
        output_file = parse_result.dest_file.strip()

    # Guard clause: nothing to do when the input file is missing.
    if not os.path.isfile(input_file):
        print("输入文件不存在!\n")
        return

    print("Start......\n")
    # The actual word segmentation is delegated to the tms package.
    tms.tms_segment(input_file, out_filename=output_file)
    print("第一步处理结果:\n")
    time.sleep(1)

    # When p_flag is set, start a fresh configuration file for the
    # training parameters; the ``with`` block closes it on exit.
    if p_flag is not None:
        with open(config_path, 'w') as header_file:
            header_file.write("-----Configuration-----" + "\n")

    # Record the word segmentation method exactly once.
    with open(config_path, 'a+') as config_file:
        # An 'a+' stream may start positioned at end-of-file, so rewind
        # before reading; writes in append mode still go to the end.
        config_file.seek(0)
        lines = config_file.readlines()
        # Decide from the whole file instead of comparing each line with
        # the last one by value: that comparison misfired on duplicated
        # lines and never wrote anything when the file was empty.
        if not any("WordSeg:" in line for line in lines):
            config_file.write("WordSeg:1\n")

    if output_file == default_output:
        # No destination was requested: dump the temporary result file.
        print("分词结果为:\n")
        time.sleep(1)
        with open(output_file, 'r') as result_file:
            print(result_file.read())
    else:
        print("分词结果保存为: %s\n" % output_file)
# -*- coding: utf-8 -*-
import tms
import datetime
import time


def timediff(timestart, timestop):
    """Format the time elapsed between two datetimes as a readable string.

    Args:
        timestart: ``datetime.datetime`` marking the start of the interval.
        timestop: ``datetime.datetime`` marking the end of the interval.

    Returns:
        A string listing days, hours, minutes, seconds and milliseconds,
        each followed by its Chinese unit label.
    """
    elapsed = timestop - timestart
    # timedelta keeps days, seconds (< 86400) and microseconds (< 10**6)
    # separately, so the seconds field alone yields hours/minutes/seconds.
    # divmod and // perform integer arithmetic on Python 2 and Python 3
    # alike, whereas plain "/" becomes true division on Python 3.
    hours, remainder = divmod(elapsed.seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    milliseconds = elapsed.microseconds // 1000
    return "%d天%d小时%d分%d秒%d毫秒" % (
        elapsed.days, hours, minutes, seconds, milliseconds)


if __name__ == "__main__":
    beginTime = datetime.datetime.now()
    # Train, segment the test data, then predict; argument semantics are
    # defined by the tms package.
    tms.tms_train("../twitterData/twitterTrain.txt", main_save_path="./", seg=1, global_fun="idf")
    tms.tms_segment("../twitterData/twitterTestData.txt", [1], "../twitterData/twitterTestData1.txt", "^", "\t", 1)
    tms.tms_predict(
        "../twitterData/twitterTestData1.txt",
        "./model/tms.config",
        result_save_path="../twitterData/tms.result",
    )
    endTime = datetime.datetime.now()
    # Report the wall-clock time of the whole pipeline.
    d = timediff(beginTime, endTime)
    print(d)
#-*- coding: utf-8 -*-
import tms
import datetime
import time


def timediff(timestart, timestop):
    """Format the time elapsed between two datetimes as a readable string.

    Args:
        timestart: ``datetime.datetime`` marking the start of the interval.
        timestop: ``datetime.datetime`` marking the end of the interval.

    Returns:
        A string listing days, hours, minutes, seconds and milliseconds,
        each followed by its Chinese unit label.
    """
    elapsed = timestop - timestart
    # timedelta keeps days, seconds (< 86400) and microseconds (< 10**6)
    # separately, so the seconds field alone yields hours/minutes/seconds.
    # divmod and // perform integer arithmetic on Python 2 and Python 3
    # alike, whereas plain "/" becomes true division on Python 3.
    hours, remainder = divmod(elapsed.seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    milliseconds = elapsed.microseconds // 1000
    return "%d天%d小时%d分%d秒%d毫秒" % (
        elapsed.days, hours, minutes, seconds, milliseconds)


if __name__ == '__main__':
    beginTime = datetime.datetime.now()
    # Train, segment the test data, then predict; argument semantics are
    # defined by the tms package.
    tms.tms_train("../twitterData/twitterTrain.txt", main_save_path="./", seg=1, global_fun='idf')
    tms.tms_segment("../twitterData/twitterTestData.txt", [1], "../twitterData/twitterTestData1.txt", "^", "\t", 1)
    tms.tms_predict(
        "../twitterData/twitterTestData1.txt",
        "./model/tms.config",
        result_save_path="../twitterData/tms.result",
    )
    endTime = datetime.datetime.now()
    # Report the wall-clock time of the whole pipeline.
    d = timediff(beginTime, endTime)
    print(d)