import tms

# Paths for the unsegmented training corpus and the evaluation set.
TRAIN_FILE = "../data/Traindata_noseg.txt"
TEST_FILE = "../data/Testdata.txt"
CONFIG_FILE = "./model/tms.config"
RESULT_FILE = "./tms.result"

# Fit a model on the raw text (seg=1 -> segment it first), then label
# the test file with the generated model config.
tms.tms_train(TRAIN_FILE, main_save_path="./", seg=1)
tms.tms_predict(TEST_FILE, CONFIG_FILE, result_save_path=RESULT_FILE)
def predicting(filename, configfile, savepath):
    """Classify *filename* using the model described by *configfile*.

    The input is segmented first (seg=1) and the labelled output is
    written to *savepath*.
    """
    predict_opts = {"result_save_path": savepath, "seg": 1}
    tms.tms_predict(filename, configfile, **predict_opts)
# -*- coding: utf-8 -*- import tms import datetime import time def timediff(timestart, timestop): t = timestop - timestart time_day = t.days s_time = t.seconds ms_time = t.microseconds / 1000000 usedtime = int(s_time + ms_time) time_hour = usedtime / 60 / 60 time_minute = (usedtime - time_hour * 3600) / 60 time_second = usedtime - time_hour * 3600 - time_minute * 60 time_micsecond = (t.microseconds - t.microseconds / 1000000) / 1000 retstr = "%d天%d小时%d分%d秒%d毫秒" % (time_day, time_hour, time_minute, time_second, time_micsecond) return retstr if __name__ == "__main__": beginTime = datetime.datetime.now() tms.tms_train("../twitterData/twitterTrain.txt", main_save_path="./", seg=1, global_fun="idf") tms.tms_segment("../twitterData/twitterTestData.txt", [1], "../twitterData/twitterTestData1.txt", "^", "\t", 1) tms.tms_predict( "../twitterData/twitterTestData1.txt", "./model/tms.config", result_save_path="../twitterData/tms.result" ) endTime = datetime.datetime.now() d = timediff(beginTime, endTime) print d
#!/usr/bin/python
# _*_ coding: utf-8 _*_
import sys

# Make the local trainmodel package importable before importing tms.
TRAINMODEL_SRC = "D:\\09Limited_buffer\\earlywarningbyci\\classification\\trainmodel\\src"
TRAINMODEL_ROOT = "D:\\09Limited_buffer\\earlywarningbyci\\classification\\trainmodel\\"
sys.path.insert(0, TRAINMODEL_SRC)
sys.path.insert(0, TRAINMODEL_ROOT)

import tms

# Train on the ad corpus: column 2 holds the text, space-separated,
# with stopword filtering; ratio=1 uses all of the data.
tms.tms_train(
    "D:\\09Limited_buffer\\earlywarningbyci\\classification\\trainmodel\\data\\ad.txt",
    main_save_path="./data/",
    seg=1,
    indexes=[2],
    str_splitTag=" ",
    stopword_filename="./chinese_stopword.txt",
    ratio=1,
)

# Classify a second file using an existing model config.
tms.tms_predict(
    "D:\\09Limited_buffer\\earlywarningbyci\\classification\\trainmodel\\data\\adtrain3.txt",
    "D:\\09Limited_buffer\\earlywarningbyci\\classification\\trainmodel\\data\\model\\tms.config",
    result_save_path="./data/pre.result",
    seg=1,
    indexes=[2],
)

# Summarize the prediction results.
tms.tms_analysis(".\\data\\pre.result")
#-*- coding: utf-8 -*- import tms import datetime import time def timediff(timestart, timestop): t = (timestop-timestart) time_day = t.days s_time = t.seconds ms_time = t.microseconds / 1000000 usedtime = int(s_time + ms_time) time_hour = usedtime / 60 / 60 time_minute = (usedtime - time_hour * 3600 ) / 60 time_second = usedtime - time_hour * 3600 - time_minute * 60 time_micsecond = (t.microseconds - t.microseconds / 1000000) / 1000 retstr = "%d天%d小时%d分%d秒%d毫秒" %(time_day, time_hour, time_minute, time_second, time_micsecond) return retstr if __name__ == '__main__': beginTime = datetime.datetime.now() tms.tms_train("../twitterData/twitterTrain.txt",main_save_path="./",seg=1,global_fun ='idf') tms.tms_segment("../twitterData/twitterTestData.txt",[1],"../twitterData/twitterTestData1.txt","^","\t",1) tms.tms_predict("../twitterData/twitterTestData1.txt","./model/tms.config",result_save_path="../twitterData/tms.result") endTime = datetime.datetime.now() d = timediff(beginTime , endTime) print d
import tms


def _run():
    # One-shot pipeline: fit a model on the raw (unsegmented) training
    # file, then label the test file with the resulting model config.
    tms.tms_train("../data/Traindata_noseg.txt", main_save_path="./", seg=1)
    tms.tms_predict("../data/Testdata.txt", "./model/tms.config",
                    result_save_path="./tms.result")


_run()
# Dump every MicroBlog record to a whitespace-separated file for prediction.
# NOTE(review): MicroBlog is defined elsewhere (presumably an ORM query set) —
# its schema is not visible in this file; confirm field meanings against the model.
fout=open("before_predict.txt","wt")
for i in MicroBlog.objects:
    # One record per line: mid, content, total engagement (likes+forwards+comments),
    # created timestamp, then the content repeated.
    print >>fout,i.mid.encode('utf-8')," ".encode('utf-8'),\
    i.content.encode('utf-8')," ".encode('utf-8'),\
    i.n_likes+i.n_forwards+i.n_comments," ".encode('utf-8'),\
    i.created," ".encode('utf-8'),\
    i.content.encode('utf-8')
fout.close()

import tms
# Earlier segmentation step, kept for reference:
#tms.tms_segment("before_predict.txt",out_filename="before_predict_seg.txt",\
#    seg=1,indexes=[1],str_splitTag=" ",)

# Classify column 1 of each line (the content); copy columns 0,2,3,4 of the
# input into the result file alongside the predicted label.
tms.tms_predict("before_predict.txt","D:\\09Limited_buffer\\earlywarningbyci\\classification\\trainmodel\\data\\model\\tms.config",\
    result_save_path="predicted.txt",\
    seg=1,str_splitTag=" ",\
    indexes=[1],\
    result_indexes=[0,2,3,4])
#result_indexes=[0,1,2])

# Re-read the predictions and start building a CSV summary.
filterout=open("predicted.txt","r")
afterfilter=open("filter.csv","wt")
print >>afterfilter,"mid,focus,created,content"
lines=filterout.readlines()
for i in lines:
    # NOTE(review): 'list' shadows the builtin, and the indexing assumes each
    # predicted line looks like: label <score?> mid focus date time — TODO
    # confirm against the tms output format; lines with fewer than 6 fields
    # would raise IndexError.
    list=i.split()
    label=list[0]
    mic_id=list[2]
    mic_focus=list[3]
    mic_created=list[4]+" "+list[5]
    list_length = len(list)
    # NOTE(review): the loop body appears to continue beyond this chunk.
# Dump every MicroBlog record to a whitespace-separated file for prediction.
# NOTE(review): MicroBlog is defined elsewhere (presumably an ORM query set) —
# its schema is not visible in this file; confirm field meanings against the model.
fout = open("before_predict.txt", "wt")
for i in MicroBlog.objects:
    # One record per line: mid, content, total engagement (likes+forwards+comments),
    # created timestamp, then the content repeated.
    print >>fout,i.mid.encode('utf-8')," ".encode('utf-8'),\
    i.content.encode('utf-8')," ".encode('utf-8'),\
    i.n_likes+i.n_forwards+i.n_comments," ".encode('utf-8'),\
    i.created," ".encode('utf-8'),\
    i.content.encode('utf-8')
fout.close()

import tms
# Earlier segmentation step, kept for reference:
#tms.tms_segment("before_predict.txt",out_filename="before_predict_seg.txt",\
#    seg=1,indexes=[1],str_splitTag=" ",)

# Classify column 1 of each line (the content); copy columns 0,2,3,4 of the
# input into the result file alongside the predicted label.
tms.tms_predict("before_predict.txt","D:\\09Limited_buffer\\earlywarningbyci\\classification\\trainmodel\\data\\model\\tms.config",\
    result_save_path="predicted.txt",\
    seg=1,str_splitTag=" ",\
    indexes=[1],\
    result_indexes=[0,2,3,4])
#result_indexes=[0,1,2])

# Re-read the predictions and start building a CSV summary.
filterout = open("predicted.txt", "r")
afterfilter = open("filter.csv", "wt")
print >> afterfilter, "mid,focus,created,content"
lines = filterout.readlines()
for i in lines:
    # NOTE(review): 'list' shadows the builtin, and the indexing assumes each
    # predicted line looks like: label <score?> mid focus date time — TODO
    # confirm against the tms output format; lines with fewer than 6 fields
    # would raise IndexError.
    list = i.split()
    label = list[0]
    mic_id = list[2]
    mic_focus = list[3]
    mic_created = list[4] + " " + list[5]
    list_length = len(list)
    # NOTE(review): the loop body appears to continue beyond this chunk.