Example #1
0
import tms
# Train on the Traindata_noseg.txt file with seg=1; main_save_path is "./"
# (presumably where ./model/tms.config used below ends up -- confirm).
tms.tms_train(
    "../data/Traindata_noseg.txt",
    main_save_path="./",
    seg=1,
)
# Predict Testdata.txt with the saved config; results go to ./tms.result.
tms.tms_predict(
    "../data/Testdata.txt",
    "./model/tms.config",
    result_save_path="./tms.result",
)
def predicting(filename, configfile, savepath):
    """Call tms.tms_predict on *filename* with *configfile*.

    *savepath* is forwarded as ``result_save_path`` and ``seg`` is fixed
    to 1. Nothing is returned.
    """
    predict_options = {"result_save_path": savepath, "seg": 1}
    tms.tms_predict(filename, configfile, **predict_options)
# -*- coding: utf-8 -*-
import tms
import datetime
import time


def timediff(timestart, timestop):
    """Format the time elapsed between two datetimes.

    Args:
        timestart: datetime.datetime marking the start.
        timestop: datetime.datetime marking the end.

    Returns:
        A string giving the days, hours, minutes, seconds and milliseconds
        of ``timestop - timestart``, each followed by its Chinese unit label.
    """
    t = timestop - timestart
    # divmod / floor division keep every component an integer on both
    # Python 2 and Python 3. Plain "/" is true division on Python 3, which
    # turned the hour into a float and zeroed the minutes and seconds.
    time_hour, remainder = divmod(t.seconds, 3600)
    time_minute, time_second = divmod(remainder, 60)
    # Whole milliseconds contained in the sub-second part.
    time_micsecond = t.microseconds // 1000
    retstr = "%d天%d小时%d分%d秒%d毫秒" % (t.days, time_hour, time_minute, time_second, time_micsecond)
    return retstr


if __name__ == "__main__":
    # Time the whole train -> segment -> predict run.
    beginTime = datetime.datetime.now()
    tms.tms_train("../twitterData/twitterTrain.txt", main_save_path="./", seg=1, global_fun="idf")
    # twitterTestData1.txt (third argument) is the file predicted below;
    # presumably it is the segmented output of this call -- confirm.
    tms.tms_segment("../twitterData/twitterTestData.txt", [1], "../twitterData/twitterTestData1.txt", "^", "\t", 1)
    tms.tms_predict(
        "../twitterData/twitterTestData1.txt", "./model/tms.config", result_save_path="../twitterData/tms.result"
    )
    endTime = datetime.datetime.now()
    d = timediff(beginTime, endTime)
    # Parenthesised single-argument form prints the same text on Python 2
    # and is also valid syntax on Python 3 (the bare statement is not).
    print(d)
Example #4
0
#!/usr/bin/python

import sys
sys.path.insert(
    0,
    "D:\\09Limited_buffer\\earlywarningbyci\\classification\\trainmodel\\src")
sys.path.insert(
    0, "D:\\09Limited_buffer\\earlywarningbyci\\classification\\trainmodel\\")
import tms
#_*_ coding: utf-8 _*_
# Train on ad.txt: seg=1, indexes=[2], fields split on a single space,
# with a stop-word file and ratio=1. main_save_path is "./data/".
tms.tms_train(
    "D:\\09Limited_buffer\\earlywarningbyci\\classification\\trainmodel\\data\\ad.txt",
    main_save_path="./data/",
    seg=1,
    indexes=[2],
    str_splitTag=" ",
    stopword_filename="./chinese_stopword.txt",
    ratio=1,
)

# Predict adtrain3.txt with the saved tms.config; results are written to
# ./data/pre.result.
tms.tms_predict(
    "D:\\09Limited_buffer\\earlywarningbyci\\classification\\trainmodel\\data\\adtrain3.txt",
    "D:\\09Limited_buffer\\earlywarningbyci\\classification\\trainmodel\\data\\model\\tms.config",
    result_save_path="./data/pre.result",
    seg=1,
    indexes=[2],
)

# Analyse pre.result (presumably the file written by tms_predict above,
# addressed here with Windows-style separators).
tms.tms_analysis(".\\data\\pre.result")
Example #5
0
#-*- coding: utf-8 -*-
import tms
import datetime
import time

def timediff(timestart, timestop):
    """Format the time elapsed between two datetimes.

    Args:
        timestart: datetime.datetime marking the start.
        timestop: datetime.datetime marking the end.

    Returns:
        A string giving the days, hours, minutes, seconds and milliseconds
        of ``timestop - timestart``, each followed by its Chinese unit label.
    """
    t = timestop - timestart
    # Use divmod / floor division so the components stay integers on both
    # Python 2 and Python 3; "/" is true division on Python 3 and produced
    # a fractional hour with zero minutes and seconds.
    time_hour, leftover = divmod(t.seconds, 3600)
    time_minute, time_second = divmod(leftover, 60)
    # Whole milliseconds contained in the sub-second part.
    time_micsecond = t.microseconds // 1000
    retstr = "%d天%d小时%d分%d秒%d毫秒"  %(t.days, time_hour, time_minute, time_second, time_micsecond)
    return retstr

if __name__ == '__main__':
    # Time the whole train -> segment -> predict run.
    beginTime = datetime.datetime.now()
    tms.tms_train("../twitterData/twitterTrain.txt",main_save_path="./",seg=1,global_fun ='idf')
    # twitterTestData1.txt (third argument) is the file predicted below;
    # presumably it is the segmented output of this call -- confirm.
    tms.tms_segment("../twitterData/twitterTestData.txt",[1],"../twitterData/twitterTestData1.txt","^","\t",1)
    tms.tms_predict("../twitterData/twitterTestData1.txt","./model/tms.config",result_save_path="../twitterData/tms.result")
    endTime = datetime.datetime.now()
    d = timediff(beginTime , endTime)
    # Parenthesised single-argument form prints the same text on Python 2
    # and is also valid syntax on Python 3 (the bare statement is not).
    print(d)
Example #6
0
def predicting(filename, configfile, savepath):
    """Run tms.tms_predict for *filename* using *configfile*.

    The result path is taken from *savepath* (passed as
    ``result_save_path``) and ``seg=1`` is always supplied.
    Returns None.
    """
    tms.tms_predict(
        filename,
        configfile,
        seg=1,
        result_save_path=savepath,
    )
Example #7
0
import tms
# Train on Traindata_noseg.txt with seg=1; main_save_path is "./"
# (presumably where ./model/tms.config used below ends up -- confirm).
tms.tms_train(
    "../data/Traindata_noseg.txt",
    main_save_path="./",
    seg=1,
)
# Predict Testdata.txt with that config and write results to ./tms.result.
tms.tms_predict(
    "../data/Testdata.txt",
    "./model/tms.config",
    result_save_path="./tms.result",
)
    fout=open("before_predict.txt","wt")
    for i in MicroBlog.objects:
        print >>fout,i.mid.encode('utf-8'),"	".encode('utf-8'),\
              i.content.encode('utf-8'),"	".encode('utf-8'),\
              i.n_likes+i.n_forwards+i.n_comments,"	".encode('utf-8'),\
              i.created,"	".encode('utf-8'),\
              i.content.encode('utf-8')
    fout.close()

    import tms
#tms.tms_segment("before_predict.txt",out_filename="before_predict_seg.txt",\
#              seg=1,indexes=[1],str_splitTag=" ",)
    tms.tms_predict("before_predict.txt","D:\\09Limited_buffer\\earlywarningbyci\\classification\\trainmodel\\data\\model\\tms.config",\
                    result_save_path="predicted.txt",\
                    seg=1,str_splitTag=" ",\
                    indexes=[1],\
                    result_indexes=[0,2,3,4])
                    #result_indexes=[0,1,2])

    filterout=open("predicted.txt","r")
    afterfilter=open("filter.csv","wt")
    print >>afterfilter,"mid,focus,created,content"
    lines=filterout.readlines()
    for i in lines:
        list=i.split()
        label=list[0]
        mic_id=list[2]
        mic_focus=list[3]
        mic_created=list[4]+" "+list[5]
        list_length = len(list)
    fout = open("before_predict.txt", "wt")
    for i in MicroBlog.objects:
        print >>fout,i.mid.encode('utf-8'),"	".encode('utf-8'),\
              i.content.encode('utf-8'),"	".encode('utf-8'),\
              i.n_likes+i.n_forwards+i.n_comments,"	".encode('utf-8'),\
              i.created,"	".encode('utf-8'),\
              i.content.encode('utf-8')
    fout.close()

    import tms
    #tms.tms_segment("before_predict.txt",out_filename="before_predict_seg.txt",\
    #              seg=1,indexes=[1],str_splitTag=" ",)
    tms.tms_predict("before_predict.txt","D:\\09Limited_buffer\\earlywarningbyci\\classification\\trainmodel\\data\\model\\tms.config",\
                    result_save_path="predicted.txt",\
                    seg=1,str_splitTag=" ",\
                    indexes=[1],\
                    result_indexes=[0,2,3,4])
    #result_indexes=[0,1,2])

    filterout = open("predicted.txt", "r")
    afterfilter = open("filter.csv", "wt")
    print >> afterfilter, "mid,focus,created,content"
    lines = filterout.readlines()
    for i in lines:
        list = i.split()
        label = list[0]
        mic_id = list[2]
        mic_focus = list[3]
        mic_created = list[4] + " " + list[5]
        list_length = len(list)