def process(parse_result):
    '''
    desc: This function use to process the word segmentation
    params: the instance returned by function--parser()
    return: None
    date: 2013-10-30
    '''
    input_file = parse_result.source_file.strip()
    # The default outputfile to store the results
    output_file = "segmented.temp"
    p_flag=parse_result.p_flag
    if parse_result.dest_file is not None:
        output_file = parse_result.dest_file.strip()    
    if os.path.isfile(input_file): 
        print "Start......\n"
        # The key method to process the words segmentation
        tms.tms_segment(input_file,out_filename=output_file)
        print "第一步处理结果:\n"
        time.sleep(1)
        # Create a configuration file to store the trainning parameters
        if p_flag is not None:
            with open('../data/model/post.config','w') as f1_config:
                f1_config.write("-----Configuration-----"+"\n")
                f1_config.close()
        else :
            pass
        # Store the the method of word segments 
        with open('../data/model/post.config','a+') as f_config:
            lines=f_config.readlines()
            for line in lines: 
                if "WordSeg:" in line:         
                    break
                if line == lines[-1]:
                    f_config.write("WordSeg:"+str(1).strip()+"\n")
        if output_file == "segmented.temp":
            print "分词结果为:\n"
            time.sleep(1)
            with open(output_file,'r') as f:
                print f.read()        
        else:
            print "分词结果保存为: %s\n"%output_file
    else:
        print "输入文件不存在!\n"
# Example #2
# 0
def process(parse_result):
    '''
    desc: This function use to process the word segmentation
    params: the instance returned by function--parser()
    return: None
    date: 2013-10-30
    '''
    input_file = parse_result.source_file.strip()
    # The default outputfile to store the results
    output_file = "segmented.temp"
    p_flag = parse_result.p_flag
    if parse_result.dest_file is not None:
        output_file = parse_result.dest_file.strip()
    if os.path.isfile(input_file):
        print "Start......\n"
        # The key method to process the words segmentation
        tms.tms_segment(input_file, out_filename=output_file)
        print "第一步处理结果:\n"
        time.sleep(1)
        # Create a configuration file to store the trainning parameters
        if p_flag is not None:
            with open('../data/model/post.config', 'w') as f1_config:
                f1_config.write("-----Configuration-----" + "\n")
                f1_config.close()
        else:
            pass
        # Store the the method of word segments
        with open('../data/model/post.config', 'a+') as f_config:
            lines = f_config.readlines()
            for line in lines:
                if "WordSeg:" in line:
                    break
                if line == lines[-1]:
                    f_config.write("WordSeg:" + str(1).strip() + "\n")
        if output_file == "segmented.temp":
            print "分词结果为:\n"
            time.sleep(1)
            with open(output_file, 'r') as f:
                print f.read()
        else:
            print "分词结果保存为: %s\n" % output_file
    else:
        print "输入文件不存在!\n"
# -*- coding: utf-8 -*-
import tms
import datetime
import time


def timediff(timestart, timestop):
    '''
    desc: Format the elapsed time between two datetimes as a Chinese string.
    params: timestart, timestop -- datetime.datetime instances
    return: a "%d天%d小时%d分%d秒%d毫秒" formatted string
            (days, hours, minutes, seconds, milliseconds)
    '''
    t = timestop - timestart
    time_day = t.days
    # t.seconds already holds the whole-second remainder of the day;
    # the original added t.microseconds / 1000000, which (microseconds
    # being < 1000000) was always 0 under Python 2 integer division.
    usedtime = t.seconds
    # // keeps integer division correct on Python 3 too; with plain /
    # these become floats there and the %d fields silently truncate.
    time_hour = usedtime // 3600
    time_minute = (usedtime - time_hour * 3600) // 60
    time_second = usedtime - time_hour * 3600 - time_minute * 60
    # The original wrote (t.microseconds - t.microseconds/1000000)/1000;
    # the subtracted term is always 0, so this is simply milliseconds.
    time_micsecond = t.microseconds // 1000
    retstr = "%d天%d小时%d分%d秒%d毫秒" % (time_day, time_hour, time_minute, time_second, time_micsecond)
    return retstr


if __name__ == "__main__":
    beginTime = datetime.datetime.now()
    tms.tms_train("../twitterData/twitterTrain.txt", main_save_path="./", seg=1, global_fun="idf")
    tms.tms_segment("../twitterData/twitterTestData.txt", [1], "../twitterData/twitterTestData1.txt", "^", "\t", 1)
    tms.tms_predict(
        "../twitterData/twitterTestData1.txt", "./model/tms.config", result_save_path="../twitterData/tms.result"
    )
    endTime = datetime.datetime.now()
    d = timediff(beginTime, endTime)
    print d
# Example #4
# 0
#-*- coding: utf-8 -*-
import tms
import datetime
import time

def timediff(timestart, timestop):
    '''
    desc: Render the interval between two datetimes as a Chinese
          days/hours/minutes/seconds/milliseconds string.
    params: timestart, timestop -- datetime.datetime instances
    return: formatted string "%d天%d小时%d分%d秒%d毫秒"
    '''
    t = timestop - timestart
    time_day = t.days
    # The whole-second remainder of the day.  The original also added
    # t.microseconds / 1000000, which is always 0 (microseconds < 1e6
    # under Python 2 integer division), so it is dropped here.
    usedtime = t.seconds
    # Floor division (//) preserves the Python 2 integer semantics when
    # this file is run under Python 3, where / would yield floats.
    time_hour = usedtime // 3600
    time_minute = (usedtime - time_hour * 3600) // 60
    time_second = usedtime - time_hour * 3600 - time_minute * 60
    # Milliseconds; the original's (microseconds - microseconds/1000000)
    # subtraction was a no-op and has been removed.
    time_micsecond = t.microseconds // 1000
    retstr = "%d天%d小时%d分%d秒%d毫秒" % (time_day, time_hour, time_minute, time_second, time_micsecond)
    return retstr

if __name__ == '__main__':
    beginTime = datetime.datetime.now()
    tms.tms_train("../twitterData/twitterTrain.txt",main_save_path="./",seg=1,global_fun ='idf')
    tms.tms_segment("../twitterData/twitterTestData.txt",[1],"../twitterData/twitterTestData1.txt","^","\t",1)
    tms.tms_predict("../twitterData/twitterTestData1.txt","./model/tms.config",result_save_path="../twitterData/tms.result")
    endTime = datetime.datetime.now()
    d = timediff(beginTime , endTime)
    print d