Example #1
def lineproc(id, text):
    # Process one weibo line: strip weibo noise, segment it into words,
    # tag the words, and feed them into the shared group counter.
    global word_dict_root
    global grouptree

    grouptree.StartCountGroup()
    line = weibo_bot.RemoveWeiboRubbish(text)
    if len(line) == 0:
        return None
    spliter = decoder.LineSpliter(word_dict_root)
    spliter.SplitLine(line)
    spliter.AfterProcess()
    words = spliter.found_word
    signwordpos.ProcessSentence(words)
    grouptree.ProcessOneLine(words)
    return grouptree.group_count
Example #2
    def RequestWork(self, params, body):
        # Decompress and decode the request body if the caller asked for it.
        if params.get('zip'):
            body = gzip.GzipFile(fileobj=StringIO(body), mode='r').read()
        if not isinstance(body, unicode) and 'encode' in params:
            body = body.decode(params['encode'])

        text_pice = re.split(u"[\s!?,。;,:“ ”( )、?《》·]+", body)
        text_list = []
        for tp in text_pice:
            tp = tp.strip()
            if len(tp) > 0:
                text_list.append(tp)

        # Segment each piece and collect the tagged words.
        result_text_list = []
        for tp in text_list:
            spliter = decoder.LineSpliter(self.word_dict_root)
            spliter.SplitLine(tp)
            spliter.AfterProcess()
            words = spliter.found_word
            self.signwordpos.ProcessSentence(words)
            #self.grouptree.ProcessOneLine(words)
            #for word in words:
            #    groupstr = None
            #    if word.info:
            #        groups = word.info.get('group')
            #        if groups:
            #            groupstr = ','.join(groups)
            word_list = []
            for word in words:
                word_list.append({
                    'pos': word.pos,
                    'txt': word.word,
                    'type': word.word_type_list,
                    'nocn': word.is_no_cn
                })
            result_text_list.append({'pice': tp, 'words': word_list})

        # Close the GzipFile before reading the buffer, otherwise the gzip
        # trailer is never flushed and the payload is truncated.
        outbuf = StringIO()
        zf = gzip.GzipFile(fileobj=outbuf, mode='w')
        json.dump(result_text_list, zf)
        zf.close()
        return {'zip': True}, outbuf.getvalue()
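
A short sketch of how a caller might unpack what RequestWork returns, following the zip/JSON convention used above. The helper name unpack_result and the commented usage lines are illustrative assumptions, not part of the original code.

from StringIO import StringIO
import gzip
import json

def unpack_result(params, body):
    # Undo the gzip wrapping applied by RequestWork (when params['zip'] is set)
    # and parse the JSON payload back into the result_text_list structure.
    if params.get('zip'):
        body = gzip.GzipFile(fileobj=StringIO(body), mode='r').read()
    return json.loads(body)

# Illustrative usage, assuming `worker` is an instance of the class defining RequestWork:
# out_params, out_body = worker.RequestWork({'encode': 'utf-8'}, text)
# for pice in unpack_result(out_params, out_body):
#     print pice['pice'], [w['txt'] for w in pice['words']]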
Example #3
# -*- coding: utf-8 -*-
import sqlite3
import codecs
import json
import decoder
import gzip

if __name__ == '__main__':
    """
    使用新浪新闻来测试词频 上一步是 fetch_hudongbaike/fetch_sina_news.py
    """
    dbtext = sqlite3.connect("../fetch_hudongbaike/data/sina_news.db")

    dc = dbtext.cursor()
    dc.execute('select content from sina_news where content is not null')

    word_dic = {}
    word_dict_root = decoder.LoadDefaultWordDic()
    # Segment every article and count how often each Chinese word occurs.
    for content, in dc:
        spliter = decoder.LineSpliter(word_dict_root)
        spliter.SplitLine(content)
        spliter.CheckCantantPre()
        spliter.CheckTail()
        for word in spliter.found_word:
            if word.is_no_cn:
                continue
            word_dic[word.word] = word_dic.get(word.word, 0) + 1

    # Write the frequency table as gzipped JSON for later pipeline steps.
    fp = gzip.open('data/dictbase/word_freq.txt.gz', 'w')
    json.dump(word_dic, fp)
    fp.close()
    dbtext.close()
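
A minimal sketch of reading the frequency dump back in a later step, assuming the same path used above; the top-20 printout is illustrative only.

import gzip
import json

# Load the gzipped JSON frequency table written by the script above.
fp = gzip.open('data/dictbase/word_freq.txt.gz', 'r')
word_dic = json.load(fp)
fp.close()

# Print the 20 most frequent words (illustrative, not part of the pipeline).
for word, freq in sorted(word_dic.items(), key=lambda kv: kv[1], reverse=True)[:20]:
    print word.encode('utf-8'), freq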