Python Tagger.cut_pos Exemples, crfseg.Tagger.cut_pos Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : word_test.py Projet : jannson/crfseg

def test4():
    corpus = [u'SCANV网址安全中心(http://scanv.com)是一个综合性的网址安全服务平台。通过网址安全中心，用户可以方便的查询到要访问的网址是否存在恶意行为，同时可以在SCANV中在线举报曝光违法恶意网站。',
            u'工信处女干事每月经过下属科室都要亲口交代24口交换机等技术性器件的安装工作',]

    tagger = Tagger()
    for str in corpus:
        print '\nCPPJIEBA:'
        for s in tagger.pos(list(cppjiebapy.cut(str))):
            print s[0],'/',s[1], " ",

        print '\nCRF:'
        for s in tagger.cut_pos(str):
            print s[0],'/',s[1], " ",
        print '\n'

Exemple #2

0

Afficher le fichier

def test4():
    corpus = [
        u'SCANV网址安全中心(http://scanv.com)是一个综合性的网址安全服务平台。通过网址安全中心，用户可以方便的查询到要访问的网址是否存在恶意行为，同时可以在SCANV中在线举报曝光违法恶意网站。',
        u'工信处女干事每月经过下属科室都要亲口交代24口交换机等技术性器件的安装工作',
    ]

    tagger = Tagger()
    for str in corpus:
        print '\nCPPJIEBA:'
        for s in tagger.pos(list(cppjiebapy.cut(str))):
            print s[0], '/', s[1], " ",

        print '\nCRF:'
        for s in tagger.cut_pos(str):
            print s[0], '/', s[1], " ",
        print '\n'

Exemple #3

0

Afficher le fichier

Fichier : similary.py Projet : TPLink32/nlp

def mksogou_(num):
    copus_sample = '/opt/projects/packages/sogou_corpus/Sample'
    print 'processing ', num
    tagger = Tagger()
    fout = codecs.open('sogou_out%d'%num, 'w', 'utf-8')
    with open('sogou_%d'%num) as fw:
        for f in fw:
            with open(f.replace('\n', '')) as file:
                for line in file:
                    content = line.decode('gb2312', 'ignore').encode('utf-8').decode('utf-8', 'replace')
                    line = []
                    for word_pos in tagger.cut_pos(content):
                        m = en_word_re.match(word_pos[0])
                        if m:
                            thew = m.group()+word_pos[1].strip()
                            line.append(thew)
                    if len(line) > 3:
                        fout.write(u' '.join(line)+u'\n')