Exemple #1
0
#!/usr/bin/python
# -*- coding:utf-8 -*-

from __future__ import unicode_literals # compatible with python3 unicode

import deepnlp

# Fetch the pretrained NER models before the tagger module is imported.
deepnlp.download('ner')

from deepnlp import ner_tagger

# Load the base 'zh' model, then switch it to the "zh_o2o" dictionary.
tagger = ner_tagger.load_model(name='zh')
tagger.load_dict("zh_o2o")

# The sample sentence is pre-segmented: tokens are separated by single spaces.
text = "北京 望京 最好吃 的 黑椒 牛排 在哪里"
words = text.split(" ")

# Dictionary-based tagging; merge=True is passed so separated words get combined.
tagging = tagger._predict_ner_tags_dict(words, merge=True)
print("DEBUG: NER tagger zh_o2o dictionary")
for word, tag in tagging:
    # One "word/tag" pair per output line.
    print(word + "/" + tag)

#北京/city
#望京/area
#最好吃/nt
#的/nt
#黑椒牛排/dish
#在哪里/nt


# Word Sense Disambiguation
#coding:utf-8
from __future__ import unicode_literals

import deepnlp

# Fetch the pretrained POS models before the tagger module is imported.
deepnlp.download('pos')

from deepnlp import pos_tagger

# Load the English model (lang code 'en').
# NOTE(review): another example in this file calls load_model(name='en');
# which keyword is accepted presumably depends on the installed deepnlp
# version -- confirm before unifying.
tagger = pos_tagger.load_model(lang='en')

# Segmentation: the sentence is split on single spaces.
text = "I want to see a funny movie"
words = text.split(" ")
# Print text rather than encoded bytes: on Python 3, printing the result of
# .encode('utf-8') shows the bytes repr (b'...') instead of the sentence.
print(" ".join(words))

# POS Tagging: print one "word/tag" pair per line.
tagging = tagger.predict(words)
for (w, t) in tagging:
    # Named `pair` (not `str`) so the builtin `str` type is not shadowed.
    pair = w + "/" + t
    print(pair)

#Results
#I/nn
#want/vb
#to/to
#see/vb
#a/at
#funny/jj
#movie/nn
Exemple #3
0
#coding:utf-8
from __future__ import unicode_literals # compatible with python3 unicode

import sys,os
import codecs

import deepnlp
# Download the pretrained models for the three modules requested below
# (segment, pos, ner); per the original note these come from github when
# deepnlp was installed from pip.
deepnlp.download('segment')
deepnlp.download('pos')
deepnlp.download('ner')

# The pipeline module is imported only after the downloads above have run.
from deepnlp import pipeline
# Load the 'zh' pipeline model; `p` is the handle the rest of the script uses.
p = pipeline.load_model('zh')

# concatenate tuples into one string "w1/t1 w2/t2 ..."
def _concat_tuples(tagging):
  TOKEN_BLANK = " "
  wl = [] # wordlist
  for (x, y) in tagging:
    wl.append(x + "/" + y) # unicode
  concat_str = TOKEN_BLANK.join(wl)
  return concat_str

# input file
# Directory containing this script; docs_test.txt is looked up next to it.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))

# Read the file as UTF-8, one document per line. The `with` block closes the
# handle even if decoding fails part-way through (the handle was previously
# never closed), and the handle no longer shadows Python 2's builtin `file`.
with codecs.open(os.path.join(BASE_DIR, 'docs_test.txt'), 'r', encoding='utf-8') as docs_file:
    # Strip both "\n" and "\r" so lines ending in "\r\n" leave no stray "\r".
    docs = [line.replace("\n", "").replace("\r", "") for line in docs_file]
Exemple #4
0
#coding=utf-8
from __future__ import unicode_literals

import tensorflow as tf
import deepnlp

# Download a named, domain-specific model for each listed module.
for _module, _model in (
    ('segment', 'zh_entertainment'),
    ('pos', 'en'),
    ('ner', 'zh_o2o'),
):
    deepnlp.download(module=_module, name=_model)

# Download by module only (no model name given).
for _module in ('segment', 'pos', 'ner', 'parse'):
    deepnlp.download(_module)

# deepnlp.download()

## Test load model
from deepnlp import segmenter
try:
    # Load each segmenter model in turn; the first failure skips the rest and
    # `tokenizer` keeps whichever model loaded last.
    for _model_name in ('zh', 'zh_o2o', 'zh_entertainment'):
        tokenizer = segmenter.load_model(name=_model_name)
except Exception as e:
    # Best-effort smoke test: report the error instead of aborting the script.
    print("DEBUG: ERROR Found...")
    print(e)

## pos
from deepnlp import pos_tagger
Exemple #5
0
import deepnlp

# Fetch the pretrained NER models before the tagger module is imported.
deepnlp.download('ner')

from deepnlp import segmenter
from deepnlp import ner_tagger

# Segmenter and NER tagger, both loaded with the 'zh' model.
tokenizer = segmenter.load_model(name='zh')
tagger = ner_tagger.load_model(name='zh')

# Tag every line of the input file and write the "word tag" pairs to the
# output file. The `with` block closes both handles even when segmentation
# or tagging raises; previously an exception leaked both and could lose
# buffered output.
with open('./data/person_resume.txt', 'r') as file_r, \
        open('./data/person_resume02.txt', 'w') as file_w:
    for line in file_r:
        text = line.rstrip()
        words = tokenizer.seg(text)
        tagging = tagger.predict(words)
        for (w, t) in tagging:
            pair = w + ' ' + t
            # NOTE(review): pairs are written back-to-back with no separator
            # or newline between them -- confirm this output format is intended.
            file_w.write(pair)
Exemple #6
0
                       .::::.
                     .::::::::.
                    :::::::::::
                ..:::::::::::'
             '::::::::::::'
                .::::::::::
           '::::::::::::::..
                ..::::::::::::.
             ``::::::::::::::::
               ::::``:::::::::'        .:::.
              ::::'   ':::::'       .::::::::.
            .::::'      ::::     .:::::::'::::.
           .:::'       :::::  .:::::::::' ':::::.
          .::'        :::::.:::::::::'      ':::::.
         .::'         ::::::::::::::'         ``::::.
     ...:::           ::::::::::::'              ``::.
    ```` ':.          ':::::::::'                  ::::..
                       '.:::::'                    ':'````..
"""
#coding=utf-8
import deepnlp
# Download all the modules (download() is called with no arguments).
deepnlp.download()
#coding=utf-8
# The segmenter module is imported only after the download above has run.
from deepnlp import segmenter

# Load the 'zh_entertainment' segmentation model.
tokenizer = segmenter.load_model(name = 'zh_entertainment')
# Sample sentence to segment.
text = "我刚刚在浙江卫视看了电视剧老九门,觉得陈伟霆很帅"
# seg() output is joined below, so it is presumably an iterable of
# word strings -- TODO confirm against the deepnlp API.
segList = tokenizer.seg(text)
# Space-separated rendering of the segmented words.
text_seg = " ".join(segList)
Exemple #7
0
#coding:utf-8
from __future__ import unicode_literals

import deepnlp

# Fetch the pretrained English POS model before the tagger module is imported.
deepnlp.download(module='pos', name='en')

from deepnlp import pos_tagger

# Load the English model by name.
tagger = pos_tagger.load_model(name='en')

# Segmentation: split the sentence on single spaces and echo it back.
text = "I want to see a funny movie"
words = text.split(" ")
print(" ".join(words))

# POS tagging: print one "word/tag" pair per line.
tagging = tagger.predict(words)
for word, tag in tagging:
    print(word + "/" + tag)

#Results
#I/nn
#want/vb
#to/to
#see/vb
#a/at
#funny/jj
#movie/nn
Exemple #8
0
#!/usr/bin/python
# -*- coding:utf-8 -*-

import deepnlp

# Exercise the register -> download -> load sequence for the "segment"
# module with the model name "zh_finance".
from deepnlp import segmenter
try:
    # First download attempt, made BEFORE the model name is registered; any
    # error is printed instead of raised.
    # NOTE(review): presumably this is expected to fail for an unregistered
    # model -- confirm.
    deepnlp.download("segment", "zh_finance")
except Exception as e:
    print (e)

# Register the model name, then download again. This second download is not
# guarded, so a failure here stops the script.
deepnlp.register_model("segment", "zh_finance")
deepnlp.download("segment", "zh_finance")
try:
    # Loading is best-effort: a failure is printed and the script continues.
    seg_tagger = segmenter.load_model("zh_finance")
except Exception as e:
    print (e)

# Exercise the register -> download -> load sequence for the "pos" module
# with the model name "zh_finance".
from deepnlp import pos_tagger
try:
    # First download attempt, made BEFORE the model name is registered; any
    # error is printed instead of raised.
    # NOTE(review): presumably this is expected to fail for an unregistered
    # model -- confirm.
    deepnlp.download("pos", "zh_finance")
except Exception as e:
    print (e)

# Register the model name, then download again. This second download is not
# guarded, so a failure here stops the script.
deepnlp.register_model("pos", "zh_finance")
deepnlp.download("pos", "zh_finance")
try:
    # Loading is best-effort; the loaded model is not kept in a variable.
    pos_tagger.load_model("zh_finance")
except Exception as e:
    print (e)