Exemple #1
0
 def __init__(self, kg, env):
     """Build the extractor around a knowledge graph and environment config.

     kg: knowledge-graph object; its ``entity_industry`` iterable seeds
         the prefix trie.
     env: mapping providing 'GRAPH_BUCKET' and 'KG_VOCAB_KEY', used to
         locate the vocabulary.
     """
     # Removed two lines of commented-out bucket assignments (dead code).
     self.kg = kg
     # Prefix trie over the KG's industry entities for fast lookup.
     self.trie = marisa_trie.Trie(list(kg.entity_industry))
     self.vocab = Vocab(env['GRAPH_BUCKET'], env['KG_VOCAB_KEY'])
     # fastHan Chinese NLP model (default configuration).
     self.model = FastHan()
Exemple #2
0
def get_info(sentence, n=-1, model=None):
    """Extract speakers and their statements from a Chinese sentence.

    Parameters
    ----------
    sentence : str
        Sentence to analyse; converted from traditional to simplified
        Chinese before processing.
    n : int
        Passed to ``load_similar_words`` to select how many synonyms of
        "say" to load (-1 presumably means "all" -- confirm against that
        helper).
    model : FastHan, optional
        Reusable fastHan model.  If omitted a new one is created, which
        is expensive; callers in a loop should pass one in.

    Returns
    -------
    dict
        Maps each detected speaker to the extracted statement.
    """
    s_simp = to_simplified(sentence)  # traditional -> simplified Chinese
    sim_words = load_similar_words(n)  # synonyms of "say" (说)

    # Dependency parsing.  Creating FastHan is costly, so only do it
    # when the caller did not supply a model.
    if model is None:
        model = FastHan()
    dep_answer = model(s_simp, target="Parsing")
    dep_tree = sen2tree(dep_answer)

    # Named-entity recognition; record the indices of dependency-tree
    # nodes whose word is a recognised entity.  A set makes the
    # membership test O(1) instead of scanning a list per node.
    ner = model(s_simp, target="NER")
    ner_words = {token.word for token in ner[0]}
    ner_ind = [i for i, node in enumerate(dep_tree) if node.word in ner_words]

    # A verb (POS tag 'VV') that is a "say"-synonym marks a statement;
    # extrat_info resolves the speaker and the statement span.
    answer = {}
    for node in dep_tree:
        if node.pos == 'VV' and node.word in sim_words:
            speaker, point = extrat_info(dep_tree, node, ner_ind)
            answer[speaker] = point

    return answer
Exemple #3
0
    def test_call(self):
        """Smoke-test every task target on a batch of sentences.

        NOTE(review): the original list was missing a comma between the
        third and fourth string literals, so Python silently concatenated
        them into a single element; the comma is restored here so all
        four sentences are exercised individually.
        """
        sentence = [
            '一行人下得山来,走不多时,忽听前面猛兽大吼之声一阵阵的传来。',
            '韩宝驹一提缰,胯下黄马向前窜出,奔了一阵,忽地立定,不论如何催迫,黄马只是不动。',
            '韩宝驹心知有异,远远望去,只见前面围了一群人,有几头猎豹在地上乱抓乱扒。',
            '他知坐骑害怕豹子,跃下马来,抽出金龙鞭握在手中。',
        ]

        targets = ['CWS', 'POS', 'NER', 'Parsing']
        model = FastHan('large')
        for target in targets:
            model(sentence, target)
Exemple #4
0
 def __init__(self, model_name, ip=None):
     """Load the NLP backend selected by *model_name*.

     model_name: one of "fasthan", "stanford", "bbc".
     ip: server address, required only for the "bbc" backend.

     Raises ValueError when model_name == "bbc" and no ip is supplied.
     """
     config = Config()
     self.model_name = model_name
     if self.model_name == "fasthan":
         self.nltk_model = FastHan(model_type="base")
     elif self.model_name == "stanford":
         # BUG FIX: the original embedded a literal backslash in the
         # path ('model\stanford-...'); '\s' is an invalid escape
         # sequence and the hard-coded separator broke on non-Windows
         # systems.  Join the components portably instead.
         self.stanford_model = StanfordCoreNLP(
             os.path.join(config.project_dir, 'model',
                          'stanford-corenlp-full-2016-10-31'),
             lang='zh')
     elif self.model_name == "bbc":
         if not ip:
             raise ValueError("bbc模型必须填入ip")
         self.bbc_model = BertClient(ip, ner_model_dir=None,
                                     show_server_config=False,
                                     check_version=False,
                                     check_length=False, mode='NER')
def reprocess(path, file):
    """Re-run subject extraction over unprocessed records of a JSON file.

    Reads ``path/file`` (a JSON object mapping id -> record), extracts
    subjects for every record whose 'is_process' flag is falsy, and
    writes the updated records to ``path/new_<file>``.
    """
    model_fastHan = FastHan(model_type='large')
    result = {}
    with open(os.path.join(path, file), 'r', encoding='utf-8') as f:
        data = json.load(f)
        cnt = 0
        for k, v in data.items():
            try:
                if not v['is_process']:
                    print(k, v['sentence'])
                    cnt += 1
                    subjects, tuples, simplified_tuples = extract_subjects(
                        v['sentence'], model_fastHan)
                    v['subjects'] = subjects
                    v['tuples'] = tuples
                    v['simplified_tuples'] = simplified_tuples
                    # BUG FIX: the original reset the flag to False after
                    # processing, so records were never marked done.
                    v['is_process'] = True
                    result[k] = v
            except Exception as e:
                # Best-effort: skip records that fail, but report why
                # instead of silently swallowing everything (the original
                # bare `except` also caught KeyboardInterrupt).
                print('skip {}: {}'.format(k, e))
        print(cnt)
    with open(os.path.join(path, 'new_{}'.format(file)), 'w',
              encoding='utf-8') as f:
        # ensure_ascii=False keeps the Chinese text readable, matching
        # write_data elsewhere in this file.
        json.dump(result, f, ensure_ascii=False)
Exemple #6
0
import sys
import io
from fastHan import FastHan

# Force UTF-8 on stdout so Chinese output prints correctly regardless of
# the console's default encoding.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8')

model = FastHan()

sentence = sys.argv[1]

# Dependency parsing of the command-line sentence.
answer = model(sentence, target="Parsing")

# Emit every token word followed by a comma (original output format,
# trailing comma included).  The original accumulated into a variable
# named `str` -- shadowing the builtin -- via quadratic `+=`
# concatenation; a join is linear and leaves the builtin alone.
pieces = [token.word + ',' for parsed in answer for token in parsed]
print(''.join(pieces))
Exemple #7
0
    def test_init(self):
        """Both the default and the 'large' model variants must
        construct without raising."""

        default_model = FastHan()

        large_model = FastHan('large')
Exemple #8
0
 def __init__(self, kg):
     """Keep a handle on the knowledge graph and build the lookup
     helpers derived from it."""
     self.kg = kg
     # fastHan Chinese NLP model used for downstream analysis.
     self.model = FastHan()
     self.vocab = Vocab()
     # Prefix trie over the KG's industry entities.
     self.trie = marisa_trie.Trie(list(kg.entity_industry))
Exemple #9
0
                next_word, _, _, next_pos = tuples[j]
                if next_pos in ['DEC', 'DEV']:
                    subject = subject + next_word
                    subj_pos = '+'.join([subj_pos, next_pos])
                break
            subjects.append([subject, subj_pos])
        position += len(word)
    return subjects, tuples, simplified_tuples

def write_data(data, index):
    """Serialise *data* to ``static/data/data_<index>.json`` as UTF-8.

    ensure_ascii=False keeps the Chinese text readable in the file.
    BUG FIX: the original open() relied on the platform default
    encoding, which raises UnicodeEncodeError for non-ASCII output on
    locales such as Windows cp1252; encoding='utf-8' makes it portable.
    """
    with open('static/data/data_{}.json'.format(index), 'w',
              encoding='utf-8') as f:
        f.write(json.dumps(data, ensure_ascii=False))

if __name__ == '__main__':
	model_fastHan = FastHan()
	result = {}
	with open('0subject_rand5000.txt', 'r', encoding='utf-8') as f:
	    cnt = 0
	    temp = enumerate(f.readlines())
	    for index, line in tqdm(temp):
	    	line = line.strip()
	    	subjects, tuples, simplified_tuples = extract_subjects(line.split('::')[1], model_fastHan)
	    	result[index] = {
	    		'id': str(index),
	    		'is_process': False,
	    		'tokens': [],
	    		'type': None,
	    		'validity': None,
	    		'subjects': subjects,
	    		'tuples': tuples,
Exemple #10
0
def fasthan_cws():
    """Demo: Chinese word segmentation (CWS) with fastHan,
    with the user dictionary disabled."""
    segmenter = FastHan()
    text = "郭靖是金庸笔下的一名男主。"
    tokens = segmenter(text, 'CWS', use_dict=False)
    print(tokens)
# -*- coding: utf-8 -*-
"""
@author:XuMing([email protected])
@description: Demo of the fastHan multi-task Chinese NLP model:
dependency parsing, NER, word segmentation (with style switching),
and batched parsing.
"""

from fastHan import FastHan

model = FastHan()
sentence = "郭靖是金庸笔下的一名男主。"
# Dependency parsing of a single sentence.
answer = model(sentence, target="Parsing")
print(answer)
# Named-entity recognition on the same sentence.
answer = model(sentence, target="NER")
print(answer)

sentence = "一个苹果。"
# Word segmentation with the default style, then again after switching
# the segmentation criterion to the 'cnc' corpus style.
print(model(sentence, 'CWS'))
model.set_cws_style('cnc')
print(model(sentence, 'CWS'))

# Batched parsing: a list of sentences is processed in one call; each
# token carries its POS tag, head index and dependency label.
sentence = ["我爱踢足球。", "林丹是冠军"]
answer = model(sentence, 'Parsing')
for i, sentence in enumerate(answer):
    print(i)
    for token in sentence:
        print(token, token.pos, token.head, token.head_label)
Exemple #12
0
 def __init__(self):
     # Import lazily so fastHan (a heavy dependency that loads model
     # weights) is only pulled in when this wrapper is instantiated.
     from fastHan import FastHan
     self.model = FastHan()