예제 #1
0
class pyltp_model():
    """Thin wrapper around the pyltp pipeline: segmentation, POS tagging
    and named-entity recognition, loaded once and reused per sentence."""

    def __init__(self, LTP_DATA_DIR='/Users/didi/Desktop/ltp_data_v3.4.0'):
        # Resolve the three model files inside the LTP data directory.
        model_paths = {
            name: os.path.join(LTP_DATA_DIR, name + '.model')
            for name in ('cws', 'pos', 'ner')
        }
        self.segmentor = Segmentor()
        self.postagger = Postagger()
        self.recognizer = NamedEntityRecognizer()

        self.segmentor.load(model_paths['cws'])
        self.postagger.load(model_paths['pos'])
        self.recognizer.load(model_paths['ner'])

    def token(self, sentence):
        """Tokenize *sentence*; single-token named entities (person/org/place,
        i.e. tags S-Nh/S-Ni/S-Ns) are replaced by their NE tag."""
        words = list(self.segmentor.segment(sentence))
        postags = list(self.postagger.postag(words))
        netags = list(self.recognizer.recognize(words, postags))
        return [tag if tag in ('S-Nh', 'S-Ni', 'S-Ns') else word
                for word, tag in zip(words, netags)]

    def close(self):
        """Release the native model resources."""
        self.segmentor.release()
        self.postagger.release()
        self.recognizer.release()
예제 #2
0
def ltp_ner_data():
    """Run LTP named-entity recognition over the POS-tagged train/test files.

    Each input line is "<label>\t<word/_pos word/_pos ...>"; the output line
    re-tags every word with its NE label: "<label>\t<word/_netag ...>".
    """
    # Raw string: the original 'D:\Baidu...' only worked because none of the
    # backslash pairs happened to form escape sequences; r'...' is explicit.
    LTP_DATA_DIR = r'D:\BaiduNetdiskDownload\ltp_data_v3.4.0'
    ner_model_path = os.path.join(LTP_DATA_DIR, 'ner.model')  # NER model path

    from pyltp import NamedEntityRecognizer
    recognizer = NamedEntityRecognizer()  # create instance
    recognizer.load(ner_model_path)  # load model once, reuse for both files

    # (source, destination) pairs; renamed from `file`, which shadowed the builtin.
    files = [(const.qc_train_pos, const.qc_train_ner),
             (const.qc_test_pos, const.qc_test_ner)]
    for src_path, dst_path in files:
        result = []
        with open(src_path, 'r', encoding='utf-8') as f:
            for line in f:  # stream the file instead of readlines()
                attr = line.strip().split('\t')
                words_pos = attr[1].split(" ")
                words = [wp.split('/_')[0] for wp in words_pos]
                postags = [wp.split('/_')[1] for wp in words_pos]
                netags = recognizer.recognize(words, postags)  # run NER
                res = ' '.join("{}/_{}".format(w, t)
                               for w, t in zip(words, netags))
                result.append("{}\t{}\n".format(attr[0], res))
        with open(dst_path, 'w', encoding='utf-8') as f:
            f.writelines(result)
    recognizer.release()  # release model
def get_ner_list(words_list, postag_list):
    """Return NE tags (one per word) for pre-segmented, POS-tagged input."""
    recognizer = NamedEntityRecognizer()
    recognizer.load(ner_model_path)
    tags = list(recognizer.recognize(words_list, postag_list))
    recognizer.release()
    return tags
예제 #4
0
def ner(words, postags):
    """Run pyltp named-entity recognition over pre-segmented input.

    :param words: segmented words
    :param postags: POS tags, one per word
    :return: (tag_list, word_list) as plain Python lists
    """
    recognizer = NamedEntityRecognizer()
    recognizer.load('D:\\ltp_data\\ner.model')  # load the NER model
    netags = recognizer.recognize(words, postags)  # one NE tag per word
    # Copy results into plain lists BEFORE releasing the model — the
    # recognize() result is produced by the native model and is presumably
    # not safe to consume after release (TODO confirm against pyltp docs).
    nerttags = list(netags)
    nerwords = list(words)
    # Removed: a dead `for ...: pass` loop and a large block of commented-out
    # entity-accumulation code that was never executed.
    recognizer.release()  # release the model

    return nerttags, nerwords
예제 #5
0
def name_recognition(words, postags):
    """
    Named-entity recognition.
    :param words: segmented words
    :param postags: POS tags for *words*
    :return: the NE tag sequence
    """
    recognizer = NamedEntityRecognizer()  # create instance
    recognizer.load(
        'D:/Program Files/ltp-models/3.3.1/ltp-data-v3.3.1/ltp_data/ner.model'
    )  # load model
    netags = recognizer.recognize(words, postags)  # run NER

    # Stitch "entity + non-entity connector + entity" triples into one span
    # (place-name tags contain 's', e.g. S-Ns/B-Ns).
    result = ''
    for i in range(0, len(netags)):
        if i < len(words) - 2:
            if 's' in netags[i]:
                if 'O' in netags[
                        i + 1] and words[i + 1] != ',' and words[i + 1] != ',':
                    if 's' in netags[i + 2]:
                        result += words[i] + words[i + 1] + words[i + 2] + " "
    # Bug fix: the original had `print` and `result` on two separate lines,
    # which under Python 3 evaluates two no-op expressions and prints nothing.
    print(result)
    recognizer.release()  # release model
    return netags
예제 #6
0
class LTP_word():
    """Wrapper around the full pyltp pipeline.

    deal(text) returns five values: words, POS tags, dependency arcs,
    semantic roles and NE tags. release() frees the native models.
    """
    def __init__(self, model_path):
        # Directory holding the LTP model files (cws/pos/ner/parser/srl).
        self.model_path = model_path
        self.segmentor = Segmentor()  # word-segmentation instance
        # Segment with a user lexicon (dictionary_kfc.txt) on top of cws.model.
        self.segmentor.load_with_lexicon(path.join(self.model_path, 'cws.model'), path.join(self.model_path, 'dictionary_kfc.txt'))
        self.postagger = Postagger() # POS-tagging instance
        self.postagger.load(path.join(self.model_path, 'pos.model') ) # load model
        self.recognizer = NamedEntityRecognizer() # named-entity-recognition instance
        self.recognizer.load(path.join(self.model_path, 'ner.model'))
        self.parser = Parser() # dependency-parsing instance
        self.parser.load(path.join(self.model_path, 'parser.model'))  # load model
        self.labeller = SementicRoleLabeller() # semantic-role-labelling instance
        self.labeller.load(path.join(self.model_path, 'srl'))
    def deal (self, text):  # run the whole pipeline over *text*
        """Return (words, postags, arcs, roles, netags) for *text*."""
        words =self.segmentor.segment(text)    # segmentation
        postags = self.postagger.postag(words)  # POS tagging
        netags = self.recognizer.recognize(words, postags)	# named entities
        arcs = self.parser.parse(words, postags)  # dependency parsing
        roles = self.labeller.label(words, postags, netags, arcs)  # semantic roles
        return words,postags,arcs,roles,netags
    def release(self):
        """Free all native model resources."""
        self.segmentor.release()
        self.postagger.release()
        self.recognizer.release()
        self.parser.release()
        self.labeller.release()
예제 #7
0
class Parse_Util(object):
    """Bundles the pyltp segmentation / POS-tagging / parsing / NER models
    behind a single parse_sentence() call.

    NOTE(review): relies on module-level cws_model_path / pos_model_path /
    par_model_path / ner_model_path being defined elsewhere in the file.
    """

    def __init__(self, lexicon_path='./data/lexicon'):
        # Word segmentation
        self.segmentor = Segmentor()
        # self.segmentor.load_with_lexicon(cws_model_path, lexicon_path)
        self.segmentor.load(cws_model_path)
        # Part-of-speech tagging
        self.postagger = Postagger()
        self.postagger.load(pos_model_path)
        # Dependency parsing
        self.parser = Parser()
        self.parser.load(par_model_path)
        # Named-entity recognition
        self.recognizer = NamedEntityRecognizer()
        self.recognizer.load(ner_model_path)
        # jieba user dictionary (disabled)
        # jieba.load_userdict(lexicon_path)

    def __del__(self):
        # Release the native models when the wrapper is garbage-collected.
        self.segmentor.release()
        self.postagger.release()
        self.recognizer.release()
        self.parser.release()

    # Run one sentence through the full pipeline.
    def parse_sentence(self, sentence):
        """Return (words, postags, netags, arcs) for *sentence*."""
        words = self.segmentor.segment(sentence)
        postags = self.postagger.postag(words)
        netags = self.recognizer.recognize(words, postags)
        arcs = self.parser.parse(words, postags)
        # child_dict_list = ParseUtil.build_parse_child_dict(words, arcs)

        return words, postags, netags, arcs
def namedEntityRecognize(sentence):
    '''
    Run pyltp named-entity recognition over *sentence*.

    Returns: 1) a list of (word, ne_tag) tuples, 2) the list of NE tags.
    '''
    namedEntityTagTupleList = []

    segmentor = Segmentor()
    # segmentor.load(inout.getLTPPath(index.CWS))
    segmentor.load_with_lexicon(inout.getLTPPath(index.CWS),
                                inout.getResourcePath('userDic.txt'))
    # Copy each native result into a plain list BEFORE releasing the model
    # that produced it — the original used `words` after segmentor.release(),
    # which presumably reads native memory owned by the released model
    # (TODO confirm against pyltp docs).
    words = list(segmentor.segment(sentence))
    segmentor.release()
    postagger = Postagger()
    postagger.load(inout.getLTPPath(index.POS))
    postags = list(postagger.postag(words))
    postagger.release()
    recognizer = NamedEntityRecognizer()
    recognizer.load(inout.getLTPPath(index.NER))
    netags = list(recognizer.recognize(words, postags))
    recognizer.release()

    # Pair each word with its NE tag.
    for word, netag in zip(words, netags):
        namedEntityTagTupleList.append((word, netag))

    # Kept as join/split (not list(netags)) to preserve the original's
    # edge case: an empty tag sequence yields [''] here, not [].
    neTagList = '\t'.join(netags).split('\t')

    return namedEntityTagTupleList, neTagList
예제 #9
0
def segmentsentence(sentence):
    """Segment *sentence*, print its words and NE tags, return the word list."""
    segmentor = Segmentor()
    postagger = Postagger()
    parser = Parser()
    recognizer = NamedEntityRecognizer()

    segmentor.load("./ltpdata/ltp_data_v3.4.0/cws.model")
    postagger.load("./ltpdata/ltp_data_v3.4.0/pos.model")
    # parser.load("./ltpdata/ltp_data_v3.4.0/parser.model")
    recognizer.load("./ltpdata/ltp_data_v3.4.0/ner.model")
    #############
    word_list = segmentor.segment(sentence)
    postags_list = postagger.postag(word_list)
    nertags = recognizer.recognize(word_list, postags_list)
    ############
    # Bug fix: entity_list was referenced without ever being defined, which
    # would raise NameError on the first matching tag.
    entity_list = []
    for word, ntag in zip(word_list, nertags):
        # NOTE(review): pyltp emits BIESO-style tags such as 'S-Nh'/'B-Nh',
        # so a bare 'Nh' comparison likely never matches — confirm the
        # intended tag set before relying on entity_list.
        if ntag == 'Nh':
            entity_list.append(word)
    print(" ".join(word_list))
    print(' '.join(nertags))
    ############
    segmentor.release()
    postagger.release()
    # parser.release()
    recognizer.release()
    return word_list
예제 #10
0
def name_recognition(words, postags):
    '''
    Named-entity recognition.
    :param words: segmentation result
    :param postags: POS-tagging result
    :return:
    '''
    recognizer = NamedEntityRecognizer()
    # Load the NER model.
    recognizer.load('E:\\NLP-homework\\ltp-data-v3.3.1\\ltp_data\\ner.model')
    # Recognize the named entities.
    netags = recognizer.recognize(words, postags)

    # Join "entity + connector + entity" triples (place tags contain 's').
    pieces = []
    for idx in range(len(netags)):
        if idx >= len(words) - 2:
            continue
        middle = words[idx + 1]
        if ('s' in netags[idx] and 'O' in netags[idx + 1]
                and middle != '' and middle != ',' and 's' in netags[idx + 2]):
            pieces.append(words[idx] + middle + words[idx + 2] + "")
    result = ''.join(pieces)
    print(result)
    # Free the native model.
    recognizer.release()
    return netags
예제 #11
0
    def ltp_word(self):
        """Segment, POS-tag, dependency-parse and NER-tag self.content,
        then return the combined result via list_conversion().

        NOTE(review): MODELDIR and list_conversion are defined elsewhere
        in the file.
        """
        # Word segmentation
        segmentor = Segmentor()
        segmentor.load(os.path.join(MODELDIR, "cws.model"))
        words = segmentor.segment(self.content)
        #print("*************分词*****************")
        #print("\t".join(words))

        # POS tagging
        postagger = Postagger()
        postagger.load(os.path.join(MODELDIR, "pos.model"))
        postags = postagger.postag(words)
        #print("*************词性标注*************")
        #print(type(postags))
        #print("\t".join(postags))

        # Dependency parsing
        parser = Parser()
        parser.load(os.path.join(MODELDIR, "parser.model"))
        arcs = parser.parse(words, postags)
        #print("*************依存句法分析*************")
        #print(type(arcs))
        #print("\t".join("%d:%s" % (arc.head, arc.relation) for arc in arcs))

        # Split the parse result into separate head and relation lists.
        arcs_head = []
        arcs_relation = []
        for arc in arcs:
            arcs_head.append(arc.head)
            arcs_relation.append(arc.relation)

        # Named-entity recognition
        recognizer = NamedEntityRecognizer()
        recognizer.load(os.path.join(MODELDIR, "ner.model"))
        netags = recognizer.recognize(words, postags)
        #print("*************命名实体识别*************")
        #print("\t".join(netags))
        """
        # 语义角色标注
        labeller = SementicRoleLabeller()
        labeller.load(os.path.join(MODELDIR, "pisrl.model"))
        roles = labeller.label(words, postags, arcs)
        print("*************语义角色标注*************")
        for role in roles:
            print(role.index, "".join(
                ["%s:(%d,%d)" % (arg.name, arg.range.start, arg.range.end) for arg in role.arguments]))
        """

        segmentor.release()
        postagger.release()
        parser.release()
        recognizer.release()
        #labeller.release()

        # Convert the results into plain lists via the module-level helper.
        words_result = list_conversion(words, postags, netags, arcs_head,
                                       arcs_relation)

        return words_result
예제 #12
0
파일: hit_ltp.py 프로젝트: we1l1n/ch2sql
 def entity_recognize(cutting_list, tagging_list):
     """Return NE tags for segmented words + POS tags using LtpParser's model."""
     ner_model_path = os.path.join(LtpParser.ltp_path, 'ner.model')
     from pyltp import NamedEntityRecognizer
     recognizer = NamedEntityRecognizer()
     recognizer.load(ner_model_path)
     # Defensive copy before release(): the original returned the native
     # result vector after releasing the model that produced it — presumably
     # unsafe (TODO confirm against pyltp docs). A list is also friendlier
     # to callers and remains iterable/indexable like the original.
     ne_tags = list(recognizer.recognize(cutting_list, tagging_list))
     recognizer.release()
     return ne_tags
예제 #13
0
def ner(words, postags):
    """Run NER over *words*/*postags*, printing each word/tag pair."""
    recognizer = NamedEntityRecognizer()  # build the recognizer
    recognizer.load(os.path.join(LTP_DATA_DIR, 'ner.model'))  # load the model
    netags = recognizer.recognize(words, postags)  # one NE tag per word
    for token, label in zip(words, netags):
        print(token + '/' + label)
    recognizer.release()  # free the model
    return netags
예제 #14
0
def get_ner(words, postags):
    """LTP named-entity recognition; returns one NE tag per word as a list."""
    ner_model_path = os.path.join(LTP_TOP_DIR, 'ner.model')
    recognizer = NamedEntityRecognizer()
    recognizer.load(ner_model_path)
    # Copy before release(): the original called list(netags) AFTER releasing
    # the model, which presumably reads native memory the model owns
    # (TODO confirm against pyltp docs). Return value is unchanged: a list.
    netags = list(recognizer.recognize(words, postags))
    recognizer.release()
    return netags
def ltp_name_entity_recognizer(LTP_DATA_DIR, words, postags):
    """Run pyltp NER with the `ner.model` under *LTP_DATA_DIR*.

    :param LTP_DATA_DIR: directory containing the LTP model files
    :param words: segmented words
    :param postags: POS tags, one per word
    :return: list of NE tags, one per word
    """
    ner_model_path = os.path.join(LTP_DATA_DIR, 'ner.model')
    recognizer = NamedEntityRecognizer()  # create instance
    recognizer.load(ner_model_path)  # load model
    # Defensive copy: the original returned the native result vector after
    # release(), which presumably outlives its owner — TODO confirm. A plain
    # list is backward-compatible (iterable, indexable).
    netags = list(recognizer.recognize(words, postags))
    recognizer.release()  # release model
    return netags
예제 #16
0
파일: yu06.py 프로젝트: Minggggggggg/nlp
def ner(words, postags):
    """Tag *words* with NE labels, echoing each word/tag pair."""
    recognizer = NamedEntityRecognizer()  # build the recognizer
    recognizer.load('/Users/chenming/Spyder/3.3.1/ltp_data/ner.model')  # load the model
    netags = recognizer.recognize(words, postags)  # run recognition
    for token, label in zip(words, netags):
        print(token + '/' + label)
    recognizer.release()  # drop the model
    return netags
예제 #17
0
def ner(words, postags):
    """Named-entity recognition over pre-segmented input; returns the tags."""
    recognizer = NamedEntityRecognizer()
    # Model path is relative to the working directory.
    recognizer.load('../ltp_data/ner.model')
    ne_tags = recognizer.recognize(words, postags)
    recognizer.release()
    return ne_tags
예제 #18
0
def e_recognize(words, postags):
    """Run named-entity recognition; returns one NE tag per word as a list.

    NOTE(review): relies on a module-level ner_model_path defined elsewhere.
    """
    recognizer = NamedEntityRecognizer()  # create instance
    recognizer.load(ner_model_path)  # load model
    # Defensive copy before release(): the original returned the native
    # result vector after releasing the model — presumably unsafe (TODO
    # confirm). A list keeps the return backward-compatible.
    netags = list(recognizer.recognize(words, postags))
    recognizer.release()  # release model
    return netags
예제 #19
0
 def get_ner(self, word_list, postag_list, model):
     """Load the NER model at *model*, tag *word_list* and return the tags."""
     recognizer = NamedEntityRecognizer()
     recognizer.load(model)
     # One NE tag per input word.
     ne_tags = recognizer.recognize(word_list, postag_list)
     recognizer.release()  # free the native model
     return list(ne_tags)
예제 #20
0
    def segment(self, texts, use_tag_filter=True):
        """Run the pyltp pipeline over each text in *texts*.

        Returns five parallel lists (one entry per text): filtered words,
        POS tags, NE tags, dependency relations and dependency head words.
        NOTE(review): assumes self.cws_model_path / pos_model_path /
        ner_model_path / pas_model_path / stop_words / tags_filter are set
        by the enclosing class — confirm against __init__.
        """
        # global word_list, netags, postags, relation, heads
        words = []
        pos = []
        ner = []
        rel = []
        hea = []

        segmentor = Segmentor()
        segmentor.load_with_lexicon(self.cws_model_path, './dict/user_recg.dic')  # load model plus a custom user dictionary  self.dic_list

        postagger = Postagger()
        postagger.load(self.pos_model_path)

        recognizer = NamedEntityRecognizer()
        recognizer.load(self.ner_model_path)

        parser = Parser()
        parser.load(self.pas_model_path)

        for text in texts:
            text = text.lower()

            word_list = segmentor.segment(text)
            # Drop single-character tokens.
            word_list = [word for word in word_list if len(word) > 1]
            # word_list = [word for word in word_list if re.match("[\u0041-\u005a\u4e00-\u9fa5]+", word) != None]  # .decode('utf8') keep Chinese/English only
            word_list = [word.strip() for word in word_list if word.strip() not in self.stop_words]  # remove stop words

            # POS tagging
            posttags = postagger.postag(word_list)
            postags = list(posttags)

            # Named-entity recognition
            netags = recognizer.recognize(word_list, postags)

            # Dependency parsing
            arcs = parser.parse(word_list, postags)
            rely_id = [arc.head for arc in arcs]  # dependency head ids
            relation = [arc.relation for arc in arcs]  # dependency relations
            heads = ['Root' if id == 0 else word_list[id - 1] for id in rely_id]  # map head ids to head words

            if use_tag_filter:
                # Keep only words whose POS tag is in the configured filter set.
                dic = dict(zip(word_list, postags))
                word_list = [x for x in dic.keys() if dic[x] in self.tags_filter]

            words.append(word_list)
            pos.append(postags)
            ner.append(netags)
            rel.append(relation)
            hea.append(heads)

        segmentor.release()
        postagger.release()
        recognizer.release()
        parser.release()

        return words, pos, ner, rel, hea
예제 #21
0
파일: nlp.py 프로젝트: lgxt/web-crawler
def pyltp_ner(text):  # extract organization names with pyltp
    """Extract deduplicated organization names (Ni entities) from *text*."""
    LTP_DATA_DIR = Path.cwd().parent / 'ltp_model'  # directory holding the LTP models
    cws_model_path = os.path.join(LTP_DATA_DIR, 'cws.model')
    # Word segmentation
    segmentor = Segmentor()  # create instance
    segmentor.load(cws_model_path)  # load model
    words = segmentor.segment(text)  # segment
    words_list = list(words)  # words_list holds the segmentation result
    segmentor.release()  # release model

    # POS tagging
    pos_model_path = os.path.join(LTP_DATA_DIR,
                                  'pos.model')  # POS model path (`pos.model`)
    postagger = Postagger()  # create instance
    postagger.load(pos_model_path)  # load model
    postags = postagger.postag(words)  # POS tagging
    postags_list = list(postags)  # postags_list holds the POS-tagging result
    postagger.release()  # release model

    # Named-entity recognition
    ner_model_path = os.path.join(LTP_DATA_DIR,
                                  'ner.model')  # NER model path (`ner.model`)
    recognizer = NamedEntityRecognizer()  # create instance
    recognizer.load(ner_model_path)  # load model
    netags = recognizer.recognize(words, postags)  # run NER
    netags_list = list(netags)  # netags_list holds the NER result
    # NOTE(review): `data` is never used below — candidate for removal.
    data = {"reg": netags, "words": words, "tags": postags}
    # print(data)
    recognizer.release()  # release model

    # Drop tokens that are not named entities (tag 'O').
    a = len(words_list)
    words_list_1 = []
    postags_list_1 = []
    netags_list_1 = []
    for i in range(a):
        if netags_list[i] != 'O':
            words_list_1.append(words_list[i])
            postags_list_1.append(postags_list[i])
            netags_list_1.append(netags_list[i])

    # Collect organization names (Ni): single-token S-Ni entities, or a B-Ni
    # token followed by its I-Ni/E-Ni continuation tokens.
    a1 = len(words_list_1)
    organizations = []
    for i in range(a1):
        if netags_list_1[i] == 'S-Ni':
            organizations.append(words_list_1[i])
        elif netags_list_1[i] == 'B-Ni':
            temp_s = ""
            temp_s += words_list_1[i]
            j = i + 1
            while j < a1 and (netags_list_1[j] == 'I-Ni'
                              or netags_list_1[j] == 'E-Ni'):
                temp_s += words_list_1[j]
                j = j + 1
            organizations.append(temp_s)
    orignizations = list(set(organizations))  # deduplicate organization names
    return orignizations
def get_name_entity(sentence):
    """Print the NE tag sequence ("<index>-<tag>") for *sentence*.

    NOTE(review): relies on module-level ner_model_path, pyltp_cut and
    postagger defined elsewhere in the file; returns None.
    """
    recognizer = NamedEntityRecognizer()  # create instance
    recognizer.load(ner_model_path)  # load model
    words = list(pyltp_cut(sentence))  # segment via the pyltp_cut helper
    postags = list(postagger.postag(words))  # POS tagging
    netags = recognizer.recognize(words, postags)  # run NER
    tmp = [str(k + 1) + '-' + v for k, v in enumerate(netags)]  # e.g. "1-O", "2-S-Nh"
    print('\t'.join(tmp))
    recognizer.release()  # release model
def ner(words, postags):
    """NER over pre-segmented input; returns (tags, [(word, tag), ...])."""
    recognizer = NamedEntityRecognizer()  # create the recognizer
    recognizer.load(ner_model_path)  # load the model
    netags = recognizer.recognize(words, postags)  # run NER
    # Pair every word with its NE tag.
    word_neg = [(token, label) for token, label in zip(words, netags)]
    recognizer.release()  # free the model
    return netags, word_neg
예제 #24
0
def ner(words, postags):
    """Recognize named entities; prints each word/tag pair and returns the tags.

    NOTE(review): relies on a module-level modelPath defined elsewhere.
    """
    recognizer = NamedEntityRecognizer() # create instance
    model = "ner.model"
    recognizer.load(os.path.join(modelPath, model))  # load model
    netags = recognizer.recognize(words, postags)  # run NER
    for word, ntag in zip(words, netags):
        # Bug fix: `print word + ...` is Python 2 syntax and a SyntaxError
        # under Python 3; rewritten as a print() call (same output on py2).
        print(word + '/' + ntag)
    recognizer.release()  # release model
    return netags
예제 #25
0
    def net(self, word, post):
        """Run named-entity recognition over *word*/*post*; return a tag list."""
        # Build, load, query and dispose of the recognizer per call.
        recognizer = NamedEntityRecognizer()
        recognizer.load(self.ner_model_file)
        raw_tags = recognizer.recognize(word, post)
        # Materialize into a plain list before the model goes away.
        tags = list(raw_tags)
        recognizer.release()

        return tags
예제 #26
0
 def get_netags(self, words):
     """Run named-entity recognition over *words*; echo and return the tags."""
     # POS tags are a required input for the NER model.
     postags = self.get_postags(words)
     recognizer = NamedEntityRecognizer()
     recognizer.load(self.ner_model_path)
     ne_tags = recognizer.recognize(list(words), list(postags))
     print('\t'.join(ne_tags))
     recognizer.release()  # free the native model
     return list(ne_tags)
예제 #27
0
   def recog_entity(request):
        """Django view: split / segment / POS-tag / NER-tag the posted
        'user_text' and render index.html with entities highlighted.

        NOTE(review): sents/str1/str3 are computed but never used below.
        """
        # Punctuation tokens skipped when rendering the segmentation result.
        biaodian=["。","?","!",",","、",";",':','“', '”','’','‘','(',')','【','】','{','}','[',']','——','……', '.','—','·','<','>','《','》','_____']
        print("lkdjfsdjf")  # NOTE(review): leftover debug output — consider removing
        st=request.POST.get('user_text')
        # Sentence splitting
        sents = SentenceSplitter.split(st)  # split into sentences
        str1 = '<br>'.join(sents)

        # Word segmentation
        LTP_DATA_DIR = './itpmodel'  # path to the LTP model directory
        cws_model_path = os.path.join(LTP_DATA_DIR, 'cws.model')  # segmentation model path (`cws.model`)
        segmentor = Segmentor()  # create instance
        segmentor.load(cws_model_path)  # load model
        words = segmentor.segment(st)  # segment
        segmentor.release()  # release model

        # POS tagging
        pos_model_path = os.path.join(LTP_DATA_DIR, 'pos.model')  # POS model path (`pos.model`)
        postagger = Postagger()  # create instance
        postagger.load(pos_model_path)  # load model
        postags = postagger.postag(words)  # POS tagging
        postagger.release()  # release model

        # Named-entity recognition
        # -*- coding: utf-8 -*-
        ner_model_path = os.path.join(LTP_DATA_DIR, 'ner.model')  # NER model path (`ner.model`)
        recognizer = NamedEntityRecognizer()  # create instance
        recognizer.load(ner_model_path)  # load model
        netags = recognizer.recognize(words, postags)  # run NER
        str3='\t'.join(netags)
        recognizer.release()  # release model

        s=""
        str2=""
        i=0
        j=0
        count=0
        wor=list(words)
        net=list(netags)
        # Rebuild the input, bolding every token tagged as an entity
        # (entity linking is not implemented yet).
        while i < len(wor):
            if  net[i]!= 'O':
                s =s+ " <strong><small style='color:#02B7FE'>" + wor[i] + "</small></strong> "
            else:
                s =s+ wor[i]
            i+=1

        # Render the segmentation result, skipping punctuation tokens.
        while j < len(wor):
            if wor[j] in biaodian:
                j=j+1
            else:
                str2 =str2+"<h5 style='color:#FEB154;display: inline'>"+wor[j]+"</h5>"+"&nbsp&nbsp&nbsp"
                j=j+1
                count+=1
        return render_to_response('index.html', {'st':st,'rlt':s,"seg_word": "共计"+str(count)+"个:<br/>"+str2})
예제 #28
0
파일: test.py 프로젝트: WillSin/HIT
def recognize_word(words):
    """Smoke-test pyltp named-entity recognition on a fixed demo sentence.

    NOTE(review): the *words* parameter is ignored — the sentence and its
    POS tags are hard-coded below.
    """
    from pyltp import NamedEntityRecognizer
    recognizer = NamedEntityRecognizer()
    recognizer.load("../ltp/ner.model")

    # POS tags matching the hard-coded demo tokens below.
    postags = ['nh', 'r', 'r', 'v']
    netags = recognizer.recognize(['元芳', '你', '怎么', '看'], postags)
    print('\t'.join(netags))
    recognizer.release()
예제 #29
0
def ner(words, postags):
    """NER pass that echoes each word/tag pair; returns the tags as a list."""
    print('命名实体开始!')
    recognizer = NamedEntityRecognizer()
    recognizer.load(r'D:\Corpus\ltp_data_v3.4.0\ner.model')  # load the model
    netags = recognizer.recognize(words, postags)  # one NE tag per word
    for token, label in zip(words, netags):
        print(token + '/' + label)
    recognizer.release()  # free the model
    return list(netags)
예제 #30
0
def ner(words, postags):
    """Recognize named entities; prints progress and each word/tag pair."""
    print('命名实体开始')
    recognizer = NamedEntityRecognizer()
    recognizer.load(ner_model_path)  # load the model
    tags = recognizer.recognize(words, postags)  # run NER
    for token, label in zip(words, tags):
        print(token + '/' + label)
    recognizer.release()  # free the model
    return list(tags)
예제 #31
0
    def __init__(self):
        """Segment, POS-tag and NER-tag the module-level `data` text, storing
        the results on self.words / self.postags / self.netags.

        NOTE(review): reads self.LTP_DATA_DIR (set on the class elsewhere)
        and a module-level `data` variable — confirm both exist.
        """
        self.cws_model_path = os.path.join(self.LTP_DATA_DIR, 'cws.model')  # segmentation model path (`cws.model`)
        self.pos_model_path = os.path.join(self.LTP_DATA_DIR, 'pos.model')  # POS model path (`pos.model`)
        self.ner_model_path = os.path.join(self.LTP_DATA_DIR, 'ner.model')  # NER model path (`ner.model`)
        segmentor = Segmentor()
        segmentor.load(self.cws_model_path)
        self.words = segmentor.segment(data)
        # print("|".join(words))
        segmentor.release()


        postagger = Postagger() # create instance
        postagger.load(self.pos_model_path)  # load model
        self.postags = postagger.postag(self.words)  # POS tagging
        # print('\t'.join(postags))
        postagger.release()  # release model


        recognizer = NamedEntityRecognizer() # create instance
        recognizer.load(self.ner_model_path)  # load model
        self.netags = recognizer.recognize(self.words, self.postags)  # run NER
        # print('\t'.join(netags))
        recognizer.release()  # release model
예제 #32
0
파일: example.py 프로젝트: FrankBlood/pyltp
# pyltp example pipeline (continued): POS tagging, parsing, NER and SRL.
# NOTE(review): `segmentor`, `postagger`, `words` and MODELDIR are defined
# earlier in the original example script, outside this fragment.
postagger.load(os.path.join(MODELDIR, "pos.model"))
postags = postagger.postag(words)
# list-of-string parameter is support in 0.1.5
# postags = postagger.postag(["中国","进出口","银行","与","中国银行","加强","合作"])
# Bug fix: the bare Python 2 `print` statements in this fragment are
# SyntaxErrors under Python 3; rewritten as print() calls.
print("\t".join(postags))

parser = Parser()
parser.load(os.path.join(MODELDIR, "parser.model"))
arcs = parser.parse(words, postags)

print("\t".join("%d:%s" % (arc.head, arc.relation) for arc in arcs))

recognizer = NamedEntityRecognizer()
recognizer.load(os.path.join(MODELDIR, "ner.model"))
netags = recognizer.recognize(words, postags)
print("\t".join(netags))

labeller = SementicRoleLabeller()
labeller.load(os.path.join(MODELDIR, "srl/"))
roles = labeller.label(words, postags, netags, arcs)

for role in roles:
    print(role.index, "".join(
        ["%s:(%d,%d)" % (arg.name, arg.range.start, arg.range.end) for arg in role.arguments]))

segmentor.release()
postagger.release()
parser.release()
recognizer.release()
labeller.release()