Example #1
def locationNER(text):
    results = []  # was used without being defined in the original
    # Segmentation first
    segmentor = Segmentor()  # initialize the instance
    segmentor.load(cws_model_path)  # load the model
    words = list(segmentor.segment(text))  # segmentation; copy before release
    # print('\t'.join(words))
    segmentor.release()

    # Then POS tagging
    postagger = Postagger()  # initialize the instance
    postagger.load(pos_model_path)  # load the model
    postags = postagger.postag(words)  # POS tagging
    postagger.release()  # release the model

    # Finally, location/organization entity recognition
    recognizer = NamedEntityRecognizer()  # initialize the instance
    recognizer.load(ner_model_path)  # load the model
    netags = list(recognizer.recognize(words, postags))  # named entity recognition
    recognizer.release()  # release the model (missing in the original)
    for i in range(len(netags)):
        if 'I-Ns' in netags[i] or 'I-Ni' in netags[i]:
            if 0 < i < len(words) - 1:  # guard the window at sentence boundaries
                results.append(words[i - 1] + words[i] + words[i + 1])
        if 'S-Ns' in netags[i] or 'S-Ni' in netags[i]:
            results.append(words[i])
    return results
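A minimal driver for the function above. The module-level names it relies on (cws_model_path, pos_model_path, ner_model_path) are not shown in the excerpt, so the directory below is an assumption:

import os
from pyltp import Segmentor, Postagger, NamedEntityRecognizer

LTP_DATA_DIR = './ltp_data_v3.4.0'  # assumed model directory
cws_model_path = os.path.join(LTP_DATA_DIR, 'cws.model')
pos_model_path = os.path.join(LTP_DATA_DIR, 'pos.model')
ner_model_path = os.path.join(LTP_DATA_DIR, 'ner.model')

print(locationNER('李明今天在北京大学开会。'))  # expect the location/organization mentions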
Example #2
def word_pos():
    # POS tagging with LTP
    candidate = pd.read_csv(r'../data/candidate_sentiment.csv', header=None)
    can_word = candidate[0].tolist()
    # Add columns to hold the POS tags
    candidate.insert(2, 'ltp_pos', 0)
    candidate.insert(3, 'jieba_pos', 0)
    candidate.columns = ['word', 'freq', 'ltp_pos', 'jieba_pos']

    LTP_DATA_DIR = '../ltp_data_v3.4.0/ltp_data_v3.4.0'  # path to the LTP model directory
    pos_model_path = os.path.join(LTP_DATA_DIR, 'pos.model')  # POS model path; the model file is `pos.model`

    postagger = Postagger()  # initialize the instance
    postagger.load(pos_model_path)  # load the model
    postags = list(postagger.postag(can_word))  # POS tagging; copy before release
    postagger.release()  # release the model
    candidate['ltp_pos'] = postags

    # POS tagging with jieba
    jieba_pos = []
    for index, row in candidate.iterrows():
        s = row['word']
        words = pseg.cut(s)
        pos = [w.flag for w in words]
        jieba_pos.append(' '.join(pos))

    candidate['jieba_pos'] = jieba_pos
    # Write back with the header row
    candidate.to_csv(r'../data/candidate_sentiment.csv', index=None)
Example #3
def cut_words():
    # Segmentation + skipping empty lines
    # POS tag set: http://ltp.readthedocs.io/zh_CN/latest/appendix.html
    cont = open('resource_new.txt', 'r', encoding='utf-8')
    f = open('key/cut_resouce.txt', 'w', encoding='utf-8')
    segmentor = Segmentor()  # initialize the instance
    # segmentor.load('cws.model')  # load the model without a lexicon
    segmentor.load_with_lexicon('module/cws.model',
                                'userdict.txt')  # load the model with a user lexicon
    postagger = Postagger()  # initialize the instance
    postagger.load('module/pos.model')  # load the model
    for sentence in cont:
        if sentence.strip() != '':
            words = segmentor.segment(sentence)  # segmentation
            pos_tags = postagger.postag(words)  # POS tagging
            for word, tag in zip(words, pos_tags):
                if tag != 'wp':  # 'wp' is the punctuation tag
                    f.write(word)
                else:
                    f.write('\n')
            f.write('\n')
    cont.close()
    f.close()
    segmentor.release()
    postagger.release()
Example #4
def segmentsentence(sentence):
    entity_list = []  # collects person-name ('Nh') entities; was undefined in the original
    segmentor = Segmentor()
    postagger = Postagger()
    parser = Parser()
    recognizer = NamedEntityRecognizer()

    segmentor.load("./ltpdata/ltp_data_v3.4.0/cws.model")
    postagger.load("./ltpdata/ltp_data_v3.4.0/pos.model")
    # parser.load("./ltpdata/ltp_data_v3.4.0/parser.model")
    recognizer.load("./ltpdata/ltp_data_v3.4.0/ner.model")
    #############
    word_list = list(segmentor.segment(sentence))  # copy before the models are released
    postags_list = postagger.postag(word_list)
    nertags = recognizer.recognize(word_list, postags_list)
    ############
    for word, ntag in zip(word_list, nertags):
        if ntag == 'Nh':
            entity_list.append(word)
    print(" ".join(word_list))
    print(' '.join(nertags))
    ############
    segmentor.release()
    postagger.release()
    # parser.release()
    recognizer.release()
    return word_list
Example #5
 def get_postags(self, words):
     postagger = Postagger()  # initialize the instance
     postagger.load(self.pos_model_path)  # load the model
     postags = list(postagger.postag(words))  # POS tagging; copy before release
     print('\t'.join(postags))
     postagger.release()  # release the model
     return postags
Example #6
class LTP:
    def __init__(self):
        self.segmentor = Segmentor()  # segmenter
        self.segmentor.load_with_lexicon(
            Config.SEGMENTOR_PATH, Config.PERSONAL_SEGMENTOR_PATH)  # load the model with a user lexicon
        self.postagger = Postagger()  # POS tagger
        self.postagger.load(Config.POSTAGGER_PATH)  # load the model
        self.parser = Parser()  # dependency parser
        self.recognizer = NamedEntityRecognizer()
        self.recognizer.load(Config.NAMED_ENTITY_RECONGNTION_PATH)
        self.parser.load(Config.PARSER_PATH)  # load the model
        self.labeller = SementicRoleLabeller()  # semantic role labeller
        self.labeller.load(Config.LABELLER_PATH)  # load the model
        self.negative_list = get_negative_list()
        self.no_list = get_no_list()
        self.limit_list = get_limit_list()
        self.special_list = get_special_list()
        self.key_sentences = []

    def __del__(self):
        """
        Release the models.
        """
        self.segmentor.release()
        self.postagger.release()
        self.parser.release()
        self.recognizer.release()  # missing in the original; avoids leaking the NER model
        self.labeller.release()
Example #7
def get_postag_list(words_list):

    postag = Postagger()
    postag.load(pos_model_path)
    postag_list = list(postag.postag(words_list))
    postag.release()
    return postag_list
Example #8
class pyltp_model():
    def __init__(self, LTP_DATA_DIR='/Users/didi/Desktop/ltp_data_v3.4.0'):
        cws_model_path = os.path.join(LTP_DATA_DIR, 'cws.model')
        pos_model_path = os.path.join(LTP_DATA_DIR, 'pos.model')
        ner_model_path = os.path.join(
            LTP_DATA_DIR, 'ner.model')  # NER model path; the model file is `ner.model`
        self.segmentor = Segmentor()  # initialize the instances
        self.postagger = Postagger()
        self.recognizer = NamedEntityRecognizer()

        self.segmentor.load(cws_model_path)  # load the models
        self.postagger.load(pos_model_path)
        self.recognizer.load(ner_model_path)

    def token(self, sentence):
        words = self.segmentor.segment(sentence)  # segmentation
        words = list(words)
        postags = self.postagger.postag(words)  # POS tagging
        postags = list(postags)
        netags = self.recognizer.recognize(words, postags)  # named entity recognition
        netags = list(netags)
        result = []
        for i, j in zip(words, netags):
            if j in ['S-Nh', 'S-Ni', 'S-Ns']:
                result.append(j)
                continue
            result.append(i)
        return result

    def close(self):
        self.segmentor.release()
        self.postagger.release()
        self.recognizer.release()  # release the models
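A short usage sketch for the class above; the model directory argument is an assumption, and token() replaces single-word person/place/organization mentions with their NER tags:

model = pyltp_model(LTP_DATA_DIR='./ltp_data_v3.4.0')  # assumed path
tokens = model.token('李明在北京大学读书。')
print(tokens)  # single-word entities appear as 'S-Nh' / 'S-Ns' / 'S-Ni'
model.close()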
Example #9
def extract_views(all_sents):
    segmentor = Segmentor()
    segmentor.load(r'/home/student/project-01/ltp_data/cws.model')
    postagger = Postagger()
    postagger.load(r'/home/student/project-01/ltp_data/pos.model')
    parser = Parser()
    parser.load(r'/home/student/project-01/ltp_data/parser.model')
    views_in_sents = []
    for i, sents in enumerate(all_sents):
        views_tmp = []
        for sent in sents:
            sent = sent.replace('\\n', '\n').strip()
            if len(sent) == 0:
                continue
            # words = list(jieba.cut(sent))
            words = list(segmentor.segment(sent))
            contains = contain_candidates(words)
            if len(contains) == 0:
                continue
            tags = list(postagger.postag(words))
            arcs = list(parser.parse(words, tags))
            sbv, head = get_sbv_head(arcs, words, tags)
            if sbv[0] is None or head[0] is None or head[0] not in contains:
                continue
            subj = sbv[0]
            view = clean_view(words[head[1] + 1:])
            views_tmp.append((subj, view, i))
        if len(views_tmp) > 0:
            views_in_sents.append({'sents': sents, 'views': views_tmp})
    segmentor.release()
    postagger.release()
    parser.release()
    return views_in_sents
Example #10
def postags_opt(words):
    # Set pyltp postagger model path
    LTP_DATA_DIR = '../ltp_data_v3.4.0'
    pos_model_path = os.path.join(LTP_DATA_DIR, 'pos.model')

    # Init postagger
    postagger = Postagger()

    # Load model
    postagger.load(pos_model_path)

    # Get postags
    postags = postagger.postag(words)

    # Close postagger
    postagger.release()

    postags = list(postags)

    # Init result list
    saying_words = []

    # Filter with tag 'verb'
    for index, tag in enumerate(postags):
        if tag == 'v':
            saying_words.append(words[index])

    return saying_words
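Since postags_opt() keeps only the words LTP tags as 'v' (verb), a quick check looks like this; the segmented input and the expected output are assumptions that depend on the model:

words = ['他', '笑', '着', '说', '没', '问题']
print(postags_opt(words))  # verbs only, e.g. ['笑', '说']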
Example #11
    def ltp_word(self):
        """Run segmentation, POS tagging, parsing, and NER over the sentence."""
        # Segmentation
        segmentor = Segmentor()
        segmentor.load(os.path.join(MODELDIR, "cws.model"))
        words = segmentor.segment(self.content)
        # print("************* Segmentation *************")
        # print("\t".join(words))

        # POS tagging
        postagger = Postagger()
        postagger.load(os.path.join(MODELDIR, "pos.model"))
        postags = postagger.postag(words)
        # print("************* POS tagging *************")
        # print(type(postags))
        # print("\t".join(postags))

        # Dependency parsing
        parser = Parser()
        parser.load(os.path.join(MODELDIR, "parser.model"))
        arcs = parser.parse(words, postags)
        # print("************* Dependency parsing *************")
        # print(type(arcs))
        # print("\t".join("%d:%s" % (arc.head, arc.relation) for arc in arcs))

        # Split the heads and relations out of the parse result
        arcs_head = []
        arcs_relation = []
        for arc in arcs:
            arcs_head.append(arc.head)
            arcs_relation.append(arc.relation)

        # Named entity recognition
        recognizer = NamedEntityRecognizer()
        recognizer.load(os.path.join(MODELDIR, "ner.model"))
        netags = recognizer.recognize(words, postags)
        # print("************* NER *************")
        # print("\t".join(netags))
        """
        # Semantic role labelling
        labeller = SementicRoleLabeller()
        labeller.load(os.path.join(MODELDIR, "pisrl.model"))
        roles = labeller.label(words, postags, arcs)
        print("************* SRL *************")
        for role in roles:
            print(role.index, "".join(
                ["%s:(%d,%d)" % (arg.name, arg.range.start, arg.range.end) for arg in role.arguments]))
        """

        segmentor.release()
        postagger.release()
        parser.release()
        recognizer.release()
        # labeller.release()

        # Convert the results into plain lists via list_conversion
        words_result = list_conversion(words, postags, netags, arcs_head,
                                       arcs_relation)

        return words_result
Example #12
 def get_postag_list(self, word_list, model):
     # Get the POS tags
     postag = Postagger()
     postag.load(model)
     postag_list = list(postag.postag(word_list))
     postag.release()
     return postag_list
Example #13
class LtpLanguageAnalysis(object):
    def __init__(self, model_dir="/home/xxx/ltp-3.4.0/ltp_data/"):
        self.segmentor = Segmentor()
        self.segmentor.load(os.path.join(model_dir, "cws.model"))
        self.postagger = Postagger()
        self.postagger.load(os.path.join(model_dir, "pos.model"))
        self.parser = Parser()
        self.parser.load(os.path.join(model_dir, "parser.model"))

    def analyze(self, text):
        # Segmentation
        words = self.segmentor.segment(text)
        print('\t'.join(words))

        # POS tagging
        postags = self.postagger.postag(words)
        print('\t'.join(postags))

        # Dependency parsing
        arcs = self.parser.parse(words, postags)
        print("\t".join("%d:%s" % (arc.head, arc.relation) for arc in arcs))

    def release_model(self):
        # Release the models
        self.segmentor.release()
        self.postagger.release()
        self.parser.release()
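A hedged usage sketch for LtpLanguageAnalysis; the model directory is an assumption, and analyze() prints its results rather than returning them:

ltp = LtpLanguageAnalysis(model_dir='./ltp_data/')  # assumed path
ltp.analyze('我爱北京天安门。')
ltp.release_model()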
Example #14
def ltp_pos_data():
    """POS tagging with LTP."""
    LTP_DATA_DIR = r'D:\BaiduNetdiskDownload\ltp_data_v3.4.0'  # LTP model directory; raw string avoids backslash escapes
    pos_model_path = os.path.join(LTP_DATA_DIR,
                                  'pos.model')  # POS model path; the model file is `pos.model`

    from pyltp import Postagger
    postagger = Postagger()  # initialize the instance
    postagger.load(pos_model_path)  # load the model
    result = []
    file = [(const.qc_train_seg, const.qc_train_pos),
            (const.qc_test_seg, const.qc_test_pos)]
    for i in range(2):
        with open(file[i][0], 'r', encoding='utf-8') as f:
            for line in f.readlines():
                attr = line.strip().split('\t')
                words = attr[1].split(" ")
                words_pos = postagger.postag(words)
                res = ' '.join([
                    "{}/_{}".format(words[i], words_pos[i])
                    for i in range(len(words))
                ])
                result.append("{}\t{}\n".format(attr[0], res))
        with open(file[i][1], 'w', encoding='utf-8') as f:
            f.writelines(result)
        result.clear()
    postagger.release()  # release the model
Example #15
class LTP_word():
    """docstring for parser_word
    deal: process text and return five values: words, POS tags, dependency arcs, semantic roles, and named entities
    release: free the cached models"""
    def __init__(self, model_path):
        self.model_path = model_path
        self.segmentor = Segmentor()  # segmenter instance
        self.segmentor.load_with_lexicon(path.join(self.model_path, 'cws.model'), path.join(self.model_path, 'dictionary_kfc.txt'))
        self.postagger = Postagger()  # POS tagger instance
        self.postagger.load(path.join(self.model_path, 'pos.model'))  # load the model
        self.recognizer = NamedEntityRecognizer()  # NER instance
        self.recognizer.load(path.join(self.model_path, 'ner.model'))
        self.parser = Parser()  # dependency parser instance
        self.parser.load(path.join(self.model_path, 'parser.model'))  # load the model
        self.labeller = SementicRoleLabeller()  # semantic role labeller instance
        self.labeller.load(path.join(self.model_path, 'srl'))
    def deal(self, text):  # extract everything that will be needed
        words = self.segmentor.segment(text)  # segmentation
        postags = self.postagger.postag(words)  # POS tagging
        netags = self.recognizer.recognize(words, postags)  # named entities
        arcs = self.parser.parse(words, postags)  # dependency parsing
        roles = self.labeller.label(words, postags, netags, arcs)  # semantic role labelling
        return words, postags, arcs, roles, netags
    def release(self):
        self.segmentor.release()
        self.postagger.release()
        self.recognizer.release()
        self.parser.release()
        self.labeller.release()
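A usage sketch for LTP_word, assuming the model directory also holds the author's dictionary_kfc.txt lexicon and the srl model directory:

ltp = LTP_word('./ltp_data_v3.4.0')  # assumed path
words, postags, arcs, roles, netags = ltp.deal('我爱北京天安门。')
print(list(words), list(postags), list(netags))
ltp.release()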
Example #16
class LtpTree(DepTree):
    def __init__(self, dict_path=None):
        super(LtpTree, self).__init__()  # the original called super(DepTree, ...), which skips DepTree's own __init__
        print("Loading LTP models...")
        self.segmentor = Segmentor()
        if dict_path is None:
            self.segmentor.load(os.path.join(MODELDIR, "cws.model"))
        else:
            self.segmentor.load_with_lexicon(os.path.join(MODELDIR, "cws.model"), dict_path)
        self.postagger = Postagger()
        self.postagger.load(os.path.join(MODELDIR, "pos.model"))
        self.parser = Parser()
        self.parser.load(os.path.join(MODELDIR, "parser.model"))
        print("Models loaded.")

    def parse(self, sentence):
        self.words = self.segmentor.segment(sentence)
        self.postags = self.postagger.postag(self.words)
        self.arcs = self.parser.parse(self.words, self.postags)
        for i in range(len(self.words)):
            if self.arcs[i].head == 0:
                self.arcs[i].relation = "ROOT"

    def release_model(self):
        # Release the models
        self.segmentor.release()
        self.postagger.release()
        self.parser.release()
Example #17
def namedEntityRecognize(sentence):
    '''
        Run named entity recognition with pyltp.
        Returns: 1) a list of (entity, tag) tuples and 2) a list of entity tags.
    '''
    namedEntityTagTupleList = []

    segmentor = Segmentor()
    # segmentor.load(inout.getLTPPath(index.CWS))
    segmentor.load_with_lexicon(inout.getLTPPath(index.CWS),
                                inout.getResourcePath('userDic.txt'))
    words = list(segmentor.segment(sentence))  # copy before release
    segmentor.release()
    postagger = Postagger()
    postagger.load(inout.getLTPPath(index.POS))
    postags = list(postagger.postag(words))  # copy before release
    postagger.release()
    recognizer = NamedEntityRecognizer()
    recognizer.load(inout.getLTPPath(index.NER))
    netags = list(recognizer.recognize(words, postags))  # copy before release
    recognizer.release()

    # Package the results as tuples
    for word, netag in zip(words, netags):
        namedEntityTagTupleList.append((word, netag))

    neTagList = '\t'.join(netags).split('\t')

    return namedEntityTagTupleList, neTagList
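A usage sketch, assuming the project's inout/index helpers resolve the model and lexicon paths:

pairs, tags = namedEntityRecognize('李明在北京大学读书。')
for word, tag in pairs:
    print(word, tag)  # e.g. 李明 S-Nh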
Example #18
File: nlp.py Project: 89935/OpenRE
class NLP:
    default_model_dir = r'D:\python-file\knowledge_extraction-master-tyz\ltp_data_v3.4.0'  # LTP model directory

    def __init__(self, model_dir=default_model_dir):
        self.default_model_dir = model_dir

        # POS tagging model
        self.postagger = Postagger()
        self.postagger.load(
            os.path.join(self.default_model_dir, 'pos.model'))

    def get_postag(self, word):
        """Get the POS tag of a single word.
        Args:
            word: str, the word
        Returns:
            pos_tag: str, the word's POS tag
        """
        pos_tag = self.postagger.postag([
            word,
        ])
        return pos_tag[0]

    def close(self):
        """
        Shut down and release the model.
        """
        self.postagger.release()
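Usage is one word at a time; the model_dir below is an assumption:

nlp = NLP(model_dir='./ltp_data_v3.4.0')
print(nlp.get_postag('中国'))  # e.g. 'ns'
nlp.close()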
Example #19
class Parse_Util(object):
    def __init__(self, lexicon_path='./data/lexicon'):
        # Segmentation
        self.segmentor = Segmentor()
        # self.segmentor.load_with_lexicon(cws_model_path, lexicon_path)
        self.segmentor.load(cws_model_path)
        # POS tagging
        self.postagger = Postagger()
        self.postagger.load(pos_model_path)
        # Dependency parsing
        self.parser = Parser()
        self.parser.load(par_model_path)
        # Named entity recognition
        self.recognizer = NamedEntityRecognizer()
        self.recognizer.load(ner_model_path)
        # jieba segmentation
        # jieba.load_userdict(lexicon_path)

    def __del__(self):
        self.segmentor.release()
        self.postagger.release()
        self.recognizer.release()
        self.parser.release()

    # Parse a sentence
    def parse_sentence(self, sentence):
        words = self.segmentor.segment(sentence)
        postags = self.postagger.postag(words)
        netags = self.recognizer.recognize(words, postags)
        arcs = self.parser.parse(words, postags)
        # child_dict_list = ParseUtil.build_parse_child_dict(words, arcs)

        return words, postags, netags, arcs
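A usage sketch for Parse_Util, assuming the module-level cws_model_path, pos_model_path, par_model_path, and ner_model_path are defined:

pu = Parse_Util()
words, postags, netags, arcs = pu.parse_sentence('我爱北京天安门。')
print('\t'.join('%d:%s' % (arc.head, arc.relation) for arc in arcs))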
Example #20
def test_ltp(document):

    LTP_DATA_DIR = r"D:\anaconda\envs\TF+3.5\Lib\site-packages\pyltp-model"
    # path to the LTP model directory
    par_model_path = os.path.join(
        LTP_DATA_DIR, 'parser.model')  # parser model path; the model file is `parser.model`
    cws_model_path = os.path.join(LTP_DATA_DIR,
                                  'cws.model')  # segmentation model path; the model file is `cws.model`
    pos_model_path = os.path.join(LTP_DATA_DIR,
                                  'pos.model')  # POS model path; the model file is `pos.model`

    segmentor = Segmentor()  # initialize the instance
    segmentor.load(cws_model_path)  # load the model
    words = list(segmentor.segment(document))  # segmentation; copy before release
    print("\nA")
    print("Segmentation result:")
    print('\t'.join(words))
    segmentor.release()  # release the model

    postagger = Postagger()  # initialize the instance
    postagger.load(pos_model_path)  # load the model
    postags = list(postagger.postag(words))  # POS tagging; copy before release
    print("\n")
    print("POS tagging result:")
    print('\t'.join(postags))
    postagger.release()  # release the model

    parser = Parser()  # initialize the instance
    parser.load(par_model_path)  # load the model
    arcs = parser.parse(words, postags)  # dependency parsing
    print("\n")
    print("Parsing result:")
    print("\t".join("%d:%s" % (arc.head, arc.relation) for arc in arcs))
    parser.release()  # release the model
Example #21
def new_relation_find(words, sentence):
    """Discover a new relation between two known entities in a sentence.

    :param words:
    :param sentence:
    :return:
    """
    # Dict holding the triple's entities, keyed by their offsets in the sentence
    tuple_dict = dict()
    index0 = -1
    index1 = -1
    found = False  # renamed from `bool`, which shadowed the builtin
    for entity_word in entity_words:
        if sentence.find(entity_word) != -1:
            if tuple_dict:
                # True means the two entity strings overlap
                if has_same(tuple_dict[index0], entity_word):
                    continue
                index1 = sentence.find(entity_word)
                tuple_dict[index1] = entity_word
                found = True
                break
            else:
                index0 = sentence.find(entity_word)
                tuple_dict[index0] = entity_word
    if found is False:
        return "", "", ""
    # Sorting would produce a list
    # tuple_dict = sorted(tuple_dict.items(), key=lambda d: d[0])
    words = "/".join(words).split("/")
    for key, value in tuple_dict.items():
        tuple_word = value
        words = init_words(tuple_word, words)
    # POS-tag the rebuilt word list
    postagger = Postagger()  # initialize the instance
    pos_model_path = os.path.join(LTP_DATA_DIR,
                                  'pos.model')  # POS model path; the model file is `pos.model`
    postagger.load_with_lexicon(pos_model_path, 'data/postagger.txt')  # load the model with a lexicon
    postags = list(postagger.postag(words))  # POS tagging; copy before release
    print('\t'.join(postags))
    postagger.release()  # release the model
    # Look for the new relation word
    relation_word = ""
    index_word = 0
    for index, postag in enumerate(postags):
        index_word += len(words[index])
        if index_word >= len(sentence):
            break
        if postag == 'v' and index_word - min(index0, index1) <= 2 and max(index0, index1) - index_word <= 2 \
                and not has_same(tuple_dict[index0], words[index]) and not has_same(tuple_dict[index1],
                                                                                    words[index]) \
                and words[index] not in wrong_relation:
            relation_word = words[index]
            break
    if relation_word == "":
        return "", "", ""
    return tuple_dict[min(index0,
                          index1)], tuple_dict[max(index0,
                                                   index1)], relation_word
Example #22
# Note: this example is Python 2 (ur-string literal, str.encode/decode usage)
class Opinion(object):
    def __init__(self, Dsent, industry_id):
        self.industry_id = industry_id
        self.Dsent = Dsent
        self.postagger = Postagger()  # initialize the instance
        self.postagger.load_with_lexicon(pos_model_path,
                                         '%s/conf/posttags.txt' % dir_path)
        self.sql = mysqls()
        self.opinionword = read_opinion(self.industry_id)
        self.n_v = []

    def cut_word(self, sents):
        # Segmentation
        words = [i.encode('utf-8', 'ignore')
                 for i in norm_cut(sents)]  # HMM=False
        return words

    def word_sex(self, ):
        # Get the POS tags
        postags = list(self.postagger.postag(self.words))  # POS tagging
        num = 0
        # the word following an adverb ('d')
        for tag in postags:
            if tag in ['d']:
                if num + 1 < len(postags):
                    if num != 0 and postags[num + 1] in ['n', 'v']:
                        if self.words[num+1] not in self.opinionword \
                            and len(self.words[num + 1].decode('utf-8','ignore')) > 1:
                            self.n_v.append(self.words[num + 1])
            # adjective ('a'), idiom ('i'), or modifier ('b') words
            if tag in ['a', 'i', 'b']:
                if self.words[num] not in self.opinionword\
                        and len(self.words[num].decode('utf-8','ignore')) > 1:
                    self.n_v.append(self.words[num])
            num += 1
        return postags

    def prepare(self, ):
        for id, sentences in self.Dsent.items():
            split_sentence = re.split(
                ur'[,,()()、: …~?。!. !?]?',
                sentences.decode('utf-8', 'ignore').strip())
            for sent in split_sentence:
                self.words = self.cut_word(sent.encode('utf-8', 'ignore'))
                self.postags = self.word_sex()
                cword = Counter(self.n_v)

                lresult = heapq.nlargest(500,
                                         cword.items(),
                                         key=lambda x: x[1])
                # lword = []
                # for rg in lresult:
                #     w, n = rg
                #     lword.append(w)
                # self.sql.insert(self.industry_id, lword)
        self.postagger.release()  # release the model
        # self.parser.release()
        # outfile.close()
        return lresult
Example #23
def ltp_postagger(LTP_DATA_DIR, words):
    # POS model path; the model file is `pos.model`
    pos_model_path = os.path.join(LTP_DATA_DIR, 'pos.model')
    postagger = Postagger()  # initialize the instance
    postagger.load(pos_model_path)  # load the model
    postags = list(postagger.postag(words))  # POS tagging; copy before release
    postagger.release()  # release the model
    return postags
Example #24
def posttagger(words):
    postagger = Postagger()  # initialize the instance
    postagger.load(pos_model_path)  # load the model
    postags = list(postagger.postag(words))  # POS tagging; copy before release
    # for word, tag in zip(words, postags):
    #     print(word + '/' + tag)
    postagger.release()  # release the model
    return postags
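The posttagger variants on this page differ only in where pos.model lives. Because postag() returns a sequence backed by the tagger's own memory, the result is copied with list() before release(); a minimal template, with the model path as an assumption:

import os
from pyltp import Postagger

def tag_words(words, model_dir='./ltp_data_v3.4.0'):  # model_dir is an assumption
    postagger = Postagger()
    postagger.load(os.path.join(model_dir, 'pos.model'))
    try:
        return list(postagger.postag(words))  # copy out before releasing
    finally:
        postagger.release()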
Example #25
def posttagger(words):
    postagger = Postagger()  # initialize the instance
    postagger.load(os.path.join(LTP_DATA_DIR, 'pos.model'))  # load the model
    postags = list(postagger.postag(words))  # POS tagging; copy before release
    for word, tag in zip(words, postags):
        print(word + '/' + tag)
    postagger.release()  # release the model
    return postags
Example #26
def posttagger(words):
    postagger = Postagger()
    postagger.load('E:\\git\\ltp-data-v3.3.1\\ltp_data\\pos.model')  # the original path lacked a separator before `pos.model`
    posttags = postagger.postag(words)  # POS tagging
    postags = list(posttags)
    postagger.release()  # release the model
    # print(type(postags))
    return postags
Example #27
def posttagger(words):
    postagger = Postagger()
    postagger.load(r'D:\Corpus\ltp_data_v3.4.0\pos.model')
    posttags = postagger.postag(words)  # POS tagging
    postags = list(posttags)
    postagger.release()  # release the model
    # print(type(postags))
    return postags
Example #28
 def pos_tagging(cutting_list):
     pos_model_path = os.path.join(LtpParser.ltp_path, 'pos.model')
     from pyltp import Postagger
     pos_tagger = Postagger()
     pos_tagger.load(pos_model_path)
     tags = list(pos_tagger.postag(cutting_list))  # copy before release
     pos_tagger.release()
     return tags
Example #29
def posttagger(words):
    postagger = Postagger()
    postagger.load('D:\\ltp_data\\pos.model')
    posttags = postagger.postag(words)  # POS tagging
    postags = list(posttags)
    postagger.release()  # release the model
    # print(type(postags))
    return postags
Example #30
def posttagger(words):
    postagger = Postagger()  # initialize the instance
    postagger.load(
        r'D:\SUFE\ComputerContest\QASystem\DrQA-CN-master\data\ltp_data_v3.4.0\pos.model'
    )
    postags = list(postagger.postag(words))  # POS tagging; copy before release
    postagger.release()
    return postags
Example #31
    def __init__(self):
        self.cws_model_path = os.path.join(self.LTP_DATA_DIR, 'cws.model')  # segmentation model path; the model file is `cws.model`
        self.pos_model_path = os.path.join(self.LTP_DATA_DIR, 'pos.model')  # POS model path; the model file is `pos.model`
        self.ner_model_path = os.path.join(self.LTP_DATA_DIR, 'ner.model')  # NER model path; the model file is `ner.model`
        segmentor = Segmentor()
        segmentor.load(self.cws_model_path)
        self.words = list(segmentor.segment(data))  # copy before release
        # print("|".join(self.words))
        segmentor.release()

        postagger = Postagger()  # initialize the instance
        postagger.load(self.pos_model_path)  # load the model
        self.postags = list(postagger.postag(self.words))  # POS tagging; copy before release
        # print('\t'.join(self.postags))
        postagger.release()  # release the model

        recognizer = NamedEntityRecognizer()  # initialize the instance
        recognizer.load(self.ner_model_path)  # load the model
        self.netags = list(recognizer.recognize(self.words, self.postags))  # named entity recognition
        # print('\t'.join(self.netags))
        recognizer.release()  # release the model
Example #32
postagger.load(os.path.join(MODELDIR, "pos.model"))
postags = postagger.postag(words)
# passing a list of strings is supported since 0.1.5
# postags = postagger.postag(["中国","进出口","银行","与","中国银行","加强","合作"])
print("\t".join(postags))

parser = Parser()
parser.load(os.path.join(MODELDIR, "parser.model"))
arcs = parser.parse(words, postags)

print("\t".join("%d:%s" % (arc.head, arc.relation) for arc in arcs))

recognizer = NamedEntityRecognizer()
recognizer.load(os.path.join(MODELDIR, "ner.model"))
netags = recognizer.recognize(words, postags)
print("\t".join(netags))

labeller = SementicRoleLabeller()
labeller.load(os.path.join(MODELDIR, "srl/"))
roles = labeller.label(words, postags, netags, arcs)

for role in roles:
    print(role.index, "".join(
        ["%s:(%d,%d)" % (arg.name, arg.range.start, arg.range.end) for arg in role.arguments]))

segmentor.release()
postagger.release()
parser.release()
recognizer.release()
labeller.release()
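Every example above pairs load() with a hand-written release(). A small context manager (a sketch, not part of pyltp) guarantees the release even when an exception interrupts the pipeline:

from contextlib import contextmanager
from pyltp import Postagger

@contextmanager
def loaded_postagger(model_path):
    tagger = Postagger()
    tagger.load(model_path)
    try:
        yield tagger
    finally:
        tagger.release()

with loaded_postagger('./ltp_data_v3.4.0/pos.model') as tagger:  # assumed path
    print('\t'.join(tagger.postag(['中国', '进出口', '银行'])))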