Esempio n. 1
0
def role_viterbi(vertexs, wordnet_optimum, hmm, trie, recognition_attr, tag_func, viterbi_fun=viterbi_template):
    """Role-tag the vertices, match role patterns and add the merged words to the net.

    Steps performed:
      1. ``tag_func(vertexs)`` produces the role observations.
      2. ``viterbi_fun(observations, hmm)`` selects the role tags.
      3. The tags are stringified, concatenated and scanned with
         ``Searcher(trie, tag_str)``.
      4. For every match, the ``real_word`` values of the covered vertices
         are joined into a new ``Vertex`` carrying ``recognition_attr``,
         which is added to ``wordnet_optimum`` at the offset of the first
         covered vertex.
      5. ``viterbi`` is run on ``wordnet_optimum.vertexs`` and its result
         is returned.

    :param vertexs: current vertex sequence; the first and last entries are
        skipped when computing offsets (presumably begin/end sentinels --
        TODO confirm).
    :param wordnet_optimum: word net that receives the recognized vertices.
    :param hmm: model passed through to ``viterbi_fun``.
    :param trie: pattern dictionary passed to ``Searcher``.
    :param recognition_attr: attribute given to every recognized vertex.
    :param tag_func: callable producing the role observations from ``vertexs``.
    :param viterbi_fun: decoder called as ``viterbi_fun(observations, hmm)``.
    :return: result of ``viterbi(wordnet_optimum.vertexs)``.
    """
    tag_list = tag_func(vertexs)
    if Config.debug:
        observed = [u"[ %s %s ]" % (vertexs[i].real_word, tag)
                    for i, tag in enumerate(tag_list)]
        # Parenthesised form works as a Python 2 statement and a Python 3 call.
        print(u"角色观察: %s" % u"".join(observed))

    tag_list = viterbi_fun(tag_list, hmm)
    if Config.debug:
        labelled = [u"%s/%s" % (vertexs[i].real_word, tag)
                    for i, tag in enumerate(tag_list)]
        print(u"角色标注:[%s]" % u", ".join(labelled))

    # NOTE(review): match positions in tag_str are used below as indices into
    # vertexs, which assumes each tag stringifies to one character -- confirm.
    tag_str = "".join([str(x) for x in tag_list])
    search = Searcher(trie, tag_str)

    # Offset of each vertex, counted from 1; the head and tail vertices are
    # skipped and keep offset 0.
    vertexs_offset = [0] * len(vertexs)
    offset = 1
    for idx in range(1, len(vertexs) - 1):
        vertexs_offset[idx] = offset
        offset += len(vertexs[idx].real_word)

    while search.next():
        begin = search.begin
        end = begin + len(search.key)
        name_str = "".join([vertexs[i].real_word for i in range(begin, end)])

        # Add the recognized word to the word net.
        vertex = Vertex(name_str, attribute=recognition_attr)
        wordnet_optimum.add(vertexs_offset[begin], vertex)
    return viterbi(wordnet_optimum.vertexs)
 def gen_word(self, text):
     """Build the coarse word net for ``text`` plus a net seeded with its Viterbi path.

     Stores ``text``, ``word_net``, ``vertexs`` and ``word_net_optimum`` on
     the instance.
     """
     self.text = text
     # Coarse segmentation word net.
     coarse_net = WordNet(text)
     self.word_net = coarse_net
     gen_word_net(text, coarse_net)
     # Viterbi over the coarse net.
     best_path = viterbi(coarse_net.vertexs)
     self.vertexs = best_path
     self.word_net_optimum = WordNet(text, vertexs=best_path)
 def test_recognition_1_level(self):
     """The full company name must appear as a single ``nt 1`` vertex."""
     sentence = u"济南杨铭宇餐饮管理有限公司是由杨先生创办的餐饮企业"
     self.gen_word(sentence)
     # NOTE: a person-recognition pre-pass used to be sketched here but is
     # disabled; organization recognition runs directly on gen_word's output.
     organization_recognition.recognition(
         self.vertexs, self.word_net_optimum, self.word_net)
     result = viterbi(self.word_net_optimum.vertexs)
     expected = Vertex(u"济南杨铭宇餐饮管理有限公司", attribute=u"nt 1")
     self.assertIn(expected, result)
 def gen_word(self, text):
     """Segment ``text`` coarsely and keep the intermediate results on the instance.

     Sets ``text``, ``word_net``, ``vertexs`` and ``word_net_optimum``.
     """
     self.text = text
     # Coarse segmentation word net.
     net = WordNet(text)
     self.word_net = net
     gen_word_net(text, net)
     # Viterbi over the coarse net.
     path = viterbi(net.vertexs)
     self.vertexs = path
     self.word_net_optimum = WordNet(text, vertexs=path)
 def setUp(self):
     """Prepare the coarse net, its Viterbi path and the optimum net for the fixture sentence."""
     sentence = u"济南杨铭宇餐饮管理有限公司是由杨先生创办的餐饮企业"
     self.text = sentence
     # Coarse segmentation word net.
     coarse_net = WordNet(sentence)
     self.word_net = coarse_net
     gen_word_net(sentence, coarse_net)
     # Viterbi over the coarse net.
     best_path = viterbi(coarse_net.vertexs)
     self.vertexs = best_path
     self.word_net_optimum = WordNet(sentence, vertexs=best_path)
    def test_recognition(self):
        """Every person name in the sentence must come out as an ``nr 1`` vertex."""
        sentence = u"签约仪式前,秦光荣、李纪恒、仇和、王春桂、张晓辉等一同会见了参加签约的企业家。"

        # Coarse segmentation word net.
        coarse_net = WordNet(sentence)
        gen_word_net(sentence, coarse_net)

        # Viterbi over the coarse net, then person recognition on a net
        # seeded with that path.
        path = viterbi(coarse_net.vertexs)
        optimum_net = WordNet(sentence, vertexs=path)
        person_recognition.recognition(path, optimum_net, coarse_net)

        path = viterbi(optimum_net.vertexs)
        for name in (u"秦光荣", u"李纪恒", u"仇和", u"王春桂", u"张晓辉"):
            self.assertIn(Vertex(name, attribute=u"nr 1"), path)
        print(path)
Esempio n. 7
0
 def test_recognition(self):
     """Place recognition must leave all five place words in the final path."""
     place_recognition.recognition(
         self.vertexs, self.word_net_optimum, self.word_net)
     result = viterbi(self.word_net_optimum.vertexs)

     # These two are checked without an explicit attribute.
     for word in (u"宁夏", u"固原市"):
         self.assertIn(Vertex(word), result)
     # These three are checked with the ``ns 1`` attribute.
     for word in (u"彭阳县", u"红河镇", u"黑牛沟村"):
         self.assertIn(Vertex(word, attribute=u"ns 1"), result)
Esempio n. 8
0
 def setUp(self):
     """Prepare the coarse net, the custom-dictionary-merged path and the optimum net."""
     sentence = u"蓝翔给宁夏固原市彭阳县红河镇黑牛沟村捐赠了挖掘机"
     self.text = sentence
     # Coarse segmentation word net.
     coarse_net = WordNet(sentence)
     self.word_net = coarse_net
     gen_word_net(sentence, coarse_net)
     # Viterbi over the coarse net, then merge using the custom dictionary trie.
     merged_path = combine_by_custom_dict(viterbi(coarse_net.vertexs),
                                          CustomDict().trie)
     self.vertexs = merged_path
     self.word_net_optimum = WordNet(sentence, vertexs=merged_path)
 def test_recognition_1_level(self):
     """Organization recognition must yield the whole company name as an ``nt 1`` vertex."""
     sentence = u"济南杨铭宇餐饮管理有限公司是由杨先生创办的餐饮企业"
     self.gen_word(sentence)
     # NOTE: a person-recognition pre-pass used to be sketched here but is
     # disabled; organization recognition runs directly on gen_word's output.
     organization_recognition.recognition(
         self.vertexs, self.word_net_optimum, self.word_net)
     final_path = viterbi(self.word_net_optimum.vertexs)
     company = Vertex(u"济南杨铭宇餐饮管理有限公司", attribute=u"nt 1")
     self.assertIn(company, final_path)
Esempio n. 10
0
def seg_to_vertexs(text):
    """Segment ``text`` and return the resulting vertex sequence.

    Pipeline, with each optional stage gated by a ``Config`` flag:
    coarse word net -> Viterbi -> custom-dictionary merge
    (``use_custom_dict``) -> person (``name_recognize``) and place
    (``place_recognize``) recognition -> Viterbi -> organization
    recognition (``org_recognize``).

    When ``Config.debug`` is set, the word net is printed after the coarse
    stage, after person/place recognition and at the end.
    """

    def dump_net(title, net):
        # Print a titled dump of ``net``, only when debugging is enabled.
        if Config.debug:
            print(title)
            print(unicode(net))

    # Coarse segmentation word net.
    coarse_net = WordNet(text)
    gen_word_net(text, coarse_net)
    dump_net(u"打印粗分词网:", coarse_net)

    # Viterbi over the coarse net.
    path = viterbi(coarse_net.vertexs)
    if Config.use_custom_dict:
        path = combine_by_custom_dict(path)
    optimum_net = WordNet(text, vertexs=path)

    # The return values of these two recognizers are ignored; the path is
    # recomputed from the optimum net right after.
    if Config.name_recognize:
        person_recognition.recognition(path, optimum_net, coarse_net)
    if Config.place_recognize:
        place_recognition.recognition(path, optimum_net, coarse_net)
    dump_net(u"打印人名、地名识别词网:", optimum_net)

    path = viterbi(optimum_net.vertexs)

    # Organization recognition works on a fresh net seeded with the new path
    # and supplies the final path itself.
    if Config.org_recognize:
        optimum_net = WordNet(text, vertexs=path)
        path = organization_recognition.recognition(path, optimum_net, coarse_net)
    dump_net(u"打印人组织识别词网:", optimum_net)
    return path
Esempio n. 11
0
def role_viterbi(vertexs,
                 wordnet_optimum,
                 hmm,
                 trie,
                 recognition_attr,
                 tag_func,
                 viterbi_fun=viterbi_template):
    """Role-tag the vertices, match role patterns and add the merged words to the net.

    ``tag_func(vertexs)`` yields the role observations, which
    ``viterbi_fun(observations, hmm)`` reduces to the role tags. The tags
    are stringified, concatenated and scanned with
    ``Searcher(trie, tag_str)``. For every match the ``real_word`` values of
    the covered vertices are joined into a new ``Vertex`` with
    ``recognition_attr`` and added to ``wordnet_optimum`` at the offset of
    the first covered vertex.

    :param vertexs: current vertex sequence; the first and last entries are
        skipped when computing offsets (presumably begin/end sentinels --
        TODO confirm).
    :param wordnet_optimum: word net that receives the recognized vertices.
    :param hmm: model passed through to ``viterbi_fun``.
    :param trie: pattern dictionary passed to ``Searcher``.
    :param recognition_attr: attribute given to every recognized vertex.
    :param tag_func: callable producing the role observations from ``vertexs``.
    :param viterbi_fun: decoder called as ``viterbi_fun(observations, hmm)``.
    :return: result of ``viterbi(wordnet_optimum.vertexs)``.
    """
    tag_list = tag_func(vertexs)
    if Config.debug:
        observed = [u"[ %s %s ]" % (vertexs[i].real_word, tag)
                    for i, tag in enumerate(tag_list)]
        # Parenthesised form works as a Python 2 statement and a Python 3 call.
        print(u"角色观察: %s" % u"".join(observed))

    tag_list = viterbi_fun(tag_list, hmm)
    if Config.debug:
        labelled = [u"%s/%s" % (vertexs[i].real_word, tag)
                    for i, tag in enumerate(tag_list)]
        print(u"角色标注:[%s]" % u", ".join(labelled))

    # NOTE(review): match positions in tag_str are used below as indices into
    # vertexs, which assumes each tag stringifies to one character -- confirm.
    tag_str = ''.join([str(x) for x in tag_list])
    search = Searcher(trie, tag_str)

    # Offset of each vertex, counted from 1; the head and tail vertices are
    # skipped and keep offset 0.
    vertexs_offset = [0] * len(vertexs)
    offset = 1
    for idx in range(1, len(vertexs) - 1):
        vertexs_offset[idx] = offset
        offset += len(vertexs[idx].real_word)

    while search.next():
        begin = search.begin
        end = begin + len(search.key)
        name_str = "".join([vertexs[i].real_word for i in range(begin, end)])

        # Add the recognized word to the word net.
        vertex = Vertex(name_str, attribute=recognition_attr)
        wordnet_optimum.add(vertexs_offset[begin], vertex)
    return viterbi(wordnet_optimum.vertexs)
Esempio n. 12
0
def role_viterbi(vertexs, wordnet_optimum, hmm, trie, recognition_attr, tag_func):
    """Role-tag the vertices, match role patterns and add the merged words to the net.

    The observations from ``tag_func(vertexs)`` are decoded with
    ``viterbi_template(observations, hmm)``. The stringified tags are
    concatenated and scanned with ``Searcher(trie, ...)``. Each match
    becomes a new ``Vertex`` (joined ``real_word`` values, attribute
    ``recognition_attr``) added to ``wordnet_optimum``.

    :return: result of ``viterbi(wordnet_optimum.vertexs)``.
    """
    roles = viterbi_template(tag_func(vertexs), hmm)
    role_string = ''.join([str(role) for role in roles])
    matcher = Searcher(trie, role_string)

    # Offset of each vertex, counted from 1; the head and tail vertices are
    # skipped and keep offset 0.
    vertexs_offset = [0] * len(vertexs)
    running = 1
    for position, inner in enumerate(vertexs[1:-1], 1):
        vertexs_offset[position] = running
        running += len(inner.real_word)

    while matcher.next():
        covered = range(matcher.begin, matcher.begin + len(matcher.key))
        merged_word = "".join([vertexs[k].real_word for k in covered])

        # Add the recognized word to the word net.
        recognized = Vertex(merged_word, attribute=recognition_attr)
        wordnet_optimum.add(vertexs_offset[matcher.begin], recognized)
    return viterbi(wordnet_optimum.vertexs)