def parse_pattern(tag_str, vertexs, wordnet_optimum, wordnet_all):
    """Expand composite role tags (U and V) into two tags and two vertices.

    ``tag_str`` and ``vertexs`` are walked in parallel by index:

    * a ``NR.U`` tag becomes ``NR.K`` followed by ``NR.B``;
    * a ``NR.V`` tag becomes ``NR.E`` (when the preceding input tag is
      ``NR.B``) or ``NR.D`` (otherwise), followed by ``NR.L``;
    * any other tag, and its vertex, is passed through unchanged.

    For U and V the vertex's ``real_word`` is split into "everything but the
    last character" and "the last character", and each piece is wrapped in a
    new ``Vertex`` created without an attribute.

    Args:
        tag_str: indexable sequence of tags, each compared against
            ``str(NR.<X>)`` (presumably a string with one character per
            tag -- confirm against the caller).
        vertexs: sequence of vertices aligned with ``tag_str``; only
            ``real_word`` is read.
        wordnet_optimum: unused here; kept so the signature stays compatible.
        wordnet_all: unused here; kept so the signature stays compatible.

    Returns:
        A tuple ``(new_tag_str, new_vertexs)`` where ``new_tag_str`` is the
        expanded tags joined into one string and ``new_vertexs`` is the
        matching list of vertices.
    """
    tag_u = str(NR.U)
    tag_v = str(NR.V)
    tag_b = str(NR.B)

    new_tag_list = []
    new_vertexs = []
    for i, t in enumerate(tag_str):
        if t == tag_u:
            new_tag_list.append(str(NR.K))
            new_tag_list.append(tag_b)
        elif t == tag_v:
            # Only look back when a previous tag exists: at i == 0 the index
            # -1 would silently wrap around to the *last* tag.
            if i > 0 and tag_str[i - 1] == tag_b:
                new_tag_list.append(str(NR.E))
            else:
                new_tag_list.append(str(NR.D))
            new_tag_list.append(str(NR.L))
        else:
            new_tag_list.append(t)
            new_vertexs.append(vertexs[i])
            continue

        # U and V share the same split: all-but-last character, then the
        # last character.
        word = vertexs[i].real_word
        new_vertexs.append(Vertex(word[:-1]))
        new_vertexs.append(Vertex(word[-1]))
    return "".join(new_tag_list), new_vertexs
Beispiel #2
0
 def test_vector(self):
     """Check Vertex equality and list membership.

     Two vertices built from the same word and attribute string must
     compare equal; changing only the attribute string must make them
     unequal. ``in`` on a list has to follow the same rule.
     """
     reference = Vertex("test", attribute="nr 1")
     twin = Vertex("test", attribute="nr 1")
     different_attr = Vertex("test", attribute="nr1 1")

     # Direct comparison.
     self.assertEqual(reference, twin)
     self.assertNotEqual(reference, different_attr)

     # Membership, which relies on the same equality.
     self.assertIn(reference, [twin])
     self.assertNotIn(reference, [different_attr])
Beispiel #3
0
 def test_recognition(self):
     """Run place recognition on the fixture and check the decoded path.

     Uses the vertices and word nets stored on the test instance, re-runs
     viterbi over the optimum net, and expects every place name below to be
     present in the resulting path.
     """
     place_recognition.recognition(
         self.vertexs, self.word_net_optimum, self.word_net)
     decoded_path = viterbi(self.word_net_optimum.vertexs)

     # These two are compared without an explicit attribute.
     for place in (u"宁夏", u"固原市"):
         self.assertIn(Vertex(place), decoded_path)

     # These three must carry the "ns 1" attribute.
     for place in (u"彭阳县", u"红河镇", u"黑牛沟村"):
         self.assertIn(Vertex(place, attribute=u"ns 1"), decoded_path)
    def test_recognition(self):
        """Run person recognition end to end on a sentence with five names.

        Builds the coarse word net, decodes it, feeds the result to the
        person recognizer, decodes the optimum net again and expects each
        name to appear as a vertex with the "nr 1" attribute.
        """
        sentence = u"签约仪式前,秦光荣、李纪恒、仇和、王春桂、张晓辉等一同会见了参加签约的企业家。"

        # Coarse segmentation word net.
        full_net = WordNet(sentence)
        gen_word_net(sentence, full_net)

        # First viterbi pass over the coarse net.
        coarse_path = viterbi(full_net.vertexs)
        optimum_net = WordNet(sentence, vertexs=coarse_path)

        person_recognition.recognition(coarse_path, optimum_net, full_net)

        # Second viterbi pass over the net the recognizer was given.
        final_path = viterbi(optimum_net.vertexs)
        for person in (u"秦光荣", u"李纪恒", u"仇和", u"王春桂", u"张晓辉"):
            self.assertIn(Vertex(person, attribute=u"nr 1"), final_path)
        print(final_path)
 def test_recognition_1_level(self):
     """Recognize an organization name using the organization pass only.

     The sentence is segmented through ``self.gen_word``; the organization
     recognizer then runs on the fixture's word nets and the full company
     name is expected in the re-decoded path with the "nt 1" attribute.
     """
     sentence = u"济南杨铭宇餐饮管理有限公司是由杨先生创办的餐饮企业"
     self.gen_word(sentence)

     # NOTE: no person-recognition pass is run before the organization pass
     # in this test.
     organization_recognition.recognition(
         self.vertexs, self.word_net_optimum, self.word_net)

     decoded_path = viterbi(self.word_net_optimum.vertexs)
     expected = Vertex(u"济南杨铭宇餐饮管理有限公司", attribute=u"nt 1")
     self.assertIn(expected, decoded_path)
Beispiel #6
0
    def test_role_tag(self):
        """Check person role tagging and its viterbi decoding.

        A three-word segment wrapped in begin/end tag vertices is role-tagged;
        the observation for the third entry and the decoded tag at every
        position are compared against fixed expectations.
        """
        segments = [new_tag_vertex(TAG_BIGIN)]
        segments.extend(
            Vertex(word, attribute=Attribute(u'n 1'))
            for word in (u"秦", u"光荣", u"同志")
        )
        segments.append(new_tag_vertex(TAG_END))

        observations = role_tag(segments)
        self.assertTrue(isinstance(observations, list))
        self.assertEqual(observations[2].to_tuple(), (NR.Z, 29, NR.L, 2))

        decoded = viterbi_roletag(observations, PersonTranMatrix().hmm)
        # Only the first position carries a custom failure message.
        self.assertEqual(decoded[0], NR.A, u"人名识别,第一个标识应该为TAG_BAGIN")
        for position, expected_tag in enumerate((NR.B, NR.Z, NR.L, NR.A), 1):
            self.assertEqual(decoded[position], expected_tag)
 def test_organization_recognition(self):
     """Recognize a military unit name after person and place passes.

     The traditional-Chinese input is converted to simplified first. The
     organization recognizer's return value is checked directly for the
     expected vertex with the "nt 1" attribute.
     """
     simplified = traditional_to_simplified(u"馬總統上午前往陸軍航空601旅,")
     # Switches debug mode on at the Config level; nothing in this test
     # switches it back.
     Config.debug = True
     self.gen_word(simplified)

     # Person first, then place, both against the fixture's word nets.
     for recognizer in (person_recognition, place_recognition):
         recognizer.recognition(self.vertexs, self.word_net_optimum,
                                self.word_net)

     # The organization pass gets a fresh optimum net built from the
     # current vertices.
     fresh_optimum = WordNet(self.text, vertexs=self.vertexs)
     recognized = organization_recognition.recognition(
         self.vertexs, fresh_optimum, self.word_net)
     dump_vertexs(recognized)
     self.assertIn(Vertex(u"陆军航空601旅", attribute=u"nt 1"), recognized)
 def test_recognition_2_level(self):
     """Recognize a company name after person and place passes have run.

     Same sentence as the single-pass variant, but person and place
     recognition are applied first and the organization recognizer works on
     a freshly built optimum net. Its return value is asserted on directly;
     no additional viterbi pass is run afterwards.
     """
     sentence = u"济南杨铭宇餐饮管理有限公司是由杨先生创办的餐饮企业"
     self.gen_word(sentence)

     # Person first, then place, both against the fixture's word nets.
     for recognizer in (person_recognition, place_recognition):
         recognizer.recognition(self.vertexs, self.word_net_optimum,
                                self.word_net)

     fresh_optimum = WordNet(self.text, vertexs=self.vertexs)
     recognized = organization_recognition.recognition(
         self.vertexs, fresh_optimum, self.word_net)
     dump_vertexs(recognized)

     expected = Vertex(u"济南杨铭宇餐饮管理有限公司", attribute=u"nt 1")
     self.assertIn(expected, recognized)
Beispiel #9
0
def role_viterbi(vertexs,
                 wordnet_optimum,
                 hmm,
                 trie,
                 recognition_attr,
                 tag_func,
                 viterbi_fun=viterbi_template):
    """Role-tag a vertex path, add pattern matches to the net and re-decode.

    Steps:
      1. ``tag_func(vertexs)`` produces the role observations.
      2. ``viterbi_fun(tag_list, hmm)`` picks one tag per position.
      3. The tags are stringified and joined, then scanned with
         ``Searcher(trie, tag_str)``.
      4. For every match, the ``real_word`` of the covered vertices is
         concatenated into a new ``Vertex`` carrying ``recognition_attr``,
         which is added to ``wordnet_optimum`` at the match's start offset.
      5. ``viterbi`` is run over ``wordnet_optimum.vertexs``.

    Args:
        vertexs: current path of vertices; the first and last entries are
            skipped when offsets are computed (presumably begin/end
            markers -- confirm with the caller).
        wordnet_optimum: word net that receives the new vertices; mutated
            in place through ``add``.
        hmm: model handed to ``viterbi_fun`` unchanged.
        trie: pattern structure handed to ``Searcher`` unchanged.
        recognition_attr: attribute given to every vertex created here.
        tag_func: callable mapping ``vertexs`` to the role observations.
        viterbi_fun: callable ``(tag_list, hmm) -> tags``.

    Returns:
        The result of ``viterbi(wordnet_optimum.vertexs)``.
    """
    tag_list = tag_func(vertexs)
    if Config.debug:
        observed = [u"[ %s %s ]" % (vertexs[i].real_word, tag)
                    for i, tag in enumerate(tag_list)]
        # Call form with one argument prints the same on Python 2 and is
        # valid syntax on Python 3.
        print(u"角色观察: %s" % u"".join(observed))

    tag_list = viterbi_fun(tag_list, hmm)
    if Config.debug:
        labelled = [u"%s/%s" % (vertexs[i].real_word, tag)
                    for i, tag in enumerate(tag_list)]
        print(u"角色标注:[%s]" % u", ".join(labelled))

    tag_str = ''.join([str(x) for x in tag_list])
    search = Searcher(trie, tag_str)

    # Offset of each vertex inside the word net. Counting starts at 1 and
    # the first/last vertices keep offset 0 (head and tail are skipped).
    vertexs_offset = [0] * len(vertexs)
    offset = 1
    for idx in range(1, len(vertexs) - 1):
        vertexs_offset[idx] = offset
        offset += len(vertexs[idx].real_word)

    while search.next():
        # A match spans len(search.key) consecutive vertices starting at
        # search.begin.
        begin = search.begin
        name_str = "".join([vertexs[i].real_word
                            for i in range(begin, begin + len(search.key))])

        # Add the recognized word to the word net.
        vertex = Vertex(name_str, attribute=recognition_attr)
        wordnet_optimum.add(vertexs_offset[begin], vertex)
    return viterbi(wordnet_optimum.vertexs)
Beispiel #10
0
 def test_word_net_insert(self):
     text = u"1234567890"
     word_net_all = WordNet(text)
     for i, c in enumerate(text):
         word_net_all.add(i + 1, Vertex(c))