def gen_word(self, text):
    """Segment ``text`` and keep every intermediate result on the instance.

    After the call the instance exposes:

    * ``text`` -- the input string,
    * ``word_net`` -- the coarse word net populated by ``gen_word_net``,
    * ``vertexs`` -- whatever ``viterbi`` returns for that net's rows,
    * ``word_net_optimum`` -- a second ``WordNet`` built from ``vertexs``.
    """
    self.text = text

    # Coarse segmentation: fill a fresh word net with candidate words.
    coarse_net = WordNet(text)
    self.word_net = coarse_net
    gen_word_net(text, coarse_net)

    # Viterbi pass over the coarse net.
    best_path = viterbi(coarse_net.vertexs)
    self.vertexs = best_path
    self.word_net_optimum = WordNet(text, vertexs=best_path)
 def gen_word(self, text):
     """Run coarse segmentation plus Viterbi on ``text``, caching the results.

     Sets ``self.text``, ``self.word_net`` (coarse net filled by
     ``gen_word_net``), ``self.vertexs`` (the value returned by ``viterbi``)
     and ``self.word_net_optimum`` (a ``WordNet`` built from ``self.vertexs``).
     """
     self.text = text

     # Coarse word net.
     rough_net = WordNet(text)
     self.word_net = rough_net
     gen_word_net(text, rough_net)

     # Viterbi.
     chosen = viterbi(rough_net.vertexs)
     self.vertexs = chosen
     self.word_net_optimum = WordNet(text, vertexs=chosen)
 def setUp(self):
     """Prepare the fixture: segment a sample sentence and keep each stage.

     Stores the sentence (``text``), the coarse word net (``word_net``), the
     ``viterbi`` result (``vertexs``) and a ``WordNet`` rebuilt from that
     result (``word_net_optimum``) on the test instance.
     """
     sentence = u"济南杨铭宇餐饮管理有限公司是由杨先生创办的餐饮企业"
     self.text = sentence

     # Coarse word net.
     coarse_net = WordNet(sentence)
     self.word_net = coarse_net
     gen_word_net(sentence, coarse_net)

     # Viterbi.
     best_path = viterbi(coarse_net.vertexs)
     self.vertexs = best_path
     self.word_net_optimum = WordNet(sentence, vertexs=best_path)
Example #4
0
 def setUp(self):
     """Prepare the fixture: segment a sample sentence, then merge by custom dict.

     Stores the sentence (``text``), the coarse word net (``word_net``), the
     ``viterbi`` result after it has been passed through
     ``combine_by_custom_dict`` with ``CustomDict().trie`` (``vertexs``), and a
     ``WordNet`` rebuilt from that merged result (``word_net_optimum``).
     """
     sentence = u"蓝翔给宁夏固原市彭阳县红河镇黑牛沟村捐赠了挖掘机"
     self.text = sentence

     # Coarse word net.
     coarse_net = WordNet(sentence)
     self.word_net = coarse_net
     gen_word_net(sentence, coarse_net)

     # Viterbi, followed by the custom-dictionary merge.
     raw_path = viterbi(coarse_net.vertexs)
     self.vertexs = raw_path
     merged_path = combine_by_custom_dict(raw_path, CustomDict().trie)
     self.vertexs = merged_path
     self.word_net_optimum = WordNet(sentence, vertexs=merged_path)
Example #5
0
    def test_combin_by_dict(self):
        """Check that ``combine_by_custom_dict`` merges single characters.

        A trie holding only the four single characters is used to build the
        word net; a second trie holding the whole phrase is then used to merge
        the per-character vertexes into one.
        """
        dat = DoubleArrayTrie()
        # Every key is a unicode literal: a bare "..." would be a byte string
        # under Python 2 and would not match the unicode text being segmented.
        dat.build([u"江", u"河", u"湖", u"海"])
        text = u"江河湖海"
        word_net = WordNet(text)
        gen_word_net(text, word_net, dat)
        # Take the first candidate of every row as the path to merge.
        vertexs = [v[0] for v in word_net.vertexs]
        # Presumably four characters plus start/end sentinel rows -- confirm.
        self.assertEqual(len(word_net), 6, u"自定义字典分词")

        combin_dat = DoubleArrayTrie()
        combin_dat.build(key=[u"江河湖海"], v=[u"江河湖海 n 1"])
        vertexs = combine_by_custom_dict(vertexs, combin_dat)
        # After merging only the head, the tail and the merged word remain.
        self.assertEqual(len(vertexs), 3, u"合并完成后应该只有前尾加中间词")
Example #6
0
    def test_combin_by_dict(self):
        """Check that ``combine_by_custom_dict`` merges single characters.

        The word net is generated with a trie that only knows the four single
        characters; the path is then merged using a trie that contains the
        complete phrase.
        """
        dat = DoubleArrayTrie()
        # All keys carry the ``u`` prefix so that, under Python 2, none of them
        # is a byte string mixed in with unicode keys.
        dat.build([u"江", u"河", u"湖", u"海"])
        text = u"江河湖海"
        word_net = WordNet(text)
        gen_word_net(text, word_net, dat)
        # First candidate of each row forms the path handed to the merge step.
        vertexs = [v[0] for v in word_net.vertexs]
        # Presumably four characters plus start/end sentinel rows -- confirm.
        self.assertEqual(len(word_net), 6, u"自定义字典分词")

        combin_dat = DoubleArrayTrie()
        combin_dat.build(key=[u"江河湖海"], v=[u"江河湖海 n 1"])
        vertexs = combine_by_custom_dict(vertexs, combin_dat)
        # Only the head, the tail and the single merged word should be left.
        self.assertEqual(len(vertexs), 3, u"合并完成后应该只有前尾加中间词")
    def test_recognition(self):
        """Check that person recognition yields each expected name vertex.

        The sentence is segmented (coarse net + Viterbi), passed through
        ``person_recognition.recognition``, re-run through Viterbi, and the
        final path must contain an ``nr 1`` vertex for each listed name.
        """
        sentence = u"签约仪式前,秦光荣、李纪恒、仇和、王春桂、张晓辉等一同会见了参加签约的企业家。"
        expected_names = (u"秦光荣", u"李纪恒", u"仇和", u"王春桂", u"张晓辉")

        # Coarse word net.
        coarse_net = WordNet(sentence)
        gen_word_net(sentence, coarse_net)

        # First Viterbi pass, then person-name recognition on the refined net.
        first_path = viterbi(coarse_net.vertexs)
        refined_net = WordNet(sentence, vertexs=first_path)
        person_recognition.recognition(first_path, refined_net, coarse_net)

        # Second Viterbi pass over the net after recognition.
        vertexs = viterbi(refined_net.vertexs)
        for name in expected_names:
            self.assertIn(Vertex(name, attribute=u"nr 1"), vertexs)
        print(vertexs)
Example #8
0
 def test_gen_word_net(self):
     """Check the shape of the coarse word net built for a sample phrase.

     The net must have two more rows than the text has characters, and each
     character row must hold the expected number of candidate words.
     """
     text = u"一举成名天下知"
     word_net = WordNet(text)
     gen_word_net(text, word_net)
     self.assertEqual(len(word_net.vertexs), len(text) + 2)

     # Candidates expected in each row (rows are 1-based here; row 0 and the
     # last row are presumably start/end sentinels -- confirm):
     #   1: 一举, 一举成名
     #   2: 举
     #   3: 成, 成名
     #   4: 名
     #   5: 天, 天下
     #   6: 下
     #   7: 知
     expected_row_sizes = [2, 1, 2, 1, 2, 1, 1]
     for row, expected in enumerate(expected_row_sizes, 1):
         # The message names the failing row, since the loop hides line numbers.
         self.assertEqual(len(word_net.vertexs[row]), expected, "row %d" % row)
Example #9
0
def _print_word_net(title, net):
    """Print ``title`` followed by the unicode rendering of ``net``."""
    print(title)
    print(unicode(net))


def seg_to_vertexs(text):
    """Segment ``text`` and return the final vertex sequence.

    Stages, each optional one gated by a ``Config`` flag:

    1. Build the coarse word net and run Viterbi over it.
    2. ``use_custom_dict``: pass the path through ``combine_by_custom_dict``.
    3. ``name_recognize`` / ``place_recognize``: run person and place
       recognition against a net rebuilt from the path. Their return values
       are ignored, so they presumably update that net in place -- confirm.
    4. Run Viterbi again over the rebuilt net.
    5. ``org_recognize``: rebuild the net from the new path and take the
       path returned by organization recognition.

    When ``Config.debug`` is set, the net is printed after stages 1, 3 and 5.
    NOTE(review): the last dump runs even when ``org_recognize`` is off, in
    which case it shows the net from stage 3 again.
    """
    # Coarse word net.
    coarse_net = WordNet(text)
    gen_word_net(text, coarse_net)
    if Config.debug:
        _print_word_net(u"打印粗分词网:", coarse_net)

    # Viterbi, optionally followed by the custom-dictionary merge.
    path = viterbi(coarse_net.vertexs)
    if Config.use_custom_dict:
        path = combine_by_custom_dict(path)
    refined_net = WordNet(text, vertexs=path)

    if Config.name_recognize:
        person_recognition.recognition(path, refined_net, coarse_net)
    if Config.place_recognize:
        place_recognition.recognition(path, refined_net, coarse_net)
    if Config.debug:
        _print_word_net(u"打印人名、地名识别词网:", refined_net)

    path = viterbi(refined_net.vertexs)

    if Config.org_recognize:
        refined_net = WordNet(text, vertexs=path)
        path = organization_recognition.recognition(path, refined_net, coarse_net)
    if Config.debug:
        _print_word_net(u"打印人组织识别词网:", refined_net)

    return path
Example #10
0
 def test_gen_word_net_include_num(self):
     """Check that a digits-only text yields a word net with no empty rows.

     The net must have two more rows than the text has characters, and no row
     may be an empty list.
     """
     text = u"123456"
     word_net = WordNet(text)
     gen_word_net(text, word_net)
     self.assertEqual(len(word_net.vertexs), len(text) + 2)
     # assertNotIn reports the container on failure, unlike assertTrue(...).
     self.assertNotIn([], word_net.vertexs, u"原始词网,不能可能有空节点")