def role_viterbi(vertexs, wordnet_optimum, hmm, trie, recognition_attr, tag_func, viterbi_fun=viterbi_template):
    """Role-tag ``vertexs``, add every pattern match to the word net, re-run Viterbi.

    ``tag_func(vertexs)`` supplies the observed tags, ``viterbi_fun`` decodes
    them against ``hmm``, and the decoded tags are concatenated into one
    string that is scanned with ``Searcher(trie, ...)``.  For each match the
    ``real_word`` values of the covered vertices are joined into a new
    ``Vertex`` carrying ``recognition_attr``, which is added to
    ``wordnet_optimum``.

    Args:
        vertexs: Indexable sequence of vertices exposing ``real_word``.  The
            first and last entries are skipped when offsets are computed.
        wordnet_optimum: Word net that receives the new vertices through
            ``add(offset, vertex)`` and is read back through ``.vertexs``.
        hmm: Model object handed unchanged to ``viterbi_fun``.
        trie: Pattern structure handed unchanged to ``Searcher``.
        recognition_attr: Attribute given to every vertex created here.
        tag_func: Callable mapping ``vertexs`` to the observed tag sequence.
        viterbi_fun: Decoder called as ``viterbi_fun(tag_list, hmm)``.

    Returns:
        The value of ``viterbi(wordnet_optimum.vertexs)`` computed after all
        matches have been added.
    """
    tag_list = tag_func(vertexs)
    if Config.debug:
        observed = [u"[ %s %s ]" % (vertexs[i].real_word, tag) for i, tag in enumerate(tag_list)]
        # Parenthesised single-argument print behaves the same on Python 2
        # and is also valid on Python 3, unlike the bare print statement.
        print(u"角色观察: %s" % u"".join(observed))

    tag_list = viterbi_fun(tag_list, hmm)
    if Config.debug:
        labelled = [u"%s/%s" % (vertexs[i].real_word, tag) for i, tag in enumerate(tag_list)]
        print(u"角色标注:[%s]" % u", ".join(labelled))

    # NOTE(review): match positions in this string are used directly as
    # indexes into ``vertexs`` below, which presumes every tag renders as a
    # single character -- confirm against the tag type.
    tag_str = "".join([str(tag) for tag in tag_list])
    search = Searcher(trie, tag_str)

    # Offset of each vertex, obtained by accumulating word lengths.  The head
    # and tail entries are skipped and keep offset 0; counting starts at 1.
    vertexs_offset = [0] * len(vertexs)
    offset = 1
    for i, inner in enumerate(vertexs[1:-1], 1):
        vertexs_offset[i] = offset
        offset += len(inner.real_word)

    while search.next():
        begin = search.begin
        name_str = "".join([vertexs[i].real_word for i in range(begin, begin + len(search.key))])
        # Add the merged word to the word net.
        vertex = Vertex(name_str, attribute=recognition_attr)
        wordnet_optimum.add(vertexs_offset[begin], vertex)

    return viterbi(wordnet_optimum.vertexs)
def gen_word(self, text):
    """Segment ``text`` and keep every intermediate result on the instance.

    Sets ``self.text``, ``self.word_net`` (the coarse word net),
    ``self.vertexs`` (the Viterbi result over that net) and
    ``self.word_net_optimum`` (a second word net built from that result).
    """
    self.text = text

    # Coarse segmentation word net.
    coarse_net = WordNet(text)
    self.word_net = coarse_net
    gen_word_net(text, coarse_net)

    # Viterbi pass over the coarse net.
    decoded = viterbi(coarse_net.vertexs)
    self.vertexs = decoded
    self.word_net_optimum = WordNet(text, vertexs=decoded)
def test_recognition_1_level(self):
    """Expect the full company name as one vertex after organization recognition.

    The assertion looks for a vertex equal to
    ``Vertex(<company name>, attribute=u"nt 1")`` in the re-run Viterbi result.
    """
    sentence = u"济南杨铭宇餐饮管理有限公司是由杨先生创办的餐饮企业"
    self.gen_word(sentence)

    # NOTE: no person-recognition pass runs before organization recognition
    # in this test; the earlier draft of that step was only commented-out code.
    organization_recognition.recognition(
        self.vertexs, self.word_net_optimum, self.word_net
    )

    resegmented = viterbi(self.word_net_optimum.vertexs)
    expected = Vertex(u"济南杨铭宇餐饮管理有限公司", attribute=u"nt 1")
    self.assertIn(expected, resegmented)
def gen_word(self, text):
    """Run coarse segmentation plus Viterbi on ``text`` and store the results.

    After the call the instance holds ``text``, ``word_net`` (coarse word
    net), ``vertexs`` (Viterbi result) and ``word_net_optimum`` (word net
    constructed from ``vertexs``).
    """
    self.text = text

    # Build and fill the coarse segmentation word net.
    net = WordNet(text)
    self.word_net = net
    gen_word_net(text, net)

    # Viterbi over the coarse net, then a second net built from its result.
    viterbi_result = viterbi(net.vertexs)
    self.vertexs = viterbi_result
    self.word_net_optimum = WordNet(text, vertexs=viterbi_result)
def setUp(self):
    """Prepare word nets and the Viterbi result for the fixed sample sentence."""
    sample = u"济南杨铭宇餐饮管理有限公司是由杨先生创办的餐饮企业"
    self.text = sample

    # Coarse segmentation word net.
    coarse_net = WordNet(sample)
    self.word_net = coarse_net
    gen_word_net(sample, coarse_net)

    # Viterbi pass, then a word net built from its result.
    decoded = viterbi(coarse_net.vertexs)
    self.vertexs = decoded
    self.word_net_optimum = WordNet(sample, vertexs=decoded)
def test_recognition(self):
    """Expect every listed person name as an ``nr 1`` vertex after recognition."""
    sentence = u"签约仪式前,秦光荣、李纪恒、仇和、王春桂、张晓辉等一同会见了参加签约的企业家。"

    # Coarse segmentation word net.
    coarse_net = WordNet(sentence)
    gen_word_net(sentence, coarse_net)

    # First Viterbi pass and the word net built from it.
    first_pass = viterbi(coarse_net.vertexs)
    optimum_net = WordNet(sentence, vertexs=first_pass)

    person_recognition.recognition(first_pass, optimum_net, coarse_net)
    second_pass = viterbi(optimum_net.vertexs)

    # Checked in the same order as the names appear in the sentence.
    for person in (u"秦光荣", u"李纪恒", u"仇和", u"王春桂", u"张晓辉"):
        self.assertIn(Vertex(person, attribute=u"nr 1"), second_pass)
    print(second_pass)
def test_recognition(self):
    """Expect the place names of the fixture sentence after place recognition."""
    place_recognition.recognition(
        self.vertexs, self.word_net_optimum, self.word_net
    )
    resegmented = viterbi(self.word_net_optimum.vertexs)

    # These two are looked up with Vertex's default attribute.
    for place in (u"宁夏", u"固原市"):
        self.assertIn(Vertex(place), resegmented)

    # These are looked up with the explicit "ns 1" attribute.
    for place in (u"彭阳县", u"红河镇", u"黑牛沟村"):
        self.assertIn(Vertex(place, attribute=u"ns 1"), resegmented)
def setUp(self):
    """Prepare word nets and the post-processed Viterbi result for the sample.

    The Viterbi result is passed through ``combine_by_custom_dict`` with the
    trie of a fresh ``CustomDict`` before the second word net is built.
    """
    sample = u"蓝翔给宁夏固原市彭阳县红河镇黑牛沟村捐赠了挖掘机"
    self.text = sample

    # Coarse segmentation word net.
    coarse_net = WordNet(sample)
    self.word_net = coarse_net
    gen_word_net(sample, coarse_net)

    # Viterbi pass, followed by the custom-dictionary step.
    self.vertexs = viterbi(coarse_net.vertexs)
    custom_trie = CustomDict().trie
    self.vertexs = combine_by_custom_dict(self.vertexs, custom_trie)

    self.word_net_optimum = WordNet(sample, vertexs=self.vertexs)
def test_recognition_1_level(self):
    """Organization recognition should yield the whole company name as one vertex.

    Passes when a vertex equal to ``Vertex(<company name>, attribute=u"nt 1")``
    is present in the Viterbi result computed after recognition.
    """
    company = u"济南杨铭宇餐饮管理有限公司"
    sentence = u"济南杨铭宇餐饮管理有限公司是由杨先生创办的餐饮企业"
    self.gen_word(sentence)

    # NOTE: person recognition is not executed here; that step existed only
    # as commented-out code in front of the organization pass.
    organization_recognition.recognition(
        self.vertexs, self.word_net_optimum, self.word_net
    )

    final_vertexs = viterbi(self.word_net_optimum.vertexs)
    self.assertIn(Vertex(company, attribute=u"nt 1"), final_vertexs)
def seg_to_vertexs(text):
    """Segment ``text`` and return the vertices of the final Viterbi pass.

    Pipeline, each optional stage gated by a ``Config`` flag:
    coarse word net -> Viterbi -> custom dictionary (``use_custom_dict``)
    -> person (``name_recognize``) and place (``place_recognize``)
    recognition -> Viterbi -> organization recognition (``org_recognize``).
    With ``Config.debug`` set, the intermediate word nets are printed.
    """
    # Coarse segmentation word net.
    coarse_net = WordNet(text)
    gen_word_net(text, coarse_net)
    if Config.debug:
        print(u"打印粗分词网:")
        print(unicode(coarse_net))

    # First Viterbi pass, optionally followed by the custom-dictionary step.
    path = viterbi(coarse_net.vertexs)
    if Config.use_custom_dict:
        path = combine_by_custom_dict(path)
    optimum_net = WordNet(text, vertexs=path)

    # Person and place recognition both work on the same optimum net.
    if Config.name_recognize:
        person_recognition.recognition(path, optimum_net, coarse_net)
    if Config.place_recognize:
        place_recognition.recognition(path, optimum_net, coarse_net)
    if Config.debug:
        print(u"打印人名、地名识别词网:")
        print(unicode(optimum_net))
    path = viterbi(optimum_net.vertexs)

    if not Config.org_recognize:
        return path

    # Organization recognition starts from a fresh net built on the latest path.
    optimum_net = WordNet(text, vertexs=path)
    path = organization_recognition.recognition(path, optimum_net, coarse_net)
    if Config.debug:
        print(u"打印人组织识别词网:")
        print(unicode(optimum_net))
    return path
def role_viterbi(vertexs, wordnet_optimum, hmm, trie, recognition_attr, tag_func, viterbi_fun=viterbi_template):
    """Decode role tags for ``vertexs`` and merge every trie match into one vertex.

    The observed tags come from ``tag_func(vertexs)`` and are decoded with
    ``viterbi_fun(tag_list, hmm)``.  The decoded tags are joined into a
    string and scanned by ``Searcher(trie, ...)``; each match produces a
    ``Vertex`` whose word is the concatenated ``real_word`` of the matched
    vertices and whose attribute is ``recognition_attr``.  That vertex is
    added to ``wordnet_optimum`` and Viterbi is run again at the end.

    Args:
        vertexs: Indexable sequence of vertices exposing ``real_word``; the
            first and last entries are left out of the offset computation.
        wordnet_optimum: Word net updated in place via ``add(offset, vertex)``.
        hmm: Passed through to ``viterbi_fun``.
        trie: Passed through to ``Searcher``.
        recognition_attr: Attribute assigned to each newly created vertex.
        tag_func: Callable returning the observed tag sequence for ``vertexs``.
        viterbi_fun: Decoder invoked as ``viterbi_fun(tag_list, hmm)``.

    Returns:
        ``viterbi(wordnet_optimum.vertexs)`` evaluated after all additions.
    """
    tag_list = tag_func(vertexs)
    if Config.debug:
        parts = [u"[ %s %s ]" % (vertexs[i].real_word, tag) for i, tag in enumerate(tag_list)]
        # The parenthesised form is used for every debug print so the
        # function no longer depends on the Python-2-only print statement.
        print(u"角色观察: %s" % u"".join(parts))

    tag_list = viterbi_fun(tag_list, hmm)
    if Config.debug:
        parts = [u"%s/%s" % (vertexs[i].real_word, tag) for i, tag in enumerate(tag_list)]
        print(u"角色标注:[%s]" % u", ".join(parts))

    # NOTE(review): ``search.begin`` indexes ``vertexs`` directly, so each
    # tag is presumably rendered by ``str`` as one character -- confirm.
    tag_str = ''.join([str(tag) for tag in tag_list])
    search = Searcher(trie, tag_str)

    # Accumulate word lengths into per-vertex offsets.  Head and tail are
    # skipped (their slots stay 0) and the running offset starts at 1.
    vertexs_offset = [0] * len(vertexs)
    offset = 1
    for i, inner in enumerate(vertexs[1:-1], 1):
        vertexs_offset[i] = offset
        offset += len(inner.real_word)

    while search.next():
        begin = search.begin
        name_str = "".join([vertexs[i].real_word for i in range(begin, begin + len(search.key))])
        # Add the merged word to the word net.
        vertex = Vertex(name_str, attribute=recognition_attr)
        wordnet_optimum.add(vertexs_offset[begin], vertex)

    return viterbi(wordnet_optimum.vertexs)
def role_viterbi(vertexs, wordnet_optimum, hmm, trie, recognition_attr, tag_func):
    """Decode role tags for ``vertexs`` and add each trie match to the word net.

    ``tag_func(vertexs)`` yields the observed tags, ``viterbi_template``
    decodes them against ``hmm``, and the decoded tags (joined into one
    string) are scanned with ``Searcher(trie, ...)``.  Each match becomes a
    ``Vertex`` built from the concatenated ``real_word`` of the matched
    vertices with attribute ``recognition_attr``; it is added to
    ``wordnet_optimum``.  Returns ``viterbi(wordnet_optimum.vertexs)``.
    """
    decoded_tags = viterbi_template(tag_func(vertexs), hmm)
    tag_str = ''.join([str(tag) for tag in decoded_tags])
    matcher = Searcher(trie, tag_str)

    # Per-vertex offsets from accumulated word lengths; head and tail are
    # skipped and the running value starts at 1.
    start_offsets = [0] * len(vertexs)
    running = 1
    for position, inner in enumerate(vertexs[1:-1], 1):
        start_offsets[position] = running
        running += len(inner.real_word)

    while matcher.next():
        first = matcher.begin
        last = first + len(matcher.key)
        merged_word = "".join([vertexs[k].real_word for k in range(first, last)])
        # Add the merged word to the word net.
        merged_vertex = Vertex(merged_word, attribute=recognition_attr)
        wordnet_optimum.add(start_offsets[first], merged_vertex)

    return viterbi(wordnet_optimum.vertexs)