def main():
    """Run the segmenter demo: term recognition samples, then script conversion.

    Each recognition sample is passed to ``segment.seg`` and the result is
    handed to ``print_terms``. Afterwards one sentence is converted from
    Simplified to Traditional Chinese and another from Traditional to
    Simplified, and each converted string is printed.
    """
    # Samples are processed in this order; the comment above each literal
    # names the feature the sentence is meant to exercise.
    recognition_samples = (
        # Ambiguous-word recognition
        u"龚学平、张晓辉等领导说,邓颖超生前杜绝超生",
        # Person-name recognition
        u"签约仪式前,秦光荣、李纪恒、仇和等一同会见了参加签约的企业家。",
        # Place-name recognition
        u"蓝翔给宁夏固原市彭阳县红河镇黑牛沟村捐赠了挖掘机",
        # Organization-name recognition
        u"济南杨铭宇餐饮管理有限公司是由杨先生创办的餐饮企业",
    )
    for text in recognition_samples:
        terms = segment.seg(text)
        print_terms(terms)

    # Single-argument print(...) is used instead of the print statement:
    # it produces the same output on Python 2 and is valid on Python 3.

    # Simplified -> Traditional conversion
    text = u"以后等你当上皇后,就能买草莓庆祝了"
    print(segment.simplified_to_traditional(text))

    # Traditional -> Simplified conversion
    text = u"用筆記簿型電腦寫程式HelloWorld"
    print(segment.traditional_to_simplified(text))
def test_organization_recognition(self):
    """Assert that organization recognition produces the expected ``nt`` vertex.

    The Traditional Chinese input is converted to Simplified first, person
    and place recognition are run on the fixture state, and organization
    recognition is then run against a freshly built ``WordNet``. The
    resulting vertex collection must contain the expected organization
    vertex with attribute ``"nt 1"``.
    """
    simplified = traditional_to_simplified(u"馬總統上午前往陸軍航空601旅,")
    # NOTE(review): the debug flag is switched on here and not reset within
    # this test -- confirm whether other tests rely on or are affected by it.
    Config.debug = True
    # Presumably populates self.vertexs / self.word_net / self.text used
    # below -- TODO confirm against gen_word.
    self.gen_word(simplified)

    # Person and place recognition run first, on the fixture's own nets.
    for recognizer in (person_recognition, place_recognition):
        recognizer.recognition(
            self.vertexs, self.word_net_optimum, self.word_net
        )

    # Organization recognition gets a new WordNet built from the vertexes.
    org_word_net = WordNet(self.text, vertexs=self.vertexs)
    recognized = organization_recognition.recognition(
        self.vertexs, org_word_net, self.word_net
    )
    dump_vertexs(recognized)

    expected = Vertex(u"陆军航空601旅", attribute=u"nt 1")
    self.assertIn(expected, recognized)