def using_typed_words():
    """Demo: part-of-speech tagging with typed domain words and stopwords.

    Registers the typed words returned by ``get_qh_typed_words`` on a fresh
    ``HarvestText`` instance, then prints the available type names, a sample
    sentence, and the result of ``posseg`` on that sentence with the list
    from ``get_baidu_stopwords`` passed as ``stopwords``.
    """
    from harvesttext.resources import get_qh_typed_words, get_baidu_stopwords

    tagger = HarvestText()
    domain_words = get_qh_typed_words()
    stop_list = get_baidu_stopwords()
    tagger.add_typed_words(domain_words)

    print("加载清华领域词典,并使用停用词")
    print("全部类型", domain_words.keys())

    sample = "THUOCL是自然语言处理的一套中文词库,词表来自主流网站的社会标签、搜索热词、输入法词库等。"
    print(sample)
    tagged = tagger.posseg(sample, stopwords=stop_list)
    print(tagged)
    print("一些词语被赋予特殊类型IT,而“是”等词语被筛出。")
def entity_error_check():
    """Demo: entity linking with the ``pinyin_recheck`` / ``char_recheck`` flags.

    Registers the single entity "武磊" under the type "人名", then prints three
    sample sentences, each followed by a linking result:

    1. ``entity_linking`` with ``pinyin_recheck=True``;
    2. ``entity_linking`` with ``char_recheck=True``;
    3. ``get_linking_mention_candidates`` with both flags enabled.
    """
    linker = HarvestText()
    linker.add_typed_words({"人名": ["武磊"]})

    pinyin_case = "武磊和吴力只差一个拼音"
    print(pinyin_case)
    pinyin_result = linker.entity_linking(pinyin_case, pinyin_recheck=True)
    print(pinyin_result)

    char_case = "武磊和吴磊只差一个字"
    print(char_case)
    char_result = linker.entity_linking(char_case, char_recheck=True)
    print(char_result)

    mixed_case = "吴磊和吴力都可能是武磊的代称"
    print(mixed_case)
    candidates = linker.get_linking_mention_candidates(
        mixed_case, pinyin_recheck=True, char_recheck=True
    )
    print(candidates)
def test_using_typed_words():
    """Golden-file test for the typed-words / stopwords demo output.

    Everything printed by the demo steps is captured into the file
    ``get_current_function_name() + "_current"`` and compared with the
    contents of ``get_current_function_name() + "_expected"``.

    Raises:
        AssertionError: if the captured output differs from the expected text.
        OSError: if the expected file cannot be read or the capture file
            cannot be written.
    """
    from contextlib import redirect_stdout
    from harvesttext.resources import get_qh_typed_words, get_baidu_stopwords

    # Called directly in this function body (not from a helper or nested
    # scope): the helper presumably derives the name from its caller's frame
    # -- TODO confirm against its definition.
    case_name = get_current_function_name()
    current_path = case_name + "_current"
    expected_path = case_name + "_expected"

    # Read the reference first so a missing fixture fails before any output
    # file is created. Default encoding is kept on purpose so the existing
    # fixture files are decoded exactly as before.
    with open(expected_path) as expected_file:
        expected = expected_file.read()

    # redirect_stdout restores sys.stdout on exit (even if a step raises), and
    # the with-block closes the capture file, so later prints keep working.
    with open(current_path, "w") as captured, redirect_stdout(captured):
        ht0 = HarvestText()
        typed_words, stopwords = get_qh_typed_words(), get_baidu_stopwords()
        ht0.add_typed_words(typed_words)
        print("加载清华领域词典,并使用停用词")
        print("全部类型", typed_words.keys())
        sentence = "THUOCL是自然语言处理的一套中文词库,词表来自主流网站的社会标签、搜索热词、输入法词库等。"
        print(sentence)
        print(ht0.posseg(sentence, stopwords=stopwords))
        print("一些词语被赋予特殊类型IT,而“是”等词语被筛出。")

    with open(current_path) as current_file:
        actual = current_file.read()
    assert actual == expected
def test_entity_error_check():
    """Golden-file test for the entity-linking recheck demo output.

    Everything printed by the demo steps is captured into the file
    ``get_current_function_name() + "_current"`` and compared with the
    contents of ``get_current_function_name() + "_expected"``.

    Raises:
        AssertionError: if the captured output differs from the expected text.
        OSError: if the expected file cannot be read or the capture file
            cannot be written.
    """
    from contextlib import redirect_stdout

    # Called directly in this function body (not from a helper or nested
    # scope): the helper presumably derives the name from its caller's frame
    # -- TODO confirm against its definition.
    case_name = get_current_function_name()
    current_path = case_name + "_current"
    expected_path = case_name + "_expected"

    # Read the reference first so a missing fixture fails before any output
    # file is created. Default encoding is kept on purpose so the existing
    # fixture files are decoded exactly as before.
    with open(expected_path) as expected_file:
        expected = expected_file.read()

    # redirect_stdout restores sys.stdout on exit (even if a step raises), and
    # the with-block closes the capture file, so later prints keep working.
    with open(current_path, "w") as captured, redirect_stdout(captured):
        ht0 = HarvestText()
        typed_words = {"人名": ["武磊"]}
        ht0.add_typed_words(typed_words)
        sent1 = "武磊和吴力只差一个拼音"
        print(sent1)
        print(ht0.entity_linking(sent1, pinyin_recheck=True))
        sent2 = "武磊和吴磊只差一个字"
        print(sent2)
        print(ht0.entity_linking(sent2, char_recheck=True))
        sent3 = "吴磊和吴力都可能是武磊的代称"
        print(sent3)
        print(ht0.get_linking_mention_candidates(sent3, pinyin_recheck=True, char_recheck=True))

    with open(current_path) as current_file:
        actual = current_file.read()
    assert actual == expected