def test_build_word_ego_graph():
    """Regression test: build and draw the word and entity ego graphs of "刘备".

    Everything written to stdout while the graphs are built and drawn is
    captured in ``<function name>_current`` and must equal the contents of
    ``<function name>_expected``.

    Raises:
        AssertionError: if the captured output differs from the expected file.
        OSError: if either file cannot be opened.
    """
    import contextlib

    # get_current_function_name() is called from this body itself, as before.
    # NOTE(review): presumably it resolves the *caller's* name, so it must not
    # be moved into a helper -- confirm against its definition.
    current_path = get_current_function_name() + "_current"
    expected_path = get_current_function_name() + "_expected"

    # Same order as before: create/truncate the capture file first, then read
    # the expected output. Both handles are now closed deterministically.
    with open(current_path, "w") as current_file:
        with open(expected_path) as expected_file:
            expected = expected_file.read()

        # redirect_stdout puts the previous sys.stdout back on exit (even when
        # the body raises) instead of leaving a closed file installed globally.
        with contextlib.redirect_stdout(current_file):
            import networkx as nx
            import matplotlib.pyplot as plt
            plt.rcParams['font.sans-serif'] = ['SimHei']  # Step 1: replace the sans-serif font
            plt.rcParams['axes.unicode_minus'] = False  # Step 2: fix how the minus sign of negative axis values is displayed
            from harvesttext import get_sanguo, get_sanguo_entity_dict, get_baidu_stopwords

            ht0 = HarvestText()
            entity_mention_dict, entity_type_dict = get_sanguo_entity_dict()
            ht0.add_entities(entity_mention_dict, entity_type_dict)
            sanguo1 = get_sanguo()[0]
            stopwords = get_baidu_stopwords()
            docs = ht0.cut_sentences(sanguo1)

            # Word-level ego graph around "刘备".
            G = ht0.build_word_ego_graph(docs, "刘备", min_freq=3, other_min_freq=2, stopwords=stopwords)
            pos = nx.kamada_kawai_layout(G)
            nx.draw(G, pos)
            nx.draw_networkx_labels(G, pos)

            # Entity-level ego graph around "刘备".
            G = ht0.build_entity_ego_graph(docs, "刘备", min_freq=3, other_min_freq=2)
            pos = nx.spring_layout(G)
            nx.draw(G, pos)
            nx.draw_networkx_labels(G, pos)

    with open(current_path) as current_file:
        assert current_file.read() == expected
def build_word_ego_graph():
    """Demo: draw the word and entity ego graphs of "刘备" and show each one.

    Registers the Sanguo entity dictionaries on a fresh ``HarvestText``
    instance, splits the first text returned by ``get_sanguo()`` into
    sentences, then builds, lays out, draws and shows two graphs in turn:
    the word ego graph (Kamada-Kawai layout) and the entity ego graph
    (spring layout).
    """
    import networkx as nx
    import matplotlib.pyplot as plt

    plt.rcParams['font.sans-serif'] = ['SimHei']  # Step 1: replace the sans-serif font
    plt.rcParams['axes.unicode_minus'] = False  # Step 2: fix how the minus sign of negative axis values is displayed

    from harvesttext import get_sanguo, get_sanguo_entity_dict, get_baidu_stopwords

    def _draw_and_show(graph, layout):
        # Compute node positions, draw nodes/edges plus labels, then display.
        positions = layout(graph)
        nx.draw(graph, positions)
        nx.draw_networkx_labels(graph, positions)
        plt.show()

    harvester = HarvestText()
    mentions_by_entity, type_by_entity = get_sanguo_entity_dict()
    harvester.add_entities(mentions_by_entity, type_by_entity)

    first_text = get_sanguo()[0]
    stop_words = get_baidu_stopwords()
    sentences = harvester.cut_sentences(first_text)

    word_graph = harvester.build_word_ego_graph(
        sentences, "刘备", min_freq=3, other_min_freq=2, stopwords=stop_words
    )
    _draw_and_show(word_graph, nx.kamada_kawai_layout)

    entity_graph = harvester.build_entity_ego_graph(
        sentences, "刘备", min_freq=3, other_min_freq=2
    )
    _draw_and_show(entity_graph, nx.spring_layout)
def load_resources():
    """Print a small sample of what each ``harvesttext.resources`` loader returns.

    Covers the sentiment dictionary, the Baidu stopwords, the Sanguo texts
    and the Sanguo entity mention/type dictionaries. Output goes to stdout;
    nothing is returned.
    """
    from harvesttext.resources import (
        get_qh_sent_dict,
        get_baidu_stopwords,
        get_sanguo,
        get_sanguo_entity_dict,
    )

    def _window(items, start, stop):
        # Materialise as a list first so that slicing works on any iterable.
        return list(items)[start:stop]

    # {"pos": [positive words...], "neg": [negative words...]}
    sentiment_words = get_qh_sent_dict()
    print("pos_words:", _window(sentiment_words["pos"], 10, 15))
    print("neg_words:", _window(sentiment_words["neg"], 5, 10))

    stop_word_sample = _window(get_baidu_stopwords(), 5, 10)
    print("stopwords:", stop_word_sample)

    # List of texts; each element is the text of one chapter.
    chapters = get_sanguo()
    last_chapter = chapters[-1]
    print("三国演义最后一章末16字:\n", last_chapter[-16:])

    mentions_by_entity, type_by_entity = get_sanguo_entity_dict()
    print("刘备 指称:", mentions_by_entity["刘备"])
    for label, entity in (("刘备 类别:", "刘备"), ("蜀 类别:", "蜀"), ("益州 类别:", "益州")):
        print(label, type_by_entity[entity])
def test_load_resources():
    """Run the resource-loading demo with stdout captured to a file.

    Everything printed while sampling the ``harvesttext.resources`` loaders is
    written to ``<function name>_current``. The contents of
    ``<function name>_expected`` are read up front, so a missing expected file
    still fails the test.

    Raises:
        OSError: if either file cannot be opened.
    """
    import contextlib

    # get_current_function_name() is called from this body itself, as before.
    # NOTE(review): presumably it resolves the *caller's* name, so it must not
    # be moved into a helper -- confirm against its definition.
    current_path = get_current_function_name() + "_current"
    expected_path = get_current_function_name() + "_expected"

    # Same order as before: create/truncate the capture file first, then read
    # the expected output. Both handles are now closed deterministically.
    with open(current_path, "w") as current_file:
        with open(expected_path) as expected_file:
            # NOTE(review): `expected` is loaded but never compared with the
            # captured output in this function. The printed slices depend on
            # the iteration order of the loaders' return values, which is not
            # visible here -- confirm it is stable before adding an assertion.
            expected = expected_file.read()

        # redirect_stdout puts the previous sys.stdout back on exit (even when
        # the body raises) instead of leaving a closed file installed globally.
        with contextlib.redirect_stdout(current_file):
            from harvesttext.resources import get_qh_sent_dict, get_baidu_stopwords, get_sanguo, get_sanguo_entity_dict

            sdict = get_qh_sent_dict()  # {"pos": [positive words...], "neg": [negative words...]}
            print("pos_words:", list(sdict["pos"])[10:15])
            print("neg_words:", list(sdict["neg"])[5:10])

            stopwords = get_baidu_stopwords()
            print("stopwords:", list(stopwords)[5:10])

            docs = get_sanguo()  # list of texts; each element is the text of one chapter
            print("三国演义最后一章末16字:\n", docs[-1][-16:])

            entity_mention_dict, entity_type_dict = get_sanguo_entity_dict()
            print("刘备 指称:", entity_mention_dict["刘备"])
            print("刘备 类别:", entity_type_dict["刘备"])
            print("蜀 类别:", entity_type_dict["蜀"])
            print("益州 类别:", entity_type_dict["益州"])