Code Example #1
def test_build_word_ego_graph():
    # `sys`, `HarvestText`, and the `get_current_function_name` helper come from the
    # test module's top level, which this snippet does not show.
    # Redirect stdout into "<test name>_current" and load the pre-recorded expected output.
    sys.stdout, expected = open(get_current_function_name() + "_current", "w"), open(get_current_function_name() + "_expected").read()
    import networkx as nx
    import matplotlib.pyplot as plt
    plt.rcParams['font.sans-serif'] = ['SimHei']  # Step 1: switch the sans-serif font so Chinese labels render
    plt.rcParams['axes.unicode_minus'] = False  # Step 2: keep minus signs on the axes displaying correctly
    from harvesttext import get_sanguo, get_sanguo_entity_dict, get_baidu_stopwords

    ht0 = HarvestText()
    entity_mention_dict, entity_type_dict = get_sanguo_entity_dict()
    ht0.add_entities(entity_mention_dict, entity_type_dict)
    sanguo1 = get_sanguo()[0]  # first chapter of "Romance of the Three Kingdoms"
    stopwords = get_baidu_stopwords()
    docs = ht0.cut_sentences(sanguo1)
    # Word-level ego graph centered on "刘备" (Liu Bei)
    G = ht0.build_word_ego_graph(docs, "刘备", min_freq=3, other_min_freq=2, stopwords=stopwords)
    pos = nx.kamada_kawai_layout(G)
    nx.draw(G, pos)
    nx.draw_networkx_labels(G, pos)

    # Entity-level ego graph centered on the same entity
    G = ht0.build_entity_ego_graph(docs, "刘备", min_freq=3, other_min_freq=2)
    pos = nx.spring_layout(G)
    nx.draw(G, pos)
    nx.draw_networkx_labels(G, pos)

    sys.stdout.close()
    assert open(get_current_function_name() + "_current").read() == expected
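The test above draws both ego graphs but never calls plt.show(), so nothing is displayed when it runs. If you execute the same code interactively and want to keep a figure, a minimal sketch using plain matplotlib (nothing HarvestText-specific; the file name is only an example):

import matplotlib.pyplot as plt

# Hedged sketch: write the currently drawn ego graph to disk instead of showing it.
plt.savefig("liubei_ego_graph.png", dpi=150, bbox_inches="tight")  # illustrative file name
plt.close()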
Code Example #2
File: basics.py  Project: zimizzzz/HarvestText
def build_word_ego_graph():
    # `HarvestText` is imported at the top of basics.py, which this snippet does not show.
    import networkx as nx
    import matplotlib.pyplot as plt
    plt.rcParams['font.sans-serif'] = ['SimHei']  # Step 1: switch the sans-serif font so Chinese labels render
    plt.rcParams['axes.unicode_minus'] = False  # Step 2: keep minus signs on the axes displaying correctly
    from harvesttext import get_sanguo, get_sanguo_entity_dict, get_baidu_stopwords

    ht0 = HarvestText()
    entity_mention_dict, entity_type_dict = get_sanguo_entity_dict()
    ht0.add_entities(entity_mention_dict, entity_type_dict)
    sanguo1 = get_sanguo()[0]  # first chapter of "Romance of the Three Kingdoms"
    stopwords = get_baidu_stopwords()
    docs = ht0.cut_sentences(sanguo1)
    # Word-level ego graph centered on "刘备" (Liu Bei)
    G = ht0.build_word_ego_graph(docs,
                                 "刘备",
                                 min_freq=3,
                                 other_min_freq=2,
                                 stopwords=stopwords)
    pos = nx.kamada_kawai_layout(G)
    nx.draw(G, pos)
    nx.draw_networkx_labels(G, pos)
    plt.show()
    # Entity-level ego graph centered on the same entity
    G = ht0.build_entity_ego_graph(docs, "刘备", min_freq=3, other_min_freq=2)
    pos = nx.spring_layout(G)
    nx.draw(G, pos)
    nx.draw_networkx_labels(G, pos)
    plt.show()
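Both build_word_ego_graph and build_entity_ego_graph above hand their result straight to NetworkX layout and drawing calls, so the return value behaves as an ordinary NetworkX graph. A minimal sketch of inspecting it without plotting, assuming `G` is the word ego graph built in the example above:

# Hedged sketch: examine the ego graph programmatically instead of drawing it.
# Assumes `G` is the graph returned by ht0.build_word_ego_graph(...) above.
center = "刘备"
print("nodes:", G.number_of_nodes(), "edges:", G.number_of_edges())
# Neighbors of the center word, highest-degree first.
neighbors = sorted(G.neighbors(center), key=G.degree, reverse=True)
print("words most connected to", center, ":", neighbors[:10])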
Code Example #3
File: basics.py  Project: zimizzzz/HarvestText
def using_typed_words():
    # `HarvestText` is imported at the top of basics.py, which this snippet does not show.
    from harvesttext.resources import get_qh_typed_words, get_baidu_stopwords
    ht0 = HarvestText()
    typed_words, stopwords = get_qh_typed_words(), get_baidu_stopwords()
    ht0.add_typed_words(typed_words)
    print("加载清华领域词典,并使用停用词")  # "Load the Tsinghua (THUOCL) domain lexicons and apply stopwords"
    print("全部类型", typed_words.keys())  # all available word types
    sentence = "THUOCL是自然语言处理的一套中文词库,词表来自主流网站的社会标签、搜索热词、输入法词库等。"
    print(sentence)
    print(ht0.posseg(sentence, stopwords=stopwords))
    print("一些词语被赋予特殊类型IT,而“是”等词语被筛出。")  # some words are tagged with the special type IT, and stopwords such as "是" are filtered out
Code Example #4
def test_using_typed_words():
    # `sys`, `HarvestText`, and the `get_current_function_name` helper come from the
    # test module's top level, which this snippet does not show.
    sys.stdout, expected = open(get_current_function_name() + "_current", "w"), open(get_current_function_name() + "_expected").read()
    from harvesttext.resources import get_qh_typed_words, get_baidu_stopwords
    ht0 = HarvestText()
    typed_words, stopwords = get_qh_typed_words(), get_baidu_stopwords()
    ht0.add_typed_words(typed_words)
    print("加载清华领域词典,并使用停用词")  # "Load the Tsinghua (THUOCL) domain lexicons and apply stopwords"
    print("全部类型", typed_words.keys())  # all available word types
    sentence = "THUOCL是自然语言处理的一套中文词库,词表来自主流网站的社会标签、搜索热词、输入法词库等。"
    print(sentence)
    print(ht0.posseg(sentence, stopwords=stopwords))
    print("一些词语被赋予特殊类型IT,而“是”等词语被筛出。")  # some words are tagged with the special type IT, and stopwords such as "是" are filtered out

    sys.stdout.close()
    assert open(get_current_function_name() + "_current").read() == expected
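The test above captures printed output by reassigning sys.stdout to a "<name>_current" file and comparing it against a pre-recorded "<name>_expected" file. The same capture-and-compare idea can also be written with the standard library's contextlib.redirect_stdout; a minimal sketch (an alternative formulation, not the project's own test helper):

import io
from contextlib import redirect_stdout

# Hedged sketch: capture a function's printed output in memory and compare it
# with a stored reference, mirroring the "_current" vs "_expected" pattern above.
def captured_output(func):
    buf = io.StringIO()
    with redirect_stdout(buf):
        func()
    return buf.getvalue()

# expected = open("test_using_typed_words_expected").read()  # reference file as in the test above
# assert captured_output(using_typed_words) == expected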
Code Example #5
File: basics.py  Project: zimizzzz/HarvestText
def load_resources():
    from harvesttext.resources import get_qh_sent_dict, get_baidu_stopwords, get_sanguo, get_sanguo_entity_dict
    sdict = get_qh_sent_dict()  # {"pos": [positive words...], "neg": [negative words...]}
    print("pos_words:", list(sdict["pos"])[10:15])
    print("neg_words:", list(sdict["neg"])[5:10])

    stopwords = get_baidu_stopwords()
    print("stopwords:", list(stopwords)[5:10])

    docs = get_sanguo()  # list of texts, one chapter per element
    print("三国演义最后一章末16字:\n", docs[-1][-16:])  # last 16 characters of the final chapter of "Romance of the Three Kingdoms"
    entity_mention_dict, entity_type_dict = get_sanguo_entity_dict()
    print("刘备 指称:", entity_mention_dict["刘备"])  # mentions (aliases) of Liu Bei
    print("刘备 类别:", entity_type_dict["刘备"])  # entity type of Liu Bei
    print("蜀 类别:", entity_type_dict["蜀"])  # entity type of Shu
    print("益州 类别:", entity_type_dict["益州"])  # entity type of Yizhou
Code Example #6
def test_load_resources():
    # `sys` and the `get_current_function_name` helper come from the test module's
    # top level, which this snippet does not show.
    sys.stdout, expected = open(get_current_function_name() + "_current", "w"), open(get_current_function_name() + "_expected").read()
    from harvesttext.resources import get_qh_sent_dict, get_baidu_stopwords, get_sanguo, get_sanguo_entity_dict
    sdict = get_qh_sent_dict()  # {"pos": [positive words...], "neg": [negative words...]}
    print("pos_words:", list(sdict["pos"])[10:15])
    print("neg_words:", list(sdict["neg"])[5:10])

    stopwords = get_baidu_stopwords()
    print("stopwords:", list(stopwords)[5:10])

    docs = get_sanguo()  # list of texts, one chapter per element
    print("三国演义最后一章末16字:\n", docs[-1][-16:])
    entity_mention_dict, entity_type_dict = get_sanguo_entity_dict()
    print("刘备 指称:", entity_mention_dict["刘备"])
    print("刘备 类别:", entity_type_dict["刘备"])
    print("蜀 类别:", entity_type_dict["蜀"])
    print("益州 类别:", entity_type_dict["益州"])

    sys.stdout.close()