Example #1
0
def main():
    """Print every item produced by ``extract_verbs``, UTF-8 encoded.

    The input comes from ``load_data()``; each item is encoded with
    ``.encode('utf8')`` and printed on its own line.

    Returns:
        Always 0.
    """
    verbs = extract_verbs(load_data())

    # Display the results
    for item in verbs:
        encoded = item.encode('utf8')
        print(encoded)

    # Original author's note: 3897 words
    return 0
Example #2
0
def main():
    """Print the entries of the ``calc_word_freq`` mapping, largest value first.

    Each output line is formatted as ``'{}: {}'`` with the UTF-8 encoded key
    followed by its value.

    Returns:
        Always 0.
    """
    freq = calc_word_freq(load_data())

    # Order the (key, value) pairs by value, descending.
    ranked = sorted(freq.items(), key=lambda pair: pair[1], reverse=True)

    # Display the results
    for word, count in ranked:
        line = '{}: {}'.format(word.encode('utf8'), count)
        print(line)

    return 0
Example #3
0
def main():
    """Dump the mapping returned by ``calc_word_freq`` in descending value order.

    One line is printed per entry: the key encoded as UTF-8, a colon and a
    space, then the value.

    Returns:
        Always 0.
    """
    table = calc_word_freq(load_data())

    def by_value(entry):
        # entry is a (key, value) pair; the sort key is the value.
        return entry[1]

    # Sort a copy of the items in place; the sort is stable, so ties keep
    # the order in which the mapping yielded them.
    entries = list(table.items())
    entries.sort(key=by_value, reverse=True)

    # Display the results
    for key, val in entries:
        print('{}: {}'.format(key.encode('utf8'), val))

    return 0
Example #4
0
def main():
    """Print each item returned by ``extract_nounphrases`` on its own line.

    The input passed to ``extract_nounphrases`` is the result of
    ``load_data()``.

    Returns:
        Always 0.
    """
    phrases = extract_nounphrases(load_data())

    # Display the results
    for phrase in phrases:
        print(phrase)

    # Original author's note: 3897 words
    return 0
Example #5
0
def main():
    """Print each item returned by ``extract_sahensetsuzoku_nouns``.

    The data comes from ``load_data()``; items are printed one per line,
    in the order they are yielded.

    Returns:
        Always 0.
    """
    loaded = load_data()
    found = extract_sahensetsuzoku_nouns(loaded)

    # Display the results
    for entry in found:
        print(entry)

    # Original author's note: 3897 words
    return 0
Example #6
0
def main():
    """Load the data and print what ``extract_nounphrases`` yields for it.

    Every yielded item is printed on a separate line.

    Returns:
        Always 0.
    """
    source = load_data()

    # Display the results
    for np_item in extract_nounphrases(source):
        print(np_item)

    # Original author's note: 3897 words
    return 0
Example #7
0
def main():
    """Print each item returned by ``extract_nounseries`` on its own line.

    The input passed to ``extract_nounseries`` is the result of
    ``load_data()``.

    Returns:
        Always 0.
    """
    series = extract_nounseries(load_data())

    # Display the results
    for item in series:
        print(item)

    return 0
Example #8
0
def main():
    """Print every item produced by ``extract_verbs_base``, UTF-8 encoded.

    The data comes from ``load_data()``; each item is encoded with
    ``.encode('utf8')`` before being printed on its own line.

    Returns:
        Always 0.
    """
    loaded = load_data()
    base_forms = extract_verbs_base(loaded)

    # Display the results
    for form in base_forms:
        as_bytes = form.encode('utf8')
        print(as_bytes)

    # Original author's note: 3897 words
    return 0
Example #9
0
def main():
    """Load the data and print what ``extract_nounseries`` yields for it.

    Every yielded item is printed on a separate line.

    Returns:
        Always 0.
    """
    source = load_data()

    # Display the results
    for run in extract_nounseries(source):
        print(run)

    return 0
Example #10
0
def main():
    """Draw a log-log plot of the ``calc_word_freq`` values against their rank.

    The values of the mapping are sorted in descending order and plotted
    against the 1-based positions 1..N with ``plt.loglog``.

    Returns:
        Always 0.
    """
    counts = sorted(calc_word_freq(load_data()).values(), reverse=True)

    # x axis: 1-based rank of each value in the descending ordering.
    ranks = range(1, len(counts) + 1)

    plt.loglog(ranks, counts)
    plt.show()

    return 0
Example #11
0
def main():
    """Plot the descending-sorted ``calc_word_freq`` values on log-log axes.

    The x coordinates are the 1-based positions of the values after
    sorting; the y coordinates are the values themselves.

    Returns:
        Always 0.
    """
    table = calc_word_freq(load_data())

    # Copy the values and sort the copy in place, largest first.
    ordered = list(table.values())
    ordered.sort(reverse=True)

    last_rank = len(ordered)
    plt.loglog(range(1, last_rank + 1), ordered)
    plt.show()

    return 0
Example #12
0
def main():
    """Show two histograms of the values returned by ``calc_word_freq``.

    The first histogram covers all values; the second covers only the
    values strictly greater than 50. Both use 100 bins.

    Returns:
        Always 0.
    """
    data = load_data()

    freq = calc_word_freq(data)
    # Materialise the values once so both plots receive a real sequence
    # (on Python 3 ``dict.values()`` is a view, not a list).
    counts = list(freq.values())

    plt.hist(counts, bins=100)
    plt.show()

    # Try excluding the words whose frequency is 50 or less.
    # A list comprehension is used instead of ``filter`` because on
    # Python 3 ``filter`` returns a lazy iterator without ``len()``,
    # which cannot be used as histogram input.
    plt.hist([c for c in counts if c > 50], bins=100)
    plt.show()
    return 0
Example #13
0
def main():
    """Plot the distribution of the ``calc_word_freq`` values as histograms.

    Two 100-bin histograms are shown in sequence: one over every value,
    and one restricted to values strictly greater than 50.

    Returns:
        Always 0.
    """
    data = load_data()

    freq = calc_word_freq(data)
    # Convert to a list up front: on Python 3 ``values()`` is a view object.
    all_counts = list(freq.values())

    plt.hist(all_counts, bins=100)
    plt.show()

    # Try excluding the words whose frequency is 50 or less.
    # ``filter`` is lazy on Python 3 (no ``len()``), so build an explicit
    # list; on Python 2 this yields exactly what ``filter`` returned.
    frequent = [count for count in all_counts if count > 50]
    plt.hist(frequent, bins=100)
    plt.show()
    return 0
Example #14
0
def main():
    """Draw a bar chart of the (up to) 10 largest ``calc_word_freq`` entries.

    The mapping's items are sorted by value in descending order and the
    first 10 are plotted, with the keys used as x tick labels.

    Returns:
        Always 0.
    """
    data = load_data()

    freq = calc_word_freq(data)
    # Take the top 10 words (fewer if the mapping has fewer entries)
    words = sorted(freq.items(), key=lambda t: t[1], reverse=True)[:10]
    # Split into keys and frequencies. Comprehensions are used instead of
    # ``zip(*words)`` so that an empty mapping does not fail to unpack.
    keys = [key for key, _ in words]
    values = [val for _, val in words]

    # Positions follow the real number of entries; a hard-coded 10 would
    # mismatch ``values`` whenever fewer than 10 entries exist.
    count = len(words)
    width = 0.5
    plt.bar([i + width / 2.0 for i in range(count)], values, width)
    plt.xticks([i + width for i in range(count)], keys)
    plt.show()
    return 0
Example #15
0
def main():
    """Plot the highest-valued ``calc_word_freq`` entries as a bar chart.

    At most 10 entries are shown, ordered by descending value; each bar is
    labelled on the x axis with its key.

    Returns:
        Always 0.
    """
    data = load_data()

    freq = calc_word_freq(data)
    # Take the top 10 words (the slice may return fewer)
    words = sorted(freq.items(), key=lambda t: t[1], reverse=True)[:10]
    # Split into keys and frequencies without ``zip(*words)``, which
    # cannot be unpacked into two names when ``words`` is empty.
    keys = [pair[0] for pair in words]
    values = [pair[1] for pair in words]

    width = 0.5
    # One position per actual entry rather than a fixed 10, so the x
    # coordinates always match the number of bar heights and labels.
    positions = range(len(words))
    plt.bar([i + width / 2.0 for i in positions],
            values, width)
    plt.xticks([i + width for i in positions], keys)
    plt.show()
    return 0