def main():
    """Plot word frequencies against their rank on log-log axes.

    The data comes from ``load_data`` and is turned into a frequency table
    by ``calc_word_freq``; only the table's values are used. They are
    ordered from largest to smallest and drawn against 1-based ranks with
    ``plt.loglog``, then the figure is displayed.

    Returns:
        Always 0 (presumably an exit status -- confirm against the caller).
    """
    word_freq = calc_word_freq(load_data())

    # Largest frequency first, so index k - 1 holds the rank-k frequency.
    ranked = list(word_freq.values())
    ranked.sort(reverse=True)

    ranks = range(1, len(ranked) + 1)
    plt.loglog(ranks, ranked)
    plt.show()
    return 0
def main():
    """Show a rank-versus-frequency plot of the words on log-log axes.

    Steps: load the data (``load_data``), compute the frequency table
    (``calc_word_freq``), sort its values in descending order, and plot
    them against ranks 1..N with ``plt.loglog`` before showing the figure.

    Returns:
        Always 0 (presumably an exit status -- confirm against the caller).
    """
    corpus = load_data()
    counts_by_word = calc_word_freq(corpus)

    # Descending order: the first entry is the most frequent word's count.
    descending = sorted(counts_by_word.values(), reverse=True)
    rank_axis = range(1, 1 + len(descending))

    plt.loglog(rank_axis, descending)
    plt.show()
    return 0
def main():
    """Show histograms of the word frequencies, with and without rare words.

    The data comes from ``load_data`` and the frequency table from
    ``calc_word_freq``; only the table's values are used. Two figures are
    shown one after the other, each with 100 bins: first all frequencies,
    then only the frequencies strictly greater than 50.

    Returns:
        Always 0 (presumably an exit status -- confirm against the caller).
    """
    data = load_data()
    freq = calc_word_freq(data)

    # Materialize the values once so both histograms get a real sequence.
    counts = list(freq.values())
    plt.hist(counts, bins=100)
    plt.show()

    # Try excluding words whose frequency is 50 or less.
    # A list is built rather than passing filter(...): under Python 3,
    # filter() returns a one-shot iterator, while plt.hist expects an array
    # or a sequence.
    frequent = [count for count in counts if count > 50]
    plt.hist(frequent, bins=100)
    plt.show()
    return 0
def main():
    """Plot the distribution of word frequencies as two histograms.

    Uses ``load_data`` and ``calc_word_freq`` to obtain the frequency
    table, then shows a 100-bin histogram of every frequency followed by a
    100-bin histogram restricted to frequencies strictly greater than 50.

    Returns:
        Always 0 (presumably an exit status -- confirm against the caller).
    """
    data = load_data()
    freq = calc_word_freq(data)

    # Convert the values to a list up front; it is reused for both figures.
    all_counts = list(freq.values())
    plt.hist(all_counts, bins=100)
    plt.show()

    # Try excluding words whose frequency is 50 or less.
    # The filtered values are collected into a list because plt.hist
    # expects an array or a sequence; the filter(...) object produced under
    # Python 3 is a lazy iterator and is neither.
    above_threshold = [count for count in all_counts if count > 50]
    plt.hist(above_threshold, bins=100)
    plt.show()
    return 0
def main():
    """Draw a bar chart of the most frequent words (at most ten).

    The data comes from ``load_data`` and the frequency table from
    ``calc_word_freq``. Its items are sorted by frequency in descending
    order, the first ten are kept, and one bar per word is drawn with the
    word as the x tick label. If the table is empty, nothing is plotted.

    Returns:
        Always 0 (presumably an exit status -- confirm against the caller).
    """
    data = load_data()
    freq = calc_word_freq(data)

    # Take the top 10 words by frequency (fewer if fewer words exist).
    words = sorted(freq.items(), key=lambda t: t[1], reverse=True)[:10]
    if not words:
        # Nothing to plot; zip(*words) could not be unpacked into two names.
        return 0
    keys, values = zip(*words)  # split into words and their frequencies

    width = 0.5
    # Positions follow the number of bars actually available instead of a
    # fixed 10, so the x positions always match len(values) and len(keys).
    slots = range(len(words))
    # align="edge" makes each bar span [i + width/2, i + 1.5*width], so the
    # ticks at i + width sit at the bar centres whatever the library's
    # default alignment is.
    plt.bar([i + width / 2.0 for i in slots], values, width, align="edge")
    plt.xticks([i + width for i in slots], keys)
    plt.show()
    return 0
def main():
    """Show the top words by frequency as a bar chart (up to ten bars).

    Uses ``load_data`` and ``calc_word_freq`` to obtain the frequency
    table, sorts its items by frequency from highest to lowest, keeps the
    first ten, and draws one labelled bar per word. If the table is empty,
    nothing is plotted.

    Returns:
        Always 0 (presumably an exit status -- confirm against the caller).
    """
    data = load_data()
    freq = calc_word_freq(data)

    # Take the top 10 words by frequency (fewer if fewer words exist).
    words = sorted(freq.items(), key=lambda t: t[1], reverse=True)[:10]
    if not words:
        # An empty table cannot be unpacked by zip(*words); skip plotting.
        return 0
    keys, values = zip(*words)  # split into words and their frequencies

    width = 0.5
    # Derive the bar count from the data rather than hard-coding 10, so the
    # position lists always have the same length as values and keys.
    count = len(words)
    left_edges = [i + width / 2.0 for i in range(count)]
    tick_positions = [i + width for i in range(count)]
    # With align="edge" the x values are left edges, which puts each tick
    # (left edge + width/2) at the centre of its bar regardless of the
    # library's default alignment.
    plt.bar(left_edges, values, width, align="edge")
    plt.xticks(tick_positions, keys)
    plt.show()
    return 0