def main():
    """Print every item returned by ``extract_verbs`` as UTF-8 bytes.

    Loads the data with ``load_data``, passes it to ``extract_verbs`` and
    prints each returned item after encoding it with ``'utf8'``.

    Returns:
        int: always 0.
    """
    # NOTE(review): printing an encoded value suggests this targets
    # Python 2; on Python 3 the same call prints a bytes repr -- confirm.
    loaded = load_data()

    # Display the results.
    for item in extract_verbs(loaded):
        encoded = item.encode('utf8')
        print(encoded)
    # Original note: "3897 words" (presumably the output size of one run;
    # not verifiable from this code).
    return 0
def main():
    """Print the word frequencies, highest frequency first.

    Loads the data, computes the frequency mapping with ``calc_word_freq``
    and prints one ``key: value`` line per entry, sorted by value in
    descending order. Keys are encoded with ``'utf8'`` before formatting.

    Returns:
        int: always 0.
    """
    table = calc_word_freq(load_data())

    # Order the (key, value) pairs by value, largest first.
    ranked = sorted(table.items(), key=lambda pair: pair[1], reverse=True)

    # Display the results.
    for word, count in ranked:
        line = '{}: {}'.format(word.encode('utf8'), count)
        print(line)
    return 0
def main():
    """Report each entry of the frequency mapping in descending order.

    The mapping comes from ``calc_word_freq`` applied to the result of
    ``load_data``. Each entry is printed as ``key: value`` with the key
    encoded as ``'utf8'``.

    Returns:
        int: always 0.
    """
    corpus = load_data()
    counts = calc_word_freq(corpus)

    def by_count(entry):
        # Sort key: the value part of a (key, value) pair.
        return entry[1]

    # Display the results, most frequent entry first.
    for entry in sorted(counts.items(), key=by_count, reverse=True):
        token, total = entry
        print('{}: {}'.format(token.encode('utf8'), total))
    return 0
def main():
    """Print every item returned by ``extract_nounphrases``.

    Loads the data with ``load_data``, passes it to
    ``extract_nounphrases`` and prints each returned item on its own line.

    Returns:
        int: always 0.
    """
    # Display the results.
    for phrase in extract_nounphrases(load_data()):
        print(phrase)
    # Original note: "3897 words" (presumably the output size of one run;
    # not verifiable from this code).
    return 0
def main():
    """Print every item returned by ``extract_sahensetsuzoku_nouns``.

    Loads the data with ``load_data``, passes it to
    ``extract_sahensetsuzoku_nouns`` and prints each returned item.

    Returns:
        int: always 0.
    """
    loaded = load_data()
    found = extract_sahensetsuzoku_nouns(loaded)

    # Display the results.
    for entry in found:
        print(entry)
    # Original note: "3897 words" (presumably the output size of one run;
    # not verifiable from this code).
    return 0
def main():
    """Print every item returned by ``extract_nounseries``.

    Loads the data with ``load_data``, passes it to ``extract_nounseries``
    and prints each returned item on its own line.

    Returns:
        int: always 0.
    """
    series_list = extract_nounseries(load_data())

    # Display the results.
    for series in series_list:
        print(series)
    return 0
def main():
    """Print every item returned by ``extract_verbs_base`` as UTF-8 bytes.

    Loads the data with ``load_data``, passes it to ``extract_verbs_base``
    and prints each returned item after encoding it with ``'utf8'``.

    Returns:
        int: always 0.
    """
    # NOTE(review): printing an encoded value suggests this targets
    # Python 2; on Python 3 the same call prints a bytes repr -- confirm.
    base_forms = extract_verbs_base(load_data())

    # Display the results.
    for form in base_forms:
        print(form.encode('utf8'))
    # Original note: "3897 words" (presumably the output size of one run;
    # not verifiable from this code).
    return 0
def main():
    """Plot the sorted frequencies against their rank on log-log axes.

    The frequency values from ``calc_word_freq`` are sorted in descending
    order and plotted with ``plt.loglog`` against the ranks 1..N, then the
    figure is shown.

    Returns:
        int: always 0.
    """
    counts = sorted(calc_word_freq(load_data()).values(), reverse=True)

    # Ranks start at 1 so that the first point is representable on a log axis.
    ranks = range(1, len(counts) + 1)

    plt.loglog(ranks, counts)
    plt.show()
    return 0
def main():
    """Show a log-log plot of frequency versus rank.

    Loads the data, computes the frequency mapping with
    ``calc_word_freq``, sorts its values from largest to smallest and
    plots them against the 1-based rank using ``plt.loglog``.

    Returns:
        int: always 0.
    """
    corpus = load_data()
    table = calc_word_freq(corpus)

    # Largest frequency first, so position i holds the (i + 1)-th rank.
    descending = sorted(table.values(), reverse=True)
    last_rank = len(descending)

    plt.loglog(range(1, last_rank + 1), descending)
    plt.show()
    return 0
def main():
    """Show two histograms of the word-frequency values.

    The first histogram covers every value in the mapping returned by
    ``calc_word_freq``; the second covers only the values greater than 50.
    Both use 100 bins and each is displayed with its own ``plt.show()``.

    Returns:
        int: always 0.
    """
    data = load_data()
    freq = calc_word_freq(data)

    # Materialise the values once as a real list: on Python 3 ``values()``
    # is a view and ``filter()`` is a lazy iterator, neither of which is a
    # dependable array-like input for ``plt.hist``.
    counts = list(freq.values())

    plt.hist(counts, bins=100)
    plt.show()

    # Try again with the words of frequency 50 or less removed.
    plt.hist([count for count in counts if count > 50], bins=100)
    plt.show()
    return 0
def main():
    """Plot histograms of the frequency values, with and without a cutoff.

    Loads the data and computes the frequency mapping with
    ``calc_word_freq``. A 100-bin histogram of all values is shown first,
    followed by a 100-bin histogram restricted to values greater than 50.

    Returns:
        int: always 0.
    """
    data = load_data()
    freq = calc_word_freq(data)

    # Convert to a concrete list up front so the same sequence works on
    # both Python 2 (where values() is already a list) and Python 3 (where
    # it is a view, and filter() would return a lazy iterator).
    values = list(freq.values())

    plt.hist(values, bins=100)
    plt.show()

    # Try again with the words of frequency 50 or less removed.
    frequent_only = [value for value in values if value > 50]
    plt.hist(frequent_only, bins=100)
    plt.show()
    return 0
def main():
    """Draw a bar chart of the highest-frequency entries.

    Loads the data, computes the frequency mapping with ``calc_word_freq``
    and plots up to ten entries with the largest values, using their keys
    as the x tick labels.

    Returns:
        int: always 0.
    """
    data = load_data()
    freq = calc_word_freq(data)

    # Take the top ten entries (fewer when the mapping is smaller).
    top = sorted(freq.items(), key=lambda t: t[1], reverse=True)[:10]

    # Split into keys and frequencies. Comprehensions are used instead of
    # ``zip(*top)`` so that an empty result does not fail to unpack.
    keys = [key for key, _ in top]
    values = [count for _, count in top]

    # Derive the x positions from the number of entries actually present;
    # a fixed range(10) would not match ``values`` when fewer than ten
    # entries exist.
    positions = range(len(top))
    width = 0.5

    # NOTE(review): ticks at i + width coincide with the bar centres only
    # when bars are edge-aligned (the matplotlib default before 2.0) --
    # confirm against the installed version.
    plt.bar([i + width / 2.0 for i in positions], values, width)
    plt.xticks([i + width for i in positions], keys)
    plt.show()
    return 0
def main():
    """Plot the most frequent entries as a bar chart.

    The frequency mapping from ``calc_word_freq`` is sorted by value in
    descending order, truncated to at most ten entries, and drawn with
    ``plt.bar``; the entry keys become the x tick labels.

    Returns:
        int: always 0.
    """
    data = load_data()
    freq = calc_word_freq(data)

    # Take the top ten entries (fewer when the mapping is smaller).
    words = sorted(freq.items(), key=lambda t: t[1], reverse=True)[:10]

    # Separate keys from frequencies without ``zip(*words)``, which cannot
    # be unpacked into two names when ``words`` is empty.
    keys = [word for word, _ in words]
    values = [count for _, count in words]

    # Use as many x positions as there are bars; the previous hard-coded
    # range(10) mismatched ``values`` for mappings with under ten entries.
    count = len(words)
    width = 0.5
    bar_positions = [i + width / 2.0 for i in range(count)]
    tick_positions = [i + width for i in range(count)]

    # NOTE(review): ticks at i + width coincide with the bar centres only
    # when bars are edge-aligned (the matplotlib default before 2.0) --
    # confirm against the installed version.
    plt.bar(bar_positions, values, width)
    plt.xticks(tick_positions, keys)
    plt.show()
    return 0