# Plot the ten most frequent words of the parsed text as a bar chart.
from knock30 import load_morpheme
from collections import defaultdict
from knock36 import calc_word_frequency
from matplotlib import pyplot as plt

path = 'neko.txt.mecab'
result = load_morpheme(path)

# NOTE(review): presumably a mapping of word -> occurrence count; the code
# below only relies on `.items()` yielding (word, count) pairs.
words_freq = calc_word_frequency(result)

# Most frequent first.
dic = sorted(words_freq.items(), key=lambda x: x[1], reverse=True)

# Slice instead of indexing 0..9 so a corpus with fewer than ten distinct
# words yields a shorter chart rather than raising IndexError.
top = dic[:10]
words = [word for word, _ in top]
counts = [count for _, count in top]

# A font with CJK glyphs is needed so the Japanese labels render.
plt.rcParams['font.family'] = 'AppleGothic'
plt.xlabel('出現頻度が高い10語')
plt.ylabel('出現頻度')
plt.bar(words, counts)
plt.show()
# 35. Noun sequences
# Extract noun sequences (nouns that appear consecutively) using longest match.
from knock30 import load_morpheme
from typing import List, Dict

M = List[Dict[str, str]]  # one sentence: a list of morpheme dicts
T = List[M]  # a list of sentences


def get_consecutive_nouns(morphemes: T) -> List[str]:
    """Return every maximal run of two or more adjacent nouns, concatenated.

    Args:
        morphemes: Sentences, each a list of morpheme dicts. Only the
            ``"pos"`` and ``"surface"`` keys are read.

    Returns:
        The joined surface forms of each noun run of length >= 2, in order
        of appearance. Runs never span a sentence boundary.
    """
    consecutive_nouns = []  # type: List[str]
    for sentence in morphemes:
        nouns = []  # type: List[str]
        for morpheme in sentence:
            if morpheme["pos"] == "名詞":
                nouns.append(morpheme["surface"])
                continue
            # A non-noun ends the current run; keep it only if it is a
            # real sequence (more than one noun), then start over.
            if len(nouns) > 1:
                consecutive_nouns.append("".join(nouns))
            nouns = []
        # Flush a run that reaches the end of the sentence; without this a
        # sentence ending in nouns would silently lose its last sequence.
        if len(nouns) > 1:
            consecutive_nouns.append("".join(nouns))
    return consecutive_nouns


if __name__ == "__main__":
    for noun in get_consecutive_nouns(load_morpheme()):
        print(noun)