예제 #1
0
from knock30 import load_morpheme
from collections import defaultdict
from knock36 import calc_word_frequency
from matplotlib import pyplot as plt

# Build the word -> frequency mapping from the MeCab-parsed text.
# NOTE(review): the exact return shapes of load_morpheme and
# calc_word_frequency are defined in knock30/knock36; this script only
# relies on words_freq exposing .items() with a sortable value.
path = 'neko.txt.mecab'
result = load_morpheme(path)
words_freq = calc_word_frequency(result)

# Rank (word, count) pairs by descending count.
dic = sorted(words_freq.items(), key=lambda x: x[1], reverse=True)

# Take at most the first 10 entries. Slicing instead of indexing over
# range(10) avoids an IndexError when fewer than 10 distinct words exist.
words = [word for word, _ in dic[:10]]
counts = [count for _, count in dic[:10]]

# Axis labels are Japanese, so a font family is set explicitly
# (presumably one that can render them on the author's machine).
plt.rcParams['font.family'] = 'AppleGothic'
plt.xlabel('出現頻度が高い10語')
plt.ylabel('出現頻度')
plt.bar(words, counts)
plt.show()
예제 #2
0
# 35. 名詞の連接
# 名詞の連接(連続して出現する名詞)を最長一致で抽出せよ.

from knock30 import load_morpheme
from typing import List, Dict

# One sentence: a list of morpheme dicts. The function below reads the
# "pos" and "surface" keys of each dict.
M = List[Dict[str, str]]
# A whole document: a list of sentences.
T = List[M]


def get_consecutive_nouns(morphemes: T) -> List[str]:
    """Extract maximal runs of two or more adjacent nouns.

    Each sentence is scanned left to right. Surfaces of adjacent morphemes
    whose ``"pos"`` equals ``"名詞"`` are concatenated into one string; a run
    is emitted only when it contains at least two nouns. Runs never span
    sentence boundaries.

    Args:
        morphemes: Sentences, each a list of dicts with at least the keys
            ``"pos"`` and ``"surface"``.

    Returns:
        The concatenated noun runs, in order of appearance.
    """
    consecutive_nouns = []  # type: List[str]
    for sentence in morphemes:
        nouns = []  # type: List[str]
        for morpheme in sentence:
            if morpheme["pos"] == "名詞":
                nouns.append(morpheme["surface"])
                continue
            # A non-noun terminates the current run.
            if len(nouns) > 1:
                consecutive_nouns.append("".join(nouns))
            nouns = []
        # Flush a run that reaches the end of the sentence; without this,
        # a trailing noun compound would be silently dropped.
        if len(nouns) > 1:
            consecutive_nouns.append("".join(nouns))
    return consecutive_nouns


if __name__ == "__main__":
    # Script entry point: print every extracted noun compound, one per line.
    compounds = get_consecutive_nouns(load_morpheme())
    for compound in compounds:
        print(compound)