Esempio n. 1
0
from knock30 import load_mecab

# Gather the distinct 'base' values of every morpheme whose 'pos' field is
# '動詞' (verb), across all sentences yielded by load_mecab().
verb = {
    token['base']
    for sentence in load_mecab()
    for token in sentence
    if token['pos'] == '動詞'
}
print(verb)
Esempio n. 2
0
# -*- coding: utf-8 -*-

from knock30 import load_mecab

# Input file handed to load_mecab.
path = 'neko.txt.mecab'

# 'surface' values of every morpheme whose 'pos' field is '動詞' (verb),
# in input order; repeated occurrences are kept.
ans = [
    token['surface']
    for sentence in load_mecab(path)
    for token in sentence
    if token['pos'] == '動詞'
]

print(ans)
Esempio n. 3
0
# -*- coding: utf-8 -*-

from knock30 import load_mecab
from knock36 import morpheme_count
import matplotlib.pyplot as plt


def log_log_graph(rank):
    """Scatter-plot each count against its 1-based position on log-log axes.

    Args:
        rank: Iterable of ``(morpheme, count)`` pairs. A pair's position in
            the iterable (starting at 1) is its x value and its count is the
            y value; the morpheme itself is not used.

    Shows the figure via ``plt.show()`` and returns nothing.
    """
    counts = [count for _, count in rank]
    positions = list(range(1, len(counts) + 1))
    plt.scatter(positions, counts, s=10, marker='.')
    plt.yscale('log')
    plt.xscale('log')
    plt.show()


if __name__ == '__main__':
    path = 'neko.txt.mecab'
    frequencies = morpheme_count(load_mecab(path))
    # Order the (morpheme, count) pairs from highest to lowest count; the
    # sort is stable, so pairs with equal counts keep their original order.
    rank = sorted(
        frequencies.items(),
        key=lambda pair: pair[1],
        reverse=True,
    )
    log_log_graph(rank)
Esempio n. 4
0
# -*- coding: utf-8 -*-

from knock30 import load_mecab
from collections import defaultdict


def morpheme_count(sentences):
    """Count how many times each surface form occurs across all sentences.

    Args:
        sentences: Iterable of sentences, each an iterable of morpheme
            mappings that provide a ``'surface'`` key.

    Returns:
        A ``defaultdict`` mapping each surface string to its number of
        occurrences. Looking up an unseen key yields 0 (and inserts it).
    """
    # ``int`` produces the same default of 0 as a lambda would, but unlike a
    # locally defined lambda it keeps the returned mapping picklable.
    count = defaultdict(int)
    for sentence in sentences:
        for morpheme in sentence:
            count[morpheme['surface']] += 1
    return count


if __name__ == '__main__':
    path = 'neko.txt.mecab'
    frequencies = morpheme_count(load_mecab(path))
    # Highest count first; the stable sort keeps equal counts in their
    # original relative order.
    ranked = sorted(
        frequencies.items(),
        key=lambda pair: pair[1],
        reverse=True,
    )
    # One "<morpheme> <count>" line per distinct morpheme.
    for morpheme, count in ranked:
        print(f'{morpheme} {count}')
Esempio n. 5
0
def word_counter(num):
    """Return the ``num`` most frequent surface forms with their counts.

    Args:
        num: Number of entries to return; passed straight to
            ``Counter.most_common``.

    Returns:
        A list of ``(surface, count)`` tuples, most common first.
    """
    tally = Counter()
    for sentence in load_mecab():
        # Add one to the tally for each morpheme's surface in this sentence.
        tally.update(token['surface'] for token in sentence)
    return tally.most_common(num)
Esempio n. 6
0
# -*- coding: utf-8 -*-

from knock30 import load_mecab
from knock36 import morpheme_count
import matplotlib.pyplot as plt


def histogram(data):
    """Plot a 20-bin histogram of the values of ``data`` over the range 1-50.

    Args:
        data: Mapping whose values are the counts to plot; its keys are
            ignored.

    Shows the figure via ``plt.show()`` and returns nothing.
    """
    counts = list(data.values())
    plt.hist(counts, bins=20, range=(1, 50))
    plt.show()


if __name__ == '__main__':
    path = 'neko.txt.mecab'
    # Count the morphemes of the input file, then plot the count distribution.
    frequencies = morpheme_count(load_mecab(path))
    histogram(frequencies)