def main():
    """Plot word frequency against frequency rank on log-log axes.

    Counts how often each surface form occurs in the morphemes returned by
    ``nlp_30.mecab_morpheme()``, then plots the counts in descending order
    (vertical axis) against their 1-based position (horizontal axis) using
    ``plt``, with both axes switched to a logarithmic scale.
    """
    surface_counts = Counter(
        morpheme["surface"] for morpheme in nlp_30.mecab_morpheme()
    )
    # most_common() already yields counts from highest to lowest, so no
    # additional descending sort is needed.
    frequencies = [cnt for _, cnt in surface_counts.most_common()]
    # Positions start at 1 so the first point is representable on a log axis.
    ranks = list(range(1, len(frequencies) + 1))
    plt.plot(ranks, frequencies)
    plt.xscale("log")
    plt.yscale("log")
    plt.show()
def main():
    """Print each distinct surface form whose pos is "動詞" (verb).

    Morphemes come from ``nlp_30.mecab_morpheme()``.  Surfaces are collected
    into a set, so duplicates are dropped and the print order is the set's
    iteration order.
    """
    verbs = {
        token["surface"]
        for token in nlp_30.mecab_morpheme()
        if token["pos"] == "動詞"
    }
    for surface in verbs:
        print(surface)
def main():
    """Print each distinct surface of nouns whose pos1 is "サ変接続".

    Morphemes come from ``nlp_30.mecab_morpheme()``.  A morpheme qualifies
    when its pos is "名詞" (noun) and its pos1 is "サ変接続"; qualifying
    surfaces are de-duplicated through a set before being printed one per
    line.
    """
    sahen_surfaces = set()
    for token in nlp_30.mecab_morpheme():
        if token["pos"] != "名詞":
            continue
        if token["pos1"] != "サ変接続":
            continue
        sahen_surfaces.add(token["surface"])
    for surface in sahen_surfaces:
        print(surface)
def main():
    """Print up to ten of the most frequent surface forms with their counts.

    Surfaces are taken from ``nlp_30.mecab_morpheme()`` and tallied with a
    ``Counter``; each output line is the surface, a space, and its count.
    """
    tally = Counter(token["surface"] for token in nlp_30.mecab_morpheme())
    for surface, occurrences in tally.most_common(10):
        # One pre-formatted string reproduces the "word count" line layout.
        print("%s %s" % (surface, occurrences))
def main():
    """Print every "noun の noun" phrase found in the morpheme sequence.

    Examines each window of three consecutive morphemes from
    ``nlp_30.mecab_morpheme()``.  When the middle surface is "の" and both
    neighbours have pos "名詞" (noun), a "------" separator line is printed,
    followed by the three surfaces concatenated on the next line.
    """
    morphemes = nlp_30.mecab_morpheme()
    # Three staggered slices yield (previous, current, next) triples, which
    # avoids index arithmetic and never reads past either end of the list.
    for before, middle, after in zip(morphemes, morphemes[1:], morphemes[2:]):
        if middle["surface"] != "の":
            continue
        if before["pos"] == "名詞" and after["pos"] == "名詞":
            print("------")
            print(before["surface"] + middle["surface"] + after["surface"])
def main():
    """Print every maximal run of two or more consecutive nouns.

    Walks the morphemes from ``nlp_30.mecab_morpheme()`` and groups the
    surfaces of adjacent morphemes whose pos is "名詞" (noun).  Runs of
    length one are ignored.  Each remaining run is printed on its own line
    with the surfaces separated by single spaces.
    """
    noun_runs = []
    current_run = []
    for morpheme in nlp_30.mecab_morpheme():
        if morpheme["pos"] == "名詞":
            current_run.append(morpheme["surface"])
            continue
        # A non-noun closes whatever run was in progress.
        if len(current_run) > 1:
            noun_runs.append(current_run)
        current_run = []
    # A run that reaches the final morpheme has no following non-noun to
    # close it, so it must be flushed explicitly or it would be lost.
    if len(current_run) > 1:
        noun_runs.append(current_run)
    for run in noun_runs:
        # Each line ends with a space before the newline; this trailing
        # space is part of the established output format.
        print(" ".join(run) + " ")
def main():
    """Draw a horizontal bar chart of how often each pos value occurs.

    The pos of every morpheme from ``nlp_30.mecab_morpheme()`` is tallied
    with a ``Counter``.  Bars are laid out in ``most_common()`` order at
    positions 0, 1, 2, ... and labelled with the pos value decoded from
    UTF-8 into a unicode string.
    """
    tally = Counter(token["pos"] for token in nlp_30.mecab_morpheme())
    labels = []
    counts = []
    for tag, occurrences in tally.most_common():
        labels.append(unicode(tag, encoding='utf-8'))
        counts.append(occurrences)
    positions = list(range(len(labels)))
    plt.barh(positions, counts, align="center")  # center each bar on its tick
    plt.yticks(positions, labels)
    plt.show()