예제 #1
0
def main():
    """Plot word frequency against frequency rank on log-log axes.

    Counts the ``"surface"`` value of every morpheme returned by
    ``nlp_30.mecab_morpheme()``, orders the counts from most to least
    frequent, and plots count (y-axis) against 1-based rank (x-axis) with
    both axes on a logarithmic scale.  The figure is shown with
    ``plt.show()``; nothing is returned.
    """
    morpheme_list = nlp_30.mecab_morpheme()
    counter = Counter(morpheme["surface"] for morpheme in morpheme_list)

    # Task: plot a log-log graph with the word's frequency rank on the
    # x-axis and its frequency on the y-axis.
    # One y value per distinct word, highest count first.
    frequencies = sorted(counter.values(), reverse=True)
    # Ranks are simply 1..N; ties are NOT collapsed into a shared rank,
    # so every distinct word occupies its own x position.
    ranks = list(range(1, len(frequencies) + 1))

    plt.plot(ranks, frequencies)
    plt.xscale("log")
    plt.yscale("log")
    plt.show()
예제 #2
0
def main():
    """Print every distinct surface form whose part of speech is "動詞".

    Morphemes come from ``nlp_30.mecab_morpheme()``; each is read through
    its ``"pos"`` and ``"surface"`` keys.  Duplicates are collapsed with a
    set, so the output order is whatever order the set iterates in.
    """
    verbs = {
        entry["surface"]
        for entry in nlp_30.mecab_morpheme()
        if entry["pos"] == "動詞"
    }
    for surface in verbs:
        print(surface)
예제 #3
0
def main():
    """Print every distinct surface form tagged "名詞" / "サ変接続".

    A morpheme from ``nlp_30.mecab_morpheme()`` qualifies when its
    ``"pos"`` is "名詞" and its ``"pos1"`` is "サ変接続".  Qualifying
    surfaces are de-duplicated with a set and printed one per line, in set
    iteration order.
    """
    sahen_surfaces = set()
    for entry in nlp_30.mecab_morpheme():
        if entry["pos"] != "名詞":
            continue
        # "pos1" is only consulted for nouns, as in a short-circuit `and`.
        if entry["pos1"] != "サ変接続":
            continue
        sahen_surfaces.add(entry["surface"])

    for surface in sahen_surfaces:
        print(surface)
예제 #4
0
def main():
    """Print the ten most frequent surface forms with their counts.

    Counts the ``"surface"`` value of every morpheme returned by
    ``nlp_30.mecab_morpheme()`` and prints one ``<surface> <count>`` line
    for each of the ten most common entries, most frequent first.
    """
    morpheme_list = nlp_30.mecab_morpheme()
    counter = Counter(morpheme["surface"] for morpheme in morpheme_list)
    for word, cnt in counter.most_common(10):
        # Single pre-formatted argument: produces the same "word count"
        # line whether `print` is a statement or a function.
        print("%s %s" % (word, cnt))
예제 #5
0
def main():
    """Print every noun + "の" + noun phrase found in the morpheme sequence.

    Slides a three-morpheme window over the result of
    ``nlp_30.mecab_morpheme()``.  Whenever the middle morpheme's surface is
    "の" and both neighbours have ``"pos"`` equal to "名詞", a separator line
    ("------") is printed followed by the three surfaces concatenated.
    Sequences shorter than three morphemes produce no output.
    """
    morpheme_list = nlp_30.mecab_morpheme()
    # Each tuple is (previous, current, next); the first and last morphemes
    # can never be the middle element, so no bounds checks are needed.
    windows = zip(morpheme_list, morpheme_list[1:], morpheme_list[2:])
    for before, particle, after in windows:
        if particle["surface"] != "の":
            continue
        if before["pos"] == "名詞" and after["pos"] == "名詞":
            print("------")
            print(before["surface"] + particle["surface"] + after["surface"])
예제 #6
0
def main():
    """Print every run of two or more consecutive nouns, one run per line.

    Walks the morphemes returned by ``nlp_30.mecab_morpheme()`` and groups
    adjacent morphemes whose ``"pos"`` is "名詞".  A run is kept only when it
    contains at least two nouns.  All runs are collected first and then
    printed, each as its surfaces separated by single spaces.

    A run that extends to the very last morpheme is also reported; it is
    flushed after the loop rather than relying on a following non-noun.
    """
    morpheme_list = nlp_30.mecab_morpheme()

    noun_runs = []
    current_run = []
    for morpheme in morpheme_list:
        if morpheme["pos"] == "名詞":
            current_run.append(morpheme["surface"])
            continue
        # A non-noun closes whatever run was in progress.
        if len(current_run) > 1:
            noun_runs.append(current_run)
        current_run = []
    # Flush a run that is still open when the input ends.
    if len(current_run) > 1:
        noun_runs.append(current_run)

    for run in noun_runs:
        # The trailing space before the newline is kept deliberately so the
        # emitted bytes match the historical output format of this script.
        print(" ".join(run) + " ")
예제 #7
0
def main():
    """Draw a horizontal bar chart of part-of-speech frequencies.

    Counts the ``"pos"`` value of every morpheme returned by
    ``nlp_30.mecab_morpheme()`` and plots one horizontal bar per tag, in
    descending order of frequency, labelling each bar with its tag.  The
    figure is shown with ``plt.show()``; nothing is returned.
    """
    morpheme_list = nlp_30.mecab_morpheme()
    counter = Counter(morpheme["pos"] for morpheme in morpheme_list)

    labels = []
    counts = []
    for pos, cnt in counter.most_common():
        # Tick labels must be text: decode UTF-8 byte strings, and leave
        # values that are already text untouched instead of failing.
        if isinstance(pos, bytes):
            pos = pos.decode("utf-8")
        labels.append(pos)
        counts.append(cnt)

    positions = list(range(len(labels)))

    plt.barh(positions, counts, align="center")  # center bars on their ticks
    plt.yticks(positions, labels)
    plt.show()