def main(f):
    """Plot a histogram of word frequencies and save it as a PNG.

    ``f`` is passed to ``knock30.parse``. The result is iterated as a
    sequence of sentences, each a sequence of token dicts, and the
    ``"surface"`` entry of every token is counted. The counts are drawn with
    ``plt.hist`` and the figure is written to ``output_knock38.png``.
    """
    counts = defaultdict(int)
    for sentence in knock30.parse(f):
        for token in sentence:
            counts[token["surface"]] += 1

    # Only the occurrence counts are plotted; the words themselves are unused.
    frequencies = sorted(counts.values(), reverse=True)
    plt.hist(frequencies)
    plt.savefig("output_knock38.png")
def main(f):
    """Print the ten most frequent surface forms with their counts.

    ``f`` is passed to ``knock30.parse``; the ``"surface"`` entry of every
    token in every sentence is counted. The ten highest counts are printed
    one per line as ``<surface> <count>``, most frequent first.
    """
    counts = defaultdict(int)
    for sentence in knock30.parse(f):
        for token in sentence:
            counts[token["surface"]] += 1

    ranked = sorted(counts.items(), key=lambda item: item[1], reverse=True)
    for surface, count in ranked[:10]:
        # Single-argument print(...) produces the same output on Python 2
        # as the statement form "print surface, count".
        print("%s %s" % (surface, count))
def main(f):
    """Draw a log-log rank/frequency bar plot and save it as a PNG.

    ``f`` is passed to ``knock30.parse``; the ``"surface"`` entry of every
    token in every sentence is counted. The counts are sorted in descending
    order and plotted against their rank with both axes on a logarithmic
    scale. The figure is written to ``output_knock39.png``.
    """
    counts = defaultdict(int)
    for sentence in knock30.parse(f):
        for token in sentence:
            counts[token["surface"]] += 1

    frequencies = sorted(counts.values(), reverse=True)
    # Ranks are 1-based: a logarithmic x axis cannot place a bar at x = 0,
    # so 0-based positions would lose the most frequent word.
    ranks = range(1, len(frequencies) + 1)

    plt.xscale("log")
    plt.yscale("log")
    plt.bar(ranks, frequencies)
    plt.savefig("output_knock39.png")
def main(f):
    """Plot the (up to) ten most frequent surface forms as a bar chart.

    ``f`` is passed to ``knock30.parse``; the ``"surface"`` entry of every
    token in every sentence is counted. The labels of the most frequent
    entries are printed, then a bar chart of their counts is written to
    ``output_knock37.png``.
    """
    counts = defaultdict(int)
    for sentence in knock30.parse(f):
        for token in sentence:
            counts[token["surface"]] += 1

    ranked = sorted(counts.items(), key=lambda item: item[1], reverse=True)[:10]
    labels = [surface for surface, _ in ranked]
    heights = [count for _, count in ranked]
    print(labels)

    # Size the x positions from the data rather than assuming ten bars:
    # with fewer than ten distinct words a fixed range(10) would not match
    # the number of heights and labels.
    plt.bar(range(len(heights)), heights, tick_label=labels)
    plt.savefig("output_knock37.png")
def main(f):
    """Print every "noun + の + noun" phrase found in the parsed text.

    ``f`` is passed to ``knock30.parse``. The result is iterated as a
    sequence of sentences, each a sequence of token dicts that are read via
    their ``"pos"`` and ``"surface"`` keys. For every three consecutive
    tokens within a sentence where the outer two have ``pos == "名詞"`` and
    the middle one has ``surface == "の"``, the three surfaces are printed
    concatenated on one line.
    """
    for sentence in knock30.parse(f):
        tokens = list(sentence)  # slicing below needs a real sequence
        # A three-token window is examined at every position, so:
        #  - no state survives from one match (or one sentence) to the next;
        #  - a noun directly preceded by another noun can still open a phrase;
        #  - the closing noun of one phrase can open the next ("AのBのC"
        #    yields both "AのB" and "BのC").
        for left, particle, right in zip(tokens, tokens[1:], tokens[2:]):
            if (left["pos"] == "名詞"
                    and particle["surface"] == "の"
                    and right["pos"] == "名詞"):
                print(left["surface"] + particle["surface"] + right["surface"])
def main(f):
    """Print every run of two or more consecutive nouns, concatenated.

    ``f`` is passed to ``knock30.parse``. The result is iterated as a
    sequence of sentences, each a sequence of token dicts that are read via
    their ``"pos"`` and ``"surface"`` keys. Within each sentence, maximal
    runs of adjacent tokens with ``pos == "名詞"`` are collected; a run of
    length two or more is printed as the concatenation of its surfaces.
    A single isolated noun is not printed.
    """
    for sentence in knock30.parse(f):
        run = []  # surfaces of the noun run currently being collected
        for token in sentence:
            if token["pos"] == "名詞":
                run.append(token["surface"])
                continue
            # A non-noun token ends the current run.
            if len(run) > 1:
                print("".join(run))
            run = []
        # Flush a run that reaches the end of the sentence; it would
        # otherwise be dropped or merged with nouns that open the next
        # sentence.
        if len(run) > 1:
            print("".join(run))
def main(f):
    """Print the surface form of every verb token, one per line.

    ``f`` is passed to ``knock30.parse``; each token dict whose ``"pos"``
    is ``"動詞"`` has its ``"surface"`` printed, in the order encountered.
    """
    for sentence in knock30.parse(f):
        # Lazy generator: tokens are still examined and printed one by one.
        verb_surfaces = (
            token["surface"] for token in sentence if token["pos"] == "動詞"
        )
        for surface in verb_surfaces:
            print(surface)
def main(f):
    """Print the surface form of every noun tagged "サ変接続", one per line.

    ``f`` is passed to ``knock30.parse``; a token is printed when its
    ``"pos"`` is ``"名詞"`` and its ``"pos1"`` is ``"サ変接続"``.
    """
    for sentence in knock30.parse(f):
        for token in sentence:
            if token["pos"] != "名詞":
                continue
            # "pos1" is only consulted for nouns, as in a short-circuit "and".
            if token["pos1"] != "サ変接続":
                continue
            print(token["surface"])