def main_36():
    """Count how often each surface form occurs, most frequent first.

    Reads the nested morpheme collection produced by ``ch04_30.main_30()``
    (presumably one inner sequence per sentence, each item a dict with a
    ``'surface'`` key -- confirm against ``ch04_30``).

    Returns:
        list[tuple[str, int]]: ``(surface, count)`` pairs sorted by count in
        descending order.
    """
    counts = defaultdict(int)
    for sentence in ch04_30.main_30():
        for morph in sentence:
            counts[morph['surface']] += 1

    # Sort by the count (the second element of each pair), largest first.
    ranked = sorted(counts.items(), key=lambda pair: pair[1], reverse=True)
    return ranked
def main_35():
    """Extract runs of two or more consecutive nouns as joined strings.

    Each inner sequence returned by ``ch04_30.main_30()`` is split into
    maximal runs of items whose ``'pos'`` is ``"名詞"`` (noun). Runs made of
    a single noun are ignored; longer runs are concatenated by ``'surface'``.

    Returns:
        list[str]: One concatenated string per noun run of length >= 2, in
        order of appearance.
    """
    noun_runs = []
    for sentence in ch04_30.main_30():
        grouped = groupby(sentence, key=lambda morph: morph['pos'] == "名詞")
        for is_noun, run in grouped:
            if not is_noun:
                continue
            nouns = list(run)
            # A lone noun is not a "connection" of nouns; need at least two.
            if len(nouns) >= 2:
                noun_runs.append(''.join(m['surface'] for m in nouns))
    return noun_runs
def main_34_ngram():
    """Collect "A の B" noun phrases using nltk trigrams.

    Variant of ``main_34`` that walks each inner sequence returned by
    ``ch04_30.main_30()`` with ``nltk.util.ngrams`` instead of index
    arithmetic. A trigram matches when its first and third items have
    ``'pos' == "名詞"`` (noun) and its middle item has ``'base' == "の"``.

    Returns:
        list[str]: For every matching trigram, the first surface form, the
        literal ``"の"``, and the third surface form concatenated, in order
        of appearance.
    """
    # Variant that uses nltk's n-gram helper.
    from nltk.util import ngrams

    a_no_b_ngram = []
    for line in ch04_30.main_30():
        # A trigram needs at least three items; exactly three is valid
        # (the previous ``> 3`` check wrongly skipped such sequences).
        if len(line) < 3:
            continue
        for first, middle, last in ngrams(line, 3):
            if (first['pos'] == "名詞"
                    and middle['base'] == "の"
                    and last['pos'] == "名詞"):
                a_no_b_ngram.append(
                    str(first['surface']) + "の" + str(last['surface']))
    # Return the collected phrases, matching main_34 (previously the result
    # was built but never returned).
    return a_no_b_ngram
def main_34():
    """Collect "A の B" noun phrases from the morpheme data.

    Slides a window of three consecutive items over each inner sequence
    returned by ``ch04_30.main_30()``. A window matches when its first and
    third items have ``'pos' == "名詞"`` (noun) and its middle item has
    ``'base' == "の"``.

    Returns:
        list[str]: For every matching window, the first surface form, the
        literal ``"の"``, and the third surface form concatenated, in order
        of appearance.
    """
    phrases = []
    for sentence in ch04_30.main_30():
        # Three offset views of the sentence, zipped, form its 3-grams.
        trigrams = zip(sentence, sentence[1:], sentence[2:])
        for head, particle, tail in trigrams:
            if head['pos'] != "名詞":
                continue
            if particle['base'] != "の":
                continue
            if tail['pos'] != "名詞":
                continue
            phrases.append(str(head['surface']) + "の" + str(tail['surface']))
    return phrases
def main_32():
    """Return the base form of every verb in the morpheme data.

    Flattens the nested collection from ``ch04_30.main_30()`` and keeps the
    ``'base'`` value of each item whose ``'pos'`` is ``'動詞'`` (verb).

    Returns:
        list: Base forms of the verbs, in order of appearance (duplicates
        are kept).
    """
    verb_bases = []
    for morph in chain.from_iterable(ch04_30.main_30()):
        if morph['pos'] == '動詞':
            verb_bases.append(morph['base'])
    return verb_bases
def main_33():
    """Return the base form of every sahen-connection noun.

    Flattens the nested collection from ``ch04_30.main_30()`` and keeps the
    ``'base'`` value of each item whose ``'pos1'`` is ``'サ変接続'`` and
    whose ``'pos'`` is ``"名詞"`` (noun).

    Returns:
        list: Matching base forms, in order of appearance (duplicates are
        kept).
    """
    sahen_nouns = []
    for morph in chain.from_iterable(ch04_30.main_30()):
        # Check the sub-category first, then the main part of speech.
        if not (morph['pos1'] == 'サ変接続' and morph['pos'] == "名詞"):
            continue
        sahen_nouns.append(morph['base'])
    return sahen_nouns