Exemple #1
0
def main_36():
    """Count how often each surface form occurs, most frequent first.

    Loads the data from ``ch04_30.main_30()``, flattens it one level with
    ``chain.from_iterable`` and tallies the ``'surface'`` value of every
    entry.

    Returns:
        A list of ``(surface, count)`` tuples sorted by count in
        descending order.
    """
    sentences = ch04_30.main_30()
    counts = defaultdict(int)
    for morph in chain.from_iterable(sentences):
        counts[morph['surface']] += 1

    # Order by the count (second item of each pair), largest first.
    ranking = list(counts.items())
    ranking.sort(key=lambda pair: pair[1], reverse=True)
    return ranking
Exemple #2
0
def main_35():
    """Collect runs of two or more consecutive nouns as joined strings.

    For every row returned by ``ch04_30.main_30()``, adjacent entries are
    grouped by whether their ``'pos'`` equals "名詞" (noun). Each noun group
    containing more than one entry contributes the concatenation of its
    ``'surface'`` values.

    Returns:
        A list of concatenated noun-run strings, in order of occurrence.
    """
    def is_noun(morph):
        return morph['pos'] == "名詞"

    noun_runs = []
    for sentence in ch04_30.main_30():
        for noun_flag, group in groupby(sentence, is_noun):
            members = list(group)
            # Skip non-noun groups and isolated single nouns.
            if not noun_flag or len(members) <= 1:
                continue
            noun_runs.append(''.join(m['surface'] for m in members))
    return noun_runs
Exemple #3
0
def main_34_ngram():
    """Extract "A の B" phrases (noun, "の", noun) using NLTK 3-grams.

    Every line returned by ``ch04_30.main_30()`` is scanned as a sequence of
    3-grams. A 3-gram matches when its first and third entries have ``'pos'``
    equal to "名詞" (noun) and its middle entry has ``'base'`` equal to "の".

    Returns:
        A list of strings built as ``<first surface> + "の" + <third
        surface>``, in order of occurrence.
    """
    # Variant that uses nltk's ngrams helper instead of manual indexing.
    from nltk.util import ngrams

    morph_list = ch04_30.main_30()
    a_no_b_ngram = []
    for line in morph_list:
        # A 3-gram needs at least three entries; a line of exactly three
        # entries is a valid candidate and must not be skipped.
        if len(line) < 3:
            continue
        for first, middle, last in ngrams(line, 3):
            if (first['pos'] == "名詞"
                    and middle['base'] == "の"
                    and last['pos'] == "名詞"):
                a_no_b_ngram.append(
                    str(first['surface']) + "の" + str(last['surface']))
    # Hand the collected phrases back to the caller.
    return a_no_b_ngram
Exemple #4
0
def main_34():
    """Extract "A の B" phrases (noun, "の", noun) from the morpheme data.

    Each line returned by ``ch04_30.main_30()`` is scanned with a sliding
    window of three consecutive entries. A window matches when the first and
    third entries have ``'pos'`` equal to "名詞" (noun) and the middle entry
    has ``'base'`` equal to "の".

    Returns:
        A list of strings built as ``<first surface> + "の" + <third
        surface>``, in order of occurrence.
    """
    phrases = []
    for sentence in ch04_30.main_30():
        # Walk every window of three adjacent entries.
        for head, particle, tail in zip(sentence, sentence[1:], sentence[2:]):
            is_match = (head['pos'] == "名詞"
                        and particle['base'] == "の"
                        and tail['pos'] == "名詞")
            if is_match:
                phrases.append(
                    str(head['surface']) + "の" + str(tail['surface']))
    return phrases
Exemple #5
0
def main_32():
    """Return the base form of every verb in the morpheme data.

    Flattens the result of ``ch04_30.main_30()`` one level and keeps the
    ``'base'`` value of each entry whose ``'pos'`` equals '動詞' (verb).

    Returns:
        A list of base-form values, in order of occurrence.
    """
    verb_bases = []
    for morph in chain.from_iterable(ch04_30.main_30()):
        if morph['pos'] == '動詞':
            verb_bases.append(morph['base'])
    return verb_bases
Exemple #6
0
def main_33():
    """Return the base form of every noun tagged 'サ変接続'.

    Flattens the result of ``ch04_30.main_30()`` one level and keeps the
    ``'base'`` value of each entry whose ``'pos1'`` equals 'サ変接続' and
    whose ``'pos'`` equals "名詞" (noun).

    Returns:
        A list of base-form values, in order of occurrence.
    """
    sahen_bases = []
    for morph in chain.from_iterable(ch04_30.main_30()):
        # Check the sub-category first, then the main part of speech.
        if morph['pos1'] == 'サ変接続' and morph['pos'] == "名詞":
            sahen_bases.append(morph['base'])
    return sahen_bases