Esempio n. 1
0
def word_count() -> "Counter":
    """Count how often each surface form occurs in the parsed text.

    Calls ``mecab`` on the module-level ``file`` and tallies the
    ``'surface'`` value of every token in every line it yields.

    Returns:
        A ``Counter`` mapping each surface form to its number of
        occurrences.
    """
    # The annotation is quoted so it does not depend on ``Counter`` being
    # imported before this definition is executed.
    return Counter(
        word['surface']
        for line in mecab(file)
        for word in line
    )
Esempio n. 2
0
def get_v_sur(max_lines=5) -> list:
    """Collect the surface forms of verbs from the first parsed lines.

    Args:
        max_lines: How many leading lines of ``mecab(file)`` to scan.
            Defaults to 5, the limit that was previously hard-coded.
            Pass ``None`` to scan every line.

    Returns:
        A list holding the ``'surface'`` value of each token whose
        ``'pos'`` equals ``'動詞'``, in order of appearance.
    """
    # Imported locally so the function does not rely on a module-level import.
    from itertools import islice

    return [
        word['surface']
        for line in islice(mecab(file), max_lines)
        for word in line
        if word['pos'] == '動詞'
    ]
Esempio n. 3
0
def get_v_base(max_lines=5) -> list:
    """Collect the base forms of verbs from the first parsed lines.

    Args:
        max_lines: How many leading lines of ``mecab(file)`` to scan.
            Defaults to 5, the limit that was previously hard-coded.
            Pass ``None`` to scan every line.

    Returns:
        A list holding the ``'base'`` value of each token whose ``'pos'``
        equals ``'動詞'``, in order of appearance.
    """
    # Imported locally so the function does not rely on a module-level import.
    from itertools import islice

    return [
        word['base']
        for line in islice(mecab(file), max_lines)
        for word in line
        if word['pos'] == '動詞'
    ]
Esempio n. 4
0
def get_greedy_n(max_lines=10) -> list:
    """Extract maximal runs of consecutive nouns from the first parsed lines.

    A run is a sequence of two or more adjacent tokens whose ``'pos'`` is
    ``'名詞'``; the surfaces of the run are concatenated into one string.
    Runs never span two lines.

    Args:
        max_lines: How many leading lines of ``mecab(file)`` to scan.
            Defaults to 10, the limit that was previously hard-coded.
            Pass ``None`` to scan every line.

    Returns:
        A list of the concatenated noun runs, in order of appearance.
    """
    # Imported locally so the function does not rely on a module-level import.
    from itertools import islice

    noun_phrases = []
    for line in islice(mecab(file), max_lines):
        run = []  # Fresh per line so nouns cannot leak across a line break.
        for word in line:
            if word['pos'] == '名詞':
                run.append(word['surface'])
                continue
            if len(run) > 1:
                noun_phrases.append("".join(run))
            run = []
        # Flush a run that reaches the end of the line. Previously such a
        # run was either glued onto the next line's leading nouns or, on
        # the last scanned line, silently dropped.
        if len(run) > 1:
            noun_phrases.append("".join(run))
    return noun_phrases
Esempio n. 5
0
def co_occ(target) -> list:
    """Collect the words that appear in the same sentence as ``target``.

    The tokens yielded by ``mecab(file)`` are read as one stream across
    lines and split into sentences at every token whose surface is "。".
    For each sentence containing a token whose surface equals ``target``,
    all other surfaces of that sentence (including the closing "。") are
    added to the result. Tokens after the final "。" are never emitted.

    Args:
        target: Surface form to look for. Previously this argument was
            ignored and the search was hard-coded to '猫'.

    Returns:
        A flat list of co-occurring surface forms; duplicates are kept.
    """
    co_list = []
    sentence = []
    found = False
    for line in mecab(file):
        for word in line:
            surface = word['surface']
            if surface == target:
                found = True
            else:
                sentence.append(surface)
            if surface == "。":
                # Sentence boundary: keep the words only if target was seen.
                if found:
                    co_list.extend(sentence)
                sentence = []
                found = False
    return co_list