Example 1: write every dependency pair (source phrase, tab, the phrase it depends on) to Dependency.txt.
def main():
    out_path = 'Dependency.txt'
    with open(out_path, "w", encoding="utf8") as f:
        for chunks in load_cabocha_iter():
            for chunk in chunks:
                if chunk.dst == -1:
                    continue
                src = chunk.normalized_surface()
                dst = chunks[chunk.dst].normalized_surface()
                f.write(f'{src}\t{dst}\n')
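
All of the examples below lean on a shared `load_cabocha_iter` helper and `Chunk`/`Morph` classes defined elsewhere in the repository. The sketch here is a minimal reconstruction of the assumed interface, based only on how the examples use it: the input file name, the feature-field layout, and the punctuation-stripping in `normalized_surface` are assumptions, and Example 3 calls `load_cabocha_iter` with a positional argument whose meaning is not visible in these snippets.

class Morph:
    """One morpheme from a CaboCha/MeCab lattice line."""

    def __init__(self, surface, base, pos, pos1):
        self.surface = surface  # surface form
        self.base = base        # base (dictionary) form
        self.pos = pos          # part of speech, e.g. '名詞', '動詞', '助詞'
        self.pos1 = pos1        # POS subdivision, e.g. 'サ変接続'


class Chunk:
    """One bunsetsu (phrase) with its dependency links."""

    def __init__(self, dst):
        self.morphs = []  # list of Morph
        self.dst = dst    # index of the chunk this one depends on (-1 = root)
        self.srcs = []    # indices of the chunks that depend on this one

    def normalized_surface(self):
        # Chunk surface with punctuation stripped.
        return ''.join(m.surface for m in self.morphs if m.pos != '記号')


def load_cabocha_iter(path='neko.txt.cabocha'):  # file name is a guess
    """Yield one sentence at a time as a list of Chunk objects."""
    with open(path, encoding='utf8') as f:
        chunks = []
        for line in f:
            if line.startswith('* '):  # chunk header: '* 0 5D 1/2 -0.76...'
                dst = int(line.split()[2].rstrip('D'))
                chunks.append(Chunk(dst))
            elif line.strip() == 'EOS':  # end of sentence
                for i, chunk in enumerate(chunks):
                    if chunk.dst != -1:
                        chunks[chunk.dst].srcs.append(i)
                if chunks:
                    yield chunks
                chunks = []
            else:  # morpheme line: surface\tpos,pos1,...,base,...
                surface, features = line.rstrip('\n').split('\t')
                fields = features.split(',')
                chunks[-1].morphs.append(
                    Morph(surface, fields[6], fields[0], fields[1]))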
Example 2: mine "sahen noun + を + verb" functional-verb constructions together with the particles and phrases of their remaining dependents, writing them to out3.txt.
def main():
    with open('out3.txt', 'w', encoding='utf8') as f:
        for chunks in load_cabocha_iter():
            for chunk in chunks:
                # skip chunks that no other chunk depends on
                if not chunk.srcs:
                    continue

                # base forms of the verbs in this chunk
                verbs = [
                    morph.base for morph in chunk.morphs if morph.pos == '動詞'
                ]
                if not verbs:
                    continue

                # dependent (source) phrases that contain a particle
                particle_phrases = [
                    chunks[src] for src in chunk.srcs
                    if any(m.pos == '助詞' for m in chunks[src].morphs)
                ]
                if not particle_phrases:
                    continue

                # Look for a "sahen noun + を" pair: it becomes part of the
                # predicate and its phrase is removed from particle_phrases.
                predicate = ''
                for phrase in particle_phrases:
                    for i in range(len(phrase.morphs) - 1):
                        if (phrase.morphs[i].pos1 == 'サ変接続'
                                and phrase.morphs[i + 1].surface == 'を'):
                            predicate = f'{phrase.morphs[i].surface}を{verbs[0]}'
                            particle_phrases.remove(phrase)
                            break
                    else:
                        continue  # no match in this phrase; try the next one
                    break  # a predicate was built; stop scanning phrases
                else:
                    continue  # no "sahen noun + を" found; skip this chunk

                # [(particle, phrase), (particle, phrase), ...]
                particles_phrases = []
                for phrase in particle_phrases:
                    for morph in phrase.morphs:
                        if morph.pos == '助詞':
                            particles_phrases.append(
                                (morph.surface, phrase.normalized_surface()))
                            break

                pp = sorted(particles_phrases)
                f.write(
                    f'{predicate}\t{" ".join([p[0] for p in pp])}\t{" ".join([p[1] for p in pp])}\n'
                )
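
The nested `for ... else` blocks in the middle of Example 2 use Python's loop-else idiom to break out of two loops at once: the `else` of a `for` runs only when the loop completed without hitting `break`. A standalone illustration of the pattern:

# Find the first (a, b) pair whose sum is 10, stopping both loops at once.
def first_pair(rows, target=10):
    for row in rows:
        for a, b in row:
            if a + b == target:
                hit = (a, b)
                break       # leave the inner loop
        else:
            continue        # inner loop did not break: try the next row
        break               # inner loop broke: leave the outer loop too
    else:
        return None         # no row matched
    return hit

Here `first_pair([[(1, 2)], [(3, 7), (4, 6)]])` returns `(3, 7)`.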
Example 3: for each sentence, collect the root-bound path of every noun chunk and print the dependency path between each pair of noun chunks.
from itertools import combinations

def main():
    for chunks in load_cabocha_iter(4):
        paths = {}  # ex) {0: [5], 1: [2, 3, 4, 5], 3: [4, 5], 4: [5]}
        for id_, chunk in enumerate(chunks):
            if all(m.pos != '名詞' for m in chunk.morphs):
                continue
            current = chunk
            paths[id_] = []
            while current.dst != -1:
                paths[id_].append(current.dst)
                current = chunks[current.dst]
        for k, l in combinations(paths.keys(), 2):
            print(obtain_path_str(chunks, paths, k, l))
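
`obtain_path_str` is another repository helper not shown in these snippets. Judging from the `paths` dictionary built above (chunk index mapped to the list of chunk indices on the way to the root), a plausible reconstruction is: if chunk `l` lies on `k`'s path, emit a single `->` chain; otherwise join the two paths with `|` at their first shared chunk. The author's actual output format may differ.

def obtain_path_str(chunks, paths, k, l):
    # Hypothetical reconstruction of the repository helper.
    def surf(idx):
        return chunks[idx].normalized_surface()

    if l in paths[k]:
        # l lies on k's path to the root: emit one chain.
        route = [k] + paths[k][:paths[k].index(l) + 1]
        return ' -> '.join(surf(i) for i in route)
    # Otherwise join both root-bound paths at their first shared chunk.
    common = next(i for i in paths[k] if i in paths[l])
    left = [k] + paths[k][:paths[k].index(common)]
    right = [l] + paths[l][:paths[l].index(common)]
    return '{} | {} | {}'.format(' -> '.join(surf(i) for i in left),
                                 ' -> '.join(surf(i) for i in right),
                                 surf(common))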
Example 4: print the first 20 dependency pairs whose source and destination phrases are both non-empty.
def main():
    cnt = 0
    for chunks in load_cabocha_iter():
        for chunk in chunks:
            if chunk.dst == -1:
                continue
            src = chunk.normalized_surface()
            dst = chunks[chunk.dst].normalized_surface()
            if not src or not dst:
                continue
            print(f'{src}\t{dst}')
            cnt += 1
            if cnt == 20:
                return
Example 5: write the dependency pairs in which a noun chunk depends on a verb chunk to Dependency_noun_verb.txt.
def main():
    out_path = 'Dependency_noun_verb.txt'
    with open(out_path, "w", encoding="utf8") as f:
        for chunks in load_cabocha_iter():
            for chunk in chunks:
                if chunk.dst == -1:
                    continue
                if all(morph.pos != '名詞' for morph in chunk.morphs):
                    continue
                if all(morph.pos != '動詞'
                       for morph in chunks[chunk.dst].morphs):
                    continue
                src = chunk.normalized_surface()
                dst = chunks[chunk.dst].normalized_surface()
                f.write(f'{src}\t{dst}\n')
Example 6: for every noun chunk, write its full dependency path up to the sentence root to noun_to_root.txt.
def main():
    path = "noun_to_root.txt"
    with open(path, "w", encoding="utf8") as f:
        for chunks in load_cabocha_iter():
            for chunk in chunks:
                if chunk.dst == -1:
                    continue
                if all(m.pos != "名詞" for m in chunk.morphs):
                    continue
                f.write(chunk.normalized_surface())
                i = chunk.dst
                while i != -1:
                    f.write(" -> " + chunks[i].normalized_surface())
                    i = chunks[i].dst
                f.write("\n")
Example 7: print the root-bound dependency path of each noun chunk in the sentence at index 5.
def main():
    for i, chunks in enumerate(load_cabocha_iter()):
        if i != 5:
            continue
        for chunk in chunks:
            if chunk.dst == -1:
                continue
            noun = [morph.surface for morph in chunk.morphs if morph.pos == '名詞']
            if not noun:
                continue
            path = chunk.normalized_surface()  # e.g. '吾輩は'
            current = chunk
            while current.dst != -1:
                current = chunks[current.dst]
                path += f' -> {current.normalized_surface()}'  # e.g. '吾輩は -> 見た'
            print(path)
Example 8: render the dependency tree of the sentence at index 7 as a directed graph and save it to graph.png.
def main():
    for i, chunks in enumerate(load_cabocha_iter()):
        if i != 7:
            continue
        edges = []
        for j, chunk in enumerate(chunks):
            if chunk.dst == -1:
                continue
            src = chunk.normalized_surface()
            dst = chunks[chunk.dst].normalized_surface()
            if not src or not dst:
                continue
            edges.append((j, src, chunk.dst, dst))
        break  # only the sentence at index 7 is needed

    # write out the dependency graph
    graph = gen_graph(edges)
    print(graph.to_string())
    graph.write('graph.png', format='png', encoding='utf8')
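
`gen_graph` is also defined elsewhere. Since the object it returns exposes `to_string()` and `write(..., format='png', encoding=...)`, it is presumably a `pydot.Dot`; a minimal sketch under that assumption (styling choices are guesses):

import pydot

def gen_graph(edges):
    # edges: list of (src_id, src_label, dst_id, dst_label) tuples,
    # as collected in main() above.
    graph = pydot.Dot(graph_type='digraph')
    for src_id, src_label, dst_id, dst_label in edges:
        graph.add_node(pydot.Node(str(src_id), label=src_label))
        graph.add_node(pydot.Node(str(dst_id), label=dst_label))
        graph.add_edge(pydot.Edge(str(src_id), str(dst_id)))
    return graph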
Example 9: extract verb case patterns (verb base form, the particles of its dependent phrases, and the phrases themselves) to out2.txt.
def main():
    with open('out2.txt', 'w', encoding='utf8') as f:
        for chunks in load_cabocha_iter():
            case_patterns = {}
            # {dst index: [verb base form, [(particle, phrase), (particle, phrase), ...]]}
            for chunk in chunks:
                if chunk.dst == -1:
                    continue
                particles = [morph.surface for morph in chunk.morphs if morph.pos == '助詞']
                verbs = [morph.base for morph in chunks[chunk.dst].morphs if morph.pos == '動詞']
                if not particles or not verbs:
                    continue
                if chunk.dst not in case_patterns:
                    case_patterns[chunk.dst] = [verbs[0], [(particles[0], chunk.normalized_surface())]]
                else:
                    case_patterns[chunk.dst][1].append((particles[0], chunk.normalized_surface()))
            for value in case_patterns.values():
                frames = sorted(value[1])
                f.write(
                    f'{value[0]}\t{" ".join([frame[0] for frame in frames])}\t'
                    f'{" ".join([frame[1] for frame in frames])}\n'
                )
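
The `if chunk.dst not in case_patterns ... else ...` branch used here (and again in Examples 10 and 12) can be collapsed with `dict.setdefault`, which stores the default only on first access. A small self-contained demonstration (`add_frame` is a hypothetical name):

case_patterns = {}

def add_frame(case_patterns, dst, verb, particle, phrase):
    # [verb, frame-list] is created only the first time dst is seen;
    # later calls just extend the existing frame list.
    entry = case_patterns.setdefault(dst, [verb, []])
    entry[1].append((particle, phrase))

add_frame(case_patterns, 5, '見る', 'は', '吾輩は')
add_frame(case_patterns, 5, '見る', 'を', 'ものを')
print(case_patterns)  # {5: ['見る', [('は', '吾輩は'), ('を', 'ものを')]]}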
Example 10: the same case-pattern extraction as Example 9, but taking each dependent chunk's final particle; output goes to case_pattern_2.txt.
def main():
    path = "case_pattern_2.txt"
    with open(path, "w", encoding="utf8") as f:
        for chunks in load_cabocha_iter():
            case_patterns = {}
            # {dst index: [verb base form, [[particle, phrase], [particle, phrase], ...]]}
            for chunk in chunks:
                if chunk.dst == -1:
                    continue
                # use the chunk's last morpheme, but only when it is a particle
                if chunk.morphs[-1].pos != '助詞':
                    continue
                particles = [[chunk.morphs[-1].surface, chunk.normalized_surface()]]
                verbs = [morph.base for morph in chunks[chunk.dst].morphs if morph.pos == '動詞']
                if not verbs:
                    continue
                if chunk.dst not in case_patterns:
                    case_patterns[chunk.dst] = [verbs[0], particles]
                else:
                    case_patterns[chunk.dst][1].extend(particles)
            for value in case_patterns.values():
                value[1].sort(key=lambda x: x[0])
                f.write(f'{value[0]}\t{" ".join([l[0] for l in value[1]])}\t{" ".join([l[1] for l in value[1]])}\n')
Example 11: the same noun-pair dependency paths as Example 3, restricted to the sentence at index 5.
from itertools import combinations

def main():
    for i, chunks in enumerate(load_cabocha_iter()):
        if i != 5:
            continue
        paths = {}  # ex) {0: [5], 1: [2, 3, 4, 5], 3: [4, 5], 4: [5]}
        for j, chunk in enumerate(chunks):
            if chunk.dst == -1:
                continue
            noun = [
                morph.surface for morph in chunk.morphs if morph.pos == '名詞'
            ]
            if not noun:
                continue
            current = chunk
            paths[j] = []
            while current.dst != -1:
                paths[j].append(current.dst)
                current = chunks[current.dst]
        print(paths)
        for k, l in combinations(paths.keys(), 2):
            print(obtain_path_str(chunks, paths, k, l))
Example 12: verb case patterns with particles only (no phrases), written sorted to case_pattern.txt.
def main():
    path = "case_pattern.txt"
    with open(path, "w", encoding="utf8") as f:
        for chunks in load_cabocha_iter():
            case_patterns = {}
            # {dst index: [verb base form, [particle, particle, ...]]}
            for chunk in chunks:
                if chunk.dst == -1:
                    continue
                # use the chunk's last morpheme, but only when it is a particle
                if chunk.morphs[-1].pos != '助詞':
                    continue
                particles = [chunk.morphs[-1].surface]
                # alternative: take every particle in the chunk
                # particles = [morph.surface for morph in chunk.morphs if morph.pos == '助詞']
                verbs = [
                    morph.base for morph in chunks[chunk.dst].morphs
                    if morph.pos == '動詞'
                ]
                if not verbs:
                    continue
                if chunk.dst not in case_patterns:
                    case_patterns[chunk.dst] = [verbs[0], particles]
                else:
                    case_patterns[chunk.dst][1].extend(particles)
            for value in case_patterns.values():
                f.write(f'{value[0]}\t{" ".join(sorted(value[1]))}\n')