Ejemplo n.º 1
0
def main() -> None:
    """Print noun-chunk / verb-chunk pairs linked by a dependency.

    For every chunk that has a head (``dst != -1``), contains a morpheme
    whose ``pos`` is "名詞" (noun) and whose head chunk contains a morpheme
    whose ``pos`` is "動詞" (verb), one line is printed: the symbol-free text
    of the chunk and of its head, separated by a tab.
    """
    for sentence in load_chunk_cabocha():
        for phrase in sentence:
            # Chunks without a head have nothing to be paired with.
            if phrase.dst == -1:
                continue

            head = sentence[phrase.dst]

            # Does the current chunk contain a noun?
            has_noun = any(m.pos == "名詞" for m in phrase.morphs)
            # Does the chunk it depends on contain a verb?
            has_verb = any(m.pos == "動詞" for m in head.morphs)

            # Only report the pair when both conditions hold.
            if not (has_noun and has_verb):
                continue

            src = phrase.no_symbols()
            dst = head.no_symbols()
            print(f"{src}\t{dst}")
Ejemplo n.º 2
0
def get_case_pattern_of_verb() -> None:
    """Print, per predicate, the particles of the chunks that depend on it.

    For each sentence, every chunk that has a head and contains at least one
    particle ("助詞") contributes its particle surfaces to the entry of its
    head chunk, provided the head chunk contains a verb ("動詞").  The
    leftmost verb's base form names the predicate.  After the sentence is
    processed, one line per predicate is printed: the verb, a tab, and the
    sorted particles joined by single spaces.
    """
    for sentence in load_chunk_cabocha():
        # Keyed by head-chunk index, then by the predicate's base form, e.g.
        #   {1: {'生れる': ['で']}, 4: {'つく': ['か', 'が']}}
        #   {5: {'泣く': ['で']}, 7: {'する': ['て', 'だけ', 'は']}}
        case_pattern = {}  # type: Dict[int, Dict[str, List[str]]]

        for chunk in sentence:
            head = chunk.dst
            if head == -1:
                continue

            # Particles found in the current chunk.
            particles = [m.surface for m in chunk.morphs if m.pos == "助詞"]
            # Verbs found in the chunk this one depends on.
            predicates = [
                m.base for m in sentence[head].morphs if m.pos == "動詞"
            ]
            if not predicates or not particles:
                continue

            leftmost_verb = predicates[0]
            entry = case_pattern.get(head)
            if entry is None:
                # First dependent seen for this head: start its particle list.
                case_pattern[head] = {leftmost_verb: particles}
            else:
                # Head already registered: accumulate further particles.
                entry[leftmost_verb].extend(particles)

        for by_verb in case_pattern.values():
            for verb, collected in by_verb.items():
                print(f"{verb}\t{' '.join(sorted(collected))}")
Ejemplo n.º 3
0
def get_case_frame_of_verb() -> None:
    """Collect, per sentence, case-frame records for each predicate.

    Every chunk that has a head, contains a particle ("助詞") and whose head
    chunk contains a verb ("動詞") is folded into a ``CasePattern`` record
    identified by the head chunk's index: the first such chunk creates the
    record, later ones extend its ``pp`` list and append to its ``chunk``
    attribute.

    NOTE(review): in the visible code the per-sentence list is neither
    printed nor returned — the reporting step may be missing; confirm
    against the original source.
    """
    for sentence in load_chunk_cabocha():
        case_pattern = []  # type: List[CasePattern]

        # Walk the chunks looking for ones that carry a particle.
        for chunk in sentence:
            head = chunk.dst
            if head == -1:
                continue

            # Particles in the current chunk.
            particles = [m.surface for m in chunk.morphs if m.pos == "助詞"]
            # Verbs in the chunk this one depends on.
            predicates = [
                m.base for m in sentence[head].morphs if m.pos == "動詞"
            ]
            if not predicates or not particles:
                continue

            # Look for a record already created for this head chunk.
            existing = next(
                (case for case in case_pattern if case.id == head), None)
            if existing is None:
                # New predicate: the head index is stored as the record id.
                case_pattern.append(
                    CasePattern(head, predicates[0], particles,
                                chunk.no_symbols()))
            else:
                # Another case depending on the same predicate.
                existing.pp.extend(particles)
                existing.chunk.append(chunk.no_symbols())
Ejemplo n.º 4
0
def main() -> None:
    """Print each chunk together with the chunk it depends on.

    For every chunk that has a head (``dst != -1``), the symbol-free text of
    the chunk and of its head are printed on one line separated by a single
    space.  Pairs in which either text is empty are skipped.
    """
    for sentence in load_chunk_cabocha():
        for phrase in sentence:
            if phrase.dst == -1:
                continue

            # Chunk.no_symbols() yields the text with symbols stripped.
            source_text = phrase.no_symbols()
            target_text = sentence[phrase.dst].no_symbols()

            # Nothing to show if either side is empty after stripping.
            if source_text == "" or target_text == "":
                continue
            print(f"{source_text} {target_text}")
Ejemplo n.º 5
0
def get_path_from_noun_to_root() -> None:
    """Print the dependency path from each noun chunk up to the root chunk.

    For every chunk that has a head (``dst != -1``) and contains a morpheme
    whose ``pos`` is "名詞" (noun), the chain of heads is followed until a
    chunk whose ``dst`` is -1 (the root) is reached.  The symbol-free texts
    of all chunks on the path — starting chunk and root included — are
    printed on one line joined by " -> ".

    The previous loop condition (``chunks[dst].dst != -1``) stopped one step
    early, so the root chunk never appeared in the output.
    """
    for chunks in load_chunk_cabocha():
        for chunk in chunks:
            if chunk.dst == -1:
                continue
            # Skip chunks that contain no noun.
            if not any(m.pos == "名詞" for m in chunk.morphs):
                continue

            phrases = [chunk.no_symbols()]
            dst = chunk.dst
            # Follow the heads until we step past the root, so that the root
            # chunk itself is appended as the last element of the path.
            while dst != -1:
                phrases.append(chunks[dst].no_symbols())
                dst = chunks[dst].dst
            print(" -> ".join(phrases))
Ejemplo n.º 6
0
def get_dependency_path_between_nouns() -> None:
    """Print the dependency path between every pair of noun chunks.

    For each sentence, every chunk containing a noun ("名詞") is mapped to
    the list of chunk indices visited when following ``dst`` links until a
    chunk with ``dst == -1`` is reached (an empty list for a chunk that has
    no head).  Each pair of noun-chunk indices, in insertion order, is then
    handed to ``join_phrase`` and the result is printed.
    """
    for chunks in load_chunk_cabocha():
        phrase_path = defaultdict(list)  # type: PD

        for index, chunk in enumerate(chunks):
            has_noun = any(m.pos == "名詞" for m in chunk.morphs)
            if not has_noun:
                continue

            # Indices of the successive heads; stays empty for a headless
            # chunk, which must still be registered as a key.
            heads = []
            node = chunk
            while node.dst != -1:
                heads.append(node.dst)
                node = chunks[node.dst]
            phrase_path[index] = heads

        for first, second in combinations(phrase_path, 2):
            print(join_phrase(first, second, phrase_path, chunks))
Ejemplo n.º 7
0
def mining_light_verb_syntax() -> None:
    """Print light-verb constructions with their particles and arguments.

    For every chunk that contains a verb ("動詞") and has source chunks
    (``chunk.srcs``), the source chunks containing a particle ("助詞") are
    collected.  A source chunk in which a morpheme with ``pos1 == "サ変接続"``
    is directly followed by the surface "を" supplies the predicate
    ``<noun>を<base form of the leftmost verb>``; the other collected source
    chunks are the predicate's arguments.  If several source chunks match,
    the last one names the predicate and none of them is reported as an
    argument.

    One line is printed per predicate: the predicate, the first particle of
    each argument chunk, and the symbol-free argument texts — the three
    fields separated by tabs, the items inside a field by single spaces,
    ordered by (particle, text).
    """
    for chunks in load_chunk_cabocha():
        for chunk in chunks:
            if not chunk.srcs:
                continue

            # Verbs of the current chunk; the leftmost one names the predicate.
            verbs = [morph.base for morph in chunk.morphs if morph.pos == "動詞"]
            if not verbs:
                continue

            # Source chunks that contain at least one particle.
            phrases_containing_particle = [
                chunks[src]
                for src in chunk.srcs
                if any(morph.pos == "助詞" for morph in chunks[src].morphs)
            ]
            if not phrases_containing_particle:
                continue

            # Separate the "サ変接続 noun + を" chunks from the argument chunks.
            # A new list is built instead of removing from the list being
            # iterated, which used to skip the element after each removal.
            light_verb = ""
            arguments = []
            for phrase in phrases_containing_particle:
                for head, follower in zip(phrase.morphs, phrase.morphs[1:]):
                    if head.pos1 == "サ変接続" and follower.surface == "を":
                        light_verb = f"{head.surface}を{verbs[0]}"
                        break
                else:
                    # No match in this chunk: it is an ordinary argument.
                    arguments.append(phrase)

            if light_verb == "":
                continue

            # Pair each argument chunk with its first particle.
            particles_and_phrases = []
            for phrase in arguments:
                for morph in phrase.morphs:
                    if morph.pos == "助詞":
                        particles_and_phrases.append(
                            [morph.surface, phrase.no_symbols()])
                        break

            # Sort once so particles and phrases stay aligned.
            particles_and_phrases.sort()
            particles = [pp[0] for pp in particles_and_phrases]
            phrases = [pp[1] for pp in particles_and_phrases]

            print(f"{light_verb}\t{' '.join(particles)}\t{' '.join(phrases)}")
Ejemplo n.º 8
0
def main(n=10) -> None:
    """Pass the ``n``-th loaded sentence to ``visualize_dependency_tree``.

    Args:
        n: Index into the value returned by ``load_chunk_cabocha()``;
            defaults to 10.  The returned value must support indexing.
    """
    visualize_dependency_tree(load_chunk_cabocha()[n])