def main() -> None:
    for chunks in load_chunk_cabocha():
        for chunk in chunks:
            if chunk.dst == -1:
                continue
            # Flags: does this chunk contain a noun, and does its head contain a verb?
            exist_noun = False
            exist_verb = False
            # Check whether the current chunk contains a noun
            for m in chunk.morphs:
                if m.pos == "名詞":
                    exist_noun = True
            # Check whether the head chunk contains a verb
            for m in chunks[chunk.dst].morphs:
                if m.pos == "動詞":
                    exist_verb = True
            # Print only when both flags are True
            if exist_noun and exist_verb:
                src = chunk.no_symbols()
                dst = chunks[chunk.dst].no_symbols()
                print(f"{src}\t{dst}")

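# The functions in this file assume Morph and Chunk objects produced by
# load_chunk_cabocha() from CaboCha output. Below is a minimal sketch of that
# assumed data model; field names are inferred from usage, and the real
# definitions live elsewhere in the repository.
from typing import List


class Morph:
    def __init__(self, surface: str, base: str, pos: str, pos1: str) -> None:
        self.surface = surface  # surface form
        self.base = base        # base (dictionary) form
        self.pos = pos          # part of speech, e.g. "名詞"
        self.pos1 = pos1        # POS subcategory, e.g. "サ変接続"


class Chunk:
    def __init__(self, morphs: List[Morph], dst: int, srcs: List[int]) -> None:
        self.morphs = morphs  # morphemes in this chunk
        self.dst = dst        # index of the head chunk (-1 for the root)
        self.srcs = srcs      # indices of the dependent chunks

    def no_symbols(self) -> str:
        # Surface string of the chunk with symbol morphemes removed
        return "".join(m.surface for m in self.morphs if m.pos != "記号")
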
def get_case_pattern_of_verb() -> None:
    for sentence in load_chunk_cabocha():
        case_pattern = {}  # type: Dict[int, Dict[str, List[str]]]
        # Examples:
        # {1: {'生れる': ['で']}, 4: {'つく': ['か', 'が']}}
        # {5: {'泣く': ['で']}, 7: {'する': ['て', 'だけ', 'は']}}
        for chunk in sentence:
            if chunk.dst == -1:
                continue
            # Collect the particles in this chunk
            pp = [m.surface for m in chunk.morphs if m.pos == "助詞"]
            # Find the predicate (searched in the chunk the particles depend on)
            verbs = [
                m.base for m in sentence[chunk.dst].morphs if m.pos == "動詞"
            ]
            if verbs == [] or pp == []:
                continue
            if chunk.dst not in case_pattern:
                # Add the particle list keyed by the leftmost verb,
                # itself keyed by the chunk's head index
                case_pattern[chunk.dst] = {verbs[0]: pp}
            else:
                # The entry already exists, so extend its particle list
                case_pattern[chunk.dst][verbs[0]].extend(pp)
        for dic in case_pattern.values():
            for verb, pp in dic.items():
                print(f"{verb}\t{' '.join(sorted(pp))}")

def get_case_frame_of_verb() -> None:
    for sentence in load_chunk_cabocha():
        case_pattern = []  # type: List[CasePattern]
        # Walk through the chunks that contain particles
        for chunk in sentence:
            if chunk.dst == -1:
                continue
            # Collect the particles in this chunk
            pp = [m.surface for m in chunk.morphs if m.pos == "助詞"]
            # Find the predicate (searched in the chunk the particles depend on)
            verbs = [
                m.base for m in sentence[chunk.dst].morphs if m.pos == "動詞"
            ]
            if verbs == [] or pp == []:
                continue
            # List of case.id values matching this chunk's head index
            if [case.id for case in case_pattern if case.id == chunk.dst] == []:
                # Store the chunk's head index as case.id
                case_pattern.append(
                    CasePattern(chunk.dst, verbs[0], pp, chunk.no_symbols()))
            else:
                # Add the cases that depend on the same predicate
                for case in case_pattern:
                    if case.id == chunk.dst:
                        case.pp.extend(pp)
                        case.chunk.append(chunk.no_symbols())
                        break

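# A minimal sketch of the assumed CasePattern container used above. This is an
# inference from usage (`case.chunk.append(...)` implies the constructor wraps
# the initial chunk string in a list); the real class is defined elsewhere.
class CasePattern:
    def __init__(self, id: int, verb: str, pp: List[str], chunk: str) -> None:
        self.id = id          # index of the predicate's chunk
        self.verb = verb      # base form of the leftmost verb
        self.pp = pp          # particles depending on the predicate
        self.chunk = [chunk]  # surface strings of the dependent chunks
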
def main() -> None:
    for chunks in load_chunk_cabocha():
        for chunk in chunks:
            if chunk.dst == -1:
                continue
            # Strip symbols with Chunk.no_symbols()
            src = chunk.no_symbols()
            dst = chunks[chunk.dst].no_symbols()
            # Print only when both src and dst are non-empty
            if src != "" and dst != "":
                print(f"{src} {dst}")

def get_path_from_noun_to_root() -> None:
    for chunks in load_chunk_cabocha():
        for chunk in chunks:
            if chunk.dst == -1:
                continue
            # Skip chunks that contain no noun
            if not any(m.pos == "名詞" for m in chunk.morphs):
                continue
            dst = chunk.dst
            phrases = [chunk.no_symbols()]
            # Follow the heads (Chunk.dst) from the noun chunk until -1,
            # appending every chunk on the way, including the root itself
            while dst != -1:
                phrases.append(chunks[dst].no_symbols())
                dst = chunks[dst].dst
            print(" -> ".join(phrases))

def get_dependency_path_between_nouns() -> None:
    for chunks in load_chunk_cabocha():
        phrase_path = defaultdict(list)  # type: PD
        for i, chunk in enumerate(chunks):
            if not any(m.pos == "名詞" for m in chunk.morphs):
                continue
            if chunk.dst == -1:
                phrase_path[i] = []
                continue
            # Record the chain of head indices from this noun chunk to the root
            current = chunk
            while current.dst != -1:
                phrase_path[i].append(current.dst)
                current = chunks[current.dst]
        # Print the dependency path for every pair of noun chunks
        for i, j in combinations(phrase_path.keys(), 2):
            print(join_phrase(i, j, phrase_path, chunks))

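# Assumed alias behind the "PD" type comment above, inferred from usage: it
# maps a noun chunk's index to the chain of head indices on its path to the
# root. join_phrase() is likewise assumed to render the path between the noun
# chunks i and j; both are defined elsewhere in the repository.
from typing import DefaultDict

PD = DefaultDict[int, List[int]]
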
def mining_light_verb_syntax():
    for chunks in load_chunk_cabocha():
        for chunk in chunks:
            if chunk.srcs == []:
                continue
            # Find the verbs in this chunk
            verbs = [morph.base for morph in chunk.morphs if morph.pos == "動詞"]
            if verbs == []:
                continue
            # Collect the dependent chunks that contain at least one particle
            phrases_containing_particle = []
            for src in chunk.srcs:
                if any(morph.pos == "助詞" for morph in chunks[src].morphs):
                    phrases_containing_particle.append(chunks[src])
            if phrases_containing_particle == []:
                continue
            # Find a "サ変接続 noun + を" chunk and remove it from the list of
            # particle-bearing chunks
            light_verb = ""
            for phrase in phrases_containing_particle:
                for i in range(len(phrase.morphs) - 1):
                    if (phrase.morphs[i].pos1 == "サ変接続"
                            and phrase.morphs[i + 1].surface == "を"):
                        light_verb = f"{phrase.morphs[i].surface}を{verbs[0]}"
                        phrases_containing_particle.remove(phrase)
                        break
                if light_verb != "":
                    # Stop once found; removing an element while iterating
                    # would otherwise skip the next chunk
                    break
            if light_verb == "":
                continue
            # Build the (particle, chunk) pairs
            particles_and_phrases = []
            for phrase in phrases_containing_particle:
                for morph in phrase.morphs:
                    if morph.pos == "助詞":
                        particles_and_phrases.append(
                            [morph.surface, phrase.no_symbols()])
                        break
            particles = [pp[0] for pp in sorted(particles_and_phrases)]
            phrases = [pp[1] for pp in sorted(particles_and_phrases)]
            print(f"{light_verb}\t{' '.join(particles)}\t{' '.join(phrases)}")

def main(n=10) -> None:
    # Visualize the dependency tree of the n-th sentence
    chunks_list = load_chunk_cabocha()
    visualize_dependency_tree(chunks_list[n])

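# A minimal sketch of the assumed visualize_dependency_tree helper, using the
# third-party graphviz package. This is an assumption; the original may use
# pydot or CaboCha's own tooling instead. Node names are chunk indices so that
# chunks with identical surface strings stay distinct.
from graphviz import Digraph


def visualize_dependency_tree(chunks: List[Chunk]) -> None:
    graph = Digraph(format="png")
    # One node per chunk, labeled with its symbol-stripped surface string
    for i, chunk in enumerate(chunks):
        graph.node(str(i), chunk.no_symbols())
    # One edge per dependency, from each chunk to its head
    for i, chunk in enumerate(chunks):
        if chunk.dst != -1:
            graph.edge(str(i), str(chunk.dst))
    graph.render("dependency_tree", cleanup=True)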