for m in self.morphs: if m.pos == pos: return True return False def get_pos(self, pos): res = [] for m in self.morphs: if m.pos == pos: res.append(m) return res if __name__ == '__main__': res = [] for chunks in cabocha_into_chunks(): chunks = tuple(map(Chunk_normalized, chunks.values())) for dc in chunks: if not dc.has_pos('動詞'): continue # 動詞を含む文節において,最左の動詞の基本形を述語とする v_base = dc.get_pos('動詞')[0].base srcs = [] for sc_idx in dc.srcs: # 述語に係る助詞を格とする ms = chunks[sc_idx].get_pos('助詞') if ms: # 文節内に助詞が複数ある場合は最も右のものを選ぶ srcs.append((ms[-1].base, chunks[sc_idx].norm)) if srcs:
# NOTE(review): `norm` and `has_pos` take `self` and read `self.morphs`, so
# they are presumably methods of a chunk class whose header lies outside this
# excerpt. The script below reads `c.norm` as an attribute, so `norm` is
# presumably wrapped as a property there -- confirm placement when merging.
def norm(self):
    """Return the chunk text with symbol morphemes left out.

    Joins the ``surface`` of every morpheme in ``self.morphs`` whose ``pos``
    is not '記号' (symbol), preserving order. Returns '' when nothing is left.
    """
    return ''.join(m.surface for m in self.morphs if m.pos != '記号')


def has_pos(self, pos):
    """Return True if any morpheme in ``self.morphs`` has part of speech *pos*."""
    return any(m.pos == pos for m in self.morphs)


if __name__ == '__main__':

    def input_k():
        """Prompt until a non-negative integer is entered, then return it.

        Non-numeric input and negative numbers are rejected with a message
        instead of propagating ``ValueError`` (from ``int`` or, later, from
        ``islice``), so a typo no longer aborts the interactive session.
        """
        while True:
            try:
                k = int(input('Enter a number (0: exit) -> '))
            except ValueError:
                print('Please enter an integer.')
                continue
            if k >= 0:
                return k
            print('Please enter a non-negative integer.')

    # iter(callable, sentinel): keep prompting until the user enters 0.
    for k in iter(input_k, 0):
        # islice(..., k - 1, k) yields only the k-th item (1-based); if the
        # iterator is shorter than k, the loop body is simply skipped.
        for chunks in islice(cabocha_into_chunks(), k - 1, k):
            chunks = tuple(map(Chunk_normalized, chunks.values()))
            graph = pydot.Dot(graph_type='digraph')
            for i, c in enumerate(chunks):
                # Nodes are keyed by position, not by text, because two
                # chunks may have the same surface string.
                if c.norm:
                    # Chunks containing a '名詞' (noun) morpheme are drawn red.
                    color = 'red' if c.has_pos('名詞') else 'black'
                    graph.add_node(pydot.Node(i, label=c.norm, color=color))
            for i, c in enumerate(chunks):
                # dst == -1: no destination chunk (presumably the root).
                if c.dst == -1:
                    continue
                # Only connect chunks that were added as nodes above, i.e.
                # both ends have non-empty normalized text.
                if c.norm and chunks[c.dst].norm:
                    graph.add_edge(pydot.Edge(i, c.dst))
            graph.write_png(f'./out44_{k}.png')
def get_norm(self): clause = "".join(m.surface for m in self.morphs if m.pos != "記号") return clause def has_pos(self, pos): return any(m.pos == pos for m in self.morphs) def get_pos(self, pos): return (m for m in self.morphs if m.pos == pos) if __name__ == "__main__": res = [] # for chunks in cabocha_into_chunks(): for chunks in islice(cabocha_into_chunks(), 33, 34): # list[33:34] と同様 chunks = {k: ChunkNormalized(v) for k, v in chunks.items()} for cv in filter(lambda c: c.has_pos("動詞"), chunks.values()): # [45-1] 動詞を含む文節において,最左の動詞の基本形を述語とする v_base = next(cv.get_pos("動詞")).base srcs = [] for sc_idx in cv.srcs: # [45-2] 述語に係る助詞を格とする ms = tuple(chunks[sc_idx].get_pos("助詞")) if ms: # 文節内に助詞が複数ある場合は最も右のものを選ぶ # e.g. 「次のように」 # Morph(surface='の', base='の', pos='助詞', pos1='連体化') # Morph(surface='に', base='に', pos='助詞', pos1='副詞化') # if len(ms) != 1: # print(chunks[sc_idx].norm, ms)
def has_pos(self, pos): return any(m.pos == pos for m in self.morphs) def get_pos(self, pos): return (m for m in self.morphs if m.pos == pos) def has_sahen_wo(self): for m1, m2 in zip(self.morphs, self.morphs[1:]): if [m1.pos1, m2.pos, m2.base] == ["サ変接続", "助詞", "を"]: return True return False if __name__ == "__main__": res = [] for chunks in islice(cabocha_into_chunks(), 12, 13): chunks = {k: ChunkNormalized(v) for k, v in chunks.items()} for cv in filter(lambda c: c.has_pos("動詞"), chunks.values()): # [45-1] 動詞を含む文節において,最左の動詞の基本形を述語とする v_base = next(cv.get_pos("動詞")).base srcs = [] # [47-1] 「サ変接続名詞+を(助詞)」で構成される文節が動詞に係る場合のみを # 対象とする failed = True for sc_idx in reversed(cv.srcs): if failed and chunks[sc_idx].has_sahen_wo(): # [47-2] 述語は「サ変接続名詞+を+動詞の基本形」とし, # 文節中に複数の動詞があるときは,最左の動詞を用いる v_base = chunks[sc_idx].norm + v_base failed = False else:
用語を -> 作り出した """ import os import sys from itertools import islice from knock41 import cabocha_into_chunks from knock45 import ChunkNormalized sys.path.append(os.path.join(os.path.dirname(__file__), "../../")) from kiyuna.utils.message import message # noqa: E402 isort:skip if __name__ == "__main__": res = [] # for chunks in cabocha_into_chunks(): for chunks in islice(cabocha_into_chunks(), 33, 34): chunks = {k: ChunkNormalized(v) for k, v in chunks.items()} for c in chunks.values(): if c.dst not in chunks: # 名詞を含まないのパス continue # [48-1] 各文節は(表層形の)形態素列で表現する tmp = [c.norm] # [48-2] パスの開始文節から終了文節に至るまで,各文節の表現を"->"で連結する dst = c.dst while dst in chunks: tmp.append(chunks[dst].norm) dst = chunks[dst].dst res.append(" -> ".join(tmp) + "\n") sys.stdout.writelines(res) message(f"write {len(res)} lines", type="success")