パスの開始文節から終了文節に至るまで,各文節の表現を"->"で連結する 「吾輩はここで始めて人間というものを見た」という文(neko.txt.cabochaの8文目)から, 次のような出力が得られるはずである. 吾輩は -> 見た ここで -> 始めて -> 人間という -> ものを -> 見た 人間という -> ものを -> 見た ものを -> 見た ''' from no41 import Chunk, load_cabocha from no43 import search_pos import sys if __name__ == '__main__': infile = open(sys.argv[1], 'rt') sents = load_cabocha(infile) outfile = open(sys.argv[2], 'wt') for sent in sents: sent2 = sent for chunk in sent: noun_chunk = search_pos(chunk, pos="名詞", fmt='chunk') if noun_chunk: path_chunks = [noun_chunk] for current_chunk in sent2: if path_chunks[-1].dst == current_chunk.idx: path_chunks.append(current_chunk) if len(path_chunks) > 1: outfile.write( " -> ".join([node.surface for node in path_chunks]) + "\n")
if (morph.pos == pos and not pos1 and not base)\ or (morph.pos == pos and morph.pos1 == pos1 and not base)\ or (morph.pos == pos and not pos1 and morph.base == base)\ or (morph.pos == pos and morph.pos1 == pos1 and morph.base == base): if fmt == 'bool': return True elif fmt == 'morph': return morph elif fmt == 'chunk': return chunk return False if fmt == 'bool' else None if __name__ == '__main__': f = open(sys.argv[1], 'rt') sents = load_cabocha(f) # import pdb; pdb.set_trace() for sent in sents: sent2 = sent for chunk in sent: for chunk2 in sent2: if chunk.dst == chunk2.idx and search_pos(chunk, "名詞")\ and search_pos(chunk2, "動詞"): surface = "".join([morph.surface for morph in chunk.morphs if morph.pos != "記号"]) surface2 = "".join([morph.surface for morph in chunk2.morphs if morph.pos != "記号"]) print("{}\t{}".format(surface, surface2)) break f.close()