Ejemplo n.º 1
0
パスの開始文節から終了文節に至るまで,各文節の表現を"->"で連結する
「吾輩はここで始めて人間というものを見た」という文(neko.txt.cabochaの8文目)から,
次のような出力が得られるはずである.

吾輩は -> 見た
ここで -> 始めて -> 人間という -> ものを -> 見た
人間という -> ものを -> 見た
ものを -> 見た
'''
from no41 import Chunk, load_cabocha
from no43 import search_pos
import sys

if __name__ == '__main__':
    # Usage: <script> <input cabocha file> <output file>
    #
    # For every chunk in which search_pos finds a noun ("名詞"), follow the
    # `dst` links through the sentence and write the surfaces of the chunks
    # on that path, joined by " -> ", one path per line.
    #
    # Both files are managed by `with` so they are closed (and the output
    # flushed) even when parsing or writing raises. The output file is opened
    # only after load_cabocha() has been called, matching the original order.
    with open(sys.argv[1], 'rt') as infile:
        sents = load_cabocha(infile)
        with open(sys.argv[2], 'wt') as outfile:
            for sent in sents:
                for chunk in sent:
                    # With fmt='chunk', search_pos hands back the chunk itself
                    # on a match and a falsy value otherwise.
                    noun_chunk = search_pos(chunk, pos="名詞", fmt='chunk')
                    if not noun_chunk:
                        continue

                    path_chunks = [noun_chunk]
                    # Single forward pass: whenever a chunk is the target of
                    # the current path tail it becomes the new tail.
                    # NOTE(review): this only finds the full path if every
                    # dst refers to a chunk that appears later in `sent` --
                    # presumably true for CaboCha output; confirm.
                    for current_chunk in sent:
                        if path_chunks[-1].dst == current_chunk.idx:
                            path_chunks.append(current_chunk)

                    # A path consisting of the start chunk alone (no
                    # dependency target found) is not reported.
                    if len(path_chunks) > 1:
                        outfile.write(
                            " -> ".join([node.surface
                                         for node in path_chunks]) + "\n")
Ejemplo n.º 2
0
            if (morph.pos == pos and not pos1 and not base)\
                or (morph.pos == pos and morph.pos1 == pos1 and not base)\
                or (morph.pos == pos and not pos1 and morph.base == base)\
                or (morph.pos == pos and morph.pos1 == pos1 and morph.base == base):
                if fmt == 'bool':
                    return True
                elif fmt == 'morph':
                    return morph
                elif fmt == 'chunk':
                    return chunk
    return False if fmt == 'bool' else None


if __name__ == '__main__':
    # Usage: <script> <input cabocha file>
    #
    # For every chunk in which search_pos finds a noun ("名詞") and whose
    # dependency target (the chunk whose idx equals this chunk's dst) contains
    # a verb ("動詞"), print "<source surface>\t<target surface>". Morphemes
    # whose pos is "記号" (symbols) are left out of both surfaces.
    #
    # The input file is managed by `with` so it is closed even when parsing
    # or printing raises.
    with open(sys.argv[1], 'rt') as f:
        sents = load_cabocha(f)

        for sent in sents:
            for chunk in sent:
                for chunk2 in sent:
                    # Only the chunk that `chunk` depends on is of interest.
                    if chunk.dst != chunk2.idx:
                        continue
                    # Same evaluation order as before: the noun test on the
                    # source runs first, then the verb test on the target.
                    if not (search_pos(chunk, "名詞")
                            and search_pos(chunk2, "動詞")):
                        continue

                    surface = "".join([morph.surface for morph in chunk.morphs
                                       if morph.pos != "記号"])
                    surface2 = "".join([morph.surface
                                        for morph in chunk2.morphs
                                        if morph.pos != "記号"])
                    print("{}\t{}".format(surface, surface2))
                    # One pair per source chunk: stop scanning once printed.
                    break