Ejemplo n.º 1
0
def get_pos_idx(chunks, pos):
    '''Collect the positions of the chunks in which ``pos`` is found.

    Every element of ``chunks`` is tested with ``search_pos(chunk, pos)``;
    the index of each element whose test result is truthy is kept.

    Returns a list of integer indexes in iteration order (empty when no
    chunk matches or ``chunks`` is empty).
    '''
    return [
        position
        for position, candidate in enumerate(chunks)
        if search_pos(candidate, pos)
    ]
Ejemplo n.º 2
0
吾輩は -> 見た
ここで -> 始めて -> 人間という -> ものを -> 見た
人間という -> ものを -> 見た
ものを -> 見た
'''
from no41 import Chunk, load_cabocha
from no43 import search_pos
import sys

if __name__ == '__main__':
    # Usage: <script> <input-path> <output-path>
    #
    # Reads sentences via load_cabocha(infile).  For every chunk for which
    # search_pos(chunk, pos="名詞", fmt='chunk') returns a truthy value, the
    # chain of dependency links (``dst`` of one chunk == ``idx`` of the next)
    # is followed and the ``surface`` of each chunk on that chain is written
    # to the output file, joined by " -> ", one chain per line.
    #
    # Both files are handled by ``with`` so they are closed (and the output
    # flushed) even when parsing or writing raises part-way through.  The
    # output file is still opened only after load_cabocha() has returned,
    # exactly as before.
    with open(sys.argv[1], 'rt') as infile:
        sents = load_cabocha(infile)
        with open(sys.argv[2], 'wt') as outfile:
            for sent in sents:
                for chunk in sent:
                    noun_chunk = search_pos(chunk, pos="名詞", fmt='chunk')
                    if not noun_chunk:
                        continue
                    # Start the path at the matching chunk and extend it
                    # whenever a chunk is the target of the current tail.
                    # NOTE(review): a single left-to-right pass only follows
                    # links to chunks that appear later in ``sent`` --
                    # presumably always true for this parser output; confirm.
                    path_chunks = [noun_chunk]
                    for current_chunk in sent:
                        if path_chunks[-1].dst == current_chunk.idx:
                            path_chunks.append(current_chunk)
                    # A path of length one has no dependency to display.
                    if len(path_chunks) > 1:
                        outfile.write(
                            " -> ".join(node.surface
                                        for node in path_chunks) + "\n")
Ejemplo n.º 3
0
見る    は を   吾輩は ものを
'''

from no41 import Chunk, load_cabocha
from no43 import search_pos
import sys

if __name__ == '__main__':
    # Usage: <script> <input-path> <output-path>
    #
    # Reads sentences via load_cabocha(infile).  For every chunk for which
    # search_pos(chunk, "動詞", fmt="morph") returns a truthy value (the
    # predicate), gathers the chunks of the same sentence that
    #   * yield a truthy search_pos(chunk2, "助詞", fmt="morph") result, and
    #   * point at the predicate chunk (``chunk2.dst == chunk.idx``),
    # then writes one tab-separated line:
    #   predicate.base <TAB> particle bases <TAB> argument chunk surfaces
    # with the particle bases and the surfaces each joined by single spaces.
    #
    # Both files are handled by ``with``: previously the output file was
    # never closed explicitly, and neither file was closed on an exception.
    # The output file is still opened only after load_cabocha() has returned.
    with open(sys.argv[1], 'rt') as infile:
        sents = load_cabocha(infile)
        with open(sys.argv[2], 'wt') as outfile:
            for sent in sents:
                for chunk in sent:
                    predicate = search_pos(chunk, "動詞", fmt="morph")
                    if not predicate:
                        continue
                    particles = []
                    argument_chunks = []
                    for chunk2 in sent:
                        particle = search_pos(chunk2, "助詞", fmt="morph")
                        if particle and chunk2.dst == chunk.idx:
                            particles.append(particle)
                            argument_chunks.append(chunk2)
                    # Predicates with no qualifying dependent produce no line.
                    if particles:
                        outfile.write("{}\t{}\t{}\n".format(
                            predicate.base,
                            " ".join(p.base for p in particles),
                            " ".join(c.surface for c in argument_chunks)))
Ejemplo n.º 4
0
・コーパス中で頻出する述語と助詞パターン
'''
from no41 import Chunk, load_cabocha
from no43 import search_pos
import sys

if __name__ == '__main__':
    infile = open(sys.argv[1], 'rt')
    sents = load_cabocha(infile)
    outfile = open(sys.argv[2], 'wt')
    # import pdb; pdb.set_trace()
    for sent in sents:
        sent2 = sent
        for chunk in sent:
            # want most left verb in chunk
            predicate = search_pos(chunk, pos="動詞", fmt='morph')
            if predicate:
                particles = []
                chunk2s = []
                for chunk2 in sent2:
                    # want most right particle in chunk
                    particle_chunk = search_pos(chunk2, "助詞", fmt='chunk')
                    if particle_chunk and chunk2.dst == chunk.idx:
                        particle = [
                            morph for morph in particle_chunk.morphs
                            if morph.pos == '助詞'
                        ][-1]
                        particles.append(particle)
                        chunk2s.append(chunk2)
                wo_case_chunk = search_pos(chunk2s,
                                           pos="助詞",