コード例 #1
0
from knock41 import get_sentences

if __name__ == "__main__":
    # For every chunk of one sentence whose first morpheme is a verb, print a
    # tab-separated line: the verb's base form, the particles found in the
    # chunks that depend on it, and the surface text of those chunks.
    sentences = get_sentences()
    chunks = sentences[5]  # only the sentence at index 5 is processed
    for chunk in chunks:
        # Guard against a chunk without morphemes instead of failing on
        # morphs[0] with an IndexError.
        if not chunk.morphs or chunk.morphs[0].pos != "動詞":
            continue
        verb_base = chunk.morphs[0].base
        # Exactly one (particle, chunk text) pair per dependent chunk, so the
        # particle column and the chunk column always have the same number of
        # entries and stay position-aligned.
        cases = []
        for idx in chunk.srcs:
            src_particles = [
                morph.base for morph in chunks[idx].morphs if morph.pos == "助詞"
            ]
            if src_particles:
                # Keep only the last particle of the chunk; appending every
                # particle could yield more particles than chunk texts.
                cases.append((src_particles[-1], chunks[idx].surface))
        particles = " ".join(particle for particle, _ in cases)
        full_particles = " ".join(surface for _, surface in cases)
        print(f"{verb_base}\t{particles}\t{full_particles}")
コード例 #2
0
ファイル: knock42.py プロジェクト: tmu-nlp/100knock2016
from knock41 import get_sentences

# Print one "source<TAB>destination" line for each chunk that has a
# destination chunk within its sentence.
for sentence in get_sentences():
    for chunk in sentence:
        # A dst of -1 means there is no destination to pair this chunk with.
        if chunk.dst == -1:
            continue
        pair = (
            chunk.join_surface_wo_symbol(),
            sentence[chunk.dst].join_surface_wo_symbol(),
        )
        # Skip the pair when either side's text is the empty string.
        if '' in pair:
            continue
        print('{}\t{}'.format(*pair))
コード例 #3
0
ファイル: knock47.py プロジェクト: tmu-nlp/100knock2016
# -*- coding: utf-8 -*-

from knock41 import get_sentences
# cut -f1 knock47.txt| sort | uniq -c| sort -r | less
# cut -f1,2 knock47.txt| sort | uniq -c| sort -r | less

# For every chunk containing a verb, print a tab-separated line: predicate,
# particles of the other dependent chunks, and the text of those chunks.
for sentence in get_sentences():
    for chunk in sentence:
        if not chunk.has_verb():
            continue
        predicate = None
        pairs = []
        for src_id in chunk.srcs:
            dependent = sentence[src_id]
            if dependent.is_sahen_wo():
                # The predicate is this dependent's text followed by the
                # leftmost verb of the current chunk; a later match overwrites
                # an earlier one.
                predicate = dependent.join_surface() + chunk.get_most_left_verb()
                continue
            particle = dependent.get_most_right_particle()
            if particle is not None:
                pairs.append((particle, dependent.join_surface()))
        # Output needs both a predicate and at least one (particle, text) pair.
        if predicate is None or not pairs:
            continue
        # De-duplicate and sort once; both columns share this ordering.
        ordered = sorted(set(pairs))
        particles = ' '.join(particle for particle, _ in ordered)
        chunks = ' '.join(text for _, text in ordered)
        print('{}\t{}\t{}'.format(predicate, particles, chunks))
コード例 #4
0
ファイル: knock44.py プロジェクト: tmu-nlp/100knock2016
# -*- coding: utf-8 -*-

import sys
from graphviz import Digraph
from knock41 import get_sentences


def sent2graph(sentence):
    """Render the dependency structure of *sentence* as a PNG graph.

    Every chunk whose ``dst`` is not -1 contributes a node for itself, a node
    for its destination chunk, and a directed edge between the two. Nodes are
    keyed by the chunk index as a string and labelled with ``join_surface()``.
    The graph is rendered under the fixed name ``knock44``.
    """
    graph = Digraph(format='png')
    for chunk in sentence:
        if chunk.dst == -1:
            # No destination chunk: nothing to draw for this chunk.
            continue
        src_key = str(chunk.id)
        dst_key = str(chunk.dst)
        graph.node(src_key, chunk.join_surface())
        graph.node(dst_key, sentence[chunk.dst].join_surface())
        graph.edge(src_key, dst_key)
    # cleanup=True asks graphviz to remove the intermediate source file.
    graph.render('knock44', cleanup=True)


# The first command-line argument is a 1-based sentence number; convert it to
# the 0-based position used by enumerate().
target = int(sys.argv[1]) - 1
for position, sentence in enumerate(get_sentences()):
    if position != target:
        continue
    # Draw only the requested sentence, then stop reading further sentences.
    sent2graph(sentence)
    break