"""Print a verb and the particles of the chunks that point at it.

For every chunk of one fixed sentence whose first morpheme is a verb
(pos == "動詞"), one tab-separated line is printed: the verb's base form,
the base forms of all particles (pos == "助詞") found in its source chunks,
and the surfaces of those source chunks.
"""
from knock41 import get_sentences

if __name__ == "__main__":
    sentences = get_sentences()
    # Only the sentence at index 5 is processed.
    chunks = sentences[5]

    for chunk in chunks:
        head = chunk.morphs[0]
        if head.pos != "動詞":
            continue

        verb_base = head.base
        particles = []
        # Keyed by source-chunk index so a chunk holding several particles
        # contributes its surface only once.
        full_particles = {}

        for idx in chunk.srcs:
            source = chunks[idx]
            found = [morph.base for morph in source.morphs if morph.pos == "助詞"]
            if found:
                particles.extend(found)
                full_particles[idx] = source.surface

        print(
            f'{verb_base}\t{" ".join(particles)}\t{" ".join(full_particles.values())}'
        )
"""Print "source<TAB>destination" text for each chunk that has a destination."""
from knock41 import get_sentences

for sentence in get_sentences():
    for chunk in sentence:
        # A dst of -1 is skipped: there is no destination chunk to pair with.
        if chunk.dst == -1:
            continue

        modifier_text = chunk.join_surface_wo_symbol()
        head_text = sentence[chunk.dst].join_surface_wo_symbol()

        # Skip the pair when either side comes back as an empty string.
        if '' in (modifier_text, head_text):
            continue

        print('{}\t{}'.format(modifier_text, head_text))
# -*- coding: utf-8 -*-
"""Print predicate, particles and source-chunk text as tab-separated columns.

For each chunk where has_verb() is true, the source chunks are split in two:
one for which is_sahen_wo() is true supplies the predicate (its surface plus
the verb returned by get_most_left_verb()), and the others supply
(particle, surface) pairs. A line is printed only when both a predicate and
at least one pair were found.
"""
from knock41 import get_sentences

# Shell pipelines for tallying the output once saved as knock47.txt
# (frequency of column 1, and of columns 1-2):
#   cut -f1 knock47.txt| sort | uniq -c| sort -r | less
#   cut -f1,2 knock47.txt| sort | uniq -c| sort -r | less

for sentence in get_sentences():
    for chunk in sentence:
        if not chunk.has_verb():
            continue

        predicate = None
        candidate = []
        for src_id in chunk.srcs:
            src_chunk = sentence[src_id]
            if src_chunk.is_sahen_wo():
                # If several source chunks qualify, the last one wins.
                predicate = src_chunk.join_surface() + chunk.get_most_left_verb()
                continue
            part = src_chunk.get_most_right_particle()
            if part is not None:
                candidate.append((part, src_chunk.join_surface()))

        if predicate is None or not candidate:
            continue

        # De-duplicate and sort the pairs once so that the particle column
        # and the chunk column stay aligned with each other.
        ordered = sorted(set(candidate))
        particles = ' '.join(part for part, _ in ordered)
        chunks = ' '.join(text for _, text in ordered)
        print('{}\t{}\t{}'.format(predicate, particles, chunks))
# -*- coding: utf-8 -*-
"""Render the chunk-to-destination links of one sentence as a PNG.

Usage: pass the 1-based sentence number as the first command-line argument.
"""
import sys

from graphviz import Digraph

from knock41 import get_sentences


def sent2graph(sentence):
    """Draw an edge from each chunk to its destination and render 'knock44'.

    Chunks whose ``dst`` is -1 are skipped. Nodes are keyed by the string
    form of the chunk id / dst and labelled with ``join_surface()``.
    """
    graph = Digraph(format='png')
    for chunk in sentence:
        if chunk.dst == -1:
            continue
        src_key = str(chunk.id)
        dst_key = str(chunk.dst)
        graph.node(src_key, chunk.join_surface())
        graph.node(dst_key, sentence[chunk.dst].join_surface())
        graph.edge(src_key, dst_key)
    # cleanup=True is passed so only the rendered output is kept.
    graph.render('knock44', cleanup=True)


# The command-line argument is 1-based; enumerate() below counts from 0.
target = int(sys.argv[1]) - 1
for position, sentence in enumerate(get_sentences()):
    if position != target:
        continue
    sent2graph(sentence)
    break