from knock41 import Chunk, cabocha_chunk_data
import collections

# Knock 46-style extraction: for every sentence in the CaboCha-parsed corpus,
# find chunks containing a verb and, for each chunk that depends on the verb
# chunk, record its rightmost particle (preferring case particles, 格助詞).
#
# NOTE(review): the original file had all newlines stripped; indentation here
# is reconstructed. The `for src` loop is placed inside the verb check, which
# matches the standard knock45/46 pattern — confirm against the original.
# NOTE(review): the script appears truncated — `verb_pattern` and `temp_src`
# are built but never written to data_out; verify the missing output step.
with open('../data/neko.txt.cabocha', 'r') as data_in:
    with open('result/knock46.txt', 'w') as data_out:
        for i, line in enumerate(cabocha_chunk_data(data_in)):
            # Sentence header, 1-based ("Nth line").
            print('[{}行目]'.format(i + 1), file=data_out)
            for phrase in line:
                verb_pattern = ''
                # Maps dependent-chunk text -> its governing particle surface.
                temp_src = collections.defaultdict(lambda: '')
                if '動詞' in phrase.get_phrase_pos():
                    # Base form (per get_phrase_list) of the first verb token.
                    verb_pattern += phrase.get_phrase_list()[
                        phrase.get_phrase_pos().index('動詞')] + '\t'
                    for src in phrase.srcs:
                        if '助詞' in line[src].get_phrase_pos():
                            if '格助詞' in line[src].get_phrase_pos1():
                                # Rightmost case particle wins.
                                temp_index = max(
                                    k for k, y in enumerate(
                                        line[src].get_phrase_pos1())
                                    if y == '格助詞')
                                temp_src[line[src].get_phrase_txt()] = (
                                    line[src].get_phrase_list()[temp_index])
                            else:
                                # No case particle: fall back to the
                                # rightmost particle of any kind.
                                temp_index = max(
                                    k for k, y in enumerate(
                                        line[src].get_phrase_pos())
                                    if y == '助詞')
                                temp_src[line[src].get_phrase_txt()] = (
                                    line[src].get_phrase_list()[temp_index])
from knock41 import Chunk, cabocha_chunk_data

if __name__ == '__main__':
    # Knock 42: for every sentence, write each dependent chunk and its head
    # chunk separated by two tabs; a root chunk (dst == -1) that has no
    # dependents is written on its own line. Sentences are separated by a
    # blank line. Chunks with empty surface text are skipped.
    #
    # NOTE(review): the original file had all newlines stripped; indentation
    # here is reconstructed from the statement sequence.
    with open('../data/neko.txt.cabocha', 'r') as data_in:
        with open('result/knock42.txt', 'w') as data_out:
            for line in cabocha_chunk_data(data_in):
                for phrase in line:
                    src_txt = phrase.get_phrase_txt()
                    if src_txt == '':
                        continue
                    if phrase.dst == -1:
                        # Root chunk: only emit it when nothing depends on it.
                        # Fix: the original evaluated line[phrase.dst] before
                        # this check, so dst == -1 silently indexed line[-1]
                        # (the last chunk) via negative indexing.
                        if not phrase.srcs:
                            data_out.write(src_txt + '\n')
                        continue
                    dst_txt = line[phrase.dst].get_phrase_txt()
                    if dst_txt == '':
                        continue
                    data_out.write(src_txt + '\t\t' + dst_txt + '\n')
                data_out.write('\n')