def parse_chunk_analysis_result(lines):
    """Split raw dependency-parse output into per-sentence line lists and
    build one chunk list per sentence.

    lines: raw text of the kakariuke (dependency) analysis result, as read
        by NLP40.read_kakariuke_analysis_result().
    Returns: a list containing one chunk list (whatever
        make_chunk_list_from builds) per sentence.
    """
    lines_list = NLP40.split_into_lines_list(lines)
    # The original append loop carried a stray `pass`; a comprehension
    # expresses the same one-chunk-list-per-sentence mapping directly.
    return [make_chunk_list_from(lines_list_elem) for lines_list_elem in lines_list]
def write_verb_and_pattern_of_particle_and_morphs(map_list):
    """Collect, per sentence, the case pattern (particles of the chunks
    depending on a predicate) together with those source chunks, then print
    the result for each sentence.

    NOTE(review): the opening of this function was lost when the source was
    whitespace-mangled — the signature and every line marked "reconstructed"
    below are a best-effort guess derived from how the surviving tail uses
    `i`, `oneline`, `dst`, `particles` and from the call in __main__.
    Confirm them against the original file; a verb filter on `dst` may also
    have been truncated.
    """
    for i, oneline in enumerate(map_list):  # reconstructed — TODO confirm
        map_verb_and_pattern_of_particle_and_morphs = {}  # reconstructed — TODO confirm
        for dst in oneline:  # reconstructed — TODO confirm
            particles = []  # reconstructed — TODO confirm
            # --- surviving original code starts here ---
            morphs = []
            for i_s in dst.srcs():
                last_morph_of_src = oneline[i_s].morphs()[-1]
                # Only source chunks that end in a particle contribute to the
                # case pattern; particle and chunk are kept in lockstep.
                if NLP45.is_particle(last_morph_of_src):
                    particles.append(last_morph_of_src.surface())
                    morphs.append(oneline[i_s].morphs())
            # Key the accumulator on the base form of the head chunk's
            # first morpheme.
            key_base = dst.morphs()[0].base()
            if key_base in map_verb_and_pattern_of_particle_and_morphs:
                d = map_verb_and_pattern_of_particle_and_morphs[key_base]
                d['particles'] += copy.deepcopy(particles)
                d['morphs'] += copy.deepcopy(morphs)
            else:
                new_dict = dict(particles=copy.deepcopy(particles),
                                morphs=copy.deepcopy(morphs))
                map_verb_and_pattern_of_particle_and_morphs[key_base] = new_dict
        write_verb_and_pattern_of_particle_and_morphs_result(
            i, map_verb_and_pattern_of_particle_and_morphs)


def write_verb_and_pattern_of_particle_and_morphs_result(i, map_verb_and_pattern_of_particle_and_morphs):
    """Print one tab-separated line per predicate: its base form, the
    space-joined particles, and the space-joined surface strings of the
    contributing chunks.

    i: sentence index (only used by the commented-out debug print).
    map_verb_and_pattern_of_particle_and_morphs: dict mapping base form ->
        {'particles': [...], 'morphs': [...]} as built by the caller.
    """
    #print(i)
    for key_base in map_verb_and_pattern_of_particle_and_morphs:
        d = map_verb_and_pattern_of_particle_and_morphs[key_base]
        particles = d['particles']
        morphs = d['morphs']
        chunk_strings = list(map(lambda x: NLP42.concat_morphs(x), morphs))
        # Predicates with no governing particles are skipped entirely.
        if len(particles) > 0:
            print(key_base,
                  ' '.join(map(lambda x: str(x), particles)),
                  ' '.join(chunk_strings),
                  sep='\t')


if __name__ == '__main__':
    lines = NLP40.read_kakariuke_analysis_result()
    map_list = NLP41.parse_chunk_analysis_result(lines)
    write_verb_and_pattern_of_particle_and_morphs(map_list)
def parse_morph_from(line):
    """Parse one morpheme line ("surface\\tfeature,feature,...") into a Morph.

    The tab-separated line has the surface form first; the comma-separated
    feature field supplies pos (index 0), pos1 (index 1) and the base form
    (index 6) — the MeCab/CaboCha feature layout.
    """
    elem_list = line.split('\t')
    essence = elem_list[1]
    elements = essence.split(',')
    surface = elem_list[0]
    base = elements[6]
    pos = elements[0]
    pos1 = elements[1]
    return Morph(surface=surface, base=base, pos=pos, pos1=pos1)


def parse_chunk_info(line):
    """Return the dependency-target index from a chunk header line
    ("* <id> <dst>D ..."), or -1 when the line is not such a header
    (including root chunks written as "-1D", which the pattern rejects).
    """
    m = re.match(r'\*\s+\d+\s+(\d+)D', line)
    if m:
        return int(m.group(1))  # dst
    else:
        return -1


def set_srcs_for_chunk_list(chunk_list):
    """Invert the dst links: for every chunk that points at a target,
    register its own index as a source on that target chunk (in place).
    """
    for i, chunk in enumerate(chunk_list):
        d = chunk.dst()
        # Root or unset chunks have no target to register with.
        if d is None or d == -1:
            continue
        chunk_list[d].append_srcs(i)


if __name__ == '__main__':
    lines = NLP40.read_kakariuke_analysis_result()
    map_list = parse_chunk_analysis_result(lines)
    NLP40.write_map_list(map_list, [8])