def write_sa_verb_and_pattern_of_particle_and_morphs_oneline(i, oneline):
    """Extract case frames for "sahen-noun + particle + verb" predicates.

    For sentence number ``i`` (``oneline`` is that sentence's list of chunk
    objects exposing ``morphs()``, ``srcs()``, ``dst()``), find verbs whose
    dependents include a chunk ending in a sahen-connecting noun followed by
    a particle (``is_particle_wo`` — presumably the particle 'を'; confirm
    against its definition), then collect the particles and morphemes of the
    verb's remaining dependents and write the result.
    """
    map_sa_verb_and_pattern_of_particle_and_morphs = {}
    for i_d, dst in enumerate(oneline):
        # Only chunks whose first morpheme is a verb can head a predicate.
        if not NLP43.is_verb(dst.morphs()[0]):
            continue
        for i_s in dst.srcs():
            chunk = oneline[i_s].morphs()
            # Need at least "noun + particle" at the end of the dependent.
            if len(chunk) < 2:
                continue
            last_morph_of_src = chunk[-1]
            second_last_morph_of_src = chunk[-2]
            # Dependent must end in a sahen-connecting noun + the target
            # particle (is_noun_connected_sa checks pos1 'サ変接続').
            if not (is_particle_wo(last_morph_of_src) and is_noun_connected_sa(second_last_morph_of_src)):
                continue
            # Key = surface of the whole "sahen-noun+particle" chunk
            # concatenated with the verb's base form.
            key_base = NLP42.concat_morphs(chunk) + dst.morphs()[0].base()
            # Gather the verb's OTHER dependents (skip the key chunk itself).
            for i_src_of_s in dst.srcs():
                if i_src_of_s == i_s:
                    continue
                c = oneline[i_src_of_s].morphs()
                c = omit_punctual_from_morphs(c)
                # Chunk may become empty once punctuation is removed.
                if len(c) < 1:
                    continue
                last_morph_of_i_src_of_s = c[-1]
                if NLP45.is_particle(last_morph_of_i_src_of_s):
                    if key_base in map_sa_verb_and_pattern_of_particle_and_morphs:
                        # Append to the existing frame for this key.
                        d = map_sa_verb_and_pattern_of_particle_and_morphs[key_base]
                        d['particles'].append(copy.deepcopy(last_morph_of_i_src_of_s.surface()))
                        d['morphs'].append(copy.deepcopy(c))
                    else:
                        # First occurrence: start a new frame entry.
                        new_dict = dict(particles = copy.deepcopy([last_morph_of_i_src_of_s.surface()]), morphs = copy.deepcopy([c]))
                        map_sa_verb_and_pattern_of_particle_and_morphs[key_base] = new_dict
    write_verb_and_pattern_of_particle_and_morphs_result(i, map_sa_verb_and_pattern_of_particle_and_morphs)
def replace_noun_morph_from_chunk_with(chunk, what):
    """Return a new Chunk with every noun morpheme replaced by *what*.

    Each morpheme for which ``NLP43.is_noun`` is true is replaced by a fresh
    ``Morph`` whose surface and base are both *what* (POS fields preserved);
    every other morpheme is deep-copied unchanged.  The new Chunk keeps the
    original chunk's ``dst`` and ``srcs``; *chunk* itself is not mutated.
    """
    # Comprehension replaces the original map/lambda; a dead
    # `for m in chunk.morphs(): pass` loop has been removed.
    new_morphs = [
        Morph(what, what, m.pos(), m.pos1()) if NLP43.is_noun(m) else copy.deepcopy(m)
        for m in chunk.morphs()
    ]
    new_chunk = Chunk(new_morphs, chunk.dst())
    for s in chunk.srcs():
        new_chunk.append_srcs(s)
    return new_chunk
def write_verb_and_pattern_of_particle_oneline(i, oneline):
    """Collect, per verb base form, the particles ending its dependent chunks
    in sentence *i*, then write the result.

    NOTE(review): this uses bare ``is_particle`` while the sibling functions
    use ``NLP45.is_particle`` — confirm they are intentionally different.
    """
    verb_particle_map = {}
    for dst in oneline:
        head = dst.morphs()[0]
        if not NLP43.is_verb(head):
            continue
        # Particles are the surfaces of particle morphemes that end a
        # dependent chunk of this verb.
        found = []
        for src in dst.srcs():
            tail = oneline[src].morphs()[-1]
            if is_particle(tail):
                found.append(tail.surface())
        # Accumulate under the verb's base form (entry created even when
        # no particles were found, matching the sibling functions).
        verb_particle_map.setdefault(head.base(), []).extend(copy.deepcopy(found))
    write_verb_and_pattern_of_particle_result(i, verb_particle_map)
def write_verb_and_pattern_of_particle_and_morphs_oneline(i, oneline):
    """Collect, per verb base form, both the particles and the morpheme lists
    of its particle-ending dependent chunks in sentence *i*, then write the
    result."""
    frames = {}
    for dst in oneline:
        head = dst.morphs()[0]
        if not NLP43.is_verb(head):
            continue
        found_particles = []
        found_morphs = []
        for src in dst.srcs():
            src_morphs = oneline[src].morphs()
            tail = src_morphs[-1]
            if NLP45.is_particle(tail):
                found_particles.append(tail.surface())
                found_morphs.append(src_morphs)
        # One frame per verb base form; entry is created even when this
        # occurrence contributed no particles, matching the original.
        entry = frames.setdefault(head.base(), dict(particles=[], morphs=[]))
        entry['particles'] += copy.deepcopy(found_particles)
        entry['morphs'] += copy.deepcopy(found_morphs)
    write_verb_and_pattern_of_particle_and_morphs_result(i, frames)
def get_noun_index(i, chunks):
    """Return the indices of chunks that contain a noun and depend on some
    other chunk (``dst() != -1``).

    *i* (the sentence number) is unused here; it is kept for interface
    parity with the sibling per-sentence functions.
    """
    return [
        idx
        for idx, chunk in enumerate(chunks)
        if NLP43.has_chunk_noun(chunk) and chunk.dst() != -1
    ]
def is_noun_connected_sa(m):
    """Return True if morpheme *m* is a noun whose pos1 begins with
    'サ変接続' (sahen-connecting noun)."""
    if not NLP43.is_noun(m):
        return False
    return m.pos1().startswith('サ変接続')