# コード例 #1 (Code example #1)
def parse_chunk_analysis_result(lines):
    """Parse dependency-analysis output into a list of chunk lists.

    Splits *lines* into per-sentence line groups via NLP40, then builds
    one chunk list per sentence.

    lines: raw analyzer output (whatever NLP40.split_into_lines_list accepts).
    Returns: list with one chunk-list entry per sentence.
    """
    # Comprehension replaces the original append loop (which also had a
    # redundant trailing `pass`).
    return [make_chunk_list_from(sentence_lines)
            for sentence_lines in NLP40.split_into_lines_list(lines)]
# コード例 #2 (Code example #2)
        morphs = []
        for i_s in dst.srcs():
            last_morph_of_src = oneline[i_s].morphs()[-1]
            if NLP45.is_particle(last_morph_of_src):
                particles.append(last_morph_of_src.surface())
                morphs.append(oneline[i_s].morphs())
        key_base = dst.morphs()[0].base()
        if key_base in map_verb_and_pattern_of_particle_and_morphs:
            d = map_verb_and_pattern_of_particle_and_morphs[key_base]
            d['particles'] += copy.deepcopy(particles)
            d['morphs'] += copy.deepcopy(morphs)
        else:
            new_dict = dict(particles = copy.deepcopy(particles), morphs = copy.deepcopy(morphs))
            map_verb_and_pattern_of_particle_and_morphs[key_base] = new_dict
    write_verb_and_pattern_of_particle_and_morphs_result(i, map_verb_and_pattern_of_particle_and_morphs)

def write_verb_and_pattern_of_particle_and_morphs_result(i, map_verb_and_pattern_of_particle_and_morphs):
    """Print each verb's particle pattern and source chunks, tab-separated.

    Output columns: verb base form, space-joined particles, space-joined
    chunk strings (each chunk rendered by NLP42.concat_morphs). Verbs that
    collected no particles are skipped.

    i: sentence index; unused here but kept for the caller's interface
       (it was only referenced by a removed debug print).
    map_verb_and_pattern_of_particle_and_morphs: maps verb base form to a
       dict with 'particles' (list of particle surfaces) and 'morphs'
       (list of morph lists, one per source chunk).
    """
    for key_base, entry in map_verb_and_pattern_of_particle_and_morphs.items():
        particles = entry['particles']
        chunk_strings = [NLP42.concat_morphs(morphs) for morphs in entry['morphs']]
        if particles:  # only verbs that actually govern at least one particle
            print(key_base, ' '.join(map(str, particles)), ' '.join(chunk_strings), sep='\t')

if __name__ == '__main__':
    # Read the dependency (kakariuke) analysis output and parse it into
    # per-sentence chunk structures.
    lines = NLP40.read_kakariuke_analysis_result()
    map_list = NLP41.parse_chunk_analysis_result(lines)
    # NOTE(review): no function named `write_verb_and_pattern_of_particle_and_morphs`
    # is visible in this excerpt — only `..._result(i, map)` above, which takes two
    # arguments. Confirm the intended callee exists (it may be defined in the part
    # of this example not shown here); otherwise this line raises NameError.
    write_verb_and_pattern_of_particle_and_morphs(map_list)
# コード例 #3 (Code example #3)
def parse_morph_from(line):
    """Build a Morph from one MeCab output line.

    The line is '<surface>\\t<feature-csv>'; the feature CSV carries
    POS at index 0, POS subtype at index 1 and base form at index 6.
    """
    columns = line.split('\t')
    features = columns[1].split(',')
    return Morph(surface=columns[0],
                 base=features[6],
                 pos=features[0],
                 pos1=features[1])

def parse_chunk_info(line):
    """Return the dependency target index (dst) from a chunk header line.

    A CaboCha chunk header looks like '* <id> <dst>D ...'. Returns dst as
    an int; returns -1 for any line that does not match (morph lines, and
    also root headers like '* 3 -1D' since '-' is not matched by \\d+).
    """
    header = re.match(r'\*\s+\d+\s+(\d+)D', line)
    if header is None:
        return -1
    return int(header.group(1))  # dst

def set_srcs_for_chunk_list(chunk_list):
    """Register each chunk's index on the chunk it depends on.

    For every chunk i whose dst() names a valid target index, call
    append_srcs(i) on the target chunk. A dst of None or -1 (no
    dependency target) is skipped.
    """
    for src_index, src_chunk in enumerate(chunk_list):
        target = src_chunk.dst()
        if target is None or target == -1:
            continue
        chunk_list[target].append_srcs(src_index)

if __name__ == '__main__':
    # Read the dependency (kakariuke) analysis output, parse it into
    # per-sentence chunk structures, and write chunk index 8 of each
    # sentence (presumably the exercise's required output — confirm
    # the meaning of [8] against the assignment).
    lines = NLP40.read_kakariuke_analysis_result()
    map_list = parse_chunk_analysis_result(lines)
    NLP40.write_map_list(map_list, [8])