def main():
    """Write every dependency pair (source phrase TAB destination phrase) to Dependency.txt."""
    with open('Dependency.txt', 'w') as out:
        for chunks in load_cabocha_iter():
            # One line per dependent chunk; root chunks (dst == -1) are skipped.
            pairs = (
                (chunk.normalized_surface(), chunks[chunk.dst].normalized_surface())
                for chunk in chunks
                if chunk.dst != -1
            )
            out.writelines(f'{src}\t{dst}\n' for src, dst in pairs)
def main():
    """Extract functional-verb constructions ("sahen noun + を + verb") with their
    remaining particle/phrase arguments, one tab-separated line per predicate,
    into out3.txt.

    Bug fixed: the original iterated ``chunk.srcs[i]`` where ``i`` was the
    chunk's own index in the sentence — ``srcs`` holds the ids of chunks that
    depend on this one, so indexing it by the chunk id selects a single int
    (not iterable) or the wrong element. We iterate ``chunk.srcs`` directly.
    The inner loop also shadowed ``i``; renamed to ``j``.
    NOTE(review): assumes ``Chunk.srcs`` is a flat list of chunk ids — confirm
    against the Chunk class definition.
    """
    with open('out3.txt', 'w', encoding='utf8') as f:
        for chunks in load_cabocha_iter():
            for chunk in chunks:
                # Skip chunks nothing depends on.
                if not chunk.srcs:
                    continue
                # Base forms of the verbs in this (candidate predicate) chunk.
                verbs = [morph.base for morph in chunk.morphs if morph.pos == '動詞']
                if not verbs:
                    continue
                # Dependent chunks that contain at least one particle.
                particle_phrases = [
                    chunks[src] for src in chunk.srcs
                    if any(m.pos == '助詞' for m in chunks[src].morphs)
                ]
                if not particle_phrases:
                    continue
                # Find the first "sahen-connective noun + を" dependent; it becomes
                # part of the predicate and is removed from the argument list.
                predicate = ''
                for phrase in particle_phrases:
                    morphs = phrase.morphs
                    for j in range(len(morphs) - 1):
                        if morphs[j].pos1 == 'サ変接続' and morphs[j + 1].surface == 'を':
                            predicate = f'{morphs[j].surface}を{verbs[0]}'
                            particle_phrases.remove(phrase)
                            break
                    if predicate:
                        break
                if not predicate:
                    # No functional-verb construction in this chunk.
                    continue
                # Collect (first particle, full phrase) for each remaining argument.
                particles_phrases = []
                for phrase in particle_phrases:
                    for morph in phrase.morphs:
                        if morph.pos == '助詞':
                            particles_phrases.append(
                                (morph.surface, phrase.normalized_surface()))
                            break
                pp = sorted(particles_phrases)
                f.write(
                    f'{predicate}\t{" ".join([p[0] for p in pp])}\t{" ".join([p[1] for p in pp])}\n'
                )
def main():
    """Print the dependency-path string for every pair of noun chunks."""
    for chunks in load_cabocha_iter(4):
        # chunk id -> list of chunk ids on the way to the sentence root,
        # e.g. {0: [5], 1: [2, 3, 4, 5], 3: [4, 5], 4: [5]}
        paths = {}
        for idx, chunk in enumerate(chunks):
            if not any(m.pos == '名詞' for m in chunk.morphs):
                continue
            trail = []
            node = chunk
            while node.dst != -1:
                trail.append(node.dst)
                node = chunks[node.dst]
            paths[idx] = trail
        for a, b in combinations(paths, 2):
            print(obtain_path_str(chunks, paths, a, b))
def main():
    """Print the first 20 non-empty dependency pairs, tab-separated."""
    printed = 0
    for chunks in load_cabocha_iter():
        for chunk in chunks:
            if chunk.dst == -1:
                continue
            source = chunk.normalized_surface()
            target = chunks[chunk.dst].normalized_surface()
            # Skip pairs where either side normalizes to an empty string.
            if source and target:
                print(f'{source}\t{target}')
                printed += 1
                if printed == 20:
                    return
def main():
    """Write pairs where a noun chunk depends on a verb chunk to Dependency_noun_verb.txt."""
    with open('Dependency_noun_verb.txt', 'w') as out:
        for chunks in load_cabocha_iter():
            for chunk in chunks:
                if chunk.dst == -1:
                    continue
                head = chunks[chunk.dst]
                has_noun = any(m.pos == '名詞' for m in chunk.morphs)
                has_verb = any(m.pos == '動詞' for m in head.morphs)
                if has_noun and has_verb:
                    out.write(f'{chunk.normalized_surface()}\t{head.normalized_surface()}\n')
def main():
    """Write, for every noun chunk, its dependency path to the sentence root.

    Output format: one ' -> '-joined path per line in noun_to_root.txt.
    """
    with open("noun_to_root.txt", "w") as out:
        for chunks in load_cabocha_iter():
            for chunk in chunks:
                if chunk.dst == -1:
                    continue
                if not any(m.pos == "名詞" for m in chunk.morphs):
                    continue
                segments = [chunk.normalized_surface()]
                idx = chunk.dst
                while idx != -1:
                    segments.append(chunks[idx].normalized_surface())
                    idx = chunks[idx].dst
                out.write(" -> ".join(segments) + "\n")
def main():
    """Print, for sentence #5 only, the path from each noun chunk to the root.

    Example output: 吾輩は -> 見た
    """
    for sent_no, chunks in enumerate(load_cabocha_iter()):
        if sent_no != 5:
            continue
        for chunk in chunks:
            if chunk.dst == -1:
                continue
            # Only chunks that contain at least one noun start a path.
            if not any(morph.pos == '名詞' for morph in chunk.morphs):
                continue
            segments = [chunk.normalized_surface()]
            node = chunk
            while node.dst != -1:
                node = chunks[node.dst]
                segments.append(node.normalized_surface())
            print(' -> '.join(segments))
def main():
    """Render the dependency tree of sentence #7 as graph.png via gen_graph."""
    for sent_no, chunks in enumerate(load_cabocha_iter()):
        if sent_no != 7:
            continue
        edges = []
        for idx, chunk in enumerate(chunks):
            if chunk.dst == -1:
                continue
            src_text = chunk.normalized_surface()
            dst_text = chunks[chunk.dst].normalized_surface()
            # Drop edges where either endpoint normalizes to an empty string.
            if src_text and dst_text:
                edges.append((idx, src_text, chunk.dst, dst_text))
        # Build and emit the graph.
        graph = gen_graph(edges)
        print(graph.to_string())
        graph.write('graph.png', format='png', encoding='utf8')
def main():
    """Extract verb case patterns (verb base form + sorted particle/phrase
    pairs of its dependents) and write them tab-separated to out2.txt."""
    with open('out2.txt', 'w', encoding='utf8') as f:
        for chunks in load_cabocha_iter():
            # dst chunk id -> [verb base form, [(particle, phrase), ...]]
            frames = {}
            for chunk in chunks:
                if chunk.dst == -1:
                    continue
                particles = [m.surface for m in chunk.morphs if m.pos == '助詞']
                verbs = [m.base for m in chunks[chunk.dst].morphs if m.pos == '動詞']
                if not particles or not verbs:
                    continue
                # First encounter fixes the verb; later dependents just add pairs.
                entry = frames.setdefault(chunk.dst, [verbs[0], []])
                entry[1].append((particles[0], chunk.normalized_surface()))
            for verb, pairs in frames.values():
                ordered = sorted(pairs)
                f.write(f'{verb}\t{" ".join([p[0] for p in ordered])}\t{" ".join([p[1] for p in ordered])}\n')
def main():
    """Write verb case patterns (verb base, last surface of each dependent,
    and the dependent's full phrase) to case_pattern_2.txt."""
    with open("case_pattern_2.txt", "w") as f:
        for chunks in load_cabocha_iter():
            # dst chunk id -> [verb base form, [[last surface, phrase], ...]]
            patterns = {}
            for chunk in chunks:
                if chunk.dst == -1:
                    continue
                verbs = [m.base for m in chunks[chunk.dst].morphs if m.pos == '動詞']
                if not verbs:
                    continue
                pair = [chunk.morphs[-1].surface, chunk.normalized_surface()]
                if chunk.dst in patterns:
                    patterns[chunk.dst][1].append(pair)
                else:
                    patterns[chunk.dst] = [verbs[0], [pair]]
            for verb, pairs in patterns.values():
                # Sort arguments by their final surface (usually the particle).
                pairs.sort(key=lambda p: p[0])
                f.write(f'{verb}\t{" ".join([p[0] for p in pairs])}\t{" ".join([p[1] for p in pairs])}\n')
def main():
    """For sentence #5, collect each noun chunk's path to the root and print
    the path string for every pair of noun chunks."""
    for sent_no, chunks in enumerate(load_cabocha_iter()):
        if sent_no != 5:
            continue
        # chunk id -> list of chunk ids on the way to the root,
        # e.g. {0: [5], 1: [2, 3, 4, 5], 3: [4, 5], 4: [5]}
        paths = {}
        for idx, chunk in enumerate(chunks):
            if chunk.dst == -1:
                continue
            if not any(morph.pos == '名詞' for morph in chunk.morphs):
                continue
            trail = []
            node = chunk
            while node.dst != -1:
                trail.append(node.dst)
                node = chunks[node.dst]
            paths[idx] = trail
        print(paths)
        for a, b in combinations(paths, 2):
            print(obtain_path_str(chunks, paths, a, b))
def main():
    """Write verb case patterns (verb base form + sorted final surfaces of its
    dependent chunks) to case_pattern.txt."""
    with open("case_pattern.txt", "w") as f:
        for chunks in load_cabocha_iter():
            # dst chunk id -> [verb base form, [surface, surface, ...]]
            patterns = {}
            for chunk in chunks:
                if chunk.dst == -1:
                    continue
                verbs = [m.base for m in chunks[chunk.dst].morphs if m.pos == '動詞']
                if not verbs:
                    continue
                last_surface = chunk.morphs[-1].surface
                if chunk.dst in patterns:
                    patterns[chunk.dst][1].append(last_surface)
                else:
                    patterns[chunk.dst] = [verbs[0], [last_surface]]
            for verb, surfaces in patterns.values():
                f.write(f'{verb}\t{" ".join(sorted(surfaces))}\n')