from makePickle import pickleLoad

# Print every dependency pair whose source chunk contains a noun ('名詞')
# and whose head chunk contains a verb ('動詞').  Each pair is written as
# "<source text>\t<head text>", with symbol ('記号') morphemes removed
# from both sides.
for sentence in pickleLoad('outchunk.pickle'):
    if not sentence:
        # Nothing to report for an empty sentence.
        continue

    for chunk in sentence:
        # A dst of -1 means the chunk has no head to depend on.
        if chunk.dst == -1:
            continue

        # The source chunk must contain at least one noun.
        if not any(morph.pos == '名詞' for morph in chunk.morphs):
            continue

        # The head chunk must contain at least one verb.
        head = sentence[chunk.dst]
        if not any(morph.pos == '動詞' for morph in head.morphs):
            continue

        source_text = ''.join(
            morph.surface for morph in chunk.morphs if morph.pos != '記号')
        head_text = ''.join(
            morph.surface for morph in head.morphs if morph.pos != '記号')
        print(source_text + '\t' + head_text)
from makePickle import pickleLoad

# For every non-empty sentence, build one output line made of the
# "<source text><head text>" strings of all chunks that have a head,
# joined by tabs, then print the lines once all sentences are processed.
# NOTE(review): source and head text are concatenated with no separator;
# only the pairs themselves are tab-separated -- confirm this is intended.
input_sentence = pickleLoad('outchunk.pickle')

output_sentence = []
for sentence_chunks in input_sentence:
    if not sentence_chunks:
        # Skip sentences that contain no chunks at all.
        continue

    pairs = []
    for chunk in sentence_chunks:
        if chunk.dst == -1:
            # This chunk has no head, so there is no pair to emit.
            continue

        # Surface text of the source chunk without symbol morphemes.
        source_text = ''.join(
            morph.surface for morph in chunk.morphs if morph.pos != '記号')
        # Surface text of the head chunk, filtered the same way.
        head_text = ''.join(
            morph.surface
            for morph in sentence_chunks[chunk.dst].morphs
            if morph.pos != '記号')
        pairs.append(source_text + head_text)

    # One tab-separated line per sentence (empty when no chunk had a head).
    output_sentence.append('\t'.join(pairs))

for line in output_sentence:
    print(line)
from makePickle import pickleLoad

# Load the pickled data and show the surface form, base form, part of
# speech and part-of-speech subcategory of every item in the entry at
# index 2 (presumably the third sentence -- confirm against the producer).
lines = pickleLoad('out.pickle')

for morph in lines[2]:
    print(morph.surface, morph.base, morph.pos, morph.pos1)
chunk = Chunk() chunk.dst = int(status[2][:-1]) # D抜いてintにキャスト temp_dic[chunk.dst].append(index) # 係り元を集計 else: if 'EOS' in line: # EOSの時 chunk.srcs = temp_dic[index] inner_list.append(chunk) outer_list.append(inner_list[1:]) chunk = Chunk() inner_list = [] temp_dic = defaultdict(list) index = -1 else: elements = re.split('[,\t]', line.rstrip()) morph = Morph() morph.surface = elements[0] morph.base = elements[7] morph.pos = elements[1] morph.pos1 = elements[2] chunk.morphs.append(morph) return outer_list if __name__ == '__main__': chunk8 = pickleLoad('outchunk.pickle')[7] df = pd.DataFrame( [[''.join([morph.surface for morph in chunk.morphs]), str(chunk.dst)] for chunk in chunk8], columns=['文節', '係り先']) print(df)
import makePickle as mp
from k40 import create_morph

# Serialize the result of create_morph() under the name 'out', read
# 'out.pickle' back, and print the surface forms found in the entry at
# index 2.
# NOTE(review): presumably pickleDump appends the '.pickle' suffix to the
# given name, which is why the load uses 'out.pickle' -- confirm.
mp.pickleDump(create_morph(), 'out')
dic = mp.pickleLoad('out.pickle')

surfaces = [morph.surface for morph in dic[2]]
print(surfaces)