Beispiel #1
0
from makePickle import pickleLoad

# For every sentence loaded from the pickle, print "<source>\t<target>" for each
# chunk whose morphemes include a noun ('名詞') and whose dependency target
# chunk includes a verb ('動詞'). Symbol morphemes ('記号') are left out of the
# printed text.
for sentence in pickleLoad('outchunk.pickle'):
    if not sentence:
        continue
    for src in sentence:
        # A dst of -1 means the chunk has no dependency target.
        if src.dst == -1:
            continue
        # The source chunk must contain at least one noun.
        if not any(m.pos == '名詞' for m in src.morphs):
            continue
        target = sentence[src.dst]
        # The target chunk must contain at least one verb.
        if not any(m.pos == '動詞' for m in target.morphs):
            continue
        src_text = ''.join(m.surface for m in src.morphs if m.pos != '記号')
        dst_text = ''.join(m.surface for m in target.morphs if m.pos != '記号')
        print(src_text + '\t' + dst_text)
Beispiel #2
0
from makePickle import pickleLoad

def _text_without_symbols(chunk):
    """Join the surface forms of a chunk's morphemes, skipping symbols ('記号')."""
    return ''.join(m.surface for m in chunk.morphs if m.pos != '記号')


input_sentence = pickleLoad('outchunk.pickle')

# Build one output line per non-empty sentence: for every chunk that has a
# dependency target, concatenate the chunk's text with its target's text, then
# join those pieces with tabs.
output_sentence = []
for sentence in input_sentence:
    if not sentence:  # empty sentence ([])
        continue
    pieces = [
        _text_without_symbols(src) + _text_without_symbols(sentence[src.dst])
        for src in sentence
        if src.dst != -1  # -1 means the chunk has no dependency target
    ]
    output_sentence.append('\t'.join(pieces))

for line in output_sentence:
    print(line)
Beispiel #3
0
from makePickle import pickleLoad

lines = pickleLoad('out.pickle')
# Print the surface, base, pos and pos1 attributes of every item stored at
# index 2 of the loaded data, one item per output line.
third_entry = lines[2]
for morph in third_entry:
    print(morph.surface, morph.base, morph.pos, morph.pos1)
Beispiel #4
0
                chunk = Chunk()
                chunk.dst = int(status[2][:-1])  # D抜いてintにキャスト
                temp_dic[chunk.dst].append(index)  # 係り元を集計
            else:
                if 'EOS' in line:  # EOSの時
                    chunk.srcs = temp_dic[index]
                    inner_list.append(chunk)
                    outer_list.append(inner_list[1:])
                    chunk = Chunk()
                    inner_list = []
                    temp_dic = defaultdict(list)
                    index = -1
                else:
                    elements = re.split('[,\t]', line.rstrip())
                    morph = Morph()
                    morph.surface = elements[0]
                    morph.base = elements[7]
                    morph.pos = elements[1]
                    morph.pos1 = elements[2]
                    chunk.morphs.append(morph)
    return outer_list


if __name__ == '__main__':
    # Tabulate the chunks stored at index 7 of the pickled data: one row per
    # chunk, holding its concatenated surface text and its dst as a string.
    chunk8 = pickleLoad('outchunk.pickle')[7]
    rows = []
    for chunk in chunk8:
        text = ''.join(morph.surface for morph in chunk.morphs)
        rows.append([text, str(chunk.dst)])
    df = pd.DataFrame(rows, columns=['文節', '係り先'])
    print(df)
Beispiel #5
0
import makePickle as mp
from k40 import create_morph

# Serialize the result of create_morph() via makePickle, load 'out.pickle'
# back, and print the surface attribute of every item at index 2.
# NOTE(review): presumably pickleDump(..., 'out') writes 'out.pickle' -- confirm
# against makePickle.
mp.pickleDump(create_morph(), 'out')
dic = mp.pickleLoad('out.pickle')
surfaces = [morph.surface for morph in dic[2]]
print(surfaces)