"""
## Why does merely reversing the input data speed up training and improve accuracy?
* Intuitively, it is thought that gradients propagate more smoothly.

@author: shkim
"""

import sys

import numpy as np

sys.path.append('../../')
from myutils.seq2seq import Seq2seq
from myutils.optimizer import Adam
from myutils.trainer import Trainer
from seq_dataset import load_data, get_vocab

#%%
# Read the addition dataset (question/answer pairs, e.g. "19+884" -> "_903").
(x_train, t_train), (x_test, t_test) = load_data('addition.txt')
# seq2seq accuracy trick: reverse the INPUT sequences along the time axis.
# The targets are left untouched.
x_train, x_test = x_train[:, ::-1], x_test[:, ::-1]  #*****************

char_to_id, id_to_char = get_vocab()
print(x_train.shape, t_train.shape, x_test.shape,
      t_test.shape)  # (45000,7) (45000,5) (5000,7) (5000,5)
print('vocab_size:', len(id_to_char))  # 13 : 0~9, +, _, ' '

#%%
# Hyperparameters for the seq2seq model / trainer below.
vocab_size = len(char_to_id)
wordvec_size = 16
hidden_size = 128
batch_size = 128
max_epoch = 25
"""
# Attension 시각화(Visualization)
"""
#%%
import sys
sys.path.append('..')
import numpy as np
from seq_dataset import load_data, get_vocab
import matplotlib.pyplot as plt
from myutils.attention_seq2seq import AttentionSeq2seq

#%%
(x_train, t_train), (x_test, t_test) = load_data('date.txt')
char_to_id, id_to_char = get_vocab()

# 입력 문장 반전
x_train, x_test = x_train[:, ::-1], x_test[:, ::-1]

vocab_size = len(char_to_id)  # 59
wordvec_size = 16
hidden_size = 256

model = AttentionSeq2seq(vocab_size, wordvec_size, hidden_size)
model.load_params('AttentionSeq2seq-ep10.pkl')

#%%
_idx = 0
"""
## 시계열 데이터 변환을 위한 Toy Example 
* 덧셈(addition) 계산 문제 --> Question & Answering Sentence
* dataset : addition.txt --> seq_dataset.py
"""

#%%
"""
## 시계열 데이터 변환용 덧셈 Toy Dataset 살펴보기
* 덧셈 학습 데이터 : addition.txt --> 5만개의 덧셈 학습 데이터(문제와 답)
"""

from seq_dataset import load_data, get_vocab

#%%
(x_train, t_train), (x_test, t_test) = load_data('addition.txt', seed=2020)
char_to_id, id_to_char = get_vocab()

print('x_train.shape:', x_train.shape, 't_train.shape:',
      t_train.shape)  # (45000,7),(45000,5)
print('x_test.shape:', x_test.shape, 't_test.shape:',
      t_test.shape)  # (5000,7),(5000,5)

print(x_train[0])  # [ 0  7  2 11 11 12  5]
print(t_train[0])  # [ 6  7  9 10  5]
# print('5(%c)' % id_to_char[5])  # 5( )
# print('6(%c)' % id_to_char[6])  # 6(_)

print('x_train[0]-->', ''.join([id_to_char[c] for c in x_train[0]]))  # 19+884
print('t_train[0]-->', ''.join([id_to_char[c] for c in t_train[0]]))  # _903