Code example #1
File: 3.cbow_improve.py  Project: youngick/ai-rnn
import sys
sys.path.append('../../')  # make the project-local myutils package importable
from myutils.trainer import Trainer
from myutils.optimizer import Adam
from myutils.cbow import CBOW
from myutils.util import create_contexts_target
from ptb_dataset import load_data

#%%
# hyper-parameter setting
window_size = 5
hidden_size = 100
batch_size = 100
max_epoch = 20

# Read the training data
corpus, word_to_id, id_to_word = load_data('train')
vocab_size = len(word_to_id)
print(corpus.shape, vocab_size)  # (929589,)   10000

#%%
# Create contexts and target
contexts, target = create_contexts_target(corpus, window_size)
print(contexts.shape, target.shape)  # (929579, 10) (929579,)

#%%
# Create Model
model = CBOW(vocab_size, hidden_size, window_size, corpus)
optimizer = Adam()
trainer = Trainer(model, optimizer)

#%%
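# The original snippet is cut off here. A minimal sketch of the training step,
# assuming Trainer.fit takes (x, t, max_epoch, batch_size) as in the
# "Deep Learning from Scratch 2" trainer this project appears to mirror:
trainer.fit(contexts, target, max_epoch, batch_size)
trainer.plot()  # plot the loss curve recorded during fit
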
Code example #2
import sys
sys.path.append('../../')  # make the project-local myutils package importable
# SGD and RnnlmTrainer are assumed to live alongside Adam and Trainer in
# myutils, matching the imports in the first example
from myutils.optimizer import SGD
from myutils.trainer import RnnlmTrainer
from myutils.time_layers import BetterLstmLm
from ptb_dataset import load_data

#%%
# Setting Hyperparameters
batch_size = 20
wordvec_size = 650
hidden_size = 650  # number of elements in the LSTM hidden-state vector
time_size = 35  # number of time steps Truncated BPTT unrolls at once
lr = 20.0
max_epoch = 10  # 12hr
max_grad = 0.25
dropout = 0.5

# Read the training data
corpus, word_to_id, id_to_word = load_data('train')
corpus_val, _, _ = load_data('val')
corpus_test, _, _ = load_data('test')
vocab_size = len(word_to_id)

xs = corpus[:-1]  # input
ts = corpus[1:]  # label
data_size = len(xs)
print('corpus size: %d, vocabulary size: %d, input data size: %d' %
      (len(corpus), vocab_size, data_size))  # 929589, 10000, 929588

#%%
# Create Model
model = BetterLstmLm(vocab_size, wordvec_size, hidden_size, dropout)
optimizer = SGD(lr)
trainer = RnnlmTrainer(model, optimizer)
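
#%%
# The original snippet stops after constructing the trainer. A minimal sketch
# of the remaining training step, assuming RnnlmTrainer.fit takes
# (xs, ts, max_epoch, batch_size, time_size, max_grad) and that an
# eval_perplexity helper exists in myutils.util, mirroring the
# "Deep Learning from Scratch 2" API this project appears to follow:
from myutils.util import eval_perplexity

trainer.fit(xs, ts, max_epoch, batch_size, time_size, max_grad)

model.reset_state()  # clear the LSTM state before evaluating on fresh data
ppl_test = eval_perplexity(model, corpus_test)
print('test perplexity:', ppl_test)
# (a fuller loop would evaluate corpus_val each epoch and decay lr when
#  validation perplexity stops improving)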