Example #1
import sys, os
sys.path.append(os.pardir)
import numpy as np
from common.optimizer import SGD
from dataset import ptb
from simple_rnnlm import SimpleRnnlm


def main():
    batch_size = 10
    wordvec_size = 100
    hidden_size = 100
    time_size = 5
    lr = 0.1
    max_epoch = 100

    corpus, word_to_id, id_to_word = ptb.load_data('train')
    corpus_size = 1000  # shrink the dataset for testing
    corpus = corpus[:corpus_size]
    vocab_size = int(max(corpus) + 1)

    xs = corpus[:-1]
    ts = corpus[1:]
    data_size = len(xs)
    print(f'corpus size: {corpus_size}, vocabulary size: {vocab_size}')

    max_iters = data_size // (batch_size * time_size)
    time_idx = 0
    total_loss = 0
    loss_count = 0
    ppl_list = []

    model = SimpleRnnlm(vocab_size, wordvec_size, hidden_size)
    optimizer = SGD(lr)

    jump = (corpus_size - 1) // batch_size
    offsets = [i * jump for i in range(batch_size)]

    for epoch in range(1, max_epoch + 1):
        for iter_ in range(max_iters):
            batch_x = np.empty((batch_size, time_size), dtype=int)
            batch_t = np.empty((batch_size, time_size), dtype=int)
            for t in range(time_size):
                for i, offset in enumerate(offsets):
                    batch_x[i, t] = xs[(offset + time_idx) % data_size]
                    batch_t[i, t] = ts[(offset + time_idx) % data_size]
                time_idx += 1

            loss = model.forward(batch_x, batch_t)
            model.backward()
            optimizer.update(model.params, model.grads)
            total_loss += loss
            loss_count += 1

        ppl = np.exp(total_loss / loss_count)
        print(f'| epoch {epoch} | perplexity {ppl}')
        ppl_list.append(float(ppl))
        total_loss, loss_count = 0, 0
    print('DONE')


if __name__ == '__main__':
    main()
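As a side note, here is a minimal sketch (with made-up toy numbers, not the hyperparameters above) of how the offset-based mini-batching in the loop above walks the corpus: each row of the batch reads a contiguous stream starting at its own offset, and time_idx carries over between iterations so consecutive batches pick up where the previous ones stopped.

# Toy illustration of the offset-based Truncated BPTT batching used above
# (hypothetical numbers: a 21-word corpus, batch_size=3, time_size=4)
import numpy as np

corpus = np.arange(21)          # 21 fake word IDs: 0..20
xs = corpus[:-1]                # inputs (targets omitted for brevity)
data_size = len(xs)             # 20
batch_size, time_size = 3, 4
jump = (len(corpus) - 1) // batch_size           # 20 // 3 = 6
offsets = [i * jump for i in range(batch_size)]  # [0, 6, 12]

time_idx = 0
for it in range(2):  # two consecutive iterations
    batch_x = np.empty((batch_size, time_size), dtype=int)
    for t in range(time_size):
        for i, offset in enumerate(offsets):
            batch_x[i, t] = xs[(offset + time_idx) % data_size]
        time_idx += 1
    print(batch_x)
# first iteration rows:  [0 1 2 3], [6 7 8 9], [12 13 14 15]
# second iteration rows: [4 5 6 7], [10 11 12 13], [16 17 18 19]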
Example #2
vocab_size = int(max(corpus) + 1)

xs = corpus[:-1]  # input
ts = corpus[1:]  # outputs (teacher labels)
data_size = len(xs)
print(f'Corpus size: {corpus_size}, vocabulary size: {vocab_size}')

# variables for learning
max_iters = data_size // (batch_size * time_size)
time_idx = 0
total_loss = 0
loss_count = 0
ppl_list = []

# init model
model = SimpleRnnlm(vocab_size, wordvec_size, hidden_size)
optimizer = SGD(lr)

# calculate start reading point of each sample of mini batch
jump = (corpus_size - 1) // batch_size
offsets = [i * jump for i in range(batch_size)]

for epoch in range(max_epoch):
    for iter in range(max_iters):
        # get mini batch
        batch_x = np.empty((batch_size, time_size), dtype='i')
        batch_t = np.empty((batch_size, time_size), dtype='i')
        for t in range(time_size):
            for i, offset in enumerate(offsets):
                batch_x[i, t] = xs[(offset + time_idx) % data_size]
                batch_t[i, t] = ts[(offset + time_idx) % data_size]
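The fragment above cuts off inside the batch-building loop; a minimal sketch of the remaining steps, following the same training loop shown in Examples #1 and #3 (all variables assumed to be defined as above):

            time_idx += 1  # advance so the next batch continues where this one stopped

        # compute the loss, back-propagate, and update the parameters
        loss = model.forward(batch_x, batch_t)
        model.backward()
        optimizer.update(model.params, model.grads)
        total_loss += loss
        loss_count += 1

    # evaluate perplexity once per epoch
    ppl = np.exp(total_loss / loss_count)
    print(f'| epoch {epoch + 1} | perplexity {ppl:.2f}')
    ppl_list.append(float(ppl))
    total_loss, loss_count = 0, 0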
Example #3
import sys, os
sys.path.append(os.pardir)
import numpy as np
import matplotlib.pyplot as plt
from common.optimizer import SGD
from common.trainer import RnnlmTrainer
from dataset import ptb
from simple_rnnlm import SimpleRnnlm

# Hyperparameter settings
batch_size = 10
wordvec_size = 100
hidden_size = 100  # number of elements in the RNN's hidden-state vector
time_size = 5  # number of time steps to unroll the RNN
lr = 0.1
max_epoch = 100

# Load the training data
corpus, word_to_id, id_to_word = ptb.load_data('train')
corpus_size = 1000  # shrink the dataset for testing
corpus = corpus[:corpus_size]
vocab_size = int(max(corpus) + 1)
xs = corpus[:-1]  # inputs
ts = corpus[1:]  # outputs (teacher labels)

# Create the model
model = SimpleRnnlm(vocab_size, wordvec_size, hidden_size)
optimizer = SGD(lr)
trainer = RnnlmTrainer(model, optimizer)

trainer.fit(xs, ts, max_epoch, batch_size, time_size)
trainer.plot()


def main():
    # Hyperparameter settings
    batch_size = 10
    wordvec_size = 100
    hidden_size = 500
    time_size = 5  # number of time steps to unroll with Truncated BPTT
    lr = 0.1
    max_epoch = 100

    # Load the training dataset
    corpus, word_to_id, id_to_word = ptb.load_data('train')
    corpus_size = 1000
    corpus = corpus[:corpus_size]
    vocab_size = int(max(corpus) + 1)

    print("corpus: ", corpus)
    print("corpus size: ", len(corpus))

    # Inputs
    xs = corpus[:-1]
    ts = corpus[1:]  # teacher labels = the word ID that follows each input
    print("corpus size: %d, vocabulary size: %d" % (corpus_size, vocab_size))
    data_size = len(xs)
    print("data_size: ", data_size)

    # Variables used during training
    max_iters = data_size // (batch_size * time_size)
    time_idx = 0
    total_loss = 0
    loss_count = 0
    ppl_list = []

    # Create the model
    model = SimpleRnnlm(vocab_size, wordvec_size, hidden_size)
    optimizer = SGD(lr)

    # (1) Compute the starting read position for each sample in the mini-batch
    jump = (corpus_size - 1) // batch_size  # e.g. corpus_size=1000, batch_size=10 -> (1000-1)//10 -> 99
    print("jump: ", jump)
    offsets = [i * jump for i in range(batch_size)]  # batch_size=10, jump=99 -> [0, 99, 198, ..., 891]
    print("offsets: ", offsets)

    # Training loop
    for epoch in range(max_epoch):
        for iter in range(max_iters):

            # (2) Fetch a mini-batch
            batch_x = np.empty((batch_size, time_size), dtype=np.int32)
            batch_t = np.empty((batch_size, time_size), dtype=np.int32)
            for t in range(time_size):
                for i, offset in enumerate(offsets):
                    print(f"offset: {offset}, time_idx: {time_idx} -> {(offset + time_idx) % data_size}")
                    batch_x[i, t] = xs[(offset + time_idx) % data_size]
                    batch_t[i, t] = ts[(offset + time_idx) % data_size]
                time_idx += 1  # time_idx is never reset, so each new batch continues reading the corpus from where the previous one stopped, relative to each offset
            
            # Compute the gradients and update the parameters
            loss = model.forward(batch_x, batch_t)
            model.backward()
            optimizer.update(model.params, model.grads)
            total_loss += loss
            loss_count += 1

        # (3) Evaluate perplexity at the end of each epoch
        ppl = np.exp(total_loss / loss_count)
        print('| epoch %d | perplexity %.2f' % (epoch+1, ppl))
        ppl_list.append(float(ppl))
        total_loss, loss_count = 0, 0

    # Plot the perplexity curve
    x = np.arange(len(ppl_list))
    plt.plot(x, ppl_list, label='train')
    plt.xlabel('epochs')
    plt.ylabel('perplexity')
    plt.show()


if __name__ == '__main__':
    main()
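For reference, the perplexity reported at the end of each epoch in the loops above is the exponential of the average cross-entropy loss over the N = loss_count iterations of that epoch:

\mathrm{ppl} = \exp\!\left( \frac{1}{N} \sum_{i=1}^{N} L_i \right)

where L_i is the loss returned by model.forward at iteration i. Lower perplexity means the model assigns higher probability to the correct next word.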