Example #1
from mxnet.gluon import nn, rnn


class RNNModel(nn.Block):
    def __init__(self, vocab_size, num_hidden, **kwargs):
        super(RNNModel, self).__init__(**kwargs)
        self.rnn = rnn.RNN(num_hidden)        # recurrent layer
        self.rnn.initialize()
        self.vocab_size = vocab_size
        self.dense = nn.Dense(vocab_size)     # output layer: maps the hidden state to vocab_size scores

Example #2
import d2lzh as d2l
from mxnet import autograd, gluon, init, nd
from mxnet.gluon import loss as gloss, nn, rnn
import math, time

(corpus_indices, char_to_idx, idx_to_char, vocab_size) = d2l.load_data_jay_lyrics()
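# corpus_indices: the lyrics corpus encoded as character indices;
# char_to_idx / idx_to_char: mappings between characters and their indices;
# vocab_size: number of distinct characters in the corpus.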

################################################# Define the model #####################################################
# A single recurrent hidden layer with 256 hidden units
num_hiddens = 256
rnn_layer = rnn.RNN(num_hiddens)
rnn_layer.initialize()

batch_size = 2
state = rnn_layer.begin_state(batch_size=batch_size)        # returns a list of initial hidden states
print(state[0].shape)    # (number of hidden layers, batch_size, num_hiddens), here (1, 2, 256)


num_steps = 35
X = nd.random.uniform(shape=(num_steps, batch_size, vocab_size))
Y, state_new = rnn_layer(X, state)
print(Y.shape, len(state_new), state_new[0].shape)          # state_new has the same shape as state
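
# A small sketch (not part of the original snippet): Y has shape (num_steps, batch_size,
# num_hiddens); a Dense(vocab_size) layer applied to the flattened output produces one row
# of scores per time step and example, which is exactly what the RNNModel class below does.
dense = nn.Dense(vocab_size)
dense.initialize()
logits = dense(Y.reshape((-1, Y.shape[-1])))
print(logits.shape)    # (num_steps * batch_size, vocab_size)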


class RNNModel(nn.Block):
    def __init__(self, rnn_layer, vocab_size, **kwargs):
        super().__init__(**kwargs)
        self.rnn = rnn_layer
        self.vocab_size = vocab_size
        self.dense = nn.Dense(vocab_size)
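
    # A minimal completion (an assumption, not shown in the snippet), following the common
    # d2l-style Gluon RNNModel: one-hot encode the inputs, run them through the RNN layer,
    # and map every time step's hidden state to vocab_size scores.
    def forward(self, inputs, state):
        # inputs: (batch_size, num_steps) indices; transpose so time is the leading axis
        X = nd.one_hot(inputs.T, self.vocab_size)
        Y, state = self.rnn(X, state)
        # Flatten (num_steps, batch_size, num_hiddens) to 2-D before the dense layer
        output = self.dense(Y.reshape((-1, Y.shape[-1])))
        return output, state

    def begin_state(self, *args, **kwargs):
        return self.rnn.begin_state(*args, **kwargs)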
Example #3
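# The forward pass below is excerpted from a from-scratch RNN class (RNNScratch in the
# main block); its imports, class definition, and parameter creation are not shown.
# Assumed parameter shapes (an assumption, inferred from the matrix products below):
#   self.w_xh: (vocab_size, num_hidden)    input-to-hidden weights
#   self.w_hh: (num_hidden, num_hidden)    hidden-to-hidden weights
#   self.b_h:  (num_hidden,)               hidden bias
#   self.w_hq: (num_hidden, vocab_size)    hidden-to-output weights
#   self.b_q:  (vocab_size,)               output bias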
    def forward(self, inputs, state):    # signature reconstructed from the surrounding code
        # inputs: a list of (batch_size, vocab_size) one-hot arrays, one per time step
        h, = state
        outputs = []
        for x in inputs:
            h = nd.tanh(nd.dot(x, self.w_xh) + nd.dot(h, self.w_hh) + self.b_h)
            y = nd.dot(h, self.w_hq) + self.b_q
            outputs.append(y)
        # Concatenate the per-step outputs: (num_steps * batch_size, vocab_size)
        y_hat = nd.concat(*outputs, dim=0)
        return y_hat, (h,)


if __name__ == '__main__':
    _corpus_indices, _idx_to_char, _char_to_idx, _vocab_size = \
        data_sets.load_jaychou_lyrics("../data/jaychou_lyrics.txt.zip")
    _num_hidden = 256
    _num_steps = 35
    _batch_size = 32
    _lr = 1e2
    use_gluon = False
    if use_gluon:
        _rnn_layer = rnn.RNN(_num_hidden)
        model = BaseRNNGluon(_vocab_size, _idx_to_char, _char_to_idx,
                             _num_hidden, _rnn_layer)
    else:
        model = RNNScratch(_vocab_size, _idx_to_char, _char_to_idx,
                           _num_hidden)

    model.fit(_corpus_indices,
              _num_steps, {
                  "lr": _lr,
                  "batch_size": _batch_size
              },
              epochs=250)