def __init__(self, vocab_size, num_hidden, **kwargs):
    """Set up the model: a recurrent layer followed by a dense output layer.

    Parameters
    ----------
    vocab_size : int
        Vocabulary size; also the width of the output (dense) layer.
    num_hidden : int
        Number of units in the hidden RNN layer.
    **kwargs
        Forwarded unchanged to the parent Block constructor.
    """
    super(RNNModel, self).__init__(**kwargs)
    # Recurrent layer; its parameters are initialized eagerly right here.
    self.rnn = rnn.RNN(num_hidden)
    self.rnn.initialize()
    self.vocab_size = vocab_size
    # Dense layer projects the hidden state back to vocabulary scores
    # (output to observation).
    self.dense = nn.Dense(vocab_size)
import d2lzh as d2l from mxnet import autograd, gluon, init, nd from mxnet.gluon import loss as gloss, nn, rnn import math, time (corpus_indices, char_to_idx, idx_to_char, vocab_size) = d2l.load_data_jay_lyrics() ################################################# TODO 定义模型 ##################################################### # 构造单隐藏层,个数是256 num_hiddens = 256 rnn_layer = rnn.RNN(num_hiddens) rnn_layer.initialize() batch_size = 2 state = rnn_layer.begin_state(batch_size=batch_size) # 返回初始化隐藏状态列表 print(state[0].shape) # (隐藏层个数,batch_size,num_hiddens) num_steps = 35 X = nd.random.uniform(shape=(num_steps, batch_size, vocab_size)) Y, state_new = rnn_layer(X, state) print(Y.shape, len(state_new), state_new[0].shape) # state_new和state同型 class RNNModel(nn.Block): def __init__(self, rnn_layer, vocab_size, **kwargs): super().__init__(**kwargs) self.rnn = rnn_layer self.vocab_size = vocab_size self.dense = nn.Dense(vocab_size)
# NOTE(review): the chunk below is a method-body fragment — the enclosing
# `def` line and the initialisation of `h` and `outputs` lie before this
# view and are not visible. Indentation is reconstructed; confirm against
# the full file.
        for x in inputs:
            # Vanilla RNN cell: next hidden state from input x and previous h.
            h = nd.tanh(nd.dot(x, self.w_xh) + nd.dot(h, self.w_hh) + self.b_h)
            # Output projection from hidden state to vocabulary scores.
            y = nd.dot(h, self.w_hq) + self.b_q
            outputs.append(y)
        # Stack per-step outputs along axis 0 — presumably giving
        # (num_steps * batch_size, vocab_size); verify against callers.
        y_hat = nd.concat(*outputs, dim=0)
        # Return the concatenated outputs and the final state as a 1-tuple.
        return y_hat, (h, )


if __name__ == '__main__':
    # NOTE(review): `data_sets`, `BaseRNNGluon` and `RNNScratch` are not
    # defined in this view — presumably imported elsewhere in the file.
    _corpus_indices, _idx_to_char, _char_to_idx, _vocab_size = \
        data_sets.load_jaychou_lyrics("../data/jaychou_lyrics.txt.zip")
    # Hyperparameters for training.
    _num_hidden = 256
    _num_steps = 35
    _batch_size = 32
    _lr = 1e2
    # Toggle between the Gluon-backed model and the from-scratch one.
    use_gluon = False
    if use_gluon:
        _rnn_layer = rnn.RNN(_num_hidden)
        model = BaseRNNGluon(_vocab_size, _idx_to_char, _char_to_idx,
                             _num_hidden, _rnn_layer)
    else:
        model = RNNScratch(_vocab_size, _idx_to_char, _char_to_idx,
                           _num_hidden)
    model.fit(_corpus_indices, _num_steps,
              {"lr": _lr, "batch_size": _batch_size}, epochs=250)