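# NOTE: The cells below rely on imports and on a predict_rnn_gluon function
# defined in earlier cells of this notebook that are not shown here. The sketch
# below is illustrative only: it assumes the standard d2lzh helpers
# (data_iter_consecutive, grad_clipping) and a character-level prediction
# routine matching the signature called later, not necessarily the notebook's
# exact earlier code.
import math
import time

from mxnet import autograd, gluon, init, nd
from mxnet.gluon import loss as gloss
import d2lzh as d2l  # assumed source of data_iter_consecutive and grad_clipping


def predict_rnn_gluon(prefix, num_chars, model, vocab_size, ctx, idx_to_char,
                      char_to_idx):
    # Generate num_chars new characters after the given prefix, feeding the
    # model one character at a time and carrying its hidden state forward.
    state = model.begin_state(batch_size=1, ctx=ctx)
    output = [char_to_idx[prefix[0]]]
    for t in range(num_chars + len(prefix) - 1):
        X = nd.array([output[-1]], ctx=ctx).reshape((1, 1))
        (Y, state) = model(X, state)
        if t < len(prefix) - 1:
            # Still consuming the prefix: feed the known next character.
            output.append(char_to_idx[prefix[t + 1]])
        else:
            # Past the prefix: take the most likely next character.
            output.append(int(Y.argmax(axis=1).asscalar()))
    return ''.join([idx_to_char[i] for i in output])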
# Predict with the untrained, randomly initialized model; the output is
# essentially random at this point.
model.initialize(force_reinit=True, ctx=ctx)
print(predict_rnn_gluon('分开', 10, model, vocab_size, ctx, idx_to_char,
                        char_to_idx))
def train_and_predict_rnn_gluon(model, num_hiddens, vocab_size, ctx,
                                corpus_indices, idx_to_char, char_to_idx,
                                num_epochs, num_steps, lr, clipping_theta,
                                batch_size, pred_period, pred_len, prefixes):
    # Define the loss function, initialize the model, and set up the optimizer.
    loss = gloss.SoftmaxCrossEntropyLoss()
    model.initialize(ctx=ctx, force_reinit=True, init=init.Normal(0.01))
    trainer = gluon.Trainer(model.collect_params(), 'sgd',
                            {'learning_rate': lr, 'momentum': 0, 'wd': 0})

    for epoch in range(num_epochs):
        l_sum, n, start = 0.0, 0, time.time()
        data_iter = d2l.data_iter_consecutive(
            corpus_indices, batch_size, num_steps, ctx)
        state = model.begin_state(batch_size=batch_size, ctx=ctx)
        for X, Y in data_iter:
            # Detach the hidden state from the computation graph so that
            # gradients do not keep propagating across mini-batches.
            for s in state:
                s.detach()
            with autograd.record():
                (output, state) = model(X, state)
                # Y has shape (batch_size, num_steps); transpose and flatten
                # it into a vector so it lines up with the rows of output.
                y = Y.T.reshape((-1,))
                # Mean classification (cross-entropy) loss over the batch.
                l = loss(output, y).mean()
            l.backward()
            # Clip gradients to guard against exploding gradients.
            params = [p.data() for p in model.collect_params().values()]
            d2l.grad_clipping(params, clipping_theta, ctx)
            # The loss is already averaged, so the gradient needs no rescaling.
            trainer.step(1)
            l_sum += l.asscalar() * y.size
            n += y.size

        if (epoch + 1) % pred_period == 0:
            # exp of the average cross-entropy loss is the perplexity.
            print('epoch %d, perplexity %f, time %.2f sec' % (
                epoch + 1, math.exp(l_sum / n), time.time() - start))
            for prefix in prefixes:
                print(' -', predict_rnn_gluon(
                    prefix, pred_len, model, vocab_size, ctx, idx_to_char,
                    char_to_idx))
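# A hedged example of invoking the training loop above. The hyperparameter
# values and the lyric prefixes are illustrative only (similar to settings used
# in the d2l-zh book); model, num_hiddens, num_steps, corpus_indices, and the
# vocabulary mappings are assumed to come from earlier cells.
num_epochs, batch_size, lr, clipping_theta = 250, 32, 1e2, 1e-2
pred_period, pred_len, prefixes = 50, 50, ['分开', '不分开']
train_and_predict_rnn_gluon(model, num_hiddens, vocab_size, ctx,
                            corpus_indices, idx_to_char, char_to_idx,
                            num_epochs, num_steps, lr, clipping_theta,
                            batch_size, pred_period, pred_len, prefixes)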