import math
import time

import numpy as np
from mxnet import autograd, gluon, nd
from mxnet.gluon import loss as gloss
import gluonbook as gb


def train_cpu(net, train_iter, test_iter, loss, num_epochs, batch_size,
              params=None, lr=None, trainer=None):
    for epoch in range(1, num_epochs + 1):
        train_l_sum = 0
        train_acc_sum = 0
        for X, y in train_iter:
            with autograd.record():
                y_hat = net(X)
                l = loss(y_hat, y)
            l.backward()
            # Use the hand-rolled SGD step unless a Gluon trainer is supplied.
            if trainer is None:
                gb.sgd(params, lr, batch_size)
            else:
                trainer.step(batch_size)
            train_l_sum += l.mean().asscalar()
            train_acc_sum += accuracy(y_hat, y)
        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f'
              % (epoch, train_l_sum / len(train_iter),
                 train_acc_sum / len(train_iter), test_acc))
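
# `accuracy` and `evaluate_accuracy` are used above but not defined in this
# excerpt. A minimal sketch of plausible implementations, assuming `y_hat`
# holds per-class scores of shape (batch_size, num_classes):
def accuracy(y_hat, y):
    # Fraction of examples whose highest-scoring class matches the label.
    return (y_hat.argmax(axis=1) == y.astype('float32')).mean().asscalar()


def evaluate_accuracy(data_iter, net):
    # Average the per-batch accuracy over the whole iterator.
    acc = 0
    for X, y in data_iter:
        acc += accuracy(net(X), y)
    return acc / len(data_iter)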
def train_and_predict_rnn(rnn, is_random_iter, num_epochs, num_steps,
                          num_hiddens, lr, clipping_theta, batch_size,
                          vocab_size, pred_period, pred_len, prefixes,
                          get_params, get_inputs, ctx, corpus_indices,
                          idx_to_char, char_to_idx, is_lstm=False):
    if is_random_iter:
        data_iter = data_iter_random
    else:
        data_iter = data_iter_consecutive
    params = get_params()
    loss = gloss.SoftmaxCrossEntropyLoss()
    for epoch in range(1, num_epochs + 1):
        # With consecutive sampling, the hidden state only needs to be
        # initialized once at the start of each epoch.
        if not is_random_iter:
            state_h = nd.zeros(shape=(batch_size, num_hiddens), ctx=ctx)
            if is_lstm:
                state_c = nd.zeros(shape=(batch_size, num_hiddens), ctx=ctx)
        train_l_sum = nd.array([0], ctx=ctx)
        train_l_cnt = 0
        for X, Y in data_iter(corpus_indices, batch_size, num_steps, ctx):
            # With random sampling, the hidden state must be reinitialized
            # before reading every random minibatch.
            if is_random_iter:
                state_h = nd.zeros(shape=(batch_size, num_hiddens), ctx=ctx)
                if is_lstm:
                    state_c = nd.zeros(shape=(batch_size, num_hiddens),
                                       ctx=ctx)
            # With consecutive sampling, detach the hidden state from the
            # computation graph so gradients do not flow across minibatches.
            else:
                state_h = state_h.detach()
                if is_lstm:
                    state_c = state_c.detach()
            with autograd.record():
                # Each element of outputs has shape (batch_size, vocab_size).
                if is_lstm:
                    outputs, state_h, state_c = rnn(
                        get_inputs(X, vocab_size), state_h, state_c, *params)
                else:
                    outputs, state_h = rnn(
                        get_inputs(X, vocab_size), state_h, *params)
                # Let t_ib_j be element j of the minibatch at time step i:
                # y has shape (batch_size * num_steps,) with
                # y = [t_0b_0, t_0b_1, ..., t_1b_0, t_1b_1, ...].
                y = Y.T.reshape((-1,))
                # Concatenate outputs into shape
                # (batch_size * num_steps, vocab_size).
                outputs = nd.concat(*outputs, dim=0)
                l = loss(outputs, y)
            l.backward()
            # Clip the gradient to mitigate exploding gradients.
            grad_clipping(params, clipping_theta, ctx)
            gb.sgd(params, lr, 1)
            train_l_sum = train_l_sum + l.sum()
            train_l_cnt += l.size
        if epoch % pred_period == 0:
            print('\nepoch %d, perplexity %f'
                  % (epoch, (train_l_sum / train_l_cnt).exp().asscalar()))
            for prefix in prefixes:
                print(' - ', predict_rnn(
                    rnn, prefix, pred_len, params, num_hiddens, vocab_size,
                    ctx, idx_to_char, char_to_idx, get_inputs, is_lstm))
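
# `get_inputs` is passed in by the caller and not defined in this excerpt.
# A minimal sketch of the one-hot encoding it is presumably expected to
# perform, assuming X has shape (batch_size, num_steps) of character indices:
def get_inputs(X, vocab_size):
    # Returns num_steps arrays, each of shape (batch_size, vocab_size).
    return [nd.one_hot(x, vocab_size) for x in X.T]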
def train_ch3(net, train_iter, loss, num_epochs, batch_size, params=None,
              lr=None, trainer=None):
    for epoch in range(num_epochs):
        train_l_sum = 0
        train_acc_sum = 0
        for X, y in train_iter:
            with autograd.record():
                y_hat = net(X)
                l = loss(y_hat, y)
            l.backward()
            if trainer is None:
                gb.sgd(params, lr, batch_size)
            else:
                trainer.step(batch_size)
            train_l_sum += l.mean().asscalar()
            train_acc_sum += accuracy(y_hat, y)
        print('epoch %d, loss %.4f, train acc %.3f'
              % (epoch + 1, train_l_sum / len(train_iter),
                 train_acc_sum / len(train_iter)))
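
# A hypothetical invocation of train_ch3, assuming a `net` and its `params`
# have already been built (e.g. softmax regression) and that gluonbook's
# Fashion-MNIST loader is available. The learning rate of 0.1 and 5 epochs
# are illustrative choices, not prescribed by the code above.
batch_size = 256
train_iter, test_iter = gb.load_data_fashion_mnist(batch_size)
loss = gloss.SoftmaxCrossEntropyLoss()
train_ch3(net, train_iter, loss, 5, batch_size, params=params, lr=0.1)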
def train_ch3(net, train_iter, loss, num_epochs, batch_size, params=None,
              lr=None, trainer=None):
    # Variant of train_ch3 with step learning-rate decay and several
    # threshold-based accuracy metrics; `labels`, `labels_test`, `data_num`
    # and `accuracy_sum` are expected to exist in the enclosing scope.
    for epoch in range(num_epochs):
        # Halve the learning rate every 100 epochs.
        if trainer is not None and epoch % 100 == 0:
            trainer.set_learning_rate(trainer.learning_rate * 0.5)
            print(trainer.learning_rate)
        train_l_sum = 0
        train_acc_sum = 0
        train_acc_sum1 = 0
        train_acc_sum2 = 0
        train_acc_sum3 = 0
        flag = 0
        for X, y in train_iter:
            with autograd.record():
                y_hat = net(X)
                l = loss(y_hat, y)
            l.backward()
            if trainer is None:
                gb.sgd(params, lr, batch_size)
            else:
                trainer.step(batch_size)
            train_l_sum += l.mean().asscalar()
            # On the last epoch, record the labels and predictions.
            if epoch == num_epochs - 1:
                labels[flag:flag + len(y)] = y
                labels_test[flag:flag + len(y)] = y_hat
                flag = flag + len(y)
            train_acc_sum += accuracy(y_hat, y, 0.51)
            train_acc_sum1 += accuracy_sum(y_hat, y, 1.5)
            train_acc_sum2 += accuracy_sum(y_hat, y, 3.0)
            train_acc_sum3 += accuracy_sum(y_hat, y, 4.5)
        print('epoch %d, loss %.4f, train_acc %f, train_acc1 %f, '
              'train_acc2 %f, train_acc3 %f'
              % (epoch + 1, train_l_sum / len(train_iter),
                 train_acc_sum / data_num, train_acc_sum1 / data_num,
                 train_acc_sum2 / data_num, train_acc_sum3 / data_num))
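
# `accuracy_sum` (and the three-argument `accuracy`) used above are not
# defined in this excerpt. Judging from the call sites, where the running
# sum is divided by `data_num`, a plausible sketch is a count of predictions
# falling within an absolute-error tolerance of the label; this is an
# assumption, not the original implementation:
def accuracy_sum(y_hat, y, tol):
    # Number of predictions within `tol` of the corresponding label.
    return (nd.abs(y_hat.reshape(y.shape) - y) < tol).sum().asscalar()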
def train_batch(X, y, gpu_params, ctx, lr):
    # Split the minibatch across all GPUs.
    gpu_Xs = split_and_load(X, ctx)
    gpu_ys = split_and_load(y, ctx)
    with autograd.record():
        # Compute the loss on each GPU with that GPU's copy of the parameters.
        ls = [loss(lenet(gpu_X, gpu_W), gpu_y)
              for gpu_X, gpu_y, gpu_W in zip(gpu_Xs, gpu_ys, gpu_params)]
    for l in ls:
        l.backward()
    # Sum the gradients over all GPUs and broadcast the result back.
    for i in range(len(gpu_params[0])):
        allreduce([gpu_params[c][i].grad for c in range(len(ctx))])
    # Update each GPU's parameter copy using the full batch size.
    for params in gpu_params:
        gb.sgd(params, lr, X.shape[0])
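
# `split_and_load` and `allreduce` are used above but not defined in this
# excerpt. Minimal sketches of the usual helpers, assuming the batch divides
# evenly across the devices in `ctx`:
def split_and_load(data, ctx):
    # Slice a batch along axis 0 and copy each shard to its device.
    n, k = data.shape[0], len(ctx)
    m = n // k
    return [data[i * m:(i + 1) * m].as_in_context(ctx[i]) for i in range(k)]


def allreduce(data):
    # Sum all copies on the first device, then broadcast the sum back.
    for i in range(1, len(data)):
        data[0][:] += data[i].copyto(data[0].context)
    for i in range(1, len(data)):
        data[0].copyto(data[i])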
def train_ch3(net, train_iter, loss, num_epochs, batch_size, params=None,
              lr=None, trainer=None):
    # Variant of train_ch3 that dumps predictions and labels to text files
    # every 10 epochs; `data_num` and `accuracy_sum` are expected to exist
    # in the enclosing scope.
    for epoch in range(num_epochs):
        train_l_sum = 0
        train_acc_sum = 0
        train_acc_sum1 = 0
        train_acc_sum2 = 0
        train_acc_sum3 = 0
        flag = 0
        for X, y in train_iter:
            with autograd.record():
                y_hat = net(X)
                l = loss(y_hat, y)
            l.backward()
            if trainer is None:
                gb.sgd(params, lr, batch_size)
            else:
                trainer.step(batch_size)
            train_l_sum += l.mean().asscalar()
            # Every 10 epochs, save this batch's predictions and labels.
            if epoch % 10 == 0:
                np.savetxt('y_hat' + str(flag) + '.txt', y_hat.asnumpy(),
                           fmt='%f')
                np.savetxt('y' + str(flag) + '.txt', y.asnumpy(), fmt='%f')
                flag = flag + 1
            train_acc_sum += accuracy(y_hat, y, 0.51)
            train_acc_sum1 += accuracy_sum(y_hat, y, 0.51)
            train_acc_sum2 += accuracy_sum(y_hat, y, 1.01)
            train_acc_sum3 += accuracy_sum(y_hat, y, 1.51)
        print('epoch %d, loss %.4f, train_acc %f, train_acc1 %f, '
              'train_acc2 %f, train_acc3 %f'
              % (epoch + 1, train_l_sum / len(train_iter),
                 train_acc_sum / data_num, train_acc_sum1 / data_num,
                 train_acc_sum2 / data_num, train_acc_sum3 / data_num))
def fit_and_plot(lambd):
    w, b = params = init_params()
    train_ls = []
    test_ls = []
    for _ in range(num_epoch):
        for X, y in train_iter:
            with autograd.record():
                # Add the L2 penalty term to the data loss.
                l = loss(net(X, w, b), y) + lambd * l2_penalty(w)
            l.backward()
            gb.sgd(params, lr, batch_size)
        train_ls.append(
            loss(net(train_feature, w, b), train_labels).mean().asscalar())
        test_ls.append(
            loss(net(test_feature, w, b), test_labels).mean().asscalar())
    gb.semilogy(range(1, num_epoch + 1), train_ls, 'epoch', 'loss',
                range(1, num_epoch + 1), test_ls, ['train', 'test'])
    return w[:10].T, b
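
# `l2_penalty` and the linear `net` used above are not defined in this
# excerpt. Minimal sketches of the usual definitions from the weight-decay
# example (assumed, not taken from this code):
def l2_penalty(w):
    # Half the squared L2 norm; its gradient is simply w.
    return (w ** 2).sum() / 2


def net(X, w, b):
    # Plain linear regression.
    return nd.dot(X, w) + b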
def train_and_predict_rnn(rnn, get_params, init_rnn_state, num_hiddens,
                          vocab_size, ctx, corpus_indices, idx_to_char,
                          char_to_idx, is_random_iter, num_epochs, num_steps,
                          lr, clipping_theta, batch_size, pred_period,
                          pred_len, prefixes):
    if is_random_iter:
        data_iter_fn = data_iter_random
    else:
        data_iter_fn = data_iter_consecutive
    params = get_params()
    loss = gloss.SoftmaxCrossEntropyLoss()
    for epoch in range(num_epochs):
        # With consecutive sampling, initialize the state once per epoch.
        if not is_random_iter:
            state = init_rnn_state(batch_size, num_hiddens, ctx)
        data_iter = data_iter_fn(corpus_indices, batch_size, num_steps, ctx)
        loss_sum = 0.0
        start = time.time()
        for t, (X, Y) in enumerate(data_iter):
            # With random sampling, reinitialize the state per minibatch;
            # otherwise detach it from the previous minibatch's graph.
            # Note detach() returns a new NDArray, so the state must be
            # rebound rather than detached in place.
            if is_random_iter:
                state = init_rnn_state(batch_size, num_hiddens, ctx)
            else:
                state = tuple(s.detach() for s in state)
            with autograd.record():
                inputs = to_onehot(X, vocab_size)
                outputs, state = rnn(inputs, state, params)
                # Concatenate to shape (batch_size * num_steps, vocab_size)
                # and flatten Y time-major to match.
                outputs = nd.concat(*outputs, dim=0)
                y = Y.T.reshape((-1,))
                l = loss(outputs, y).mean()
            l.backward()
            grad_clipping(params, clipping_theta, ctx)
            # Since the loss is already a mean, use batch size 1 for SGD.
            gb.sgd(params, lr, 1)
            loss_sum += l.asscalar()
        if (epoch + 1) % pred_period == 0:
            print('epoch %d, perplexity %f, time %.2f sec'
                  % (epoch + 1, math.exp(loss_sum / (t + 1)),
                     time.time() - start))
            for prefix in prefixes:
                print('-', predict_rnn(prefix, pred_len, rnn, params,
                                       init_rnn_state, num_hiddens,
                                       vocab_size, ctx, idx_to_char,
                                       char_to_idx))
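
# `grad_clipping` and `to_onehot` are used above but not defined in this
# excerpt. Minimal sketches matching the call signatures here (assumed):
def grad_clipping(params, theta, ctx):
    # Rescale all gradients so their global L2 norm is at most theta.
    norm = nd.array([0.0], ctx)
    for param in params:
        norm += (param.grad ** 2).sum()
    norm = norm.sqrt().asscalar()
    if norm > theta:
        for param in params:
            param.grad[:] *= theta / norm


def to_onehot(X, size):
    # Turn (batch_size, num_steps) indices into num_steps one-hot arrays
    # of shape (batch_size, size).
    return [nd.one_hot(x, size) for x in X.T]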
    # Fragment: the tail of a `net(data, verbose=True)` forward pass. The
    # conv and dense layers producing h1..h4_linear are not in this excerpt.
    print('1st conv block:', h1.shape)
    print('2nd conv block:', h2.shape)
    print('1st dense:', h3.shape)
    print('2nd dense:', h4_linear.shape)
    print('output:', h4_linear)
    return h4_linear


# Run a single minibatch through the network to print the layer shapes.
for data, _ in train_data:
    net(data, verbose=True)
    break

softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

learning_rate = .2
epochs = 5

for epoch in range(epochs):
    train_loss = 0.
    train_acc = 0.
    for data, label in train_data:
        data = data.as_in_context(ctx)
        label = label.as_in_context(ctx)
        with autograd.record():
            Y_hat = net(data)
            l = softmax_cross_entropy(Y_hat, label)
        l.backward()
        gb.sgd(params, learning_rate, batch_size)
        train_loss += nd.mean(l).asscalar()
        train_acc += accuracy(Y_hat, label)
    test_acc = evaluate_accuracy(test_data, net, ctx)
    print('Epoch %d. Loss: %f, Train acc %f, Test acc %f'
          % (epoch, train_loss / len(train_data),
             train_acc / len(train_data), test_acc))
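
# The `evaluate_accuracy` call just above takes a device argument, unlike
# the two-argument sketch earlier in this file. A minimal ctx-aware variant
# (assumed):
def evaluate_accuracy(data_iter, net, ctx):
    # Average per-batch accuracy, moving each batch to `ctx` first.
    acc = 0.
    for data, label in data_iter:
        data = data.as_in_context(ctx)
        label = label.as_in_context(ctx)
        acc += accuracy(net(data), label)
    return acc / len(data_iter)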