def train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
              params=None, lr=None, trainer=None):
    """Train `net` for `num_epochs` epochs and print per-epoch metrics.

    Uses manual SGD via d2l.sgd(params, lr, batch_size) when `trainer` is
    None; otherwise steps the supplied Gluon trainer.
    """
    for epoch in range(num_epochs):
        # Per-epoch running totals: summed loss, summed correct
        # predictions, and number of examples seen.
        loss_total, correct_total, seen = 0.0, 0.0, 0
        for X, y in train_iter:
            with autograd.record():
                y_hat = net(X)
                batch_loss = loss(y_hat, y).sum()
            batch_loss.backward()
            if trainer is None:
                d2l.sgd(params, lr, batch_size)
            else:
                # Used by the "concise implementation of softmax
                # regression" section.
                trainer.step(batch_size)
            # Labels are ints; cast so the equality comparison works.
            y = y.astype('float32')
            loss_total += batch_loss.asscalar()
            correct_total += (y_hat.argmax(axis=1) == y).sum().asscalar()
            seen += y.size
        test_acc = d2l.evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f'
              % (epoch + 1, loss_total / seen, correct_total / seen,
                 test_acc))
def train(num_gpus, batch_size, lr):
    """Data-parallel Gluon training of the module-level `net` on `num_gpus` GPUs.

    Loads Fashion-MNIST, re-initializes the net on all devices, trains for
    4 epochs with plain SGD, and reports per-epoch wall time and test
    accuracy (evaluated on gpu(0)).
    """
    train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
    ctx = [mx.gpu(i) for i in range(num_gpus)]
    print('running on:', ctx)
    # force_reinit so repeated calls restart from fresh parameters.
    net.initialize(init=init.Normal(sigma=0.01), ctx=ctx, force_reinit=True)
    trainer = gluon.Trainer(net.collect_params(), 'sgd',
                            {'learning_rate': lr})
    loss = gloss.SoftmaxCrossEntropyLoss()
    for epoch in range(4):
        start = time.time()
        for X, y in train_iter:
            # Slice the mini-batch across the devices.
            gpu_Xs = gutils.split_and_load(X, ctx)
            # FIX: was `gutils.spilt_and_load` (typo) -> AttributeError
            # on the first batch.
            gpu_ys = gutils.split_and_load(y, ctx)
            with autograd.record():
                ls = [loss(net(gpu_X), gpu_y)
                      for gpu_X, gpu_y in zip(gpu_Xs, gpu_ys)]
            for l in ls:
                l.backward()
            trainer.step(batch_size)
        # Block until all asynchronous GPU work finishes so the timing
        # below is accurate.
        nd.waitall()
        train_time = time.time() - start
        test_acc = d2l.evaluate_accuracy(test_iter, net, ctx[0])
        print('epoch %d, time %.1f sec, test_acc %.2f'
              % (epoch + 1, train_time, test_acc))
def train(net, train_iter, test_iter, batch_size, trainer, ctx, num_epochs):
    """Single-device training loop with best-checkpoint saving.

    Saves parameters whenever test accuracy improves; otherwise multiplies
    the module-level `lr_scheduler`'s learning rate by 0.9.
    """
    print('training on', ctx)
    loss = gloss.SoftmaxCrossEntropyLoss()
    best_test_acc = 0.0
    for epoch in range(num_epochs):
        epoch_loss = 0.0
        epoch_correct = 0.0
        examples = 0
        start = time.time()
        for X, y in train_iter:
            X = X.as_in_context(ctx)
            y = y.as_in_context(ctx)
            with autograd.record():
                y_hat = net(X)
                l = loss(y_hat, y).sum()
            l.backward()
            trainer.step(batch_size)
            # Labels are ints; cast so the equality comparison works.
            y = y.astype('float32')
            epoch_loss += l.asscalar()
            epoch_correct += (y_hat.argmax(axis=1) == y).sum().asscalar()
            examples += y.size
        test_acc = d2l.evaluate_accuracy(test_iter, net, ctx)
        if test_acc > best_test_acc:
            # New best model: remember it and checkpoint to disk.
            best_test_acc = test_acc
            net.save_parameters('./model/MNIST/mlp.params')
        else:
            # No improvement: decay the (module-level) scheduler's rate.
            lr_scheduler.learning_rate *= 0.9
        print('epoch %d, lr %.2f, loss %.4f, train acc %.3f, '
              'test acc %.3f, best test acc %.3f, time %.1f sec'
              % (epoch + 1, lr_scheduler.learning_rate,
                 epoch_loss / examples, epoch_correct / examples,
                 test_acc, best_test_acc, time.time() - start))
def train(net, train_iter, valid_iter, num_epochs, lr, wd, ctx, lr_period,
          lr_decay):
    """Train with SGD (momentum 0.9, weight decay) and step-wise LR decay.

    The learning rate is multiplied by `lr_decay` every `lr_period` epochs.
    Relies on module-level `loss` and `batch_size`. Reports validation
    accuracy when `valid_iter` is given.
    """
    trainer = gluon.Trainer(
        net.collect_params(), 'sgd',
        {'learning_rate': lr, 'momentum': 0.9, 'wd': wd})
    for epoch in range(num_epochs):
        running_loss, running_correct, seen = 0.0, 0.0, 0
        start = time.time()
        # Decay the learning rate every `lr_period` epochs (skip epoch 0).
        if epoch > 0 and epoch % lr_period == 0:
            trainer.set_learning_rate(trainer.learning_rate * lr_decay)
        for X, y in train_iter:
            y = y.astype('float32').as_in_context(ctx)
            with autograd.record():
                y_hat = net(X.as_in_context(ctx))
                l = loss(y_hat, y).sum()
            l.backward()
            trainer.step(batch_size)
            running_loss += l.asscalar()
            running_correct += (y_hat.argmax(axis=1) == y).sum().asscalar()
            seen += y.size
        time_s = "time %.2f sec" % (time.time() - start)
        if valid_iter is not None:
            valid_acc = d2l.evaluate_accuracy(valid_iter, net, ctx)
            epoch_s = ("epoch %d, loss %f, train acc %f, valid acc %f, "
                       % (epoch + 1, running_loss / seen,
                          running_correct / seen, valid_acc))
        else:
            epoch_s = ("epoch %d, loss %f, train acc %f, "
                       % (epoch + 1, running_loss / seen,
                          running_correct / seen))
        print(epoch_s + time_s + ', lr ' + str(trainer.learning_rate))
def train(num_gpus, batch_size, lr):
    """Multi-GPU training with explicit per-device parameter copies.

    From-scratch data-parallel loop: copies the module-level `params` onto
    each GPU, trains for 4 epochs via `train_batch`, and evaluates a
    `lenet` closure on gpu(0) after each epoch.
    """
    train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
    ctx = [mx.gpu(i) for i in range(num_gpus)]
    # FIX: message read 'runing on:' — corrected to 'running on:' for
    # consistency with the other training loops in this file.
    print('running on:', ctx)
    # Copy the model parameters to the memory of each of the num_gpus GPUs.
    gpu_params = [get_params(params, c) for c in ctx]
    for epoch in range(4):
        start = time.time()
        for X, y in train_iter:
            # Multi-GPU training on a single mini-batch.
            train_batch(X, y, gpu_params, ctx, lr)
        # Block until all asynchronous GPU work is done before timing.
        nd.waitall()
        train_time = time.time() - start

        def net(x):
            # Validate the model on gpu(0).
            return lenet(x, gpu_params[0])

        test_acc = d2l.evaluate_accuracy(test_iter, net, ctx[0])
        print('epoch %d, time %.1f sec, test_acc %.2f'
              % (epoch + 1, train_time, test_acc))
def train(net, train_iter, valid_iter, lr, wd, lr_decay, lr_period, epochs,
          loss, ctx):
    """Train with SGD (momentum 0.9, weight decay) and periodic LR decay.

    The learning rate is multiplied by `lr_decay` every `lr_period` epochs.
    Relies on module-level `batch_size` for trainer.step. Prints validation
    accuracy (when `valid_iter` is given) and per-epoch statistics.
    """
    trainer = gluon.Trainer(net.collect_params(), 'sgd',
                            {'learning_rate': lr, 'wd': wd, 'momentum': 0.9})
    for epoch in range(epochs):
        l_sum, acc_sum, n, start = .0, .0, 0, time.time()
        if epoch > 0 and epoch % lr_period == 0:
            trainer.set_learning_rate(trainer.learning_rate * lr_decay)
        for X, y in train_iter:
            y = y.astype('float32').as_in_context(ctx)
            with autograd.record():
                y_hat = net(X.as_in_context(ctx))
                l = loss(y_hat, y).sum()
            l.backward()
            trainer.step(batch_size)
            # FIX: was `l_sum += l`, which accumulated the NDArray itself —
            # keeping device memory/graph alive and making `l_sum / n`
            # an NDArray in the print below instead of a float.
            l_sum += l.asscalar()
            acc_sum += (y_hat.argmax(axis=1) == y).sum().asscalar()
            n += y.size
        # FIX: was `time.time()` (an absolute timestamp); "cost %f sec"
        # clearly intends the elapsed time for the epoch.
        train_time = time.time() - start
        if valid_iter is not None:
            valid_acc = d2l.evaluate_accuracy(valid_iter, net, ctx)
            print("epoch %d,valid_acc %f" % (epoch, valid_acc))
        print("epoch %d,cost %f sec,loss %f,train_acc %f,lr %f"
              % (epoch, train_time, l_sum / n, acc_sum / n,
                 trainer.learning_rate))
def train(num_gpus, batch_size, lr):
    # MPI-based SSP (stale-synchronous parallel) training loop: each MPI
    # rank trains the shared module-level `net` on its own GPU
    # (rank 0 -> gpu(0), all other ranks -> gpu(1)).
    comm = MPI.COMM_WORLD
    comm_rank = comm.Get_rank()
    comm_size = comm.Get_size()  # NOTE(review): unused below
    train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
    #ctx = [mx.gpu(i) for i in range(num_gpus)]
    if comm_rank == 0:
        ctx = mx.gpu(0)
    else:
        ctx = mx.gpu(1)
    print('running on:', ctx)
    net.initialize(init=init.Normal(sigma=0.01), ctx=ctx, force_reinit=True)
    # NOTE(review): SSP_FLAG/thre are not stock Gluon Trainer arguments —
    # this presumably targets a patched Trainer; confirm the API.
    trainer = gluon.Trainer(net.collect_params(), 'sgd',
                            {'learning_rate': lr},
                            SSP_FLAG=True, thre=2)
    loss = gloss.SoftmaxCrossEntropyLoss()
    for epoch in range(400000):
        start = time.time()
        for X, y in train_iter:
            # NOTE(review): `ctx` is a single Context here, but
            # split_and_load expects a list of contexts — verify this
            # does not crash at runtime.
            gpu_Xs = gutils.split_and_load(X, ctx)
            gpu_ys = gutils.split_and_load(y, ctx)
            with autograd.record():
                ls = [
                    loss(net(gpu_X), gpu_y)
                    for gpu_X, gpu_y in zip(gpu_Xs, gpu_ys)
                ]
            for l in ls:
                l.backward()
            # Patched step signature: takes the epoch (SSP clock) as well.
            trainer.step(epoch, batch_size)
        train_time = time.time() - start
        # NOTE(review): ctx[comm_rank] indexes a single Context — likely a
        # leftover from the commented-out list version above; confirm.
        test_acc = d2l.evaluate_accuracy(test_iter, net, ctx[comm_rank])
        print('epoch %d, time %.1f sec, test acc %.2f, process %d'
              % (epoch + 1, train_time, test_acc, comm_rank))
batch_size = 64 # 构建数据集,将原来28x28的图片放大到224x224 train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=224) ctx = d2l.try_gpu() net.initialize(ctx=ctx, init=init.Xavier()) print('training on', ctx) softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss() trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.1}) for epoch in range(3): train_loss_sum = 0 train_acc_sum = 0 n = 0 start = time.time() for X, y in train_iter: X, y = X.as_in_context(ctx), y.as_in_context(ctx) with autograd.record(): y_hat = net(X) loss = softmax_cross_entropy(y_hat, y).sum() loss.backward() trainer.step(batch_size) y = y.astype('float32') train_loss_sum += loss.asscalar() train_acc_sum += (y_hat.argmax(axis=1) == y).sum().asscalar() n += y.size test_acc = d2l.evaluate_accuracy(test_iter, net, ctx) print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, time %.1f sec' % (epoch + 1, train_loss_sum / n, train_acc_sum / n, test_acc, time.time() - start))
# 因为标签类型是int,要把y 变成浮点数 再进行相等判断。 return (y_hat.argmax(axis=1) == y.astype('float32')).mean().asscalar() def evaluate_accuracy(data_iter, net): acc_sum, n = 0.0, 0 for X, y in data_iter: y = y.astype('float32') acc_sum += (net(X).argmax(axis=1) == y).sum().asscalar() n += y.size return acc_sum / n # *accuracy函数的小测试 print('accuracy test:', accuracy(y_hat, y)) print('accuracy,d2l_test:', d2l.evaluate_accuracy(data_iter=test_iter, net=net)) # 6 # 训练模型 ========================================================================================= num_epoch, lr = 5, 0.1 d2l.train_ch3(net, train_iter, test_iter, cross_entropy, num_epoch, batch_size, [W, b], lr) # 7 # 分类预测 ======================================================== for X, y in test_iter: break true_labels = d2l.get_fashion_mnist_labels(y.asnumpy()) pred_labels = d2l.get_fashion_mnist_labels(net(X).argmax(axis=1).asnumpy()) titles = [true + '\n' + pred for true, pred in zip(true_labels, pred_labels)] d2l.show_fashion_mnist(X[0:9], titles[0:9])
def softmax(x):
    # Row-wise softmax: exponentiate, then normalize each row
    # (broadcasting divides every row by its own sum).
    x_exp = x.exp()
    partition = x_exp.sum(axis=1, keepdims=True)
    return x_exp / partition


def net(x):
    # Linear layer (inputs flattened to num_inputs features) + softmax.
    # Relies on module-level `w`, `b`, `num_inputs`.
    return softmax(nd.dot(x.reshape((-1, num_inputs)), w) + b)


def cross_entropy(y_hat, y):
    # Negative log-likelihood of the true class: pick the predicted
    # probability of each label, then take -log.
    return - nd.pick(y_hat, y).log()


def accuracy(y_hat, y):
    # Fraction of predictions matching the labels; labels are ints, so
    # cast to float32 before the equality comparison.
    return (y_hat.argmax(axis=1) == y.astype('float32')).mean().asscalar()


print(d2l.evaluate_accuracy(test_iter, net))

num_epochs, lr = 5, 0.1


def train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
              params=None, lr=None, trainer=None):
    # NOTE(review): this definition appears truncated in this excerpt — it
    # ends right after the update step, with no metric accumulation or
    # printing; confirm against the canonical d2l version.
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
        for X, y in train_iter:
            with autograd.record():
                y_hat = net(X)
                l = loss(y_hat, y).sum()
            l.backward()
            if trainer is None:
                d2l.sgd(params, lr, batch_size)
            else:
                # Used by the "concise implementation of softmax
                # regression" section.
                trainer.step(batch_size)
# NOTE(review): fragment — these statements are the body of an epoch loop
# whose `for epoch ...:` header precedes this excerpt.
train_l_sum, train_acc_sum, n, start = 0.0, 0.0, 0, time.time()
# Decay the learning rate every `lr_period` epochs (skip epoch 0).
if epoch > 0 and epoch % lr_period == 0:
    trainer.set_learning_rate(trainer.learning_rate * lr_decay)
for X, y in train_iter:
    # Labels are ints; cast so the equality comparison below works.
    y = y.astype('float32').as_in_context(ctx)
    with autograd.record():
        y_hat = net(X.as_in_context(ctx))
        l = loss(y_hat, y).sum()
    l.backward()
    trainer.step(batch_size)
    train_l_sum += l.asscalar()
    train_acc_sum += (y_hat.argmax(axis=1) == y).sum().asscalar()
    n += y.size
time_s = "time %.2f sec" % (time.time() - start)
if valid_iter is not None:
    valid_acc = d2l.evaluate_accuracy(valid_iter, net, ctx)
    epoch_s = ("epoch %d, loss %f, train acc %f, valid acc %f, "
               % (epoch + 1, train_l_sum / n, train_acc_sum / n, valid_acc))
else:
    epoch_s = ("epoch %d, loss %f, train acc %f, "
               % (epoch + 1, train_l_sum / n, train_acc_sum / n))
print(epoch_s + time_s + ', lr ' + str(trainer.learning_rate))

# In[ ]:

# Train and validate the model.
ctx, num_epochs, lr, wd = d2l.try_gpu(), 1, 0.1, 5e-4
lr_period, lr_decay, net = 80, 0.1, get_net(ctx)
# NOTE(review): fragment — starts inside the per-batch loop of a training
# function whose header precedes this excerpt.
with autograd.record():  # tell mxnet to record the graph for autograd
    y_hat = net(X.as_in_context(ctx))
    l = loss(y_hat, y).sum()
l.backward()  # backward pass
trainer.step(batch_size)  # update params for a batch
train_losses += l.asscalar()
# Running count of correct predictions; divided by num_samples below.
train_accuracy += (y_hat.argmax(axis=1) == y).sum().asscalar()
num_samples += y.size
epoch_time = "time %.2f sec" % (time.time() - start_time)  # epoch wall time
# validate once per epoch
if valid_iter is not None:
    epoch_valid_acc = d2l.evaluate_accuracy(valid_iter, net, ctx)
# printing
# NOTE(review): epoch_valid_acc is referenced unconditionally below but is
# only assigned when valid_iter is not None — NameError risk; confirm that
# callers always pass a valid_iter.
epoch_loss = train_losses / num_samples
epoch_train_acc = train_accuracy / num_samples
print("epoch: {}, training loss: {}, training acc: {}, validation acc: {}".
      format(epoch, epoch_loss, epoch_train_acc, epoch_valid_acc))
# test every 5 epochs and save params
if epoch % 5 == 0 and test_iter is not None:
    # NOTE(review): evaluate_accuracy returns an accuracy, despite the
    # variable being named epoch_test_loss.
    epoch_test_loss = d2l.evaluate_accuracy(test_iter, net, ctx)
    print("test at epoch {}, test acc: {}".format(epoch, epoch_test_loss))
    net.save_parameters(
        'trained_models/resnet18-epoch{}-loss{}.params'.format(
            epoch, epoch_loss))