def evaluate_accuracy(data_iter, net, ctx):
    acc = nd.array([0], ctx=ctx)
    for X, y in data_iter:
        # If ctx is a GPU, copy the batch to the GPU first
        X, y = X.as_in_context(ctx), y.as_in_context(ctx)
        acc += gb.accuracy(net(X), y)
    return acc.asscalar() / len(data_iter)
def train_ch5(net, train_iter, test_iter, batch_size, trainer, ctx, num_epochs): """Train and evaluate a model on CPU or GPU.""" print('training on', ctx) loss = gloss.SoftmaxCrossEntropyLoss() for epoch in range(1, num_epochs + 1): train_l_sum = 0 train_acc_sum = 0 num_add = 0 start = time.time() for X, y in train_iter: X, y = X.as_in_context(ctx), y.as_in_context(ctx) with autograd.record(): y_hat = net(X) l = loss(y_hat, y) l.backward() trainer.step(batch_size) train_l_sum += l.mean().asscalar() train_acc_sum += accuracy(y_hat, y) num_add += 1 # test_acc = evaluate_accuracy(test_iter, net, ctx) test_acc = 0 print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, ' 'time %.1f sec' % (epoch, train_l_sum / num_add, train_acc_sum / num_add, test_acc, time.time() - start))
def train(net, train_iterator, test_iterator, num_epochs, lr, wd, ctx,
          lr_period, lr_decay):
    # Only train the output network we defined; the pretrained features
    # stay frozen.
    trainer = gluon.Trainer(net.output_new.collect_params(), 'sgd',
                            {'learning_rate': lr, 'momentum': 0.9, 'wd': wd})
    prev_time = datetime.datetime.now()
    for epoch in range(num_epochs):
        # train_iter = data_iter(prefix, foldername, batch_size, True)
        train_iter = train_iterator(batch_size)
        test_iter = test_iterator(batch_size, False)
        train_l = 0.0
        train_acc = 0.0
        train_n = 0
        # if epoch > 0 and epoch % lr_period == 0:
        #     trainer.set_learning_rate(trainer.learning_rate * lr_decay)
        num = 0.
        for X, y, n in train_iter:
            if n == 0:
                continue
            y = y.astype('float32').as_in_context(ctx)
            # Run the frozen feature extractor outside autograd.record()
            # so no gradients are computed for it
            output_features = net.features(X.as_in_context(ctx))
            with autograd.record():
                y_hat = net.output_new(output_features)
                l = loss(y_hat, y)
            l.backward()
            trainer.step(n)
            train_l += l.mean().asscalar()
            train_acc += gb.accuracy(y_hat, y)
            num += 1
            train_n += n
        # print("train samples: %d" % train_n)
        cur_time = datetime.datetime.now()
        h, remainder = divmod((cur_time - prev_time).seconds, 3600)
        m, s = divmod(remainder, 60)
        time_s = "time %02d:%02d:%02d" % (h, m, s)
        test_acc = evaluate_accuracy(test_iter, net, ctx)
        if num == 0.:
            epoch_s = ("epoch %d, loss %f, train acc %f, test acc %f, "
                       % (epoch, train_l, train_acc, test_acc))
        else:
            epoch_s = ("epoch %d, loss %f, train acc %f, test acc %f, "
                       % (epoch, train_l / num, train_acc / num, test_acc))
        prev_time = cur_time
        print(epoch_s + time_s + ', lr ' + str(trainer.learning_rate))
def evaluate_accuracy(data_iter, net, ctx):
    acc = 0.
    num = 0.
    test_n = 0
    for X, y, n in data_iter:
        if n == 0:
            continue
        y = y.astype('float32').as_in_context(ctx)
        output_features = net.features(X.as_in_context(ctx))
        y_hat = net.output_new(output_features)
        acc += gb.accuracy(y_hat, y)
        num += 1
        test_n += n
    # print("test samples: %d" % test_n)
    if num == 0.:
        return acc
    return acc / num
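# The two functions above assume a transfer-learning model that exposes a
# frozen `features` block and a trainable `output_new` head, plus a data
# iterator yielding (X, y, n) with n the number of real samples per batch.
# A minimal sketch of how such a net could be assembled from the Gluon
# model zoo; the resnet34_v2 backbone, head sizes, and the 120-class
# output are assumptions for illustration:
from mxnet import init
from mxnet.gluon import model_zoo, nn

def get_net(ctx):
    finetune_net = model_zoo.vision.resnet34_v2(pretrained=True)
    # Assigning a Block as an attribute registers it as a child, so
    # `net.output_new.collect_params()` works as used in train() above.
    finetune_net.output_new = nn.HybridSequential(prefix='')
    finetune_net.output_new.add(nn.Dense(256, activation='relu'))
    finetune_net.output_new.add(nn.Dense(120))
    # Only the new head is initialized; the pretrained features keep
    # their weights and are merely moved to the target device.
    finetune_net.output_new.initialize(init.Xavier(), ctx=ctx)
    finetune_net.features.collect_params().reset_ctx(ctx)
    return finetune_net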
def train_ch5(net, train_iter, test_iter, loss, batch_size, trainer,
              num_epochs):
    # Assumes `from time import time`
    for epoch in range(1, num_epochs + 1):
        train_l_sum = 0
        train_acc_sum = 0
        start = time()
        for X, y in train_iter:
            with autograd.record():
                y_hat = net(X)
                l = loss(y_hat, y)
            l.backward()
            trainer.step(batch_size)
            train_l_sum += l.mean().asscalar()
            train_acc_sum += gb.accuracy(y_hat, y)
        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, '
              'time %.1f sec'
              % (epoch, train_l_sum / len(train_iter),
                 train_acc_sum / len(train_iter), test_acc, time() - start))
def train_ch5(net, train_iter, test_iter, batch_size, trainer, ctx,
              num_epochs):
    print('training on', ctx)
    loss = gloss.SoftmaxCrossEntropyLoss()
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, start = 0, 0, time.time()
        for X, y in train_iter:
            X, y = X.as_in_context(ctx), y.as_in_context(ctx)
            with autograd.record():
                y_hat = net(X)
                l = loss(y_hat, y)
            l.backward()
            trainer.step(batch_size)
            train_l_sum += l.mean().asscalar()
            train_acc_sum += gb.accuracy(y_hat, y)
        test_acc = evaluate_accuracy(test_iter, net, ctx)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, '
              'time %.1f sec'
              % (epoch + 1, train_l_sum / len(train_iter),
                 train_acc_sum / len(train_iter), test_acc,
                 time.time() - start))
def train(net, train_data, valid_data, num_epochs, lr, wd, ctx, lr_period,
          lr_decay):
    # Note: relies on `batch_size` and `softmax_cross_entropy` from the
    # enclosing scope.
    trainer = gluon.Trainer(net.collect_params(), 'sgd', {
        'learning_rate': lr, 'momentum': 0.9, 'wd': wd})
    prev_time = datetime.datetime.now()
    for epoch in range(num_epochs):
        train_loss = 0.0
        train_acc = 0.0
        # Multiply the learning rate by lr_decay every lr_period epochs
        if epoch > 0 and epoch % lr_period == 0:
            trainer.set_learning_rate(trainer.learning_rate * lr_decay)
        for data, label in train_data:
            label = label.astype('float32').as_in_context(ctx)
            with autograd.record():
                output = net(data.as_in_context(ctx))
                loss = softmax_cross_entropy(output, label)
            loss.backward()
            trainer.step(batch_size)
            train_loss += nd.mean(loss).asscalar()
            train_acc += gb.accuracy(output, label)
        cur_time = datetime.datetime.now()
        h, remainder = divmod((cur_time - prev_time).seconds, 3600)
        m, s = divmod(remainder, 60)
        time_str = "Time %02d:%02d:%02d" % (h, m, s)
        if valid_data is not None:
            valid_acc = gb.evaluate_accuracy(valid_data, net, ctx)
            epoch_str = ("Epoch %d. Loss: %f, Train acc %f, Valid acc %f, "
                         % (epoch, train_loss / len(train_data),
                            train_acc / len(train_data), valid_acc))
        else:
            epoch_str = ("Epoch %d. Loss: %f, Train acc %f, "
                         % (epoch, train_loss / len(train_data),
                            train_acc / len(train_data)))
        prev_time = cur_time
        print(epoch_str + time_str + ', lr ' + str(trainer.learning_rate))
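# The manual lr_period/lr_decay schedule above can also be expressed with
# MXNet's built-in FactorScheduler. A sketch under the assumption that
# batch-level decay is acceptable: FactorScheduler counts optimizer
# updates, not epochs, so `step` must be batches-per-epoch * lr_period.
# The example values below stand in for the arguments of train() above.
import mxnet as mx
from mxnet import gluon

lr, wd, lr_period, lr_decay = 0.1, 5e-4, 10, 0.1  # example values
steps_per_epoch = 100                             # assumption: len(train_data)
scheduler = mx.lr_scheduler.FactorScheduler(
    step=steps_per_epoch * lr_period, factor=lr_decay)
trainer = gluon.Trainer(net.collect_params(), 'sgd', {
    'learning_rate': lr, 'momentum': 0.9, 'wd': wd,
    'lr_scheduler': scheduler})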
def vgg():
    print('program begin')
    train_pic_list, train_label_list = get_pic_dogandcat(TRAIN_DATA_SIZE)
    test_pic_list, test_label_list = get_pic_dogandcat(TEST_DATA_SIZE)
    train_dataset = gluon.data.ArrayDataset(train_pic_list, train_label_list)
    train_data_iter = gluon.data.DataLoader(train_dataset,
                                            batch_size=BATCH_SIZE,
                                            shuffle=True)
    test_dataset = gluon.data.ArrayDataset(test_pic_list, test_label_list)
    test_data_iter = gluon.data.DataLoader(test_dataset,
                                           batch_size=TEST_DATA_SIZE,
                                           shuffle=True)
    # train_data_iter = mx.image.ImageIter(batch_size=BATCH_SIZE,
    #                                      data_shape=(3, 224, 224),
    #                                      path_imglist='train.lst')
    # train_data_iter.reset()
    print('dataset created')
    with mx.Context(mx.gpu()):
        net = nn.Sequential()
        # VGG-16 convolutional stack: (num_convs, out_channels) per block,
        # each block followed by 2x2 max pooling
        in_channels = 3
        for num_convs, out_channels in ((2, 64), (2, 128), (3, 256),
                                        (3, 512), (3, 512)):
            for _ in range(num_convs):
                net.add(nn.Conv2D(channels=out_channels, kernel_size=3,
                                  strides=1, padding=1, activation='relu',
                                  in_channels=in_channels))
                in_channels = out_channels
            net.add(nn.MaxPool2D(pool_size=2, strides=2))
        net.add(nn.Dense(4096, activation='relu', in_units=7 * 7 * 512))
        net.add(nn.Dropout(DROPOUT_RATE))
        net.add(nn.Dense(4096, activation='relu', in_units=4096))
        net.add(nn.Dropout(DROPOUT_RATE))
        net.add(nn.Dense(2, in_units=4096))
        print('net created')
        net.initialize()
        # Probe the network with a dummy batch to print each block's
        # output shape
        X = nd.random.uniform(shape=(BATCH_SIZE, 3, 224, 224))
        for blk in net:
            X = blk(X)
            print(blk.name, 'output shape:\t', X.shape)
        loss = gluon.loss.SoftmaxCrossEntropyLoss()
        learning_rate = 0.01
        trainer = gluon.Trainer(net.collect_params(), 'adam',
                                {'learning_rate': learning_rate})
        num_epoch = 1000
        print('train begin')
        for epoch in range(num_epoch):
            start_time = time.time()
            train_l_sum = 0
            train_acc_sum = 0
            num_add = 0
            for X, y in train_data_iter:
                # mx.Context does not move data automatically, so copy
                # each batch to the GPU explicitly
                X = X.copyto(mx.gpu())
                y = y.copyto(mx.gpu())
                with autograd.record():
                    y_predict = net(X)
                    l = loss(y_predict, y)  # reuse the forward pass
                l.backward()
                trainer.step(batch_size=BATCH_SIZE)
                train_l_sum += l.mean().asscalar()
                train_acc_sum += gb.accuracy(y_predict, y)
                num_add += 1
            if epoch != 0 and epoch % 3 == 0:
                learning_rate = learning_rate * 0.9
                trainer.set_learning_rate(learning_rate)
            test_acc = evaluate_accuracy(test_data_iter, net)
            time1 = time.time() - start_time
            print('epoch %3d, loss %.8f, train acc %.8f, test acc %.8f, '
                  'learning_rate: %.8f, Time: %.3fs, predict_time: %dmin%ds'
                  % (epoch + 1, train_l_sum / num_add,
                     train_acc_sum / num_add, test_acc,
                     trainer.learning_rate, time1,
                     int(time1 * (num_epoch - epoch - 1) / 60),
                     time1 * (num_epoch - epoch - 1) % 60))
def evaluate_accuracy(data_iter, net):
    acc = nd.array([0])
    for X, y in data_iter:
        acc += gb.accuracy(net(X), y)
    return acc.asscalar() / len(data_iter)
# print(small_conv_arch)
ctx = mx.gpu()
net = vgg()
net.initialize(init.Xavier(), ctx=ctx)
train_iter, test_iter = get_iter()
loss = gloss.SoftmaxCrossEntropyLoss()
num_epochs = 100
batch_size = BATCH_SIZE
trainer = gluon.Trainer(net.collect_params(), 'sgd',
                        {'learning_rate': 0.001})
for epoch in range(1, num_epochs + 1):
    train_l_sum = 0
    train_acc_sum = 0
    num_add = 0
    start = time.time()
    for X, y in train_iter:
        X, y = X.as_in_context(ctx), y.as_in_context(ctx)
        with autograd.record():
            y_hat = net(X)
            l = loss(y_hat, y)
        l.backward()
        trainer.step(batch_size)
        train_l_sum += l.mean().asscalar()
        train_acc_sum += gb.accuracy(y_hat, y)
        num_add += 1
    # Test-set evaluation is disabled here; re-enable if needed:
    # test_acc = evaluate_accuracy(test_iter, net, ctx)
    test_acc = 0
    print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, '
          'time %.1f sec'
          % (epoch, train_l_sum / num_add, train_acc_sum / num_add,
             test_acc, time.time() - start))
def train(net, train_data, valid_data, num_epochs, lr, wd, ctx, lr_period,
          lr_decay):
    trainer = gluon.Trainer(net.collect_params(), 'sgd', {
        'learning_rate': lr, 'momentum': 0.9, 'wd': wd})
    prev_time = datetime.datetime.now()
    for epoch in range(num_epochs):
        train_l, train_acc = 0.0, 0.0
        if epoch > 0 and epoch % lr_period == 0:
            trainer.set_learning_rate(trainer.learning_rate * lr_decay)
            # Print the learning rate after the decay has been applied
            print('e=' + str(epoch) + ' lr=' + str(trainer.learning_rate))
        iii = 0
        for X, y in train_data:
            iii += 1
            print('... {:.1f}% ...'.format(iii * 100 / len(train_data)),
                  end='\r')
            y = y.as_in_context(ctx)
            with autograd.record():
                y_hat = net(X.astype('float32').as_in_context(ctx))
                l = loss(y_hat, y)
            l.backward()
            trainer.step(batch_size)
            train_l += l.mean().asscalar()
            train_acc += gb.accuracy(y_hat, y)
        cur_time = datetime.datetime.now()
        h, remainder = divmod((cur_time - prev_time).seconds, 3600)
        m, s = divmod(remainder, 60)
        time_s = "time %02d:%02d:%02d" % (h, m, s)
        if valid_data is not None:
            valid_acc = 0
            iii = 0
            for X, y in valid_data:
                iii += 1
                print('... {:.1f}% ...'.format(iii * 100 / len(valid_data)),
                      end='\r')
                y = y.as_in_context(ctx)
                val_y_hat = net(X.astype('float32').as_in_context(ctx))
                valid_acc += gb.accuracy(val_y_hat, y)
            epoch_s = ("epoch %d, loss %f, train acc %f, valid acc %f, "
                       % (epoch + 1, train_l / len(train_data),
                          train_acc / len(train_data),
                          valid_acc / len(valid_data)))
        else:
            epoch_s = ("epoch %d, loss %f, train acc %f, "
                       % (epoch + 1, train_l / len(train_data),
                          train_acc / len(train_data)))
        prev_time = cur_time
        print(epoch_s + time_s)  # + ', lr ' + str(trainer.learning_rate)
        # Checkpoint after every epoch (the +16 offset continues an
        # earlier run that stopped at epoch 16)
        net.save_parameters('./params_ffn/dbc_' + str(epoch + 16) + '.param')
        print('model saved')
# Then decrease the learning rate linearly with the epoch via
# lr_scheduler.learning_rate -= 0.01. (The loop below actually applies a
# multiplicative 0.9 decay every 3 epochs instead.)
learning_rate = 0.001
trainer = gluon.Trainer(net.collect_params(), 'sgd',
                        {'learning_rate': learning_rate})
num_epochs = 100
for epoch in range(num_epochs):
    start_time = time.time()
    train_l_sum = 0
    train_acc_sum = 0
    num_add = 0
    for X, y in train_data_iter:
        with autograd.record():
            y_predict = net(X)
            l = loss(y_predict, y)  # reuse the forward pass
        l.backward()
        trainer.step(batch_size=BATCH_SIZE)
        train_l_sum += l.mean().asscalar()
        train_acc_sum += gb.accuracy(y_predict, y)
        num_add += 1
    if epoch != 0 and epoch % 3 == 0:
        learning_rate = learning_rate * 0.9
        trainer.set_learning_rate(learning_rate)
    test_acc = evaluate_accuracy(test_data_iter, net)
    time1 = time.time() - start_time
    print('epoch %3d, loss %.8f, train acc %.8f, test acc %.8f, '
          'learning_rate: %.8f, Time: %.3fs, predict_time: %dmin%ds'
          % (epoch + 1, train_l_sum / len(train_data_iter),
             train_acc_sum / len(train_data_iter), test_acc,
             trainer.learning_rate, time1,
             int(time1 * (num_epochs - epoch - 1) / 60),
             time1 * (num_epochs - epoch - 1) % 60))
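# The comment above mentions a linear schedule while the loop applies a
# multiplicative decay. A linear variant, for comparison, could subtract a
# fixed amount per epoch via the Trainer; a sketch where the 0.01 step,
# base rate, and floor are assumptions, and `trainer` is the one created
# above:
base_lr, lr_step, lr_floor = 1.0, 0.01, 0.01
for epoch in range(num_epochs):
    # Clamp at lr_floor so the rate never reaches zero or goes negative
    trainer.set_learning_rate(max(base_lr - lr_step * epoch, lr_floor))
    # ... one training epoch as in the loop above ...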
train_iter.reset()
test_iter.reset()
num_batch = 0
train_l_sum = 0
train_acc_sum = 0
train_time_sum = 0
read_time_sum = 0
test_time_sum = 0
for data_cpu in train_iter:
    # Phase 1: copy the batch to the GPU and normalize to [0, 1]
    start1 = time.time()
    X = data_cpu.data[0].copyto(mx.gpu()) / 255
    Y_true = data_cpu.label[0].copyto(mx.gpu())
    read_time_sum += (time.time() - start1)
    # Phase 2: forward, backward, and parameter update
    start2 = time.time()
    with autograd.record():
        y_predict = net(X)
        l = loss(y_predict, Y_true)
    l.backward()
    trainer.step(batch_size=BATCH_SIZE)
    train_time_sum += (time.time() - start2)
    # Phase 3: metric bookkeeping
    start3 = time.time()
    train_l_sum += l.mean().asscalar()
    train_acc_sum += gb.accuracy(y_predict, Y_true)
    num_batch += 1
    test_time_sum += (time.time() - start3)
print(epoch, num_batch, train_l_sum / num_batch, train_acc_sum / num_batch)
print('read_time_sum:', read_time_sum, ' avg:', read_time_sum / num_batch)
print('train_time_sum:', train_time_sum, ' avg:', train_time_sum / num_batch)
print('test_time_sum:', test_time_sum, ' avg:', test_time_sum / num_batch)
print('total_time:', time.time() - total_time_start)
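# A caveat on the per-phase timers above: MXNet executes operators
# asynchronously, so net(X), l.backward() and trainer.step() return before
# the work finishes, and the cost is only paid when asscalar() forces a
# synchronization. The "train" timer therefore undercounts and the "test"
# timer overcounts. A sketch of a more faithful measurement using
# nd.waitall() to drain pending work before reading the clock; net, X,
# Y_true, loss, trainer, and BATCH_SIZE are taken from the loop above:
import time
from mxnet import autograd, nd

start = time.time()
with autograd.record():
    y_predict = net(X)
    l = loss(y_predict, Y_true)
l.backward()
trainer.step(batch_size=BATCH_SIZE)
nd.waitall()  # block until all queued computation has finished
train_time = time.time() - start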