def train(num_gpus, batch_size, lr):
    train_iter, test_iter = gb.load_data_fashion_mnist(
        batch_size, root="../data/fashion-mnist")
    ctx = [mx.gpu(i) for i in range(num_gpus)]
    print("running on:", ctx)
    net.initialize(init=init.Normal(sigma=0.01), ctx=ctx)
    trainer = gluon.Trainer(net.collect_params(), "sgd",
                            {"learning_rate": lr})
    loss = gloss.SoftmaxCrossEntropyLoss()
    for epoch in range(4):
        start = time.time()
        for X, y in train_iter:
            gpu_Xs = gutils.split_and_load(X, ctx)
            gpu_ys = gutils.split_and_load(y, ctx)
            with autograd.record():
                ls = [loss(net(gpu_X), gpu_y)
                      for gpu_X, gpu_y in zip(gpu_Xs, gpu_ys)]
            for l in ls:
                l.backward()
            trainer.step(batch_size)
        nd.waitall()
        train_time = time.time() - start
        test_acc = gb.evaluate_accuracy(test_iter, net, ctx[0])
        print("epoch %d, time: %.1f sec, test acc: %.2f"
              % (epoch + 1, train_time, test_acc))
def train(num_gpus, batch_size, lr):
    train_iter, test_iter = gb.load_data_fashion_mnist(batch_size)
    ctx = [mx.gpu(i) for i in range(num_gpus)]
    print('training on', ctx)
    net = resnet18(10)
    net.initialize(init=init.Normal(sigma=0.01), ctx=ctx)
    trainer = gluon.Trainer(net.collect_params(), 'sgd',
                            {'learning_rate': lr})
    loss = gloss.SoftmaxCrossEntropyLoss()
    for epoch in range(5):
        start = time()
        for X, y in train_iter:
            gpu_Xs = gutils.split_and_load(X, ctx)
            gpu_ys = gutils.split_and_load(y, ctx)
            with autograd.record():
                ls = [loss(net(gpu_X), gpu_y)
                      for gpu_X, gpu_y in zip(gpu_Xs, gpu_ys)]
            for l in ls:
                l.backward()
            trainer.step(batch_size)
        nd.waitall()
        print('epoch %d, training time: %.1f sec' % (epoch, time() - start))
        test_acc = gb.evaluate_accuracy(test_iter, net, ctx[0])
        print('validation accuracy %.4f' % test_acc)
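# Either Gluon variant of train() above can then be called directly, assuming
# the usual imports (mxnet as mx, gluonbook as gb, gluon, autograd, nd,
# gluon.utils as gutils) and, for the first variant, a module-level net are in
# place. A hypothetical invocation; adjust num_gpus to the hardware that is
# actually available:
train(num_gpus=1, batch_size=256, lr=0.1)   # single-GPU baseline
train(num_gpus=2, batch_size=512, lr=0.2)   # data-parallel run on two GPUs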
def train(num_gpus, batch_size, lr):
    train_iter, test_iter = gb.load_data_fashion_mnist(batch_size)
    ctx = [mx.gpu(i) for i in range(num_gpus)]
    print("training on:", ctx)
    gpu_params = [get_params(params, c) for c in ctx]
    for epoch in range(5):
        start = time()
        for X, y in train_iter:
            train_batch(X, y, gpu_params, ctx, lr)
        nd.waitall()
        print('epoch %d, time: %.1f sec' % (epoch, time() - start))
        net = lambda x: lenet(x, gpu_params[0])
        test_acc = gb.evaluate_accuracy(test_iter, net, ctx[0])
        print('validation accuracy: %.4f' % test_acc)
def train(num_gpus, batch_size, lr):
    train_iter, test_iter = gb.load_data_fashion_mnist(
        batch_size, root="../data/fashion-mnist")
    ctx = [mx.gpu(i) for i in range(num_gpus)]
    print("running on:", ctx)
    gpu_params = [get_params(params, c) for c in ctx]
    for epoch in range(4):
        start = time.time()
        for X, y in train_iter:
            train_batch(X, y, gpu_params, ctx, lr)
        nd.waitall()
        train_time = time.time() - start

        def net(x):
            return lenet(x, gpu_params[0])

        test_acc = gb.evaluate_accuracy(test_iter, net, ctx[0])
        print("epoch %d, time: %.1f sec, test acc: %.2f"
              % (epoch + 1, train_time, test_acc))
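# The two from-scratch variants above rely on helpers that are not shown in
# this section: get_params, train_batch and an all-reduce step. A minimal
# sketch of what they typically look like for this data-parallel pattern; it
# assumes a lenet(X, params) forward function, a module-level params list, a
# module-level loss, and gluonbook's gb.sgd are defined elsewhere.
from mxnet import autograd
from mxnet.gluon import utils as gutils

def get_params(params, ctx):
    # Copy the parameters to the given device and attach gradient buffers.
    new_params = [p.copyto(ctx) for p in params]
    for p in new_params:
        p.attach_grad()
    return new_params

def allreduce(data):
    # Sum the gradients on the first device, then broadcast the sum back.
    for i in range(1, len(data)):
        data[0][:] += data[i].copyto(data[0].context)
    for i in range(1, len(data)):
        data[0].copyto(data[i])

def train_batch(X, y, gpu_params, ctx, lr):
    # Split the mini-batch across devices, run forward/backward on each,
    # aggregate the gradients, and take an SGD step per device.
    gpu_Xs = gutils.split_and_load(X, ctx)
    gpu_ys = gutils.split_and_load(y, ctx)
    with autograd.record():
        ls = [loss(lenet(gpu_X, gpu_W), gpu_y)
              for gpu_X, gpu_y, gpu_W in zip(gpu_Xs, gpu_ys, gpu_params)]
    for l in ls:
        l.backward()
    for i in range(len(gpu_params[0])):
        allreduce([gpu_params[c][i].grad for c in range(len(ctx))])
    for param in gpu_params:
        gb.sgd(param, lr, X.shape[0])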
import sys
sys.path.append('..')
import gluonbook as gb
from mxnet import gluon, init, nd
from mxnet.gluon import nn

net = nn.Sequential()
# Opening stages assumed here per the standard AlexNet design (they are cut
# from this fragment): a large 11 x 11 window, then a 5 x 5 window with more
# output channels, each followed by max pooling.
net.add(nn.Conv2D(96, kernel_size=11, strides=4, activation='relu'),
        nn.MaxPool2D(pool_size=3, strides=2),
        nn.Conv2D(256, kernel_size=5, padding=2, activation='relu'),
        nn.MaxPool2D(pool_size=3, strides=2),
        # Three consecutive convolutional layers with a smaller window.
        # Except for the last one, the number of output channels is increased
        # further. No pooling follows the first two of them, so the height and
        # width of the input are not reduced.
        nn.Conv2D(384, kernel_size=3, padding=1, activation='relu'),
        nn.Conv2D(384, kernel_size=3, padding=1, activation='relu'),
        nn.Conv2D(256, kernel_size=3, padding=1, activation='relu'),
        nn.MaxPool2D(pool_size=3, strides=2),
        # Fully connected layers several times wider than LeNet's output.
        # Dropout is used to control the model complexity.
        nn.Dense(4096, activation="relu"), nn.Dropout(.5),
        nn.Dense(4096, activation="relu"), nn.Dropout(.5),
        # Output layer. Since we use Fashion-MNIST here, the number of classes
        # is 10 rather than the 1000 of the original paper.
        nn.Dense(10))

X = nd.random.uniform(shape=(1, 1, 224, 224))
net.initialize()
for layer in net:
    X = layer(X)
    print(layer.name, 'output shape:\t', X.shape)

train_data, test_data = gb.load_data_fashion_mnist(batch_size=128, resize=224)

lr = 0.01
ctx = gb.try_gpu()
net.collect_params().initialize(force_reinit=True, ctx=ctx, init=init.Xavier())
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr})
loss = gluon.loss.SoftmaxCrossEntropyLoss()
gb.train(train_data, test_data, net, loss, trainer, ctx, num_epochs=5)
import sys
sys.path.append('..')
import gluonbook as gb
import mxnet as mx
from mxnet import gluon, init, nd
from mxnet.gluon import nn

net = nn.Sequential()
net.add(nn.Conv2D(channels=6, kernel_size=5, activation='sigmoid'),
        nn.MaxPool2D(pool_size=2, strides=2),
        nn.Conv2D(channels=16, kernel_size=5, activation='sigmoid'),
        nn.MaxPool2D(pool_size=2, strides=2),
        # Dense automatically flattens an input of shape
        # (batch size, channels, height, width) into
        # (batch size, channels * height * width).
        nn.Dense(120, activation='sigmoid'),
        nn.Dense(84, activation='sigmoid'),
        nn.Dense(10))

X = nd.random.uniform(shape=(1, 1, 28, 28))
net.initialize()
for layer in net:
    X = layer(X)
    print(layer.name, 'output shape:\t', X.shape)

train_data, test_data = gb.load_data_fashion_mnist(batch_size=256)

# Use a GPU if one is available, otherwise fall back to the CPU.
try:
    ctx = mx.gpu()
    _ = nd.zeros((1,), ctx=ctx)
except mx.base.MXNetError:
    ctx = mx.cpu()
print(ctx)

lr = 1
net.collect_params().initialize(ctx=ctx, init=init.Xavier(), force_reinit=True)
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr})
loss = gluon.loss.SoftmaxCrossEntropyLoss()
gb.train(train_data, test_data, net, loss, trainer, ctx, num_epochs=5)
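# The try/except device selection above is essentially what the gb.try_gpu()
# helper used by the other scripts in this section does; a minimal sketch of
# such a helper:
import mxnet as mx
from mxnet import nd

def try_gpu():
    """Return mx.gpu() if a GPU is usable, otherwise fall back to mx.cpu()."""
    try:
        ctx = mx.gpu()
        _ = nd.zeros((1,), ctx=ctx)
    except mx.base.MXNetError:
        ctx = mx.cpu()
    return ctx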
import sys
sys.path.append('..')
import gluonbook as gb
from mxnet import gluon, init, nd
from mxnet.gluon import nn

def nin_block(num_channels, kernel_size, strides, padding):
    # One full convolution followed by two 1 x 1 convolutions that act as a
    # per-pixel fully connected layer.
    blk = nn.Sequential()
    blk.add(nn.Conv2D(num_channels, kernel_size,
                      strides, padding, activation='relu'),
            nn.Conv2D(num_channels, kernel_size=1, activation='relu'),
            nn.Conv2D(num_channels, kernel_size=1, activation='relu'))
    return blk

net = nn.Sequential()
net.add(nin_block(96, kernel_size=11, strides=4, padding=0),
        nn.MaxPool2D(pool_size=3, strides=2),
        nin_block(256, kernel_size=5, strides=1, padding=2),
        nn.MaxPool2D(pool_size=3, strides=2),
        nin_block(384, kernel_size=3, strides=1, padding=1),
        nn.MaxPool2D(pool_size=3, strides=2),
        nn.Dropout(0.5),
        # 10 output channels: one per Fashion-MNIST label class.
        nin_block(10, kernel_size=3, strides=1, padding=1),
        nn.GlobalAvgPool2D(),
        nn.Flatten())

# X = nd.random.uniform(shape=(1, 1, 224, 224))
# net.initialize()
# for layer in net:
#     X = layer(X)
#     print(layer.name, 'output shape:\t', X.shape)

# Train.
lr, num_epochs, batch_size, ctx = 0.1, 5, 128, gb.try_gpu()
net.initialize(init=init.Xavier(), force_reinit=True, ctx=ctx)
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr})
train_iter, test_iter = gb.load_data_fashion_mnist(batch_size=batch_size,
                                                   resize=224)
gb.train_ch5(net, train_iter, test_iter, batch_size, trainer, ctx, num_epochs)
import sys
sys.path.append('..')
import gluonbook as gb
from mxnet import gluon, init, nd
from mxnet.gluon import loss as gloss, nn

def resnet_block(num_channels, num_residuals, first_block=False):
    blk = nn.Sequential()
    for i in range(num_residuals):
        # The first residual unit of every block (except the very first)
        # halves the height and width while doubling the channels.
        if i == 0 and not first_block:
            blk.add(Residual(num_channels, use_1x1conv=True, strides=2))
        else:
            blk.add(Residual(num_channels))
    return blk

net = nn.Sequential()
net.add(nn.Conv2D(64, kernel_size=7, strides=2, padding=3),
        nn.BatchNorm(), nn.Activation('relu'),
        nn.MaxPool2D(pool_size=3, strides=2, padding=1))
net.add(resnet_block(64, 2, first_block=True),
        resnet_block(128, 2),
        resnet_block(256, 2),
        resnet_block(512, 2))
net.add(nn.GlobalAvgPool2D(), nn.Dense(10))

X = nd.random.uniform(shape=(1, 1, 224, 224))
net.initialize()
for layer in net:
    X = layer(X)
    print(layer.name, 'output shape:\t', X.shape)

lr = 0.05
num_epochs = 5
batch_size = 256
ctx = gb.try_gpu()
net.initialize(force_reinit=True, ctx=ctx, init=init.Xavier())
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr})
loss = gloss.SoftmaxCrossEntropyLoss()
train_iter, test_iter = gb.load_data_fashion_mnist(batch_size, resize=96)
gb.train(train_iter, test_iter, net, loss, trainer, ctx, num_epochs)
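# resnet_block above assumes a Residual class that is not part of this
# fragment. A minimal sketch following the standard two-convolution residual
# design; the constructor arguments match the calls above, while the internals
# are an assumption rather than the original author's code.
from mxnet import nd
from mxnet.gluon import nn

class Residual(nn.Block):
    def __init__(self, num_channels, use_1x1conv=False, strides=1, **kwargs):
        super(Residual, self).__init__(**kwargs)
        self.conv1 = nn.Conv2D(num_channels, kernel_size=3, padding=1,
                               strides=strides)
        self.conv2 = nn.Conv2D(num_channels, kernel_size=3, padding=1)
        # Optional 1 x 1 convolution to match shapes on the shortcut path.
        self.conv3 = (nn.Conv2D(num_channels, kernel_size=1, strides=strides)
                      if use_1x1conv else None)
        self.bn1 = nn.BatchNorm()
        self.bn2 = nn.BatchNorm()

    def forward(self, X):
        Y = nd.relu(self.bn1(self.conv1(X)))
        Y = self.bn2(self.conv2(Y))
        if self.conv3:
            X = self.conv3(X)
        return nd.relu(Y + X)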
import sys sys.path.append("..") import gluonbook as gb from mxnet import gluon, nd, autograd, init from mxnet.gluon import loss as gloss, nn net = nn.Sequential() net.add(nn.Dense(128, activation='tanh')) net.add(nn.Dense(10)) net.add(nn.Dense(10)) net.initialize(init.Normal(sigma=0.01)) batch_szie = 256 train_iter, test_iter = gb.load_data_fashion_mnist(batch_szie) loss = gloss.SoftmaxCrossEntropyLoss() trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.1}) epochs = 50 gb.train_cpu(net, train_iter, test_iter, loss, epochs, batch_szie, trainer=trainer)
import os
import sys
sys.path.append("..")
import gluonbook as gb
import mxnet as mx
from mxnet import autograd, gluon, init
from mxnet.gluon import loss as gloss, nn


def get_lenet():
    net = nn.Sequential()
    net.add(nn.Conv2D(channels=16, kernel_size=3, activation="relu"),
            nn.MaxPool2D(pool_size=2, strides=2),
            nn.Conv2D(channels=32, kernel_size=3, activation="relu"),
            nn.MaxPool2D(pool_size=2, strides=2),
            nn.Dense(32, activation="relu"),
            nn.Dense(16, activation="relu"),
            nn.Dense(10))
    return net


if __name__ == "__main__":
    batch_size = 256
    learning_rate = 0.5
    weight_decay = 0.005
    num_epochs = 5

    root = os.path.join(os.getcwd(), "data", "fashion-mnist")
    train_iter, val_iter = gb.load_data_fashion_mnist(batch_size, root=root)

    net = get_lenet()
    ctx = mx.cpu()
    net.initialize(init.Xavier(), ctx=ctx)
    loss = gloss.SoftmaxCrossEntropyLoss()
    trainer = gluon.Trainer(net.collect_params(), optimizer="sgd",
                            optimizer_params={"learning_rate": learning_rate,
                                              "wd": weight_decay})

    for epoch in range(num_epochs):
        for X, y in train_iter:
            X, y = X.as_in_context(ctx), y.as_in_context(ctx)
            with autograd.record():
                y_ = net(X)
                l = loss(y_, y)
            l.backward()
            trainer.step(batch_size)
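        # Assumed addition (not part of the original fragment): report the
        # validation accuracy after each epoch, as the other scripts in this
        # section do, using gluonbook's evaluate_accuracy.
        val_acc = gb.evaluate_accuracy(val_iter, net, ctx)
        print("epoch %d, validation accuracy: %.4f" % (epoch + 1, val_acc))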
import sys
sys.path.append('..')
import gluonbook as gb
from mxnet import gluon, init
from mxnet.gluon import nn

def dense_net():
    # The stem of the network (initial convolution, batch norm and pooling)
    # and the hyperparameters init_channels, growth_rate, block_layers and
    # num_class are assumed to be defined earlier; they are not part of this
    # fragment.
    channels = init_channels
    for i, layers in enumerate(block_layers):
        net.add(DenseBlock(layers, growth_rate))
        channels += layers * growth_rate
        # A transition layer between dense blocks halves the channel count.
        if i != len(block_layers) - 1:
            net.add(transition_block(channels // 2))
    net.add(nn.BatchNorm(),
            nn.Activation('relu'),
            nn.AvgPool2D(pool_size=1),
            nn.Flatten(),
            nn.Dense(num_class))
    return net

train_data, test_data = gb.load_data_fashion_mnist(batch_size=64, resize=32)
ctx = gb.try_gpu()
net = dense_net()
net.initialize(ctx=ctx, init=init.Xavier())
loss = gluon.loss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.1})
gb.train(train_data, test_data, net, loss, trainer, ctx, num_epochs=1)
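# dense_net() above assumes DenseBlock and transition_block helpers that are
# not shown in this fragment. A minimal sketch of both, following the standard
# DenseNet design (BN-ReLU-conv units whose outputs are concatenated with
# their input, and a 1 x 1 convolution plus pooling as the transition); the
# conv_block helper name is introduced here purely for illustration.
from mxnet import nd
from mxnet.gluon import nn

def conv_block(channels):
    blk = nn.Sequential()
    blk.add(nn.BatchNorm(), nn.Activation('relu'),
            nn.Conv2D(channels, kernel_size=3, padding=1))
    return blk

class DenseBlock(nn.Block):
    def __init__(self, layers, growth_rate, **kwargs):
        super(DenseBlock, self).__init__(**kwargs)
        self.net = nn.Sequential()
        for _ in range(layers):
            self.net.add(conv_block(growth_rate))

    def forward(self, x):
        # Concatenate each unit's output with its input along the channel axis.
        for blk in self.net:
            out = blk(x)
            x = nd.concat(x, out, dim=1)
        return x

def transition_block(channels):
    blk = nn.Sequential()
    blk.add(nn.BatchNorm(), nn.Activation('relu'),
            nn.Conv2D(channels, kernel_size=1),
            nn.AvgPool2D(pool_size=2, strides=2))
    return blk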
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import sys
sys.path.append("..")
import gluonbook as gb
from mxnet import autograd, gluon, init, nd
from mxnet.gluon import loss as gloss, nn

drop_prob1 = 0.2
drop_prob2 = 0.5

net = nn.Sequential()
with net.name_scope():
    net.add(nn.Dense(256, activation='relu'),
            nn.Dropout(drop_prob1),
            nn.Dense(256, activation='relu'),
            nn.Dropout(drop_prob2),
            nn.Dense(10))
net.initialize(init.Normal(sigma=0.01))

num_epochs = 5
batch_size = 256
train_iter, test_iter = gb.load_data_fashion_mnist(batch_size)
loss = gloss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.05})
gb.train_cpu(net, train_iter, test_iter, loss, num_epochs, batch_size,
             None, None, trainer)