def train(net, learning_rate, epochs=5):
    # train_data, valid_data and ctx are assumed to be defined in the
    # surrounding scope.
    loss = gluon.loss.SoftmaxCrossEntropyLoss()
    trainer = gluon.Trainer(net.collect_params(), 'sgd',
                            {'learning_rate': learning_rate, 'wd': 0.001})
    gb.train(train_data, valid_data, net, loss, trainer, ctx, epochs)
    # export() requires a HybridBlock that has been hybridized and run
    # forward at least once.
    net.export("model")
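# `net.export("model")` writes `model-symbol.json` and `model-0000.params`.
# A minimal sketch of loading them back for inference; the input name
# 'data' and the input shape are assumptions, not taken from the source.
import mxnet as mx
from mxnet import gluon, nd

deserialized_net = gluon.nn.SymbolBlock.imports(
    "model-symbol.json", ['data'], "model-0000.params", ctx=mx.cpu())
X = nd.random.uniform(shape=(1, 3, 224, 224))  # hypothetical input shape
y = deserialized_net(X)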
def train_fine_tuning(net, lr, batch_size, num_epochs):
    train_iter = gdata.DataLoader(
        train_imgs.transform_first(train_augs), batch_size, shuffle=True)
    test_iter = gdata.DataLoader(
        test_imgs.transform_first(test_augs), batch_size)
    # This variant pins training to two GPUs explicitly.
    ctx = [mx.gpu(0), mx.gpu(1)]
    net.collect_params().reset_ctx(ctx)
    net.hybridize()
    loss = gloss.SoftmaxCrossEntropyLoss()
    trainer = gluon.Trainer(net.collect_params(), 'sgd',
                            {'learning_rate': lr, 'wd': 0.001})
    gb.train(train_iter, test_iter, net, loss, trainer, ctx, num_epochs)
def train(train_augs, test_augs, lr=0.1):
    batch_size = 256
    ctx = gb.try_all_gpus()
    net = gb.resnet18(10)
    net.initialize(ctx=ctx, init=init.Xavier())
    trainer = gluon.Trainer(net.collect_params(), 'sgd',
                            {'learning_rate': lr})
    loss = gluon.loss.SoftmaxCrossEntropyLoss()
    train_data = load_cifar10(True, train_augs, batch_size)
    test_data = load_cifar10(False, test_augs, batch_size)
    gb.train(train_data, test_data, net, loss, trainer, ctx, num_epochs=8)
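# `train_augs`, `test_augs`, and `load_cifar10` are assumed to be defined
# elsewhere; a minimal sketch consistent with how they are used above:
from mxnet.gluon import data as gdata

train_augs = gdata.vision.transforms.Compose([
    gdata.vision.transforms.RandomFlipLeftRight(),
    gdata.vision.transforms.ToTensor()])
test_augs = gdata.vision.transforms.Compose([
    gdata.vision.transforms.ToTensor()])

def load_cifar10(is_train, augs, batch_size):
    # Download CIFAR-10 if needed and wrap it in a DataLoader, applying
    # augs to the image component of each (image, label) example.
    return gdata.DataLoader(
        gdata.vision.CIFAR10(train=is_train).transform_first(augs),
        batch_size=batch_size, shuffle=is_train)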
def train_fine_tuning(net, learning_rate, batch_size=128, num_epochs=40):
    train_iter = gdata.DataLoader(
        train_imgs.transform_first(train_augs), batch_size, shuffle=True)
    test_iter = gdata.DataLoader(
        test_imgs.transform_first(test_augs), batch_size)
    ctx = gb.try_all_gpus()
    net.collect_params().reset_ctx(ctx)
    net.hybridize()
    loss = gloss.SoftmaxCrossEntropyLoss()
    trainer = gluon.Trainer(net.collect_params(), 'sgd',
                            {'learning_rate': learning_rate, 'wd': 0.001})
    gb.train(train_iter, test_iter, net, loss, trainer, ctx, num_epochs)
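# Both fine-tuning routines expect a `net` whose feature layers already
# carry pretrained weights. A sketch of one common way to build such a
# network; the two-class target task and the model-zoo ResNet-18 are
# assumptions, not taken from the source.
from mxnet import init
from mxnet.gluon import model_zoo

# Download an ImageNet-pretrained ResNet-18 and copy its feature layers
# into a fresh network whose output layer matches the new task.
pretrained_net = model_zoo.vision.resnet18_v2(pretrained=True)
finetune_net = model_zoo.vision.resnet18_v2(classes=2)  # 2 classes assumed
finetune_net.features = pretrained_net.features
finetune_net.output.initialize(init.Xavier())

train_fine_tuning(finetune_net, learning_rate=0.01)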
import gluonbook as gb
from mxnet import gluon, init, nd
from mxnet.gluon import nn

net = nn.Sequential()
net.add(nn.Conv2D(96, kernel_size=11, strides=4, activation='relu'),
        nn.MaxPool2D(pool_size=3, strides=2),
        nn.Conv2D(256, kernel_size=5, padding=2, activation='relu'),
        nn.MaxPool2D(pool_size=3, strides=2),
        # Three consecutive convolutional layers with still more output
        # channels. No pooling after the first two, so the input height
        # and width are preserved.
        nn.Conv2D(384, kernel_size=3, padding=1, activation='relu'),
        nn.Conv2D(384, kernel_size=3, padding=1, activation='relu'),
        nn.Conv2D(256, kernel_size=3, padding=1, activation='relu'),
        nn.MaxPool2D(pool_size=3, strides=2),
        # Fully connected layers several times wider than LeNet's output,
        # with dropout to control model complexity.
        nn.Dense(4096, activation='relu'), nn.Dropout(0.5),
        nn.Dense(4096, activation='relu'), nn.Dropout(0.5),
        # Output layer. We use Fashion-MNIST here, so the number of
        # classes is 10 rather than the paper's 1000.
        nn.Dense(10))

# Print the output shape of every layer for a single dummy image.
X = nd.random.uniform(shape=(1, 1, 224, 224))
net.initialize()
for layer in net:
    X = layer(X)
    print(layer.name, 'output shape:\t', X.shape)

train_data, test_data = gb.load_data_fashion_mnist(batch_size=128,
                                                   resize=224)
lr = 0.01
ctx = gb.try_gpu()
net.initialize(force_reinit=True, ctx=ctx, init=init.Xavier())
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr})
loss = gluon.loss.SoftmaxCrossEntropyLoss()
gb.train(train_data, test_data, net, loss, trainer, ctx, num_epochs=5)
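# The ResNet snippet below assembles the network from a `Residual` block
# that this excerpt never defines. A minimal sketch of that block,
# following the standard two-convolution design with an optional 1x1
# projection on the shortcut path:
from mxnet import nd
from mxnet.gluon import nn

class Residual(nn.Block):
    def __init__(self, num_channels, use_1x1conv=False, strides=1,
                 **kwargs):
        super(Residual, self).__init__(**kwargs)
        self.conv1 = nn.Conv2D(num_channels, kernel_size=3, padding=1,
                               strides=strides)
        self.conv2 = nn.Conv2D(num_channels, kernel_size=3, padding=1)
        # Optional 1x1 convolution so the shortcut matches the main
        # path's channels and resolution when the block changes them.
        if use_1x1conv:
            self.conv3 = nn.Conv2D(num_channels, kernel_size=1,
                                   strides=strides)
        else:
            self.conv3 = None
        self.bn1 = nn.BatchNorm()
        self.bn2 = nn.BatchNorm()

    def forward(self, X):
        Y = nd.relu(self.bn1(self.conv1(X)))
        Y = self.bn2(self.conv2(Y))
        if self.conv3:
            X = self.conv3(X)
        return nd.relu(Y + X)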
import gluonbook as gb
from mxnet import gluon, init, nd
from mxnet.gluon import loss as gloss, nn

def resnet_block(num_channels, num_residuals, first_block=False):
    blk = nn.Sequential()
    for i in range(num_residuals):
        # The first residual unit of every block except the first halves
        # the height/width and changes the number of channels.
        if i == 0 and not first_block:
            blk.add(Residual(num_channels, use_1x1conv=True, strides=2))
        else:
            blk.add(Residual(num_channels))
    return blk

net = nn.Sequential()
net.add(nn.Conv2D(64, kernel_size=7, strides=2, padding=3),
        nn.BatchNorm(), nn.Activation('relu'),
        nn.MaxPool2D(pool_size=3, strides=2, padding=1))
net.add(resnet_block(64, 2, first_block=True),
        resnet_block(128, 2),
        resnet_block(256, 2),
        resnet_block(512, 2))
net.add(nn.GlobalAvgPool2D(), nn.Dense(10))

# Inspect the output shape after each top-level block.
X = nd.random.uniform(shape=(1, 1, 224, 224))
net.initialize()
for layer in net:
    X = layer(X)
    print(layer.name, 'output shape:\t', X.shape)

lr = 0.05
num_epochs = 5
batch_size = 256
ctx = gb.try_gpu()
net.initialize(force_reinit=True, ctx=ctx, init=init.Xavier())
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr})
loss = gloss.SoftmaxCrossEntropyLoss()
train_iter, test_iter = gb.load_data_fashion_mnist(batch_size, resize=96)
gb.train(train_iter, test_iter, net, loss, trainer, ctx, num_epochs)
import gluonbook as gb
from mxnet import gluon, init, nd
from mxnet.gluon import loss as gloss, nn

def vgg(conv_arch):
    net = nn.Sequential()
    # Convolutional part: one vgg_block per (num_convs, num_channels)
    # pair.
    for (num_convs, num_channels) in conv_arch:
        net.add(vgg_block(num_convs, num_channels))
    # Fully connected part.
    net.add(nn.Dense(4096, activation='relu'), nn.Dropout(0.5),
            nn.Dense(4096, activation='relu'), nn.Dropout(0.5),
            nn.Dense(10))
    return net

X = nd.random.uniform(shape=(1, 1, 224, 224))
# For testing we build a narrower network (fewer channels per layer) to
# train on Fashion-MNIST.
ratio = 4
small_conv_arch = [(pair[0], pair[1] // ratio) for pair in conv_arch]
net = vgg(small_conv_arch)
# net = vgg(conv_arch)
net.initialize()
for layer in net:
    X = layer(X)
    print(layer.name, 'output shape:\t', X.shape)

lr = 0.05
ctx = gb.try_gpu()
net.initialize(force_reinit=True, ctx=ctx, init=init.Xavier())
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr})
train_iter, test_iter = gb.load_data_fashion_mnist(batch_size=128,
                                                   resize=224)
loss = gloss.SoftmaxCrossEntropyLoss()
gb.train(train_iter, test_iter, net, loss, trainer, ctx, num_epochs=3)
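# `vgg_block` and `conv_arch` are assumed to be defined earlier; a minimal
# sketch consistent with their use above (the VGG-11 arrangement):
def vgg_block(num_convs, num_channels):
    # num_convs 3x3 convolutions (padding 1) followed by 2x2 max pooling,
    # which halves the height and width.
    blk = nn.Sequential()
    for _ in range(num_convs):
        blk.add(nn.Conv2D(num_channels, kernel_size=3, padding=1,
                          activation='relu'))
    blk.add(nn.MaxPool2D(pool_size=2, strides=2))
    return blk

# VGG-11: 8 convolutional layers across 5 blocks, then 3 dense layers.
conv_arch = ((1, 64), (1, 128), (2, 256), (2, 512), (2, 512))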
import sys
sys.path.append('..')
import gluonbook as gb
from mxnet import gluon, init
from mxnet.gluon import nn

def mlpconv(channels, kernel_size, padding, strides=1, max_pooling=True):
    out = nn.Sequential()
    out.add(nn.Conv2D(channels=channels, kernel_size=kernel_size,
                      strides=strides, padding=padding,
                      activation='relu'),
            # Two 1x1 convolutions act as a per-pixel MLP across the
            # channels; the stride belongs only on the first convolution.
            nn.Conv2D(channels=channels, kernel_size=1, strides=1,
                      padding=0, activation='relu'),
            nn.Conv2D(channels=channels, kernel_size=1, strides=1,
                      padding=0, activation='relu'))
    if max_pooling:
        out.add(nn.MaxPool2D(pool_size=3, strides=2))
    return out

net = nn.Sequential()
with net.name_scope():
    net.add(mlpconv(channels=96, kernel_size=11, padding=0, strides=4),
            mlpconv(channels=256, kernel_size=5, padding=2),
            mlpconv(channels=384, kernel_size=3, padding=1),
            nn.Dropout(0.5),
            # 10 output channels, one per Fashion-MNIST class; global
            # average pooling then reduces each channel to one logit.
            mlpconv(10, 3, 1, max_pooling=False),
            nn.GlobalAvgPool2D(),
            nn.Flatten())

train_data, test_data = gb.load_data_fashion_mnist(batch_size=64,
                                                   resize=224)
ctx = gb.try_gpu()
net.initialize(ctx=ctx, init=init.Xavier())
loss = gluon.loss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(net.collect_params(), 'sgd',
                        {'learning_rate': 0.1})
gb.train(train_data, test_data, net, loss, trainer, ctx, 5)
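# A short usage sketch after training; assumes `test_data` yields
# (data, label) batches, as gb.load_data_fashion_mnist provides:
for data, label in test_data:
    # Move one batch to the training context and take the arg-max class.
    output = net(data.as_in_context(ctx))
    predictions = output.argmax(axis=1)
    print('predicted classes:', predictions[:10].asnumpy())
    break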