                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))

b2 = nn.Sequential(*resnet_block(64, 256, 3))  # can't get first_block figured out
b3 = nn.Sequential(*resnet_block(256, 512, 4))
b4 = nn.Sequential(*resnet_block(512, 1024, 6))
b5 = nn.Sequential(*resnet_block(1024, 2048, 3))

# ./resnet18.svg.png
net = nn.Sequential(b1, b2, b3, b4, b5,
                    nn.AdaptiveAvgPool2d((1, 1)),
                    nn.Flatten(), nn.Linear(2048, 10))

# Examine the net: push a dummy input through and print each stage's output shape
X = torch.rand(size=(1, 1, 224, 224))
for layer in net:
    X = layer(X)
    print(layer.__class__.__name__, 'output shape:\t', X.shape)

# RUN
lr, num_epochs, batch_size = 0.05, 10, 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=96)
d2l.train_ch6(net, train_iter, test_iter, num_epochs, lr)
# loss 0.152, train acc 0.943, test acc 0.852
# 642.2 examples/sec on cuda:0

# Resnet50 Conclusions:
# - test acc about the same as resnet18
# - train acc ~10 points higher
# - loss much lower
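# For reference: the `resnet_block` helper used above is assumed to be the one
# from the d2l ResNet chapter (basic, non-bottleneck `Residual` blocks); a
# minimal sketch follows. Note that `first_block=True` only fits when input
# and output channel counts match (as in ResNet-18's 64 -> 64 first stage),
# which is likely why it could not be used for the 64 -> 256 stage above.
# With basic blocks, this net is a widened/deepened ResNet-18-style stack
# rather than a bottlenecked ResNet-50.
from torch import nn
from torch.nn import functional as F

class Residual(nn.Module):
    """Basic residual block: two 3x3 convs plus an optional 1x1 shortcut conv."""
    def __init__(self, input_channels, num_channels, use_1x1conv=False, strides=1):
        super().__init__()
        self.conv1 = nn.Conv2d(input_channels, num_channels, kernel_size=3,
                               padding=1, stride=strides)
        self.conv2 = nn.Conv2d(num_channels, num_channels, kernel_size=3,
                               padding=1)
        # 1x1 conv to match channels/stride on the shortcut path when needed
        self.conv3 = (nn.Conv2d(input_channels, num_channels, kernel_size=1,
                                stride=strides) if use_1x1conv else None)
        self.bn1 = nn.BatchNorm2d(num_channels)
        self.bn2 = nn.BatchNorm2d(num_channels)

    def forward(self, X):
        Y = F.relu(self.bn1(self.conv1(X)))
        Y = self.bn2(self.conv2(Y))
        if self.conv3:
            X = self.conv3(X)
        Y += X
        return F.relu(Y)

def resnet_block(input_channels, num_channels, num_residuals, first_block=False):
    # The first residual of each stage halves height/width and changes the
    # channel count, except right after the stem (first_block=True)
    blk = []
    for i in range(num_residuals):
        if i == 0 and not first_block:
            blk.append(Residual(input_channels, num_channels,
                                use_1x1conv=True, strides=2))
        else:
            blk.append(Residual(num_channels, num_channels))
    return blk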
    def forward(self, X):
        # If `X` is not on the main memory, copy `moving_mean` and
        # `moving_var` to the device where `X` is located
        if self.moving_mean.device != X.device:
            self.moving_mean = self.moving_mean.to(X.device)
            self.moving_var = self.moving_var.to(X.device)
        # Save the updated `moving_mean` and `moving_var`
        Y, self.moving_mean, self.moving_var = batch_norm(
            X, self.gamma, self.beta, self.moving_mean, self.moving_var,
            eps=1e-5, momentum=0.9)
        return Y

net = nn.Sequential(
    nn.Conv2d(1, 6, kernel_size=5), BatchNorm(6, num_dims=4), nn.Sigmoid(),
    nn.AvgPool2d(kernel_size=2, stride=2),
    nn.Conv2d(6, 16, kernel_size=5), BatchNorm(16, num_dims=4), nn.Sigmoid(),
    nn.AvgPool2d(kernel_size=2, stride=2), nn.Flatten(),
    nn.Linear(16 * 4 * 4, 120), BatchNorm(120, num_dims=2), nn.Sigmoid(),
    nn.Linear(120, 84), BatchNorm(84, num_dims=2), nn.Sigmoid(),
    nn.Linear(84, 10))

lr, num_epochs, batch_size = 1.0, 10, 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
d2l.train_ch6(net, train_iter, test_iter, num_epochs, lr, d2l.try_gpu())
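# For reference: the `forward` method above depends on a functional
# `batch_norm` helper and on state created in `BatchNorm.__init__`, both
# presumably defined earlier in the notebook. A minimal sketch following the
# d2l batch-norm chapter (an assumption, shown only so this section is
# self-contained):
import torch
from torch import nn

def batch_norm(X, gamma, beta, moving_mean, moving_var, eps, momentum):
    # `is_grad_enabled` distinguishes training from prediction mode
    if not torch.is_grad_enabled():
        # Prediction: normalize with the accumulated moving statistics
        X_hat = (X - moving_mean) / torch.sqrt(moving_var + eps)
    else:
        assert len(X.shape) in (2, 4)
        if len(X.shape) == 2:
            # Fully connected layer: statistics over the batch dimension
            mean = X.mean(dim=0)
            var = ((X - mean) ** 2).mean(dim=0)
        else:
            # Convolutional layer: statistics over batch, height and width,
            # keeping the channel dimension for broadcasting
            mean = X.mean(dim=(0, 2, 3), keepdim=True)
            var = ((X - mean) ** 2).mean(dim=(0, 2, 3), keepdim=True)
        # Training: normalize with the current minibatch statistics
        X_hat = (X - mean) / torch.sqrt(var + eps)
        # Update the moving averages used at prediction time
        moving_mean = momentum * moving_mean + (1.0 - momentum) * mean
        moving_var = momentum * moving_var + (1.0 - momentum) * var
    Y = gamma * X_hat + beta  # scale and shift
    return Y, moving_mean.data, moving_var.data

class BatchNorm(nn.Module):
    """State set up in __init__; the `forward` shown above completes this class.
    `num_features`: outputs of a fully connected layer or channels of a conv
    layer. `num_dims`: 2 for fully connected, 4 for convolutional."""
    def __init__(self, num_features, num_dims):
        super().__init__()
        shape = (1, num_features) if num_dims == 2 else (1, num_features, 1, 1)
        # gamma/beta are learned; the moving statistics are not parameters
        self.gamma = nn.Parameter(torch.ones(shape))
        self.beta = nn.Parameter(torch.zeros(shape))
        self.moving_mean = torch.zeros(shape)
        self.moving_var = torch.ones(shape)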