import torch
from torch.utils.data import DataLoader

import d2l  # assumed: the snippet's local helper module (train_ch5, ...)


def train_fine_tuning(net, learning_rate, batch_size=128, num_epochs=5):
    net = net.to('cuda')
    # `train_imgs`, `test_imgs`, `train_augs` and `test_augs` are assumed to
    # be module-level globals (datasets plus their augmentation pipelines)
    # defined earlier in the source this snippet was taken from.
    train_imgs.transform = train_augs
    train_iter = DataLoader(train_imgs, batch_size, shuffle=True)
    test_imgs.transform = test_augs
    test_iter = DataLoader(test_imgs, batch_size)
    # Discriminative learning rates: the pretrained backbone trains at the
    # base rate, while the freshly initialized output layer (assumed to be
    # `net.fc`, as in torchvision's ResNets) trains ten times faster.
    output = list(net.fc.parameters())
    other = [p for name, p in net.named_parameters()
             if not name.startswith('fc.')]
    trainer = torch.optim.SGD(
        [{'params': other, 'lr': learning_rate},
         {'params': output, 'lr': learning_rate * 10}],
        lr=learning_rate, weight_decay=0.1)
    d2l.train_ch5(net,
                  train_iter,
                  test_iter,
                  trainer,
                  num_epochs,
                  device='cuda')
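A possible call site for the function above; every name here is an assumption for illustration, not part of the original snippet: a torchvision resnet18 backbone whose fc head is replaced for a two-class task, matching the net.fc convention the parameter groups rely on.

import torch
import torchvision

# Hypothetical setup: pretrained backbone, freshly initialized 2-class head.
finetune_net = torchvision.models.resnet18(weights='IMAGENET1K_V1')
finetune_net.fc = torch.nn.Linear(finetune_net.fc.in_features, 2)
torch.nn.init.xavier_uniform_(finetune_net.fc.weight)

train_fine_tuning(finetune_net, learning_rate=5e-5)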
Example #2

import torch
from torch import nn

import d2l  # assumed: the snippet's local helper module


class NLeNet(nn.Module):
    """LeNet variant with batch normalization after every conv/linear layer."""

    def __init__(self, X_shape, in_channels=1):
        super().__init__()
        # One dummy forward pass through the conv stack infers the flattened
        # feature size, so the first linear layer adapts to any input shape.
        X_test = torch.rand(1, in_channels, *X_shape)
        self.conv_part = nn.Sequential(
            nn.Conv2d(in_channels, 6, kernel_size=5), nn.BatchNorm2d(6),
            nn.Sigmoid(), nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(6, 16, kernel_size=5), nn.BatchNorm2d(16), nn.Sigmoid(),
            nn.MaxPool2d(kernel_size=2, stride=2))
        X_test = self.conv_part(X_test)
        self.flatten = X_test.shape[1] * X_test.shape[2] * X_test.shape[3]
        self.linear_part = nn.Sequential(nn.Linear(self.flatten, 120),
                                         nn.BatchNorm1d(120), nn.Sigmoid(),
                                         nn.Linear(120, 84),
                                         nn.BatchNorm1d(84), nn.Sigmoid(),
                                         nn.Linear(84, 10))

    def forward(self, X):
        X = self.conv_part(X)
        return self.linear_part(X.view(-1, self.flatten))


if __name__ == '__main__':
    lr, num_epochs, batch_size, device = 5.0, 5, 256, torch.device("cuda")
    net = NLeNet((28, 28)).to(device)
    trainer = torch.optim.SGD(net.parameters(), lr=lr)
    train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
    d2l.train_ch5(net, train_iter, test_iter, trainer, num_epochs, device)
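As a quick sanity check (not in the original snippet): for a 28x28 input the conv stack maps the spatial size 28 -> 24 -> 12 -> 8 -> 4, so the inferred flatten size is 16 * 4 * 4 = 256 and the network emits 10 logits.

net = NLeNet((28, 28))
print(net.flatten)              # 256 = 16 * 4 * 4
X = torch.rand(2, 1, 28, 28)
print(net(X).shape)             # torch.Size([2, 10])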
Example #3
import torch
from torch import nn

import d2l  # assumed: the snippet's local helper module


# `BatchNorm` below is a hand-written batch-norm layer whose definition fell
# outside this snippet; a sketch of it follows the example.
class NLeNet(nn.Module):
    # The class header and __init__ signature are reconstructed to match
    # Example #2; the original snippet begins mid-__init__.
    def __init__(self, in_shape, in_channels=1):
        super().__init__()
        # One dummy forward pass infers the flattened feature size.
        X_test = torch.rand(1, in_channels, *in_shape)
        self.conv_part = nn.Sequential(
            nn.Conv2d(in_channels, 6, kernel_size=5), BatchNorm(6, num_dims=4),
            nn.Sigmoid(), nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(6, 16, kernel_size=5), BatchNorm(16, num_dims=4),
            nn.Sigmoid(), nn.MaxPool2d(kernel_size=2, stride=2))
        X_test = self.conv_part(X_test)
        self.flatten = X_test.shape[1] * X_test.shape[2] * X_test.shape[3]
        self.linear_part = nn.Sequential(nn.Linear(self.flatten, 120),
                                         BatchNorm(120, num_dims=2),
                                         nn.Sigmoid(), nn.Linear(120, 84),
                                         BatchNorm(84, num_dims=2),
                                         nn.Sigmoid(), nn.Linear(84, 10))

    def forward(self, X):
        X = self.conv_part(X)
        return self.linear_part(X.view(-1, self.flatten))


if __name__ == '__main__':
    lr, num_epochs, batch_size, device = 1.0, 5, 256, torch.device("cuda")
    net = NLeNet((28, 28)).to(device)
    d2l.initial(net)  # assumed: project-local weight-initialization helper
    trainer = torch.optim.SGD(net.parameters(), lr=lr)
    train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
    d2l.train_ch5(net, train_iter, test_iter, trainer, num_epochs, device)
    # Inspect the learned scale (gamma) and shift (beta) of the first
    # custom BatchNorm layer.
    for layer in net.modules():
        if isinstance(layer, BatchNorm):
            print(layer.gamma.view(-1), layer.beta.view(-1), sep='\n')
            break
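Example #3 exercises a hand-written BatchNorm layer (learnable gamma/beta plus running statistics) whose definition is not part of the snippet. A minimal sketch matching the interface used above, modeled on the d2l book's from-scratch implementation:

import torch
from torch import nn


def batch_norm(X, gamma, beta, moving_mean, moving_var, eps, momentum):
    if not torch.is_grad_enabled():
        # Inference mode: normalize with the accumulated running statistics.
        X_hat = (X - moving_mean) / torch.sqrt(moving_var + eps)
    else:
        assert len(X.shape) in (2, 4)
        if len(X.shape) == 2:
            # Fully connected input: statistics over the batch dimension.
            mean = X.mean(dim=0)
            var = ((X - mean) ** 2).mean(dim=0)
        else:
            # Conv input: per-channel statistics over batch and spatial dims.
            mean = X.mean(dim=(0, 2, 3), keepdim=True)
            var = ((X - mean) ** 2).mean(dim=(0, 2, 3), keepdim=True)
        X_hat = (X - mean) / torch.sqrt(var + eps)
        moving_mean = momentum * moving_mean + (1.0 - momentum) * mean
        moving_var = momentum * moving_var + (1.0 - momentum) * var
    return gamma * X_hat + beta, moving_mean.data, moving_var.data


class BatchNorm(nn.Module):
    def __init__(self, num_features, num_dims):
        super().__init__()
        # num_dims=2 for linear layers, num_dims=4 for conv layers.
        shape = (1, num_features) if num_dims == 2 else (1, num_features, 1, 1)
        self.gamma = nn.Parameter(torch.ones(shape))   # learnable scale
        self.beta = nn.Parameter(torch.zeros(shape))   # learnable shift
        self.moving_mean = torch.zeros(shape)
        self.moving_var = torch.ones(shape)

    def forward(self, X):
        # Keep the running statistics on the same device as the input.
        if self.moving_mean.device != X.device:
            self.moving_mean = self.moving_mean.to(X.device)
            self.moving_var = self.moving_var.to(X.device)
        Y, self.moving_mean, self.moving_var = batch_norm(
            X, self.gamma, self.beta, self.moving_mean, self.moving_var,
            eps=1e-5, momentum=0.9)
        return Y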