        )

    def forward(self, img):
        feature = self.conv(img)
        output = self.fc(feature.view(img.shape[0], -1))
        return output

net = LeNet()
net
'''Test'''
with st.echo():
    batch_size = st.slider(label='Batch size', min_value=256, max_value=2560, value=256, step=256)
    lr = st.slider(label='Learning rate', min_value=0.001, max_value=1.0, value=0.001, step=0.001)
    num_epochs = st.slider(label='Epochs', min_value=5, max_value=100, value=5, step=5)
    train_iter, test_iter = load_data_fashion_mnist(batch_size)

    # Convolutional neural networks are more expensive to compute than multilayer
    # perceptrons, so using a GPU to accelerate the computation is recommended.
    # We therefore slightly modify the evaluate_accuracy function described in
    # Section 3.6 (implementing softmax regression from scratch) so that it also
    # supports computation on a GPU.
    def train_ch5(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs):
        net = net.to(device)
        st.write('training on', device)
        loss = torch.nn.CrossEntropyLoss()
        batch_count = 0
        for epoch in range(num_epochs):
            train_l_sum, train_acc_sum, n, start = 0.0, 0.0, 0, time.time()
            for X, y in train_iter:
                X = X.to(device)
                y = y.to(device)
                y_hat = net(X)
                l = loss(y_hat, y)
                optimizer.zero_grad()
                l.backward()
                optimizer.step()
                # Accumulate loss and accuracy for the epoch summary
                train_l_sum += l.cpu().item()
                train_acc_sum += (y_hat.argmax(dim=1) == y).sum().cpu().item()
                n += y.shape[0]
                batch_count += 1
            test_acc = evaluate_accuracy(test_iter, net)
            st.write('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, time %.1f sec'
                     % (epoch + 1, train_l_sum / batch_count, train_acc_sum / n,
                        test_acc, time.time() - start))
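# A minimal sketch of a GPU-aware evaluate_accuracy in the spirit of the comment
# above, assuming it takes a data iterator and an nn.Module and, when no device
# is given, falls back to the device the model's parameters live on:
def evaluate_accuracy(data_iter, net, device=None):
    if device is None and isinstance(net, torch.nn.Module):
        # Use the device of the model's parameters by default
        device = list(net.parameters())[0].device
    acc_sum, n = 0.0, 0
    with torch.no_grad():
        for X, y in data_iter:
            net.eval()  # evaluation mode disables dropout
            acc_sum += (net(X.to(device)).argmax(dim=1) == y.to(device)).float().sum().cpu().item()
            net.train()  # switch back to training mode
            n += y.shape[0]
    return acc_sum / n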
net
'''
### Reading the data

Although the AlexNet paper uses the ImageNet dataset, ImageNet takes a long time to train on, so we still use the Fashion-MNIST dataset from the earlier chapters to demonstrate AlexNet. When reading the data we add an extra step that enlarges the image height and width to 224, the size AlexNet expects. This can be done with a torchvision.transforms.Resize instance: we apply the Resize instance before the ToTensor instance, and then use a Compose instance to chain the two transforms so they are convenient to call (a sketch of such a pipeline follows the training code below).
'''
with st.echo():
    batch_size = st.slider(label='Batch size', min_value=1, max_value=2560, value=128, step=128)
    lr = st.slider(label='Learning rate', min_value=0.001, max_value=1.0, value=0.001, step=0.001)
    num_epochs = st.slider(label='Epochs', min_value=1, max_value=100, value=5, step=5)
    # If an "out of memory" error occurs, reduce batch_size or resize
    train_iter, test_iter = load_data_fashion_mnist(batch_size, resize=224, num_workers=0)
    optimizer = torch.optim.Adam(net.parameters(), lr=lr)
    train_ch5(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs)
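# For reference, a minimal sketch of the Resize -> ToTensor -> Compose pipeline
# described above. The helper name _load_fashion_mnist_resized and the root path
# are illustrative assumptions; load_data_fashion_mnist (used above) is the
# function the app actually calls.
import torch
import torchvision
import torchvision.transforms as transforms

def _load_fashion_mnist_resized(batch_size, resize=None, root='./Datasets/FashionMNIST', num_workers=0):
    trans = []
    if resize:
        trans.append(transforms.Resize(size=resize))  # enlarge height and width first, e.g. to 224
    trans.append(transforms.ToTensor())               # then convert the PIL image to a tensor in [0, 1]
    transform = transforms.Compose(trans)             # chain the two transforms into one callable
    mnist_train = torchvision.datasets.FashionMNIST(root=root, train=True, download=True, transform=transform)
    mnist_test = torchvision.datasets.FashionMNIST(root=root, train=False, download=True, transform=transform)
    train_iter = torch.utils.data.DataLoader(mnist_train, batch_size, shuffle=True, num_workers=num_workers)
    test_iter = torch.utils.data.DataLoader(mnist_test, batch_size, shuffle=False, num_workers=num_workers)
    return train_iter, test_iter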