Example #1
def train_lenet(device, dataset_path):
    train_loader, valid_loader, test_loader = get_data_loaders(dataset_path)

    model = LeNet(35)
    optimizer = optim.Adam(model.parameters(),
                           lr=Consts.lr,
                           weight_decay=Consts.weight_decay)
    loss_criterion = torch.nn.NLLLoss()
    model.apply(weight_init)
    model.to(device)
    train_loss = []
    val_loss = []
    val_acc = []
    for epoch in range(Consts.epochs):
        t_loss = train(model, train_loader, optimizer, loss_criterion, device)
        v_loss, v_acc = evaluation(model, valid_loader, loss_criterion, device)
        torch.save(model.state_dict(), f'models/epoch-{epoch + 1}.pth')
        train_loss.append(t_loss)
        val_loss.append(v_loss)
        val_acc.append(v_acc)
        print(f'train loss in epoch {epoch + 1} is: {t_loss}')
        print(f'validation loss in epoch {epoch + 1} is: {v_loss}')
        print(f'validation accuracy in epoch {epoch + 1} is: {v_acc}')

    plot_loss(train_loss, val_loss, val_acc)
    test_loss, test_acc = test_model(model, test_loader, loss_criterion,
                                     val_loss, device, 'models/')
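The `train`, `evaluation`, `plot_loss`, and `test_model` helpers are defined elsewhere in this project; a minimal sketch of what `evaluation` might look like, assuming it returns (mean loss, accuracy) as the call site suggests:

import torch

def evaluation(model, loader, loss_criterion, device):
    """Return the mean loss and accuracy over one data loader."""
    model.eval()
    total_loss, correct, total = 0.0, 0, 0
    with torch.no_grad():
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)  # log-probabilities, matching the NLLLoss above
            total_loss += loss_criterion(outputs, labels).item()
            correct += (outputs.argmax(dim=1) == labels).sum().item()
            total += labels.size(0)
    model.train()
    return total_loss / len(loader), correct / total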
Example #2
def main():
    transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

    # 50000 training images
    # set download=True on first use so the dataset is downloaded automatically
    train_set = torchvision.datasets.CIFAR10(root='./data', train=True,
                                             download=False, transform=transform)
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=36,
                                               shuffle=True, num_workers=0)

    # 10000 validation images
    # set download=True on first use so the dataset is downloaded automatically
    val_set = torchvision.datasets.CIFAR10(root='./data', train=False,
                                           download=False, transform=transform)
    val_loader = torch.utils.data.DataLoader(val_set, batch_size=5000,
                                             shuffle=False, num_workers=0)
    val_data_iter = iter(val_loader)
    val_image, val_label = next(val_data_iter)  # the .next() method was removed in Python 3; use the next() builtin

    # classes = ('plane', 'car', 'bird', 'cat',
    #            'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

    net = LeNet()
    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=0.001)

    for epoch in range(5):  # loop over the dataset multiple times

        running_loss = 0.0
        for step, data in enumerate(train_loader, start=0):
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data

            # zero the parameter gradients
            optimizer.zero_grad()
            # forward + backward + optimize
            outputs = net(inputs)
            loss = loss_function(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            if step % 500 == 499:  # print every 500 mini-batches
                with torch.no_grad():
                    outputs = net(val_image)  # [batch, 10]
                    predict_y = torch.max(outputs, dim=1)[1]
                    accuracy = (predict_y == val_label).sum().item() / val_label.size(0)

                    print('[%d, %5d] train_loss: %.3f  test_accuracy: %.3f' %
                          (epoch + 1, step + 1, running_loss / 500, accuracy))
                    running_loss = 0.0

    print('Finished Training')

    save_path = './Lenet.pth'
    torch.save(net.state_dict(), save_path)
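None of these examples show the `LeNet` class itself. A minimal CIFAR-10 variant (3-channel 32x32 input, 10 classes) consistent with how it is called here, offered as an assumed sketch:

import torch.nn as nn
import torch.nn.functional as F

class LeNet(nn.Module):
    def __init__(self, num_classes=10):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 16, 5)   # 32x32 -> 28x28
        self.pool1 = nn.MaxPool2d(2, 2)    # 28x28 -> 14x14
        self.conv2 = nn.Conv2d(16, 32, 5)  # 14x14 -> 10x10
        self.pool2 = nn.MaxPool2d(2, 2)    # 10x10 -> 5x5
        self.fc1 = nn.Linear(32 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, num_classes)

    def forward(self, x):
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)  # raw logits; CrossEntropyLoss applies softmax itself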
Example #3
def trainModel(EPOCH_NUM=50, save=False, show=False):
    print("Train Start:")
    net = LeNet().to(devices)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(net.parameters(),
                                    lr=LR,
                                    alpha=0.9,
                                    eps=1e-08,
                                    weight_decay=0,
                                    momentum=0,
                                    centered=False)
    #x,trainloss,trainacc,testacc = [],[],[],[]
    batch, batchloss = [], []
    for epoch in range(EPOCH_NUM):
        sum_loss = 0.0
        acc = 0
        n_batches = 0
        batch.clear()      # reset the per-batch curves so the plot shows one epoch
        batchloss.clear()
        for i, (inputs, labels) in enumerate(trainLoader):
            inputs, labels = inputs.to(devices), labels.to(devices)
            # forward and backward
            optimizer.zero_grad()
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            sum_loss += loss.item()
            _, pred = torch.max(outputs.data, 1)
            acc += (pred == labels).sum().item()
            n_batches += 1
            batch.append(i)
            batchloss.append(loss.item())
        if show:
            plt.figure()
            plt.plot(batch, batchloss, 'b')
            plt.title('one epoch')
            plt.xlabel('iteration')
            plt.ylabel('loss')
            plt.show()
    # trainloss.append(sum_loss/iter)
    # trainacc.append(100*acc/len(trainData))
    # x.append(epoch)
        print('Epoch [%d] : loss [%f]' % (epoch + 1, sum_loss / n_batches))
        print('train accuracy = %f%%' % (100 * acc / len(trainData)))
        #with torch.no_grad():
        #    correct = 0
        #    total = 0
        #    for data in testLoader:
        #        images, labels = data
        #        images, labels = images.to(devices), labels.to(devices)
        #        outputs = net(images)
        #        _, predicted = torch.max(outputs.data, 1)
        #        total += labels.size(0)
        #        correct += (predicted == labels).sum()
        #print('test accuracy = %f%%'%(100*correct/total))
        #testacc.append(100*correct/total)
    if save:
        torch.save(net.state_dict(), 'MNIST_Model.pth')
Example #4
def gpu_train():
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(device)
    # or
    # device = torch.device("cuda")
    # or
    # device = torch.device("cpu")

    net = LeNet()
    net.to(device) # move the network to the chosen device
    loss_function = nn.CrossEntropyLoss() 
    optimizer = optim.Adam(net.parameters(), lr=0.001) 

    for epoch in range(5): 

        running_loss = 0.0
        time_start = time.perf_counter()
        for step, data in enumerate(train_loader, start=0):
            inputs, labels = data
            optimizer.zero_grad()
            outputs = net(inputs.to(device))				  # move the inputs to the chosen device
            loss = loss_function(outputs, labels.to(device))  # move the labels to the chosen device
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            if step % 1000 == 999:    
                with torch.no_grad(): 
                    outputs = net(test_image.to(device)) # move test_image to the chosen device
                    predict_y = torch.max(outputs, dim=1)[1]
                    accuracy = (predict_y == test_label.to(device)).sum().item() / test_label.size(0) # move test_label to the chosen device

                    print('[%d, %5d] train_loss: %.3f  test_accuracy: %.3f' %
                        (epoch + 1, step + 1, running_loss / 1000, accuracy))

                    print('%f s' % (time.perf_counter() - time_start))
                    running_loss = 0.0

    print('Finished Training')

    save_path = './Lenet.pth'
    torch.save(net.state_dict(), save_path)
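An optional refinement of the GPU loop above (an assumption, not part of the original): pinning host memory and using non-blocking copies lets the host-to-device transfer overlap with compute.

# `train_set` and `device` are as in the example above.
loader = torch.utils.data.DataLoader(train_set, batch_size=36, shuffle=True,
                                     num_workers=2, pin_memory=True)
for inputs, labels in loader:
    inputs = inputs.to(device, non_blocking=True)   # asynchronous copy from pinned memory
    labels = labels.to(device, non_blocking=True)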
Example #5
def main():
    class Args:
        pass

    args = Args()
    args.batch_size = 64
    args.test_batch_size = 2
    args.epochs = 10
    args.lr = 0.0001
    args.momentum = 0.5
    args.no_cuda = False
    args.seed = 1
    args.log_interval = 100
    args.save_model = True

    use_cuda = not args.no_cuda and torch.cuda.is_available()

    # torch.manual_seed(args.seed)

    device = torch.device("cuda" if use_cuda else "cpu")

    transform = transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))])
    dataset_dir = Path(os.environ['HOME'])/"datasets/mnist"
    train_dataset = MnistDataset(root_dir=dataset_dir/"train", transform=transform)
    train_loader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True, num_workers=4)
    test_dataset = MnistDataset(root_dir=dataset_dir/"test", transform=transform)
    test_loader = DataLoader(test_dataset, batch_size=args.test_batch_size, shuffle=True, num_workers=4)

    model = LeNet().to(device)
    # model = nn.DataParallel(model)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    for epoch in range(1, args.epochs + 1):
        train(args, model, device, train_loader, optimizer, epoch)
        test(args, model, device, test_loader)

    if args.save_model:
        torch.save(model.state_dict(), "mnist_cnn.pth")
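The `train` and `test` helpers are not shown; a minimal sketch matching the call signatures above, modeled on the classic PyTorch MNIST example (an assumption, not this project's actual code):

import torch
import torch.nn.functional as F

def train(args, model, device, train_loader, optimizer, epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        loss = F.cross_entropy(model(data), target)
        loss.backward()
        optimizer.step()
        if batch_idx % args.log_interval == 0:
            print(f'Train Epoch: {epoch} [{batch_idx * len(data)}/'
                  f'{len(train_loader.dataset)}]  Loss: {loss.item():.6f}')

def test(args, model, device, test_loader):
    model.eval()
    test_loss, correct = 0.0, 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.cross_entropy(output, target, reduction='sum').item()
            correct += (output.argmax(dim=1) == target).sum().item()
    n = len(test_loader.dataset)
    print(f'Test: average loss {test_loss / n:.4f}, accuracy {correct}/{n}')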
Example #6
def usually_train():
    net = LeNet()						  				# define the network to train
    loss_function = nn.CrossEntropyLoss() 				# use cross-entropy as the loss function
    optimizer = optim.Adam(net.parameters(), lr=0.001)  # define the optimizer (trainable params, learning rate)

    for epoch in range(5):  # one epoch is one full pass over the training set
        running_loss = 0.0
        time_start = time.perf_counter()
        
        for step, data in enumerate(train_loader, start=0):   # iterate over the training set; step counts from 0
            inputs, labels = data 	# get the images and labels from the training set
            optimizer.zero_grad()   # clear the gradients from the previous step
            
            # forward + backward + optimize
            outputs = net(inputs)  				  # forward pass
            loss = loss_function(outputs, labels) # compute the loss
            loss.backward() 					  # backward pass
            optimizer.step() 					  # update the parameters

            # print timing, loss, accuracy, etc.
            running_loss += loss.item()
            if step % 1000 == 999:    # print every 1000 mini-batches
                with torch.no_grad(): # no loss gradients are needed during validation; this avoids extra memory use
                    outputs = net(test_image) 				 # feed the test set (test_batch_size=10000) through the network; output shape [10000, 10]
                    predict_y = torch.max(outputs, dim=1)[1] # take the index of the largest output value as the predicted label
                    accuracy = (predict_y == test_label).sum().item() / test_label.size(0)
                    
                    print('[%d, %5d] train_loss: %.3f  test_accuracy: %.3f' %  # print epoch, step, loss, accuracy
                        (epoch + 1, step + 1, running_loss / 1000, accuracy))
                    
                    print('%f s' % (time.perf_counter() - time_start))        # elapsed time
                    running_loss = 0.0

    print('Finished Training')

    # save the trained parameters
    save_path = './Lenet.pth'
    torch.save(net.state_dict(), save_path)
Example #7
#lr=0.01
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=0.9, weight_decay=5e-4)
epoch = 5
for epoch_id in range(epoch):
    train_loss = 0
    correct = 0
    total = 0
    for batch_idx, (inputs, targets) in enumerate(data_train_loader):
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        _, predicted = outputs.max(1)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()

        print(epoch_id, batch_idx, len(data_train_loader),
              'Loss:%.3f|Acc:%.3f%%(%d/%d)' %
              (train_loss / (batch_idx + 1), 100. * correct / total, correct, total))

save_info = {
    "iter_num": epoch,                    # number of training epochs
    "optimizer": optimizer.state_dict(),  # the optimizer's state dict
    "model": model.state_dict(),          # the model's state dict
}
save_path = "./model_save/model.pth"
# save the checkpoint
torch.save(save_info, save_path)
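A sketch of how the checkpoint saved above can be restored later; it assumes `model` and `optimizer` are constructed the same way before loading:

checkpoint = torch.load(save_path, map_location='cpu')
model.load_state_dict(checkpoint["model"])
optimizer.load_state_dict(checkpoint["optimizer"])
trained_epochs = checkpoint["iter_num"]  # how many epochs the checkpoint covers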
Example #8
def train():
    device = torch.device("cuda:0" if opt.cuda else "cpu")
    utils.set_seed()
    # ============================ step 1/5: data ============================
    norm_mean = [0.485, 0.456, 0.406]
    norm_std = [0.229, 0.224, 0.225]

    train_transform = transforms.Compose([
        transforms.Resize((32, 32)),
        transforms.RandomCrop(32, padding=4),
        transforms.ToTensor(),
        transforms.Normalize(norm_mean, norm_std),
    ])

    valid_transform = transforms.Compose([
        transforms.Resize((32, 32)),
        transforms.ToTensor(),
        transforms.Normalize(norm_mean, norm_std),
    ])

    # build the custom Dataset
    train_data = RMBDataset(data_dir=opt.train_dir, transform=train_transform)
    valid_data = RMBDataset(data_dir=opt.valid_dir, transform=valid_transform)

    # build the DataLoaders
    train_loader = DataLoader(dataset=train_data,
                              batch_size=opt.batch_size,
                              shuffle=True)
    valid_loader = DataLoader(dataset=valid_data, batch_size=opt.batch_size)

    # ============================ step 2/5: model ============================
    net = LeNet(classes=2)
    net.to(device)
    # net.initialize_weights()

    # ============================ step 3/5: loss function ============================
    criterion = nn.CrossEntropyLoss()

    # ============================ step 4/5: optimizer ============================
    optimizer = optim.SGD(net.parameters(), lr=opt.lr, momentum=0.9)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

    # ============================ step 5/5: training ============================
    train_curve = list()
    valid_curve = list()

    for epoch in range(opt.epochs):
        loss_mean = 0.
        correct = 0.
        total = 0.

        net.train()
        for i, data in enumerate(train_loader):
            inputs, labels = data
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = net(inputs)

            optimizer.zero_grad()
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # track classification statistics
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).squeeze().sum().to("cpu").numpy()

            # print training info
            loss_mean += loss.item()
            train_curve.append(loss.item())
            if (i + 1) % opt.log_interval == 0:
                loss_mean = loss_mean / opt.log_interval
                print(
                    "Training:Epoch[{:0>3}/{:0>3}] Iteration[{:0>3}/{:0>3}] Loss: {:.4f} Acc:{:.2%}"
                    .format(epoch, opt.epochs, i + 1, len(train_loader),
                            loss_mean, correct / total))
                loss_mean = 0.

        scheduler.step()  # update the learning rate

        if (epoch + 1) % opt.val_interval == 0:
            correct_val = 0.
            total_val = 0.
            loss_val = 0.
            net.eval()
            with torch.no_grad():
                for j, data in enumerate(valid_loader):
                    inputs, labels = data
                    inputs = inputs.to(device)
                    labels = labels.to(device)
                    outputs = net(inputs)
                    loss = criterion(outputs, labels)

                    _, predicted = torch.max(outputs.data, 1)
                    total_val += labels.size(0)
                    correct_val += (
                        predicted == labels).squeeze().sum().to("cpu").numpy()

                    loss_val += loss.item()

                valid_curve.append(loss_val)
                print(
                    "Valid:\t Epoch[{:0>3}/{:0>3}] Iteration[{:0>3}/{:0>3}] Loss: {:.4f} Acc:{:.2%}"
                    .format(epoch, opt.epochs, j + 1, len(valid_loader),
                            loss_val, correct_val / total_val))

    utils.loss_picture(train_curve, train_loader, valid_curve,
                       opt.val_interval)
    # save the model parameters
    net_state_dict = net.state_dict()
    torch.save(net_state_dict, opt.path_state_dict)
    print("模型保存成功")
Example #9
                          momentum=args.momentum)
else:
    optimizer = optim.SGD(model.parameters(),
                          lr=args.lr,
                          momentum=args.momentum)

best_valid_loss = np.inf
iteration = 0
epoch = 1

if not os.path.isfile(
        '/content/drive/My Drive/PyTorch_Classifier/classifier.pt'):
    # save model
    model_save_name = 'classifier.pt'
    path = F"/content/drive/My Drive/PyTorch_Classifier/{model_save_name}"
    torch.save(model.state_dict(), path)

# load model
model_save_name = 'classifier.pt'
path = F"/content/drive/My Drive/PyTorch_Classifier/{model_save_name}"
model.load_state_dict(torch.load(path))

# training with early stopping
while (epoch < args.epochs + 1) and (iteration < args.patience):
    train(train_loader, model, optimizer, epoch, args.cuda, args.log_interval)
    valid_loss = test(valid_loader, model, args.cuda)
    if valid_loss > best_valid_loss:
        iteration += 1
        print('Loss was not improved, iteration {0}'.format(str(iteration)))
    else:
        print('Saving model...')
Example #10
def train():
    save_flag = True
    trainloader = get_dataset(r'H:/DataSet_All/猫狗识别/gpu/train',
                              batch_size=64,
                              imageindex=0)
    validationloader = get_dataset(r'H:/DataSet_All/猫狗识别/gpu/test',
                                   batch_size=64,
                                   imageindex=0)

    save_dir = 'output/'
    my_model = LeNet()
    my_model = model_init(my_model)
    criterion = nn.CrossEntropyLoss().to(device)
    optimizer = optim.Adam(my_model.parameters(), lr=0.001)
    epochs = 300
    # training

    for epoch in range(epochs):
        loss_list = []
        acc_list = []
        set_learning_rate(optimizer, epoch)

        learning_rate = optimizer.param_groups[0]['lr']
        tq = tqdm.tqdm(trainloader, desc='train')
        tq.set_description('train Epoch{}    lr{}'.format(
            epoch, learning_rate))

        for images, labels in tq:
            images = images.to(device)
            labels = labels.to(device)
            outputs = my_model(images)
            loss = criterion(outputs, labels)

            my_model.zero_grad()
            loss.backward()
            optimizer.step()

            loss_list.append(loss.item())
            loss_ave = sum(loss_list) / len(loss_list)

            _, predicted = torch.max(outputs.data, 1)
            accuracy = (predicted == labels).sum().item() / labels.size(0)
            acc_list.append(accuracy)
            acc_ave = sum(acc_list) / len(acc_list)
            tq.set_postfix(
                loss="%.4f  accuracy:%.4f   loss_ave:%.5f  acc_ave:%.5f  " %
                (loss.item(), accuracy, loss_ave, acc_ave))

        if save_flag:
            log = "\ntrain \tEpoch {}/{} \t Learning rate: {:.5f} \t Train loss_ave: {:.5f} \t  acc_ave: {:.5f} \t  " \
                   .format(epoch, epochs, learning_rate, loss_ave, acc_ave )
            # print(log)
            with open(save_dir + '/log.txt', 'a') as logFile:
                logFile.write(log + '\n')
            torch.save(my_model.state_dict(), save_dir + '/model_lastest.pt')

        if epoch % 1 == 0:
            loss_list = []
            acc_list = []
            with torch.no_grad():
                tq = tqdm.tqdm(validationloader, desc='test')
                for images, labels in tq:
                    images = images.to(device)
                    labels = labels.to(device)
                    outputs = my_model(images)
                    validation_loss = criterion(outputs, labels)
                    loss_list.append(validation_loss.item())
                    _, predicted = torch.max(outputs.data, 1)
                    accuracy = (predicted == labels).sum().item() / labels.size(0)
                    acc_list.append(accuracy)
                    acc_ave = sum(acc_list) / len(acc_list)
                    loss_ave = sum(loss_list) / len(loss_list)
                    tq.set_postfix(
                        test_loss=
                        "%.4f  acc:%.4f   loss_ave:%.5f  acc_ave:%.5f  " %
                        (validation_loss, accuracy, loss_ave, acc_ave))
                log = "\ntest \tEpoch {}/{} \t Learning rate: {:.5f} \t Train loss_ave: {:.5f} \t  acc_ave: {:.5f} \t  " \
                    .format(epoch, epochs, learning_rate, loss_ave, acc_ave)
                # print(log)
                with open(save_dir + '/log.txt', 'a') as logFile:
                    logFile.write(log + '\n')
Example #11
    ####
    for batch_idx, (inputs, targets) in enumerate(data_train_loader):
        optimizer.zero_grad()  # clear accumulated gradients before the backward pass
        output = model(inputs)  # ten output scores, one per handwritten digit (0~9)
        loss = loss_define(output, targets)  # forward pass computes the loss
        loss.backward()  # backpropagate the loss
        optimizer.step()  # gradient update according to the optimizer's hyperparameters (Adam here)

        ### to be continued, August 4
        train_loss += loss.item()  # training loss for this step
        _, predict = output.max(1)  # predict is the class with the highest output score
        total += targets.size(0)  # total number of samples trained on
        correct += predict.eq(targets).sum().item()  # number of correct predictions (output vs target)
        print(
            batch_idx, len(data_train_loader),
            'Loss: %.3f | Acc: %.3f%%(%d,%d)' %
            (train_loss /
             (batch_idx + 1), 100. * correct / total, correct, total))

    loss_plot.append(train_loss / (batch_idx + 1))

plt.plot(range(epoch_num), loss_plot, '--')
plt.show()
## save the model
save_info = {
    "epoch_num": epoch_num,
    "optimizer": optimizer.state_dict(),
    "model": model.state_dict(),
}
# save_dict = model.state_dict()
torch.save(save_info, 'D:/数据结构学习/LeNet_master/model_save/model.pth')
Example #12
        for i, data in enumerate(train_loader):
            inputs, labels = data
            # if CUDA is available, change .cpu() to .cuda()
            inputs, labels = inputs.cpu(), labels.cpu()  # Variable is deprecated since PyTorch 0.4; plain tensors work
            optimizer.zero_grad()  # zero the gradients
            outputs = net(inputs)  # forward pass through the network
            loss = criterion(outputs, labels)  # compute the loss
            loss.backward()  # backpropagation
            optimizer.step()  # parameter update
            sum_loss += loss.item()
            if i % 100 == 99:
                print('epoch:%d, step:%d, loss:%.03f' %
                      (epoch + 1, i + 1, sum_loss / 100))
                sum_loss = 0.0
    print("train finished, model saved.")
    torch.save(net.state_dict(), './model_save/LeNet.pth')
    print("-----------------start testing-----------------")

    net.eval()  # switch the model to evaluation mode
    correct = 0
    total = 0
    for data_test in test_loader:
        images, labels = data_test
        images, labels = images.cpu(), labels.cpu()
        output_test = net(images)
        _, predicted = torch.max(output_test, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum()
    print("Test finished. Test acc = {0}".format(correct.item() /
                                                 len(test_data_set)))
Example #13
if args.lenet:
    model = LeNet()
else:
    model = Net()

if args.cuda:
    model.cuda()
optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)

model.train()

for epoch in range(args.epochs):
    for batch_idx, (data, target) in enumerate(train_loader):
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        # Variable wrapping is no longer needed; tensors track gradients since PyTorch 0.4
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
    if args.lenet:
        torch.save(model.state_dict(), "mnist_lenet.pt")
    else:
        torch.save(model.state_dict(), "mnist_cnn.pt")

    validate(model)
Example #14
num_batches = len(train_loader)

for epoch in range(num_epochs):
    for idx, (inputs, labels) in enumerate(train_loader):
        inputs = inputs.to(device)
        labels = labels.to(device)
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if (idx + 1) % 100 == 0:
            print("Epoch: {}/{}  Step: {}/{}  Loss: {:.4f}".format(
                epoch + 1, num_epochs, idx + 1, num_batches, loss.item()))

model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for idx, (inputs, labels) in enumerate(test_loader):
        inputs = inputs.to(device)
        labels = labels.to(device)
        preds = model(inputs)
        values, indices = torch.max(preds, 1)
        total += labels.shape[0]
        correct += (labels == indices).sum().item()
    print("Accuracy of the network is: {}%".format(100 * correct / total))

torch.save(model.state_dict(), 'model.pth')
Example #15
    running_loss = 0.0
    for step, data in enumerate(train_loader, start=0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data

        # zero the parameter gradients
        optimizer.zero_grad()
        # forward + backward + optimize
        outputs = net(inputs)
        loss = loss_function(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if step % 500 == 499:  # print every 500 mini-batches
            with torch.no_grad():
                outputs = net(val_image)  # [batch, 10]
                predict_y = torch.max(outputs, dim=1)[1]
                accuracy = (predict_y
                            == val_label).sum().item() / val_label.size(0)

                print('[%d, %5d] train_loss: %.3f  test_accuracy: %.3f' %
                      (epoch + 1, step + 1, running_loss / 500, accuracy))
                running_loss = 0.0

print('Finished Training')

save_path = './Alexnet.pth'
torch.save(net.state_dict(), save_path)
Example #16
                          lr=args.lr,
                          momentum=args.momentum)

best_valid_loss = np.inf
iteration = 0
epoch = 1

# training with early stopping
while (epoch < args.epochs + 1) and (iteration < args.patience):
    train(train_loader, model, optimizer, epoch, args.cuda, args.log_interval)
    valid_loss = test(valid_loader, model, args.cuda)
    if valid_loss > best_valid_loss:
        iteration += 1
        print('Loss was not improved, iteration {0}'.format(str(iteration)))
    else:
        print('Saving model...')
        iteration = 0
        best_valid_loss = valid_loss
        state = {
            'net': model.module.state_dict() if args.cuda else model.state_dict(),
            'acc': valid_loss,
            'epoch': epoch,
        }
        if not os.path.isdir('checkpoint'):
            os.mkdir('checkpoint')
        torch.save(state, './checkpoint/ckpt.t7')
    epoch += 1

# test model
test(test_loader, model, args.cuda)
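The loop above implements early stopping by hand. The same bookkeeping, factored into a small helper class (a sketch, not from the original script):

class EarlyStopping:
    def __init__(self, patience):
        self.patience = patience
        self.best = float('inf')
        self.bad_epochs = 0   # epochs since the last improvement

    def step(self, valid_loss):
        """Record one validation result; return True while training should continue."""
        if valid_loss < self.best:
            self.best = valid_loss
            self.bad_epochs = 0
        else:
            self.bad_epochs += 1
        return self.bad_epochs < self.patience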
Example #17
# training with early stopping
while (epoch < args.epochs + 1) and (iteration < args.patience):
    train(train_loader, model, optimizer, epoch, args.cuda, args.log_interval)
    print('Epoch {0}'.format(str(epoch)))
    valid_loss, acc = test(valid_loader, model, args.cuda, is_valid=True)
    acc_save = 1 - acc  # track the error rate (1 - accuracy) as the score to minimize
    if acc_save > best_valid_loss:
        iteration += 1
        print('Loss was not improved, iteration {0}'.format(str(iteration)))
    else:
        print('Saving model...')
        iteration = 0
        best_valid_loss = acc_save
        state = {
            'net': model.module.state_dict() if args.cuda else model.state_dict(),
            'acc': valid_loss,
            'epoch': epoch,
        }
        if not os.path.isdir('checkpoint'):
            os.mkdir('checkpoint')
        torch.save(state, './checkpoint/{}.t7'.format(args.arc))
    epoch += 1

# test model
checkpoint = torch.load('./checkpoint/{}.t7'.format(args.arc),map_location = lambda storage, loc: storage)

if args.arc == 'LeNet':
    test_model = LeNet(num_classes=num_classes)
elif args.arc.startswith('VGG'):
    test_model = VGG(args.arc, num_classes=num_classes)
Example #18
def main():
    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    parser.add_argument('--dataset',
                        type=str,
                        default="mnist",
                        choices=["mnist", "cifar10"],
                        metavar='D',
                        help='training dataset (mnist or cifar10)')
    parser.add_argument('--batch-size',
                        type=int,
                        default=64,
                        metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('--test-batch-size',
                        type=int,
                        default=1000,
                        metavar='N',
                        help='input batch size for testing (default: 1000)')
    parser.add_argument('--percent',
                        nargs='+',
                        type=float,
                        default=[0.8, 0.92, 0.991, 0.93],
                        metavar='P',
                        help='per-layer pruning percentages '
                        '(default: 0.8 0.92 0.991 0.93)')
    parser.add_argument('--alpha',
                        type=float,
                        default=5e-4,
                        metavar='L',
                        help='l2 norm weight (default: 5e-4)')
    parser.add_argument('--rho',
                        type=float,
                        default=1e-2,
                        metavar='R',
                        help='cardinality weight (default: 1e-2)')
    parser.add_argument(
        '--l1',
        default=False,
        action='store_true',
        help='prune weights with l1 regularization instead of cardinality')
    parser.add_argument('--l2',
                        default=False,
                        action='store_true',
                        help='apply l2 regularization')
    parser.add_argument('--num_pre_epochs',
                        type=int,
                        default=3,
                        metavar='P',
                        help='number of epochs to pretrain (default: 3)')
    parser.add_argument('--num_epochs',
                        type=int,
                        default=10,
                        metavar='N',
                        help='number of epochs to train (default: 10)')
    parser.add_argument('--num_re_epochs',
                        type=int,
                        default=3,
                        metavar='R',
                        help='number of epochs to retrain (default: 3)')
    parser.add_argument('--lr',
                        type=float,
                        default=1e-3,
                        metavar='LR',
                        help='learning rate (default: 1e-3)')
    parser.add_argument('--adam_epsilon',
                        type=float,
                        default=1e-8,
                        metavar='E',
                        help='adam epsilon (default: 1e-8)')
    parser.add_argument('--no-cuda',
                        action='store_true',
                        default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed',
                        type=int,
                        default=1,
                        metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--save-model',
                        action='store_true',
                        default=False,
                        help='For Saving the current Model')
    parser.add_argument('--structured',
                        action='store_true',
                        default=False,
                        help='Enabling Structured Pruning')
    parser.add_argument('--test',
                        action='store_true',
                        default=False,
                        help='For Testing the current Model')
    parser.add_argument(
        '--stat',
        action='store_true',
        default=False,
        help='For showing the statistic result of the current Model')
    parser.add_argument('--n1',
                        type=int,
                        default=2,
                        metavar='N',
                        help='ReRAM OU size (row number) (default: 2)')
    parser.add_argument('--n2',
                        type=int,
                        default=2,
                        metavar='N',
                        help='ReRAM OU size (column number) (default: 2)')
    args = parser.parse_args()

    use_cuda = not args.no_cuda and torch.cuda.is_available()

    torch.manual_seed(args.seed)

    device = torch.device("cuda" if use_cuda else "cpu")

    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

    if args.dataset == "mnist":
        train_loader = torch.utils.data.DataLoader(datasets.MNIST(
            'data',
            train=True,
            download=True,
            transform=transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize((0.1307, ), (0.3081, ))
            ])),
                                                   batch_size=args.batch_size,
                                                   shuffle=True,
                                                   **kwargs)

        test_loader = torch.utils.data.DataLoader(
            datasets.MNIST('data',
                           train=False,
                           transform=transforms.Compose([
                               transforms.ToTensor(),
                               transforms.Normalize((0.1307, ), (0.3081, ))
                           ])),
            batch_size=args.test_batch_size,
            shuffle=True,
            **kwargs)

    else:
        args.percent = [0.8, 0.92, 0.93, 0.94, 0.95, 0.99, 0.99, 0.93]
        args.num_pre_epochs = 5
        args.num_epochs = 20
        args.num_re_epochs = 5
        train_loader = torch.utils.data.DataLoader(datasets.CIFAR10(
            'data',
            train=True,
            download=True,
            transform=transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize((0.49139968, 0.48215827, 0.44653124),
                                     (0.24703233, 0.24348505, 0.26158768))
            ])),
                                                   shuffle=True,
                                                   batch_size=args.batch_size,
                                                   **kwargs)

        test_loader = torch.utils.data.DataLoader(
            datasets.CIFAR10('data',
                             train=False,
                             download=True,
                             transform=transforms.Compose([
                                 transforms.ToTensor(),
                                 transforms.Normalize(
                                     (0.49139968, 0.48215827, 0.44653124),
                                     (0.24703233, 0.24348505, 0.26158768))
                             ])),
            shuffle=True,
            batch_size=args.test_batch_size,
            **kwargs)

    model = LeNet().to(device) if args.dataset == "mnist" else AlexNet().to(
        device)
    optimizer = PruneAdam(model.named_parameters(),
                          lr=args.lr,
                          eps=args.adam_epsilon)

    structured_tag = "_structured{}x{}".format(
        args.n1, args.n2) if args.structured else ""

    model_file = "mnist_cnn{}.pt".format(structured_tag) if args.dataset == "mnist" \
            else 'cifar10_cnn{}.pt'.format(structured_tag)

    if args.stat or args.test:
        print("=> loading model '{}'".format(model_file))

        if os.path.isfile(model_file):
            model.load_state_dict(torch.load(model_file))
            print("=> loaded model '{}'".format(model_file))
            if args.test:
                test(args, model, device, test_loader)
            if args.stat:
                show_statistic_result(args, model)
        else:
            print("=> loading model failed '{}'".format(model_file))

    else:
        checkpoint_file = 'checkpoint{}.pth.tar'.format(
            "_mnist" if args.dataset == "mnist" else "_cifar10")

        if not os.path.isfile(checkpoint_file):
            pre_train(args, model, device, train_loader, test_loader,
                      optimizer)
            torch.save(
                {
                    'epoch': args.num_pre_epochs,
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict()
                }, checkpoint_file)
        else:
            print("=> loading checkpoint '{}'".format(checkpoint_file))
            checkpoint = torch.load(checkpoint_file)
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}'".format(checkpoint_file))

        train(args, model, device, train_loader, test_loader, optimizer)
        mask = apply_l1_prune(model, device, args) if args.l1 else apply_prune(
            model, device, args)
        print_prune(model)
        test(args, model, device, test_loader)
        retrain(args, model, mask, device, train_loader, test_loader,
                optimizer)

        if args.save_model:
            torch.save(model.state_dict(), model_file)
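`PruneAdam`, `apply_prune`, `apply_l1_prune`, and the pre-train/retrain helpers belong to the surrounding pruning project and are not shown here. As a rough sketch of the general idea only (assumed, not the project's actual code), magnitude pruning builds a per-tensor mask and zeroes the smallest weights:

import torch

def magnitude_prune(model, percent):
    """Zero roughly the smallest `percent` fraction of each weight tensor; return the masks."""
    masks = {}
    for name, param in model.named_parameters():
        if name.endswith('weight'):
            k = max(int(param.numel() * percent), 1)        # number of weights to drop
            threshold = param.abs().flatten().kthvalue(k).values
            mask = (param.abs() > threshold).float()        # 1 = keep, 0 = pruned
            param.data.mul_(mask)                           # apply the mask in place
            masks[name] = mask                              # reuse during retraining
    return masks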
Example #19
def main(args):

    check_path(args)

    # all 10 CIFAR-10 classes
    classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse',
               'ship', 'truck')

    # dataset
    data_builder = DataBuilder(args)
    dataSet = DataSet(data_builder.train_builder(),
                      data_builder.test_builder(), classes)

    # choose the model
    if args.lenet:
        net = LeNet()
        model_name = args.name_le
    elif args.vgg:
        net = Vgg16_Net()
        model_name = args.name_vgg
    else:
        raise ValueError("Sorry, you can only select LeNet or VGG.")

    # cross-entropy loss
    criterion = nn.CrossEntropyLoss()

    # SGD optimizer
    optimizer = optim.SGD(net.parameters(),
                          lr=args.learning_rate,
                          momentum=args.sgd_momentum,
                          weight_decay=args.weight_decay)

    # path for saving the model parameters, "./model/state_dict" by default
    model_path = os.path.join(args.model_path, model_name)

    # choose whether to run on GPU or CPU
    device = t.device("cuda:0" if (
        t.cuda.is_available() and not args.no_cuda) else "cpu")

    # start training
    if args.do_train:
        print("Training...")
        trainer = Trainer(net, criterion, optimizer, dataSet.train_loader,
                          args)
        trainer.train(epochs=args.epoch)
        # save only the model parameters
        t.save(net.state_dict(), model_path)

    # start testing
    if args.do_eval:
        if not os.path.exists(model_path):
            print(
                "Sorry, there's no saved model yet, you need to train first.")
            return
        print("Testing...")
        device = t.device("cuda:0" if t.cuda.is_available() else "cpu")
        net.load_state_dict(t.load(model_path, map_location=device))
        # net.eval()
        tester = Tester(dataSet.test_loader, net, args)
        tester.test()

    if args.show_model:
        if not os.path.exists(model_path):
            print(
                "Sorry, there's no saved model yet, you need to train first.")
            return
        show_model(args)

    if args.do_predict:
        net.load_state_dict(t.load(model_path, map_location=device))
        predictor = Predictor(net, classes)
        # img_path = 'test'
        # img_name = [os.path.join(img_path, x) for x in os.listdir(img_path)]
        # for img in img_name:
        #     predictor.predict(img)
        img_path = 'test/cat0.jpg'
        predictor.predict(img_path)
Example #20
            ave_loss = ave_loss * 0.9 + loss.item() * 0.1
            loss.backward()
            optimizer.step()
            writer.add_scalar('train/loss', loss,
                              epoch * len(train_loader) + batch_idx)
            writer.add_scalar('train/acc', acc_train,
                              epoch * len(train_loader) + batch_idx)

            if (batch_idx + 1) % 10 == 0 or (batch_idx +
                                             1) == len(train_loader):
                print(
                    '==>>> epoch: {}, batch index: {}, train loss: {:.6f}, acc: {:.3f}'
                    .format(epoch, batch_idx + 1, ave_loss, acc_train))

        checkpoint = {
            "state_dict": model.state_dict(),
            "optimizer": optimizer.state_dict(),
        }
        os.makedirs("./weights/{}".format(date), exist_ok=True)
        torch.save(checkpoint,
                   "./weights/{}/checkpoint_{}.pt".format(date, epoch))

        correct_cnt, ave_loss = 0, 0
        total_cnt = 0
        acc_val = []
        loss_val = []

        with torch.no_grad():
            model.eval()
            for batch_idx, (x, target) in enumerate(test_loader):
                x = x.to(device)
Example #21
            for i in range(train_len):
                if val_label[i] in top5_val_preds[i]:
                    top5_valid_correct += 1

    epoch_loss = train_loss / total_batch_train  # true division; // would floor the mean loss
    epoch_acc = 100 * train_correct // train_total  #altered
    top5_epoch_acc = 100 * top5_train_correct // train_total
    top5_val_epoch_acc = 100 * top5_valid_correct // valid_total

    train_loss_log.append(epoch_loss)
    train_accuracy_log.append(epoch_acc)
    top5_train_accuracy_log.append(top5_epoch_acc)

    val_epoch_loss = validation_loss / total_batch_val
    val_epoch_acc = 100 * validation_correct // valid_total  #altered
    valid_loss_log.append(val_epoch_loss)
    valid_accuracy_log.append(val_epoch_acc)
    top5_valid_accuracy_log.append(top5_val_epoch_acc)

    print("===================================================")
    print(f'[epoch: {epoch + 1}]')
    print(
        f'training loss: {epoch_loss:.4f}, training accuracy: {epoch_acc:.2f} %(top1) {top5_epoch_acc:.2f}%(top5)'
    )
    print(
        f'validation loss: {val_epoch_loss:.4f}, validation accuracy: {val_epoch_acc:.2f}%(top1) {top5_val_epoch_acc:.2f}%(top5)'
    )

# Save model
torch.save(model.state_dict(), f'./weight/mnist_{epochs}.pth')
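The per-sample top-5 membership test above can also be written with tensor operations, avoiding the Python loop (a sketch):

import torch

def topk_correct(outputs, labels, k=5):
    """Count samples whose true label is among the k highest-scoring classes."""
    topk = outputs.topk(k, dim=1).indices             # [batch, k] predicted classes
    return (topk == labels.unsqueeze(1)).any(dim=1).sum().item()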
Example #22
                        mode='hard',
                        start_img=args.start_img,
                        num_img=args.num_img,
                        sigma=args.sigma,
                        beta=args.beta,
                        matfile=(None if matdir is None else os.path.join(
                            matdir, '{}.mat'.format(epoch))))
                t2 = time.time()
                print('Elapsed time: {}'.format(t2 - t1))

                if ckptdir is not None:
                    # Save checkpoint
                    print('==> Saving {}.pth..'.format(epoch))
                    try:
                        state = {
                            'net': base_model.state_dict(),
                            'epoch': epoch,
                        }
                        torch.save(state, '{}/{}.pth'.format(ckptdir, epoch))
                    except OSError:
                        print('OSError while saving {}.pth'.format(epoch))
                        print('Ignoring...')

    else:
        # Test routine
        certify(model,
                device,
                testset,
                transform_test,
                num_classes,
                mode='both',
Example #23
def main():
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))

    batch_size = 16
    epochs = 200

    data_transform = {
        "train":
        transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
        "val":
        transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])
    }

    data_root = os.path.abspath(os.path.join(os.getcwd(),
                                             "."))  # get data root path
    image_path = os.path.join(data_root, "data_set",
                              "flower_data")  # flower data set path
    assert os.path.exists(image_path), "{} path does not exist.".format(
        image_path)
    train_dataset = datasets.ImageFolder(root=os.path.join(
        image_path, "train"),
                                         transform=data_transform["train"])
    train_num = len(train_dataset)

    # {'daisy':0, 'dandelion':1, 'roses':2, 'sunflower':3, 'tulips':4}
    flower_list = train_dataset.class_to_idx
    cla_dict = dict((val, key) for key, val in flower_list.items())
    # write dict into json file
    json_str = json.dumps(cla_dict, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0,
              8])  # number of workers
    print('Using {} dataloader workers per process'.format(nw))

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               num_workers=nw)

    validate_dataset = datasets.ImageFolder(root=os.path.join(
        image_path, "val"),
                                            transform=data_transform["val"])
    val_num = len(validate_dataset)
    validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                                  batch_size=batch_size,
                                                  shuffle=False,
                                                  num_workers=nw)

    print("using {} images for training, {} images for validation.".format(
        train_num, val_num))

    # create model
    net = LeNet(num_classes=5)

    # load pretrained weights
    # download url: https://download.pytorch.org/models/mobilenet_v3_large-8738ca79.pth
    # download url: https://download.pytorch.org/models/mobilenet_v3_small-047dcff4.pth
    # model_weight_path = "weights/LeNet_pretrained.pth"
    # assert os.path.exists(model_weight_path), "file {} does not exist.".format(model_weight_path)
    # pre_weights = torch.load(model_weight_path, map_location=device)

    # delete classifier weights
    # pre_dict = {k: v for k, v in pre_weights.items() if net.state_dict()[k].numel() == v.numel()}
    # missing_keys, unexpected_keys = net.load_state_dict(pre_dict, strict=False)
    #
    # # freeze features weights
    # for param in net.conv_stem.parameters():
    #     param.requires_grad = False
    #
    # for param in net.bn1.parameters():
    #     param.requires_grad = False
    #
    # for param in net.act1.parameters():
    #     param.requires_grad = False
    #
    # for param in net.blocks.parameters():
    #     param.requires_grad = False

    net.to(device)

    # define loss function
    loss_function = nn.CrossEntropyLoss()

    # construct an optimizer
    params = [p for p in net.parameters() if p.requires_grad]
    optimizer = optim.Adam(params, lr=0.0001)

    best_acc = 0.0
    save_path = 'weights/lenet.pth'
    train_steps = len(train_loader)
    for epoch in range(epochs):
        # train
        net.train()
        running_loss = 0.0
        train_bar = tqdm(train_loader)
        for step, data in enumerate(train_bar):
            images, labels = data
            optimizer.zero_grad()
            logits = net(images.to(device))
            loss = loss_function(logits, labels.to(device))
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()

            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(
                epoch + 1, epochs, loss)

        # validate
        net.eval()
        acc = 0.0  # accumulate accurate number / epoch
        with torch.no_grad():
            val_bar = tqdm(validate_loader)
            for val_data in val_bar:
                val_images, val_labels = val_data
                outputs = net(val_images.to(device))
                # loss = loss_function(outputs, test_labels)
                predict_y = torch.max(outputs, dim=1)[1]
                acc += torch.eq(predict_y, val_labels.to(device)).sum().item()

                val_bar.desc = "valid epoch[{}/{}]".format(epoch + 1, epochs)
        val_accurate = acc / val_num
        print('[epoch %d] train_loss: %.3f  val_accuracy: %.3f' %
              (epoch + 1, running_loss / train_steps, val_accurate))

        if val_accurate > best_acc:
            best_acc = val_accurate
            torch.save(net.state_dict(), save_path)

    print('Finished Training')
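At inference time, the class_indices.json written above maps predicted indices back to class names; a minimal sketch (the tensor value stands in for a real model prediction):

import json

import torch

with open('class_indices.json') as f:
    idx_to_class = json.load(f)  # JSON keys are strings: {"0": "daisy", ...}

pred = torch.tensor([3])  # e.g. the argmax index produced by the model
print(idx_to_class[str(pred.item())])  # -> "sunflower" with the mapping above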
Example #24
def main():
    transform = transforms.Compose(  # transforms.Compose bundles the preprocessing steps into a single callable
        [
            transforms.ToTensor(),
            # ToTensor (see its docstring) turns an H*W*C image with values 0~255 into a C*H*W tensor with values 0~1
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ])
    # standardization: given mean ``(mean[1],...,mean[n])`` and std ``(std[1],...,std[n])`` for ``n`` channels

    # 50000 training images
    # set download=True on first use so the dataset is downloaded automatically
    # torchvision.datasets ships many datasets; CIFAR10 is used here
    train_set = torchvision.datasets.CIFAR10(
        root='./data',
        train=True,  # root sets the data location; train=True loads the training split
        download=False,
        transform=transform)  # the preprocessing pipeline defined above

    # load the training set and split it into batches
    train_loader = torch.utils.data.DataLoader(train_set,
                                               batch_size=36,
                                               shuffle=True,
                                               num_workers=0)
    # shuffle=True draws random samples into each batch; num_workers is the number of loader workers (must be 0 on Windows)

    # 10000 validation images
    # set download=True on first use so the dataset is downloaded automatically
    val_set = torchvision.datasets.CIFAR10(
        root='./data',
        train=False,  # root sets the data location; train=False loads the test split
        download=False,
        transform=transform)
    val_loader = torch.utils.data.DataLoader(val_set,
                                             batch_size=5000,
                                             shuffle=False,
                                             num_workers=0)

    val_data_iter = iter(val_loader)  # turn the loader into an iterator; next() then yields one batch
    val_image, val_label = next(val_data_iter)  # one batch of validation images and labels (.next() was removed in Python 3)

    classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse',
               'ship', 'truck')  # a tuple

    #  ! # to view the images, use the code below;
    #     # see the official tutorial: https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html#sphx-glr-beginner-blitz-cifar10-tutorial-py

    # def imshow(img):
    #     img = img / 2 + 0.5     # unnormalize: transforms.Normalize subtracted mean 0.5 and divided by std 0.5, so invert that here
    #     npimg = img.numpy()     # convert the image from tensor to numpy
    #     plt.imshow(np.transpose(npimg, (1, 2, 0)))      # restore the dimensions: tensor C*H*W back to numpy H*W*C
    #     plt.show()

    # # print labels
    # # print(' '.join('%5s' % classes[labels[j]] for j in range(batch_size)))
    # print(' '.join('%5s' % classes[val_label[j]] for j in range(4)))  # keep it small: show only 4 images

    # # show images
    # # imshow(torchvision.utils.make_grid(images))
    # imshow(torchvision.utils.make_grid(val_image))

    net = LeNet()  # instantiate the model
    loss_function = nn.CrossEntropyLoss(
    )  # define the loss function; CrossEntropyLoss already applies softmax internally
    # This criterion combines :class:`~torch.nn.LogSoftmax` and :class:`~torch.nn.NLLLoss` in one single class
    optimizer = optim.Adam(net.parameters(),
                           lr=0.001)  # define the optimizer: pass in the trainable parameters net.parameters()

    for epoch in range(
            5):  # loop over the dataset multiple times; epoch is the number of passes

        running_loss = 0.0  # running_loss accumulates the loss
        for step, data in enumerate(train_loader, start=0):  # iterate over the training samples
            # enumerate returns each batch together with its index; start=0 makes the index begin at 0
            # get the inputs; data is a list of [inputs, labels]

            inputs, labels = data  # split into images and labels

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs)  # forward pass to get the outputs
            loss = loss_function(outputs, labels)  # loss from predictions and ground truth
            loss.backward()  # backpropagate the loss
            optimizer.step()  # update the parameters with optimizer.step()

            # print statistics
            running_loss += loss.item()  # accumulate each batch loss into running_loss
            if step % 500 == 499:  # print every 500 mini-batches
                with torch.no_grad():  # no gradients are needed for the validation pass
                    # this saves compute and memory; without torch.no_grad the test phase can easily run out of memory
                    outputs = net(
                        val_image)  # output shape is [batch, 10]: dim 0 is the batch, dim 1 the classes
                    predict_y = torch.max(outputs,
                                          dim=1)[1]  # index of the largest output along dim 1
                    # max returns (values, indices), hence the [1]
                    accuracy = torch.eq(
                        predict_y, val_label).sum().item() / val_label.size(0)
                    # compare predicted and true labels and count the matches
                    # the comparison happens on tensors, so .item() extracts the number
                    # dividing by the number of test samples gives the accuracy

                    print('[%d, %5d] train_loss: %.3f  test_accuracy: %.3f' %
                          (epoch + 1, step + 1, running_loss / 500, accuracy))
                    # epoch, step within the epoch, mean training loss (over 500 steps), accuracy
                    running_loss = 0.0  # reset running_loss for the next 500 steps

    print('Finished Training')  # printed once all training is done

    save_path = './Lenet.pth'
    torch.save(net.state_dict(), save_path)  # torch.save stores all the trained parameters