Example 1
def train():
    # BCEWithLogitsLoss / CrossEntropyLoss reference:
    # https://blog.csdn.net/qq_22210253/article/details/85222093
    criterion = nn.BCEWithLogitsLoss()
    # criterion = nn.CrossEntropyLoss()
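    # Note: BCEWithLogitsLoss applies a sigmoid internally and expects float 0/1 targets
    # with the same shape as the model output, whereas CrossEntropyLoss expects integer
    # class indices, so the two are not interchangeable without also changing the labels.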

    # optimizer = optim.RMSprop(
    #     model.parameters(),
    #     lr=lr,
    #     momentum=momentum,
    #     weight_decay=w_decay,
    # )

    # optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9)

    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=0)

    # decay the LR by a factor of gamma every step_size epochs
    scheduler = lr_scheduler.StepLR(optimizer,
                                    step_size=step_size,
                                    gamma=gamma)

    vis = Visualizer(env='main')
    vis2 = VisdomLinePlotter(env_name='lossAndAccuracy')
    bestLoss = np.inf
    for epoch in range(epochs):
        model.train()
        trainTotalLoss = 0.
        start = time.time()
        for trainIter, (testImgTensor,
                        labelTensor) in enumerate(trainDataloader):
            # Variable wrapping is a no-op in modern PyTorch; moving the tensors to the device is enough
            testImgTensor = testImgTensor.to(device)
            labelTensor = labelTensor.to(device)

            output = model(testImgTensor)
            acc = Accuracy(output, labelTensor)

            trainLoss = criterion(output, labelTensor)
            trainTotalLoss += trainLoss.item()

            optimizer.zero_grad()
            trainLoss.backward()
            optimizer.step()

            debug = False  # used for debugging

            if (trainIter + 1) % 10 == 0:
                meanTotalLoss = trainTotalLoss / (trainIter + 1)
                print(
                    'Epoch [%d/%d], Iter [%d/%d] iterLoss: %.4f, epochAccumulatedMeanLoss: %.4f, acc: %.4f%%'
                    % (epoch + 1, epochs, trainIter + 1, len(trainDataloader),
                       trainLoss.item(), meanTotalLoss, acc * 100))
                vis.plot_train_val(loss_train=meanTotalLoss)  # visualize every 10 iterations
                vis2.plot(var_name='loss',
                          split_name='train',
                          title_name='Loss',
                          x=epoch * len(trainDataloader) + trainIter,
                          y=meanTotalLoss)
                vis2.plot(var_name='acc',
                          split_name='train',
                          title_name='Accuracy',
                          x=epoch * len(trainDataloader) + trainIter,
                          y=acc)

            if debug and bestLoss > meanTotalLoss and (trainIter +
                                                       1) % 10 == 0:
                bestLoss = meanTotalLoss
                print('get best test trainLoss %.5f' % bestLoss)
                torch.save(
                    model.state_dict(),
                    './modelWeights/model-epoch-{}-itera{}-trainLoss-{}.pth'.
                    format(epoch, trainIter, meanTotalLoss))
        usedTime = time.time() - start
        print('This epoch took %ds.' % usedTime)
        # step the LR scheduler once per epoch so the decay configured above actually takes effect
        scheduler.step()
        torch.save(
            model.state_dict(),
            './modelWeights/model-epoch-{}-itera{}-trainLoss-{:.3f}.pth'.
            format(epoch, trainIter, meanTotalLoss))
        valTotalLoss = 0.0
        model.eval()  # put dropout and batch-normalization layers into evaluation mode
        with torch.no_grad():
            for valIter, (testImgTensor,
                          labelTensor) in enumerate(valDataloader):
                testImgTensor = testImgTensor.to(device)
                labelTensor = labelTensor.to(device)

                output = model(testImgTensor)
                acc = Accuracy(output, labelTensor)
                valLoss = criterion(output, labelTensor)

                valTotalLoss += valLoss.item()

        meanValLoss = valTotalLoss / len(valDataloader)
        print('*' * 40)
        print(
            'Epoch [%d/%d], Iter [%d/%d] thisIterLoss: %.4f, EpochAccumulatedMeanLoss: %.4f'
            % (epoch + 1, epochs, valIter + 1, len(valDataloader),
               valLoss.item(), meanValLoss))
        print('*' * 40)
        vis.plot_train_val(loss_val=meanValLoss)
        vis2.plot(var_name='loss',
                  split_name='val',
                  title_name='Class Loss',
                  x=epoch * len(trainDataloader) + trainIter,
                  y=meanValLoss)
        vis2.plot(var_name='acc',
                  split_name='val',
                  title_name='Accuracy',
                  x=epoch * len(trainDataloader) + trainIter,
                  y=acc)
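
The example above relies on an `Accuracy` helper that is not shown. A minimal sketch of such a helper, assuming multi-label 0/1 targets matching `nn.BCEWithLogitsLoss` (a logit above 0 corresponds to a predicted probability above 0.5):

import torch

def Accuracy(output, target):
    # output: raw logits from the model; target: 0/1 labels with the same shape
    pred = (output > 0).float()
    # fraction of correctly predicted labels in the batch
    return (pred == target).float().mean().item()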
Example 2
def train():
    vis = Visualizer(opt.env)
    # Network section ====================================================== start
    # pretrained=True returns a VGG16 model with pretrained weights
    net = vgg16(pretrained=True)
    # keep the feature-extraction layers unchanged and
    # replace the fully connected layers of the VGG16 classifier
    net.classifier = nn.Sequential(
        nn.Linear(512 * 7 * 7, 4096),
        nn.ReLU(True),
        nn.Dropout(),
        # drop one fully connected layer
        # nn.Linear(4096, 4096),
        # nn.ReLU(True),
        # nn.Dropout(),
        # the final layer outputs 1470 values per image (1470 = 7 x 7 x 30)
        nn.Linear(4096, 1470),
    )
    # initialize the weights and biases of the linear layers
    for m in net.modules():
        if isinstance(m, nn.Linear):
            m.weight.data.normal_(0, 0.01)
            m.bias.data.zero_()
    # load saved weights into memory on the CPU
    if opt.load_model_path:
        net.load_state_dict(
            torch.load(opt.load_model_path,
                       map_location=lambda storage, loc: storage))
    # then move the model to the GPU
    if opt.use_gpu:
        net.cuda()
    # print the network structure
    print(net)
    print('Loaded the pretrained model')
    # switch the model to training mode
    net.train()
    # Network section ====================================================== end

    # Data loading section ==================================================== start
    # custom dataset wrapper
    train_dataset = yoloDataset(root=opt.file_root,
                                list_file=opt.voc_2012train,
                                train=True,
                                transform=[transforms.ToTensor()])
    # dataset loader; shuffle: randomize the order, num_workers: number of worker processes
    train_loader = DataLoader(train_dataset,
                              batch_size=opt.batch_size,
                              shuffle=True,
                              num_workers=4)
    test_dataset = yoloDataset(root=opt.test_root,
                               list_file=opt.voc_2007test,
                               train=False,
                               transform=[transforms.ToTensor()])
    test_loader = DataLoader(test_dataset,
                             batch_size=opt.batch_size,
                             shuffle=False,
                             num_workers=4)
    # Data loading section ==================================================== end

    # custom loss function: 7 = the image is divided into a 7x7 grid, 2 = boxes predicted per cell,
    # 5 = lambda_coord (weights the coordinate loss more heavily),
    # 0.5 = lambda_noobj (weights the confidence loss of boxes containing no object)
    criterion = yoloLoss(7, 2, 5, 0.5)
    # optimizer
    optimizer = torch.optim.SGD(net.parameters(),
                                lr=opt.learning_rate,
                                momentum=opt.momentum,
                                weight_decay=opt.weight_decay)

    print('The training set contains %d images' % (len(train_dataset)))
    print('The batch size is %d' % (opt.batch_size))
    # write training progress to a log file
    logfile = open('log/log.txt', 'w')
    # np.inf is positive infinity
    best_test_loss = np.inf

    for epoch in range(opt.num_epochs):
        if epoch == 1:
            opt.learning_rate = 0.0005
        if epoch == 2:
            opt.learning_rate = 0.00075
        if epoch == 3:
            opt.learning_rate = 0.001
        if epoch == 80:
            opt.learning_rate = 0.0001
        if epoch == 100:
            opt.learning_rate = 0.00001
        for param_group in optimizer.param_groups:
            param_group['lr'] = opt.learning_rate
        # current epoch number and the learning rate used for it
        print('\n\nCurrent epoch: %d / %d' % (epoch + 1, opt.num_epochs))
        print('Learning rate for this epoch: {}'.format(opt.learning_rate))

        # total loss for this epoch
        total_loss = 0.
        # make sure the model is back in training mode (net.eval() is called during validation below)
        net.train()
        # start training
        for i, (images, target) in enumerate(train_loader):
            # Variable wrapping is unnecessary in modern PyTorch
            if opt.use_gpu:
                images, target = images.cuda(), target.cuda()
            # forward pass to get predictions
            pred = net(images)
            # compute the loss; yoloLoss subclasses nn.Module, so calling the instance runs its forward method
            loss = criterion(pred, target)
            total_loss += loss.item()
            # clear the optimizer gradients
            optimizer.zero_grad()
            # backpropagate the loss
            loss.backward()
            # update the parameters
            optimizer.step()
            if (i + 1) % opt.print_freq == 0:
                print(
                    'Training: epoch [%d/%d], Iter [%d/%d], current batch loss: %.4f, mean loss so far this epoch: %.4f'
                    % (epoch + 1, opt.num_epochs, i + 1, len(train_loader),
                       loss.item(), total_loss / (i + 1)))
                # plot the mean training loss
                vis.plot_train_val(loss_train=total_loss / (i + 1))
        # save the latest model
        torch.save(net.state_dict(), opt.current_epoch_model_path)
        # validate once per epoch
        validation_loss = 0.0
        # switch the model to evaluation mode
        net.eval()
        # validate on the VOC2007 test set after each epoch
        with torch.no_grad():
            for i, (images, target) in enumerate(test_loader):
                if opt.use_gpu:
                    images, target = images.cuda(), target.cuda()
                # forward pass to get predictions
                pred = net(images)
                # loss
                loss = criterion(pred, target)
                validation_loss += loss.item()
        # compute the mean loss on the VOC2007 test set
        validation_loss /= len(test_loader)
        # plot the mean validation loss
        vis.plot_train_val(loss_val=validation_loss)
        # the goal of training is to minimize the loss on the validation set,
        # so save the model with the lowest validation loss seen so far
        if best_test_loss > validation_loss:
            best_test_loss = validation_loss
            print('New best mean validation loss: %.5f' % best_test_loss)
            torch.save(net.state_dict(), opt.best_test_loss_model_path)
        # write this epoch's result to the log file
        logfile.writelines(str(epoch) + '\t' + str(validation_loss) + '\n')
        logfile.flush()
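
For reference, the 1470 values produced by the modified classifier are normally interpreted as a 7x7 grid with 30 values per cell (2 boxes x 5 box values + 20 VOC class scores); the exact layout expected by `yoloLoss` is an assumption here. A minimal sketch of that interpretation:

import torch

batch_size = 4
pred = torch.randn(batch_size, 1470)                   # output of net(images)
grid = pred.view(batch_size, 7, 7, 30)                 # 7x7 cells, 30 values each
boxes = grid[..., :10].view(batch_size, 7, 7, 2, 5)    # 2 boxes per cell: (x, y, w, h, confidence)
class_scores = grid[..., 10:]                          # 20 class scores per cell
print(boxes.shape, class_scores.shape)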