Example #1
    def _pre_model(self):
        ''' Prepare the model '''

        model, train_params = None, None

        if self.model_name == 'C3D':
            model = C3D_model.C3D(num_classes=self.num_classes, pretrained=self.pretrained)
            train_params = [{'params': C3D_model.get_1x_lr_params(model), 'lr': self.lr},
                            {'params': C3D_model.get_10x_lr_params(model), 'lr': self.lr * 10}]
        else:
            raise ValueError('Unknown model name: {}'.format(self.model_name))

        model.to(self.device)
        criterion = nn.CrossEntropyLoss().to(self.device)

        optimizer = optim.SGD(train_params, lr=self.lr, momentum=0.9, weight_decay=5e-4)
        scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)  # divide the lr by 10 every 10 epochs

        if self.resume_epoch == 0:
            print("Training {} from scratch...".format(self.model_name))
        else:
            resume_file = PathSet.model_dir(model_name=self.model_name, cur_epochs=self.resume_epoch)

            # map_location keeps all checkpoint tensors on the CPU
            checkpoint = torch.load(resume_file, map_location=lambda storage, loc: storage)

            print("Initializing weights from: {}...".format(resume_file.split('/')[-1]))
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['opt_dict'])

        print('Total params: %.2fM' % (sum(p.numel() for p in model.parameters()) / 1000000.0))

        model_cache = (model, criterion, optimizer, scheduler)

        return model_cache
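
Note: the optimizer above trains the pretrained C3D layers at the base learning rate and the freshly initialized classifier at 10x that rate. A minimal, self-contained sketch of the same two-group pattern, with a toy module standing in for C3D_model.get_1x_lr_params / get_10x_lr_params (which are not shown in this snippet):

import torch.nn as nn
import torch.optim as optim

class TinyNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.backbone = nn.Linear(8, 8)  # stands in for the pretrained conv layers
        self.head = nn.Linear(8, 2)      # stands in for the new classifier layer

    def forward(self, x):
        return self.head(self.backbone(x))

lr = 1e-3
net = TinyNet()
train_params = [{'params': net.backbone.parameters(), 'lr': lr},
                {'params': net.head.parameters(), 'lr': lr * 10}]
optimizer = optim.SGD(train_params, lr=lr, momentum=0.9, weight_decay=5e-4)

print([g['lr'] for g in optimizer.param_groups])  # [0.001, 0.01]; each group keeps its own rate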
Example #2
def train_model(dataset=dataset, save_dir=save_dir, num_classes=num_classes, lr=lr,
                num_epochs=nEpochs, save_epoch=snapshot, useTest=useTest, test_interval=nTestInterval):
    """
        Args:
            num_classes (int): Number of classes in the data
            num_epochs (int, optional): Number of epochs to train for.
    """

    if modelName == 'C3D':
        model = C3D_model.C3D(num_classes=num_classes, pretrained=True)
        train_params = [{'params': C3D_model.get_1x_lr_params(model), 'lr': lr},
                        {'params': C3D_model.get_10x_lr_params(model), 'lr': lr * 10}]
    
    else:
        print('We only implemented C3D models.')
        raise NotImplementedError
    criterion = nn.CrossEntropyLoss()  # standard cross-entropy loss for classification
    optimizer = optim.SGD(train_params, lr=lr, momentum=0.9, weight_decay=5e-4)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10,
                                          gamma=0.1)  # the scheduler divides the lr by 10 every 10 epochs

    if resume_epoch == 0:
        print("Training {} from scratch...".format(modelName))
    else:
        checkpoint = torch.load(os.path.join(save_dir, 'models', saveName + '_epoch-' + str(resume_epoch - 1) + '.pth.tar'),
                       map_location=lambda storage, loc: storage)   # Load all tensors onto the CPU
        print("Initializing weights from: {}...".format(
            os.path.join(save_dir, 'models', saveName + '_epoch-' + str(resume_epoch - 1) + '.pth.tar')))
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['opt_dict'])

    print('Total params: %.2fM' % (sum(p.numel() for p in model.parameters()) / 1000000.0))
    model.to(device)
    criterion.to(device)

    log_dir = os.path.join(save_dir, 'models', datetime.now().strftime('%b%d_%H-%M-%S') + '_' + socket.gethostname())
    writer = SummaryWriter(log_dir=log_dir)

    print('Training model on {} dataset...'.format(dataset))
    train_dataloader = DataLoader(VideoDataset(dataset=dataset, split='train', clip_len=16), batch_size=1, shuffle=True, num_workers=0)
    val_dataloader   = DataLoader(VideoDataset(dataset=dataset, split='val',  clip_len=16), batch_size=1, num_workers=0)
    test_dataloader  = DataLoader(VideoDataset(dataset=dataset, split='test', clip_len=16), batch_size=1, num_workers=0)

    trainval_loaders = {'train': train_dataloader, 'val': val_dataloader}
    trainval_sizes = {x: len(trainval_loaders[x].dataset) for x in ['train', 'val']}
    test_size = len(test_dataloader.dataset)

    for epoch in range(resume_epoch, num_epochs):
        # each epoch has a training and validation step
        for phase in ['train', 'val']:
            start_time = timeit.default_timer()

            # reset the running loss and corrects
            running_loss = 0.0
            running_corrects = 0.0

            # set model to train() or eval() mode depending on whether it is being
            # trained or validated. Primarily affects layers such as BatchNorm or Dropout.
            if phase == 'train':
                # note: since PyTorch 1.1, scheduler.step() should be called once per
                # epoch *after* optimizer.step(); see the sketch after this example
                scheduler.step()
                model.train()
            else:
                model.eval()

            for inputs, labels in tqdm(trainval_loaders[phase]):
                # move inputs and labels to the device the training is taking place on
                # (Variable is deprecated since PyTorch 0.4; plain tensors work directly,
                # and classification inputs do not need requires_grad)
                inputs = inputs.to(device)
                labels = labels.to(device)
                optimizer.zero_grad()

                if phase == 'train':
                    outputs = model(inputs)

                else:
                    with torch.no_grad():
                        outputs = model(inputs)

                probs = nn.Softmax(dim=1)(outputs)
                preds = torch.max(probs, 1)[1]
                labels = labels.long()
                loss = criterion(outputs, labels)

                if phase == 'train':
                    loss.backward()
                    optimizer.step()

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / trainval_sizes[phase]
            epoch_acc = running_corrects.double() / trainval_sizes[phase]

            if phase == 'train':
                writer.add_scalar('data/train_loss_epoch', epoch_loss, epoch)
                writer.add_scalar('data/train_acc_epoch', epoch_acc, epoch)
            else:
                writer.add_scalar('data/val_loss_epoch', epoch_loss, epoch)
                writer.add_scalar('data/val_acc_epoch', epoch_acc, epoch)

            print("[{}] Epoch: {}/{} Loss: {} Acc: {}".format(phase, epoch+1, nEpochs, epoch_loss, epoch_acc))
            stop_time = timeit.default_timer()
            print("Execution time: " + str(stop_time - start_time) + "\n")

        if epoch % save_epoch == (save_epoch - 1):
            torch.save({
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'opt_dict': optimizer.state_dict(),
            }, os.path.join(save_dir, 'models', saveName + '_epoch-' + str(epoch) + '.pth.tar'))
            print("Save model at {}\n".format(os.path.join(save_dir, 'models', saveName + '_epoch-' + str(epoch) + '.pth.tar')))

        

    writer.close()
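
Note: this loop calls scheduler.step() at the top of each training epoch. Since PyTorch 1.1 the documented order is to call optimizer.step() first and scheduler.step() once per epoch afterwards; stepping the scheduler first effectively skips the first value of the learning-rate schedule. A minimal sketch of the recommended ordering, with a stand-in linear model and random data:

import torch
import torch.nn as nn
import torch.optim as optim

model = nn.Linear(4, 2)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

for epoch in range(20):
    inputs = torch.randn(8, 4)
    labels = torch.randint(0, 2, (8,))
    optimizer.zero_grad()
    loss = criterion(model(inputs), labels)
    loss.backward()
    optimizer.step()
    scheduler.step()   # once per epoch, after the optimizer update

print(optimizer.param_groups[0]['lr'])  # 0.001 after two decays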
Example #3
def start():
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("Device being used:", device)

    nEpochs = 101  # number of training epochs
    resume_epoch = 0  # epoch to resume from; 0 trains from scratch, 99 resumes from the checkpoint saved after epoch 99
    useTest = True  # whether to evaluate on the test set during training
    nTestInterval = 20  # run a test pass every nTestInterval epochs
    snapshot = 25  # save a checkpoint every `snapshot` epochs
    lr = 1e-5  # Learning rate

    dataset = "ucf101"
    num_classes = 101

    # directory containing this file
    save_dir_root = os.path.join(os.path.dirname(os.path.abspath(__file__)))
    # experiment name: the name of the folder containing this file
    exp_name = os.path.dirname(os.path.abspath(__file__)).split('/')[-1]

    # when resuming, reuse the id of the latest run; when starting fresh, use the latest id plus one
    # glob.glob() expands the pattern to every matching file or directory
    if resume_epoch != 0:
        runs = sorted(glob.glob(os.path.join(save_dir_root, 'run', 'run_*')))
        run_id = int(runs[-1].split('_')[-1]) if runs else 0
    else:
        runs = sorted(glob.glob(os.path.join(save_dir_root, 'run', 'run_*')))
        run_id = int(runs[-1].split('_')[-1]) + 1 if runs else 0

    save_dir = os.path.join(save_dir_root, 'run', 'run_' + str(run_id))
    modelName = 'C3D'
    saveName = modelName + '-' + dataset

    # build the model
    model = C3D_model.C3D(num_classes=num_classes, pretrained=False)
    train_params = [{'params': C3D_model.get_1x_lr_params(model), 'lr': lr},
                    {'params': C3D_model.get_10x_lr_params(model), 'lr': lr * 10}]

    criterion = nn.CrossEntropyLoss()  # cross-entropy loss
    optimizer = optim.SGD(train_params, lr=lr, momentum=0.9, weight_decay=5e-4)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10,
                                          gamma=0.1)

    if resume_epoch == 0:
        print("Training {} from scratch...".format(modelName))
    else:
        checkpoint = torch.load(
            os.path.join(save_dir, 'models', saveName + '_epoch-' + str(resume_epoch - 1) + '.pth.tar'),
            map_location=lambda storage, loc: storage)  # Load all tensors onto the CPU
        print("Initializing weights from: {}...".format(
            os.path.join(save_dir, 'models', saveName + '_epoch-' + str(resume_epoch - 1) + '.pth.tar')))
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['opt_dict'])

    # report the total number of parameters
    print('Total params: %.2fM' % (sum(p.numel() for p in model.parameters()) / 1000000.0))
    model.to(device)
    criterion.to(device)

    log_dir = os.path.join(save_dir, 'models', datetime.now().strftime('%b%d_%H-%M-%S') + '_' + socket.gethostname())
    writer = SummaryWriter(log_dir=log_dir)

    print('Training model on {} dataset...'.format(dataset))
    train_dataloader = DataLoader(VideoDataset(dataset=dataset, split='train', clip_len=16), batch_size=6, shuffle=True,
                                  num_workers=0)
    val_dataloader = DataLoader(VideoDataset(dataset=dataset, split='val', clip_len=16), batch_size=6, num_workers=0)
    test_dataloader = DataLoader(VideoDataset(dataset=dataset, split='test', clip_len=16), batch_size=6, num_workers=0)

    trainval_loaders = {'train': train_dataloader, 'val': val_dataloader}
    trainval_sizes = {x: len(trainval_loaders[x].dataset) for x in ['train', 'val']}
    test_size = len(test_dataloader.dataset)

    for epoch in range(resume_epoch, nEpochs):
        # each epoch has a training and validation step
        for phase in ['train', 'val']:
            start_time = timeit.default_timer()

            # reset the running loss and corrects
            running_loss = 0.0
            running_corrects = 0.0

            # set model to train() or eval() mode depending on whether it is being
            # trained or validated. Primarily affects layers such as BatchNorm or Dropout.
            if phase == 'train':
                # scheduler.step() is to be called once every epoch during training
                scheduler.step()
                model.train()
            else:
                model.eval()
            # tqdm renders a progress bar over the loader
            for inputs, labels in tqdm(trainval_loaders[phase]):
                # move inputs and labels to the device the training is taking place on
                # (torch.tensor(tensor) makes an extra copy; moving the batch to the
                # device is enough, and inputs do not need requires_grad)
                inputs = inputs.to(device)
                labels = labels.to(device)
                optimizer.zero_grad()

                if phase == 'train':
                    outputs = model(inputs)
                else:
                    with torch.no_grad():
                        outputs = model(inputs)

                probs = nn.Softmax(dim=1)(outputs)
                preds = torch.max(probs, 1)[1]
                loss = criterion(outputs, labels.long())

                if phase == 'train':
                    loss.backward()
                    optimizer.step()

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / trainval_sizes[phase]
            epoch_acc = running_corrects.double() / trainval_sizes[phase]

            if phase == 'train':
                writer.add_scalar('data/train_loss_epoch', epoch_loss, epoch)
                writer.add_scalar('data/train_acc_epoch', epoch_acc, epoch)
            else:
                writer.add_scalar('data/val_loss_epoch', epoch_loss, epoch)
                writer.add_scalar('data/val_acc_epoch', epoch_acc, epoch)

            print("[{}] Epoch: {}/{} Loss: {} Acc: {}".format(phase, epoch+1, nEpochs, epoch_loss, epoch_acc))
            stop_time = timeit.default_timer()
            print("Execution time: " + str(stop_time - start_time) + "\n")

        if epoch % snapshot == (snapshot - 1):
            torch.save({
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'opt_dict': optimizer.state_dict(),
            }, os.path.join(save_dir, 'models', saveName + '_epoch-' + str(epoch) + '.pth.tar'))
            print("Save model at {}\n".format(os.path.join(save_dir, 'models', saveName + '_epoch-' + str(epoch) + '.pth.tar')))

        if useTest and epoch % nTestInterval == (nTestInterval - 1):
            model.eval()
            start_time = timeit.default_timer()

            running_loss = 0.0
            running_corrects = 0.0

            for inputs, labels in tqdm(test_dataloader):
                inputs = inputs.to(device)
                labels = labels.to(device)

                with torch.no_grad():
                    outputs = model(inputs)
                probs = nn.Softmax(dim=1)(outputs)
                preds = torch.max(probs, 1)[1]
                loss = criterion(outputs, labels.long())

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / test_size
            epoch_acc = running_corrects.double() / test_size

            writer.add_scalar('data/test_loss_epoch', epoch_loss, epoch)
            writer.add_scalar('data/test_acc_epoch', epoch_acc, epoch)

            print("[test] Epoch: {}/{} Loss: {} Acc: {}".format(epoch+1, nEpochs, epoch_loss, epoch_acc))
            stop_time = timeit.default_timer()
            print("Execution time: " + str(stop_time - start_time) + "\n")

    writer.close()
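
Note: start() picks the next run directory by globbing run/run_*. One caveat: sorted() on the raw paths is lexicographic, so 'run_10' sorts before 'run_2'; the self-contained sketch below extracts the ids and sorts them numerically instead (the directory layout is simulated in a temporary folder).

import glob
import os
import tempfile

with tempfile.TemporaryDirectory() as root:
    for i in (0, 1, 2, 10):
        os.makedirs(os.path.join(root, 'run', 'run_' + str(i)))

    runs = glob.glob(os.path.join(root, 'run', 'run_*'))
    # sort numerically; sorted(runs) would put 'run_10' before 'run_2'
    run_ids = sorted(int(r.split('_')[-1]) for r in runs)
    run_id = run_ids[-1] + 1 if run_ids else 0
    print(run_id)  # 11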
Example #4
def train_model(dataset=dataset, save_dir=save_dir, num_classes=num_classes, lr=lr,
                num_epochs=nEpochs, save_epoch=snapshot, useTest=useTest, test_interval=nTestInterval):
    """
        Args:
            num_classes (int): Number of classes in the data
            num_epochs (int, optional): Number of epochs to train for.
    """

    if modelName == 'C3D':
        model = C3D_model.C3D(num_classes=num_classes, pretrained=False)
        train_params = [{'params': C3D_model.get_1x_lr_params(model), 'lr': lr},
                        {'params': C3D_model.get_10x_lr_params(model), 'lr': lr * 10}]
    elif modelName == 'R2Plus1D':
        model = R2Plus1D_model.R2Plus1DClassifier(num_classes=num_classes, layer_sizes=(2, 2, 2, 2))
        train_params = [{'params': R2Plus1D_model.get_1x_lr_params(model), 'lr': lr},
                        {'params': R2Plus1D_model.get_10x_lr_params(model), 'lr': lr * 10}]
    elif modelName == 'R3D':
        model = R3D_model.R3DClassifier(num_classes=num_classes, layer_sizes=(3, 4, 6, 3))
        # model = resnet.ResNet(num_classes=num_classes, layers=(3, 4, 6, 3), sample_size=112, sample_duration=16)
        train_params = model.parameters()
    elif modelName == 'R2D':
        model = R2Dnet.R2DClassifier(group_num_classes=num_classes, pretrained=True)
        # model = resnet.ResNet(num_classes=num_classes, layers=(3, 4, 6, 3), sample_size=112, sample_duration=16)
        train_params = model.parameters()
    else:
        print('Unsupported model name; implemented models are C3D, R2Plus1D, R3D, and R2D.')
        raise NotImplementedError
    criterion = nn.CrossEntropyLoss()  # standard crossentropy loss for classification
    optimizer = optim.SGD(train_params, lr=lr, momentum=0.9, weight_decay=5e-4)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10,
                                          gamma=0.1)  # the scheduler divides the lr by 10 every 10 epochs

    model.to(device)  # moved up here because resuming needs the parameters already on the device

    if resume_epoch == 0:
        print("Training {} from scratch...".format(modelName))
    else:
        checkpoint = torch.load(os.path.join(save_dir, 'models', saveName + '_epoch-' + str(resume_epoch - 1) + '.pth.tar'),
                       map_location=lambda storage, loc: storage)   # Load all tensors onto the CPU
        print("Initializing weights from: {}...".format(
            os.path.join(save_dir, 'models', saveName + '_epoch-' + str(resume_epoch - 1) + '.pth.tar')))
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['opt_dict'])

    print('Total params: %.2fM' % (sum(p.numel() for p in model.parameters()) / 1000000.0))
    # model.to(device)
    criterion.to(device)

    log_dir = os.path.join(save_dir, 'models', datetime.now().strftime('%b%d_%H-%M-%S') + '_' + socket.gethostname())
    writer = SummaryWriter(log_dir=log_dir)

    print('Training model on {} dataset...'.format(dataset))
    train_dataloader = DataLoader(VolleyballDataset(dataset=dataset, split='train',clip_len=16), batch_size=4, shuffle=True, \
                                  num_workers=0)
    val_dataloader   = DataLoader(VolleyballDataset(dataset=dataset, split='val',  clip_len=16), batch_size=4, num_workers=0)
    test_dataloader  = DataLoader(VolleyballDataset(dataset=dataset, split='test', clip_len=16), batch_size=4, num_workers=0)

    trainval_loaders = {'train': train_dataloader, 'val': val_dataloader}
    trainval_sizes = {x: len(trainval_loaders[x].dataset) for x in ['train', 'val']}
    test_size = len(test_dataloader.dataset)

    for epoch in range(resume_epoch, num_epochs):
        # each epoch has a training and validation step
        for phase in ['train', 'val']:
            start_time = timeit.default_timer()

            # reset the running loss and corrects
            running_loss = 0.0
            running_corrects = 0.0

            # set model to train() or eval() mode depending on whether it is being
            # trained or validated. Primarily affects layers such as BatchNorm or Dropout.
            if phase == 'train':
                # scheduler.step() is to be called once every epoch during training
                scheduler.step()
                model.train()
            else:
                model.eval()

            torch.backends.cudnn.benchmark = False
            # for inputs, bbox_inputs, labels, adjacent_matrix in tqdm(trainval_loaders[phase]):
            # for inputs, labels in tqdm(trainval_loaders[phase]):
            for inputs, labels, dists in tqdm(trainval_loaders[phase]):
                # move inputs, labels and dists to the device the training is taking place on
                # (Variable is deprecated since PyTorch 0.4; plain tensors work directly)
                inputs = inputs.to(device)
                # bbox_inputs = Variable(bbox_inputs, requires_grad=True).to(device)
                # adjacent_matrix = Variable(adjacent_matrix, requires_grad=True).to(device)
                labels = labels.to(device)
                dists = dists.to(device)
                # dist_num = Variable(dist_num).to(device)
                optimizer.zero_grad()
                if phase == 'train':
                    outputs = model(inputs, dists)
                    # outputs = model(inputs, bbox_inputs, adjacent_matrix)
                else:
                    with torch.no_grad():
                        # outputs = model(inputs, bbox_inputs, adjacent_matrix)
                        outputs = model(inputs, dists)
                probs = nn.Softmax(dim=1)(outputs)
                preds = torch.max(probs, 1)[1]
                loss = criterion(outputs, labels)
                print("labels",labels)
                print("outputs",outputs)
                print("loss",loss)

                if phase == 'train':
                    loss.backward(retain_graph=True)  # retain_graph is only needed if the graph is reused after this call
                    optimizer.step()

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / trainval_sizes[phase]
            epoch_acc = running_corrects.double() / trainval_sizes[phase]

            if phase == 'train':
                writer.add_scalar('data/train_loss_epoch', epoch_loss, epoch)
                writer.add_scalar('data/train_acc_epoch', epoch_acc, epoch)
            else:
                writer.add_scalar('data/val_loss_epoch', epoch_loss, epoch)
                writer.add_scalar('data/val_acc_epoch', epoch_acc, epoch)

            print("[{}] Epoch: {}/{} Loss: {} Acc: {}".format(phase, epoch+1, nEpochs, epoch_loss, epoch_acc))
            stop_time = timeit.default_timer()
            print("Execution time: " + str(stop_time - start_time) + "\n")

        if epoch % save_epoch == (save_epoch - 1):
            print(os.path.join(save_dir, 'models', saveName + '_epoch-' + str(epoch) + '.pth.tar'))
            torch.save({
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'opt_dict': optimizer.state_dict(),
            }, os.path.join(save_dir, 'models', saveName + '_epoch-' + str(epoch) + '.pth.tar'))
            print("Save model at {}\n".format(os.path.join(save_dir, 'models', saveName + '_epoch-' + str(epoch) + '.pth.tar')))

        if useTest and epoch % test_interval == (test_interval - 1):
            model.eval()
            start_time = timeit.default_timer()

            running_loss = 0.0
            running_corrects = 0.0

            for inputs, bbox_inputs, labels, adjacent_matrix in tqdm(test_dataloader):
            # for inputs, labels in tqdm(test_dataloader):
                bbox_inputs = bbox_inputs.to(device)
                adjacent_matrix = adjacent_matrix.to(device)
                inputs = inputs.to(device)
                labels = labels.to(device)  # labels also need moving, otherwise criterion() fails on GPU

                with torch.no_grad():
                    # outputs = model(inputs)
                    outputs = model(inputs, bbox_inputs, adjacent_matrix)
                probs = nn.Softmax(dim=1)(outputs)
                preds = torch.max(probs, 1)[1]
                loss = criterion(outputs, labels)

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / test_size
            epoch_acc = running_corrects.double() / test_size

            writer.add_scalar('data/test_loss_epoch', epoch_loss, epoch)
            writer.add_scalar('data/test_acc_epoch', epoch_acc, epoch)

            print("[test] Epoch: {}/{} Loss: {} Acc: {}".format(epoch+1, nEpochs, epoch_loss, epoch_acc))
            stop_time = timeit.default_timer()
            print("Execution time: " + str(stop_time - start_time) + "\n")

    writer.close()
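
Note: every example here resumes from a dict with the keys 'epoch', 'state_dict' and 'opt_dict'. A minimal round-trip of that checkpoint format with a stand-in linear model (the file name mimics the saveName + '_epoch-N.pth.tar' convention used above):

import os
import tempfile

import torch
import torch.nn as nn
import torch.optim as optim

model = nn.Linear(4, 2)
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9)

with tempfile.TemporaryDirectory() as d:
    path = os.path.join(d, 'C3D-demo_epoch-0.pth.tar')
    torch.save({'epoch': 1,
                'state_dict': model.state_dict(),
                'opt_dict': optimizer.state_dict()}, path)

    # map_location keeps every tensor on the CPU, as in the examples above
    checkpoint = torch.load(path, map_location=lambda storage, loc: storage)
    model.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['opt_dict'])
    print('resumed at epoch', checkpoint['epoch'])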
Example #5
def train_model(dataset=dataset, save_dir=SAVE_FILE_FOLDER, num_classes=num_classes, lr=lr,
                num_epochs=nEpochs, save_epoch=snapshot, useTest=useTest, test_interval=nTestInterval):
    """
        Args:
            num_classes (int): Number of classes in the data
            num_epochs (int, optional): Number of epochs to train for.
    """

    if modelName == 'C3D':
        model = C3D_model.C3D(num_classes=num_classes, pretrained=IF_PRETRAIN)
        train_params = [{'params': C3D_model.get_1x_lr_params(model), 'lr': lr},
                        {'params': C3D_model.get_10x_lr_params(model), 'lr': lr * 10}]
    elif modelName == 'C3D_td5':
        model = C3D_model.C3D_td5(num_classes=num_classes, pretrained=IF_PRETRAIN)
        train_params = [{'params': C3D_model.get_1x_lr_params(model), 'lr': lr},
                        {'params': C3D_model.get_10x_lr_params(model), 'lr': lr * 10}]
    elif modelName == 'R2Plus1D':
        model = R2Plus1D_model.R2Plus1DClassifier(num_classes=num_classes, layer_sizes=(2, 2, 2, 2))
        train_params = [{'params': R2Plus1D_model.get_1x_lr_params(model), 'lr': lr},
                        {'params': R2Plus1D_model.get_10x_lr_params(model), 'lr': lr * 10}]
    elif modelName == 'R3D':
        model = R3D_model.R3DClassifier(num_classes=num_classes, layer_sizes=(2, 2, 2, 2))
        train_params = model.parameters()
    else:
        print('Unsupported model name; implemented models are C3D, C3D_td5, R2Plus1D, and R3D.')
        raise NotImplementedError
    criterion = nn.CrossEntropyLoss()  # standard cross-entropy loss for classification
    if _optimizer == "SGD":
        optimizer = optim.SGD(train_params, lr=lr, momentum=MOMENTUM, weight_decay=WD)
    elif _optimizer == "Adam":
        optimizer = optim.Adam(train_params, lr=lr, weight_decay=WD)
    # print(optimizer)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=SCHEDULER_STEP_SIZE,
                                          gamma=SCHEDULER_GAMMA)  # multiplies the lr by SCHEDULER_GAMMA every SCHEDULER_STEP_SIZE epochs

    model.to(device)
    criterion.to(device)

    # if resume_epoch == 0:
    if resume_model_path is None:
        print("Training {} from scratch...".format(modelName))
    else:
        checkpoint = torch.load(
            resume_model_path,
            map_location=lambda storage, loc: storage)  # Load all tensors onto the CPU
        print("Initializing weights from: {}...".format(resume_model_path))
        model.load_state_dict(checkpoint['state_dict'])
        if RESUM_OPTIMIZER:
            optimizer.load_state_dict(checkpoint['opt_dict'])

    print('Total params: %.2fM' % (sum(p.numel() for p in model.parameters()) / 1000000.0))

    writer = SummaryWriter(logdir=LOG_PATH)

    print('Training model on {} dataset...'.format(dataset))
    train_dataloader = DataLoader(VideoDataset(dataset=dataset, split='train', clip_len=clip_len, preprocess=IF_PREPROCESS_TRAIN, grayscale=grayscale), batch_size=BS, shuffle=True, num_workers=N_WORKERS)
    val_dataloader   = DataLoader(VideoDataset(dataset=dataset, split='val',  clip_len=clip_len, preprocess=IF_PREPROCESS_VAL, grayscale=grayscale), batch_size=BS, num_workers=N_WORKERS)
    test_dataloader  = DataLoader(VideoDataset(dataset=dataset, split='test', clip_len=clip_len, preprocess=IF_PREPROCESS_TEST, grayscale=grayscale), batch_size=BS, num_workers=N_WORKERS)

    trainval_loaders = {'train': train_dataloader, 'val': val_dataloader}
    trainval_sizes = {x: len(trainval_loaders[x].dataset) for x in ['train', 'val']}
    test_size = len(test_dataloader.dataset)

    cudnn.benchmark = True

    global_best_val_acc = 0

    for epoch in range(num_epochs):
        # each epoch has a training and validation step
        for phase in ['train', 'val']:
            start_time = timeit.default_timer()

            # reset the running loss and corrects
            running_loss = 0.0
            running_corrects = 0.0
            # running_roc = 0.0

            list_pred = list()
            list_label = list()

            # print(optimizer)

            # set model to train() or eval() mode depending on whether it is being
            # trained or validated. Primarily affects layers such as BatchNorm or Dropout.
            if phase == 'train':
                # scheduler.step() is to be called once every epoch during training
                scheduler.step()
                model.train()
            else:
                model.eval()

            # for inputs, labels in tqdm(trainval_loaders[phase]):
            for inputs, labels in trainval_loaders[phase]:
                # move inputs and labels to the device the training is taking place on
                inputs = inputs.to(device)  # Variable is deprecated; plain tensors work directly
                labels = labels.to(device)
                optimizer.zero_grad()

                if phase == 'train':
                    outputs = model(inputs)
                else:
                    with torch.no_grad():
                        outputs = model(inputs)

                probs = nn.Softmax(dim=1)(outputs)
                preds = torch.max(probs, 1)[1]
                loss = criterion(outputs, labels)

                if phase == 'train':
                    loss.backward()
                    optimizer.step()

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)
                # try:
                #     running_roc += roc_auc_score(labels.data.cpu(), preds.cpu())
                # except:
                #     y_true = labels.data.cpu().tolist()
                #     y_true_2 = y_true.copy()
                #     for i_cls in range(N_CLASSES):
                #         y_true_2.append(i_cls)
                #
                #     y_pred = preds.cpu().tolist()
                #     y_pred_2 = y_pred.copy()
                #     for i_cls in range(N_CLASSES):
                #         y_pred_2.append(i_cls)
                #
                #     running_roc += roc_auc_score(y_true_2, y_pred_2)
                #
                # run_count += 1
                list_label += labels.data.cpu().tolist()
                list_pred += preds.cpu().tolist()

            epoch_loss = running_loss / trainval_sizes[phase]
            epoch_acc = running_corrects.double() / trainval_sizes[phase]
            epoch_roc = multiclass_roc_score(label=list_label, pred=list_pred, n_cls=N_CLASSES)

            if phase == 'train':
                writer.add_scalar('data/train_loss_epoch', epoch_loss, epoch)
                writer.add_scalar('data/train_acc_epoch', epoch_acc, epoch)
                writer.add_scalar('data/train_roc_epoch', epoch_roc, epoch)
            else:
                writer.add_scalar('data/val_loss_epoch', epoch_loss, epoch)
                writer.add_scalar('data/val_acc_epoch', epoch_acc, epoch)
                writer.add_scalar('data/val_roc_epoch', epoch_roc, epoch)
                # if epoch_acc >= global_best_val_acc:
                #     torch.save({
                #         'epoch': epoch + 1,
                #         'state_dict': model.state_dict(),
                #         'opt_dict': optimizer.state_dict(),
                #     }, os.path.join(SAVE_FILE_FOLDER, 'models', EXP_NAME + '_epoch-' + str(epoch) + 'ValAcc_{:10.4f}_'.format(epoch_loss) + '.pth.tar'))
                #     print("Save model at {}\n".format(
                #         os.path.join(SAVE_FILE_FOLDER, 'models', EXP_NAME + '_epoch-' + str(epoch) + 'ValAcc_{:10.4f}_'.format(epoch_loss) + '.pth.tar')))

            print("[{}] Epoch: {}/{} Loss: {} Acc: {}, ROC:{}".format(phase, epoch+1, nEpochs, epoch_loss, epoch_acc, epoch_roc))
            stop_time = timeit.default_timer()
            # print("Execution time: " + str(stop_time - start_time) + "\n")

        if epoch % save_epoch == (save_epoch - 1):
            torch.save({
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'opt_dict': optimizer.state_dict(),
            }, os.path.join(SAVE_FILE_FOLDER, EXP_NAME + '_epoch-' + str(epoch) + '.pth.tar'))
            print("Save model at {}\n".format(os.path.join(SAVE_FILE_FOLDER, EXP_NAME + '_epoch-' + str(epoch) + '.pth.tar')))

        if useTest and epoch % test_interval == (test_interval - 1):
            model.eval()
            start_time = timeit.default_timer()

            running_loss = 0.0
            running_corrects = 0.0
            # running_roc = 0.0
            list_pred = list()
            list_label = list()

            # for inputs, labels in tqdm(test_dataloader):
            for inputs, labels in test_dataloader:
                inputs = inputs.to(device)
                labels = labels.to(device)

                with torch.no_grad():
                    outputs = model(inputs)
                probs = nn.Softmax(dim=1)(outputs)
                preds = torch.max(probs, 1)[1]
                loss = criterion(outputs, labels)

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)
                # try:
                #     running_roc += roc_auc_score(labels.data.cpu(), preds.cpu())
                # except:
                #     running_roc += 0.5
                # run_count += 1
                list_label += labels.data.cpu().tolist()
                list_pred += preds.cpu().tolist()

            epoch_loss = running_loss / test_size
            epoch_acc = running_corrects.double() / test_size
            epoch_roc = multiclass_roc_score(label=list_label, pred=list_pred, n_cls=N_CLASSES)

            writer.add_scalar('data/test_loss_epoch', epoch_loss, epoch)
            writer.add_scalar('data/test_acc_epoch', epoch_acc, epoch)
            writer.add_scalar('data/test_roc_epoch', epoch_roc, epoch)

            print("[test] Epoch: {}/{} Loss: {} Acc:{} ROC: {}".format(epoch+1, nEpochs, epoch_loss, epoch_acc, epoch_roc))
            stop_time = timeit.default_timer()
            # print("Execution time: " + str(stop_time - start_time) + "\n")

    writer.close()
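
Note: multiclass_roc_score is a project helper whose implementation is not shown in this example. A plausible stand-in, assuming it computes a macro one-vs-rest AUC from hard predictions by treating each predicted label as a one-hot score, could look like the following sketch.

from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import label_binarize

def multiclass_roc_score(label, pred, n_cls):
    # hypothetical reimplementation: macro one-vs-rest AUC over hard predictions
    classes = list(range(n_cls))
    y_true = label_binarize(label, classes=classes)
    y_score = label_binarize(pred, classes=classes)
    return roc_auc_score(y_true, y_score, average='macro')

print(multiclass_roc_score(label=[0, 1, 2, 1], pred=[0, 1, 1, 1], n_cls=3))  # 0.75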
Example #6
def train_model(dataset=dataset,
                save_dir=save_dir,
                num_classes=num_classes,
                lr=lr,
                num_epochs=nEpochs,
                save_epoch=snapshot,
                useTest=useTest,
                test_interval=nTestInterval):
    """
        Args:
            num_classes (int): Number of classes in the data
            num_epochs (int, optional): Number of epochs to train for.
    """

    if modelName == 'C3D':
        model = C3D_model.C3D(num_classes=num_classes, pretrained=True)
        train_params = [{
            'params': C3D_model.get_1x_lr_params(model),
            'lr': lr
        }, {
            'params': C3D_model.get_10x_lr_params(model),
            'lr': lr * 10
        }]
    elif modelName == 'R2Plus1D':
        # model = R2Plus1D_model.R2Plus1DClassifier(num_classes=num_classes, layer_sizes=(2, 2, 2, 2))
        # train_params = [{'params': R2Plus1D_model.get_1x_lr_params(model), 'lr': lr},
        #                 {'params': R2Plus1D_model.get_10x_lr_params(model), 'lr': lr * 10}]
        model = models.video.r2plus1d_18(pretrained=True, progress=True)
        num_ftrs = model.fc.in_features
        model.fc = nn.Linear(num_ftrs, 2)
        model = model.to(device)
        train_params = model.parameters()
    elif modelName == 'R3D':
        # model = R3D_model.R3DClassifier(num_classes=num_classes, layer_sizes=(2, 2, 2, 2))
        # train_params = model.parameters()
        model = models.video.r3d_18(pretrained=True, progress=True)
        num_ftrs = model.fc.in_features
        model.fc = nn.Linear(num_ftrs, 2)
        model = model.to(device)
        train_params = model.parameters()
    elif modelName == 'MC3':
        # model = R3D_model.R3DClassifier(num_classes=num_classes, layer_sizes=(2, 2, 2, 2))
        # train_params = model.parameters()
        model = models.video.mc3_18(pretrained=True, progress=True)
        num_ftrs = model.fc.in_features
        model.fc = nn.Linear(num_ftrs, 2)
        model = model.to(device)
        train_params = model.parameters()

    elif modelName == 'I3D':
        model = I3D.InceptionI3d(num_classes=157)
        load_file = 'rgb_charades.pt'
        model = model.to(device)
        model.load_state_dict(torch.load(load_file))
        model.replace_logits(num_classes=2)
        train_params = model.parameters()

    else:
        print('Unsupported model name; implemented models are C3D, R2Plus1D, R3D, MC3, and I3D.')
        raise NotImplementedError
    criterion = nn.CrossEntropyLoss(weight=torch.tensor(
        [1.0 / 375,
         1.0 / 4388]))  # class-weighted cross-entropy to counter the 375/4388 class imbalance
    optimizer = optim.SGD(train_params, lr=lr, momentum=0.9, weight_decay=5e-4)
    scheduler = optim.lr_scheduler.StepLR(
        optimizer, step_size=10,
        gamma=0.1)  # the scheduler divides the lr by 10 every 10 epochs
    #sampler = torch.utils.data.WeightedRandomSampler([1.0/212, 1.0/4388], 8, replacement=True)

    if resume_epoch == 0:
        print("Training {} from scratch...".format(modelName))
    else:
        #checkpoint = torch.load(os.path.join(save_dir, 'checkpoints', saveName + '_epoch-' + str(resume_epoch - 1) + '.pth.tar'),
        #               map_location=lambda storage, loc: storage)   # Load all tensors onto the CPU
        #print("Initializing weights from: {}...".format(
        #    os.path.join(save_dir, 'models', saveName + '_epoch-' + str(resume_epoch - 1) + '.pth.tar')))
        checkpoint = torch.load(
            'run\\run_10\\models\\I3D-celeb-df_epoch-19.pth.tar',
            map_location=lambda storage, loc: storage)

        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['opt_dict'])
        print("Chekpoint loaded")

    print('Total params: %.2fM' %
          (sum(p.numel() for p in model.parameters()) / 1000000.0))
    model.to(device)
    criterion.to(device)

    log_dir = os.path.join(
        save_dir, 'models',
        datetime.now().strftime('%b%d_%H-%M-%S') + '_' + socket.gethostname())
    writer = SummaryWriter(log_dir=log_dir)

    print('Training model on {} dataset...'.format(dataset))
    train_dataloader = DataLoader(VideoDataset(dataset=dataset,
                                               split='train',
                                               clip_len=16,
                                               preprocess=False),
                                  batch_size=8,
                                  shuffle=True,
                                  num_workers=4)
    val_dataloader = DataLoader(VideoDataset(dataset=dataset,
                                             split='val',
                                             clip_len=16),
                                batch_size=8,
                                num_workers=4,
                                shuffle=True)
    test_dataloader = DataLoader(VideoDataset(dataset=dataset,
                                              split='test',
                                              clip_len=16),
                                 batch_size=8,
                                 num_workers=4,
                                 shuffle=True)

    trainval_loaders = {'train': train_dataloader, 'val': val_dataloader}
    trainval_sizes = {
        x: len(trainval_loaders[x].dataset)
        for x in ['train', 'val']
    }
    test_size = len(test_dataloader.dataset)

    training_loss_history = []
    val_loss_history = []
    for epoch in range(resume_epoch, num_epochs):
        #each epoch has a training and validation step
        # for phase in ['train', 'val']:
        #     start_time = timeit.default_timer()

        #     # reset the running loss and corrects
        #     running_loss = 0.0
        #     running_corrects = 0.0

        #     # set model to train() or eval() mode depending on whether it is trained
        #     # or being validated. Primarily affects layers such as BatchNorm or Dropout.
        #     if phase == 'train':
        #         # scheduler.step() is to be called once every epoch during training
        #         scheduler.step()
        #         model.train()
        #     else:
        #         model.eval()

        #     for inputs, labels in tqdm(trainval_loaders[phase]):
        #         # move inputs and labels to the device the training is taking place on
        #         inputs = Variable(inputs, requires_grad=True).to(device)
        #         labels = Variable(labels).to(device)
        #         optimizer.zero_grad()

        #         if phase == 'train':
        #             outputs = model(inputs)

        #         else:
        #             with torch.no_grad():
        #                 outputs = model(inputs)

        #         probs = nn.Softmax(dim=1)(outputs)
        #         preds = torch.max(probs, 1)[1]

        #         loss = criterion(outputs, labels.type(torch.long))

        #         if phase == 'train':
        #             loss.backward()
        #             #torch.nn.utils.clip_grad_norm_(model.parameters(), 1)
        #             optimizer.step()
        #             training_loss_history.append(loss.item())
        #         else:
        #             val_loss_history.append(loss.item())

        #         running_loss += loss.item() * inputs.size(0)
        #         running_corrects += torch.sum(preds == labels.data)
        #         #print("Running loss: ", running_loss)
        #         #print("Running corrects: ", running_corrects)

        #     epoch_loss = running_loss / trainval_sizes[phase]
        #     epoch_acc = running_corrects.double() / trainval_sizes[phase]

        #     if phase == 'train':
        #         writer.add_scalar('data/train_loss_epoch', epoch_loss, epoch)
        #         writer.add_scalar('data/train_acc_epoch', epoch_acc, epoch)
        #     else:
        #         writer.add_scalar('data/val_loss_epoch', epoch_loss, epoch)
        #         writer.add_scalar('data/val_acc_epoch', epoch_acc, epoch)

        #     save_loss(training_loss_history, val_loss_history)

        #     print("[{}] Epoch: {}/{} Loss: {} Acc: {}".format(phase, epoch+1, nEpochs, epoch_loss, epoch_acc))
        #     stop_time = timeit.default_timer()
        #     print("Execution time: " + str(stop_time - start_time) + "\n")

        # if epoch % save_epoch == (save_epoch - 1):
        #     torch.save({
        #         'epoch': epoch + 1,
        #         'state_dict': model.state_dict(),
        #         'opt_dict': optimizer.state_dict(),
        #     }, os.path.join(save_dir, 'models', saveName + '_epoch-' + str(epoch) + '.pth.tar'))
        #     print("Save model at {}\n".format(os.path.join(save_dir, 'models', saveName + '_epoch-' + str(epoch) + '.pth.tar')))

        if useTest and epoch % test_interval == (test_interval - 1):
            model.eval()
            start_time = timeit.default_timer()

            running_loss = 0.0
            running_corrects = 0.0

            cat_probs = None
            cat_labels = None

            for inputs, labels in tqdm(test_dataloader):
                inputs = inputs.to(device)
                labels = labels.to(device)

                with torch.no_grad():
                    outputs = model(inputs)
                probs = nn.Softmax(dim=1)(outputs)
                preds = torch.max(probs, 1)[1]
                loss = criterion(outputs, labels.type(torch.long))

                if cat_probs is not None:
                    cat_probs = torch.cat((cat_probs, probs), dim=0)
                    cat_labels = torch.cat((cat_labels, labels), dim=0)
                else:
                    cat_probs = probs
                    cat_labels = labels

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            save_roc_curve(cat_labels, cat_probs)

            epoch_loss = running_loss / test_size
            epoch_acc = running_corrects.double() / test_size

            writer.add_scalar('data/test_loss_epoch', epoch_loss, epoch)
            writer.add_scalar('data/test_acc_epoch', epoch_acc, epoch)
            print("[test] Epoch: {}/{} Loss: {} Acc: {}".format(
                epoch + 1, nEpochs, epoch_loss, epoch_acc))
            stop_time = timeit.default_timer()
            print("Execution time: " + str(stop_time - start_time) + "\n")

    writer.close()
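
Note: the weight passed to CrossEntropyLoss above is the inverse of what are presumably per-class sample counts (375 and 4388), so errors on the minority class weigh more. The pattern in isolation:

import torch
import torch.nn as nn

# hypothetical per-class sample counts; 375 and 4388 are taken from the example above
class_counts = torch.tensor([375.0, 4388.0])
weights = 1.0 / class_counts  # inverse-frequency class weights

criterion = nn.CrossEntropyLoss(weight=weights)

logits = torch.randn(8, 2)
labels = torch.randint(0, 2, (8,))
print(criterion(logits, labels))  # minority-class mistakes contribute more to the loss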
Example #7
def train_model(dataset=dataset,
                num_classes=num_classes,
                lr=lr,
                num_epochs=nEpochs,
                save_epoch=snapshot):
    """
        Args:
            num_classes (int): Number of classes in the data
            num_epochs (int, optional): Number of epochs to train for.
    """

    if modelName == 'C3D':
        model = C3D_model.C3D(num_classes=num_classes, pretrained=True)
        train_params = [{
            'params': C3D_model.get_1x_lr_params(model),
            'lr': lr
        }, {
            'params': C3D_model.get_10x_lr_params(model),
            'lr': lr * 10
        }]

    elif modelName == 'resnet':
        model = resnet.generate_model(model_depth=50,
                                      n_classes=700,
                                      n_input_channels=3,
                                      shortcut_type='B',
                                      conv1_t_size=7,
                                      conv1_t_stride=1,
                                      no_max_pool=False,
                                      widen_factor=1.0)
        model = resnet.load_pretrained_model(model,
                                             'network/r3d50_K_200ep.pth',
                                             modelName, num_classes)
        train_params = resnet.get_fine_tuning_parameters(model, 'fc')
    else:
        raise NotImplementedError('Only the C3D and resnet models are implemented here.')

    criterion = nn.CrossEntropyLoss()  # standard cross-entropy loss for classification
    optimizer = optim.SGD(train_params, lr=lr, momentum=0.9, weight_decay=5e-4)
    scheduler = optim.lr_scheduler.StepLR(
        optimizer, step_size=10,
        gamma=0.1)  # the scheduler divides the lr by 10 every 10 epochs

    print("Training {} from scratch...".format(modelName))

    print('Total params: %.2fM' %
          (sum(p.numel() for p in model.parameters()) / 1000000.0))
    model.to(device)
    criterion.to(device)

    print('Training model on {} dataset...'.format(dataset))
    train_dataloader = DataLoader(VideoDataset(dataset=dataset,
                                               split='train',
                                               clip_len=16),
                                  batch_size=20,
                                  shuffle=True,
                                  num_workers=4)

    train_size = len(train_dataloader.dataset)

    for epoch in range(0, num_epochs):
        start_time = timeit.default_timer()

        # reset the running loss and corrects
        running_loss = 0.0
        running_corrects = 0.0

        # scheduler.step() is to be called once every epoch during training
        scheduler.step()
        model.train()

        for inputs, labels in tqdm(train_dataloader):
            # move inputs and labels to the device the training is taking place on
            inputs = inputs.to(device)  # Variable is deprecated; plain tensors work directly
            labels = labels.to(device)
            optimizer.zero_grad()

            outputs = model(inputs)

            probs = nn.Softmax(dim=1)(outputs)
            preds = torch.max(probs, 1)[1]
            loss = criterion(outputs, labels)

            loss.backward()
            optimizer.step()

            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels.data)

        epoch_loss = running_loss / train_size
        epoch_acc = running_corrects.double() / train_size

        print("[{}] Epoch: {}/{} Loss: {} Acc: {}".format(
            phase, epoch + 1, nEpochs, epoch_loss, epoch_acc))
        stop_time = timeit.default_timer()
        print("Execution time: " + str(stop_time - start_time) + "\n")

        # save a checkpoint every save_epoch epochs (inside the loop, like the other examples)
        if epoch % save_epoch == (save_epoch - 1):
            torch.save(
                {
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'opt_dict': optimizer.state_dict(),
                }, str(epoch) + '.pth')
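
Note: resnet.get_fine_tuning_parameters(model, 'fc') is a project helper whose implementation is not shown. Assuming it freezes everything except the final fc layer and returns only the parameters that should be trained, a generic sketch of that idea (ToyClassifier is a stand-in for the pretrained 3D ResNet):

import torch.nn as nn
import torch.optim as optim

class ToyClassifier(nn.Module):
    def __init__(self, num_classes=5):
        super().__init__()
        self.backbone = nn.Linear(16, 16)     # stands in for the pretrained layers
        self.fc = nn.Linear(16, num_classes)  # the layer to fine-tune

    def forward(self, x):
        return self.fc(self.backbone(x))

def get_fine_tuning_parameters(model, ft_begin_module='fc'):
    # hypothetical helper: train only parameters whose qualified name starts
    # with ft_begin_module and freeze the rest
    params = []
    for name, p in model.named_parameters():
        if name.startswith(ft_begin_module):
            params.append(p)
        else:
            p.requires_grad = False
    return params

model = ToyClassifier()
train_params = get_fine_tuning_parameters(model, 'fc')
optimizer = optim.SGD(train_params, lr=1e-3, momentum=0.9)
print(sum(p.numel() for p in train_params), 'trainable parameters')  # 85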
Example #8
def train_model(dataset=dataset,
                save_dir=save_dir,
                num_classes=num_classes,
                lr=lr,
                num_epochs=nEpochs,
                save_epoch=snapshot,
                useTest=useTest,
                test_interval=nTestInterval):
    # 1. build the model
    model = C3D_model.C3D(num_classes=num_classes, pretrained=False)
    train_params = [{
        'params': C3D_model.get_1x_lr_params(model),
        'lr': lr
    }, {
        'params': C3D_model.get_10x_lr_params(model),
        'lr': lr * 10
    }]

    # 2. loss function
    criterion = nn.CrossEntropyLoss()
    # 3. optimizer
    optimizer = optim.SGD(train_params, lr=lr, momentum=0.9, weight_decay=5e-4)
    # 4. learning-rate decay
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

    print('Total params: %.2fM' %
          (sum(p.numel() for p in model.parameters()) / 1000000.0))

    model.cuda()
    criterion.cuda()

    # 6. build the train/val/test data loaders
    print('Training model on {} dataset...'.format(dataset))
    train_dataloader = DataLoader(VideoDataset(dataset=dataset,
                                               split='train',
                                               clip_len=16),
                                  batch_size=4,
                                  shuffle=True,
                                  num_workers=0)
    val_dataloader = DataLoader(VideoDataset(dataset=dataset,
                                             split='val',
                                             clip_len=16),
                                batch_size=4,
                                num_workers=0)
    test_dataloader = DataLoader(VideoDataset(dataset=dataset,
                                              split='test',
                                              clip_len=16),
                                 batch_size=4,
                                 num_workers=0)

    trainval_loaders = {'train': train_dataloader, 'val': val_dataloader}
    trainval_sizes = {
        x: len(trainval_loaders[x].dataset)
        for x in ['train', 'val']
    }
    test_size = len(test_dataloader.dataset)

    # 8. start training
    for epoch in range(0, num_epochs):
        for phase in ['train', 'val']:
            start_time = timeit.default_timer()
            # reset the running loss and corrects
            running_loss = 0.0
            running_corrects = 0.0

            if phase == 'train':
                scheduler.step()
                model.train()
            else:
                model.eval()

            for inputs, labels in tqdm(trainval_loaders[phase]):
                inputs = inputs.cuda()
                labels = labels.cuda()
                optimizer.zero_grad()
                # forward pass; gradients are only needed in the training phase
                if phase == 'train':
                    outputs = model(inputs)
                else:
                    # context manager that disables gradient tracking
                    with torch.no_grad():
                        outputs = model(inputs)

                probs = nn.Softmax(dim=1)(outputs)
                preds = torch.max(probs, 1)[1]
                loss = criterion(outputs, labels.long())

                # backpropagate only in the training phase
                if phase == 'train':
                    loss.backward()
                    optimizer.step()

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / trainval_sizes[phase]
            epoch_acc = running_corrects.double() / trainval_sizes[phase]

            print("[{}] Epoch: {}/{} Loss: {} Acc: {}".format(
                phase, epoch + 1, nEpochs, epoch_loss, epoch_acc))
            stop_time = timeit.default_timer()
            print("Execution time: " + str(stop_time - start_time) + "\n")

        # save a checkpoint
        if epoch % save_epoch == (save_epoch - 1):
            torch.save(
                {
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'opt_dict': optimizer.state_dict(),
                },
                os.path.join(save_dir, 'models',
                             saveName + '_epoch-' + str(epoch) + '.pth.tar'))
            print("Save model at {}\n".format(
                os.path.join(save_dir, 'models',
                             saveName + '_epoch-' + str(epoch) + '.pth.tar')))

        # run the test set
        if useTest and epoch % test_interval == (test_interval - 1):
            model.eval()
            start_time = timeit.default_timer()

            running_loss = 0.0
            running_corrects = 0.0

            for inputs, labels in tqdm(test_dataloader):
                inputs = inputs.cuda()
                labels = labels.cuda()

                with torch.no_grad():
                    outputs = model(inputs)

                probs = nn.Softmax(dim=1)(outputs)
                preds = torch.max(probs, 1)[1]
                loss = criterion(outputs, labels.long())

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / test_size
            epoch_acc = running_corrects.double() / test_size

            print("[test] Epoch: {}/{} Loss: {} Acc: {}".format(
                epoch + 1, nEpochs, epoch_loss, epoch_acc))
            stop_time = timeit.default_timer()
            print("Execution time: " + str(stop_time - start_time) + "\n")