def train_model(dataset=dataset, save_dir=save_dir, num_classes=num_classes, lr=lr,
                num_epochs=nEpochs, save_epoch=snapshot, useTest=useTest, test_interval=nTestInterval):
    """Train and validate a video model on the Volleyball dataset splits.

    Args:
        dataset: Dataset identifier forwarded to ``VolleyballDataset``.
        save_dir (str): Root folder for checkpoints (``<save_dir>/models/...``).
        num_classes (int): Number of classes in the data.
        lr (float): Base learning rate.
        num_epochs (int, optional): Number of epochs to train for.
        save_epoch (int): Checkpoint frequency, in epochs.
        useTest (bool): Whether to evaluate on the test split during training.
        test_interval (int): Test-evaluation frequency, in epochs.
    """
    # --- model selection --------------------------------------------------
    # C3D/R2Plus1D fine-tune with a 10x learning rate on their new layers.
    if modelName == 'C3D':
        model = C3D_model.C3D(num_classes=num_classes, pretrained=False)
        train_params = [{'params': C3D_model.get_1x_lr_params(model), 'lr': lr},
                        {'params': C3D_model.get_10x_lr_params(model), 'lr': lr * 10}]
    elif modelName == 'R2Plus1D':
        model = R2Plus1D_model.R2Plus1DClassifier(num_classes=num_classes, layer_sizes=(2, 2, 2, 2))
        train_params = [{'params': R2Plus1D_model.get_1x_lr_params(model), 'lr': lr},
                        {'params': R2Plus1D_model.get_10x_lr_params(model), 'lr': lr * 10}]
    elif modelName == 'R3D':
        model = R3D_model.R3DClassifier(num_classes=num_classes, layer_sizes=(3, 4, 6, 3))
        train_params = model.parameters()
    elif modelName == 'R2D':
        model = R2Dnet.R2DClassifier(group_num_classes=num_classes, pretrained=True)
        train_params = model.parameters()
    else:
        raise NotImplementedError(
            'Unsupported model "{}"; expected C3D, R2Plus1D, R3D or R2D.'.format(modelName))

    criterion = nn.CrossEntropyLoss()  # standard cross-entropy loss for classification
    optimizer = optim.SGD(train_params, lr=lr, momentum=0.9, weight_decay=5e-4)
    # The scheduler divides the lr by 10 every 10 epochs.
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

    # Move model/criterion to the device *before* resuming so restored
    # optimizer state lines up with the live parameters.
    model.to(device)
    criterion.to(device)

    if resume_epoch == 0:
        print("Training {} from scratch...".format(modelName))
    else:
        ckpt_path = os.path.join(save_dir, 'models',
                                 saveName + '_epoch-' + str(resume_epoch - 1) + '.pth.tar')
        # Load all tensors onto the CPU; the .to(device) above already placed
        # the live parameters.
        checkpoint = torch.load(ckpt_path, map_location=lambda storage, loc: storage)
        print("Initializing weights from: {}...".format(ckpt_path))
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['opt_dict'])

    print('Total params: %.2fM' % (sum(p.numel() for p in model.parameters()) / 1000000.0))

    log_dir = os.path.join(save_dir, 'models',
                           datetime.now().strftime('%b%d_%H-%M-%S') + '_' + socket.gethostname())
    writer = SummaryWriter(log_dir=log_dir)

    print('Training model on {} dataset...'.format(dataset))
    # Every split of VolleyballDataset yields (clip, label, dists) triples.
    train_dataloader = DataLoader(VolleyballDataset(dataset=dataset, split='train', clip_len=16),
                                  batch_size=4, shuffle=True, num_workers=0)
    val_dataloader = DataLoader(VolleyballDataset(dataset=dataset, split='val', clip_len=16),
                                batch_size=4, num_workers=0)
    test_dataloader = DataLoader(VolleyballDataset(dataset=dataset, split='test', clip_len=16),
                                 batch_size=4, num_workers=0)

    trainval_loaders = {'train': train_dataloader, 'val': val_dataloader}
    trainval_sizes = {x: len(trainval_loaders[x].dataset) for x in ['train', 'val']}
    test_size = len(test_dataloader.dataset)

    # Set once, up front, instead of re-assigning inside the batch loop.
    torch.backends.cudnn.benchmark = False

    for epoch in range(resume_epoch, num_epochs):
        # Each epoch has a training and a validation phase.
        for phase in ['train', 'val']:
            start_time = timeit.default_timer()

            # Reset the running loss and corrects.
            running_loss = 0.0
            running_corrects = 0.0

            # train()/eval() mainly toggles BatchNorm and Dropout behaviour.
            if phase == 'train':
                model.train()
            else:
                model.eval()

            for inputs, labels, dists in tqdm(trainval_loaders[phase]):
                # Move the batch to the device the training takes place on.
                inputs = inputs.to(device)
                labels = labels.to(device)
                dists = dists.to(device)
                optimizer.zero_grad()

                if phase == 'train':
                    outputs = model(inputs, dists)
                else:
                    with torch.no_grad():
                        outputs = model(inputs, dists)

                probs = nn.Softmax(dim=1)(outputs)
                preds = torch.max(probs, 1)[1]
                loss = criterion(outputs, labels)

                if phase == 'train':
                    # retain_graph is unnecessary: the graph is rebuilt per batch.
                    loss.backward()
                    optimizer.step()

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / trainval_sizes[phase]
            epoch_acc = running_corrects.double() / trainval_sizes[phase]

            if phase == 'train':
                writer.add_scalar('data/train_loss_epoch', epoch_loss, epoch)
                writer.add_scalar('data/train_acc_epoch', epoch_acc, epoch)
            else:
                writer.add_scalar('data/val_loss_epoch', epoch_loss, epoch)
                writer.add_scalar('data/val_acc_epoch', epoch_acc, epoch)

            print("[{}] Epoch: {}/{} Loss: {} Acc: {}".format(phase, epoch+1, nEpochs, epoch_loss, epoch_acc))
            stop_time = timeit.default_timer()
            print("Execution time: " + str(stop_time - start_time) + "\n")

        # Step the scheduler once per epoch, after the optimizer updates
        # (required ordering since PyTorch 1.1).
        scheduler.step()

        if epoch % save_epoch == (save_epoch - 1):
            ckpt_path = os.path.join(save_dir, 'models', saveName + '_epoch-' + str(epoch) + '.pth.tar')
            print(ckpt_path)
            torch.save({
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'opt_dict': optimizer.state_dict(),
            }, ckpt_path)
            print("Save model at {}\n".format(ckpt_path))

        if useTest and epoch % test_interval == (test_interval - 1):
            model.eval()
            start_time = timeit.default_timer()

            running_loss = 0.0
            running_corrects = 0.0

            # The test loader is built from the same VolleyballDataset as
            # train/val, so it yields the same (clip, label, dists) triples;
            # the old 4-tuple unpacking (bbox/adjacency) could never match it.
            for inputs, labels, dists in tqdm(test_dataloader):
                inputs = inputs.to(device)
                labels = labels.to(device)  # labels must be on-device for the loss
                dists = dists.to(device)

                with torch.no_grad():
                    outputs = model(inputs, dists)
                probs = nn.Softmax(dim=1)(outputs)
                preds = torch.max(probs, 1)[1]
                loss = criterion(outputs, labels)

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / test_size
            epoch_acc = running_corrects.double() / test_size

            writer.add_scalar('data/test_loss_epoch', epoch_loss, epoch)
            writer.add_scalar('data/test_acc_epoch', epoch_acc, epoch)

            print("[test] Epoch: {}/{} Loss: {} Acc: {}".format(epoch+1, nEpochs, epoch_loss, epoch_acc))
            stop_time = timeit.default_timer()
            print("Execution time: " + str(stop_time - start_time) + "\n")

    writer.close()
def train_model(dataset=dataset, save_dir=SAVE_FILE_FOLDER, num_classes=num_classes, lr=lr,
                num_epochs=nEpochs, save_epoch=snapshot, useTest=useTest, test_interval=nTestInterval):
    """Train, validate and periodically test a video-classification model.

    Logs per-epoch loss/accuracy/ROC to TensorBoard and checkpoints the model
    every ``save_epoch`` epochs under ``SAVE_FILE_FOLDER``.

    Args:
        dataset: Dataset identifier forwarded to ``VideoDataset``.
        save_dir (str): Kept for interface compatibility; checkpoints are
            written to the module-level ``SAVE_FILE_FOLDER``.
        num_classes (int): Number of classes in the data.
        lr (float): Base learning rate.
        num_epochs (int, optional): Number of epochs to train for.
        save_epoch (int): Checkpoint frequency, in epochs.
        useTest (bool): Whether to evaluate on the test split during training.
        test_interval (int): Test-evaluation frequency, in epochs.
    """
    # --- model selection --------------------------------------------------
    # C3D variants fine-tune with a 10x learning rate on their new layers.
    if modelName == 'C3D':
        model = C3D_model.C3D(num_classes=num_classes, pretrained=IF_PRETRAIN)
        train_params = [{'params': C3D_model.get_1x_lr_params(model), 'lr': lr},
                        {'params': C3D_model.get_10x_lr_params(model), 'lr': lr * 10}]
    elif modelName == 'C3D_td5':
        model = C3D_model.C3D_td5(num_classes=num_classes, pretrained=IF_PRETRAIN)
        train_params = [{'params': C3D_model.get_1x_lr_params(model), 'lr': lr},
                        {'params': C3D_model.get_10x_lr_params(model), 'lr': lr * 10}]
    elif modelName == 'R2Plus1D':
        model = R2Plus1D_model.R2Plus1DClassifier(num_classes=num_classes, layer_sizes=(2, 2, 2, 2))
        train_params = [{'params': R2Plus1D_model.get_1x_lr_params(model), 'lr': lr},
                        {'params': R2Plus1D_model.get_10x_lr_params(model), 'lr': lr * 10}]
    elif modelName == 'R3D':
        model = R3D_model.R3DClassifier(num_classes=num_classes, layer_sizes=(2, 2, 2, 2))
        train_params = model.parameters()
    else:
        raise NotImplementedError(
            'Unsupported model "{}"; expected C3D, C3D_td5, R2Plus1D or R3D.'.format(modelName))

    criterion = nn.CrossEntropyLoss()  # standard cross-entropy loss for classification
    if _optimizer == "SGD":
        optimizer = optim.SGD(train_params, lr=lr, momentum=MOMENTUM, weight_decay=WD)
    elif _optimizer == "Adam":
        optimizer = optim.Adam(train_params, lr=lr, weight_decay=WD)
    else:
        # Fail loudly instead of hitting a NameError on the scheduler below.
        raise ValueError('Unknown optimizer "{}"; expected "SGD" or "Adam".'.format(_optimizer))
    # The scheduler divides the lr by SCHEDULER_GAMMA every SCHEDULER_STEP_SIZE epochs.
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=SCHEDULER_STEP_SIZE,
                                          gamma=SCHEDULER_GAMMA)

    # Move model/criterion to the device once, *before* resuming, so restored
    # optimizer state lines up with the live parameters.
    model.to(device)
    criterion.to(device)

    if resume_model_path is None:
        print("Training {} from scratch...".format(modelName))
    else:
        # Load all tensors onto the CPU; .to(device) above already placed the
        # live parameters.
        checkpoint = torch.load(resume_model_path,
                                map_location=lambda storage, loc: storage)
        print("Initializing weights from: {}...".format(resume_model_path))
        model.load_state_dict(checkpoint['state_dict'])
        if RESUM_OPTIMIZER:
            optimizer.load_state_dict(checkpoint['opt_dict'])

    print('Total params: %.2fM' % (sum(p.numel() for p in model.parameters()) / 1000000.0))

    writer = SummaryWriter(logdir=LOG_PATH)

    print('Training model on {} dataset...'.format(dataset))
    train_dataloader = DataLoader(VideoDataset(dataset=dataset, split='train', clip_len=clip_len, preprocess=IF_PREPROCESS_TRAIN, grayscale=grayscale), batch_size=BS, shuffle=True, num_workers=N_WORKERS)
    val_dataloader   = DataLoader(VideoDataset(dataset=dataset, split='val',  clip_len=clip_len, preprocess=IF_PREPROCESS_VAL, grayscale=grayscale), batch_size=BS, num_workers=N_WORKERS)
    test_dataloader  = DataLoader(VideoDataset(dataset=dataset, split='test', clip_len=clip_len, preprocess=IF_PREPROCESS_TEST, grayscale=grayscale), batch_size=BS, num_workers=N_WORKERS)

    trainval_loaders = {'train': train_dataloader, 'val': val_dataloader}
    trainval_sizes = {x: len(trainval_loaders[x].dataset) for x in ['train', 'val']}
    test_size = len(test_dataloader.dataset)

    # Fixed input sizes, so let cudnn autotune the convolution algorithms.
    cudnn.benchmark = True

    for epoch in range(num_epochs):
        # Each epoch has a training and a validation phase.
        for phase in ['train', 'val']:
            start_time = timeit.default_timer()

            # Reset the running loss/corrects and the prediction history used
            # for the epoch-level ROC score.
            running_loss = 0.0
            running_corrects = 0.0
            list_pred = []
            list_label = []

            # train()/eval() mainly toggles BatchNorm and Dropout behaviour.
            if phase == 'train':
                model.train()
            else:
                model.eval()

            for inputs, labels in trainval_loaders[phase]:
                # Move the batch to the device the training takes place on.
                inputs = inputs.to(device)
                labels = labels.to(device)
                optimizer.zero_grad()

                if phase == 'train':
                    outputs = model(inputs)
                else:
                    with torch.no_grad():
                        outputs = model(inputs)

                probs = nn.Softmax(dim=1)(outputs)
                preds = torch.max(probs, 1)[1]
                loss = criterion(outputs, labels)

                if phase == 'train':
                    loss.backward()
                    optimizer.step()

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)
                list_label += labels.data.cpu().tolist()
                list_pred += preds.cpu().tolist()

            epoch_loss = running_loss / trainval_sizes[phase]
            epoch_acc = running_corrects.double() / trainval_sizes[phase]
            epoch_roc = multiclass_roc_score(label=list_label, pred=list_pred, n_cls=N_CLASSES)

            if phase == 'train':
                writer.add_scalar('data/train_loss_epoch', epoch_loss, epoch)
                writer.add_scalar('data/train_acc_epoch', epoch_acc, epoch)
                writer.add_scalar('data/train_roc_epoch', epoch_roc, epoch)
            else:
                writer.add_scalar('data/val_loss_epoch', epoch_loss, epoch)
                writer.add_scalar('data/val_acc_epoch', epoch_acc, epoch)
                writer.add_scalar('data/val_roc_epoch', epoch_roc, epoch)

            print("[{}] Epoch: {}/{} Loss: {} Acc: {}, ROC:{}".format(phase, epoch+1, nEpochs, epoch_loss, epoch_acc, epoch_roc))

        # Step the scheduler once per epoch, after the optimizer updates
        # (required ordering since PyTorch 1.1).
        scheduler.step()

        if epoch % save_epoch == (save_epoch - 1):
            ckpt_path = os.path.join(SAVE_FILE_FOLDER, EXP_NAME + '_epoch-' + str(epoch) + '.pth.tar')
            torch.save({
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'opt_dict': optimizer.state_dict(),
            }, ckpt_path)
            print("Save model at {}\n".format(ckpt_path))

        if useTest and epoch % test_interval == (test_interval - 1):
            model.eval()

            running_loss = 0.0
            running_corrects = 0.0
            list_pred = []
            list_label = []

            for inputs, labels in test_dataloader:
                inputs = inputs.to(device)
                labels = labels.to(device)

                with torch.no_grad():
                    outputs = model(inputs)
                probs = nn.Softmax(dim=1)(outputs)
                preds = torch.max(probs, 1)[1]
                loss = criterion(outputs, labels)

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)
                list_label += labels.data.cpu().tolist()
                list_pred += preds.cpu().tolist()

            epoch_loss = running_loss / test_size
            epoch_acc = running_corrects.double() / test_size
            epoch_roc = multiclass_roc_score(label=list_label, pred=list_pred, n_cls=N_CLASSES)

            writer.add_scalar('data/test_loss_epoch', epoch_loss, epoch)
            writer.add_scalar('data/test_acc_epoch', epoch_acc, epoch)
            writer.add_scalar('data/test_roc_epoch', epoch_roc, epoch)

            print("[test] Epoch: {}/{} Loss: {} Acc:{} ROC: {}".format(epoch+1, nEpochs, epoch_loss, epoch_acc, epoch_roc))

    writer.close()
# --- Beispiel #3 (scraped example separator; vote count: 0) ---
def train_model(dataset=dataset,
                save_dir=save_dir,
                num_classes=num_classes,
                lr=lr,
                num_epochs=nEpochs,
                save_epoch=snapshot,
                useTest=useTest,
                test_interval=nTestInterval):
    """Train, validate and periodically test a video-classification model.

    Args:
        dataset: Dataset identifier forwarded to ``VideoDataset``.
        save_dir (str): Root folder for checkpoints (``<save_dir>/models/...``).
        num_classes (int): Number of classes in the data.
        lr (float): Base learning rate.
        num_epochs (int, optional): Number of epochs to train for.
        save_epoch (int): Checkpoint frequency, in epochs.
        useTest (bool): Whether to evaluate on the test split during training.
        test_interval (int): Test-evaluation frequency, in epochs.
    """
    import torch.nn.parallel  # hoisted: was re-imported inside the batch loop

    # --- model selection --------------------------------------------------
    # C3D/R2Plus1D fine-tune with a 10x learning rate on their new layers.
    if modelName == 'C3D':
        model = C3D_model.C3D(num_classes=num_classes, pretrained=False)
        train_params = [{'params': C3D_model.get_1x_lr_params(model), 'lr': lr},
                        {'params': C3D_model.get_10x_lr_params(model), 'lr': lr * 10}]
    elif modelName == 'R2Plus1D':
        model = R2Plus1D_model.R2Plus1DClassifier(num_classes=num_classes,
                                                  layer_sizes=(3, 4, 6, 3))
        train_params = [{'params': R2Plus1D_model.get_1x_lr_params(model), 'lr': lr},
                        {'params': R2Plus1D_model.get_10x_lr_params(model), 'lr': lr * 10}]
    elif modelName == 'R3D':
        model = R3D_model.R3DClassifier(num_classes=num_classes,
                                        layer_sizes=(3, 4, 6, 3))
        train_params = model.parameters()
    elif modelName == 'P3D':
        model = p3d_model.P3D63(num_classes=num_classes)
        train_params = model.parameters()
    elif modelName == 'I3D':
        model = I3D_model.InceptionI3d(num_classes=num_classes, in_channels=3)
        train_params = model.parameters()
    elif modelName == 'T3D':
        model = T3D_model.inception_v1(num_classes=num_classes)
        train_params = model.parameters()
    elif modelName == 'STP':
        model = STP_model.STP(num_classes=num_classes, in_channels=3)
        train_params = model.parameters()
    else:
        raise NotImplementedError('Unsupported model "{}".'.format(modelName))

    criterion = nn.CrossEntropyLoss()  # standard cross-entropy loss for classification
    optimizer = optim.SGD(train_params, lr=lr, momentum=0.9, weight_decay=5e-4)
    # The scheduler divides the lr by 10 every 10 epochs.
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

    if resume_epoch == 0:
        print("Training {} from scratch...".format(modelName))
    else:
        ckpt_path = os.path.join(save_dir, 'models',
                                 saveName + '_epoch-' + str(resume_epoch - 1) + '.pth.tar')
        # Load all tensors onto the CPU first; model is moved to `device` below.
        checkpoint = torch.load(ckpt_path, map_location=lambda storage, loc: storage)
        print("Initializing weights from: {}...".format(ckpt_path))
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['opt_dict'])

    print('Total params: %.2fM' %
          (sum(p.numel() for p in model.parameters()) / 1000000.0))
    model.to(device)
    criterion.to(device)

    log_dir = "./logs"
    writer = SummaryWriter(logdir=log_dir)

    print('Training model on {} dataset...'.format(dataset))
    train_dataloader = DataLoader(VideoDataset(dataset=dataset,
                                               split='train',
                                               clip_len=16,
                                               modelName=modelName),
                                  batch_size=8,
                                  shuffle=True,
                                  num_workers=4)
    val_dataloader = DataLoader(VideoDataset(dataset=dataset,
                                             split='val',
                                             clip_len=16,
                                             modelName=modelName),
                                batch_size=8,
                                num_workers=4)
    test_dataloader = DataLoader(VideoDataset(dataset=dataset,
                                              split='test',
                                              clip_len=16,
                                              modelName=modelName),
                                 batch_size=8,
                                 num_workers=4)

    trainval_loaders = {'train': train_dataloader, 'val': val_dataloader}
    trainval_sizes = {x: len(trainval_loaders[x].dataset) for x in ['train', 'val']}
    test_size = len(test_dataloader.dataset)

    for epoch in range(resume_epoch, num_epochs):
        # Each epoch has a training and a validation phase.
        for phase in ['train', 'val']:
            start_time = timeit.default_timer()

            # Reset the running loss and corrects.
            running_loss = 0.0
            running_corrects = 0.0

            # train()/eval() mainly toggles BatchNorm and Dropout behaviour.
            if phase == 'train':
                model.train()
            else:
                model.eval()

            for inputs, labels in tqdm(trainval_loaders[phase]):
                # Move the batch to the device the training takes place on.
                inputs = inputs.to(device)
                labels = labels.to(device)
                optimizer.zero_grad()

                if phase == 'train':
                    # NOTE(review): device ids are hard-coded to 2 GPUs here;
                    # confirm this matches the deployment hardware.
                    if not modelName == 'STP':
                        outputs = nn.parallel.data_parallel(model, inputs, range(2))
                    else:
                        # STP additionally returns a spatial-attention index map.
                        outputs, index = nn.parallel.data_parallel(model, inputs, range(2))
                else:
                    with torch.no_grad():
                        outputs = model(inputs)

                probs = nn.Softmax(dim=1)(outputs)
                preds = torch.max(probs, 1)[1]
                if modelName == 'I3D':
                    # I3D expects (N, 1)-shaped targets.
                    labels = labels.reshape(labels.shape[0], 1)
                loss = criterion(outputs, labels)

                if modelName == 'STP':
                    # Sparsity regulariser over the two temporal halves of the
                    # attention index.  NOTE(review): the second term uses `//`
                    # (floor division) where the first uses `/` — kept as-is,
                    # but verify this asymmetry is intentional.
                    half = int(index.size(2) / 2)
                    sp_loss = -torch.log(
                        torch.sum(index[:, :, 0:half, :, :]) / int(index.size(2))
                    ) + torch.log(
                        1 - torch.sum(index[:, :, half + 1:, :, :]) // int(index.size(2))
                    )
                    loss = loss + sp_loss

                if phase == 'train':
                    loss.backward()
                    optimizer.step()

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / trainval_sizes[phase]
            epoch_acc = running_corrects.double() / trainval_sizes[phase]

            if phase == 'train':
                writer.add_scalar('data/train_loss_epoch', epoch_loss, epoch)
                writer.add_scalar('data/train_acc_epoch', epoch_acc, epoch)
            else:
                writer.add_scalar('data/val_loss_epoch', epoch_loss, epoch)
                writer.add_scalar('data/val_acc_epoch', epoch_acc, epoch)

            print("[{}] Epoch: {}/{} Loss: {} Acc: {}".format(
                phase, epoch + 1, nEpochs, epoch_loss, epoch_acc))
            stop_time = timeit.default_timer()
            print("Execution time: " + str(stop_time - start_time) + "\n")

        # Step the scheduler once per epoch, after the optimizer updates
        # (required ordering since PyTorch 1.1).
        scheduler.step()

        if epoch % save_epoch == (save_epoch - 1):
            # Save under save_dir/models — previously the checkpoint was
            # written to a bare 'models/' path while the log message claimed
            # the save_dir location.
            ckpt_path = os.path.join(save_dir, 'models',
                                     saveName + '_epoch-' + str(epoch) + '.pth.tar')
            torch.save(
                {
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'opt_dict': optimizer.state_dict(),
                },
                ckpt_path)
            print("Save model at {}\n".format(ckpt_path))

        if useTest and epoch % test_interval == (test_interval - 1):
            model.eval()
            start_time = timeit.default_timer()

            running_loss = 0.0
            running_corrects = 0.0

            for inputs, labels in tqdm(test_dataloader):
                inputs = inputs.to(device)
                labels = labels.to(device)

                with torch.no_grad():
                    outputs = model(inputs)
                probs = nn.Softmax(dim=1)(outputs)
                preds = torch.max(probs, 1)[1]
                if modelName == 'I3D':
                    labels = labels.reshape(labels.shape[0], 1)
                loss = criterion(outputs, labels)

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / test_size
            epoch_acc = running_corrects.double() / test_size

            writer.add_scalar('data/test_loss_epoch', epoch_loss, epoch)
            writer.add_scalar('data/test_acc_epoch', epoch_acc, epoch)

            print("[test] Epoch: {}/{} Loss: {} Acc: {}".format(
                epoch + 1, nEpochs, epoch_loss, epoch_acc))
            stop_time = timeit.default_timer()
            print("Execution time: " + str(stop_time - start_time) + "\n")

    writer.close()
# --- Beispiel #4 (scraped example separator; vote count: 0) ---
def train_model(dataset=dataset,
                save_dir=save_dir,
                num_classes=num_classes,
                lr=lr,
                num_epochs=nEpochs,
                save_epoch=snapshot):
    """Train an R2Plus1D classifier and periodically checkpoint it.

        Args:
            dataset: Name of the dataset to train on.
            save_dir: Root directory holding 'models/' checkpoints and logs.
            num_classes (int): Number of classes in the data
            lr (float): Base learning rate; the new classifier head uses 10x.
            num_epochs (int, optional): Number of epochs to train for.
            save_epoch (int): Save a checkpoint every `save_epoch` epochs.
    """
    if modelName == 'R2Plus1D':
        model = R2Plus1D_model.R2Plus1DClassifier(num_classes=num_classes,
                                                  layer_sizes=(2, 2, 2, 2))
        # Pretrained backbone trains at the base lr, the fresh head at 10x.
        train_params = [{
            'params': R2Plus1D_model.get_1x_lr_params(model),
            'lr': lr
        }, {
            'params': R2Plus1D_model.get_10x_lr_params(model),
            'lr': lr * 10
        }]
    else:
        print('We only implemented C3D and R2Plus1D models.')
        raise NotImplementedError
    criterion = nn.CrossEntropyLoss()  # standard cross-entropy loss for classification
    optimizer = optim.SGD(train_params, lr=lr, momentum=0.9, weight_decay=5e-4)
    # The scheduler divides the lr by 10 every 10 epochs.
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

    if resume_epoch == 0:
        print("Training {} from scratch...".format(modelName))
    else:
        ckpt_path = os.path.join(
            save_dir, 'models',
            saveName + '_epoch-' + str(resume_epoch - 1) + '.pth.tar')
        # Load all tensors onto the CPU; model.to(device) moves them later.
        checkpoint = torch.load(ckpt_path,
                                map_location=lambda storage, loc: storage)
        print("Initializing weights from: {}...".format(ckpt_path))
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['opt_dict'])

    print('Total params: %.2fM' %
          (sum(p.numel() for p in model.parameters()) / 1000000.0))
    model.to(device)
    criterion.to(device)

    log_dir = os.path.join(
        save_dir, 'models',
        datetime.now().strftime('%b%d_%H-%M-%S') + '_' + socket.gethostname())
    writer = SummaryWriter(log_dir=log_dir)

    print('Training model on {} dataset...'.format(dataset))
    train_dataloader = DataLoader(VideoDataset(config=config,
                                               dataset=dataset,
                                               split='train'),
                                  batch_size=config.batch_size,
                                  shuffle=True,
                                  num_workers=1)

    for epoch in range(resume_epoch, num_epochs):
        # each epoch has a training and validation step (only 'train' here)
        for phase in ['train']:
            start_time = timeit.default_timer()

            # reset the running loss and corrects
            running_loss = 0.0
            running_corrects = 0.0

            # train()/eval() mode primarily affects BatchNorm and Dropout
            if phase == 'train':
                model.train()
            else:
                model.eval()

            for inputs, labels in train_dataloader:
                # Move the batch to the training device. No grad is needed on
                # the inputs themselves (the deprecated Variable wrapper with
                # requires_grad=True only wasted memory on input gradients).
                inputs = inputs.to(device)
                labels = labels.to(device)
                optimizer.zero_grad()

                if phase == 'train':
                    outputs = model(inputs)

                probs = nn.Softmax(dim=1)(outputs)
                preds = torch.max(probs, 1)[1]
                loss = criterion(outputs, labels)

                if phase == 'train':
                    loss.backward()
                    optimizer.step()

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            if phase == 'train':
                # BUGFIX: step the LR scheduler AFTER the epoch's optimizer
                # updates (required order since PyTorch 1.1). The original
                # stepped it before training, shifting the decay one epoch
                # early and skipping the first lr value.
                scheduler.step()

            epoch_loss = running_loss / len(train_dataloader.dataset)
            epoch_acc = running_corrects.double() / len(
                train_dataloader.dataset)

            if phase == 'train':
                writer.add_scalar('data/train_loss_epoch', epoch_loss, epoch)
                writer.add_scalar('data/train_acc_epoch', epoch_acc, epoch)

            print("[{}] Epoch: {}/{} Loss: {} Acc: {}".format(
                phase, epoch + 1, nEpochs, epoch_loss, epoch_acc))
            stop_time = timeit.default_timer()
            print("Execution time: " + str(stop_time - start_time) + "\n")

        if epoch % save_epoch == (save_epoch - 1):
            torch.save(
                {
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'opt_dict': optimizer.state_dict(),
                },
                os.path.join(save_dir, 'models',
                             saveName + '_epoch-' + str(epoch) + '.pth.tar'))
            print("Save model at {}\n".format(
                os.path.join(save_dir, 'models',
                             saveName + '_epoch-' + str(epoch) + '.pth.tar')))

    writer.close()
# --- Beispiel #5 ---
def test_model(dataset=dataset, save_dir=save_dir, num_classes=num_classes, lr=lr,
                save_epoch=snapshot, useTest=useTest, test_interval=nTestInterval):
    """Run sliding-window valence/arousal inference over whole videos.

        For each test video, a CLIP_LEN-frame window is centered on every
        frame, predictions are collected, invalid labels (sentinel -5) are
        filtered out, and per-video CCC / MSE metrics are printed and
        appended to 'test_log.txt'.

        Args:
            num_classes (int): Number of classes in the data (unused here;
                the models below are built with hard-coded class counts).
            lr (float): Only used to build `train_params`, which is never
                consumed in this test-only function.
    """

    # Build the requested backbone.
    # NOTE(review): there is no `else` branch -- if modelName matches none of
    # these, `model` is unbound and the torch.load below raises NameError.
    # `train_params` is assigned but never used in this function.
    if modelName == 'C3DVA':
        model = C3DVA_model.C3DVA(num_classes=2, pretrained=True)
        train_params = model.parameters()
    elif modelName == 'CSN':
        model = CSN_model.csn26(num_classes=2, add_landmarks=ADD_LANDMARKS)
        train_params = model.parameters()
    elif modelName == 'R2Plus1D':
        model = R2Plus1D_model.R2Plus1DRegressor(num_classes=2, layer_sizes=LAYER_SIZES)
        train_params = [{'params': R2Plus1D_model.get_1x_lr_params(model), 'lr': lr},
                        {'params': R2Plus1D_model.get_10x_lr_params(model), 'lr': lr * 10}]
    elif modelName == 'Transformer':
        model = transformer_v3.Semi_Transformer(num_classes=40, seq_len=CLIP_LEN)
        train_params = model.parameters()
    elif modelName == 'Resnet3d':
        model = resnet3D.resnet3d18(num_classes=400, pretrained=None)
        train_params = model.parameters()

    # Restore the checkpoint saved for the epoch just before `resume_epoch`.
    checkpoint = torch.load(os.path.join(save_dir, 'models', saveName + '_epoch-' + str(resume_epoch - 1) + '.pth.tar'),
                       map_location=lambda storage, loc: storage)   # Load all tensors onto the CPU
    print("Initializing weights from: {}...".format(
        os.path.join(save_dir, 'models', saveName + '_epoch-' + str(resume_epoch - 1) + '.pth.tar')))
    # strict=False tolerates key mismatches between checkpoint and model.
    model.load_state_dict(checkpoint['state_dict'], strict=False)

    print('Total params: %.2fM' % (sum(p.numel() for p in model.parameters()) / 1000000.0))
    model.to(device)

    print('Training model on {} dataset...'.format(dataset))
    # batch_size=1: each dataloader item is one whole video (frame paths + label path).
    test_dataloader  = DataLoader(VideoDataset(dataset=dataset, label_type="VA_Set", split=split, \
        clip_len=CLIP_LEN, stride=FRAME_STRIDE, add_landmarks=ADD_LANDMARKS, triplet_label=True), batch_size=1, shuffle=False, num_workers=0,\
         drop_last=True, pin_memory=False)
    test_size = len(test_dataloader.dataset)

    # testing
    model.eval()
    start_time = timeit.default_timer()
    criterion = SetVACriterion(num_classes=20, use_mse=False, is_test=True)
    criterion.to(device)

    # Per-video metrics, averaged at the end.
    val_cccs = []
    aro_cccs = []
    val_mses = []
    aro_mses = []
    with open('test_log.txt', 'a') as test_log:
        test_log.write('=======start=======\n')
        test_log.write('model path: ' + 
            os.path.join(save_dir, 'models', saveName + '_epoch-' + str(resume_epoch - 1) + '.pth.tar') + '\n')

    for frames, label_path in (test_dataloader):
        
        print(frames[0][0].split('/')[-2])

        frame_ids = [] # frame number of every image in this video
        for each in frames:
            fid = int(each[0].split('/')[-1].split('.')[0])
            frame_ids.append(fid)

        frame_count = len(frames)
        frame_root_dir = '/' + os.path.join(*frames[0][0].split('/')[1:-1])
        
        # valence_label = np.empty((frame_count-CLIP_LEN*FRAME_STRIDE+1, 1), np.dtype('float32'))
        # arousal_label = np.empty((frame_count-CLIP_LEN*FRAME_STRIDE+1, 1), np.dtype('float32'))
        # preds_all = np.empty((frame_count-CLIP_LEN*FRAME_STRIDE+1, 2), np.dtype('float32'))

        # Indexed by (1-based frame id - 1); frames with no prediction stay 0.
        valence_label = np.zeros((frame_ids[-1], 1), np.dtype('float32'))
        arousal_label = np.zeros((frame_ids[-1], 1), np.dtype('float32'))
        preds_all = np.zeros((frame_ids[-1], 2), np.dtype('float32'))

        
        # (bare string below acts as a comment: "read all frames of the video")
        '''读取视频的所有帧'''
        for center in tqdm( frame_ids ):
            # 4th channel holds the landmark map when ADD_LANDMARKS is on.
            if ADD_LANDMARKS:
                buffer = np.empty(( CLIP_LEN, RESIZE_HEIGHT, RESIZE_WIDTH, 4), np.dtype('float32'))
            else:
                buffer = np.empty(( CLIP_LEN, RESIZE_HEIGHT, RESIZE_WIDTH, 3), np.dtype('float32'))
        
            
            # (bare string below acts as a comment: "read the images")
            # Build the clip of frame ids centered on `center`, with FRAME_STRIDE.
            '''读取图像'''
            clip = list(range(center-int(CLIP_LEN/2)*FRAME_STRIDE, center+int(CLIP_LEN/2)*FRAME_STRIDE, FRAME_STRIDE))
            
            for i, frame_id in enumerate(clip):
                frame_name = None
                # Out-of-range or missing frames are padded with mid-gray (128).
                if (frame_id < 1 or frame_id > frame_ids[-1]):
                    frame = np.ones((RESIZE_HEIGHT, RESIZE_WIDTH, 3))*128
                else:
                    frame_name = os.path.join(frame_root_dir,str(frame_id).zfill(5)+'.jpg')
                    #print(center, frames[i])
                    
                    if os.path.exists(frame_name):
                        frame = np.array(cv2.imread(frame_name)).astype(np.float32)
                    else:
                        frame = np.ones((RESIZE_HEIGHT, RESIZE_WIDTH, 3))*128

                    frame = cv2.resize(frame, (RESIZE_HEIGHT, RESIZE_WIDTH))
                # cv2.imshow(frame_name, frame/256)
                # cv2.waitKey(1000)
                # cv2.destroyAllWindows()

                if ADD_LANDMARKS:
                    # read the landmark image (grayscale sibling of the frame)
                    if frame_name != None:
                        lm_name = frame_name.replace('image', 'landmarks')
                        lm_img = np.array(cv2.imread(lm_name, cv2.IMREAD_GRAYSCALE)).astype(np.float32)
                        lm_img = cv2.resize(lm_img, (RESIZE_HEIGHT, RESIZE_WIDTH))
                        lm_img = np.expand_dims(lm_img, 2)
                    else:
                        lm_img = np.zeros((RESIZE_HEIGHT, RESIZE_WIDTH, 1))
                    # concatenate the landmark map as an extra channel
                    buffer[i] = np.concatenate((frame, lm_img),2)
                else:
                    buffer[i] = frame
            # Normalize to roughly [-1, 1], then to (C, T, H, W) with batch dim.
            buffer = (buffer - 128) / 128
            buffer = buffer.transpose((3, 0, 1, 2))
            inputs = torch.from_numpy(buffer).unsqueeze(0)

            # if not split == 'Submission_Set':
            #     avgface_path = frame_root_dir.replace('image', 'avgfaces')
            #     avgface_path = os.path.join(avgface_path, 'avgface.png')
            #     avgface = cv2.imread(avgface_path, cv2.IMREAD_GRAYSCALE)
            #     avgface = np.stack((avgface,)*3, axis=-1)
            #     avgface = (avgface - 128) / 128
            #     avgface = torch.from_numpy(avgface.transpose(2,0,1)).float().unsqueeze(0)

            if not split == 'Submission_Set':
                # read the ground-truth label for this center frame
                with open(label_path[0], 'rt') as f:
                    lines = f.read().splitlines()
                    line = lines[center] # 1-based frame id as index skips the header line
                valence_label[center-1, 0] = float(line.split(',')[0])
                arousal_label[center-1, 0] = float(line.split(',')[1])

            with torch.no_grad():
                # Approach 1: split the 40 logits into 20 valence + 20 arousal
                # bins and let the criterion turn them into regression values.
                outputs_va, outputs_expr, _, _ = model(inputs.cuda())
                outputs_v = outputs_va[:,:20]
                outputs_a = outputs_va[:,20:40]
                _, valence_pred_reg, arousal_pred_reg = criterion(outputs_v, outputs_a, \
                    valence_label, arousal_label)

                # Approach 2 (disabled alternative head)
                # logits1, logits2, logits3, corr_t_div_C, corr_s_div_C, feat = model(inputs.cuda())
                # outputs_v2, outputs_a2 = logits2[0][:,:20], logits2[0][:,20:40]
                # _, valence_pred_reg2, arousal_pred_reg2 = criterion(outputs_v2, outputs_a2, \
                #         valence_label, arousal_label)
                # valence_pred_reg = valence_pred_reg2
                # arousal_pred_reg = arousal_pred_reg2

                # visualize_feature(feat, center)

            # visualize_attention(inputs, corr_t_div_C, corr_s_div_C)

            pred_concat = np.concatenate((np.expand_dims(valence_pred_reg,0), np.expand_dims(arousal_pred_reg,0)), 1)
            preds_all[center-1, :] = pred_concat

            # drop invalid labels (sentinel value -5 marks "no annotation")
            if valence_label[center-1, 0] == -5:
                preds_all[center-1, :] = -5

        # Keep only the entries whose label/prediction is not the -5 sentinel.
        preds_all_valid = []
        valence_label_valid = []
        arousal_label_valid = []
        for each in preds_all:
            if not each[0] == -5:
                preds_all_valid.append(each)

        for each in valence_label:
            if not each[0] == -5:
                valence_label_valid.append(each)

        for each in arousal_label:
            if not each[0] == -5:
                arousal_label_valid.append(each)

        print(len(preds_all_valid), len(valence_label_valid), len(arousal_label_valid))

        preds_all = torch.from_numpy(np.array(preds_all_valid))
        valence_label = torch.from_numpy(np.array(valence_label_valid))
        arousal_label = torch.from_numpy(np.array(arousal_label_valid))

        # Per-video concordance correlation and MSE for valence / arousal.
        val_cc2 = concord_cc2(preds_all[:,0] , valence_label[:,0])
        aro_cc2 = concord_cc2(preds_all[:,1], arousal_label[:,0])
        val_mse = torch.nn.MSELoss()(preds_all[:,0], valence_label[:,0])
        aro_mse = torch.nn.MSELoss()(preds_all[:,1], arousal_label[:,0])


        if not split == 'Submission_Set':
            if not os.path.exists(os.path.join(save_dir, 'res')):
                os.mkdir(os.path.join(save_dir, 'res'))
            with open(os.path.join(save_dir, 'res', 'res_' + frames[0][0].split('/')[-2] + '.txt'), 'w') as res:
                for i, each in enumerate(preds_all):
                    res.write(str(preds_all.numpy()[i,0]) + ", " + str(valence_label.numpy()[i,0]) + ', ' \
                    + str(preds_all.numpy()[i,1]) + ", " + str(arousal_label.numpy()[i,0]) + '\n')
        
        if split == 'Submission_Set':
            if not os.path.exists(os.path.join(save_dir, 'submission')):
                os.mkdir(os.path.join(save_dir, 'submission'))
            with open(os.path.join(save_dir, 'submission', frames[0][0].split('/')[-2] + '.txt'), 'w') as res:
                res.write("valence,arousal\n")
                for i, each in enumerate(preds_all):
                    res.write(str(preds_all.numpy()[i,0]) + "," + str(preds_all.numpy()[i,1]) + '\n')
        
            
        print("Val CCC: {:.4f}  Aro CCC: {:.4f}  Val MSE: {:.4f}  Aro MSE: {:.4f}".format(val_cc2, aro_cc2, val_mse, aro_mse))

        with open('test_log.txt', 'a') as test_log:
            log = frames[0][0].split('/')[-2] + ',' + str(float(val_cc2)) + ',' + str(float(aro_cc2)) + ',' + \
                str(float(val_mse)) + ',' + str(float(aro_mse))
            test_log.write(log + '\n')

        val_cccs.append(val_cc2)
        aro_cccs.append(aro_cc2)
        val_mses.append(val_mse)
        aro_mses.append(aro_mse)
    print("[test] Val CCC: {:.4f}  Aro CCC: {:.4f}  Val MSE: {:.4f}  Aro MSE: {:.4f}".format(np.mean(val_cccs), \
       np.mean(aro_cccs), np.mean(val_mses), np.mean(aro_mses)))

    with open('test_log.txt', 'a') as test_log:
            test_log.write(("[test] Val CCC: {:.4f}  Aro CCC: {:.4f}  Val MSE: {:.4f}  Aro MSE: {:.4f}\n".format(np.mean(val_cccs), \
       np.mean(aro_cccs), np.mean(val_mses), np.mean(aro_mses))))
    stop_time = timeit.default_timer()
    print("Execution time: " + str(stop_time - start_time) + "\n")
# --- Beispiel #6 ---
def main():
    """Evaluate a saved video classifier on the ferryboat test split.

    Loads a checkpoint (keeping only weights whose names match the current
    model), then for each test video runs sliding-window 16-frame inference
    and prints a per-class accuracy summary.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("Device being used:", device)

    with open('./dataloaders/ferryboat_labels.txt', 'r') as f:
        class_names = f.readlines()

    # init model: pick the architecture plus the resize / crop geometry it expects
    num_classes = 4
    modelName = 'STP'
    if modelName == 'I3D':
        model = I3D_model.InceptionI3d(num_classes=num_classes, in_channels=3)
        size = (240, 284)
        crop_size = 224
    elif modelName == 'R2Plus1D':
        model = R2Plus1D_model.R2Plus1DClassifier(num_classes=num_classes,
                                                  layer_sizes=(3, 4, 6, 3))
        size = (171, 128)
        crop_size = 112
    elif modelName == 'C3D':
        model = C3D_model.C3D(num_classes=num_classes, pretrained=False)
        size = (171, 128)
        crop_size = 112
    elif modelName == 'P3D':
        model = p3d_model.P3D63(num_classes=num_classes)
        size = (176, 210)
        crop_size = 160
    elif modelName == 'R3D':
        model = R3D_model.R3DClassifier(num_classes=num_classes,
                                        layer_sizes=(3, 4, 6, 3))
        size = (171, 128)
        crop_size = 112
    elif modelName == 'STP':
        model = STP_model.STP(num_classes=num_classes, in_channels=3)
        size = (240, 284)
        crop_size = 224

    # Partial load: keep only checkpoint entries whose keys exist in the model.
    checkpoint = torch.load('./models/I3D-ferryboat4_epoch-199.pth.tar',
                            map_location=lambda storage, loc: storage)
    model_dict = model.state_dict()
    checkpoint_load = {
        k: v
        for k, v in (checkpoint['state_dict']).items() if k in model_dict
    }
    model_dict.update(checkpoint_load)
    model.load_state_dict(model_dict)

    model.to(device)
    model.eval()

    for root, dirs, files in os.walk('./VAR/ferryboat/test/'):

        # Per-class clip counters. BUGFIX: the original wrote counters into
        # locals(), which is undefined behavior inside a CPython function
        # (the language reference says such writes may not take effect);
        # plain dicts express the same bookkeeping safely.
        total = {'Inshore': 0, 'Neg': 0, 'Offshore': 0, 'Traffic': 0}
        correct = {'Inshore': 0, 'Neg': 0, 'Offshore': 0, 'Traffic': 0}

        if len(dirs) > 4:
            video_name = dirs
            for name in video_name:
                class_name = name.split('_')[1]
                video = './ferryboat/' + class_name + "/" + name + '.avi'
                clip = []
                cap = cv2.VideoCapture(video)
                retaining = True
                while retaining:
                    retaining, frame = cap.read()
                    if not retaining and frame is None:
                        continue

                    tmp_ = center_crop(cv2.resize(frame, size), crop_size)
                    tmp = tmp_

                    clip.append(tmp)
                    if len(clip) == 16:
                        # Shape the window into a (1, C, T, H, W) tensor.
                        inputs = np.array(clip).astype(np.float32)
                        inputs = np.expand_dims(inputs, axis=0)
                        inputs = np.transpose(inputs, (0, 4, 1, 2, 3))
                        # Plain .to(device) replaces the deprecated
                        # torch.autograd.Variable wrapper (same behavior).
                        inputs = torch.from_numpy(inputs).to(device)
                        with torch.no_grad():
                            if modelName == 'STP':
                                outputs, index = model.forward(inputs)
                            else:
                                outputs = model.forward(inputs)
                        probs = torch.nn.Softmax(dim=1)(outputs)
                        label = torch.max(probs,
                                          1)[1].detach().cpu().numpy()[0]
                        if modelName == 'I3D':
                            label = int(label[0])
                        pre = class_names[label].split(' ')[1][:-1]
                        total[class_name] += 1

                        # NOTE(review): preserved from the original -- 'Neg'
                        # and 'Traffic' clips are counted correct regardless
                        # of the prediction; confirm this is intentional.
                        if str(pre) == str(class_name) or class_name in ('Neg', 'Traffic'):
                            correct[class_name] += 1

                        clip.pop(0)  # slide the 16-frame window by one frame

                    cv2.waitKey(30)

                cap.release()
                cv2.destroyAllWindows()
            # NOTE(review): reports accuracy only for the last class iterated;
            # raises ZeroDivisionError if that class produced no clips.
            print(
                str(class_name) + '_acc:' + str(
                    int(correct[class_name]) / int(total[class_name])))