Example #1
def get_inference_utils(opt):
    assert opt.inference_crop in ['center', 'nocrop']

    normalize = get_normalize_method(opt.mean, opt.std, opt.no_mean_norm,
                                     opt.no_std_norm)

    spatial_transform = [Resize(opt.sample_size)]
    if opt.inference_crop == 'center':
        spatial_transform.append(CenterCrop(opt.sample_size))
    spatial_transform.append(ToTensor())
    if opt.input_type == 'flow':
        spatial_transform.append(PickFirstChannels(n=2))
    spatial_transform.extend([ScaleValue(opt.value_scale), normalize])
    spatial_transform = Compose(spatial_transform)

    temporal_transform = []
    if opt.sample_t_stride > 1:
        temporal_transform.append(TemporalSubsampling(opt.sample_t_stride))
    temporal_transform.append(
        SlidingWindow(opt.sample_duration, opt.inference_stride))
    temporal_transform = TemporalCompose(temporal_transform)

    inference_data, collate_fn = get_inference_data(
        opt.video_path, opt.annotation_path, opt.dataset, opt.input_type,
        opt.file_type, opt.inference_subset, spatial_transform,
        temporal_transform)

    inference_loader = torch.utils.data.DataLoader(
        inference_data,
        batch_size=opt.inference_batch_size,
        shuffle=False,
        num_workers=opt.n_threads,
        pin_memory=True,
        worker_init_fn=worker_init_fn,
        collate_fn=collate_fn)

    return inference_loader, inference_data.class_names
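A minimal sketch of how get_inference_utils might be invoked; the option names follow the function body above, but every value below is an illustrative assumption, not the repository's defaults.

from types import SimpleNamespace

# Hypothetical options object; all field values are assumptions.
opt = SimpleNamespace(
    inference_crop='center', mean=[0.4345, 0.4051, 0.3775],
    std=[0.2768, 0.2713, 0.2737], no_mean_norm=False, no_std_norm=False,
    sample_size=112, input_type='rgb', value_scale=1, sample_t_stride=1,
    sample_duration=16, inference_stride=16, video_path='data/videos',
    annotation_path='data/annotation.json', dataset='kinetics',
    file_type='jpg', inference_subset='val', inference_batch_size=1,
    n_threads=4)

inference_loader, class_names = get_inference_utils(opt)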
Example #2
testBatchSize = 1

trainX, trainY, testX, testY = make_split(data_path)
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]
normalize = Normalize(mean=mean, std=std)
spatial_transform = Compose([Scale(256), RandomHorizontalFlip(),
                             MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224),
                             ToTensor(), normalize])

vidSeqTrain = makeDataset(trainX, trainY, spatial_transform=spatial_transform,
                          seqLen=seqLen)

trainLoader = torch.utils.data.DataLoader(vidSeqTrain, batch_size=args.trainBatchSize,
                                          shuffle=True, num_workers=0)

test_spatial_transform = Compose([Scale(256), CenterCrop(224), FlippedImagesTest(mean=mean, std=std)])

vidSeqTest = makeDataset(testX, testY, seqLen=seqLen,
                         spatial_transform=test_spatial_transform)

testLoader = torch.utils.data.DataLoader(vidSeqTest, batch_size=testBatchSize,
                                         shuffle=False, num_workers=1)

# trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
# trainLoader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True, num_workers=2)

# testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)
# testLoader = torch.utils.data.DataLoader(testset, batch_size=100, shuffle=False, num_workers=2)

# classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
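The snippet above relies on names defined elsewhere in the original script (data_path, seqLen, args); a hypothetical minimal setup, with every value a guess:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--trainBatchSize', type=int, default=32)
args = parser.parse_args()

data_path = './data'  # root directory consumed by make_split (assumed)
seqLen = 20           # frames sampled per video clip (assumed)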
Example #3
def main_run(numEpochs, lr, stepSize, decayRate, trainBatchSize, seqLen,
             memSize, evalInterval, evalMode, numWorkers, outDir,
             fightsDir_train, noFightsDir_train, fightsDir_test,
             noFightsDir_test):

    train_dataset_dir_fights = fightsDir_train
    train_dataset_dir_noFights = noFightsDir_train
    test_dataset_dir_fights = fightsDir_test
    test_dataset_dir_noFights = noFightsDir_test

    trainDataset, trainLabels, trainNumFrames = make_split(
        train_dataset_dir_fights, train_dataset_dir_noFights)
    testDataset, testLabels, testNumFrames = make_split(
        test_dataset_dir_fights, test_dataset_dir_noFights)

    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    normalize = Normalize(mean=mean, std=std)
    spatial_transform = Compose([
        Scale(256),
        RandomHorizontalFlip(),
        MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224),
        ToTensor(), normalize
    ])

    vidSeqTrain = VideoDataset(trainDataset,
                               trainLabels,
                               trainNumFrames,
                               spatial_transform=spatial_transform,
                               seqLen=seqLen)

    trainLoader = torch.utils.data.DataLoader(vidSeqTrain,
                                              batch_size=trainBatchSize,
                                              shuffle=True,
                                              num_workers=numWorkers,
                                              pin_memory=True,
                                              drop_last=True)

    if evalMode == 'centerCrop':
        test_spatial_transform = Compose(
            [Scale(256), CenterCrop(224),
             ToTensor(), normalize])
        testBatchSize = 1
    elif evalMode == 'tenCrops':
        test_spatial_transform = Compose(
            [Scale(256), TenCrops(size=224, mean=mean, std=std)])
        testBatchSize = 1
    elif evalMode == 'fiveCrops':
        test_spatial_transform = Compose(
            [Scale(256), FiveCrops(size=224, mean=mean, std=std)])
        testBatchSize = 1
    elif evalMode == 'horFlip':
        test_spatial_transform = Compose([
            Scale(256),
            CenterCrop(224),
            FlippedImagesTest(mean=mean, std=std)
        ])
        testBatchSize = 1
    else:
        print('Unknown evalMode {}'.format(evalMode))
        sys.exit()

    vidSeqTest = VideoDataset(testDataset,
                              testLabels,
                              testNumFrames,
                              seqLen=seqLen,
                              spatial_transform=test_spatial_transform)

    testLoader = torch.utils.data.DataLoader(vidSeqTest,
                                             batch_size=testBatchSize,
                                             shuffle=False,
                                             num_workers=int(numWorkers / 2),
                                             pin_memory=True)

    numTrainInstances = len(vidSeqTrain)
    numTestInstances = len(vidSeqTest)

    print('Number of training samples = {}'.format(numTrainInstances))
    print('Number of testing samples = {}'.format(numTestInstances))

    modelFolder = './experiments_' + outDir  # Dir for saving models and log files
    # Create the dir
    if os.path.exists(modelFolder):
        print(modelFolder + ' exists!!!')
        sys.exit()
    else:
        os.makedirs(modelFolder)
    # Log files
    writer = SummaryWriter(modelFolder)
    trainLogLoss = open((modelFolder + '/trainLogLoss.txt'), 'w')
    trainLogAcc = open((modelFolder + '/trainLogAcc.txt'), 'w')
    testLogLoss = open((modelFolder + '/testLogLoss.txt'), 'w')
    testLogAcc = open((modelFolder + '/testLogAcc.txt'), 'w')

    model = ViolenceModel(mem_size=memSize)

    trainParams = []
    for params in model.parameters():
        params.requires_grad = True
        trainParams += [params]
    model.train(True)
    model.cuda()

    lossFn = nn.CrossEntropyLoss()
    optimizerFn = torch.optim.RMSprop(trainParams, lr=lr)
    optimScheduler = torch.optim.lr_scheduler.StepLR(optimizerFn, stepSize,
                                                     decayRate)

    minAccuracy = 50

    for epoch in range(numEpochs):
        epochLoss = 0
        numCorrTrain = 0
        iterPerEpoch = 0
        model.train(True)
        print('Epoch = {}'.format(epoch + 1))
        writer.add_scalar('lr', optimizerFn.param_groups[0]['lr'], epoch + 1)
        for i, (inputs, targets) in enumerate(trainLoader):
            iterPerEpoch += 1
            optimizerFn.zero_grad()
            inputVariable1 = Variable(inputs.permute(1, 0, 2, 3, 4).cuda())
            labelVariable = Variable(targets.cuda())
            outputLabel = model(inputVariable1)
            loss = lossFn(outputLabel, labelVariable)
            loss.backward()
            optimizerFn.step()
            outputProb = torch.nn.Softmax(dim=1)(outputLabel)
            _, predicted = torch.max(outputProb.data, 1)
            numCorrTrain += (predicted == targets.cuda()).sum()
            epochLoss += loss.item()
        optimScheduler.step()  # step the LR schedule once per epoch, after the optimizer updates
        avgLoss = epochLoss / iterPerEpoch
        trainAccuracy = (numCorrTrain / numTrainInstances) * 100
        print('Training: Loss = {} | Accuracy = {}% '.format(
            avgLoss, trainAccuracy))
        writer.add_scalar('train/epochLoss', avgLoss, epoch + 1)
        writer.add_scalar('train/accuracy', trainAccuracy, epoch + 1)
        trainLogLoss.write('Training loss after {} epoch = {}\n'.format(
            epoch + 1, avgLoss))
        trainLogAcc.write('Training accuracy after {} epoch = {}\n'.format(
            epoch + 1, trainAccuracy))

        if (epoch + 1) % evalInterval == 0:
            model.train(False)
            print('Evaluating...')
            testLossEpoch = 0
            testIter = 0
            numCorrTest = 0
            with torch.no_grad():  # inference only; replaces the removed `volatile` flag
                for j, (inputs, targets) in enumerate(testLoader):
                    testIter += 1
                    if evalMode == 'centerCrop':
                        inputVariable1 = inputs.permute(1, 0, 2, 3, 4).cuda()
                    else:
                        inputVariable1 = inputs[0].cuda()
                    labelVariable = targets.cuda(non_blocking=True)
                    outputLabel = model(inputVariable1)
                    outputLabel_mean = torch.mean(outputLabel, 0, True)
                    testLoss = lossFn(outputLabel_mean, labelVariable)
                    testLossEpoch += testLoss.item()
                    _, predicted = torch.max(outputLabel_mean.data, 1)
                    numCorrTest += (predicted == targets.cuda()).sum()
            testAccuracy = (numCorrTest / numTestInstances) * 100
            avgTestLoss = testLossEpoch / testIter
            print('Testing: Loss = {} | Accuracy = {}% '.format(
                avgTestLoss, testAccuracy))
            writer.add_scalar('test/epochloss', avgTestLoss, epoch + 1)
            writer.add_scalar('test/accuracy', testAccuracy, epoch + 1)
            testLogLoss.write('Test Loss after {} epochs = {}\n'.format(
                epoch + 1, avgTestLoss))
            testLogAcc.write('Test Accuracy after {} epochs = {}%\n'.format(
                epoch + 1, testAccuracy))
            if testAccuracy > minAccuracy:
                savePathClassifier = (modelFolder + '/bestModel.pth')
                torch.save(model, savePathClassifier)
                minAccuracy = testAccuracy
    trainLogAcc.close()
    testLogAcc.close()
    trainLogLoss.close()
    testLogLoss.close()
    writer.export_scalars_to_json(modelFolder + "/all_scalars.json")
    writer.close()
    return True
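A hypothetical driver for the training routine above; all argument values are illustrative only.

if __name__ == '__main__':
    main_run(numEpochs=100, lr=1e-4, stepSize=25, decayRate=0.5,
             trainBatchSize=16, seqLen=20, memSize=256, evalInterval=5,
             evalMode='centerCrop', numWorkers=4, outDir='violence',
             fightsDir_train='data/train/fights',
             noFightsDir_train='data/train/noFights',
             fightsDir_test='data/test/fights',
             noFightsDir_test='data/test/noFights')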
Example #4
def main_run(dataset, stage, trainDatasetDir, valDatasetDir, stage1_dict,
             stackSize, out_dir, seqLen, trainBatchSize, valBatchSize,
             numEpochs, lr1, decay_factor, decay_step, memSize, alphaX,
             alphaY):

    if dataset == 'gtea61':
        num_classes = 61
    elif dataset == 'gtea71':
        num_classes = 71
    elif dataset == 'gtea_gaze':
        num_classes = 44
    elif dataset == 'egtea':
        num_classes = 106
    else:
        print('Dataset not found')
        sys.exit()

    model_folder = os.path.join(
        './', out_dir, 'attConvLSTM', str(seqLen),
        'stage' + str(stage))  # Dir for saving models and log files
    # Create the dir
    if os.path.exists(model_folder):
        print('Directory {} exists!'.format(model_folder))
        sys.exit()
    os.makedirs(model_folder)

    # Log files
    writer = SummaryWriter(model_folder)
    train_log_loss = open((model_folder + '/train_log_loss.txt'), 'w')
    train_log_acc = open((model_folder + '/train_log_acc.txt'), 'w')
    val_log_loss = open((model_folder + '/val_log_loss.txt'), 'w')
    val_log_acc = open((model_folder + '/val_log_acc.txt'), 'w')

    # Data loader
    normalize = Normalize(mean=[0.485, 0.456, 0.406],
                          std=[0.229, 0.224, 0.225])
    spatial_transform = Compose([
        Scale(256),
        RandomHorizontalFlip(),
        MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224)
    ])
    spatial_transform2 = Compose([Scale((7, 7)), ToTensor()])

    vid_seq_train = makeDataset(trainDatasetDir,
                                spatial_transform2,
                                spatial_transform=spatial_transform,
                                sequence=False,
                                numSeg=1,
                                stackSize=stackSize,
                                fmt='.png',
                                seqLen=seqLen)

    trainInstances = len(vid_seq_train)

    train_loader = torch.utils.data.DataLoader(vid_seq_train,
                                               batch_size=trainBatchSize,
                                               shuffle=True,
                                               num_workers=4,
                                               pin_memory=True)

    if valDatasetDir is not None:
        vid_seq_val = makeDataset(valDatasetDir,
                                  spatial_transform2,
                                  spatial_transform=Compose(
                                      [Scale(256), CenterCrop(224)]),
                                  sequence=False,
                                  numSeg=1,
                                  stackSize=stackSize,
                                  fmt='.png',
                                  phase='Test',
                                  seqLen=seqLen)
        valInstances = len(vid_seq_val)

        val_loader = torch.utils.data.DataLoader(vid_seq_val,
                                                 batch_size=valBatchSize,
                                                 shuffle=False,
                                                 num_workers=2,
                                                 pin_memory=True)

    train_params = []
    if stage == 1:
        model = attentionModel(num_classes=num_classes, mem_size=memSize)
        model.train(False)
        for params in model.parameters():
            params.requires_grad = False
    else:  # stage == 2
        model = attentionModel(num_classes=num_classes, mem_size=memSize)
        model.load_state_dict(torch.load(stage1_dict), strict=False)
        model.train(False)

        for params in model.parameters():
            params.requires_grad = False
        #
        for params in model.resNet.layer4[0].conv1.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.layer4[0].conv2.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.layer4[1].conv1.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.layer4[1].conv2.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.layer4[2].conv1.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.layer4[2].conv2.parameters():
            params.requires_grad = True
            train_params += [params]
        #
        for params in model.resNet.fc.parameters():
            params.requires_grad = True
            train_params += [params]

        model.resNet.layer4[0].conv1.train(True)
        model.resNet.layer4[0].conv2.train(True)
        model.resNet.layer4[1].conv1.train(True)
        model.resNet.layer4[1].conv2.train(True)
        model.resNet.layer4[2].conv1.train(True)
        model.resNet.layer4[2].conv2.train(True)
        model.resNet.fc.train(True)

    for params in model.lstm_cell.parameters():
        params.requires_grad = True
        train_params += [params]

    for params in model.classifier.parameters():
        params.requires_grad = True
        train_params += [params]

    model.lstm_cell.train(True)

    model.classifier.train(True)
    model.cuda()

    loss_fn = nn.CrossEntropyLoss()
    loss_fn_regression = nn.MSELoss()  # Loss function for the regression model

    optimizer_fn = torch.optim.Adam(train_params,
                                    lr=lr1,
                                    weight_decay=4e-5,
                                    eps=1e-4)

    optim_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer_fn, milestones=decay_step, gamma=decay_factor)

    train_iter = 0
    min_accuracy = 0
    for epoch in range(numEpochs):
        epoch_loss = 0
        numCorrTrain = 0
        x_loss = 0
        y_loss = 0
        trainSamples = 0
        iterPerEpoch = 0

        model.lstm_cell.train(True)
        model.classifier.train(True)
        writer.add_scalar('lr', optimizer_fn.param_groups[0]['lr'], epoch + 1)

        if stage == 2:
            model.resNet.layer4[0].conv1.train(True)
            model.resNet.layer4[0].conv2.train(True)
            model.resNet.layer4[1].conv1.train(True)
            model.resNet.layer4[1].conv2.train(True)
            model.resNet.layer4[2].conv1.train(True)
            model.resNet.layer4[2].conv2.train(True)
            model.resNet.fc.train(True)

        #for i, (inputs, targets) in enumerate(train_loader):
        for flowX, flowY, inputs, targets in train_loader:
            train_iter += 1
            iterPerEpoch += 1
            optimizer_fn.zero_grad()

            flowX = flowX.cuda()
            flowY = flowY.cuda()

            inputVariable = Variable(inputs.permute(1, 0, 2, 3, 4).cuda())
            labelVariable = Variable(targets.cuda())
            trainSamples += inputs.size(0)

            output_label, _, flowXprediction, flowYprediction = model(
                inputVariable)

            #Reshaping predictions and inputs in order
            #to correctly regress on the inputs
            flowXprediction = flowXprediction.view(-1)
            flowX = torch.reshape(flowX, (-1, )).float()

            flowYprediction = flowYprediction.view(-1)
            flowY = torch.reshape(flowY, (-1, )).float()

            #print(f'Prediction: {flowXprediction.size()}')
            #print(f'Input : {flowX.size()}')

            #sys.exit()

            lossX = alphaX * loss_fn_regression(flowXprediction, flowX)
            lossY = alphaY * loss_fn_regression(flowYprediction, flowY)
            loss = loss_fn(output_label, labelVariable)

            #Weighting the loss of the ss task
            #by multiplying it by alpha
            total_loss = loss + lossX + lossY
            total_loss.backward()

            optimizer_fn.step()
            _, predicted = torch.max(output_label.data, 1)
            numCorrTrain += (predicted == targets.cuda()).sum()
            x_loss += lossX.item()
            y_loss += lossY.item()
            epoch_loss += loss.item()

        optim_scheduler.step()
        avg_x_loss = x_loss / iterPerEpoch
        avg_y_loss = y_loss / iterPerEpoch
        avg_loss = epoch_loss / iterPerEpoch
        trainAccuracy = torch.true_divide(numCorrTrain, trainSamples) * 100

        print('Train: Epoch = {} | Loss = {} | Accuracy = {}'.format(
            epoch + 1, avg_loss, trainAccuracy))
        print('X loss after {} epoch = {}% '.format(epoch + 1, avg_x_loss))
        print('Y loss after {} epoch = {}% '.format(epoch + 1, avg_y_loss))

        writer.add_scalar('train/epoch_loss', avg_loss, epoch + 1)
        writer.add_scalar('train/accuracy', trainAccuracy, epoch + 1)
        writer.add_scalar('x_train_loss', avg_x_loss, epoch + 1)
        writer.add_scalar('y_train_loss', avg_y_loss, epoch + 1)

        train_log_loss.write('Training X loss after {} epoch= {}'.format(
            epoch + 1, avg_x_loss))
        train_log_loss.write('Training Y loss after {} epoch= {}'.format(
            epoch + 1, avg_y_loss))
        train_log_loss.write('Training loss after {} epoch = {}\n'.format(
            epoch + 1, avg_loss))
        train_log_acc.write('Training accuracy after {} epoch = {}\n'.format(
            epoch + 1, trainAccuracy))

        if valDatasetDir is not None:
            model.train(False)
            val_loss_epoch = 0
            val_iter = 0
            val_x_loss = 0
            val_y_loss = 0
            val_samples = 0
            numCorr = 0
            mmap_loss = 0

            with torch.no_grad():
                #for j, (inputs, targets) in enumerate(val_loader):
                for flowX, flowY, inputs, targets in val_loader:
                    val_iter += 1
                    val_samples += inputs.size(0)

                    flowX = flowX.cuda()
                    flowY = flowY.cuda()

                    inputVariable = inputs.permute(1, 0, 2, 3, 4).cuda()
                    labelVariable = targets.cuda(non_blocking=True)

                    output_label, _, flowXprediction, flowYprediction = model(
                        inputVariable)

                    #Reshaping predictions and inputs in order
                    #to correctly regress on the inputs
                    flowXprediction = flowXprediction.view(-1)
                    flowX = torch.reshape(flowX, (-1, )).float()

                    flowYprediction = flowYprediction.view(-1)
                    flowY = torch.reshape(flowY, (-1, )).float()

                    lossX = alphaX * loss_fn_regression(flowXprediction, flowX)
                    lossY = alphaY * loss_fn_regression(flowYprediction, flowY)

                    val_loss = loss_fn(output_label, labelVariable)
                    val_loss_epoch += val_loss.item()
                    val_x_loss += lossX.item()
                    val_y_loss += lossY.item()

                    _, predicted = torch.max(output_label.data, 1)
                    numCorr += (predicted == targets.cuda()).sum()

            avg_x_val_loss = val_x_loss / val_iter
            avg_y_val_loss = val_y_loss / val_iter
            val_accuracy = torch.true_divide(numCorr, val_samples) * 100
            avg_val_loss = val_loss_epoch / val_iter

            print('Val X Loss after {} epochs, loss = {}'.format(
                epoch + 1, avg_x_val_loss))
            print('Val Y Loss after {} epochs, loss = {}'.format(
                epoch + 1, avg_y_val_loss))
            print('Val: Epoch = {} | Loss {} | Accuracy = {}'.format(
                epoch + 1, avg_val_loss, val_accuracy))

            writer.add_scalar('val x/epoch_loss', avg_x_val_loss, epoch + 1)
            writer.add_scalar('val y/epoch_loss', avg_y_val_loss, epoch + 1)
            writer.add_scalar('val/epoch_loss', avg_val_loss, epoch + 1)
            writer.add_scalar('val/accuracy', val_accuracy, epoch + 1)
            val_log_loss.write('Val X Loss after {} epochs = {}\n'.format(
                epoch + 1, avg_x_val_loss))
            val_log_loss.write('Val Y Loss after {} epochs = {}\n'.format(
                epoch + 1, avg_y_val_loss))
            val_log_loss.write('Val Loss after {} epochs = {}\n'.format(
                epoch + 1, avg_val_loss))
            val_log_acc.write('Val Accuracy after {} epochs = {}%\n'.format(
                epoch + 1, val_accuracy))

            if val_accuracy > min_accuracy:
                save_path_model = (model_folder + '/model_rgb_state_dict.pth')
                torch.save(model.state_dict(), save_path_model)
                min_accuracy = val_accuracy

    train_log_loss.close()
    train_log_acc.close()
    val_log_acc.close()
    val_log_loss.close()
    writer.export_scalars_to_json(model_folder + "/all_scalars.json")
    writer.close()
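A hypothetical invocation of the two-stage routine above; paths, hyper-parameters, and the stage-1 weights file are assumptions, not repository values.

main_run(dataset='gtea61', stage=2, trainDatasetDir='data/gtea61/train',
         valDatasetDir='data/gtea61/val',
         stage1_dict='experiments/stage1/model_rgb_state_dict.pth',
         stackSize=5, out_dir='experiments', seqLen=25, trainBatchSize=32,
         valBatchSize=32, numEpochs=150, lr1=1e-3, decay_factor=0.1,
         decay_step=[25, 75], memSize=512, alphaX=1.0, alphaY=1.0)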
Example #5
def main_run(dataset, stage, train_data_dir, val_data_dir, stage1_dict, out_dir, seqLen, trainBatchSize,
             valBatchSize, numEpochs, lr1, decay_factor, decay_step, memSize, regressor):

    if dataset == 'gtea61':
        num_classes = 61
    elif dataset == 'gtea71':
        num_classes = 71
    elif dataset == 'gtea_gaze':
        num_classes = 44
    elif dataset == 'egtea':
        num_classes = 106
    else:
        print('Dataset not found')
        sys.exit()

    model_folder = os.path.join('./', out_dir, dataset, 'MS', str(stage))  # Dir for saving models and log files
    # Create the dir
    if os.path.exists(model_folder):
        print('Directory {} exists!'.format(model_folder))
        sys.exit()
    os.makedirs(model_folder)

    # Log files
    writer = SummaryWriter(model_folder)
    train_log_loss = open((model_folder + '/train_log_loss.txt'), 'w')
    train_log_acc = open((model_folder + '/train_log_acc.txt'), 'w')
    val_log_loss = open((model_folder + '/val_log_loss.txt'), 'w')
    val_log_acc = open((model_folder + '/val_log_acc.txt'), 'w')
    train_log_loss_ms = open((model_folder + '/train_log_loss_ms.txt'), 'w')
    val_log_loss_ms = open((model_folder + '/val_log_loss_ms.txt'), 'w')
    train_log_acc_ms = open((model_folder + '/train_log_acc_ms.txt'), 'w')
    val_log_acc_ms = open((model_folder + '/val_log_acc_ms.txt'), 'w')

    # Data loader
    normalize = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    spatial_transform = Compose([Scale(256), RandomHorizontalFlip(), MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224)])

    vid_seq_train = makeDataset(train_data_dir,
                                spatial_transform=spatial_transform, seqLen=seqLen,
                                fmt='.png', phase='train', regressor=regressor)

    train_loader = torch.utils.data.DataLoader(vid_seq_train, batch_size=trainBatchSize,
                            shuffle=True, num_workers=4, pin_memory=True)
    if val_data_dir is not None:

        vid_seq_val = makeDataset(val_data_dir,
                                  spatial_transform=Compose([Scale(256), CenterCrop(224)]),
                                  seqLen=seqLen, fmt='.png', phase='test', regressor=regressor)

        val_loader = torch.utils.data.DataLoader(vid_seq_val, batch_size=valBatchSize,
                                shuffle=False, num_workers=2, pin_memory=True)
        valInstances = len(vid_seq_val)

    trainInstances = len(vid_seq_train)

    train_params = []
    if stage == 1:

        model = attentionModel_ml(num_classes=num_classes, mem_size=memSize, regressor=regressor)
        model.train(False)
        for params in model.parameters():
            params.requires_grad = False
    else:

        model = attentionModel_ml(num_classes=num_classes, mem_size=memSize, regressor=regressor)
        model.load_state_dict(torch.load(stage1_dict), strict=False)
        model.train(False)
        for params in model.parameters():
            params.requires_grad = False
        #
        for params in model.resNet.layer4[0].conv1.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.layer4[0].conv2.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.layer4[1].conv1.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.layer4[1].conv2.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.layer4[2].conv1.parameters():
            params.requires_grad = True
            train_params += [params]
        #
        for params in model.resNet.layer4[2].conv2.parameters():
            params.requires_grad = True
            train_params += [params]
        #
        for params in model.resNet.fc.parameters():
            params.requires_grad = True
            train_params += [params]
        for params in model.conv.parameters():
            params.requires_grad = True
            train_params += [params]
        for params in model.clas.parameters():
            params.requires_grad = True
            train_params += [params]
        model.conv.train(True)
        model.clas.train(True)
        model.resNet.layer4[0].conv1.train(True)
        model.resNet.layer4[0].conv2.train(True)
        model.resNet.layer4[1].conv1.train(True)
        model.resNet.layer4[1].conv2.train(True)
        model.resNet.layer4[2].conv1.train(True)
        model.resNet.layer4[2].conv2.train(True)
        model.resNet.fc.train(True)

    for params in model.lstm_cell.parameters():
        params.requires_grad = True
        train_params += [params]

    for params in model.classifier.parameters():
        params.requires_grad = True
        train_params += [params]

    model.lstm_cell.train(True)

    model.classifier.train(True)
    model.cuda()

    loss_fn = nn.CrossEntropyLoss()
    loss_fms = nn.NLLLoss()
    loss_reg = nn.MSELoss()
    optimizer_fn = torch.optim.Adam(train_params, lr=lr1, weight_decay=4e-5, eps=1e-4)

    optim_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer_fn, milestones=decay_step,
                                                           gamma=decay_factor)

    train_iter = 0
    min_accuracy = 0

    for epoch in range(numEpochs):
        epoch_loss = 0
        numCorrTrain = 0
        numCorrTrain_ms = 0
        trainSamples = 0
        iterPerEpoch = 0
        epoch_loss_ms = 0
        model.lstm_cell.train(True)
        model.classifier.train(True)
        writer.add_scalar('lr', optimizer_fn.param_groups[0]['lr'], epoch+1)
        if stage == 2:
            model.conv.train(True)
            model.clas.train(True)
            model.resNet.layer4[0].conv1.train(True)
            model.resNet.layer4[0].conv2.train(True)
            model.resNet.layer4[1].conv1.train(True)
            model.resNet.layer4[1].conv2.train(True)
            model.resNet.layer4[2].conv1.train(True)
            model.resNet.layer4[2].conv2.train(True)
            model.resNet.fc.train(True)
        for i, (inputs, binary_map, targets) in enumerate(train_loader):
            train_iter += 1
            iterPerEpoch += 1
            optimizer_fn.zero_grad()
            inputVariable = Variable(inputs.permute(1, 0, 2, 3, 4).cuda())
            labelVariable = Variable(targets.cuda())
            trainSamples += inputs.size(0)
            output_label, output_ms = model(inputVariable)
            
            loss = loss_fn(output_label, labelVariable)

            if stage == 2:
                loss.backward(retain_graph=True)
            else:
                loss.backward()
            if regressor == 0:
                binary_map = Variable(binary_map.permute(1, 0, 2, 3, 4).type(torch.LongTensor).cuda())
                output_ms = output_ms.view(-1, 2)
            elif regressor == 1:
                binary_map = Variable(binary_map.permute(1, 0, 2, 3, 4).cuda())
                output_ms = output_ms.view(-1)
            binary_map = binary_map.contiguous().view(-1)

            if stage == 2:
                if regressor == 1:
                    loss_ms = loss_reg(output_ms, binary_map)
                    loss_ms.backward()
                    epoch_loss_ms += loss_ms.item()
                elif regressor == 0:
                    loss_ms = loss_fn(output_ms, binary_map)
                    loss_ms.backward()
                    _, predicted = torch.max(output_ms.data, 1)
                    numCorrTrain_ms += torch.sum(predicted == binary_map.data).item()
                    epoch_loss_ms += loss_ms.item()

            optimizer_fn.step()
            _, predicted = torch.max(output_label.data, 1)
            numCorrTrain += torch.sum(predicted == labelVariable.data).item()
            epoch_loss += loss.item()

        avg_loss = epoch_loss / iterPerEpoch
        if stage == 2:
            trainAccuracy_ms = (numCorrTrain_ms / trainSamples) * 100
            avg_loss_ms = epoch_loss_ms / iterPerEpoch
            #avg_loss = avg_loss + avg_loss_ms
            train_log_loss_ms.write('Train Loss MS after {} epochs = {}\n'.format(epoch + 1, avg_loss_ms))
            if regressor == 0:
                train_log_acc_ms.write('Train Accuracy after {} epochs = {}%\n'.format(epoch + 1, trainAccuracy_ms))

        trainAccuracy = (numCorrTrain / trainSamples) * 100

        print('Train: Epoch = {} | Loss = {} | Accuracy = {}'.format(epoch + 1, avg_loss, trainAccuracy))
        writer.add_scalar('train/epoch_loss', avg_loss, epoch + 1)
        writer.add_scalar('train/accuracy', trainAccuracy, epoch + 1)
        train_log_loss.write('Train Loss after {} epochs = {}\n'.format(epoch + 1, avg_loss))
        train_log_acc.write('Train Accuracy after {} epochs = {}%\n'.format(epoch + 1, trainAccuracy))
        if val_data_dir is not None:
            if (epoch+1) % 1 == 0:
                model.train(False)
                val_loss_epoch = 0
                val_iter = 0
                val_samples = 0
                numCorr = 0
                numCorr_ms = 0
                epoch_loss_ms_val=0
                
                with torch.no_grad():  # inference only; replaces the removed `volatile` flag
                    for j, (inputs, binary_map, targets) in enumerate(val_loader):
                        val_iter += 1
                        val_samples += inputs.size(0)
                        inputVariable = inputs.permute(1, 0, 2, 3, 4).cuda()
                        labelVariable = targets.cuda(non_blocking=True)
                        output_label, output_ms = model(inputVariable)
                        val_loss = loss_fn(output_label, labelVariable)
                        val_loss_epoch += val_loss.item()
                        if regressor == 0:
                            binary_map = binary_map.permute(1, 0, 2, 3, 4).type(torch.LongTensor).cuda()
                            output_ms = output_ms.view(-1, 2)
                        elif regressor == 1:
                            binary_map = binary_map.permute(1, 0, 2, 3, 4).cuda()
                            output_ms = output_ms.view(-1)
                        binary_map = binary_map.contiguous().view(-1)
                        if stage == 2:
                            if regressor == 1:
                                loss_ms = loss_reg(output_ms, binary_map)
                                epoch_loss_ms_val += loss_ms.item()
                            elif regressor == 0:
                                loss_ms = loss_fn(output_ms, binary_map)
                                _, predicted = torch.max(output_ms.data, 1)
                                numCorr_ms += torch.sum(predicted == binary_map.data).item()
                                epoch_loss_ms_val += loss_ms.item()
                        _, predicted = torch.max(output_label.data, 1)
                        numCorr += torch.sum(predicted == labelVariable.data).item()

                avg_val_loss = val_loss_epoch / val_iter
                if stage == 2:
                    avg_loss_ms = epoch_loss_ms_val / val_iter
                    val_accuracy_ms = (numCorr_ms / val_samples) * 100
                    #avg_loss = avg_loss + avg_loss_ms
                    val_log_loss_ms.write('Val Loss MS after {} epochs = {}\n'.format(epoch + 1, avg_loss_ms))
                    if regressor == 0:
                        val_log_acc_ms.write('Val Accuracy after {} epochs = {}%\n'.format(epoch + 1, val_accuracy_ms))
                val_accuracy = (numCorr / val_samples) * 100
                print('Val: Epoch = {} | Loss {} | Accuracy = {}'.format(epoch + 1, avg_val_loss, val_accuracy))
                writer.add_scalar('val/epoch_loss', avg_val_loss, epoch + 1)
                writer.add_scalar('val/accuracy', val_accuracy, epoch + 1)

                val_log_loss.write('Val Loss after {} epochs = {}\n'.format(epoch + 1, avg_val_loss))
                val_log_acc.write('Val Accuracy after {} epochs = {}%\n'.format(epoch + 1, val_accuracy))
                if val_accuracy > min_accuracy:
                    save_path_model = (model_folder + '/model_ms_state_dict.pth')
                    torch.save(model.state_dict(), save_path_model)
                    min_accuracy = val_accuracy
        else:
            # No validation set: checkpoint periodically instead
            if (epoch + 1) % 10 == 0:
                save_path_model = (model_folder + '/model_ms_state_dict_epoch' + str(epoch + 1) + '.pth')
                torch.save(model.state_dict(), save_path_model)

        optim_scheduler.step()  # step the LR schedule once per epoch

    train_log_loss.close()
    train_log_acc.close()
    val_log_acc.close()
    val_log_loss.close()
    train_log_loss_ms.close()
    val_log_loss_ms.close()
    writer.export_scalars_to_json(model_folder + "/all_scalars.json")
    writer.close()
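A hypothetical invocation; regressor=1 trains the motion-segmentation head with MSE regression, regressor=0 with two-way classification, matching the branches above. All values are illustrative.

main_run(dataset='gtea61', stage=2, train_data_dir='data/gtea61/train',
         val_data_dir='data/gtea61/val',
         stage1_dict='experiments/stage1/model_ms_state_dict.pth',
         out_dir='experiments', seqLen=7, trainBatchSize=32,
         valBatchSize=32, numEpochs=150, lr1=1e-3, decay_factor=0.1,
         decay_step=[25, 75], memSize=512, regressor=1)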
Example #6
    train_logger = Logger(os.path.join(cfg.custom_logdir, 'train.log'),
                          ['epoch', 'loss', 'acc', 'lr'])
    train_batch_logger = Logger(
        os.path.join(cfg.custom_logdir, 'train_batch.log'),
        ['epoch', 'batch', 'iter', 'loss', 'acc', 'lr'])

    optimizer = model.get_optimizer(lr1=cfg.lr, lr2=cfg.lr2)
    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer,
                                               'min',
                                               patience=cfg.lr_patience)
    print('##########################################')
    print('####### val')
    print('##########################################')
    spatial_transform = Compose([
        Scale(cfg.sample_size),
        CenterCrop(cfg.sample_size),
        ToTensor(cfg.norm_value), norm_method
    ])
    temporal_transform = LoopPadding(cfg.sample_duration)
    target_transform = ClassLabel()
    validation_data = get_validation_set(cfg, spatial_transform,
                                         temporal_transform, target_transform)
    val_loader = torch.utils.data.DataLoader(validation_data,
                                             batch_size=cfg.batch_size,
                                             shuffle=False,
                                             num_workers=cfg.n_threads,
                                             drop_last=False,
                                             pin_memory=True)
    val_logger = Logger(os.path.join(cfg.custom_logdir, 'val.log'),
                        ['epoch', 'loss', 'acc'])
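The fragment above assumes a cfg object, a model, and a norm_method from the surrounding training script; a hypothetical minimal stand-in so the fragment can run (all field names and values are assumptions inferred from the code):

from types import SimpleNamespace

cfg = SimpleNamespace(custom_logdir='./logs', lr=1e-3, lr2=1e-4,
                      lr_patience=10, sample_size=112, sample_duration=16,
                      norm_value=255, batch_size=32, n_threads=4)
norm_method = Normalize(mean=[0.485, 0.456, 0.406],
                        std=[0.229, 0.224, 0.225])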
Example #7
def main_run(dataset, stage, train_data_dir, val_data_dir, stage1_dict,
             out_dir, seqLen, trainBatchSize, valBatchSize, numEpochs, lr1,
             decay_factor, decay_step, memSize, regression, rloss, debug,
             verbose, CAM):
    # GTEA 61
    num_classes = 61

    # Train/Validation/Test split
    train_splits = ["S1", "S3", "S4"]
    val_splits = ["S2"]

    if debug:
        n_workers = 0
        device = 'cpu'
    else:
        n_workers = 4
        device = 'cuda'

    model_folder = os.path.join(
        './', out_dir, dataset, 'rgb',
        'stage' + str(stage))  # Dir for saving models and log files
    # Create the dir
    if os.path.exists(model_folder):
        print('Directory {} exists!'.format(model_folder))
        sys.exit()
    os.makedirs(model_folder)

    # Log files
    train_log_loss = open((model_folder + '/train_log_loss.txt'), 'w')
    train_log_acc = open((model_folder + '/train_log_acc.txt'), 'w')
    val_log_loss = open((model_folder + '/val_log_loss.txt'), 'w')
    val_log_acc = open((model_folder + '/val_log_acc.txt'), 'w')

    # Data loader
    normalize = Normalize(mean=[0.485, 0.456, 0.406],
                          std=[0.229, 0.224, 0.225])
    spatial_transform = Compose([
        Scale(256),
        RandomHorizontalFlip(),
        MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224),
        # ToTensor(),
        # normalize
    ])
    transform_rgb = Compose([ToTensor(), normalize])
    transform_MS = Compose([Resize((7, 7)), ToTensor()])

    vid_seq_train = makeDataset(train_data_dir,
                                splits=train_splits,
                                spatial_transform=spatial_transform,
                                transform_rgb=transform_rgb,
                                transform_MS=transform_MS,
                                seqLen=seqLen,
                                fmt='.png',
                                regression=regression)

    train_loader = torch.utils.data.DataLoader(vid_seq_train,
                                               batch_size=trainBatchSize,
                                               shuffle=True,
                                               num_workers=n_workers,
                                               pin_memory=True)

    vid_seq_val = makeDataset(train_data_dir,
                              splits=val_splits,
                              spatial_transform=Compose(
                                  [Scale(256), CenterCrop(224)]),
                              transform_rgb=transform_rgb,
                              transform_MS=transform_MS,
                              seqLen=seqLen,
                              fmt='.png',
                              regression=regression,
                              verbose=False)

    val_loader = torch.utils.data.DataLoader(vid_seq_val,
                                             batch_size=valBatchSize,
                                             shuffle=False,
                                             num_workers=n_workers,
                                             pin_memory=True)
    valInstances = len(vid_seq_val)
    '''
    if val_data_dir is not None:

        vid_seq_val = makeDataset(val_data_dir,
                                  spatial_transform=Compose([Scale(256), CenterCrop(224), ToTensor(), normalize]),
                                  seqLen=seqLen, fmt='.jpg')

        val_loader = torch.utils.data.DataLoader(vid_seq_val, batch_size=valBatchSize,
                                                 shuffle=False, num_workers=2, pin_memory=True)
        valInstances = vid_seq_val.__len__()
    '''
    trainInstances = vid_seq_train.__len__()

    train_params = []
    if stage == 1:
        if regression:
            model = SelfSupervisedAttentionModel(num_classes=num_classes,
                                                 mem_size=memSize,
                                                 n_channels=1)
        else:
            model = SelfSupervisedAttentionModel(num_classes=num_classes,
                                                 mem_size=memSize)
        model.train(False)
        for params in model.parameters():
            params.requires_grad = False
    else:
        if regression:
            model = SelfSupervisedAttentionModel(num_classes=num_classes,
                                                 mem_size=memSize,
                                                 n_channels=1)
        else:
            model = SelfSupervisedAttentionModel(num_classes=num_classes,
                                                 mem_size=memSize)

        model.load_state_dict(torch.load(stage1_dict), strict=False)
        model.train(False)
        for params in model.parameters():
            params.requires_grad = False
        #
        for params in model.resNet.layer4[0].conv1.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.layer4[0].conv2.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.layer4[1].conv1.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.layer4[1].conv2.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.layer4[2].conv1.parameters():
            params.requires_grad = True
            train_params += [params]
        #
        for params in model.resNet.layer4[2].conv2.parameters():
            params.requires_grad = True
            train_params += [params]
        #
        for params in model.resNet.fc.parameters():
            params.requires_grad = True
            train_params += [params]

        model.resNet.layer4[0].conv1.train(True)
        model.resNet.layer4[0].conv2.train(True)
        model.resNet.layer4[1].conv1.train(True)
        model.resNet.layer4[1].conv2.train(True)
        model.resNet.layer4[2].conv1.train(True)
        model.resNet.layer4[2].conv2.train(True)
        model.resNet.fc.train(True)

        # Add params from ms_module
        for params in model.ms_module.parameters():
            params.requires_grad = True
            train_params += [params]

    for params in model.lstm_cell.parameters():
        params.requires_grad = True
        train_params += [params]

    for params in model.classifier.parameters():
        params.requires_grad = True
        train_params += [params]

    model.lstm_cell.train(True)

    model.classifier.train(True)
    model.ms_module.train(True)
    model.to(device)

    # wandb.init(project="first_person_action_recognition")

    loss_fn = nn.CrossEntropyLoss()
    if regression:
        if rloss == 'MSE':
            # Mean Squared Error loss
            loss_ms_fn = nn.MSELoss()  # it should work
        elif rloss == 'L1':
            # L1 loss
            loss_ms_fn = nn.L1Loss()
        elif rloss == 'SmoothL1':
            # Huber Loss or Smooth L1 Loss
            loss_ms_fn = nn.SmoothL1Loss()
        elif rloss == 'KLdiv':
            # Kullback-Leibler divergence loss
            loss_ms_fn = nn.KLDivLoss()
    else:
        # classification
        loss_ms_fn = nn.CrossEntropyLoss()  # TODO: check paper Planamente

    optimizer_fn = torch.optim.Adam(train_params,
                                    lr=lr1,
                                    weight_decay=4e-5,
                                    eps=1e-4)

    optim_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer_fn, milestones=decay_step, gamma=decay_factor)

    train_iter = 0
    min_accuracy = 0

    for epoch in range(numEpochs):
        epoch_loss = 0
        numCorrTrain = 0
        trainSamples = 0
        iterPerEpoch = 0

        #model.train(True)
        model.lstm_cell.train(True)
        model.classifier.train(True)
        if stage == 2:
            model.resNet.layer4[0].conv1.train(True)
            model.resNet.layer4[0].conv2.train(True)
            model.resNet.layer4[1].conv1.train(True)
            model.resNet.layer4[1].conv2.train(True)
            model.resNet.layer4[2].conv1.train(True)
            model.resNet.layer4[2].conv2.train(True)
            model.resNet.fc.train(True)

            model.ms_module.train(True)

        for i, (inputsRGB, inputsMS, targets) in enumerate(train_loader):
            # Inputs:
            #   - inputsRGB : the RGB frame input
            # Labels:
            #   - inputsMS  : the motion-segmentation task label
            #   - targets   : the action class label

            train_iter += 1
            iterPerEpoch += 1
            optimizer_fn.zero_grad()
            inputVariable = inputsRGB.permute(1, 0, 2, 3, 4).to(device)
            labelVariable = targets.to(device)
            msVariable = inputsMS.to(device)
            trainSamples += inputsRGB.size(0)
            output_label, _, output_ms = model(inputVariable, device)
            loss_c = loss_fn(output_label, labelVariable)
            if regression:
                msVariable = torch.reshape(
                    msVariable, (seqLen * 7 * 7, msVariable.size(0)))
                output_ms = torch.sigmoid(output_ms)
                output_ms = torch.reshape(output_ms,
                                          (seqLen * 7 * 7, output_ms.size(0)))
            else:
                # classification task
                msVariable = torch.reshape(
                    msVariable, (seqLen * 7 * 7, msVariable.size(0))).long()
                output_ms = torch.reshape(
                    output_ms, (seqLen * 7 * 7, 2, output_ms.size(0)))  #

            loss_ms = loss_ms_fn(output_ms, msVariable)
            loss = loss_c + loss_ms
            if verbose:
                print(loss_c)
                print(loss_ms)
                print(loss)
                print()
            # loss = loss_fn(output_label, labelVariable) + loss_ms_fn(output_ms, inputsMS) # TODO (maybe): swap dims 0 and 1 for inputsMS # output1 = F.softmax(torch.reshape(output_ms, (32, 7, 2, 7*7))[0, 0, :, :], dim=0)
            loss.backward()
            optimizer_fn.step()
            _, predicted = torch.max(output_label.data, 1)
            numCorrTrain += (predicted == targets.to(device)).sum()
            epoch_loss += loss.data.item()
        avg_loss = epoch_loss / iterPerEpoch
        trainAccuracy = (numCorrTrain.data.item() / trainSamples) * 100

        train_log_loss.write('Training loss after {} epoch = {}\n'.format(
            epoch + 1, avg_loss))
        train_log_acc.write('Training accuracy after {} epoch = {}\n'.format(
            epoch + 1, trainAccuracy))
        print('Train: Epoch = {} | Loss = {} | Accuracy = {}'.format(
            epoch + 1, avg_loss, trainAccuracy))

        # VALIDATION PHASE
        #if val_data_dir is not None:
        if (epoch + 1) % 1 == 0:
            model.train(False)
            val_loss_epoch = 0
            val_iter = 0
            val_samples = 0
            numCorr = 0
            for j, (inputsRGB, inputsMS, targets) in enumerate(val_loader):
                val_iter += 1
                val_samples += inputsRGB.size(0)
                inputVariable = inputsRGB.permute(1, 0, 2, 3, 4).to(
                    device)  # the permutation is purely for computation
                labelVariable = targets.to(device)
                msVariable = inputsMS.to(device)
                output_label, _, output_ms = model(inputVariable, device)
                loss_c = loss_fn(output_label, labelVariable)
                if regression:
                    msVariable = torch.reshape(
                        msVariable, (seqLen * 7 * 7, msVariable.size(0)))
                    output_ms = torch.sigmoid(output_ms)
                    output_ms = torch.reshape(
                        output_ms, (seqLen * 7 * 7, output_ms.size(0)))
                else:
                    # classification task
                    msVariable = torch.reshape(
                        msVariable,
                        (seqLen * 7 * 7, msVariable.size(0))).long()
                    output_ms = torch.reshape(
                        output_ms, (seqLen * 7 * 7, 2, output_ms.size(0)))
                loss_ms = loss_ms_fn(output_ms, msVariable)
                val_loss = loss_c + loss_ms
                # val_loss = loss_fn(output_label, labelVariable) # TODO: add ms Loss
                val_loss_epoch += val_loss.data.item()
                _, predicted = torch.max(output_label.data, 1)
                numCorr += (predicted == targets.to(device)).sum()
            val_accuracy = (numCorr.data.item() / val_samples) * 100
            avg_val_loss = val_loss_epoch / val_iter
            print('Valid: Epoch = {} | Loss {} | Accuracy = {}'.format(
                epoch + 1, avg_val_loss, val_accuracy))

            val_log_loss.write('Val Loss after {} epochs = {}\n'.format(
                epoch + 1, avg_val_loss))
            val_log_acc.write('Val Accuracy after {} epochs = {}%\n'.format(
                epoch + 1, val_accuracy))
            if val_accuracy > min_accuracy:
                save_path_model = (model_folder + '/model_rgb_state_dict.pth')
                torch.save(model.state_dict(), save_path_model)
                min_accuracy = val_accuracy
            '''else:
                if (epoch+1) % 10 == 0:
                    save_path_model = (model_folder + '/model_rgb_state_dict_epoch' + str(epoch+1) + '.pth')
                    torch.save(model.state_dict(), save_path_model)
                '''
        optim_scheduler.step()

    train_log_loss.close()
    train_log_acc.close()
    val_log_acc.close()
    val_log_loss.close()
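A hypothetical driver for the self-supervised variant above; split handling follows the hard-coded S1/S3/S4 vs. S2 split, and every value is illustrative.

main_run(dataset='gtea61', stage=1, train_data_dir='data/gtea61',
         val_data_dir=None, stage1_dict=None, out_dir='experiments',
         seqLen=7, trainBatchSize=32, valBatchSize=32, numEpochs=200,
         lr1=1e-3, decay_factor=0.1, decay_step=[25, 75], memSize=512,
         regression=True, rloss='MSE', debug=False, verbose=False,
         CAM=False)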
Example #8
def main_run(dataset, model_state_dict, dataset_dir, stackSize, seqLen,
             memSize):

    if dataset == 'gtea61':
        num_classes = 61
    elif dataset == 'gtea71':
        num_classes = 71
    elif dataset == 'gtea_gaze':
        num_classes = 44
    elif dataset == 'egtea':
        num_classes = 106
    else:
        print('Dataset not found')
        sys.exit()

    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]

    normalize = Normalize(mean=mean, std=std)

    testBatchSize = 1
    spatial_transform = Compose(
        [Scale(256), CenterCrop(224),
         ToTensor(), normalize])

    vid_seq_test = makeDataset(dataset_dir,
                               spatial_transform=spatial_transform,
                               sequence=False,
                               numSeg=1,
                               stackSize=stackSize,
                               fmt='.jpg',
                               phase='Test',
                               seqLen=seqLen)

    test_loader = torch.utils.data.DataLoader(vid_seq_test,
                                              batch_size=testBatchSize,
                                              shuffle=False,
                                              num_workers=2,
                                              pin_memory=True)

    model = twoStreamAttentionModel(stackSize=5,
                                    memSize=512,
                                    num_classes=num_classes)
    model.load_state_dict(torch.load(model_state_dict))

    for params in model.parameters():
        params.requires_grad = False

    model.train(False)
    model.cuda()

    test_samples = len(vid_seq_test)
    print('Number of samples = {}'.format(test_samples))
    print('Evaluating...')
    numCorrTwoStream = 0

    predicted_labels = []
    true_labels = []
    with torch.no_grad():  # inference only; replaces the removed `volatile` flag
        for j, (inputFlow, inputFrame, targets) in enumerate(test_loader):
            inputVariableFrame = inputFrame.permute(1, 0, 2, 3, 4).cuda()
            inputVariableFlow = inputFlow.cuda()
            output_label = model(inputVariableFlow, inputVariableFrame)
            _, predictedTwoStream = torch.max(output_label.data, 1)
            numCorrTwoStream += (predictedTwoStream == targets.cuda()).sum()
            # Move to CPU so sklearn's confusion_matrix can consume them
            predicted_labels.append(predictedTwoStream.cpu().item())
            true_labels.append(targets.item())
    test_accuracyTwoStream = (numCorrTwoStream / float(test_samples)) * 100
    print('Accuracy {:.02f}%'.format(test_accuracyTwoStream))
    cnf_matrix = confusion_matrix(true_labels, predicted_labels).astype(float)
    cnf_matrix_normalized = cnf_matrix / cnf_matrix.sum(axis=1)[:, np.newaxis]

    ticks = np.linspace(0, 60, num=61)
    plt.imshow(cnf_matrix_normalized, interpolation='none', cmap='binary')
    plt.colorbar()
    plt.xticks(ticks, fontsize=6)
    plt.yticks(ticks, fontsize=6)
    plt.grid(True)
    plt.clim(0, 1)
    plt.savefig(dataset + '-twoStreamJoint.jpg', bbox_inches='tight')
    plt.show()
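A hypothetical invocation of the two-stream evaluation above; the weights path and data directory are assumptions.

main_run(dataset='gtea61',
         model_state_dict='experiments/twoStream/model_twoStream_state_dict.pth',
         dataset_dir='data/gtea61/test', stackSize=5, seqLen=25,
         memSize=512)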
Example #9
def main():
    global args
    global best_prec1
    args = parser.parse_args()

    print('Training arguments:')
    for k, v in vars(args).items():
        print('\t{}: {}'.format(k, v))

    if args.data_name == 'ucf101':
        num_class = 101
    elif args.data_name == 'hmdb51':
        num_class = 51
    elif args.data_name == 'mine':
        num_class = 2
    else:
        raise ValueError('Unknown dataset ' + args.data_name)

    model = Model(num_class,
                  args.num_segments,
                  args.representation,
                  base_model=args.arch)
    print(model)

    if 'resnet3D' in args.arch:
        train_crop_min_ratio = 0.75
        train_crop_min_scale = 0.25
        mean = [0.4345, 0.4051, 0.3775]
        std = [0.2768, 0.2713, 0.2737]
        value_scale = 1

        train_transform = Compose([
            RandomResizedCrop(
                model.crop_size, (train_crop_min_scale, 1.0),
                (train_crop_min_ratio, 1.0 / train_crop_min_ratio)),
            RandomHorizontalFlip(),
            ToTensor(),
            ScaleValue(value_scale),
            Normalize(mean, std)
        ])
        test_transform = Compose([
            Resize(model.crop_size),
            CenterCrop(model.crop_size),
            ToTensor(),  # range [0, 255] -> [0.0,1.0]
            ScaleValue(1),
            Normalize(mean, std)
        ])

    train_loader = torch.utils.data.DataLoader(
        CoviarDataSet(
            args.data_root,
            args.data_name,
            video_list=args.train_list,
            num_segments=args.num_segments,
            representation=args.representation,
            transform=model.get_augmentation(),  #train_transform, 
            is_train=True,
            accumulate=(not args.no_accumulation),
            model_name=args.arch),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=True,
        worker_init_fn=worker_init_fn)

    val_loader = torch.utils.data.DataLoader(
        CoviarDataSet(
            args.data_root,
            args.data_name,
            video_list=args.test_list,
            num_segments=args.num_segments,
            representation=args.representation,
            transform=torchvision.transforms.Compose([
                GroupScale(int(model.scale_size)),
                GroupCenterCrop(model.crop_size)
            ]),  #test_transform,
            is_train=True,
            accumulate=(not args.no_accumulation),
            model_name=args.arch),
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True,
        worker_init_fn=worker_init_fn)

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()
    cudnn.benchmark = True

    params_dict = dict(model.named_parameters())
    params = []
    for key, value in params_dict.items():
        decay_mult = 0.0 if 'bias' in key else 1.0

        if ('module.base_model.conv1' in key or 'module.base_model.bn1' in key
                or 'data_bn'
                in key) and args.representation in ['mv', 'residual']:
            lr_mult = 0.1
        elif '.fc.' in key:
            lr_mult = 1.0
        else:
            lr_mult = 0.01

        params += [{
            'params': value,
            'lr': args.lr,
            'lr_mult': lr_mult,
            'decay_mult': decay_mult
        }]

    #optimizer = torch.optim.SGD(params, weight_decay=0.001, momentum=0.9, nesterov=False)
    #scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=10)
    optimizer = torch.optim.Adam(params,
                                 weight_decay=args.weight_decay,
                                 eps=0.001)
    criterion = torch.nn.CrossEntropyLoss().cuda()

    for epoch in range(args.epochs):
        cur_lr = adjust_learning_rate(optimizer, epoch, args.lr_steps,
                                      args.lr_decay)
        #cur_lr = get_lr(optimizer)

        train(train_loader, model, criterion, optimizer, epoch, cur_lr)
        #prec1, prev_val_loss = validate(val_loader, model, criterion)
        #scheduler.step(prev_val_loss)

        if epoch % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1, _ = validate(val_loader, model, criterion)

            # Record the training history
            np.savez("train_history/train_history.npz",
                     loss=np.array(train_loss),
                     top1=np.array(train_prec),
                     lr=np.array(train_lr))
            np.savez("train_history/valid_history.npz",
                     loss=np.array(valid_loss),
                     top1=np.array(valid_prec))

            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            if is_best or epoch % SAVE_FREQ == 0:
                save_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'arch': args.arch,
                        'state_dict': model.state_dict(),
                        'best_prec1': best_prec1,
                    },
                    is_best,
                    filename='checkpoint.pth.tar')
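
adjust_learning_rate is not shown in this example; a minimal sketch consistent with the lr_mult/decay_mult fields attached to each parameter group above (an assumption, not the original implementation) could be:

import numpy as np

def adjust_learning_rate(optimizer, epoch, lr_steps, lr_decay):
    # Assumed behaviour: decay the base lr at each milestone in lr_steps, then
    # scale every group by the lr_mult/decay_mult it was given when built.
    decay = lr_decay ** sum(epoch >= np.array(lr_steps))
    lr = args.lr * decay
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr * param_group['lr_mult']
        param_group['weight_decay'] = args.weight_decay * param_group['decay_mult']
    return lr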
Example #10
0
def classify_video(video_dir, video_name, class_names, model, opt):
    assert opt.mode in ['score', 'feature']

    spatial_transform = Compose([
        Scale(opt.sample_size),
        CenterCrop(opt.sample_size),
        ToTensor(),
        Normalize(opt.mean, [1, 1, 1])
    ])
    temporal_transform = LoopPadding(opt.sample_duration)
    data = Video(video_dir,
                 spatial_transform=spatial_transform,
                 temporal_transform=temporal_transform,
                 sample_duration=opt.sample_duration)
    data_loader = torch.utils.data.DataLoader(data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=opt.n_threads,
                                              pin_memory=True)

    video_outputs = []
    video_segments = []
    with torch.no_grad():  # volatile=True is deprecated; disable autograd explicitly during inference
        for i, (inputs, segments) in enumerate(data_loader):
            outputs = model(inputs)
            outputs = F.softmax(outputs, dim=1)
            video_outputs.append(outputs.cpu().data)
            video_segments.append(segments)

    video_outputs = torch.cat(video_outputs)

    video_segments = torch.cat(video_segments)

    results = {'video': video_name, 'clips': []}

    for i in range(video_outputs.size(0)):
        clip_results = {
            'segment': video_segments[i].tolist(),
        }
        label = get_video_results(video_outputs[i], class_names, 5)
        clip_results['label'] = label
        results['clips'].append(clip_results)

#     _, max_indices = video_outputs.max(dim=1)
#     for i in range(video_outputs.size(0)):
#         clip_results = {
#             'segment': video_segments[i].tolist(),
#         }

#         if opt.mode == 'score':
#             clip_results['label'] = class_names[max_indices[i]]
#             clip_results['scores'] = video_outputs[i, max_indices[i]].item()
#         elif opt.mode == 'feature':
#             clip_results['features'] = video_outputs[i].tolist()

#         results['clips'].append(clip_results)

#     average_scores = torch.mean(video_outputs, dim=0)
#     video_results, predicted_labels = get_video_results(average_scores, class_names, 1)

#     video_results = get_video_results(average_scores, class_names, 5)
#     results = {
#         'video': video_name,
#         'result': video_results,
# #         'predicted_labels': predicted_labels
#     }
    return results
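
get_video_results is defined elsewhere; judging only from the call get_video_results(video_outputs[i], class_names, 5), a plausible sketch (an assumption, not the original) returning the top-k labels with their scores:

import torch

def get_video_results(scores, class_names, k):
    # scores: 1-D tensor of softmax probabilities for a single clip
    top_scores, top_locs = torch.topk(scores, k=k)
    return [{'label': class_names[top_locs[i].item()],
             'score': top_scores[i].item()} for i in range(k)]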
Example #11
0
def main_run(version, stage, train_data_dir, stage1_dict, out_dir, seqLen, trainBatchSize,
             valBatchSize, numEpochs, lr1, decay_factor, decay_step, mem_size):
    num_classes = 61

    model_folder = os.path.join("./", out_dir, version)

    if os.path.exists(model_folder):
        print('Directory {} exists!'.format(model_folder))
        sys.exit()
    os.makedirs(model_folder)

    train_log_loss = open((model_folder + '/train_log_loss.txt'), 'w')
    train_log_acc = open((model_folder + '/train_log_acc.txt'), 'w')
    val_log_loss = open((model_folder + '/val_log_loss.txt'), 'w')
    val_log_acc = open((model_folder + '/val_log_acc.txt'), 'w')

    # Train val partitioning
    train_usr = ["S1", "S3", "S4"]
    val_usr = ["S2"]

    # Data loader
    normalize = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    spatial_transform = Compose(
        [Scale(256), RandomHorizontalFlip(), MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224),
         ToTensor(), normalize])

    vid_seq_train = makeDataset(train_data_dir, train_usr,
                                spatial_transform=spatial_transform, seqLen=seqLen)

    train_loader = torch.utils.data.DataLoader(vid_seq_train, batch_size=trainBatchSize,
                                               shuffle=True, num_workers=4, pin_memory=True)

    vid_seq_val = makeDataset(train_data_dir, val_usr,
                              spatial_transform=Compose([Scale(256), CenterCrop(224), ToTensor(), normalize]),
                              seqLen=seqLen)

    val_loader = torch.utils.data.DataLoader(vid_seq_val, batch_size=valBatchSize,
                                             shuffle=False, num_workers=2, pin_memory=True)

    train_params = []

    # stage 1: train only lstm
    if stage == 1:

        model = attentionModel(num_classes=num_classes, mem_size=mem_size)
        model.train(False)
        for params in model.parameters():
            params.requires_grad = False

    # stage 2: train lstm, layer4, spatial attention and final fc
    else:
        model = attentionModel(num_classes=num_classes, mem_size=mem_size)
        model.load_state_dict(torch.load(stage1_dict))  # pretrained
        model.train(False)
        for params in model.parameters():
            params.requires_grad = False
        #
        for params in model.resNet.layer4[0].conv1.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.layer4[0].conv2.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.layer4[1].conv1.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.layer4[1].conv2.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.layer4[2].conv1.parameters():
            params.requires_grad = True
            train_params += [params]
        #
        for params in model.resNet.layer4[2].conv2.parameters():
            params.requires_grad = True
            train_params += [params]
        #
        for params in model.resNet.fc.parameters():  # fully connected layer
            params.requires_grad = True
            train_params += [params]

        model.resNet.layer4[0].conv1.train(True)
        model.resNet.layer4[0].conv2.train(True)
        model.resNet.layer4[1].conv1.train(True)
        model.resNet.layer4[1].conv2.train(True)
        model.resNet.layer4[2].conv1.train(True)
        model.resNet.layer4[2].conv2.train(True)
        model.resNet.fc.train(True)

    for params in model.lstm_cell.parameters():  # for both stages we train the lstm
        params.requires_grad = True
        train_params += [params]

    for params in model.classifier.parameters():  # for both stages we train the last classifier (after the lstm and avg pooling)
        params.requires_grad = True
        train_params += [params]

    model.lstm_cell.train(True)

    model.classifier.train(True)
    model.cuda()

    loss_fn = nn.CrossEntropyLoss()

    optimizer_fn = torch.optim.Adam(train_params, lr=lr1, weight_decay=4e-5, eps=1e-4)

    optim_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer_fn, milestones=decay_step,
                                                           gamma=decay_factor)

    train_iter = 0
    min_accuracy = 0

    for epoch in range(numEpochs):
        epoch_loss = 0
        numCorrTrain = 0
        trainSamples = 0
        iterPerEpoch = 0
        model.lstm_cell.train(True)
        model.classifier.train(True)
        if stage == 2:
            model.resNet.layer4[0].conv1.train(True)
            model.resNet.layer4[0].conv2.train(True)
            model.resNet.layer4[1].conv1.train(True)
            model.resNet.layer4[1].conv2.train(True)
            model.resNet.layer4[2].conv1.train(True)
            model.resNet.layer4[2].conv2.train(True)
            model.resNet.fc.train(True)
        for i, (inputs, inputsF, targets) in enumerate(train_loader):
            train_iter += 1
            iterPerEpoch += 1
            optimizer_fn.zero_grad()
            inputVariable = Variable(inputs.permute(1, 0, 2, 3, 4).to(DEVICE))
            labelVariable = Variable(targets.to(DEVICE))
            trainSamples += inputs.size(0)
            output_label, _ = model(inputVariable)
            loss = loss_fn(output_label, labelVariable)
            loss.backward()
            optimizer_fn.step()
            _, predicted = torch.max(output_label.data, 1)
            numCorrTrain += (predicted == targets.to(DEVICE)).sum()  # evaluating number of correct classifications
            epoch_loss += loss.data.item()
        optim_scheduler.step()  # step after the optimizer updates (required ordering since PyTorch 1.1)
        avg_loss = epoch_loss / iterPerEpoch
        trainAccuracy = (numCorrTrain.data.item() / trainSamples)

        train_log_loss.write('Training loss after {} epoch = {}\n'.format(epoch + 1, avg_loss))  # log file
        train_log_acc.write('Training accuracy after {} epoch = {}\n'.format(epoch + 1, trainAccuracy))  # log file
        print('Train: Epoch = {} | Loss = {} | Accuracy = {}'.format(epoch + 1, avg_loss, trainAccuracy))

        if (epoch + 1) % VAL_FREQUENCY == 0:
            model.train(False)
            val_loss_epoch = 0
            val_iter = 0
            val_samples = 0
            numCorr = 0
            for j, (inputs, inputsF, targets) in enumerate(val_loader):
                val_iter += 1
                val_samples += inputs.size(0)
                inputVariable = Variable(inputs.permute(1, 0, 2, 3, 4).to(DEVICE))
                labelVariable = Variable(targets.to(DEVICE))
                output_label, _ = model(inputVariable)
                val_loss = loss_fn(output_label, labelVariable)
                val_loss_epoch += val_loss.data.item()
                _, predicted = torch.max(output_label.data, 1)
                numCorr += (predicted == targets.to(DEVICE)).sum()  # evaluating number of correct classifications
            val_accuracy = (numCorr.data.item() / val_samples)
            avg_val_loss = val_loss_epoch / val_iter
            print('Val: Epoch = {} | Loss {} | Accuracy = {}'.format(epoch + 1, avg_val_loss, val_accuracy))
            val_log_loss.write('Val Loss after {} epochs = {}\n'.format(epoch + 1, avg_val_loss))  # log file
            val_log_acc.write('Val Accuracy after {} epochs = {}%\n'.format(epoch + 1, val_accuracy))  # log file
            if val_accuracy > min_accuracy:
                # Save only when validation accuracy improves: even if training
                # later overfits, the checkpoint always holds the best model,
                # so the exact number of epochs matters less.
                save_path_model = (model_folder + '/model_rgb_state_dict.pth')
                torch.save(model.state_dict(), save_path_model)
                min_accuracy = val_accuracy

    train_log_loss.close()
    train_log_acc.close()
    val_log_acc.close()
    val_log_loss.close()
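
The stage-2 branch above unfreezes layer4's conv layers and the final fc one block at a time; an equivalent, more compact helper (a refactoring sketch, not the original code) would be:

def unfreeze(modules, train_params):
    # Re-enable gradients for every parameter of the given modules and collect
    # them so the optimizer only receives the trainable parameters.
    for module in modules:
        module.train(True)
        for p in module.parameters():
            p.requires_grad = True
            train_params.append(p)

# Stage-2 usage matching the loops above:
# convs = [c for blk in model.resNet.layer4 for c in (blk.conv1, blk.conv2)]
# unfreeze(convs + [model.resNet.fc], train_params)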
Example #12
0
def main_run(dataset, trainDir, valDir, outDir, stackSize, trainBatchSize,
             valBatchSize, numEpochs, lr1, decay_factor, decay_step,
             uniform_sampling, debug):
    # GTEA 61
    num_classes = 61

    # Train/Validation/Test split
    train_splits = ["S1", "S3", "S4"]
    val_splits = ["S2"]

    if debug:
        n_workers = 0
        device = 'cpu'
    else:
        n_workers = 4
        device = 'cuda'

    min_accuracy = 0

    model_folder = os.path.join('./', outDir, dataset,
                                'flow')  # Dir for saving models and log files
    # Create the dir
    if os.path.exists(model_folder):
        print('Dir {} exists!'.format(model_folder))
        sys.exit()
    os.makedirs(model_folder)

    # Log files
    train_log_loss = open((model_folder + '/train_log_loss.txt'), 'w')
    train_log_acc = open((model_folder + '/train_log_acc.txt'), 'w')
    val_log_loss = open((model_folder + '/val_log_loss.txt'), 'w')
    val_log_acc = open((model_folder + '/val_log_acc.txt'), 'w')

    #num_workers = 4
    # Data loader
    normalize = Normalize(mean=[0.485, 0.456, 0.406],
                          std=[0.229, 0.224, 0.225])

    spatial_transform = Compose([
        Scale(256),
        RandomHorizontalFlip(),
        MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224),
        ToTensor(), normalize
    ])

    vid_seq_train = makeDataset(trainDir,
                                train_splits,
                                spatial_transform=spatial_transform,
                                sequence=False,
                                stackSize=stackSize,
                                fmt='.png',
                                uniform_sampling=uniform_sampling)

    train_loader = torch.utils.data.DataLoader(vid_seq_train,
                                               batch_size=trainBatchSize,
                                               shuffle=True,
                                               sampler=None,
                                               num_workers=n_workers,
                                               pin_memory=True)

    vid_seq_val = makeDataset(trainDir,
                              val_splits,
                              spatial_transform=Compose([
                                  Scale(256),
                                  CenterCrop(224),
                                  ToTensor(), normalize
                              ]),
                              sequence=False,
                              stackSize=stackSize,
                              fmt='.png',
                              phase='Test',
                              uniform_sampling=uniform_sampling)

    val_loader = torch.utils.data.DataLoader(vid_seq_val,
                                             batch_size=valBatchSize,
                                             shuffle=False,
                                             num_workers=n_workers,
                                             pin_memory=True)
    valInstances = vid_seq_val.__len__()

    trainInstances = vid_seq_train.__len__()
    print('Number of samples in the dataset: training = {} | validation = {}'.
          format(trainInstances, valInstances))

    model = flow_resnet34(True,
                          channels=2 * stackSize,
                          num_classes=num_classes)
    model.train(True)
    train_params = list(model.parameters())

    model.to(device)

    loss_fn = nn.CrossEntropyLoss()

    optimizer_fn = torch.optim.SGD(train_params,
                                   lr=lr1,
                                   momentum=0.9,
                                   weight_decay=5e-4)

    optim_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer_fn, milestones=decay_step, gamma=decay_factor)

    train_iter = 0

    for epoch in range(numEpochs):
        epoch_loss = 0
        numCorrTrain = 0
        trainSamples = 0
        iterPerEpoch = 0
        model.train(True)
        for i, (inputs, targets) in enumerate(train_loader):
            train_iter += 1
            iterPerEpoch += 1
            optimizer_fn.zero_grad()
            inputVariable = inputs.to(device)
            labelVariable = targets.to(device)
            trainSamples += inputs.size(0)
            output_label, _ = model(inputVariable)
            loss = loss_fn(output_label, labelVariable)
            loss.backward()
            optimizer_fn.step()
            _, predicted = torch.max(output_label.data, 1)
            numCorrTrain += (predicted == targets.to(device)).sum()
            epoch_loss += loss.data.item()

        optim_scheduler.step()

        avg_loss = epoch_loss / iterPerEpoch
        trainAccuracy = (numCorrTrain.data.item() / trainSamples) * 100
        print('Train: Epoch = {} | Loss = {} | Accuracy = {}'.format(
            epoch + 1, avg_loss, trainAccuracy))
        train_log_loss.write('Training loss after {} epoch = {}\n'.format(
            epoch + 1, avg_loss))
        train_log_acc.write('Training accuracy after {} epoch = {}\n'.format(
            epoch + 1, trainAccuracy))

        if (epoch + 1) % 1 == 0:
            model.train(False)
            val_loss_epoch = 0
            val_iter = 0
            val_samples = 0
            numCorr = 0
            for j, (inputs, targets) in enumerate(val_loader):
                val_iter += 1
                val_samples += inputs.size(0)
                inputVariable = inputs.to(device)
                labelVariable = targets.to(device)
                output_label, _ = model(inputVariable)
                val_loss = loss_fn(output_label, labelVariable)
                val_loss_epoch += val_loss.data.item()
                _, predicted = torch.max(output_label.data, 1)
                numCorr += (predicted == targets.to(device)).sum()
            val_accuracy = (numCorr.data.item() / val_samples) * 100
            avg_val_loss = val_loss_epoch / val_iter
            print('Validation: Epoch = {} | Loss = {} | Accuracy = {}'.format(
                epoch + 1, avg_val_loss, val_accuracy))
            val_log_loss.write('Val Loss after {} epochs = {}\n'.format(
                epoch + 1, avg_val_loss))
            val_log_acc.write('Val Accuracy after {} epochs = {}%\n'.format(
                epoch + 1, val_accuracy))
            if val_accuracy > min_accuracy:
                save_path_model = (model_folder + '/model_flow_state_dict.pth')
                torch.save(model.state_dict(), save_path_model)
                min_accuracy = val_accuracy
        else:
            if (epoch + 1) % 10 == 0:
                save_path_model = (model_folder +
                                   '/model_flow_state_dict_epoch' +
                                   str(epoch + 1) + '.pth')
                torch.save(model.state_dict(), save_path_model)

    train_log_loss.close()
    train_log_acc.close()
    val_log_acc.close()
    val_log_loss.close()
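
flow_resnet34 is constructed with channels=2*stackSize, i.e. the x/y optical-flow pairs are stacked along the channel axis. A quick shape check (illustrative only; it assumes the 224x224 crops used in the transforms above and the two-output signature seen in the training loop):

import torch

stackSize = 5
model = flow_resnet34(True, channels=2 * stackSize, num_classes=61)
dummy = torch.randn(4, 2 * stackSize, 224, 224)  # batch of 4 stacked flow fields
output_label, _ = model(dummy)
print(output_label.shape)                        # expected: torch.Size([4, 61])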
Example #13
0
def main_run(dataset, root_dir, checkpoint_path, seqLen, testBatchSize, memSize, outPool_size, split):


    mean=[0.485, 0.456, 0.406]
    std=[0.229, 0.224, 0.225]

    normalize = Normalize(mean=mean, std=std)

    test_split = split
    c_cam_classes = outPool_size

    if dataset == 'gtea_61':
        num_classes = 61
    elif dataset == 'gtea_71':
        num_classes = 71
    elif dataset == 'egtea_gaze+':
        num_classes = 106
    else:
        print('Wrong dataset')
        sys.exit()
    dataset_dir = os.path.join(root_dir, dataset)
    print('Preparing dataset...')

    if dataset == 'egtea_gaze+':
        trainDatasetF, testDatasetF, trainLabels, testLabels, trainNumFrames, testNumFrames = gen_split_egtea_gazePlus(dataset_dir,
                                                                                                               test_split)
    else:
        trainDatasetF, testDatasetF, trainLabels, testLabels, trainNumFrames, testNumFrames, _ = gen_split(dataset_dir,
                                                                                                  test_split)


    vid_seq_test = makeDataset(testDatasetF, testLabels, testNumFrames,
                               spatial_transform=Compose([Scale(256), CenterCrop(224), ToTensor(), normalize]),
                               fmt='.jpg', seqLen=seqLen)

    print('Number of test samples = {}'.format(vid_seq_test.__len__()))

    print("Dataset shape: ", len(vid_seq_test.__getitem__(0)), vid_seq_test.__getitem__(0)[0].shape , end='\n\n\n')
    test_loader = torch.utils.data.DataLoader(vid_seq_test, batch_size=testBatchSize,
                            shuffle=False, num_workers=0, pin_memory=True)


    model = attentionModel(num_classes=num_classes, mem_size=memSize, c_cam_classes=c_cam_classes)
    if os.path.exists(checkpoint_path):
        print('Loading weights from checkpoint file {}'.format(checkpoint_path))
    else:
        print('Checkpoint file {} does not exist'.format(checkpoint_path))
        sys.exit()
    last_checkpoint = torch.load(checkpoint_path) #, map_location=torch.device('cpu'))
    model.load_state_dict(last_checkpoint['model_state_dict'])
    model.cuda()
    model.eval()  # train(False) and eval() are equivalent; one call suffices

    print('Testing...')
    test_iter = 0
    test_samples = 0
    numCorr = 0
    for j, (inputs, targets) in tqdm(enumerate(test_loader)):
        test_iter += 1
        test_samples += inputs.size(0)
        with torch.no_grad():
            print(inputs.shape, targets.shape)
            inputVariable = Variable(inputs.permute(1, 0, 2, 3, 4).cuda())
            output_label, _ = model(inputVariable)
            del inputVariable
            _, predicted = torch.max(output_label.data, 1)
            numCorr += (predicted == targets.cuda()).sum()
    test_accuracy = (numCorr.cpu().item() / test_samples) * 100
    print('Test Accuracy after = {}%'.format(test_accuracy))
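
Throughout these examples the recurrent models expect the time axis first; permute(1, 0, 2, 3, 4) converts the loader's (batch, seqLen, C, H, W) layout, an inference from how inputVariable is built above:

import torch

clip = torch.randn(8, 25, 3, 224, 224)        # batch=8, seqLen=25 RGB frames
clip_time_first = clip.permute(1, 0, 2, 3, 4)
print(clip_time_first.shape)                  # torch.Size([25, 8, 3, 224, 224])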
Example #14
0
def get_val_utils(opt):
    normalize = get_normalize_method(opt.mean, opt.std, opt.no_mean_norm,
                                     opt.no_std_norm)
    spatial_transform = [
        Resize(opt.sample_size),
        CenterCrop(opt.sample_size),
        ToTensor()
    ]
    if opt.input_type == 'flow':
        spatial_transform.append(PickFirstChannels(n=2))
    spatial_transform.extend([ScaleValue(opt.value_scale), normalize])
    spatial_transform = Compose(spatial_transform)

    temporal_transform = []
    if opt.sample_t_stride > 1:
        temporal_transform.append(TemporalSubsampling(opt.sample_t_stride))
    temporal_transform.append(
        TemporalEvenCrop(opt.sample_duration, opt.n_val_samples))
    temporal_transform = TemporalCompose(temporal_transform)

    val_data_checkpoint_path = opt.result_path / Path('val_data_' +
                                                      opt.dataset + '.data')
    val_collate_checkpoint_path = opt.result_path / Path('val_coll_' +
                                                         opt.dataset + '.data')
    if os.path.exists(val_data_checkpoint_path) and os.path.exists(
            val_collate_checkpoint_path) and opt.save_load_data_checkpoint:
        with open(val_data_checkpoint_path, 'rb') as filehandle:
            val_data = pickle.load(filehandle)
        with open(val_collate_checkpoint_path, 'rb') as filehandle:
            collate_fn = pickle.load(filehandle)
    else:
        val_data, collate_fn = get_validation_data(
            opt.video_path, opt.annotation_path, opt.dataset, opt.input_type,
            opt.file_type, spatial_transform, temporal_transform)
        if opt.save_load_data_checkpoint:
            with open(val_data_checkpoint_path, 'wb') as filehandle:
                pickle.dump(val_data, filehandle)
            with open(val_collate_checkpoint_path, 'wb') as filehandle:
                pickle.dump(collate_fn, filehandle)

    if opt.distributed:
        val_sampler = torch.utils.data.distributed.DistributedSampler(
            val_data, shuffle=False)
    else:
        val_sampler = None
    val_loader = torch.utils.data.DataLoader(val_data,
                                             batch_size=(opt.batch_size //
                                                         opt.n_val_samples),
                                             shuffle=False,
                                             num_workers=opt.n_threads,
                                             pin_memory=True,
                                             sampler=val_sampler,
                                             worker_init_fn=worker_init_fn,
                                             collate_fn=collate_fn)

    if opt.is_master_node:
        val_logger = Logger(opt.result_path / 'val.log',
                            ['epoch', 'loss', 'acc'])
    else:
        val_logger = None

    return val_loader, val_logger
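
The pickle-based caching of val_data and collate_fn above can be factored into a small reusable guard (a sketch, not part of the source):

import os
import pickle

def load_or_build(path, build_fn, enabled=True):
    # Return the pickled object at `path` if caching is enabled and the file
    # exists; otherwise build it, optionally cache it, and return it.
    if enabled and os.path.exists(path):
        with open(path, 'rb') as fh:
            return pickle.load(fh)
    obj = build_fn()
    if enabled:
        with open(path, 'wb') as fh:
            pickle.dump(obj, fh)
    return obj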
Example #15
0
def main_run(stage, train_data_dir, val_data_dir, stage1Dict, stage1Dict_rgb,
             stage1Dict_fc, out_dir, seqLen, trainBatchSize, valBatchSize,
             numEpochs, lr1, decay_factor, decay_step, memSize):
    #dataset = 'gtea61'
    num_classes = 61

    model_folder = os.path.join(
        './', out_dir, 'attConvLSTMDoubleResnet', str(seqLen),
        'stage' + str(stage))  # Dir for saving models and log files
    # Create the dir
    if os.path.exists(model_folder):
        print('Directory {} exists!'.format(model_folder))
        sys.exit()
    os.makedirs(model_folder)

    # Log files
    writer = SummaryWriter(model_folder)
    train_log_loss = open((model_folder + '/train_log_loss.txt'), 'w')
    train_log_acc = open((model_folder + '/train_log_acc.txt'), 'w')
    val_log_loss = open((model_folder + '/val_log_loss.txt'), 'w')
    val_log_acc = open((model_folder + '/val_log_acc.txt'), 'w')

    # Data loader
    normalize = Normalize(mean=[0.485, 0.456, 0.406],
                          std=[0.229, 0.224, 0.225])
    spatial_transform = Compose([
        Scale(256),
        RandomHorizontalFlip(),
        MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224),
        ToTensor(), normalize
    ])

    vid_seq_train = makeDataset(train_data_dir,
                                seqLen=seqLen,
                                fmt='.png',
                                users=['S1', 'S3', 'S4'],
                                spatial_transform=spatial_transform)
    trainInstances = vid_seq_train.__len__()

    train_loader = torch.utils.data.DataLoader(vid_seq_train,
                                               batch_size=trainBatchSize,
                                               shuffle=True,
                                               num_workers=4,
                                               pin_memory=True)

    if val_data_dir is not None:
        vid_seq_val = makeDataset(val_data_dir,
                                  seqLen=seqLen,
                                  fmt='.png',
                                  users=['S2'],
                                  train=False,
                                  spatial_transform=Compose([
                                      Scale(256),
                                      CenterCrop(224),
                                      ToTensor(), normalize
                                  ]))
        valInstances = vid_seq_val.__len__()

        val_loader = torch.utils.data.DataLoader(vid_seq_val,
                                                 batch_size=valBatchSize,
                                                 shuffle=False,
                                                 num_workers=2,
                                                 pin_memory=True)

    train_params = []

    model = twoStreamFlowCol(num_classes=num_classes,
                             memSize=memSize,
                             frameModel=stage1Dict_rgb,
                             flowModel=stage1Dict_fc)
    model.train(False)
    for params in model.parameters():
        params.requires_grad = False

    for params in model.classifier.parameters():
        params.requires_grad = True
        train_params += [params]

    for params in model.frameModel.lstm_cell.parameters():
        train_params += [params]
        params.requires_grad = True

    for params in model.frameModel.resNet.layer4[0].conv1.parameters():
        params.requires_grad = True
        train_params += [params]

    for params in model.frameModel.resNet.layer4[0].conv2.parameters():
        params.requires_grad = True
        train_params += [params]

    for params in model.frameModel.resNet.layer4[1].conv1.parameters():
        params.requires_grad = True
        train_params += [params]

    for params in model.frameModel.resNet.layer4[1].conv2.parameters():
        params.requires_grad = True
        train_params += [params]

    for params in model.frameModel.resNet.layer4[2].conv1.parameters():
        params.requires_grad = True
        train_params += [params]
    #
    for params in model.frameModel.resNet.layer4[2].conv2.parameters():
        params.requires_grad = True
        train_params += [params]
    #
    for params in model.frameModel.resNet.fc.parameters():
        params.requires_grad = True
        train_params += [params]

    for params in model.flowModel.lstm_cell.parameters():
        train_params += [params]
        params.requires_grad = True

    for params in model.flowModel.resNet.layer4[0].conv1.parameters():
        params.requires_grad = True
        train_params += [params]

    for params in model.flowModel.resNet.layer4[0].conv2.parameters():
        params.requires_grad = True
        train_params += [params]

    for params in model.flowModel.resNet.layer4[1].conv1.parameters():
        params.requires_grad = True
        train_params += [params]

    for params in model.flowModel.resNet.layer4[1].conv2.parameters():
        params.requires_grad = True
        train_params += [params]

    for params in model.flowModel.resNet.layer4[2].conv1.parameters():
        params.requires_grad = True
        train_params += [params]
    #
    for params in model.flowModel.resNet.layer4[2].conv2.parameters():
        params.requires_grad = True
        train_params += [params]
    #
    for params in model.flowModel.resNet.fc.parameters():
        params.requires_grad = True
        train_params += [params]

    model.cuda()

    trainSamples = vid_seq_train.__len__()
    min_accuracy = 0

    loss_fn = nn.CrossEntropyLoss()
    optimizer_fn = torch.optim.SGD(train_params,
                                   lr=lr1,
                                   momentum=0.9,
                                   weight_decay=5e-4)

    optim_scheduler = torch.optim.lr_scheduler.StepLR(optimizer_fn,
                                                      step_size=decay_step,
                                                      gamma=decay_factor)
    train_iter = 0
    for epoch in range(numEpochs):
        epoch_loss = 0
        numCorrTrain = 0
        trainSamples = 0
        iterPerEpoch = 0

        model.classifier.train(True)
        writer.add_scalar('lr', optimizer_fn.param_groups[0]['lr'], epoch + 1)

        #for i, (inputs, targets) in enumerate(train_loader):
        for inputs, inputsSN, targets in train_loader:
            train_iter += 1
            iterPerEpoch += 1
            optimizer_fn.zero_grad()

            inputVariable = Variable(inputs.permute(1, 0, 2, 3, 4).cuda())
            inputSNVariable = Variable(inputsSN.permute(1, 0, 2, 3, 4).cuda())
            labelVariable = Variable(targets.cuda())
            trainSamples += inputs.size(0)

            output_label, _ = model(inputVariable, inputSNVariable)

            loss = loss_fn(output_label, labelVariable)
            loss.backward()

            optimizer_fn.step()
            _, predicted = torch.max(output_label.data, 1)
            numCorrTrain += (predicted == targets.cuda()).sum()
            epoch_loss += loss.item()

        optim_scheduler.step()
        avg_loss = epoch_loss / iterPerEpoch
        trainAccuracy = torch.true_divide(numCorrTrain, trainSamples) * 100

        print('Train: Epoch = {} | Loss = {} | Accuracy = {}'.format(
            epoch + 1, avg_loss, trainAccuracy))
        writer.add_scalar('train/epoch_loss', avg_loss, epoch + 1)
        writer.add_scalar('train/accuracy', trainAccuracy, epoch + 1)
        train_log_loss.write('Training loss after {} epoch = {}\n'.format(
            epoch + 1, avg_loss))
        train_log_acc.write('Training accuracy after {} epoch = {}\n'.format(
            epoch + 1, trainAccuracy))

        if val_data_dir is not None:
            model.train(False)
            val_loss_epoch = 0
            val_iter = 0
            val_samples = 0
            numCorr = 0

            with torch.no_grad():
                #for j, (inputs, targets) in enumerate(val_loader):
                for inputs, inputsSN, targets in val_loader:
                    val_iter += 1
                    val_samples += inputs.size(0)

                    inputVariable = Variable(
                        inputs.permute(1, 0, 2, 3, 4).cuda())
                    inputSNVariable = Variable(
                        inputsSN.permute(1, 0, 2, 3, 4).cuda())
                    labelVariable = Variable(targets.cuda(non_blocking=True))  # async is a reserved word in Python 3.7+

                    output_label, _ = model(inputVariable, inputSNVariable)
                    val_loss = loss_fn(output_label, labelVariable)
                    val_loss_epoch += val_loss.item()

                    _, predicted = torch.max(output_label.data, 1)
                    numCorr += (predicted == targets.cuda()).sum()

            val_accuracy = torch.true_divide(numCorr, val_samples) * 100
            avg_val_loss = val_loss_epoch / val_iter

            print('Val: Epoch = {} | Loss {} | Accuracy = {}'.format(
                epoch + 1, avg_val_loss, val_accuracy))
            writer.add_scalar('val/epoch_loss', avg_val_loss, epoch + 1)
            writer.add_scalar('val/accuracy', val_accuracy, epoch + 1)
            val_log_loss.write('Val Loss after {} epochs = {}\n'.format(
                epoch + 1, avg_val_loss))
            val_log_acc.write('Val Accuracy after {} epochs = {}%\n'.format(
                epoch + 1, val_accuracy))

            if val_accuracy > min_accuracy:
                save_path_model = (model_folder + '/model_rgb_state_dict.pth')
                torch.save(model.state_dict(), save_path_model)
                min_accuracy = val_accuracy

    train_log_loss.close()
    train_log_acc.close()
    val_log_acc.close()
    val_log_loss.close()
    writer.export_scalars_to_json(model_folder + "/all_scalars.json")
    writer.close()
Example #16
0
                lr=opt.learning_rate,
                momentum=opt.momentum,
                # dampening=dampening,
                weight_decay=opt.weight_decay,
                nesterov=True)
            scheduler = lr_scheduler.MultiStepLR(optimizer,
                                                 [15, 25, 40, 45, 50, 55, 60],
                                                 gamma=0.1)

    if not opt.no_val:

        ##--------------------------------------------------------------------------------------------
        if opt.model == 'I3D':
            spatial_transform = Compose([
                Scale((256, 256)),
                CenterCrop(224),
                ToTensor(opt.norm_value), norm_method
            ])
            temporal_transform = LoopPadding(0)
            target_transform = ClassLabel()
            validation_data = get_validation_set(opt, spatial_transform,
                                                 temporal_transform,
                                                 target_transform)
            val_loader = torch.utils.data.DataLoader(validation_data,
                                                     batch_size=1,
                                                     shuffle=False,
                                                     num_workers=opt.n_threads,
                                                     pin_memory=True)
        elif opt.model == 'resnet_50':
            spatial_transform = Compose([
                Scale(256),
Example #17
0
def main_run(dataset, flowModel, rgbModel, stackSize, seqLen, memSize,
             trainDatasetDir, valDatasetDir, outDir, trainBatchSize,
             valBatchSize, lr1, numEpochs, decay_step, decay_factor,
             uniformSampling):
    # GTEA 61
    num_classes = 61

    # Train/Validation/Test split
    train_splits = ["S1", "S3", "S4"]
    val_splits = ["S2"]
    directory = trainDatasetDir

    model_folder = os.path.join(
        './', outDir, dataset,
        'twoStream')  # Dir for saving models and log files
    # Create the dir
    if os.path.exists(model_folder):
        print('Dir {} exists!'.format(model_folder))
        sys.exit()
    os.makedirs(model_folder)

    # Log files
    train_log_loss = open((model_folder + '/train_log_loss.txt'), 'w')
    train_log_acc = open((model_folder + '/train_log_acc.txt'), 'w')
    val_log_loss = open((model_folder + '/val_log_loss.txt'), 'w')
    val_log_acc = open((model_folder + '/val_log_acc.txt'), 'w')

    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]

    normalize = Normalize(mean=mean, std=std)

    spatial_transform = Compose([
        Scale(256),
        RandomHorizontalFlip(),
        MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224),
        ToTensor(), normalize
    ])

    vid_seq_train = makeDataset(directory,
                                train_splits,
                                spatial_transform=spatial_transform,
                                sequence=False,
                                numSeg=1,
                                stackSize=stackSize,
                                fmt='.png',
                                seqLen=seqLen,
                                uniform_sampling=uniformSampling)

    train_loader = torch.utils.data.DataLoader(vid_seq_train,
                                               batch_size=trainBatchSize,
                                               shuffle=True,
                                               num_workers=4,
                                               pin_memory=True)

    vid_seq_val = makeDataset(directory,
                              val_splits,
                              spatial_transform=Compose([
                                  Scale(256),
                                  CenterCrop(224),
                                  ToTensor(), normalize
                              ]),
                              sequence=False,
                              numSeg=1,
                              stackSize=stackSize,
                              fmt='.png',
                              phase='Test',
                              seqLen=seqLen,
                              uniform_sampling=uniformSampling)

    val_loader = torch.utils.data.DataLoader(vid_seq_val,
                                             batch_size=valBatchSize,
                                             shuffle=False,
                                             num_workers=2,
                                             pin_memory=True)
    valSamples = vid_seq_val.__len__()

    model = twoStreamAttentionModel(flowModel=flowModel,
                                    frameModel=rgbModel,
                                    stackSize=stackSize,
                                    memSize=memSize,
                                    num_classes=num_classes)

    for params in model.parameters():
        params.requires_grad = False

    model.train(False)
    train_params = []

    for params in model.classifier.parameters():
        params.requires_grad = True
        train_params += [params]

    for params in model.frameModel.lstm_cell.parameters():
        train_params += [params]
        params.requires_grad = True

    for params in model.frameModel.resNet.layer4[0].conv1.parameters():
        params.requires_grad = True
        train_params += [params]

    for params in model.frameModel.resNet.layer4[0].conv2.parameters():
        params.requires_grad = True
        train_params += [params]

    for params in model.frameModel.resNet.layer4[1].conv1.parameters():
        params.requires_grad = True
        train_params += [params]

    for params in model.frameModel.resNet.layer4[1].conv2.parameters():
        params.requires_grad = True
        train_params += [params]

    for params in model.frameModel.resNet.layer4[2].conv1.parameters():
        params.requires_grad = True
        train_params += [params]
    #
    for params in model.frameModel.resNet.layer4[2].conv2.parameters():
        params.requires_grad = True
        train_params += [params]
    #
    for params in model.frameModel.resNet.fc.parameters():
        params.requires_grad = True
        train_params += [params]

    base_params = []
    for params in model.flowModel.layer4.parameters():
        base_params += [params]
        params.requires_grad = True

    model.cuda()

    trainSamples = vid_seq_train.__len__()
    min_accuracy = 0

    loss_fn = nn.CrossEntropyLoss()
    optimizer_fn = torch.optim.SGD([
        {
            'params': train_params
        },
        {
            'params': base_params,
            'lr': 1e-4
        },
    ],
                                   lr=lr1,
                                   momentum=0.9,
                                   weight_decay=5e-4)

    optim_scheduler = torch.optim.lr_scheduler.StepLR(optimizer_fn,
                                                      step_size=decay_step,
                                                      gamma=decay_factor)
    train_iter = 0

    for epoch in range(numEpochs):
        epoch_loss = 0
        numCorrTrain = 0
        iterPerEpoch = 0
        model.classifier.train(True)
        model.flowModel.layer4.train(True)
        for j, (inputFlow, inputFrame, targets) in enumerate(train_loader):
            train_iter += 1
            iterPerEpoch += 1
            optimizer_fn.zero_grad()
            inputVariableFlow = inputFlow.to(DEVICE)
            inputVariableFrame = inputFrame.permute(1, 0, 2, 3, 4).to(DEVICE)
            labelVariable = targets.to(DEVICE)
            output_label = model(inputVariableFlow, inputVariableFrame)
            loss = loss_fn(torch.log_softmax(output_label, dim=1),
                           labelVariable)
            loss.backward()
            optimizer_fn.step()
            _, predicted = torch.max(output_label.data, 1)
            numCorrTrain += (predicted == targets.cuda()).sum()
            epoch_loss += loss.data.item()

        avg_loss = epoch_loss / iterPerEpoch
        trainAccuracy = (numCorrTrain.item() / trainSamples) * 100
        print('Average training loss after {} epoch = {} '.format(
            epoch + 1, avg_loss))
        print('Training accuracy after {} epoch = {}% '.format(
            epoch + 1, trainAccuracy))
        train_log_loss.write('Training loss after {} epoch = {}\n'.format(
            epoch + 1, avg_loss))
        train_log_acc.write('Training accuracy after {} epoch = {}\n'.format(
            epoch + 1, trainAccuracy))

        # Validation Phase
        #if valDatasetDir is not None:
        if (epoch + 1) % 1 == 0:
            model.train(False)
            val_loss_epoch = 0
            val_iter = 0
            numCorr = 0
            for j, (inputFlow, inputFrame, targets) in enumerate(val_loader):
                val_iter += 1
                inputVariableFlow = inputFlow.to(DEVICE)
                inputVariableFrame = inputFrame.permute(1, 0, 2, 3,
                                                        4).to(DEVICE)
                labelVariable = targets.to(DEVICE)
                output_label = model(inputVariableFlow, inputVariableFrame)
                loss = loss_fn(torch.log_softmax(output_label, dim=1),
                               labelVariable)
                val_loss_epoch += loss.data.item()
                _, predicted = torch.max(output_label.data, 1)
                numCorr += (predicted == labelVariable.data).sum()
            val_accuracy = (numCorr.item() / valSamples) * 100
            avg_val_loss = val_loss_epoch / val_iter
            print('Val Loss after {} epochs, loss = {}'.format(
                epoch + 1, avg_val_loss))
            print('Val Accuracy after {} epochs = {}%'.format(
                epoch + 1, val_accuracy))
            val_log_loss.write('Val Loss after {} epochs = {}\n'.format(
                epoch + 1, avg_val_loss))
            val_log_acc.write('Val Accuracy after {} epochs = {}%\n'.format(
                epoch + 1, val_accuracy))
            if val_accuracy > min_accuracy:
                save_path_model = (model_folder +
                                   '/model_twoStream_state_dict.pth')
                torch.save(model.state_dict(), save_path_model)
                min_accuracy = val_accuracy
        #else:
        #    if (epoch + 1) % 10 == 0:
        #        save_path_model = (model_folder + '/model_twoStream_state_dict_epoch' + str(epoch + 1) + '.pth')
        #        torch.save(model.state_dict(), save_path_model)

        optim_scheduler.step()

    train_log_loss.close()
    train_log_acc.close()
    val_log_acc.close()
    val_log_loss.close()
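
The optimizer above relies on torch.optim.SGD's per-group options: train_params take the default lr1 while base_params (flowModel.layer4) get a fixed 1e-4. A minimal standalone illustration:

import torch

w1 = torch.nn.Parameter(torch.zeros(1))
w2 = torch.nn.Parameter(torch.zeros(1))
opt = torch.optim.SGD([{'params': [w1]},               # inherits the default lr below
                       {'params': [w2], 'lr': 1e-4}],  # group-specific lr overrides it
                      lr=1e-2, momentum=0.9)
print([g['lr'] for g in opt.param_groups])             # [0.01, 0.0001]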
Example #18
0
def main_run(dataset, stage, root_dir, out_dir, stage1_dict, seqLen,
             trainBatchSize, numEpochs, lr1, decay_factor, decay_step, memSize,
             outPool_size, split, evalInterval, regression, rloss, debug):
    if debug:
        n_workers = 0
        n_workers_test = 0
        device = 'cpu'
    else:
        n_workers = 4
        n_workers_test = 2
        device = 'cuda'
    # Train/Validation/Test split
    train_splits = ["S1", "S3", "S4"]
    val_splits = ["S2"]

    test_split = split

    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]

    normalize = Normalize(mean=mean, std=std)

    c_cam_classes = outPool_size
    best_acc = 0

    # The test loader uses the same batch size as training in every stage
    testBatchSize = trainBatchSize

    num_classes = 61

    dataset_dir = root_dir

    #model_folder = os.path.join('.', out_dir, dataset, str(test_split))
    model_folder = os.path.join('./', out_dir, 'stage' + str(stage))
    if not os.path.exists(model_folder):
        os.makedirs(model_folder)
    else:
        print('Directory {} exists!'.format(model_folder))
        sys.exit()

    note_fl = open(model_folder + '/note.txt', 'w')
    note_fl.write('Number of Epochs = {}\n'
                  'lr = {}\n'
                  'Train Batch Size = {}\n'
                  'Sequence Length = {}\n'
                  'Decay steps = {}\n'
                  'Decay factor = {}\n'
                  'Memory size = {}\n'
                  'Memory cam classes = {}\n'.format(numEpochs, lr1,
                                                     trainBatchSize, seqLen,
                                                     decay_step, decay_factor,
                                                     memSize, c_cam_classes))

    note_fl.close()

    # Log files
    writer = SummaryWriter(model_folder)
    train_log_loss = open((model_folder + '/train_log_loss.txt'), 'w')
    train_log_acc = open((model_folder + '/train_log_acc.txt'), 'w')
    train_log_loss_batch = open((model_folder + '/train_log_loss_batch.txt'),
                                'w')
    test_log_loss = open((model_folder + '/test_log_loss.txt'), 'w')
    test_log_acc = open((model_folder + '/test_log_acc.txt'), 'w')

    # Dataloaders
    print('Preparing dataset...')
    # The spatial transform stops at PIL images; ToTensor/normalize are applied
    # per stream by transform_rgb and transform_MS below
    spatial_transform = Compose([
        Scale(256),
        RandomHorizontalFlip(),
        MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224)
    ])
    transform_rgb = Compose([ToTensor(), normalize])
    transform_MS = Compose([Resize((7, 7)), ToTensor()])

    vid_seq_train = makeDataset(dataset_dir,
                                splits=train_splits,
                                spatial_transform=spatial_transform,
                                transform_rgb=transform_rgb,
                                transform_MS=transform_MS,
                                seqLen=seqLen,
                                fmt='.png',
                                regression=regression)

    train_loader = torch.utils.data.DataLoader(vid_seq_train,
                                               batch_size=trainBatchSize,
                                               shuffle=True,
                                               num_workers=n_workers,
                                               pin_memory=True)

    vid_seq_test = makeDataset(dataset_dir,
                               splits=val_splits,
                               spatial_transform=Compose(
                                   [Scale(256), CenterCrop(224)]),
                               transform_rgb=transform_rgb,
                               transform_MS=transform_MS,
                               seqLen=seqLen,
                               fmt='.png',
                               regression=regression,
                               verbose=False)

    test_loader = torch.utils.data.DataLoader(vid_seq_test,
                                              batch_size=testBatchSize,
                                              shuffle=False,
                                              num_workers=n_workers,
                                              pin_memory=True)

    print('Number of train samples = {}'.format(vid_seq_train.__len__()))
    print('Number of test samples = {}'.format(vid_seq_test.__len__()))

    train_params = []
    if stage == 1:
        if regression:
            model = attentionModel(num_classes=num_classes,
                                   mem_size=memSize,
                                   n_channels=1)
        else:
            model = attentionModel(num_classes=num_classes, mem_size=memSize)
        model.train(False)
        for params in model.parameters():
            params.requires_grad = False
    elif stage == 2 or stage == 3:
        if regression:
            model = attentionModel(num_classes=num_classes,
                                   mem_size=memSize,
                                   n_channels=1,
                                   c_cam_classes=c_cam_classes)
        else:
            model = attentionModel(num_classes=num_classes,
                                   mem_size=memSize,
                                   c_cam_classes=c_cam_classes)

        #model = attentionModel(num_classes=num_classes, mem_size=memSize, c_cam_classes=c_cam_classes)
        if stage == 2:
            checkpoint_path = os.path.join(
                stage1_dict, 'last_checkpoint_stage' + str(1) + '.pth.tar')
        elif stage == 3:
            checkpoint_path = os.path.join(
                stage1_dict, 'last_checkpoint_stage' + str(2) + '.pth.tar')
        if os.path.exists(checkpoint_path):
            print('Loading weights from checkpoint file {}'.format(
                checkpoint_path))
        else:
            print('Checkpoint file {} does not exist'.format(checkpoint_path))
            sys.exit()
        last_checkpoint = torch.load(checkpoint_path)
        model.load_state_dict(last_checkpoint['model_state_dict'],
                              strict=False)
        model.train(False)
        for params in model.parameters():
            params.requires_grad = False

        for params in model.resNet.layer4[0].conv1.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.layer4[0].conv2.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.layer4[1].conv1.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.layer4[1].conv2.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.layer4[2].conv1.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.layer4[2].conv2.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.fc.parameters():
            params.requires_grad = True
            train_params += [params]

        # Add params from ms_module
        if stage == 2:
            for params in model.ms_module.parameters():
                params.requires_grad = True
                train_params += [params]

    for params in model.lsta_cell.parameters():
        params.requires_grad = True
        train_params += [params]

    for params in model.classifier.parameters():
        params.requires_grad = True
        train_params += [params]

    model.classifier.train(True)

    model.ms_module.train(True)
    model.to(device)

    loss_fn = nn.CrossEntropyLoss()

    if regression:
        if rloss == 'MSE':
            # Mean squared error loss
            loss_ms_fn = nn.MSELoss()
        elif rloss == 'L1':
            # L1 loss
            loss_ms_fn = nn.L1Loss()
        elif rloss == 'SmoothL1':
            # Huber (smooth L1) loss
            loss_ms_fn = nn.SmoothL1Loss()
        elif rloss == 'KLdiv':
            # Kullback-Leibler divergence loss
            loss_ms_fn = nn.KLDivLoss()
    else:
        # classification
        loss_ms_fn = nn.CrossEntropyLoss()  # TODO: check paper Planamente

    optimizer_fn = torch.optim.Adam(train_params,
                                    lr=lr1,
                                    weight_decay=5e-4,
                                    eps=1e-4)

    optim_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer_fn, milestones=decay_step, gamma=decay_factor)

    train_iter = 0

    for epoch in range(numEpochs):
        #optim_scheduler.step()
        epoch_loss = 0
        numCorrTrain = 0
        trainSamples = 0
        iterPerEpoch = 0
        # model.classifier.train(True)
        model.lsta_cell.train(True)
        model.classifier.train(True)
        if stage == 2:
            model.resNet.layer4[0].conv1.train(True)
            model.resNet.layer4[0].conv2.train(True)
            model.resNet.layer4[1].conv1.train(True)
            model.resNet.layer4[1].conv2.train(True)
            model.resNet.layer4[2].conv1.train(True)
            model.resNet.layer4[2].conv2.train(True)
            model.resNet.fc.train(True)
            model.ms_module.train(True)

        writer.add_scalar('lr', optimizer_fn.param_groups[0]['lr'], epoch + 1)
        for i, (inputs, inputsMS, targets) in enumerate(train_loader):
            # Inputs:
            #   - inputs   : the RGB frame input
            # Labels:
            #   - inputsMS : the motion (self-supervised) task label
            #   - targets  : the activity class label

            train_iter += 1
            iterPerEpoch += 1
            optimizer_fn.zero_grad()
            inputVariable = inputs.permute(1, 0, 2, 3, 4).to(device)
            labelVariable = targets.to(device)
            msVariable = inputsMS.to(device)
            trainSamples += inputs.size(0)
            output_label, _, output_ms = model(inputVariable, device)
            loss_c = loss_fn(output_label, labelVariable)
            if stage == 2:
                if regression:
                    msVariable = torch.reshape(
                        msVariable, (seqLen * 7 * 7, msVariable.size(0)))
                    output_ms = torch.sigmoid(output_ms)
                    output_ms = torch.reshape(
                        output_ms, (seqLen * 7 * 7, output_ms.size(0)))
                else:
                    # classification task
                    msVariable = torch.reshape(
                        msVariable,
                        (seqLen * 7 * 7, msVariable.size(0))).long()
                    output_ms = torch.reshape(
                        output_ms, (seqLen * 7 * 7, 2, output_ms.size(0)))
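                    # CrossEntropyLoss with extra dimensions: input
                    # (seqLen*49, C=2, batch) is matched against target
                    # (seqLen*49, batch) of class indices.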
                loss_ms = loss_ms_fn(output_ms, msVariable)
                loss = loss_c + loss_ms
            else:
                loss = loss_c
            loss.backward()
            optimizer_fn.step()
            _, predicted = torch.max(output_label.data, 1)
            numCorrTrain += (predicted == labelVariable).sum().item()
            #print('Training loss after {} iterations = {} '.format(train_iter, loss.data.item()))
            #train_log_loss_batch.write('Training loss after {} iterations = {}\n'.format(train_iter, loss.data.item()))
            #writer.add_scalar('train/iter_loss', loss.data.item(), train_iter)
            epoch_loss += loss.data.item()
        avg_loss = epoch_loss / iterPerEpoch
        trainAccuracy = (numCorrTrain / trainSamples) * 100
        print('Average training loss after {} epoch = {} '.format(
            epoch + 1, avg_loss))
        print('Training accuracy after {} epoch = {}% '.format(
            epoch + 1, trainAccuracy))
        writer.add_scalar('train/epoch_loss', avg_loss, epoch + 1)
        writer.add_scalar('train/accuracy', trainAccuracy, epoch + 1)
        train_log_loss.write('Training loss after {} epoch = {}\n'.format(
            epoch + 1, avg_loss))
        train_log_acc.write('Training accuracy after {} epoch = {}\n'.format(
            epoch + 1, trainAccuracy))

        save_path_model = os.path.join(
            model_folder, 'last_checkpoint_stage' + str(stage) + '.pth.tar')
        save_file = {
            'epoch': epoch + 1,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer_fn.state_dict(),
            'best_acc': best_acc,
        }
        torch.save(save_file, save_path_model)

        if (epoch + 1) % evalInterval == 0:
            #print('Testing...')
            model.train(False)
            test_loss_epoch = 0
            test_iter = 0
            test_samples = 0
            numCorr = 0
            for j, (inputs, inputsMS, targets) in enumerate(test_loader):
                #print('testing inst = {}'.format(j))
                test_iter += 1
                test_samples += inputs.size(0)
                inputVariable = inputs.permute(1, 0, 2, 3, 4).to(device)
                labelVariable = targets.to(device)
                msVariable = inputsMS.to(device)

                output_label, _, output_ms = model(inputVariable, device)
                test_loss_c = loss_fn(output_label, labelVariable)
                if stage == 2:
                    if regression:
                        msVariable = torch.reshape(
                            msVariable, (seqLen * 7 * 7, msVariable.size(0)))
                        output_ms = torch.sigmoid(output_ms)
                        output_ms = torch.reshape(
                            output_ms, (seqLen * 7 * 7, output_ms.size(0)))
                    else:
                        # classification task
                        msVariable = torch.reshape(
                            msVariable,
                            (seqLen * 7 * 7, msVariable.size(0))).long()
                        output_ms = torch.reshape(
                            output_ms, (seqLen * 7 * 7, 2, output_ms.size(0)))
                    test_loss_ms = loss_ms_fn(output_ms, msVariable)
                    test_loss = test_loss_c + test_loss_ms
                else:
                    test_loss = test_loss_c
                test_loss_epoch += test_loss.data.item()
                _, predicted = torch.max(output_label.data, 1)
                numCorr += (predicted == labelVariable).sum().item()
            test_accuracy = (numCorr / test_samples) * 100
            avg_test_loss = test_loss_epoch / test_iter
            print('Test Loss after {} epochs, loss = {}'.format(
                epoch + 1, avg_test_loss))
            print('Test Accuracy after {} epochs = {}%'.format(
                epoch + 1, test_accuracy))
            writer.add_scalar('test/epoch_loss', avg_test_loss, epoch + 1)
            writer.add_scalar('test/accuracy', test_accuracy, epoch + 1)
            test_log_loss.write('Test Loss after {} epochs = {}\n'.format(
                epoch + 1, avg_test_loss))
            test_log_acc.write('Test Accuracy after {} epochs = {}%\n'.format(
                epoch + 1, test_accuracy))

            if test_accuracy > best_acc:
                best_acc = test_accuracy
                save_path_model = os.path.join(
                    model_folder,
                    'best_checkpoint_stage' + str(stage) + '.pth.tar')
                save_file = {
                    'epoch': epoch + 1,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer_fn.state_dict(),
                    'best_acc': best_acc,
                }
                torch.save(save_file, save_path_model)
        optim_scheduler.step()
    train_log_loss.close()
    train_log_acc.close()
    test_log_acc.close()
    train_log_loss_batch.close()
    test_log_loss.close()
    writer.export_scalars_to_json(model_folder + "/all_scalars.json")
    writer.close()
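
The repeated unfreezing loops in stages 2 and 3 above can be collapsed into a helper. A minimal sketch using the same attribute names (unfreeze_all is not part of the original code):

def unfreeze_all(modules, train_params):
    # Enable gradients for every parameter of the given modules and
    # collect them for the optimizer.
    for module in modules:
        for p in module.parameters():
            p.requires_grad = True
            train_params.append(p)

# Equivalent to the stage-2/3 block above:
# unfreeze_all([model.resNet.layer4[b].conv1 for b in range(3)] +
#              [model.resNet.layer4[b].conv2 for b in range(3)] +
#              [model.resNet.fc], train_params)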
Example #19
0
def main():
    parser = argparse.ArgumentParser(description="Run model against images")
    parser.add_argument(
        '--input-glob',
        default=
        'data/kinetics_videos/jpg/yoga/0wHOYxjRmlw_000041_000051/image_000{41,42,43,44,45,46,47,48,49,50,41,42,43,44,45,46}.jpg',
        help="inputs")
    parser.add_argument("--depth", default="50", help="which model depth")
    args = parser.parse_args()

    model_file = model_files[args.depth]
    model_depth = int(args.depth)

    model = resnet.generate_model(model_depth=model_depth,
                                  n_classes=700,
                                  n_input_channels=3,
                                  shortcut_type="B",
                                  conv1_t_size=7,
                                  conv1_t_stride=1,
                                  no_max_pool=False,
                                  widen_factor=1.0)

    # model = load_pretrained_model(model, args.model, "resnet", 700)

    checkpoint = torch.load(model_file, map_location='cpu')
    arch = '{}-{}'.format("resnet", model_depth)
    print(arch, checkpoint['arch'])
    assert arch == checkpoint['arch']

    if hasattr(model, 'module'):
        # I think this is only needed for legacy models
        model.module.load_state_dict(checkpoint['state_dict'])
    else:
        model.load_state_dict(checkpoint['state_dict'])

    model.eval()

    image_clips = []
    files = real_glob(args.input_glob)
    files = extend_to_length(files, 16)
    print(files)
    for f in files:
        img = Image.open(f).convert("RGB")
        image_clips.append(img)

    # print("EARLY", image_clips[0][0:4,0:4,0])

    mean = [0.4345, 0.4051, 0.3775]
    std = [0.2768, 0.2713, 0.2737]
    normalize = Normalize(mean, std)

    sample_size = 112

    spatial_transform = [Resize(sample_size)]
    spatial_transform.append(CenterCrop(sample_size))
    spatial_transform.append(ToTensor())
    spatial_transform.extend([ScaleValue(1), normalize])
    spatial_transform = Compose(spatial_transform)

    # c = spatial_transform(image_clips[0])
    # c.save("raw.png")

    model_clips = []
    clip = [spatial_transform(img) for img in image_clips]
    model_clips.append(torch.stack(clip, 0).permute(1, 0, 2, 3))
    model_clips = torch.stack(model_clips, 0)

    print("Final", model_clips.shape)
    print("PEEK", model_clips[0, 0, 0, 0:4, 0:4])

    with torch.no_grad():
        outputs = model(model_clips)
        print(outputs[0][0:10])
        outputs = F.softmax(outputs, dim=1).cpu()

    sorted_scores, locs = torch.topk(outputs[0], k=3)

    print(locs[0])

    video_results = []
    for i in range(sorted_scores.size(0)):
        video_results.append({
            'label': magic_labels_700[locs[i].item()],
            'score': sorted_scores[i].item()
        })

    print(video_results)
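
real_glob and extend_to_length are helpers defined elsewhere; a plausible sketch of extend_to_length (an assumption, not the original implementation) that loops the frame list up to the required clip length:

def extend_to_length(files, n):
    # Repeat the file list until it covers n frames, then truncate,
    # mirroring LoopPadding-style temporal padding.
    out = list(files)
    while len(out) < n:
        out.extend(files)
    return out[:n]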
Example #20
0
    center = 1

    openpose_transform = {
        'train':
        MultiScaleTorsoRandomCrop(scales, args.img_size),
        'val':
        MultiScaleTorsoRandomCrop(np.linspace(center, center, num=1),
                                  args.img_size,
                                  centercrop=True)
    }

    spatial_transform = {
        'train':
        Compose([
            Scale(args.img_size),
            CenterCrop(args.img_size),
            RandomHorizontalFlip(),
            ColorJitter(brightness=0.1),
            ToTensor(1),
            Normalize(args.mean, args.std)
        ]),
        'val':
        Compose([
            Scale(args.img_size),
            CenterCrop(args.img_size),
            ToTensor(1),
            Normalize(args.mean, args.std)
        ])
    }

    temporal_transform = {
Example #21
0
def main_run(dataset, trainDir, valDir, outDir, stackSize, trainBatchSize, valBatchSize, numEpochs, lr1,
             decay_factor, decay_step):


    if dataset == 'gtea61':
        num_classes = 61
    elif dataset == 'gtea71':
      num_classes = 71
    elif dataset == 'gtea_gaze':
        num_classes = 44
    elif dataset == 'egtea':
        num_classes = 106
    else:
        print('Dataset not found')
        sys.exit()

    min_accuracy = 0

    model_folder = os.path.join('./', outDir, dataset, 'flow')  # Dir for saving models and log files
    # Create the dir
    if os.path.exists(model_folder):
        print('Dir {} exists! Removing it.'.format(model_folder))
        shutil.rmtree(model_folder)  # requires: import shutil
        #sys.exit()
    os.makedirs(model_folder)

    # Log files
    writer = SummaryWriter(model_folder)
    train_log_loss = open((model_folder + '/train_log_loss.txt'), 'w')
    train_log_acc = open((model_folder + '/train_log_acc.txt'), 'w')
    val_log_loss = open((model_folder + '/val_log_loss.txt'), 'w')
    val_log_acc = open((model_folder + '/val_log_acc.txt'), 'w')


    # Data loader
    normalize = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

    spatial_transform = Compose([Scale(256), RandomHorizontalFlip(), MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224),
                                 ToTensor(), normalize])

    vid_seq_train = makeDataset(trainDir, spatial_transform=spatial_transform, sequence=False,
                                stackSize=stackSize, fmt='.png')

    train_loader = torch.utils.data.DataLoader(vid_seq_train, batch_size=trainBatchSize,
                            shuffle=True, sampler=None, num_workers=4, pin_memory=True)
    valInstances=0
    if valDir is not None:

        vid_seq_val = makeDataset(valDir, spatial_transform=Compose([Scale(256), CenterCrop(224), ToTensor(), normalize]),
                                   sequence=False, stackSize=stackSize, fmt='.png', phase='Test')

        val_loader = torch.utils.data.DataLoader(vid_seq_val, batch_size=valBatchSize,
                                shuffle=False, num_workers=2, pin_memory=True)
        valInstances = vid_seq_val.__len__()

    trainInstances = vid_seq_train.__len__()
    print('Number of samples in the dataset: training = {} | validation = {}'.format(trainInstances, valInstances))

    model = flow_resnet34(True, channels=2*stackSize, num_classes=num_classes)
    model.train(True)
    train_params = list(model.parameters())

    model.cuda()

    loss_fn = nn.CrossEntropyLoss()

    optimizer_fn = torch.optim.SGD(train_params, lr=lr1, momentum=0.9, weight_decay=5e-4)

    optim_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer_fn, milestones=decay_step, gamma=decay_factor)

    train_iter = 0

    for epoch in range(numEpochs):
        # the LR scheduler is stepped at the end of the epoch (PyTorch >= 1.1)
        epoch_loss = 0
        numCorrTrain = 0
        trainSamples = 0
        iterPerEpoch = 0
        model.train(True)
        writer.add_scalar('lr', optimizer_fn.param_groups[0]['lr'], epoch+1)
        for i, (inputs, targets) in enumerate(train_loader):
            train_iter += 1
            iterPerEpoch += 1
            optimizer_fn.zero_grad()
            inputVariable = inputs.cuda()
            labelVariable = targets.cuda()
            trainSamples += inputs.size(0)
            output_label, _ = model(inputVariable)
            loss = loss_fn(output_label, labelVariable)
            loss.backward()
            optimizer_fn.step()
            _, predicted = torch.max(output_label.data, 1)
            numCorrTrain += (predicted == labelVariable).sum().item()
            epoch_loss += loss.item()
        avg_loss = epoch_loss/iterPerEpoch
        trainAccuracy = (numCorrTrain / trainSamples) * 100
        print('Train: Epoch = {} | Loss = {} | Accuracy = {}'.format(epoch + 1, avg_loss, trainAccuracy))
        writer.add_scalar('train/epoch_loss', avg_loss, epoch+1)
        writer.add_scalar('train/accuracy', trainAccuracy, epoch+1)
        train_log_loss.write('Training loss after {} epoch = {}\n'.format(epoch+1, avg_loss))
        train_log_acc.write('Training accuracy after {} epoch = {}\n'.format(epoch+1, trainAccuracy))
        if valDir is not None:
            if (epoch+1) % 1 == 0:
                model.train(False)
                val_loss_epoch = 0
                val_iter = 0
                val_samples = 0
                numCorr = 0
                with torch.no_grad():
                    for j, (inputs, targets) in enumerate(val_loader):
                        val_iter += 1
                        val_samples += inputs.size(0)
                        inputVariable = inputs.cuda()
                        labelVariable = targets.cuda(non_blocking=True)
                        output_label, _ = model(inputVariable)
                        val_loss = loss_fn(output_label, labelVariable)
                        val_loss_epoch += val_loss.item()
                        _, predicted = torch.max(output_label.data, 1)
                        numCorr += (predicted == labelVariable).sum().item()
                val_accuracy = (numCorr / val_samples) * 100
                avg_val_loss = val_loss_epoch / val_iter
                print('Validation: Epoch = {} | Loss = {} | Accuracy = {}'.format(epoch + 1, avg_val_loss, val_accuracy))
                writer.add_scalar('val/epoch_loss', avg_val_loss, epoch + 1)
                writer.add_scalar('val/accuracy', val_accuracy, epoch + 1)
                val_log_loss.write('Val Loss after {} epochs = {}\n'.format(epoch + 1, avg_val_loss))
                val_log_acc.write('Val Accuracy after {} epochs = {}%\n'.format(epoch + 1, val_accuracy))
                if val_accuracy > min_accuracy:
                    save_path_model = (model_folder + '/model_flow_state_dict.pth')
                    torch.save(model.state_dict(), save_path_model)
                    min_accuracy = val_accuracy
            else:
                if (epoch+1) % 10 == 0:
                    save_path_model = (model_folder + '/model_flow_state_dict_epoch' + str(epoch+1) + '.pth')
                    torch.save(model.state_dict(), save_path_model)
        optim_scheduler.step()

    train_log_loss.close()
    train_log_acc.close()
    val_log_acc.close()
    val_log_loss.close()
    writer.export_scalars_to_json(model_folder + "/all_scalars.json")
    writer.close()
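
Since PyTorch 1.1, the LR scheduler must be stepped after the optimizer; the per-epoch skeleton used above is therefore (a generic sketch, not code from this example):

for epoch in range(numEpochs):
    for inputs, targets in train_loader:
        optimizer_fn.zero_grad()
        output, _ = model(inputs.cuda())
        loss = loss_fn(output, targets.cuda())
        loss.backward()
        optimizer_fn.step()
    optim_scheduler.step()  # after all optimizer steps for the epoch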
Example #22
0
def main_run(dataset, model_state_dict, dataset_dir, stackSize, numSeg):

    if dataset == 'gtea61':
        num_classes = 61
    elif dataset == 'gtea71':
        num_classes = 71
    elif dataset == 'gtea_gaze':
        num_classes = 44
    elif dataset == 'egtea':
        num_classes = 106

    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]

    normalize = Normalize(mean=mean, std=std)

    spatial_transform = Compose(
        [Scale(256), CenterCrop(224),
         ToTensor(), normalize])

    vid_seq_test = makeDataset(dataset_dir,
                               spatial_transform=spatial_transform,
                               sequence=True,
                               numSeg=numSeg,
                               stackSize=stackSize,
                               fmt='.jpg',
                               phase='Test')

    test_loader = torch.utils.data.DataLoader(vid_seq_test,
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=2,
                                              pin_memory=True)

    model = flow_resnet34(False,
                          channels=2 * stackSize,
                          num_classes=num_classes)
    model.load_state_dict(torch.load(model_state_dict))
    for params in model.parameters():
        params.requires_grad = False

    model.train(False)
    model.cuda()
    test_samples = vid_seq_test.__len__()
    print('Number of samples = {}'.format(test_samples))
    print('Evaluating...')
    numCorr = 0
    true_labels = []
    predicted_labels = []

    with torch.no_grad():
        for j, (inputs, targets) in enumerate(test_loader):
            inputVariable = inputs[0].cuda()
            output_label, _ = model(inputVariable)
            output_label_mean = torch.mean(output_label.data, 0, True)
            _, predicted = torch.max(output_label_mean, 1)
            numCorr += (predicted.cpu() == targets[0]).sum().item()
            true_labels.append(targets)
            predicted_labels.append(predicted.cpu())
    test_accuracy = (numCorr / test_samples) * 100
    print('Test Accuracy  = {}%'.format(test_accuracy))

    cnf_matrix = confusion_matrix(torch.cat(true_labels),
                                  torch.cat(predicted_labels)).astype(float)
    cnf_matrix_normalized = cnf_matrix / cnf_matrix.sum(axis=1)[:, np.newaxis]

    ticks = np.linspace(0, 60, num=61)
    plt.imshow(cnf_matrix_normalized, interpolation='none', cmap='binary')
    plt.colorbar()
    plt.xticks(ticks, fontsize=6)
    plt.yticks(ticks, fontsize=6)
    plt.grid(True)
    plt.clim(0, 1)
    plt.savefig(dataset + '-flow.jpg', bbox_inches='tight')
    plt.show()
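
If a class never occurs in the test split, the row normalization above divides by zero; a guarded variant (sketch):

row_sums = cnf_matrix.sum(axis=1)[:, np.newaxis]
cnf_matrix_normalized = cnf_matrix / np.maximum(row_sums, 1)  # avoid 0/0 rows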
Example #23
0
def main_run(model_state_dict, dataset_dir, seqLen, memSize, out_dir):
    model_folder = os.path.join('./', out_dir, 'attConvLSTMDoubleResnet',
                                str(seqLen))
    #dataset = 'gtea61'
    num_classes = 61

    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    normalize = Normalize(mean=mean, std=std)
    spatial_transform = Compose(
        [Scale(256), CenterCrop(224),
         ToTensor(), normalize])

    vid_seq_test = makeDataset(dataset_dir,
                               seqLen=seqLen,
                               fmt='.png',
                               train=False,
                               spatial_transform=spatial_transform,
                               users=['S2'])

    test_loader = torch.utils.data.DataLoader(vid_seq_test,
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=2,
                                              pin_memory=True)

    model = attentionDoubleResnet(num_classes=num_classes, mem_size=memSize)
    model.load_state_dict(torch.load(model_state_dict))

    for params in model.parameters():
        params.requires_grad = False

    model.train(False)
    model.cuda()
    test_samples = vid_seq_test.__len__()
    print('Number of samples = {}'.format(test_samples))
    print('Evaluating...')
    numCorr = 0
    true_labels = []
    predicted_labels = []

    with torch.no_grad():
        #for j, (inputs, targets) in enumerate(test_loader):
        for inputs, inputsSN, targets in test_loader:
            inputVariable = inputs.permute(1, 0, 2, 3, 4).cuda()
            inputSNVariable = inputsSN.permute(1, 0, 2, 3, 4).cuda()
            output_label, _ = model(inputVariable, inputSNVariable)

            _, predicted = torch.max(output_label.data, 1)
            numCorr += (predicted == targets.cuda()).sum()
            true_labels.append(targets)
            predicted_labels.append(predicted.cpu())

    test_accuracy = torch.true_divide(numCorr, test_samples) * 100
    acc_msg = 'Test Accuracy = {}%'.format(test_accuracy)
    print(acc_msg)
    fil = open(model_folder + "/test_log_acc.txt", "w")
    fil.write(acc_msg)
    fil.close()

    cnf_matrix = confusion_matrix(torch.cat(true_labels),
                                  torch.cat(predicted_labels)).astype(float)
    cnf_matrix_normalized = cnf_matrix / cnf_matrix.sum(axis=1)[:, np.newaxis]

    ticks = np.linspace(0, 60, num=61)
    plt.figure(1, figsize=(12, 12), dpi=100.0)
    plt.imshow(cnf_matrix_normalized, interpolation='none', cmap='binary')
    plt.colorbar()
    plt.xticks(ticks, fontsize=6)
    plt.yticks(ticks, fontsize=6)
    plt.grid(True)
    plt.clim(0, 1)
    xy = np.arange(start=0, stop=61)
    plt.plot(xy, xy)
    plt.savefig(model_folder + '/cnf_matrix_normalized.png',
                bbox_inches='tight')
    plt.show()
Example #24
0
mean = [0.4345, 0.4051, 0.3775]
std = [0.2768, 0.2713, 0.2737]
no_mean_norm = False
no_std_norm = False
sample_size = 112
value_scale = 1
input_type = 'rgb'
sample_t_stride = 1
sample_duration = 16
inference_stride = 16
inference_crop = 'center'  # assumed default; used below

#normalize = get_normalize_method(mean, std, no_mean_norm, no_std_norm)
normalize = Normalize(mean, std)
spatial_transform = [Resize(sample_size)]
if inference_crop == 'center':
    spatial_transform.append(CenterCrop(sample_size))
spatial_transform.append(ToTensor())
if input_type == 'flow':
    # PickFirstChannels slices tensor channels, so it must follow ToTensor()
    spatial_transform.append(PickFirstChannels(n=2))
spatial_transform.extend([ScaleValue(value_scale), normalize])
spatial_transform = Compose(spatial_transform)

temporal_transform = []
if sample_t_stride > 1:
    temporal_transform.append(TemporalSubsampling(sample_t_stride))
temporal_transform.append(SlidingWindow(sample_duration, inference_stride))
temporal_transform = TemporalCompose(temporal_transform)
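# With sample_duration=16 and inference_stride=16, SlidingWindow yields
# consecutive non-overlapping 16-frame windows over each video's frame
# indices (assuming the 3D-ResNets-PyTorch temporal_transforms behavior).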

# Load the model
#print('load model begin!')
model = generate_model_resnet(1)  # build the ResNet model
Example #25
0
        img_prefix = ''

    whole_model, parameters = generate_model(args)
    print(whole_model)
    # input('...')

    if args.no_mean_norm and not args.std_norm:
        norm_method = Normalize([0, 0, 0], [1, 1, 1])
    elif not args.std_norm:
        norm_method = Normalize(args.mean, [1, 1, 1])
    else:
        norm_method = Normalize(args.mean, args.std)
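    # i.e. identity when no_mean_norm is set and std_norm is off,
    # mean-centering only when just std_norm is off, and full mean/std
    # normalization otherwise.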

    spatial_transform = Compose([
        Scale(args.sample_size),
        CenterCrop(args.sample_size),
        ToTensor(args.norm_value), norm_method
    ])
    # if not args.test_temp_crop == 'sparse':
    if args.compared_temp_transform == 'shuffle':
        temp_transform = ShuffleFrames(args.sample_duration)
    else:
        temp_transform = ReverseFrames(args.sample_duration)

    temp_crop_method = TemporalRandomCrop(args.sample_duration)
    # if args.compared_temp_transform == 'reverse':
    # temp_transform = Compose([
    # ReverseFrames(args.sample_duration),
    # temp_crop_method
    # ])
    # elif args.compared_temp_transform == 'shuffle':
Example #26
0
def main_run(dataset, model_state_dict, dataset_dir, stackSize, seqLen,
             memSize):

    if dataset == 'gtea61':
        num_classes = 61
    elif dataset == 'gtea71':
        num_classes = 71
    elif dataset == 'gtea_gaze':
        num_classes = 44
    elif dataset == 'egtea':
        num_classes = 106

    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]

    normalize = Normalize(mean=mean, std=std)

    testBatchSize = 1
    spatial_transform = Compose(
        [Scale(256), CenterCrop(224),
         ToTensor(), normalize])

    vid_seq_test = makeDataset(dataset_dir,
                               spatial_transform=spatial_transform,
                               sequence=False,
                               numSeg=1,
                               stackSize=stackSize,
                               fmt='.png',
                               phase='Test',
                               seqLen=seqLen)

    test_loader = torch.utils.data.DataLoader(vid_seq_test,
                                              batch_size=testBatchSize,
                                              shuffle=False,
                                              num_workers=2,
                                              pin_memory=True)

    model = twoStreamAttentionModel(stackSize=stackSize,
                                    memSize=memSize,
                                    num_classes=num_classes)
    model.load_state_dict(torch.load(model_state_dict))

    for params in model.parameters():
        params.requires_grad = False

    classes = sorted(
        os.listdir(
            "/content/drive/My Drive/testingGithub/FPAR_project/GTEA61/processed_frames2/train/S1"
        ))[1:]
    print(classes)
    print(len(classes))

    model.train(False)
    model.cuda()

    test_samples = vid_seq_test.__len__()
    print('Number of samples = {}'.format(test_samples))
    print('Evaluating...')
    numCorrTwoStream = 0
    predicted_labels = []
    true_labels = []
    with torch.no_grad():
        test_preds = get_all_preds(model, test_loader)
        labels = vid_seq_test.labels
        predictions = test_preds.argmax(dim=1)
        cm = confusion_matrix(labels, predictions)
        plt.figure(figsize=(25, 25))
        plot_confusion_matrix(cm, classes)
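
get_all_preds and plot_confusion_matrix are defined elsewhere; a plausible sketch of get_all_preds (hypothetical: the exact batch structure depends on makeDataset, and (flow, frame, target) triples are assumed here):

def get_all_preds(model, loader):
    # Accumulate the model's class scores over the whole loader.
    all_preds = []
    with torch.no_grad():
        for inputFlow, inputFrame, _ in loader:
            output = model(inputFlow[0].cuda(),
                           inputFrame.permute(1, 0, 2, 3, 4).cuda())
            all_preds.append(output.cpu())
    return torch.cat(all_preds, dim=0)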
Example #27
0
def get_train_utils(opt, model_parameters):
    assert opt.train_crop in ['random', 'corner', 'center']
    spatial_transform = []
    if opt.train_crop == 'random':
        spatial_transform.append(
            RandomResizedCrop(
                opt.sample_size, (opt.train_crop_min_scale, 1.0),
                (opt.train_crop_min_ratio, 1.0 / opt.train_crop_min_ratio)))
    elif opt.train_crop == 'corner':
        scales = [1.0]
        scale_step = 1 / (2**(1 / 4))
        for _ in range(1, 5):
            scales.append(scales[-1] * scale_step)
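        # scales is approximately [1.0, 0.84, 0.71, 0.59, 0.5]
        # (geometric steps of 2**(-1/4))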
        spatial_transform.append(MultiScaleCornerCrop(opt.sample_size, scales))
    elif opt.train_crop == 'center':
        spatial_transform.append(Resize(opt.sample_size))
        spatial_transform.append(CenterCrop(opt.sample_size))
    normalize = get_normalize_method(opt.mean, opt.std, opt.no_mean_norm,
                                     opt.no_std_norm)
    if not opt.no_hflip:
        spatial_transform.append(RandomHorizontalFlip())
    if opt.colorjitter:
        spatial_transform.append(ColorJitter())
    spatial_transform.append(ToTensor())
    if opt.input_type == 'flow':
        spatial_transform.append(PickFirstChannels(n=2))
    spatial_transform.append(ScaleValue(opt.value_scale))
    spatial_transform.append(normalize)
    spatial_transform = Compose(spatial_transform)

    assert opt.train_t_crop in ['random', 'center']
    temporal_transform = []
    if opt.sample_t_stride > 1:
        temporal_transform.append(TemporalSubsampling(opt.sample_t_stride))
    if opt.train_t_crop == 'random':
        temporal_transform.append(TemporalRandomCrop(opt.sample_duration))
    elif opt.train_t_crop == 'center':
        temporal_transform.append(TemporalCenterCrop(opt.sample_duration))
    temporal_transform = TemporalCompose(temporal_transform)

    train_data = get_training_data(opt.video_path, opt.annotation_path,
                                   opt.dataset, opt.input_type, opt.file_type,
                                   spatial_transform, temporal_transform)
    if opt.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_data)
    else:
        train_sampler = None
    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=opt.batch_size,
                                               shuffle=(train_sampler is None),
                                               num_workers=opt.n_threads,
                                               pin_memory=True,
                                               sampler=train_sampler,
                                               worker_init_fn=worker_init_fn)

    if opt.is_master_node:
        train_logger = Logger(opt.result_path / 'train.log',
                              ['epoch', 'loss', 'acc', 'lr'])
        train_batch_logger = Logger(
            opt.result_path / 'train_batch.log',
            ['epoch', 'batch', 'iter', 'loss', 'acc', 'lr'])
    else:
        train_logger = None
        train_batch_logger = None

    if opt.nesterov:
        dampening = 0
    else:
        dampening = opt.dampening
    optimizer = SGD(model_parameters,
                    lr=opt.learning_rate,
                    momentum=opt.momentum,
                    dampening=dampening,
                    weight_decay=opt.weight_decay,
                    nesterov=opt.nesterov)

    assert opt.lr_scheduler in ['plateau', 'multistep']
    assert not (opt.lr_scheduler == 'plateau' and opt.no_val)
    if opt.lr_scheduler == 'plateau':
        scheduler = lr_scheduler.ReduceLROnPlateau(
            optimizer, 'min', patience=opt.plateau_patience)
    else:
        scheduler = lr_scheduler.MultiStepLR(optimizer,
                                             opt.multistep_milestones)

    return (train_loader, train_sampler, train_logger, train_batch_logger,
            optimizer, scheduler)
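
A typical call site for get_train_utils (a sketch only; opt and model come from the surrounding project):

train_loader, train_sampler, train_logger, train_batch_logger, \
    optimizer, scheduler = get_train_utils(opt, model.parameters())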
Example #28
0
epochs = args.epochs
N_blocks = args.n_blocks
d_model = args.d_model
att_heads = args.att_heads
lr = args.learning_rate
sample_duration = args.sample_duration
sample_size = args.sample_size
num_workers = args.workers
frames_path = args.frames_path
output_path = args.output_path
model_path = args.model_path

mean = [114.7748, 107.7354, 99.4750]

spatial_transform = Compose([Scale(sample_size),
                             CenterCrop(sample_size),
                             ToTensor(),
                             Normalize(mean, [1, 1, 1])])
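# Note: the mean is on the 0-255 pixel scale and std is [1, 1, 1], so this
# assumes a ToTensor that keeps values in [0, 255] (norm_value = 1); a
# ToTensor that rescales to [0, 1] would make the mean subtraction meaningless.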
temporal_transform = LoopPadding(sample_duration)

data_train = Video(os.path.join(frames_path, "train"),
                   "S2T/3D/data/annotations/train.csv",
                   spatial_transform=spatial_transform,
                   temporal_transform=temporal_transform,
                   sample_duration=sample_duration)

trg_vocab = len(data_train.dictionary.idx2word)

train_loader = torch.utils.data.DataLoader(
    data_train, batch_size=batch_size, shuffle=True, num_workers=num_workers)
Example #29
0
            os.makedirs(os.path.join(opt.save_path, vid))
        cmd = 'cp -r "{}" "{}"'.format(image_path, target_image_path)
        subprocess.call(cmd, shell=True)
        print(cmd)


    # print(vid)



if __name__ == '__main__':

    opt = opts.parse_opts()
    opt.sample_size = 112
    spatial_transform = Compose([Scale(opt.sample_size),
                                 CenterCrop(opt.sample_size)])
    loader = get_default_video_loader()

    base_dir = "/userhome/dataset/MSVD/Video-Description-with-Spatial-Temporal-Attention/youtube-frames/*"
    videos_dir = glob.glob(base_dir)

    opt.save_path = "/userhome/dataset/MSVD/Video-Description-with-Spatial-Temporal-Attention/28frames-msvd/"
    if not os.path.exists(opt.save_path):
        os.makedirs(opt.save_path)

    # for video_path in videos_dir:
    #     Video(video_path)


    pool = ThreadPool(8)  # thread pool with 8 workers for concurrent processing
    pool.map(Video, videos_dir)  # pool.map works like the built-in map
Example #30
0
def main_run(dataset, flowModel_state_dict, RGBModel_state_dict, dataset_dir,
             stackSize, seqLen, memSize, numSeg):

    if dataset == 'gtea61':
        num_classes = 61
    elif dataset == 'gtea71':
        num_classes = 71
    elif dataset == 'gtea_gaze':
        num_classes = 44
    elif dataset == 'egtea':
        num_classes = 106

    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]

    normalize = Normalize(mean=mean, std=std)

    flow_wt = 0.5
    testBatchSize = 1
    sequence = True
    spatial_transform = Compose(
        [Scale(256), CenterCrop(224),
         ToTensor(), normalize])

    vid_seq_test = makeDataset(dataset_dir,
                               spatial_transform=spatial_transform,
                               sequence=sequence,
                               numSeg=numSeg,
                               stackSize=stackSize,
                               fmt='.jpg',
                               phase='Test',
                               seqLen=seqLen)

    test_loader = torch.utils.data.DataLoader(vid_seq_test,
                                              batch_size=testBatchSize,
                                              shuffle=False,
                                              num_workers=2,
                                              pin_memory=True)

    modelFlow = flow_resnet34(False,
                              channels=2 * stackSize,
                              num_classes=num_classes)
    modelFlow.load_state_dict(torch.load(flowModel_state_dict))
    modelRGBSN = attentionMDoubleResnet(num_classes=num_classes,
                                        mem_size=memSize)
    modelRGBSN.load_state_dict(torch.load(RGBModel_state_dict))

    for params in modelFlow.parameters():
        params.requires_grad = False

    for params in modelRGBSN.parameters():
        params.requires_grad = False

    modelFlow.train(False)
    modelRGBSN.train(False)
    modelFlow.cuda()
    modelRGBSN.cuda()
    test_samples = vid_seq_test.__len__()
    print('Number of samples = {}'.format(test_samples))
    print('Evaluating...')
    numCorrTwoStream = 0

    true_labels = []
    predicted_labels = []

    with torch.no_grad():
        for j, (inputFlow, inputFrame, inputSN, targets) in enumerate(test_loader):
            inputVariableFlow = inputFlow[0].cuda()
            inputVariableFrame = inputFrame.permute(1, 0, 2, 3, 4).cuda()
            inputVariableSN = inputSN.permute(1, 0, 2, 3, 4).cuda()
            output_labelFlow, _ = modelFlow(inputVariableFlow)
            output_labelFrameSN, _ = modelRGBSN(inputVariableFrame,
                                                inputVariableSN)
            output_label_meanFlow = torch.mean(output_labelFlow.data, 0, True)
            # weighted late fusion of the flow and RGB-SN class scores
            output_label_meanTwoStream = (flow_wt * output_label_meanFlow) + (
                (1 - flow_wt) * output_labelFrameSN.data)
            _, predictedTwoStream = torch.max(output_label_meanTwoStream, 1)
            numCorrTwoStream += (predictedTwoStream.cpu() == targets[0]).sum().item()
            true_labels.append(targets)
            predicted_labels.append(predictedTwoStream.cpu())
    test_accuracyTwoStream = (numCorrTwoStream / test_samples) * 100
    print('Test Accuracy = {}%'.format(test_accuracyTwoStream))

    cnf_matrix = confusion_matrix(torch.cat(true_labels),
                                  torch.cat(predicted_labels)).astype(float)
    cnf_matrix_normalized = cnf_matrix / cnf_matrix.sum(axis=1)[:, np.newaxis]

    ticks = np.linspace(0, 60, num=61)
    plt.imshow(cnf_matrix_normalized, interpolation='none', cmap='binary')
    plt.colorbar()
    plt.xticks(ticks, fontsize=6)
    plt.yticks(ticks, fontsize=6)
    plt.grid(True)
    plt.clim(0, 1)
    plt.savefig(dataset + '-twoStreamDoubleResnet.jpg', bbox_inches='tight')
    plt.show()