Example #1
def main_run(dataset, flowModel, rgbModel, stackSize, seqLen, memSize, trainDatasetDir, valDatasetDir, outDir,
             trainBatchSize, valBatchSize, lr1, numEpochs, decayRate, stepSize):


    if dataset == 'gtea61':
        num_classes = 61
    elif dataset == 'gtea71':
        num_classes = 71
    elif dataset == 'gtea_gaze':
        num_classes = 44
    elif dataset == 'egtea':
        num_classes = 106
    else:
        print('Dataset not found')
        sys.exit()

    # Setting Device
    DEVICE = "cuda"

    model_folder = os.path.join('./', outDir, dataset, 'twoStream')  # Dir for saving models and log files
    # Create the dir
    if os.path.exists(model_folder):
        print('Dir {} exists!'.format(model_folder))
        sys.exit()
    os.makedirs(model_folder)

    # Log files
    writer = SummaryWriter(model_folder)
    train_log_loss = open((model_folder + '/train_log_loss.txt'), 'w')
    train_log_acc = open((model_folder + '/train_log_acc.txt'), 'w')
    val_log_loss = open((model_folder + '/val_log_loss.txt'), 'w')
    val_log_acc = open((model_folder + '/val_log_acc.txt'), 'w')


    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]

    normalize = Normalize(mean=mean, std=std)

    spatial_transform = Compose([Scale(256), RandomHorizontalFlip(), MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224),
                                ToTensor(), normalize])

    vid_seq_train = makeDataset2Stream(trainDatasetDir,spatial_transform=spatial_transform,
                                sequence=False, numSeg=1, stackSize=stackSize, fmt='.png', seqLen=seqLen)

    train_loader = torch.utils.data.DataLoader(vid_seq_train, batch_size=trainBatchSize,
                            shuffle=True, num_workers=4, pin_memory=True)

    if valDatasetDir is not None:

        vid_seq_val = makeDataset2Stream(valDatasetDir,
                                    spatial_transform=Compose([Scale(256), CenterCrop(224), ToTensor(), normalize]),
                                    sequence=False, numSeg=1, stackSize=stackSize, fmt='.png', phase='Test',
                                    seqLen=seqLen)

        val_loader = torch.utils.data.DataLoader(vid_seq_val, batch_size=valBatchSize,
                                shuffle=False, num_workers=2, pin_memory=True)
        valSamples = len(vid_seq_val)

        

    model = twoStreamAttentionModel(flowModel=flowModel, frameModel=rgbModel, stackSize=stackSize, memSize=memSize,
                                        num_classes=num_classes)

    for params in model.parameters():
        params.requires_grad = False

    model.train(False)
    train_params = []

    for params in model.classifier.parameters():
        params.requires_grad = True
        train_params += [params]

    for params in model.frameModel.lstm_cell.parameters():
        train_params += [params]
        params.requires_grad = True

    # Fine-tune the convolutional layers of the last ResNet block and the
    # final fc layer of the frame model
    for layer in [model.frameModel.resNet.layer4[0].conv1,
                  model.frameModel.resNet.layer4[0].conv2,
                  model.frameModel.resNet.layer4[1].conv1,
                  model.frameModel.resNet.layer4[1].conv2,
                  model.frameModel.resNet.layer4[2].conv1,
                  model.frameModel.resNet.layer4[2].conv2,
                  model.frameModel.resNet.fc]:
        for params in layer.parameters():
            params.requires_grad = True
            train_params += [params]

    base_params = []
    for params in model.flowModel.layer4.parameters():
        base_params += [params]
        params.requires_grad = True

    model = model.to(DEVICE)

    trainSamples = len(vid_seq_train)
    min_accuracy = 0

    loss_fn = nn.CrossEntropyLoss()
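    # Two parameter groups: the fine-tuned frame-model layers use lr1, while
    # the flow backbone's layer4 gets its own smaller learning rate (1e-4)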
    optimizer_fn = torch.optim.SGD([
        {'params': train_params},
        {'params': base_params, 'lr': 1e-4},
    ], lr=lr1, momentum=0.9, weight_decay=5e-4)

    optim_scheduler = torch.optim.lr_scheduler.StepLR(optimizer_fn, step_size=stepSize, gamma=decayRate)
    
    train_iter = 0


    for epoch in range(numEpochs):
        epoch_loss = 0
        numCorrTrain = 0
        iterPerEpoch = 0
        model.classifier.train(True)
        model.flowModel.layer4.train(True)
        for j, (inputFlow, inputFrame, targets) in enumerate(train_loader):
            train_iter += 1
            iterPerEpoch += 1
            optimizer_fn.zero_grad()
            inputVariableFlow = inputFlow.to(DEVICE)
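            # Permute the RGB clip to (seqLen, batch, C, H, W): the recurrent
            # frame model consumes time-first input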
            inputVariableFrame = inputFrame.permute(1, 0, 2, 3, 4).to(DEVICE)
            labelVariable = targets.to(DEVICE)  # Variable is deprecated
            output_label = model(inputVariableFlow, inputVariableFrame)
            # CrossEntropyLoss applies log_softmax internally, so raw logits are passed
            loss = loss_fn(output_label, labelVariable)
            loss.backward()
            optimizer_fn.step()
            _, predicted = torch.max(output_label.data, 1)
            numCorrTrain += (predicted == labelVariable).sum()
            epoch_loss += loss.item()
        avg_loss = epoch_loss / iterPerEpoch
        trainAccuracy = torch.true_divide(numCorrTrain, trainSamples) * 100
        print('Average training loss after {} epoch = {} '.format(epoch + 1, avg_loss))
        print('Training accuracy after {} epoch = {}% '.format(epoch + 1, trainAccuracy))
        writer.add_scalar('train/epoch_loss', avg_loss, epoch + 1)
        writer.add_scalar('train/accuracy', trainAccuracy, epoch + 1)
        train_log_loss.write('Training loss after {} epoch = {}\n'.format(epoch + 1, avg_loss))
        train_log_acc.write('Training accuracy after {} epoch = {}\n'.format(epoch + 1, trainAccuracy))
        optim_scheduler.step()
        
        if valDatasetDir is not None:
            if (epoch + 1) % 1 == 0:  # evaluate every epoch; the else branch below never runs
                model.train(False)
                val_loss_epoch = 0
                val_iter = 0
                numCorr = 0
                # torch.no_grad() disables gradient tracking during validation
                # (it replaces the deprecated volatile=True flag)
                with torch.no_grad():
                    for j, (inputFlow, inputFrame, targets) in enumerate(val_loader):
                        val_iter += 1
                        inputVariableFlow = inputFlow.to(DEVICE)
                        inputVariableFrame = inputFrame.permute(1, 0, 2, 3, 4).to(DEVICE)
                        labelVariable = targets.to(DEVICE)
                        output_label = model(inputVariableFlow, inputVariableFrame)
                        # CrossEntropyLoss applies log_softmax internally
                        loss = loss_fn(output_label, labelVariable)
                        val_loss_epoch += loss.item()
                        _, predicted = torch.max(output_label.data, 1)
                        numCorr += (predicted == labelVariable.data).sum()
                val_accuracy = torch.true_divide(numCorr, valSamples) * 100
                avg_val_loss = val_loss_epoch / val_iter
                print('Val Loss after {} epochs, loss = {}'.format(epoch + 1, avg_val_loss))
                print('Val Accuracy after {} epochs = {}%'.format(epoch + 1, val_accuracy))
                writer.add_scalar('val/epoch_loss', avg_val_loss, epoch + 1)
                writer.add_scalar('val/accuracy', val_accuracy, epoch + 1)
                val_log_loss.write('Val Loss after {} epochs = {}\n'.format(epoch + 1, avg_val_loss))
                val_log_acc.write('Val Accuracy after {} epochs = {}%\n'.format(epoch + 1, val_accuracy))
                if val_accuracy > min_accuracy:
                    save_path_model = (model_folder + '/model_twoStream_state_dict.pth')
                    torch.save(model.state_dict(), save_path_model)
                    min_accuracy = val_accuracy
            else:
                if (epoch + 1) % 10 == 0:
                    save_path_model = (model_folder + '/model_twoStream_state_dict_epoch' + str(epoch + 1) + '.pth')
                    torch.save(model.state_dict(), save_path_model)



    train_log_loss.close()
    train_log_acc.close()
    val_log_acc.close()
    val_log_loss.close()
    #writer.export_scalars_to_json(model_folder + "/all_scalars.json")
    writer.flush()
    writer.close()
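For reference, a minimal sketch of how this two-stream entry point might be invoked. Every path and hyperparameter value below is an illustrative assumption, not taken from the original repository.

# Hypothetical invocation of the two-stream fine-tuning stage
main_run(dataset='gtea61',
         flowModel='./experiments/gtea61/flow/model_flow_state_dict.pth',
         rgbModel='./experiments/gtea61/rgb/stage2/model_rgb_state_dict.pth',
         stackSize=5, seqLen=25, memSize=512,
         trainDatasetDir='./GTEA61/train', valDatasetDir='./GTEA61/val',
         outDir='experiments', trainBatchSize=32, valBatchSize=64,
         lr1=1e-2, numEpochs=250, decayRate=0.99, stepSize=1)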
Example #2
from torch.utils.data import Dataset
from PIL import Image
import numpy as np
import random
import glob
import sys
import os

from ML_DL_Project.Scripts.spatial_transforms import (Compose, ToTensor,
                                                      CenterCrop, Scale,
                                                      Normalize,
                                                      MultiScaleCornerCrop,
                                                      RandomHorizontalFlip)

mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]
normalize = Normalize(mean=mean, std=std)
spatial_transform2 = Compose([Scale((7, 7)), ToTensor()])
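# The 7x7 scale presumably matches the spatial resolution of the motion-map
# targets used by the self-supervised examples below (see the reshapes to
# (..., 7, 7) in Examples #8 and #9)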


def gen_split(root_dir, stackSize):
    DatasetX = []
    DatasetY = []
    DatasetF = []
    Labels = []
    NumFrames = []
    root_dir = os.path.join(root_dir, 'flow_x_processed')
    for dir_user in sorted(os.listdir(root_dir)):
        if not dir_user.startswith('.'):
            class_id = 0
            directory = os.path.join(root_dir, dir_user)
            action = sorted(os.listdir(directory))
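The snippet is truncated here. A plausible continuation is sketched below, under the assumption that gen_split mirrors the loaders above: per-user directories of action classes, with flow-y and RGB-frame folders parallel to flow_x_processed. The directory names and the frame-count filter are guesses, not the original code.

            # Hedged sketch of the rest of gen_split (assumed directory layout)
            for target in action:
                target_dir = os.path.join(directory, target)
                for inst in sorted(os.listdir(target_dir)):
                    inst_dir = os.path.join(target_dir, inst)
                    numFrames = len(glob.glob(os.path.join(inst_dir, '*.png')))
                    if numFrames >= stackSize:
                        DatasetX.append(inst_dir)
                        DatasetY.append(inst_dir.replace('flow_x', 'flow_y'))
                        DatasetF.append(inst_dir.replace('flow_x_processed', 'processed_frames'))
                        Labels.append(class_id)
                        NumFrames.append(numFrames)
                class_id += 1
    return DatasetX, DatasetY, DatasetF, Labels, NumFrames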
Example #3
def main_run(dataset, stage, train_data_dir, val_data_dir, stage1_dict, out_dir, seqLen, trainBatchSize,
             valBatchSize, numEpochs, lr1, decayRate, stepSize, memSize, attention):

    if dataset == 'gtea61':
        num_classes = 61
    elif dataset == 'gtea71':
        num_classes = 71
    elif dataset == 'gtea_gaze':
        num_classes = 44
    elif dataset == 'egtea':
        num_classes = 106
    else:
        print('Dataset not found')
        sys.exit()

    # Setting Device
    DEVICE = "cuda"

    if attention:
        model_folder = os.path.join('./', out_dir, dataset, 'rgb', 'stage'+str(stage))  # Dir for saving models and log files
    else:
        # Without attention, save to a separate directory
        model_folder = os.path.join('./', out_dir, dataset, 'rgb_noAttention', 'stage'+str(stage))

    # Abort if the output directory already exists, so earlier runs are not overwritten
    if os.path.exists(model_folder):
        print('Directory {} exists!'.format(model_folder))
        sys.exit()
    os.makedirs(model_folder)

    # Log files
    writer = SummaryWriter(model_folder)
    train_log_loss = open((model_folder + '/train_log_loss.txt'), 'w')
    train_log_acc = open((model_folder + '/train_log_acc.txt'), 'w')
    val_log_loss = open((model_folder + '/val_log_loss.txt'), 'w')
    val_log_acc = open((model_folder + '/val_log_acc.txt'), 'w')


    # Data loader
    normalize = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    spatial_transform = Compose([Scale(256), RandomHorizontalFlip(), MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224),
                                 ToTensor(), normalize])

    vid_seq_train = makeDataset(train_data_dir,
                                spatial_transform=spatial_transform, seqLen=seqLen, fmt='.png')

    train_loader = torch.utils.data.DataLoader(vid_seq_train, batch_size=trainBatchSize,
                            shuffle=True, num_workers=4, pin_memory=True)
    if val_data_dir is not None:
        vid_seq_val = makeDataset(val_data_dir,spatial_transform = Compose([Scale(256),
                                                                            CenterCrop(224),
                                                                            ToTensor(),
                                                                            normalize]),
                                    seqLen=seqLen, fmt='.png')

        val_loader = torch.utils.data.DataLoader(vid_seq_val, batch_size=valBatchSize, shuffle=False, num_workers=2, pin_memory=True)
        valInstances = len(vid_seq_val)

    # trainInstances must be computed outside the validation guard
    trainInstances = len(vid_seq_train)


    train_params = []
    if stage == 1:

        if attention:
            model = attentionModel(num_classes=num_classes, mem_size=memSize)
        else:
            # Plain ConvLSTM model when attention is disabled
            model = clstm_Model(num_classes=num_classes, mem_size=memSize)

        model.train(False)
        for params in model.parameters():
            params.requires_grad = False
    else:

        if attention:
            model = attentionModel(num_classes=num_classes, mem_size=memSize)
        else:
            # Plain ConvLSTM model when attention is disabled
            model = clstm_Model(num_classes=num_classes, mem_size=memSize)


        model.load_state_dict(torch.load(stage1_dict))
        model.train(False)
        for params in model.parameters():
            params.requires_grad = False
        # Fine-tune the convolutional layers of the last ResNet block and the
        # final fc layer
        ft_layers = [model.resNet.layer4[0].conv1, model.resNet.layer4[0].conv2,
                     model.resNet.layer4[1].conv1, model.resNet.layer4[1].conv2,
                     model.resNet.layer4[2].conv1, model.resNet.layer4[2].conv2,
                     model.resNet.fc]
        for layer in ft_layers:
            for params in layer.parameters():
                params.requires_grad = True
                train_params += [params]
            layer.train(True)

    for params in model.lstm_cell.parameters():
        params.requires_grad = True
        train_params += [params]

    for params in model.classifier.parameters():
        params.requires_grad = True
        train_params += [params]


    model.lstm_cell.train(True)

    model.classifier.train(True)

    model = model.to(DEVICE)

    loss_fn = nn.CrossEntropyLoss()

    optimizer_fn = torch.optim.Adam(train_params, lr=lr1, weight_decay=4e-5, eps=1e-4)

    optim_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer_fn, milestones=stepSize, gamma=decayRate)
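    # Note: stepSize above is a list of epoch milestones (MultiStepLR), unlike
    # the scalar step period used with StepLR in Example #1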


    train_iter = 0
    min_accuracy = 0


    for epoch in range(numEpochs):
        epoch_loss = 0
        numCorrTrain = 0
        trainSamples = 0
        iterPerEpoch = 0
        model.lstm_cell.train(True)
        model.classifier.train(True)
        writer.add_scalar('lr', optimizer_fn.param_groups[0]['lr'], epoch+1)
        if stage == 2:
            for layer in ft_layers:
                layer.train(True)
        for i, (inputs, targets) in enumerate(train_loader):
            train_iter += 1
            iterPerEpoch += 1
            optimizer_fn.zero_grad()
            inputVariable = Variable(inputs.permute(1, 0, 2, 3, 4).to(DEVICE))
            labelVariable = Variable(targets.to(DEVICE))
            trainSamples += inputs.size(0)
            output_label, _ = model(inputVariable)
            loss = loss_fn(output_label, labelVariable)
            loss.backward()
            optimizer_fn.step()
            _, predicted = torch.max(output_label.data, 1)
            numCorrTrain += (predicted == targets.to(DEVICE)).sum()
            # loss.item() is the mean loss over the batch; dividing the summed
            # loss by iterPerEpoch below gives the average per-batch loss
            epoch_loss += loss.item()
        avg_loss = epoch_loss/iterPerEpoch
        # Integer tensor division is deprecated; torch.true_divide gives a float result
        trainAccuracy = torch.true_divide(numCorrTrain, trainSamples) * 100
        optim_scheduler.step()

        print('Train: Epoch = {} | Loss = {} | Accuracy = {}'.format(epoch+1, avg_loss, trainAccuracy))
        writer.add_scalar('train/epoch_loss', avg_loss, epoch+1)
        writer.add_scalar('train/accuracy', trainAccuracy, epoch+1)
        train_log_loss.write('train Loss after {} epochs = {}\n'.format(epoch + 1, avg_loss))
        train_log_acc.write('train Accuracy after {} epochs = {}%\n'.format(epoch + 1, trainAccuracy))
        if val_data_dir is not None:
            if (epoch+1) % 1 == 0:  # evaluate every epoch; the else branch below never runs
                model.train(False)
                val_loss_epoch = 0
                val_iter = 0
                val_samples = 0
                numCorr = 0
                with torch.no_grad():
                    for j, (inputs, targets) in enumerate(val_loader):
                        val_iter += 1
                        val_samples += inputs.size(0)
                        # Variable/volatile are deprecated; torch.no_grad()
                        # above handles inference mode
                        inputVariable = inputs.permute(1, 0, 2, 3, 4).to(DEVICE)
                        labelVariable = targets.to(DEVICE)
                        output_label, _ = model(inputVariable)
                        val_loss = loss_fn(output_label, labelVariable)
                        val_loss_epoch += val_loss.item()
                        _, predicted = torch.max(output_label.data, 1)
                        numCorr += (predicted == labelVariable).sum()
                # Integer tensor division is deprecated; torch.true_divide gives a float result
                val_accuracy = torch.true_divide(numCorr, val_samples) * 100
                avg_val_loss = val_loss_epoch / val_iter
                print('Val: Epoch = {} | Loss {} | Accuracy = {}'.format(epoch + 1, avg_val_loss, val_accuracy))
                writer.add_scalar('val/epoch_loss', avg_val_loss, epoch + 1)
                writer.add_scalar('val/accuracy', val_accuracy, epoch + 1)
                val_log_loss.write('Val Loss after {} epochs = {}\n'.format(epoch + 1, avg_val_loss))
                val_log_acc.write('Val Accuracy after {} epochs = {}%\n'.format(epoch + 1, val_accuracy))
                if val_accuracy > min_accuracy:
                    save_path_model = (model_folder + '/model_rgb_state_dict.pth')
                    torch.save(model.state_dict(), save_path_model)
                    min_accuracy = val_accuracy
            else:
                if (epoch+1) % 10 == 0:
                    save_path_model = (model_folder + '/model_rgb_state_dict_epoch' + str(epoch+1) + '.pth')
                    torch.save(model.state_dict(), save_path_model)

    train_log_loss.close()
    train_log_acc.close()
    val_log_acc.close()
    val_log_loss.close()
    #writer.export_scalars_to_json(model_folder + "/all_scalars.json")
    writer.flush()
    writer.close()
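A sketch of the intended two-stage schedule with illustrative (assumed) values: stage 1 trains only the ConvLSTM cell and classifier, while stage 2 reloads the stage-1 weights and additionally fine-tunes the last ResNet block.

# Hypothetical invocations (all paths and values are assumptions)
main_run('gtea61', 1, './GTEA61/train', './GTEA61/val', None,
         'experiments', seqLen=25, trainBatchSize=32, valBatchSize=64,
         numEpochs=200, lr1=1e-3, decayRate=0.1, stepSize=[25, 75, 150],
         memSize=512, attention=True)
main_run('gtea61', 2, './GTEA61/train', './GTEA61/val',
         './experiments/gtea61/rgb/stage1/model_rgb_state_dict.pth',
         'experiments', seqLen=25, trainBatchSize=32, valBatchSize=64,
         numEpochs=150, lr1=1e-4, decayRate=0.1, stepSize=[25, 75],
         memSize=512, attention=True)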
Example #4
def main_run(dataset, model_state_dict, dataset_dir, stackSize, seqLen):

    if dataset == 'gtea61':
        num_classes = 61
    elif dataset == 'gtea71':
        num_classes = 71
    elif dataset == 'gtea_gaze':
        num_classes = 44
    elif dataset == 'egtea':
        num_classes = 106
    else:
        print('Dataset not found')
        sys.exit()
    
    DEVICE="cuda"

    mean=[0.485, 0.456, 0.406]
    std=[0.229, 0.224, 0.225]

    normalize = Normalize(mean=mean, std=std)

    spatial_transform = Compose([Scale(256), CenterCrop(224), ToTensor(), normalize])

    vid_seq_test = makeDataset(dataset_dir, spatial_transform=spatial_transform, seqLen=seqLen,
                                stackSize=stackSize, fmt='.png')

    test_loader = torch.utils.data.DataLoader(vid_seq_test, batch_size=1,
                            shuffle=False, num_workers=2, pin_memory=True)
    
    actions = vid_seq_test.__getLabel__()

    model = flow_resnet34(False, channels=1*seqLen, num_classes=num_classes)
    model.load_state_dict(torch.load(model_state_dict))
    for params in model.parameters():
        params.requires_grad = False

    model.train(False)
    model.to(DEVICE)
    test_samples = len(vid_seq_test)
    print('Number of samples = {}'.format(test_samples))
    print('Evaluating...')
    numCorr = 0
    true_labels = []
    predicted_labels = []
    with torch.no_grad():
        for inputs, targets in test_loader:
            # The "[0]" indexing used for IDT inputs was removed; verify this
            # matches the dataset's output format
            inputVariable = inputs.to(DEVICE)
            output_label, _ = model(inputVariable)
            output_label_mean = torch.mean(output_label.data, 0, True)
            _, predicted = torch.max(output_label_mean, 1)
            numCorr += (predicted == targets.to(DEVICE)).sum()
            true_labels.append(targets)
            predicted_labels.append(predicted.cpu())
    test_accuracy = torch.true_divide(numCorr, test_samples) * 100
    print('Test Accuracy  = {}%'.format(test_accuracy))

    cnf_matrix = confusion_matrix(true_labels, predicted_labels).astype(float)
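    # Row-normalize so each true-class row of the confusion matrix sums to 1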
    cnf_matrix_normalized = cnf_matrix / cnf_matrix.sum(axis=1)[:, np.newaxis]

    ticks = [action + str(i) for i, action in enumerate(actions)]
    plt.figure(figsize=(20,20))
    plt.imshow(cnf_matrix_normalized, interpolation='none', cmap='Blues')
    plt.colorbar()
    # Use the ordered tick list; wrapping it in set() would scramble label order
    plt.xticks(np.arange(num_classes), labels=ticks, fontsize=10, rotation=90)
    plt.yticks(np.arange(num_classes), labels=ticks, fontsize=10)
    plt.xlabel("Predicted")
    plt.ylabel("True")
    plt.grid(True)
    plt.clim(0, 1)
    plt.savefig(dataset + '-idt.jpg', bbox_inches='tight')
    plt.show()
Example #5
def main_run(dataset, model_state_dict, dataset_dir, seqLen, memSize, attention):

    if dataset == 'gtea61':
        num_classes = 61
    elif dataset == 'gtea71':
        num_classes = 71
    elif dataset == 'gtea_gaze':
        num_classes = 44
    elif dataset == 'egtea':
        num_classes = 106
    else:
        print('Dataset not found')
        sys.exit()

    DEVICE = "cuda"

    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]

    normalize = Normalize(mean=mean, std=std)
    spatial_transform = Compose([Scale(256), CenterCrop(224), ToTensor(), normalize])

    vid_seq_test = makeDataset(dataset_dir,
                               spatial_transform=spatial_transform,
                               seqLen=seqLen, fmt='.png')
    actions = vid_seq_test.__getLabel__()

    test_loader = torch.utils.data.DataLoader(vid_seq_test, batch_size=1,
                            shuffle=False, num_workers=2, pin_memory=True)
    if attention:
        model = attentionModel(num_classes=num_classes, mem_size=memSize)
    else:
        model = clstm_Model(num_classes=num_classes, mem_size=memSize)
    
    
    model.load_state_dict(torch.load(model_state_dict))

    for params in model.parameters():
        params.requires_grad = False

    model.train(False)
    model.to(DEVICE)
    test_samples = len(vid_seq_test)
    print('Number of samples = {}'.format(test_samples))
    print('Evaluating...')
    numCorr = 0
    true_labels = []
    predicted_labels = []
    # The loop index j was unused; iterate over the loader directly
    with torch.no_grad():
        for inputs, targets in test_loader:
            inputVariable = inputs.permute(1, 0, 2, 3, 4).to(DEVICE)
            output_label, _ = model(inputVariable)
            _, predicted = torch.max(output_label.data, 1)
            numCorr += (predicted == targets.to(DEVICE)).sum()
            true_labels.append(targets)
            #.cpu() because confusion matrix is from scikit-learn
            predicted_labels.append(predicted.cpu())
            
    test_accuracy = torch.true_divide(numCorr, test_samples) * 100
    print('Test Accuracy = {}%'.format(test_accuracy))

    # Debug: inspect the raw label lists
    print(true_labels)
    print(predicted_labels)

    cnf_matrix = confusion_matrix(true_labels, predicted_labels).astype(float)
    cnf_matrix_normalized = cnf_matrix / cnf_matrix.sum(axis=1)[:, np.newaxis]


    ticks = [action + str(i) for i, action in enumerate(actions)]
    plt.figure(figsize=(20,20))
    plt.imshow(cnf_matrix_normalized, interpolation='none', cmap='binary')
    plt.colorbar()
    # Use the ordered tick list; set() would scramble label order
    plt.xticks(np.arange(num_classes), labels=ticks, fontsize=10, rotation=90)
    plt.yticks(np.arange(num_classes), labels=ticks, fontsize=10)
    plt.xlabel("Predicted")
    plt.ylabel("True")
    plt.grid(True)
    plt.clim(0, 1)
    plt.savefig(dataset + '-rgb.jpg', bbox_inches='tight')
    plt.show()
Example #6
def main_run(dataset, model_state_dict, dataset_dir, stackSize, seqLen,
             memSize):

    if dataset == 'gtea61':
        num_classes = 61
    elif dataset == 'gtea71':
        num_classes = 71
    elif dataset == 'gtea_gaze':
        num_classes = 44
    elif dataset == 'egtea':
        num_classes = 106
    else:
        print('Dataset not found')
        sys.exit()

    DEVICE = "cuda"

    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]

    normalize = Normalize(mean=mean, std=std)

    testBatchSize = 1
    spatial_transform = Compose(
        [Scale(256), CenterCrop(224),
         ToTensor(), normalize])

    vid_seq_test = makeDataset2Stream(dataset_dir,
                                      spatial_transform=spatial_transform,
                                      sequence=False,
                                      numSeg=1,
                                      stackSize=stackSize,
                                      fmt='.png',
                                      phase='Test',
                                      seqLen=seqLen)

    test_loader = torch.utils.data.DataLoader(vid_seq_test,
                                              batch_size=testBatchSize,
                                              shuffle=False,
                                              num_workers=2,
                                              pin_memory=True)

    actions = vid_seq_test.__getLabel__()

    # Use the function arguments rather than the original hardcoded 5/512
    model = twoStreamAttentionModel(stackSize=stackSize,
                                    memSize=memSize,
                                    num_classes=num_classes)
    model.load_state_dict(torch.load(model_state_dict))

    for params in model.parameters():
        params.requires_grad = False

    model.train(False)
    model.to(DEVICE)

    test_samples = len(vid_seq_test)
    print('Number of samples = {}'.format(test_samples))
    print('Evaluating...')
    numCorrTwoStream = 0

    predicted_labels = []
    true_labels = []
    with torch.no_grad():
        for j, (inputFlow, inputFrame, targets) in enumerate(test_loader):
            inputVariableFrame = inputFrame.permute(1, 0, 2, 3, 4).to(DEVICE)
            inputVariableFlow = inputFlow.to(DEVICE)
            output_label = model(inputVariableFlow, inputVariableFrame)
            _, predictedTwoStream = torch.max(output_label.data, 1)
            numCorrTwoStream += (
                predictedTwoStream == targets.to(DEVICE)).sum()
            predicted_labels.append(predictedTwoStream.cpu())
            true_labels.append(targets)
    test_accuracyTwoStream = torch.true_divide(numCorrTwoStream, test_samples) * 100

    cnf_matrix = confusion_matrix(true_labels, predicted_labels).astype(float)
    cnf_matrix_normalized = cnf_matrix / cnf_matrix.sum(axis=1)[:, np.newaxis]

    print('Accuracy {:.02f}%'.format(test_accuracyTwoStream))

    ticks = [action + str(i) for i, action in enumerate(actions)]
    plt.figure(figsize=(20, 20))
    plt.imshow(cnf_matrix_normalized, interpolation='none', cmap='cool')
    plt.colorbar()
    # Use the ordered tick list; set() would scramble label order
    plt.xticks(np.arange(num_classes),
               labels=ticks,
               fontsize=6,
               rotation=90)
    plt.yticks(np.arange(num_classes), labels=ticks, fontsize=6)
    plt.grid(True)
    plt.clim(0, 1)
    plt.savefig(dataset + '-twoStreamJoint.jpg', bbox_inches='tight')
    plt.show()
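A minimal sketch of calling this joint evaluation entry point; the checkpoint path and parameter values are assumptions.

# Hypothetical invocation
main_run(dataset='gtea61',
         model_state_dict='./experiments/gtea61/twoStream/model_twoStream_state_dict.pth',
         dataset_dir='./GTEA61/test', stackSize=5, seqLen=25, memSize=512)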
Example #7
def main_run(dataset, train_data_dir, val_data_dir, out_dir, stackSize,
             trainBatchSize, valBatchSize, numEpochs, lr1, decayRate,
             stepSize):

    if dataset == 'gtea61':
        num_classes = 61
    elif dataset == 'gtea71':
        num_classes = 71
    elif dataset == 'gtea_gaze':
        num_classes = 44
    elif dataset == 'egtea':
        num_classes = 106
    else:
        print('Dataset not found')
        sys.exit()

    # Setting Device
    DEVICE = "cuda"

    model_folder = os.path.join('./', out_dir, dataset, 'flow')
    if os.path.exists(model_folder):
        print('Dir {} exists!'.format(model_folder))
        sys.exit()
    os.makedirs(model_folder)

    # Log files
    writer = SummaryWriter(model_folder)
    train_log_loss = open((model_folder + '/train_log_loss.txt'), 'w')
    train_log_acc = open((model_folder + '/train_log_acc.txt'), 'w')
    val_log_loss = open((model_folder + '/val_log_loss.txt'), 'w')
    val_log_acc = open((model_folder + '/val_log_acc.txt'), 'w')

    # Data loader
    normalize = Normalize(mean=[0.485, 0.456, 0.406],
                          std=[0.229, 0.224, 0.225])

    spatial_transform = Compose([
        Scale(256),
        RandomHorizontalFlip(),
        MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224),
        ToTensor(), normalize
    ])

    vid_seq_train = makeDatasetFlow(train_data_dir,
                                    spatial_transform=spatial_transform,
                                    sequence=False,
                                    stackSize=stackSize,
                                    fmt='.png')

    train_loader = torch.utils.data.DataLoader(vid_seq_train,
                                               batch_size=trainBatchSize,
                                               shuffle=True,
                                               sampler=None,
                                               num_workers=4,
                                               pin_memory=True)
    if val_data_dir is not None:

        vid_seq_val = makeDatasetFlow(val_data_dir,
                                      spatial_transform=Compose([
                                          Scale(256),
                                          CenterCrop(224),
                                          ToTensor(), normalize
                                      ]),
                                      sequence=False,
                                      stackSize=stackSize,
                                      fmt='.png',
                                      phase='Test')

        val_loader = torch.utils.data.DataLoader(vid_seq_val,
                                                 batch_size=valBatchSize,
                                                 shuffle=False,
                                                 num_workers=2,
                                                 pin_memory=True)
        valInstances = len(vid_seq_val)

    trainInstances = len(vid_seq_train)
    if val_data_dir is not None:
        print('Number of samples in the dataset: training = {} | validation = {}'.
              format(trainInstances, valInstances))

    model = flow_resnet34(True,
                          channels=2 * stackSize,
                          num_classes=num_classes)
    model.train(True)
    train_params = list(model.parameters())

    model.to(DEVICE)

    loss_fn = nn.CrossEntropyLoss()

    optimizer_fn = torch.optim.SGD(train_params,
                                   lr=lr1,
                                   momentum=0.9,
                                   weight_decay=5e-4)

    optim_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer_fn,
                                                           milestones=stepSize,
                                                           gamma=decayRate)

    min_accuracy = 0
    train_iter = 0

    for epoch in range(numEpochs):
        epoch_loss = 0
        numCorrTrain = 0
        trainSamples = 0
        iterPerEpoch = 0
        model.train(True)
        writer.add_scalar('lr', optimizer_fn.param_groups[0]['lr'], epoch + 1)
        for i, (inputs, targets) in enumerate(train_loader):
            train_iter += 1
            iterPerEpoch += 1
            optimizer_fn.zero_grad()
            inputVariable = inputs.to(DEVICE)
            labelVariable = targets.to(DEVICE)
            trainSamples += inputs.size(0)
            output_label, _ = model(inputVariable)
            loss = loss_fn(output_label, labelVariable)
            loss.backward()
            optimizer_fn.step()
            _, predicted = torch.max(output_label.data, 1)
            numCorrTrain += (predicted == labelVariable).sum()
            epoch_loss += loss.item()
        avg_loss = epoch_loss / iterPerEpoch
        trainAccuracy = torch.true_divide(numCorrTrain, trainSamples) * 100
        print('Train: Epoch = {} | Loss = {} | Accuracy = {}'.format(
            epoch + 1, avg_loss, trainAccuracy))
        writer.add_scalar('train/epoch_loss', avg_loss, epoch + 1)
        writer.add_scalar('train/accuracy', trainAccuracy, epoch + 1)
        train_log_loss.write('Training loss after {} epoch = {}\n'.format(
            epoch + 1, avg_loss))
        train_log_acc.write('Training accuracy after {} epoch = {}\n'.format(
            epoch + 1, trainAccuracy))
        optim_scheduler.step()

        if val_data_dir is not None:
            if (epoch + 1) % 1 == 0:  # evaluate every epoch; the else branch below never runs
                model.train(False)
                val_loss_epoch = 0
                val_iter = 0
                val_samples = 0
                numCorr = 0
                # torch.no_grad() disables gradient tracking during validation
                # (it replaces the deprecated volatile=True flag)
                with torch.no_grad():
                    for j, (inputs, targets) in enumerate(val_loader):
                        val_iter += 1
                        val_samples += inputs.size(0)
                        inputVariable = inputs.to(DEVICE)
                        # non_blocking=True allows an asynchronous copy from
                        # pinned host memory (replaces the old async=True)
                        labelVariable = targets.to(DEVICE, non_blocking=True)
                        output_label, _ = model(inputVariable)
                        val_loss = loss_fn(output_label, labelVariable)
                        val_loss_epoch += val_loss.item()
                        _, predicted = torch.max(output_label.data, 1)
                        numCorr += (predicted == labelVariable).sum()
                val_accuracy = torch.true_divide(numCorr, val_samples) * 100
                avg_val_loss = val_loss_epoch / val_iter
                print('Validation: Epoch = {} | Loss = {} | Accuracy = {}'.
                      format(epoch + 1, avg_val_loss, val_accuracy))
                writer.add_scalar('val/epoch_loss', avg_val_loss, epoch + 1)
                writer.add_scalar('val/accuracy', val_accuracy, epoch + 1)
                val_log_loss.write('Val Loss after {} epochs = {}\n'.format(
                    epoch + 1, avg_val_loss))
                val_log_acc.write(
                    'Val Accuracy after {} epochs = {}%\n'.format(
                        epoch + 1, val_accuracy))
                if val_accuracy > min_accuracy:
                    save_path_model = (model_folder +
                                       '/model_flow_state_dict.pth')
                    torch.save(model.state_dict(), save_path_model)
                    min_accuracy = val_accuracy
            else:
                if (epoch + 1) % 10 == 0:
                    save_path_model = (model_folder +
                                       '/model_flow_state_dict_epoch' +
                                       str(epoch + 1) + '.pth')
                    torch.save(model.state_dict(), save_path_model)

    train_log_loss.close()
    train_log_acc.close()
    val_log_acc.close()
    val_log_loss.close()
    #writer.export_scalars_to_json(model_folder + "/all_scalars.json")
    writer.flush()
    writer.close()
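A minimal sketch of launching the temporal-stream training; paths and hyperparameters are assumptions (note stepSize must be a milestones list for MultiStepLR).

# Hypothetical invocation
main_run('gtea61', './GTEA61/train', './GTEA61/val', 'experiments',
         stackSize=5, trainBatchSize=32, valBatchSize=64, numEpochs=750,
         lr1=1e-2, decayRate=0.5, stepSize=[150, 300, 500])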
Example #8
def main_run(dataset, model_state_dict, dataset_dir, seqLen, memSize, regression):

    if dataset == 'gtea61':
        num_classes = 61
    elif dataset == 'gtea71':
        num_classes = 71
    elif dataset == 'gtea_gaze':
        num_classes = 44
    elif dataset == 'egtea':
        num_classes = 106
    else:
        print('Dataset not found')
        sys.exit()

    DEVICE = "cuda"

    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]

    normalize = Normalize(mean=mean, std=std)
    spatial_transform = Compose([Scale(256), CenterCrop(224), ToTensor(), normalize])

    vid_seq_test = makeDataset(dataset_dir,
                               spatial_transform=spatial_transform,
                               seqLen=seqLen, fmt='.png')

    test_loader = torch.utils.data.DataLoader(vid_seq_test, batch_size=1,
                            shuffle=False, num_workers=2, pin_memory=True)
    
    
    # REGRESSOR=True makes the motion-map head predict continuous values;
    # otherwise it outputs two-way logits per pixel (maps rounded to binary)
    model = SelfSupAttentionModel(num_classes=num_classes, mem_size=memSize, REGRESSOR=regression)
    
    model.load_state_dict(torch.load(model_state_dict))

    for params in model.parameters():
        params.requires_grad = False

    model.train(False)
    model.to(DEVICE)
    test_samples = len(vid_seq_test)
    print('Number of samples = {}'.format(test_samples))
    print('Evaluating...')
    numCorr = 0
    true_labels = []
    predicted_labels = []

    with torch.no_grad():
        for inputs, inputMmap, targets in test_loader:
            inputVariable = inputs.permute(1, 0, 2, 3, 4).to(DEVICE)
            inputMmap = inputMmap.to(DEVICE)
            output_label, _, mmapPrediction = model(inputVariable)

            if regression:
                # Regression -> float targets for the input motion maps
                mmapPrediction = mmapPrediction.view(-1)
                inputMmap = torch.reshape(inputMmap, (-1,)).float()
            else:
                mmapPrediction = mmapPrediction.view(-1, 2)
                inputMmap = torch.reshape(inputMmap, (-1,))
                inputMmap = torch.round(inputMmap).long()

            _, predicted = torch.max(output_label.data, 1)
            numCorr += (predicted == targets.to(DEVICE)).sum()
            true_labels.append(targets)
            # .cpu() because confusion_matrix is from scikit-learn
            predicted_labels.append(predicted.cpu())
            
            
    test_accuracy = torch.true_divide(numCorr, test_samples) * 100
    print('Test Accuracy = {}%'.format(test_accuracy))

    # Debug: inspect the raw label lists
    print(true_labels)
    print(predicted_labels)

    cnf_matrix = confusion_matrix(true_labels, predicted_labels).astype(float)
    cnf_matrix_normalized = cnf_matrix / cnf_matrix.sum(axis=1)[:, np.newaxis]


    ticks = np.arange(num_classes)  # one tick per class (the original hardcoded 61)
    plt.imshow(cnf_matrix_normalized, interpolation='none', cmap='binary')
    plt.colorbar()
    plt.xticks(ticks, fontsize=6)
    plt.yticks(ticks, fontsize=6)
    plt.grid(True)
    plt.clim(0, 1)
    plt.savefig(dataset + '-rgb.jpg', bbox_inches='tight')
    plt.show()
Example #9
def main_run(dataset, stage, train_data_dir, val_data_dir, stage1_dict,
             out_dir, seqLen, trainBatchSize, valBatchSize, numEpochs, lr1,
             decayRate, weightDecay, stackSize, stepSize, memSize, alpha,
             regression, pretrainedRgbStage1, rgbStage1Dict, Flow):

    if dataset == 'gtea61':
        num_classes = 61
    elif dataset == 'gtea71':
        num_classes = 71
    elif dataset == 'gtea_gaze':
        num_classes = 44
    elif dataset == 'egtea':
        num_classes = 106
    else:
        print('Dataset not found')
        sys.exit()

    # Setting Device
    DEVICE = "cuda"

    # Debug
    #print(regression)

    if regression:
        if Flow is True:
            model_folder = os.path.join(
                './', out_dir, dataset, 'regSelfSup_flow',
                'stage' + str(stage))  # Dir for saving models and log files
        else:
            model_folder = os.path.join(
                './', out_dir, dataset, 'regSelfSup',
                'stage' + str(stage))  # Dir for saving models and log files
    else:
        # Without regression, the motion-map head is trained as a per-pixel
        # binary classifier
        if Flow is True:
            model_folder = os.path.join(
                './', out_dir, dataset, 'selfSup_flow',
                'stage' + str(stage))  # Dir for saving models and log files
        else:
            model_folder = os.path.join(
                './', out_dir, dataset, 'selfSup',
                'stage' + str(stage))  # Dir for saving models and log files

    # Abort if the output directory already exists, so earlier runs are not overwritten
    if os.path.exists(model_folder):
        print('Directory {} exists!'.format(model_folder))
        sys.exit()
    os.makedirs(model_folder)

    # Log files
    writer = SummaryWriter(model_folder)
    train_log_loss = open((model_folder + '/train_log_loss.txt'), 'w')
    train_log_acc = open((model_folder + '/train_log_acc.txt'), 'w')
    val_log_loss = open((model_folder + '/val_log_loss.txt'), 'w')
    val_log_acc = open((model_folder + '/val_log_acc.txt'), 'w')

    # IMPORTANT: IF FLOW IS TRUE, DROP LAST BATCH FROM BOTH DATA LOADERS
    # Data loader
    normalize = Normalize(mean=[0.485, 0.456, 0.406],
                          std=[0.229, 0.224, 0.225])
    spatial_transform = Compose([
        Scale(256),
        RandomHorizontalFlip(),
        MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224),
        ToTensor(), normalize
    ])
    if Flow is True:
        vid_seq_train = makeDataset2Stream(train_data_dir,
                                           spatial_transform=spatial_transform,
                                           stackSize=stackSize,
                                           seqLen=seqLen,
                                           fmt='.png',
                                           selfSup=True)

        train_loader = torch.utils.data.DataLoader(vid_seq_train,
                                                   batch_size=trainBatchSize,
                                                   shuffle=True,
                                                   num_workers=4,
                                                   pin_memory=True,
                                                   drop_last=True)

        if val_data_dir is not None:
            vid_seq_val = makeDataset2Stream(val_data_dir,
                                             spatial_transform=Compose([
                                                 Scale(256),
                                                 CenterCrop(224),
                                                 ToTensor(), normalize
                                             ]),
                                             seqLen=seqLen,
                                             stackSize=stackSize,
                                             fmt='.png',
                                             selfSup=True)

            # Keep the loader inside the guard: vid_seq_val is undefined when
            # no validation directory is given
            val_loader = torch.utils.data.DataLoader(vid_seq_val,
                                                     batch_size=valBatchSize,
                                                     shuffle=False,
                                                     num_workers=2,
                                                     pin_memory=True,
                                                     drop_last=True)
            valInstances = len(vid_seq_val)

        trainInstances = len(vid_seq_train)

    else:
        vid_seq_train = makeDataset(train_data_dir,
                                    spatial_transform=spatial_transform,
                                    stackSize=stackSize,
                                    seqLen=seqLen,
                                    fmt='.png')

        train_loader = torch.utils.data.DataLoader(vid_seq_train,
                                                   batch_size=trainBatchSize,
                                                   shuffle=True,
                                                   num_workers=4,
                                                   pin_memory=True)

        if val_data_dir is not None:
            vid_seq_val = makeDataset(val_data_dir,
                                      spatial_transform=Compose([
                                          Scale(256),
                                          CenterCrop(224),
                                          ToTensor(), normalize
                                      ]),
                                      seqLen=seqLen,
                                      stackSize=stackSize,
                                      fmt='.png')

            # Keep the loader inside the guard, as above
            val_loader = torch.utils.data.DataLoader(vid_seq_val,
                                                     batch_size=valBatchSize,
                                                     shuffle=False,
                                                     num_workers=2,
                                                     pin_memory=True)
            valInstances = len(vid_seq_val)

        trainInstances = len(vid_seq_train)

    train_params = []
    if stage == 1:

        model = SelfSupAttentionModel(num_classes=num_classes,
                                      mem_size=memSize,
                                      REGRESSOR=regression,
                                      Flow=Flow)
        model.train(False)
        for params in model.parameters():
            params.requires_grad = False

    else:

        if pretrainedRgbStage1:
            # Pretrain from rgb with attention stage 1
            modelRgbStage1 = attentionModel(num_classes=num_classes,
                                            mem_size=memSize)
            modelRgbStage1.load_state_dict(torch.load(rgbStage1Dict))

            model = SelfSupAttentionModel(num_classes=num_classes,
                                          mem_size=memSize,
                                          REGRESSOR=regression,
                                          Flow=Flow)

            model.classifier = modelRgbStage1.classifier
            model.lstm_cell = modelRgbStage1.lstm_cell

        else:
            # Pretrain with stage1 from self supervised
            model = SelfSupAttentionModel(num_classes=num_classes,
                                          mem_size=memSize,
                                          REGRESSOR=regression,
                                          Flow=Flow)
            model.load_state_dict(torch.load(stage1_dict))

        model.train(False)
        for params in model.parameters():
            params.requires_grad = False
        # Fine-tune the convolutional layers of the last ResNet block and the
        # final fc layer
        ft_layers = [model.resNet.layer4[0].conv1, model.resNet.layer4[0].conv2,
                     model.resNet.layer4[1].conv1, model.resNet.layer4[1].conv2,
                     model.resNet.layer4[2].conv1, model.resNet.layer4[2].conv2,
                     model.resNet.fc]
        for layer in ft_layers:
            for params in layer.parameters():
                params.requires_grad = True
                train_params += [params]
            layer.train(True)

    for params in model.lstm_cell.parameters():
        params.requires_grad = True
        train_params += [params]

    for params in model.classifier.parameters():
        params.requires_grad = True
        train_params += [params]

    model.lstm_cell.train(True)

    model.classifier.train(True)

    model = model.to(DEVICE)

    loss_fn = nn.CrossEntropyLoss()
    # Loss for the motion-segmentation self-supervised task: MSE when the head
    # regresses continuous maps, cross-entropy for per-pixel binary classification
    if regression:
        lossMS = nn.MSELoss()
    else:
        lossMS = nn.CrossEntropyLoss()

    optimizer_fn = torch.optim.Adam(train_params,
                                    lr=lr1,
                                    weight_decay=weightDecay,
                                    eps=1e-4)

    optim_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer_fn,
                                                           milestones=stepSize,
                                                           gamma=decayRate)

    # Debug
    #print(model)

    train_iter = 0
    min_accuracy = 0

    for epoch in range(numEpochs):
        epoch_loss = 0
        mmap_loss = 0
        numCorrTrain = 0
        trainSamples = 0
        iterPerEpoch = 0
        model.lstm_cell.train(True)
        model.classifier.train(True)
        writer.add_scalar('lr', optimizer_fn.param_groups[0]['lr'], epoch + 1)
        if stage == 2:
            for layer in ft_layers:
                layer.train(True)
        for inputs, inputMmap, targets in train_loader:
            train_iter += 1
            iterPerEpoch += 1
            optimizer_fn.zero_grad()
            # Move the motion maps to the device; with Flow the maps are
            # reshaped so the first dim follows the (drop_last) batch size
            # instead of a hardcoded 32
            if Flow is True:
                inputMmap = torch.reshape(inputMmap, (inputs.size(0), 14, 1, 7, 7))
            inputMmap = inputMmap.to(DEVICE)

            inputVariable = Variable(inputs.permute(1, 0, 2, 3, 4).to(DEVICE))
            labelVariable = Variable(targets.to(DEVICE))
            trainSamples += inputs.size(0)

            output_label, _, mmapPrediction = model(inputVariable)

            if regression:
                # Regression -> float targets for the input motion maps
                mmapPrediction = mmapPrediction.view(-1)
                inputMmap = torch.reshape(inputMmap, (-1, )).float()
            else:
                # Classification -> round the maps back to binary long targets
                mmapPrediction = mmapPrediction.view(-1, 2)
                inputMmap = torch.reshape(inputMmap, (-1, ))
                inputMmap = torch.round(inputMmap).long()

            # Weight the self-supervised loss by alpha
            loss2 = alpha * lossMS(mmapPrediction, inputMmap)
            loss = loss_fn(output_label, labelVariable)

            total_loss = loss + loss2
            total_loss.backward()

            optimizer_fn.step()
            _, predicted = torch.max(output_label.data, 1)
            numCorrTrain += (predicted == targets.to(DEVICE)).sum()
            # loss.item() is the mean batch loss; epoch averages divide by iterPerEpoch
            mmap_loss += loss2.item()
            epoch_loss += loss.item()

        optim_scheduler.step()
        avg_loss = epoch_loss / iterPerEpoch
        avg_mmap_loss = mmap_loss / iterPerEpoch
        # Integer tensor division is deprecated; torch.true_divide gives a float result
        trainAccuracy = torch.true_divide(numCorrTrain, trainSamples) * 100

        # TODO: consider rewording the motion-map print below
        print('Train: Epoch = {} | Loss = {} | Accuracy = {}'.format(
            epoch + 1, avg_loss, trainAccuracy))
        print('Mmap loss after {} epoch = {}'.format(epoch + 1,
                                                     avg_mmap_loss))

        writer.add_scalar('train/epoch_loss', avg_loss, epoch + 1)
        writer.add_scalar('train/accuracy', trainAccuracy, epoch + 1)
        writer.add_scalar('mmap_train_loss', avg_mmap_loss, epoch + 1)
        train_log_loss.write('Train Loss after {} epochs = {}\n'.format(
            epoch + 1, avg_loss))
        train_log_acc.write('Train Accuracy after {} epochs = {}%\n'.format(
            epoch + 1, trainAccuracy))
        train_log_loss.write('Train mmap loss after {} epoch = {}\n'.format(
            epoch + 1, avg_mmap_loss))
        if val_data_dir is not None:
            if (epoch + 1) % 1 == 0:  # evaluate every epoch; the else branch below never runs
                model.train(False)
                val_loss_epoch = 0
                val_mmap_loss = 0
                val_iter = 0
                val_samples = 0
                numCorr = 0

                with torch.no_grad():
                    for inputs, inputMmap, targets in val_loader:
                        val_iter += 1
                        val_samples += inputs.size(0)
                        # Variable/volatile are deprecated; torch.no_grad() handles inference
                        inputVariable = inputs.permute(1, 0, 2, 3, 4).to(DEVICE)
                        labelVariable = targets.to(DEVICE)
                        if Flow is True:
                            # First dim follows the (drop_last) validation batch size
                            inputMmap = torch.reshape(
                                inputMmap, (inputs.size(0), 14, 1, 7, 7))
                        inputMmap = inputMmap.to(DEVICE)
                        output_label, _, mmapPrediction = model(inputVariable)

                        if regression:
                            mmapPrediction = mmapPrediction.view(-1)
                            #Regression -> float number for the input motion maps
                            inputMmap = torch.reshape(inputMmap,
                                                      (-1, )).float()
                        else:
                            mmapPrediction = mmapPrediction.view(-1, 2)
                            inputMmap = torch.reshape(inputMmap, (-1, ))
                            inputMmap = torch.round(inputMmap).long()

                        val_loss2 = alpha * lossMS(mmapPrediction, inputMmap)

                        val_loss = loss_fn(output_label, labelVariable)
                        val_loss_epoch += val_loss.item()
                        val_mmap_loss += val_loss2.item()

                        _, predicted = torch.max(output_label.data, 1)
                        numCorr += (predicted == labelVariable).sum()
                # Integer tensor division is deprecated; torch.true_divide gives a float result
                val_accuracy = torch.true_divide(numCorr, val_samples) * 100
                avg_val_loss = val_loss_epoch / val_iter
                avg_mmap_val_loss = val_mmap_loss / val_iter

                print('Val: Epoch = {} | Loss {} | Accuracy = {}'.format(
                    epoch + 1, avg_val_loss, val_accuracy))
                # TODO: consider rewording this print
                print('Val MMap Loss after {} epochs, loss = {}'.format(
                    epoch + 1, avg_mmap_val_loss))
                writer.add_scalar('val/epoch_loss', avg_val_loss, epoch + 1)
                writer.add_scalar('val/accuracy', val_accuracy, epoch + 1)
                writer.add_scalar('val mmap/epoch_loss', avg_mmap_val_loss,
                                  epoch + 1)
                val_log_loss.write('Val Loss after {} epochs = {}\n'.format(
                    epoch + 1, avg_val_loss))
                val_log_acc.write(
                    'Val Accuracy after {} epochs = {}%\n'.format(
                        epoch + 1, val_accuracy))
                val_log_loss.write(
                    'Val MMap Loss after {} epochs = {}\n'.format(
                        epoch + 1, avg_mmap_val_loss))

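                # NOTE: despite its name, min_accuracy tracks the best
                # (highest) validation accuracy seen so far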
                if val_accuracy > min_accuracy:
                    # TODO: see if we can build 2 different state dicts,
                    # one for selfSup and one for regSelfSup
                    save_path_model = (model_folder +
                                       '/model_selfSup_state_dict.pth')
                    torch.save(model.state_dict(), save_path_model)
                    min_accuracy = val_accuracy
            else:
                if (epoch + 1) % 10 == 0:
                    # TODO: see if we can build 2 different state dicts,
                    # one for selfSup and one for regSelfSup.
                    # Note: this periodic checkpoint path differs from the
                    # best-model path above.
                    save_path_model = (model_folder +
                                       '/model_selfSup_state_dict_epoch' +
                                       str(epoch + 1) + '.pth')
                    torch.save(model.state_dict(), save_path_model)

    train_log_loss.close()
    train_log_acc.close()
    val_log_acc.close()
    val_log_loss.close()
    #writer.export_scalars_to_json(model_folder + "/all_scalars.json")
    writer.flush()
    writer.close()
Example #10
0
def main_run(dataset, stage, train_data_dir, val_data_dir, stage1_dict, out_dir, seqLen, trainBatchSize,
             valBatchSize, numEpochs, lr1, decayRate, stepSize, memSize, outPool_size, evalInterval):


    if dataset == 'gtea61':
        num_classes = 61
    elif dataset == 'gtea71':
        num_classes = 71
    elif dataset == 'gtea_gaze':
        num_classes = 44
    elif dataset == 'egtea':
        num_classes = 106
    else:
        print('Dataset not found')
        sys.exit()
    
    DEVICE = "cuda"
    c_cam_classes = outPool_size
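    # Assumption: outPool_size presumably sets the number of class-activation-
    # map channels used by the LSTA attention pooling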

    model_folder = os.path.join('./', out_dir, dataset, 'rgb_lsta', 'stage'+str(stage))

    if os.path.exists(model_folder):
        print('Directory {} exists!'.format(model_folder))
        sys.exit()
    os.makedirs(model_folder)


    # Log files
    writer = SummaryWriter(model_folder)
    train_log_loss = open((model_folder + '/train_log_loss.txt'), 'w')
    train_log_acc = open((model_folder + '/train_log_acc.txt'), 'w')
    val_log_loss = open((model_folder + '/val_log_loss.txt'), 'w')
    val_log_acc = open((model_folder + '/val_log_acc.txt'), 'w')


    # Data loader

    mean=[0.485, 0.456, 0.406]
    std=[0.229, 0.224, 0.225]

    normalize = Normalize(mean=mean, std=std)


    spatial_transform = Compose([Scale(256), RandomHorizontalFlip(), MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224),
                                 ToTensor(), normalize])

    vid_seq_train = makeDataset(train_data_dir,
                                spatial_transform=spatial_transform, fmt='.png', seqLen=seqLen)

    print('Number of train samples = {}'.format(len(vid_seq_train)))
    train_loader = torch.utils.data.DataLoader(vid_seq_train, batch_size=trainBatchSize, shuffle=True, num_workers=4, pin_memory=True)


    if val_data_dir is not None:
        vid_seq_val = makeDataset(val_data_dir, spatial_transform=Compose([Scale(256),
                                                                            CenterCrop(224),
                                                                            ToTensor(),
                                                                            normalize]),
                                fmt='.png', seqLen=seqLen)

        print('Number of test samples = {}'.format(len(vid_seq_val)))
        val_loader = torch.utils.data.DataLoader(vid_seq_val, batch_size=valBatchSize, shuffle=False, num_workers=2, pin_memory=True)


    train_params = []
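    # Stage 1 trains only the LSTA cell and classifier on a frozen backbone;
    # stage 2 additionally fine-tunes the last ResNet block (layer4) and fc.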
    if stage == 1:
        model = attentionModelLSTA(num_classes=num_classes, mem_size=memSize, c_cam_classes=c_cam_classes)
        model.train(False)
        for params in model.parameters():
            params.requires_grad = False
    elif stage == 2:
        model = attentionModelLSTA(num_classes=num_classes, mem_size=memSize, c_cam_classes=c_cam_classes)
        
        model.load_state_dict(torch.load(stage1_dict))
        model.train(False)

        for params in model.parameters():
            params.requires_grad = False

        for params in model.resNet.layer4[0].conv1.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.layer4[0].conv2.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.layer4[1].conv1.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.layer4[1].conv2.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.layer4[2].conv1.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.layer4[2].conv2.parameters():
            params.requires_grad = True
            train_params += [params]

        for params in model.resNet.fc.parameters():
            params.requires_grad = True
            train_params += [params]

    for params in model.lsta_cell.parameters():
        params.requires_grad = True
        train_params += [params]

    for params in model.classifier.parameters():
        params.requires_grad = True
        train_params += [params]

    model.classifier.train(True)
    model = model.to(DEVICE)

    loss_fn = nn.CrossEntropyLoss()

    optimizer_fn = torch.optim.Adam(train_params, lr=lr1, weight_decay=5e-4, eps=1e-4)

    optim_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer_fn, milestones=stepSize, gamma=decayRate)
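    # Adam updates only the explicitly unfrozen train_params; MultiStepLR
    # multiplies the learning rate by decayRate at every epoch in stepSize.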

    train_iter = 0
    min_accuracy = 0

    for epoch in range(numEpochs):
        epoch_loss = 0
        numCorrTrain = 0
        trainSamples = 0
        iterPerEpoch = 0
        model.classifier.train(True)
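        # Only the classifier is in train mode; the frozen backbone stays in
        # eval mode so its BatchNorm statistics are not updated.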
        writer.add_scalar('lr', optimizer_fn.param_groups[0]['lr'], epoch+1)
        for i, (inputs, targets) in enumerate(train_loader):
            train_iter += 1
            iterPerEpoch += 1
            optimizer_fn.zero_grad()
            inputVariable = inputs.permute(1, 0, 2, 3, 4).to(DEVICE)
            labelVariable = targets.to(DEVICE)
            trainSamples += inputs.size(0)
            output_label, _ = model(inputVariable)
            loss = loss_fn(output_label, labelVariable)
            loss.backward()
            optimizer_fn.step()
            _, predicted = torch.max(output_label.data, 1)
            numCorrTrain += (predicted == targets.to(DEVICE)).sum()
            epoch_loss += loss.item()
        avg_loss = epoch_loss/iterPerEpoch
        trainAccuracy = torch.true_divide(numCorrTrain, trainSamples) * 100
        optim_scheduler.step()

        print('Train: Epoch = {} | Loss = {} | Accuracy = {}'.format(epoch+1, avg_loss, trainAccuracy))
        writer.add_scalar('train/epoch_loss', avg_loss, epoch+1)
        writer.add_scalar('train/accuracy', trainAccuracy, epoch+1)
        train_log_loss.write('train Loss after {} epochs = {}\n'.format(epoch + 1, avg_loss))
        train_log_acc.write('train Accuracy after {} epochs = {}%\n'.format(epoch + 1, trainAccuracy))

        if val_data_dir is not None:
            if (epoch+1) % evalInterval == 0:
                model.train(False)
                val_loss_epoch = 0
                val_iter = 0
                val_samples = 0
                numCorr = 0
                with torch.no_grad():
                    for j, (inputs, targets) in enumerate(val_loader):
                        val_iter += 1
                        val_samples += inputs.size(0)
                        inputVariable = inputs.permute(1, 0, 2, 3, 4).to(DEVICE)
                        labelVariable = targets.to(DEVICE)
                        output_label, _ = model(inputVariable)
                        val_loss = loss_fn(output_label, labelVariable)
                        val_loss_epoch += val_loss.item()
                        _, predicted = torch.max(output_label.data, 1)
                        numCorr += (predicted == targets.to(DEVICE)).sum()

                val_accuracy = torch.true_divide(numCorr, val_samples) * 100
                avg_val_loss = val_loss_epoch / val_iter
                print('Val: Epoch = {} | Loss = {} | Accuracy = {}'.format(epoch + 1, avg_val_loss, val_accuracy))
                writer.add_scalar('val/epoch_loss', avg_val_loss, epoch + 1)
                writer.add_scalar('val/accuracy', val_accuracy, epoch + 1)
                val_log_loss.write('Val Loss after {} epochs = {}\n'.format(epoch + 1, avg_val_loss))
                val_log_acc.write('Val Accuracy after {} epochs = {}%\n'.format(epoch + 1, val_accuracy))

                if val_accuracy > min_accuracy:
                    min_accuracy = val_accuracy
                    save_path_model = (model_folder + '/model_rgb_lsta_state_dict.pth')
                    torch.save(model.state_dict(), save_path_model)
            else:
                if (epoch+1) % 10 == 0:
                    save_path_model = (model_folder + '/model_rgb_lsta_state_dict_epoch' + str(epoch+1) + '.pth')
                    torch.save(model.state_dict(), save_path_model)



    train_log_loss.close()
    train_log_acc.close()
    val_log_acc.close()
    val_log_loss.close()
    writer.flush()
    writer.close()
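
A minimal sketch of how this second main_run might be invoked; the argument
values below are illustrative assumptions, not values taken from the source.

if __name__ == '__main__':
    # Hypothetical invocation: paths and hyperparameters are placeholders.
    main_run(dataset='gtea61', stage=1,
             train_data_dir='./data/train', val_data_dir='./data/val',
             stage1_dict=None, out_dir='experiments',
             seqLen=25, trainBatchSize=32, valBatchSize=32,
             numEpochs=200, lr1=1e-3, decayRate=0.1, stepSize=[25, 75],
             memSize=512, outPool_size=100, evalInterval=5)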