Example #1
def train(args, model, enc=False):
    global best_acc

    weight = torch.ones(NUM_CLASSES)
    weight[0] = 121.21
    weight[1] = 947.02
    weight[2] = 151.92
    weight[3] = 428.31
    weight[4] = 25.88
    weight[5] = 235.97
    weight[6] = 885.72
    weight[7] = 911.87
    weight[8] = 307.49
    weight[9] = 204.69
    weight[10] = 813.92
    weight[11] = 5.83
    weight[12] = 34.22
    weight[13] = 453.34
    weight[14] = 346.10
    weight[15] = 250.19
    weight[16] = 119.99
    weight[17] = 75.28
    weight[18] = 76.71
    weight[19] = 8.58
    weight[20] = 281.68
    weight[21] = 924.07
    weight[22] = 3.91
    weight[23] = 7.14
    weight[24] = 88.89
    weight[25] = 59.00
    weight[26] = 126.59
    weight[27] = 0
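    #the per-class weights above are set by hand; index 27 (presumably the void/ignore label) gets weight 0 so it never contributes to the loss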

    assert os.path.exists(
        args.datadir), "Error: datadir (dataset directory) could not be loaded"

    co_transform = MyCoTransform(enc, augment=True, height=args.height)  #1024)
    co_transform_val = MyCoTransform(enc, augment=False,
                                     height=args.height)  #1024)
    dataset_train = cityscapes(args.datadir, co_transform, 'train')
    dataset_val = cityscapes(args.datadir, co_transform_val, 'val')

    loader = DataLoader(dataset_train,
                        num_workers=args.num_workers,
                        batch_size=args.batch_size,
                        shuffle=True)
    loader_val = DataLoader(dataset_val,
                            num_workers=args.num_workers,
                            batch_size=args.batch_size,
                            shuffle=False)

    if args.cuda:
        #criterion =LovaszLoss2d()
        #criterion = CrossEntropyLoss2d(weight.cuda())
        criterion = FocalLoss2d(weight.cuda())
    else:
        #criterion = LovaszLoss2d()
        #criterion = CrossEntropyLoss2d(weight)
        criterion = FocalLoss2d(weight)

    print(type(criterion))

    savedir = f'../save/{args.savedir}'

    if (enc):
        automated_log_path = savedir + "/automated_log_encoder.txt"
        modeltxtpath = savedir + "/model_encoder.txt"
    else:
        automated_log_path = savedir + "/automated_log.txt"
        modeltxtpath = savedir + "/model.txt"

    if (not os.path.exists(automated_log_path)
        ):  #dont add first line if it exists
        with open(automated_log_path, "a") as myfile:
            myfile.write(
                "Epoch\t\tTrain-loss\t\tTest-loss\t\tTrain-IoU\t\tTest-IoU\t\tlearningRate"
            )

    with open(modeltxtpath, "w") as myfile:
        myfile.write(str(model))

    #optimizer = Adam(model.parameters(), 5e-4, (0.9, 0.999),  eps=1e-08, weight_decay=2e-4)     ## scheduler 1
    optimizer = Adam(model.parameters(),
                     1e-4, (0.9, 0.999),
                     eps=1e-08,
                     weight_decay=1e-4)  ## scheduler 2

    start_epoch = 1

    #scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.5) # set up scheduler     ## scheduler 1
    lambda1 = lambda epoch: pow(
        (1 - ((epoch - 1) / args.num_epochs)), 0.9)  ## scheduler 2
    scheduler = lr_scheduler.LambdaLR(optimizer,
                                      lr_lambda=lambda1)  ## scheduler 2

    time_train_perepoch = []
    for epoch in range(start_epoch, args.num_epochs + 1):
        print("----- TRAINING - EPOCH", epoch, "-----")
        start_time_perepoch = time.time()

        scheduler.step(epoch)  ## scheduler 2

        epoch_loss = []
        time_train = []

        doIouTrain = args.iouTrain
        doIouVal = args.iouVal

        usedLr = 0
        for param_group in optimizer.param_groups:
            print("LEARNING RATE: ", param_group['lr'])
            usedLr = float(param_group['lr'])

        model.train()
        for step, (images, labels) in enumerate(loader):
            start_time = time.time()
            if args.cuda:
                images = images.cuda()
                labels = labels.cuda()

            #inputs = images
            #targets= labels
            inputs = Variable(images)
            targets = Variable(labels)
            outputs = model(inputs, only_encode=enc)
            optimizer.zero_grad()
            loss = criterion(outputs, targets[:, 0])
            #loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

            epoch_loss.append(loss.data[0])
            time_train.append(time.time() - start_time)

            if args.steps_loss > 0 and step % args.steps_loss == 0:
                average = sum(epoch_loss) / len(epoch_loss)
                print(
                    f'loss: {average:0.4} (epoch: {epoch}, step: {step})',
                    "// Avg time/img: %.4f s" %
                    (sum(time_train) / len(time_train) / args.batch_size))

        average_epoch_loss_train = sum(epoch_loss) / len(epoch_loss)
        #evalIoU.printConfMatrix(confMatrix, evalIoU.args)

        time_train_perepoch.append(time.time() - start_time_perepoch)
        print("// Time per epoch: %.4f hours" %
              (sum(time_train_perepoch) / len(time_train_perepoch) / 3600.0))

        #Validate on 500 val images after each epoch of training
        print("----- VALIDATING - EPOCH", epoch, "-----")
        model.eval()
        epoch_loss_val = []
        time_val = []

        if (doIouVal):
            iouEvalVal = iouEval(NUM_CLASSES)

        with torch.no_grad():
            for step, (images, labels) in enumerate(loader_val):
                start_time = time.time()
                if args.cuda:
                    images = images.cuda()
                    labels = labels.cuda()

                #inputs =images
                #targets=labels
                inputs = Variable(
                    images, requires_grad=False
                )  #, volatile=True)    #the old volatile flag freed the backward graph during eval; torch.no_grad() covers that now
                targets = Variable(labels,
                                   requires_grad=False)  #, volatile=True)
                outputs = model(inputs, only_encode=enc)

                loss = criterion(outputs, targets[:, 0])
                epoch_loss_val.append(loss.data[0])
                time_val.append(time.time() - start_time)

                if (doIouVal):
                    iouEvalVal.addBatch(
                        outputs.max(1)[1].unsqueeze(1).data, targets.data)

                if args.steps_loss > 0 and step % args.steps_loss == 0:
                    average = sum(epoch_loss_val) / len(epoch_loss_val)
                    print(
                        f'VAL loss: {average:0.4} (epoch: {epoch}, step: {step})',
                        "// Avg time/img: %.4f s" %
                        (sum(time_val) / len(time_val) / args.batch_size))

        average_epoch_loss_val = sum(epoch_loss_val) / len(epoch_loss_val)
        #scheduler.step(average_epoch_loss_val, epoch)  ## scheduler 1   # update lr if needed

        # Calculate IOU scores on class level from matrix
        iouVal = 0
        iouTrain = 0
        if (doIouVal):
            iouVal, iou_classes, accVal, acc_classes = iouEvalVal.getIoU()

            print("pole    : %.6f" % (iou_classes[0] * 100.0), "%\t")
            print("slight  : %.6f" % (iou_classes[1] * 100.0), "%\t")
            print("bboard  : %.6f" % (iou_classes[2] * 100.0), "%\t")
            print("tlight  : %.6f" % (iou_classes[3] * 100.0), "%\t")
            print("car     : %.6f" % (iou_classes[4] * 100.0), "%\t")
            print("truck   : %.6f" % (iou_classes[5] * 100.0), "%\t")
            print("bicycle : %.6f" % (iou_classes[6] * 100.0), "%\t")
            print("motor   : %.6f" % (iou_classes[7] * 100.0), "%\t")
            print("bus     : %.6f" % (iou_classes[8] * 100.0), "%\t")
            print("tsignf  : %.6f" % (iou_classes[9] * 100.0), "%\t")
            print("tsignb  : %.6f" % (iou_classes[10] * 100.0), "%\t")
            print("road    : %.6f" % (iou_classes[11] * 100.0), "%\t")
            print("sidewalk: %.6f" % (iou_classes[12] * 100.0), "%\t")
            print("curbcut : %.6f" % (iou_classes[13] * 100.0), "%\t")
            print("crosspln: %.6f" % (iou_classes[14] * 100.0), "%\t")
            print("bikelane: %.6f" % (iou_classes[15] * 100.0), "%\t")
            print("curb    : %.6f" % (iou_classes[16] * 100.0), "%\t")
            print("fence   : %.6f" % (iou_classes[17] * 100.0), "%\t")
            print("wall    : %.6f" % (iou_classes[18] * 100.0), "%\t")
            print("building: %.6f" % (iou_classes[19] * 100.0), "%\t")
            print("person  : %.6f" % (iou_classes[20] * 100.0), "%\t")
            print("rider   : %.6f" % (iou_classes[21] * 100.0), "%\t")
            print("sky     : %.6f" % (iou_classes[22] * 100.0), "%\t")
            print("vege    : %.6f" % (iou_classes[23] * 100.0), "%\t")
            print("terrain : %.6f" % (iou_classes[24] * 100.0), "%\t")
            print("markings: %.6f" % (iou_classes[25] * 100.0), "%\t")
            print("crosszeb: %.6f" % (iou_classes[26] * 100.0), "%\t")

            iouStr = getColorEntry(iouVal) + '{:0.2f}'.format(
                iouVal * 100) + '\033[0m'
            print("EPOCH IoU on VAL set: ", iouStr, "%")

            print("pole    : %.6f" % (acc_classes[0] * 100.0), "%\t")
            print("slight  : %.6f" % (acc_classes[1] * 100.0), "%\t")
            print("bboard  : %.6f" % (acc_classes[2] * 100.0), "%\t")
            print("tlight  : %.6f" % (acc_classes[3] * 100.0), "%\t")
            print("car     : %.6f" % (acc_classes[4] * 100.0), "%\t")
            print("truck   : %.6f" % (acc_classes[5] * 100.0), "%\t")
            print("bicycle : %.6f" % (acc_classes[6] * 100.0), "%\t")
            print("motor   : %.6f" % (acc_classes[7] * 100.0), "%\t")
            print("bus     : %.6f" % (acc_classes[8] * 100.0), "%\t")
            print("tsignf  : %.6f" % (acc_classes[9] * 100.0), "%\t")
            print("tsignb  : %.6f" % (acc_classes[10] * 100.0), "%\t")
            print("road    : %.6f" % (acc_classes[11] * 100.0), "%\t")
            print("sidewalk: %.6f" % (acc_classes[12] * 100.0), "%\t")
            print("curbcut : %.6f" % (acc_classes[13] * 100.0), "%\t")
            print("crosspln: %.6f" % (acc_classes[14] * 100.0), "%\t")
            print("bikelane: %.6f" % (acc_classes[15] * 100.0), "%\t")
            print("curb    : %.6f" % (acc_classes[16] * 100.0), "%\t")
            print("fence   : %.6f" % (acc_classes[17] * 100.0), "%\t")
            print("wall    : %.6f" % (acc_classes[18] * 100.0), "%\t")
            print("building: %.6f" % (acc_classes[19] * 100.0), "%\t")
            print("person  : %.6f" % (acc_classes[20] * 100.0), "%\t")
            print("rider   : %.6f" % (acc_classes[21] * 100.0), "%\t")
            print("sky     : %.6f" % (acc_classes[22] * 100.0), "%\t")
            print("vege    : %.6f" % (acc_classes[23] * 100.0), "%\t")
            print("terrain : %.6f" % (acc_classes[24] * 100.0), "%\t")
            print("markings: %.6f" % (acc_classes[25] * 100.0), "%\t")
            print("crosszeb: %.6f" % (acc_classes[26] * 100.0), "%\t")

            accStr = getColorEntry(accVal) + '{:0.2f}'.format(
                accVal * 100) + '\033[0m'
            print("EPOCH ACC on VAL set: ", accStr, "%")

        # remember best valIoU and save checkpoint
        if iouVal == 0:
            current_acc = average_epoch_loss_val
        else:
            current_acc = iouVal
        is_best = current_acc > best_acc
        best_acc = max(current_acc, best_acc)
        if (enc and epoch == args.num_epochs):
            best_acc = 0

        if enc:
            filenameCheckpoint = savedir + '/checkpoint_enc.pth'
            filenameBest = savedir + '/model_best_enc.pth'
        else:
            filenameCheckpoint = savedir + '/checkpoint.pth'
            filenameBest = savedir + '/model_best.pth'
        save_checkpoint({
            'state_dict': model.state_dict(),
        }, is_best, filenameCheckpoint, filenameBest)

        #SAVE MODEL AFTER EPOCH
        if (enc):
            filename = f'{savedir}/model_encoder-{epoch:03}.pth'
            filenamebest = f'{savedir}/model_encoder_best_each.pth'
        else:
            filename = f'{savedir}/model-{epoch:03}.pth'
            filenamebest = f'{savedir}/model_best_each.pth'
        if args.epochs_save > 0 and epoch > 0 and epoch % args.epochs_save == 0:
            torch.save(model.state_dict(), filename)
            print(f'save: {filename} (epoch: {epoch})')
        #if (True) #(is_best):
        torch.save(model.state_dict(), filenamebest)
        print(f'save: {filenamebest} (epoch: {epoch})')
        filenameSuperBest = f'{savedir}/model_superbest.pth'
        if (is_best):
            torch.save(model.state_dict(), filenameSuperBest)
            print(f'saving superbest')
        if (not enc):
            with open(savedir + "/best.txt", "w") as myfile:
                myfile.write("Best epoch is %d, with Val-IoU= %.4f" %
                             (epoch, iouVal))
        else:
            with open(savedir + "/best_encoder.txt", "w") as myfile:
                myfile.write("Best epoch is %d, with Val-IoU= %.4f" %
                             (epoch, iouVal))

        #SAVE TO FILE A ROW WITH THE EPOCH RESULT (train loss, val loss, train IoU, val IoU)
        #Epoch		Train-loss		Test-loss	Train-IoU	Test-IoU		learningRate
        with open(automated_log_path, "a") as myfile:
            myfile.write("\n%d\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.8f" %
                         (epoch, average_epoch_loss_train,
                          average_epoch_loss_val, iouTrain, iouVal, usedLr))

    return (model)  #return model (convenience for encoder-decoder training)
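
Examples #1 and #2 build their criterion from a FocalLoss2d class that is defined elsewhere in the project. As a rough reference, here is a minimal sketch of what such a 2D focal loss could look like, assuming (N, C, H, W) logits, (N, H, W) integer targets and an optional per-class weight tensor like the one filled in above; gamma=2 and the mean reduction are assumptions, not the repo's actual defaults.

import torch
import torch.nn as nn
import torch.nn.functional as F

class FocalLoss2d(nn.Module):
    #sketch only: the project's real FocalLoss2d may differ (gamma, reduction, ignore handling)
    def __init__(self, weight=None, gamma=2.0):
        super(FocalLoss2d, self).__init__()
        self.weight = weight    #per-class rebalancing weights (may be None)
        self.gamma = gamma      #focusing parameter

    def forward(self, outputs, targets):
        #outputs: (N, C, H, W) raw logits, targets: (N, H, W) class indices
        log_p = F.log_softmax(outputs, dim=1)
        ce = F.nll_loss(log_p, targets, reduction='none')  #per-pixel -log p_t
        p_t = torch.exp(-ce)                                #probability of the true class
        focal = (1.0 - p_t) ** self.gamma * ce
        if self.weight is not None:
            focal = focal * self.weight.to(outputs.device)[targets]
        return focal.mean()
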
Example #2
def train(args, model, enc=False):
    global best_acc

    weight = torch.ones(NUM_CLASSES)
    weight[0] = 121.21
    weight[1] = 947.02
    weight[2] = 151.92
    weight[3] = 428.31
    weight[4] = 25.88
    weight[5] = 235.97
    weight[6] = 885.72
    weight[7] = 911.87
    weight[8] = 307.49
    weight[9] = 204.69
    weight[10] = 813.92
    weight[11] = 5.83
    weight[12] = 34.22
    weight[13] = 453.34
    weight[14] = 346.10
    weight[15] = 250.19
    weight[16] = 119.99
    weight[17] = 75.28
    weight[18] = 76.71
    weight[19] = 8.58
    weight[20] = 281.68
    weight[21] = 924.07
    weight[22] = 3.91
    weight[23] = 7.14
    weight[24] = 88.89
    weight[25] = 59.00
    weight[26] = 126.59
    weight[27] = 0

    assert os.path.exists(
        args.datadir), "Error: datadir (dataset directory) could not be loaded"

    co_transform = MyCoTransform(enc, augment=True, height=args.height)  #1024)
    co_transform_val = MyCoTransform(enc, augment=False,
                                     height=args.height)  #1024)
    dataset_train = cityscapes(args.datadir, co_transform, 'train')
    dataset_val = cityscapes(args.datadir, co_transform_val, 'val')

    loader = DataLoader(dataset_train,
                        num_workers=args.num_workers,
                        batch_size=args.batch_size,
                        shuffle=True)
    loader_val = DataLoader(dataset_val,
                            num_workers=args.num_workers,
                            batch_size=args.batch_size,
                            shuffle=False)

    if args.cuda:
        #criterion = CrossEntropyLoss2d(weight.cuda())
        criterion = FocalLoss2d(weight.cuda())
    else:
        #criterion = CrossEntropyLoss2d(weight)
        criterion = FocalLoss2d(weight)

    print(type(criterion))

    savedir = f'../save/{args.savedir}'

    if (enc):
        automated_log_path = savedir + "/automated_log_encoder.txt"
        modeltxtpath = savedir + "/model_encoder.txt"
    else:
        automated_log_path = savedir + "/automated_log.txt"
        modeltxtpath = savedir + "/model.txt"

    if (not os.path.exists(automated_log_path)
        ):  #dont add first line if it exists
        with open(automated_log_path, "a") as myfile:
            myfile.write(
                "Epoch\t\tTrain-loss\t\tTest-loss\t\tTrain-IoU\t\tTest-IoU\t\tlearningRate"
            )

    with open(modeltxtpath, "w") as myfile:
        myfile.write(str(model))

    #optimizer = Adam(model.parameters(), 5e-4, (0.9, 0.999),  eps=1e-08, weight_decay=2e-4)     ## scheduler 1
    optimizer = Adam(model.parameters(),
                     5e-5, (0.9, 0.999),
                     eps=1e-08,
                     weight_decay=2e-4)  ## scheduler 2

    start_epoch = 1

    #scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.5) # set up scheduler     ## scheduler 1
    lambda1 = lambda epoch: pow(
        (1 - ((epoch - 1) / args.num_epochs)), 0.9)  ## scheduler 2
    scheduler = lr_scheduler.LambdaLR(optimizer,
                                      lr_lambda=lambda1)  ## scheduler 2

    for epoch in range(start_epoch, args.num_epochs + 1):
        print("----- TRAINING - EPOCH", epoch, "-----")

        scheduler.step(epoch)  ## scheduler 2

        epoch_loss = []
        time_train = []

        doIouTrain = args.iouTrain
        doIouVal = args.iouVal

        #TODO: remake the evalIoU.py code to avoid using "evalIoU.args"
        confMatrix = evalIoU.generateMatrixTrainId(evalIoU.args)
        perImageStats = {}
        nbPixels = 0

        usedLr = 0
        for param_group in optimizer.param_groups:
            print("LEARNING RATE: ", param_group['lr'])
            usedLr = float(param_group['lr'])

        model.train()
        for step, (images, labels) in enumerate(loader):
            start_time = time.time()
            if args.cuda:
                images = images.cuda()
                labels = labels.cuda()

            inputs = Variable(images)
            targets = Variable(labels)
            outputs = model(inputs, only_encode=enc)
            optimizer.zero_grad()
            loss = criterion(outputs, targets[:, 0])
            #loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

            epoch_loss.append(loss.data[0])
            time_train.append(time.time() - start_time)

            if args.steps_loss > 0 and step % args.steps_loss == 0:
                average = sum(epoch_loss) / len(epoch_loss)
                print(
                    f'loss: {average:0.4} (epoch: {epoch}, step: {step})',
                    "// Avg time/img: %.4f s" %
                    (sum(time_train) / len(time_train) / args.batch_size))

        average_epoch_loss_train = sum(epoch_loss) / len(epoch_loss)
        #evalIoU.printConfMatrix(confMatrix, evalIoU.args)

        #Validate on 500 val images after each epoch of training
        print("----- VALIDATING - EPOCH", epoch, "-----")
        model.eval()
        epoch_loss_val = []
        time_val = []

        #New confusion matrix data
        confMatrix = evalIoU.generateMatrixTrainId(evalIoU.args)
        perImageStats = {}
        nbPixels = 0

        for step, (images, labels) in enumerate(loader_val):
            start_time = time.time()
            if args.cuda:
                images = images.cuda()
                labels = labels.cuda()

            inputs = Variable(
                images, volatile=True
            )  #volatile flag frees the backward graph/outputs during eval (no gradients needed)
            targets = Variable(labels, volatile=True)
            outputs = model(inputs, only_encode=enc)

            loss = criterion(outputs, targets[:, 0])
            epoch_loss_val.append(loss.data[0])
            time_val.append(time.time() - start_time)

            #Add outputs to confusion matrix
            if (doIouVal):
                #compatibility with criterion dataparallel
                if isinstance(outputs, list):  #merge gpu tensors
                    outputs_cpu = outputs[0].cpu()
                    for i in range(1, len(outputs)):
                        outputs_cpu = torch.cat(
                            (outputs_cpu, outputs[i].cpu()), 0)
                    #print(outputs_cpu.size())
                else:
                    outputs_cpu = outputs.cpu()

                #start_time_iou = time.time()
                for i in range(0, outputs_cpu.size(0)):  #args.batch_size
                    prediction = ToPILImage()(
                        outputs_cpu[i].max(0)[1].data.unsqueeze(0).byte())
                    groundtruth = ToPILImage()(labels[i].cpu().byte())
                    nbPixels += evalIoU.evaluatePairPytorch(
                        prediction, groundtruth, confMatrix, perImageStats,
                        evalIoU.args)
                #print ("Time to add confusion matrix: ", time.time() - start_time_iou)

            if args.steps_loss > 0 and step % args.steps_loss == 0:
                average = sum(epoch_loss_val) / len(epoch_loss_val)
                print(
                    f'VAL loss: {average:0.4} (epoch: {epoch}, step: {step})',
                    "// Avg time/img: %.4f s" %
                    (sum(time_val) / len(time_val) / args.batch_size))

        average_epoch_loss_val = sum(epoch_loss_val) / len(epoch_loss_val)
        #scheduler.step(average_epoch_loss_val, epoch)  ## scheduler 1   # update lr if needed

        # Calculate IOU scores on class level from matrix
        iouVal = 0
        iouTrain = 0
        if (doIouVal):
            #start_time_iou = time.time()
            classScoreList = {}
            for label in evalIoU.args.evalLabels:
                labelName = evalIoU.trainId2label[label].name
                classScoreList[labelName] = evalIoU.getIouScoreForTrainLabel(
                    label, confMatrix, evalIoU.args)

            iouAvgStr = evalIoU.getColorEntry(
                evalIoU.getScoreAverage(classScoreList, evalIoU.args),
                evalIoU.args) + "{avg:5.3f}".format(
                    avg=evalIoU.getScoreAverage(
                        classScoreList, evalIoU.args)) + evalIoU.args.nocol
            iouVal = float(
                evalIoU.getScoreAverage(classScoreList, evalIoU.args))
            print("EPOCH IoU on VAL set: ", iouAvgStr)

        # remember best valIoU and save checkpoint
        if iouVal == 0:
            current_acc = average_epoch_loss_val
        else:
            current_acc = iouVal
        is_best = current_acc > best_acc
        best_acc = max(current_acc, best_acc)
        if (enc and epoch == args.num_epochs):
            best_acc = 0

        if enc:
            filenameCheckpoint = savedir + '/checkpoint_enc.pth'
            filenameBest = savedir + '/model_best_enc.pth'
        else:
            filenameCheckpoint = savedir + '/checkpoint.pth'
            filenameBest = savedir + '/model_best.pth'
        save_checkpoint({
            'state_dict': model.state_dict(),
        }, is_best, filenameCheckpoint, filenameBest)

        #SAVE MODEL AFTER EPOCH
        if (enc):
            filename = f'{savedir}/model_encoder-{epoch:03}.pth'
            filenamebest = f'{savedir}/model_encoder_best_each.pth'
        else:
            filename = f'{savedir}/model-{epoch:03}.pth'
            filenamebest = f'{savedir}/model_best_each.pth'
        if args.epochs_save > 0 and epoch > 0 and epoch % args.epochs_save == 0:
            torch.save(model.state_dict(), filename)
            print(f'save: {filename} (epoch: {epoch})')
        #if (True) #(is_best):
        torch.save(model.state_dict(), filenamebest)
        print(f'save: {filenamebest} (epoch: {epoch})')
        filenameSuperBest = f'{savedir}/model_superbest.pth'
        if (is_best):
            torch.save(model.state_dict(), filenameSuperBest)
            print(f'saving superbest')
        if (not enc):
            with open(savedir + "/best.txt", "w") as myfile:
                myfile.write("Best epoch is %d, with Val-IoU= %.4f" %
                             (epoch, iouVal))
        else:
            with open(savedir + "/best_encoder.txt", "w") as myfile:
                myfile.write("Best epoch is %d, with Val-IoU= %.4f" %
                             (epoch, iouVal))

        #SAVE TO FILE A ROW WITH THE EPOCH RESULT (train loss, val loss, train IoU, val IoU)
        #Epoch		Train-loss		Test-loss	Train-IoU	Test-IoU		learningRate
        with open(automated_log_path, "a") as myfile:
            myfile.write("\n%d\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.8f" %
                         (epoch, average_epoch_loss_train,
                          average_epoch_loss_val, iouTrain, iouVal, usedLr))

    return (model)  #return model (convenience for encoder-decoder training)
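
These train loops all drive Adam with the same polynomial ("poly") decay, wired through LambdaLR. A self-contained sketch of just that schedule, with illustrative base_lr and num_epochs values rather than the repo's settings:

import torch
from torch.optim import Adam, lr_scheduler

base_lr, num_epochs = 5e-5, 150                  #illustrative values only
params = [torch.nn.Parameter(torch.zeros(1))]    #stand-in for model.parameters()
optimizer = Adam(params, base_lr, (0.9, 0.999), eps=1e-08, weight_decay=2e-4)

#lr(epoch) = base_lr * (1 - (epoch - 1) / num_epochs) ** 0.9, decaying from base_lr towards 0
lambda1 = lambda epoch: pow((1 - ((epoch - 1) / num_epochs)), 0.9)
scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda1)

for epoch in range(1, num_epochs + 1):
    scheduler.step(epoch)  #older call pattern used in the examples; recent PyTorch prefers a bare scheduler.step()
    print(epoch, optimizer.param_groups[0]['lr'])
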
Example #3
def train(args, model, enc=False):
    global best_acc

    #TODO: calculate weights by processing the dataset histogram (right now they are set by hand from the torch values)
    #create a loader to run over all images, build a histogram of labels, then create the weight array using class balancing

    weight = torch.ones(NUM_CLASSES)
    if (enc):
        weight[0] = 2.3653597831726
        weight[1] = 4.4237880706787
        weight[2] = 2.9691488742828
        weight[3] = 5.3442072868347
        weight[4] = 5.2983593940735
        weight[5] = 5.2275490760803
        weight[6] = 5.4394111633301
        weight[7] = 5.3659925460815
        weight[8] = 3.4170460700989
        weight[9] = 5.2414722442627
        weight[10] = 4.7376127243042
        weight[11] = 5.2286224365234
        weight[12] = 5.455126285553
        weight[13] = 4.3019247055054
        weight[14] = 5.4264230728149
        weight[15] = 5.4331531524658
        weight[16] = 5.433765411377
        weight[17] = 5.4631009101868
        weight[18] = 5.3947434425354
    else:
        weight[0] = 2.8149201869965
        weight[1] = 6.9850029945374
        weight[2] = 3.7890393733978
        weight[3] = 9.9428062438965
        weight[4] = 9.7702074050903
        weight[5] = 9.5110931396484
        weight[6] = 10.311357498169
        weight[7] = 10.026463508606
        weight[8] = 4.6323022842407
        weight[9] = 9.5608062744141
        weight[10] = 7.8698215484619
        weight[11] = 9.5168733596802
        weight[12] = 10.373730659485
        weight[13] = 6.6616044044495
        weight[14] = 10.260489463806
        weight[15] = 10.287888526917
        weight[16] = 10.289801597595
        weight[17] = 10.405355453491
        weight[18] = 10.138095855713

    weight[19] = 0

    #loader = DataLoader(VOC12(args.datadir, input_transform, target_transform),
    #    num_workers=args.num_workers, batch_size=args.batch_size, shuffle=True)

    assert os.path.exists(
        args.datadir), "Error: datadir (dataset directory) could not be loaded"

    co_transform = MyCoTransform(enc, augment=True, height=args.height)  #1024)
    co_transform_val = MyCoTransform(enc, augment=False,
                                     height=args.height)  #1024)
    dataset_train = cityscapes(args.datadir, co_transform, 'train')
    dataset_val = cityscapes(args.datadir, co_transform_val, 'val')

    loader = DataLoader(dataset_train,
                        num_workers=args.num_workers,
                        batch_size=args.batch_size,
                        shuffle=True)
    loader_val = DataLoader(dataset_val,
                            num_workers=args.num_workers,
                            batch_size=args.batch_size,
                            shuffle=False)

    if args.cuda:
        criterion = CrossEntropyLoss2d(weight.cuda())
        #criterion = CriterionDataParallel(criterion).cuda()
    else:
        criterion = CrossEntropyLoss2d(weight)

    print(type(criterion))

    savedir = f'../save/{args.savedir}'

    if (enc):
        automated_log_path = savedir + "/automated_log_encoder.txt"
        modeltxtpath = savedir + "/model_encoder.txt"
    else:
        automated_log_path = savedir + "/automated_log.txt"
        modeltxtpath = savedir + "/model.txt"

    if (not os.path.exists(automated_log_path)
        ):  #dont add first line if it exists
        with open(automated_log_path, "a") as myfile:
            myfile.write(
                "Epoch\t\tTrain-loss\t\tTest-loss\t\tTrain-IoU\t\tTest-IoU\t\tlearningRate"
            )

    with open(modeltxtpath, "w") as myfile:
        myfile.write(str(model))

    #TODO: reduce memory in first gpu: https://discuss.pytorch.org/t/multi-gpu-training-memory-usage-in-balance/4163/4        #https://github.com/pytorch/pytorch/issues/1893
    """
	#Some optimizer examples:
    optimizer = Adam(model.parameters())    
    if args.model.startswith('FCN'):
        optimizer = SGD(model.parameters(), 1e-4, .9, 2e-5)
    if args.model.startswith('PSP'):
        optimizer = SGD(model.parameters(), 1e-2, .9, 1e-4)
    if args.model.startswith('Seg'):
        optimizer = SGD(model.parameters(), 1e-3, .9)
    if args.model.startswith('E'):
        #optimizer = Adam(model.parameters(), 1e-3, .9)
        optimizer = Adam(model.parameters(), 5e-4, .9, weight_decay=2e-4)
#5e-4 wd: 2e-4
    """
    #optimizer = Adam(model.parameters(), 5e-4, (0.9, 0.999),  eps=1e-08, weight_decay=2e-4)     ## scheduler 1
    optimizer = Adam(model.parameters(),
                     5e-4, (0.9, 0.999),
                     eps=1e-08,
                     weight_decay=1e-4)  ## scheduler 2

    start_epoch = 1
    if args.resume:
        #Must load weights, optimizer, epoch and best value.
        if enc:
            filenameCheckpoint = savedir + '/checkpoint_enc.pth.tar'
        else:
            filenameCheckpoint = savedir + '/checkpoint.pth.tar'

        assert os.path.exists(
            filenameCheckpoint
        ), "Error: resume option was used but checkpoint was not found in folder"
        checkpoint = torch.load(filenameCheckpoint)
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        best_acc = checkpoint['best_acc']
        print("=> Loaded checkpoint at epoch {})".format(checkpoint['epoch']))

    #scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.5) # set up scheduler     ## scheduler 1
    lambda1 = lambda epoch: pow(
        (1 - ((epoch - 1) / args.num_epochs)), 0.9)  ## scheduler 2
    scheduler = lr_scheduler.LambdaLR(optimizer,
                                      lr_lambda=lambda1)  ## scheduler 2

    if args.visualize and args.steps_plot > 0:
        board = Dashboard(args.port)

    for epoch in range(start_epoch, args.num_epochs + 1):
        print("----- TRAINING - EPOCH", epoch, "-----")

        scheduler.step(epoch)  ## scheduler 2

        epoch_loss = []
        time_train = []

        doIouTrain = args.iouTrain
        doIouVal = args.iouVal

        #TODO: remake the evalIoU.py code to avoid using "evalIoU.args"
        confMatrix = evalIoU.generateMatrixTrainId(evalIoU.args)
        perImageStats = {}
        nbPixels = 0

        usedLr = 0
        for param_group in optimizer.param_groups:
            print("LEARNING RATE: ", param_group['lr'])
            usedLr = float(param_group['lr'])

        model.train()
        for step, (images, labels) in enumerate(loader):

            start_time = time.time()
            #print (labels.size())
            #print (np.unique(labels.numpy()))
            #print("labels: ", np.unique(labels[0].numpy()))
            #labels = torch.ones(4, 1, 512, 1024).long()
            if args.cuda:
                images = images.cuda()
                labels = labels.cuda()

            inputs = Variable(images)
            targets = Variable(labels)
            outputs = model(inputs, only_encode=enc)

            #print("targets", np.unique(targets[:, 0].cpu().data.numpy()))

            optimizer.zero_grad()
            loss = criterion(outputs, targets[:, 0])
            loss.backward()
            optimizer.step()

            epoch_loss.append(loss.data[0])
            time_train.append(time.time() - start_time)

            #print (outputs_cpu.size())
            #Add outputs to confusion matrix    #CODE USING evalIoU.py remade from cityscapes/scripts/evaluation/evalPixelLevelSemanticLabeling.py
            if (doIouTrain):
                #compatibility with criterion dataparallel
                if isinstance(outputs, list):  #merge gpu tensors
                    outputs_cpu = outputs[0].cpu()
                    for i in range(1, len(outputs)):
                        outputs_cpu = torch.cat(
                            (outputs_cpu, outputs[i].cpu()), 0)
                    #print(outputs_cpu.size())
                else:
                    outputs_cpu = outputs.cpu()

                #start_time_iou = time.time()
                for i in range(0, outputs_cpu.size(0)):  #args.batch_size
                    prediction = ToPILImage()(
                        outputs_cpu[i].max(0)[1].data.unsqueeze(0).byte())
                    groundtruth = ToPILImage()(labels[i].cpu().byte())
                    nbPixels += evalIoU.evaluatePairPytorch(
                        prediction, groundtruth, confMatrix, perImageStats,
                        evalIoU.args)
                #print ("Time to add confusion matrix: ", time.time() - start_time_iou)

            #print(outputs.size())
            if args.visualize and args.steps_plot > 0 and step % args.steps_plot == 0:
                start_time_plot = time.time()
                image = inputs[0].cpu().data
                #image[0] = image[0] * .229 + .485
                #image[1] = image[1] * .224 + .456
                #image[2] = image[2] * .225 + .406
                #print("output", np.unique(outputs[0].cpu().max(0)[1].data.numpy()))
                board.image(image, f'input (epoch: {epoch}, step: {step})')
                if isinstance(outputs, list):  #merge gpu tensors
                    board.image(
                        color_transform(
                            outputs[0][0].cpu().max(0)[1].data.unsqueeze(0)),
                        f'output (epoch: {epoch}, step: {step})')
                else:
                    board.image(
                        color_transform(
                            outputs[0].cpu().max(0)[1].data.unsqueeze(0)),
                        f'output (epoch: {epoch}, step: {step})')
                board.image(color_transform(targets[0].cpu().data),
                            f'target (epoch: {epoch}, step: {step})')
                print("Time to paint images: ", time.time() - start_time_plot)
            if args.steps_loss > 0 and step % args.steps_loss == 0:
                average = sum(epoch_loss) / len(epoch_loss)
                print(
                    f'loss: {average:0.4} (epoch: {epoch}, step: {step})',
                    "// Avg time/img: %.4f s" %
                    (sum(time_train) / len(time_train) / args.batch_size))

        average_epoch_loss_train = sum(epoch_loss) / len(epoch_loss)
        #evalIoU.printConfMatrix(confMatrix, evalIoU.args)

        iouTrain = 0
        if (doIouTrain):
            # Calculate IOU scores on class level from matrix
            classScoreList = {}
            for label in evalIoU.args.evalLabels:
                labelName = evalIoU.trainId2label[label].name
                classScoreList[labelName] = evalIoU.getIouScoreForTrainLabel(
                    label, confMatrix, evalIoU.args)
            iouAvgStr = evalIoU.getColorEntry(
                evalIoU.getScoreAverage(classScoreList, evalIoU.args),
                evalIoU.args) + "{avg:5.3f}".format(
                    avg=evalIoU.getScoreAverage(
                        classScoreList, evalIoU.args)) + evalIoU.args.nocol

            iouTrain = float(
                evalIoU.getScoreAverage(classScoreList, evalIoU.args))
            print("EPOCH IoU on TRAIN set: ", iouAvgStr)
            #print("")
            #evalIoU.printClassScoresPytorchTrain(classScoreList, evalIoU.args)
            #print("--------------------------------")
            #print("Score Average : " + iouAvgStr )#+ "    " + niouAvgStr)
            #print("--------------------------------")
            #print("")
            #input ("Press key to continue...")

        #Validate on 500 val images after each epoch of training
        print("----- VALIDATING - EPOCH", epoch, "-----")
        model.eval()
        epoch_loss_val = []
        time_val = []

        #New confusion matrix data
        confMatrix = evalIoU.generateMatrixTrainId(evalIoU.args)
        perImageStats = {}
        nbPixels = 0

        for step, (images, labels) in enumerate(loader_val):
            start_time = time.time()
            if args.cuda:
                images = images.cuda()
                labels = labels.cuda()

            inputs = Variable(
                images, volatile=True
            )  #volatile flag frees the backward graph/outputs during eval (no gradients needed)
            targets = Variable(labels, volatile=True)
            outputs = model(inputs, only_encode=enc)

            loss = criterion(outputs, targets[:, 0])
            epoch_loss_val.append(loss.data[0])
            time_val.append(time.time() - start_time)

            #Add outputs to confusion matrix
            if (doIouVal):
                #compatibility with criterion dataparallel
                if isinstance(outputs, list):  #merge gpu tensors
                    outputs_cpu = outputs[0].cpu()
                    for i in range(1, len(outputs)):
                        outputs_cpu = torch.cat(
                            (outputs_cpu, outputs[i].cpu()), 0)
                    #print(outputs_cpu.size())
                else:
                    outputs_cpu = outputs.cpu()

                #start_time_iou = time.time()
                for i in range(0, outputs_cpu.size(0)):  #args.batch_size
                    prediction = ToPILImage()(
                        outputs_cpu[i].max(0)[1].data.unsqueeze(0).byte())
                    groundtruth = ToPILImage()(labels[i].cpu().byte())
                    nbPixels += evalIoU.evaluatePairPytorch(
                        prediction, groundtruth, confMatrix, perImageStats,
                        evalIoU.args)
                #print ("Time to add confusion matrix: ", time.time() - start_time_iou)

            if args.visualize and args.steps_plot > 0 and step % args.steps_plot == 0:
                start_time_plot = time.time()
                image = inputs[0].cpu().data
                board.image(image, f'VAL input (epoch: {epoch}, step: {step})')
                if isinstance(outputs, list):  #merge gpu tensors
                    board.image(
                        color_transform(
                            outputs[0][0].cpu().max(0)[1].data.unsqueeze(0)),
                        f'VAL output (epoch: {epoch}, step: {step})')
                else:
                    board.image(
                        color_transform(
                            outputs[0].cpu().max(0)[1].data.unsqueeze(0)),
                        f'VAL output (epoch: {epoch}, step: {step})')
                board.image(color_transform(targets[0].cpu().data),
                            f'VAL target (epoch: {epoch}, step: {step})')
                print("Time to paint images: ", time.time() - start_time_plot)
            if args.steps_loss > 0 and step % args.steps_loss == 0:
                average = sum(epoch_loss_val) / len(epoch_loss_val)
                print(
                    f'VAL loss: {average:0.4} (epoch: {epoch}, step: {step})',
                    "// Avg time/img: %.4f s" %
                    (sum(time_val) / len(time_val) / args.batch_size))

        average_epoch_loss_val = sum(epoch_loss_val) / len(epoch_loss_val)
        #scheduler.step(average_epoch_loss_val, epoch)  ## scheduler 1   # update lr if needed

        # Calculate IOU scores on class level from matrix
        iouVal = 0
        if (doIouVal):
            #start_time_iou = time.time()
            classScoreList = {}
            for label in evalIoU.args.evalLabels:
                labelName = evalIoU.trainId2label[label].name
                classScoreList[labelName] = evalIoU.getIouScoreForTrainLabel(
                    label, confMatrix, evalIoU.args)

            iouAvgStr = evalIoU.getColorEntry(
                evalIoU.getScoreAverage(classScoreList, evalIoU.args),
                evalIoU.args) + "{avg:5.3f}".format(
                    avg=evalIoU.getScoreAverage(
                        classScoreList, evalIoU.args)) + evalIoU.args.nocol
            iouVal = float(
                evalIoU.getScoreAverage(classScoreList, evalIoU.args))
            print("EPOCH IoU on VAL set: ", iouAvgStr)
            #print("")
            #evalIoU.printClassScoresPytorchTrain(classScoreList, evalIoU.args)
            #print("--------------------------------")
            #print("Score Average : " + iouAvgStr )#+ "    " + niouAvgStr)
            #print("--------------------------------")
            #print("")
            #print ("Time to calculate confusion matrix: ", time.time() - start_time_iou)
            #input ("Press key to continue...")

        # remember best valIoU and save checkpoint
        if iouVal == 0:
            current_acc = average_epoch_loss_val
        else:
            current_acc = iouVal
        is_best = current_acc > best_acc
        best_acc = max(current_acc, best_acc)
        if enc:
            filenameCheckpoint = savedir + '/checkpoint_enc.pth.tar'
            filenameBest = savedir + '/model_best_enc.pth.tar'
        else:
            filenameCheckpoint = savedir + '/checkpoint.pth.tar'
            filenameBest = savedir + '/model_best.pth.tar'
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'arch': str(model),
                'state_dict': model.state_dict(),
                'best_acc': best_acc,
                'optimizer': optimizer.state_dict(),
            }, is_best, filenameCheckpoint, filenameBest)

        #SAVE MODEL AFTER EPOCH
        if (enc):
            filename = f'{savedir}/model_encoder-{epoch:03}.pth'
            filenamebest = f'{savedir}/model_encoder_best.pth'
        else:
            filename = f'{savedir}/model-{epoch:03}.pth'
            filenamebest = f'{savedir}/model_best.pth'
        if args.epochs_save > 0 and epoch > 0 and epoch % args.epochs_save == 0:
            torch.save(model.state_dict(), filename)
            print(f'save: {filename} (epoch: {epoch})')
        if (is_best):
            torch.save(model.state_dict(), filenamebest)
            print(f'save: {filenamebest} (epoch: {epoch})')
            if (not enc):
                with open(savedir + "/best.txt", "w") as myfile:
                    myfile.write("Best epoch is %d, with Val-IoU= %.4f" %
                                 (epoch, iouVal))
            else:
                with open(savedir + "/best_encoder.txt", "w") as myfile:
                    myfile.write("Best epoch is %d, with Val-IoU= %.4f" %
                                 (epoch, iouVal))

        #SAVE TO FILE A ROW WITH THE EPOCH RESULT (train loss, val loss, train IoU, val IoU)
        #Epoch		Train-loss		Test-loss	Train-IoU	Test-IoU		learningRate
        with open(automated_log_path, "a") as myfile:
            myfile.write("\n%d\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.8f" %
                         (epoch, average_epoch_loss_train,
                          average_epoch_loss_val, iouTrain, iouVal, usedLr))

    return (model)  #return model (convenience for encoder-decoder training)
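
Example #3 switches the criterion to CrossEntropyLoss2d, which again lives elsewhere in the repo. A common definition for such a wrapper, and a reasonable guess at what is meant here, is NLLLoss applied to log-softmaxed logits:

import torch.nn as nn
import torch.nn.functional as F

class CrossEntropyLoss2d(nn.Module):
    #sketch only: the repo's own CrossEntropyLoss2d may differ in details
    def __init__(self, weight=None):
        super(CrossEntropyLoss2d, self).__init__()
        self.loss = nn.NLLLoss(weight)  #per-class weights; expects log-probabilities

    def forward(self, outputs, targets):
        #outputs: (N, C, H, W) logits, targets: (N, H, W) class indices
        return self.loss(F.log_softmax(outputs, dim=1), targets)
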
Example #4
def train(args, model, enc):
    global best_acc

    #TODO: calculate weights by processing the dataset histogram (right now they are set by hand from the torch values)
    #create a loader to run over all images, build a histogram of labels, then create the weight array using class balancing (a sketch follows this example)

    weight = torch.ones(NUM_CLASSES)
    weight[0] = 1
    weight[1] = 1
    weight[2] = 1
    weight[3] = 1
    weight[4] = 1
    weight[5] = 1
    weight[6] = 1
    weight[7] = 1
    weight[8] = 1
    weight[9] = 1
        
    
    assert os.path.exists(args.datadir), "Error: datadir (dataset directory) could not be loaded"

    #Loading the dataset
    co_transform = MyCoTransform(False, augment=True, height=args.height)#1024)
    co_transform_val = MyCoTransform(False, augment=False, height=args.height)#1024)
    
    
    dataset_train = cityscapes(args.datadir, co_transform, 'train')
    dataset_val = cityscapes(args.datadir, co_transform_val, 'test')
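    #NOTE: despite the name, the "validation" loader here is built from the 'test' split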

    loader = DataLoader(dataset_train, num_workers=args.num_workers, batch_size=args.batch_size, shuffle=True)
    loader_val = DataLoader(dataset_val, num_workers=args.num_workers, batch_size=args.batch_size, shuffle=False)

    if args.cuda:
        criterion = CrossEntropyLoss2dv2(weight.cuda())
        
    else:
        criterion = CrossEntropyLoss2dv2(weight)

    savedir = '../save/'+args.savedir

    automated_log_path = savedir + "/automated_log.txt"
    modeltxtpath = savedir + "/model.txt"    

    if (not os.path.exists(automated_log_path)):    #dont add first line if it exists 
        with open(automated_log_path, "a") as myfile:
            myfile.write("Epoch\t\tTrain-loss\t\tTest-loss\t\tTrain-IoU\t\tTest-IoU\t\tlearningRate")

    with open(modeltxtpath, "w") as myfile:
        myfile.write(str(model))
    
    # We use Adam optimizer with lr of 5e-4
    optimizer = Adam([ {'params' : model.parameters()},], 5e-4, (0.9, 0.999),  eps=1e-08, weight_decay=1e-4)
    
    start_epoch = 1
    if args.resume:
        #Must load weights, optimizer, epoch and best value. 
        filenameCheckpoint = savedir + '/checkpoint.pth.tar'#'/model_best.pth.tar'

        assert os.path.exists(filenameCheckpoint), "Error: resume option was used but checkpoint was not found in folder"
        checkpoint = torch.load(filenameCheckpoint)
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        best_acc = checkpoint['best_acc']
        print("=> Loaded checkpoint at epoch {})".format(checkpoint['epoch']))
    
    
    lambda1 = lambda epoch: pow((1-((epoch-1)/args.num_epochs)),0.9)                            ## scheduler 2
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda1)                             ## scheduler 2

    cont_train_loss = []
    cont_val_loss = []
    for epoch in range(start_epoch, args.num_epochs+1):
        print("----- TRAINING - EPOCH", epoch, "-----")

        scheduler.step(epoch)    ## scheduler 2

        epoch_loss = []
        time_train = []
     
        doIouTrain = args.iouTrain   
        doIouVal =  args.iouVal      

        #TODO: remake the evalIoU.py code to avoid using "evalIoU.args"
        confMatrix    = evalIoU.generateMatrixTrainId(evalIoU.args)
        perImageStats = {}
        nbPixels = 0

        usedLr = 0
        for param_group in optimizer.param_groups:
            print("LEARNING RATE: ", param_group['lr'])
            usedLr = float(param_group['lr'])

        
        model.train()
    
        for step, (images,oldimages, labels, filename, filenameGt) in enumerate(loader):
            start_time = time.time()
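            #NOTE: this unconditional break skips the whole training loop (leftover debug / eval-only run);
            #with it in place epoch_loss stays empty, so the non-"args.eval" branch below sets the train loss to 0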
            break
            
            if args.cuda:
                images = images.cuda()
                labels = labels.cuda()

            inputs = Variable(images)
            targets = Variable(labels)
            outputs, road_mask = model(inputs)

            optimizer.zero_grad()
            loss = criterion(outputs, targets[:, 0])
            loss.backward()
            optimizer.step()

            epoch_loss.append(loss.data[0])
            
            time_train.append(time.time() - start_time)

            if (doIouTrain):
                #compatibility with criterion dataparallel
                if isinstance(outputs, list):   #merge gpu tensors
                    outputs_cpu = outputs[0].cpu()
                    for i in range(1,len(outputs)):
                        outputs_cpu = torch.cat((outputs_cpu, outputs[i].cpu()), 0)
                else:
                    outputs_cpu = outputs.cpu()

                #start_time_iou = time.time()
                for i in range(0, outputs_cpu.size(0)):   #args.batch_size
                    prediction = ToPILImage()(outputs_cpu[i].max(0)[1].data.unsqueeze(0).byte())
                    groundtruth = ToPILImage()(labels[i].cpu().byte())
                    nbPixels += evalIoU.evaluatePairPytorch(prediction, groundtruth, confMatrix, perImageStats, evalIoU.args)
                #print ("Time to add confusion matrix: ", time.time() - start_time_iou)


        if not args.eval:
            average_epoch_loss_train = 0  #sum(epoch_loss) / len(epoch_loss)
        else:
            average_epoch_loss_train = sum(epoch_loss) / len(epoch_loss)
        #evalIoU.printConfMatrix(confMatrix, evalIoU.args)
        
        iouTrain = 0
        if (doIouTrain ):
            # Calculate IOU scores on class level from matrix
            classScoreList = {}
            for label in evalIoU.args.evalLabels:
                labelName = evalIoU.trainId2label[label].name
                classScoreList[labelName] = evalIoU.getIouScoreForTrainLabel(label, confMatrix, evalIoU.args)
            print(classScoreList)
            iouAvgStr  = evalIoU.getColorEntry(evalIoU.getScoreAverage(classScoreList, evalIoU.args), evalIoU.args) + "{avg:5.3f}".format(avg=evalIoU.getScoreAverage(classScoreList, evalIoU.args)) + evalIoU.args.nocol

            iouTrain = float(evalIoU.getScoreAverage(classScoreList, evalIoU.args))
            print ("EPOCH IoU on TRAIN set: ", iouAvgStr)
            
            evalIoU.printClassScoresPytorchTrain(classScoreList, evalIoU.args)
            print("--------------------------------")
            print("Score Average : " + iouAvgStr )#+ "    " + niouAvgStr)
            print("--------------------------------")
            

        #Validate on val images after each epoch of training
        print("----- VALIDATING - EPOCH", epoch, "-----")
        model.eval()
        #model = pretrained_model
        epoch_loss_val = []
        time_val = []

        #New confusion matrix data
        confMatrix    = evalIoU.generateMatrixTrainId(evalIoU.args)
        perImageStats = {}
        nbPixels = 0
        val_ct = 0
        for step, (images, oldimages, labels, filename, filenameGt) in enumerate(loader_val):
            start_time = time.time()
            #break
            if args.cuda:
                images = images.cuda()
                labels = labels.cuda()
            
            inputs = Variable(images, volatile=True)    #volatile flag frees the backward graph/outputs during eval (no gradients needed)
            targets = Variable(labels, volatile=True)
            outputs, road_mask = model(inputs) 
            
            loss = criterion(outputs, targets[:, 0])
            epoch_loss_val.append(loss.data[0])
            time_val.append(time.time() - start_time)
            
            #Add outputs to confusion matrix
            if (doIouVal):
                #compatibility with criterion dataparallel
                if isinstance(outputs, list):   #merge gpu tensors
                    outputs_cpu = outputs[0].cpu()
                    for i in range(1,len(outputs)):
                        outputs_cpu = torch.cat((outputs_cpu, outputs[i].cpu()), 0)
                else:
                    outputs_cpu = outputs.cpu()
                    targets_cpu = targets.cpu()
                    
                start_time_iou = time.time()
                for i in range(0, outputs_cpu.size(0)):   #args.batch_size
                    val_ct += 1
                    pred_img = outputs_cpu[i].max(0)[1].data.unsqueeze(0)
                    
                    roadMask = road_mask[i].data.cpu()
                    
                    #print(type(roadMask))
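                    #pixels outside the predicted road mask are overwritten with 255 (presumably the ignore label) before scoring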
                    pred_img[roadMask == 0] = 255
                    #predictionClr = ToPILImage()(Colorize()(pred_img.byte())) 
                    prediction = ToPILImage()(pred_img.byte())
                    
                    #filenameSave = "./save_color_res/" + str(val_ct).zfill(3)+'.png'
                    #filename_break = str(filename[0]).split('/')

                    #filename_path = '/'.join(filename_break[-3:])

                    #filenameSave = "./save_color_res/" + str(filename_path)
                    
                    #os.makedirs(os.path.dirname(filenameSave), exist_ok=True)

                    #predictionClr.save(filenameSave)
                    
                    groundtruth = ToPILImage()(labels[i].cpu().byte())
                    
                    nbPixels += evalIoU.evaluatePairPytorch(prediction, groundtruth, confMatrix, perImageStats, evalIoU.args)
                print ("Time to add confusion matrix: ", time.time() - start_time_iou)
                       
        
        average_epoch_loss_val = sum(epoch_loss_val) / len(epoch_loss_val)
        
        print(doIouVal)
        
        # Calculate IOU scores on class level from matrix
        iouVal = 0
        confMatrix= confMatrix[:12,:12]
        
        if (doIouVal):
            #start_time_iou = time.time()
            classScoreList = {}
            for label in evalIoU.args.evalLabels:
                labelName = evalIoU.trainId2label[label].name
                classScoreList[labelName] = evalIoU.getIouScoreForTrainLabel(label, confMatrix, evalIoU.args)
            print(classScoreList)

            iouAvgStr  = evalIoU.getColorEntry(evalIoU.getScoreAverage(classScoreList, evalIoU.args), evalIoU.args) + "{avg:5.3f}".format(avg=evalIoU.getScoreAverage(classScoreList, evalIoU.args)) + evalIoU.args.nocol
            iouVal = float(evalIoU.getScoreAverage(classScoreList, evalIoU.args))
            print ("EPOCH IoU on VAL set: ", iouAvgStr)
            #print("")
            #evalIoU.printClassScoresPytorchTrain(classScoreList, evalIoU.args)
            #print("--------------------------------")
            #print("Score Average : " + iouAvgStr )#+ "    " + niouAvgStr)
            #print("--------------------------------")
            #print("")
            #print ("Time to calculate confusion matrix: ", time.time() - start_time_iou)
            #input ("Press key to continue...")
           

        # remember best valIoU and save checkpoint
        if iouVal == 0:
            current_acc = average_epoch_loss_val
        else:
            current_acc = iouVal 
        is_best = current_acc > best_acc
        best_acc = max(current_acc, best_acc)
    
        filenameCheckpoint = savedir + '/checkpoint.pth.tar'
        filenameBest = savedir + '/model_best.pth.tar'
        save_checkpoint({
            'epoch': epoch + 1,
            'arch': str(model),
            'state_dict': model.state_dict(),
            'best_acc': best_acc,
            'optimizer' : optimizer.state_dict(),
        }, is_best, filenameCheckpoint, filenameBest)

        #SAVE MODEL AFTER EPOCH
        
        filename = savedir+'/model-'+str(epoch)+'.pth'
        filenamebest = savedir+'/model_best.pth'
        if args.epochs_save > 0 and epoch > 0 and epoch % args.epochs_save == 0:
            torch.save(model.state_dict(), filename)
            print('save: '+filename+' (epoch: '+str(epoch)+')')
        if (is_best):
            torch.save(model.state_dict(), filenamebest)
            print('save: '+filenamebest+' (epoch: '+str(epoch)+')')
            
            
            with open(savedir + "/best_encoder.txt", "w") as myfile:
                myfile.write("Best epoch is %d, with Val-IoU= %.4f" % (epoch, iouVal))           

        #SAVE TO FILE A ROW WITH THE EPOCH RESULT (train loss, val loss, train IoU, val IoU)
        #Epoch		Train-loss		Test-loss	Train-IoU	Test-IoU		learningRate
        with open(automated_log_path, "a") as myfile:
            myfile.write("\n%d\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.8f" % (epoch, average_epoch_loss_train, average_epoch_loss_val, iouTrain, iouVal, usedLr ))
    
    return(model)   #return model (convenience for encoder-decoder training)
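
The TODO repeated in Examples #3 and #4 asks for class weights computed from a label histogram instead of hand-set values. Below is a sketch of one common recipe for that, the ENet-style balancing w_c = 1 / ln(c + p_c); the loader interface and the use of 255 as the void id are assumptions, not something taken from this project.

import torch

def class_weights_from_histogram(loader, num_classes, c=1.02):
    #counts label occurrences over the whole dataset, then applies w_c = 1 / ln(c + p_c)
    counts = torch.zeros(num_classes, dtype=torch.float64)
    for _, labels in loader:                 #assumes the loader yields (image, label) pairs
        ids = labels.view(-1).long()
        ids = ids[ids < num_classes]         #drop void/ignore ids such as 255
        counts += torch.bincount(ids, minlength=num_classes).double()
    probs = counts / counts.sum()
    return (1.0 / torch.log(c + probs)).float()

#usage sketch: weight = class_weights_from_histogram(loader, NUM_CLASSES)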