Ejemplo n.º 1
0
def train(opt, model, use_cuda):
    """Train ``model`` with SGD on the ``dt_ex`` dataset and report progress.

    The loop runs for epochs ``0 .. opt.epochs`` inclusive. Depending on the
    ``opt.steps_*`` settings it plots samples to a ``Dashboard``, prints the
    running mean loss of the current epoch and saves the model weights.

    Args:
        opt: Options namespace. Reads ``root``, ``workers``, ``batch``,
            ``epochs``, ``port``, ``steps_plot``, ``steps_loss`` and
            ``steps_save``; a non-positive ``steps_*`` value disables the
            corresponding action.
        model: Network to optimise. It is switched to training mode; it is
            not moved to the GPU by this function.
        use_cuda: When true, every batch and the criterion are moved to the
            GPU. When false nothing touches CUDA.
    """
    model.train()
    loader = DataLoader(dt_ex(opt.root, input_transform, target_transform, 512),
                        num_workers=opt.workers,
                        batch_size=opt.batch,
                        pin_memory=True,
                        shuffle=True)

    # The loss is unweighted. Previously a class-weighted criterion was built
    # and then immediately replaced by an unweighted one that was moved to
    # CUDA unconditionally, which crashed on CPU-only runs.
    criterion = CrossEntropyLoss2d()
    if use_cuda:
        criterion = criterion.cuda()

    # NOTE(review): the fourth positional argument of SGD is `dampening`, so
    # that is what 2e-5 has always been bound to. It looks like a weight
    # decay value -- confirm the intent before changing it.
    optimizer = SGD(model.parameters(), lr=1e-4, momentum=.9, dampening=2e-5)

    board = Dashboard(opt.port) if opt.steps_plot > 0 else None

    for epoch in range(opt.epochs + 1):
        epoch_loss = []
        for step, (images, labels) in enumerate(loader):
            if use_cuda:
                print('use cuda!!!!!!')
                images = images.cuda()
                labels = labels.cuda()
            inputs = Variable(images)
            targets = Variable(labels)

            outputs = model(inputs)

            optimizer.zero_grad()

            # Index 0 of dimension 1 of the target batch is used as the
            # label map for the loss.
            loss = criterion(outputs, targets[:, 0])
            loss.backward()
            optimizer.step()

            # `.item()` yields a Python float; indexing a 0-dim loss tensor
            # with `[0]` is an error in current PyTorch.
            epoch_loss.append(loss.item())

            if opt.steps_plot > 0 and step % opt.steps_plot == 0:
                image = inputs[0].cpu().data
                # Undo the per-channel normalisation (x * std + mean) so the
                # image is displayable.
                for channel, (std, mean) in enumerate(
                        ((.229, .485), (.224, .456), (.225, .406))):
                    image[channel] = image[channel] * std + mean
                board.image(image,
                            f'input (epoch: {epoch}, step: {step})')
                board.image(color_transform(outputs[0].cpu().max(0)[1].data),
                            f'output (epoch: {epoch}, step: {step})')
                board.image(color_transform(targets[0].cpu().data),
                            f'target (epoch: {epoch}, step: {step})')
            if opt.steps_loss > 0 and step % opt.steps_loss == 0:
                average = sum(epoch_loss) / len(epoch_loss)
                print(f'loss: {average} (epoch: {epoch}, step: {step})')
            if opt.steps_save > 0 and step % opt.steps_save == 0:
                filename = f'fcn8-{epoch:03}-{step:04}.pth'
                torch.save(model.state_dict(), filename)
                print(f'save: {filename} (epoch: {epoch}, step: {step})')
Ejemplo n.º 2
0
def train():
    """Train a ``SegNet`` on ``VOC12`` with SGD and a multi-step LR decay.

    All settings come from the module-level ``args`` object (``cuda``,
    ``datadir``, ``num_workers``, ``batch_size``, ``lr``, ``momentum``,
    ``weight_decay``, ``epochs``, ``steps_loss``, ``save_folder``) and from
    ``NUM_CLASSES``. The loop runs for epochs ``0 .. args.epochs`` inclusive
    and writes the model weights to ``args.save_folder`` after every
    even-numbered epoch.
    """
    model = SegNet(NUM_CLASSES)
    if args.cuda:
        model = model.cuda()

    # Every class weighs 1 in the loss except index 21, which weighs 0.
    weight = torch.ones(22)
    weight[21] = 0

    loader = DataLoader(VOC12(args.datadir, input_transform, target_transform),
                        num_workers=args.num_workers,
                        batch_size=args.batch_size,
                        shuffle=True)

    criterion = CrossEntropyLoss2d(weight.cuda() if args.cuda else weight)

    optimizer = SGD(model.parameters(),
                    lr=args.lr,
                    momentum=args.momentum,
                    weight_decay=args.weight_decay)
    # The learning rate is multiplied by 0.1 at epochs 10, 20, 30 and 40.
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer, [10, 20, 30, 40],
                                               gamma=0.1)

    for epoch in range(0, args.epochs + 1):
        epoch_loss = []
        scheduler.step(epoch)
        for step, (images, labels) in enumerate(loader):
            if args.cuda:
                images = images.cuda()
                labels = labels.cuda()

            inputs = Variable(images)
            targets = Variable(labels)
            outputs = model(inputs)

            optimizer.zero_grad()
            # Index 0 of dimension 1 of the target batch is the label map.
            loss = criterion(outputs, targets[:, 0])
            loss.backward()
            optimizer.step()

            # `.item()` yields a Python float; indexing a 0-dim loss tensor
            # with `[0]` is an error in current PyTorch.
            epoch_loss.append(loss.item())

            if args.steps_loss > 0 and step % args.steps_loss == 0:
                average = sum(epoch_loss) / len(epoch_loss)
                print(f'loss: {average} (epoch: {epoch}, step: {step})')

        if epoch % 2 == 0:
            save_filename = f"{args.save_folder}/model_{epoch}.pth"
            torch.save(model.state_dict(), save_filename)
Ejemplo n.º 3
0
def train():
    """Train a ``SegNetMultiScale`` on ``ChejianDataSet`` with SGD.

    All settings come from the module-level ``args`` object (``cuda``,
    ``datadir``, ``num_workers``, ``batch_size``, ``lr``, ``momentum``,
    ``weight_decay``, ``epochs``, ``steps_loss``, ``save_folder``) and from
    ``NUM_CLASSES``. The loop runs for epochs ``0 .. args.epochs`` inclusive,
    periodically prints the running mean loss together with the accuracy of
    the current batch, and saves the weights after every even-numbered epoch.
    """
    model = SegNetMultiScale(3, NUM_CLASSES)
    if args.cuda:
        model = model.cuda()

    # Uniform class weights over 12 entries.
    weight = torch.ones(12)

    loader = DataLoader(ChejianDataSet(args.datadir, input_transform, target_transform),
                        num_workers=args.num_workers,
                        batch_size=args.batch_size,
                        shuffle=True)

    criterion = CrossEntropyLoss2d(weight.cuda() if args.cuda else weight)

    optimizer = SGD(model.parameters(),
                    lr=args.lr,
                    momentum=args.momentum,
                    weight_decay=args.weight_decay)
    # The learning rate is multiplied by 0.1 at epochs 5, 10, 15 and 20.
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer, [5, 10, 15, 20], gamma=0.1)

    for epoch in range(0, args.epochs + 1):
        epoch_loss = []
        scheduler.step(epoch)
        for step, (images, labels) in enumerate(loader):
            if args.cuda:
                images = images.cuda()
                labels = labels.cuda()

            inputs = Variable(images)
            targets = Variable(labels)
            outputs = model(inputs)

            optimizer.zero_grad()
            # Drop dimension 1 of the targets before computing the loss.
            loss = criterion(outputs, targets.squeeze(1))
            loss.backward()
            optimizer.step()
            # Use `.item()` here as well; the accuracy below already relied
            # on it, and `loss.data[0]` fails on 0-dim tensors.
            epoch_loss.append(loss.item())

            if args.steps_loss > 0 and step % args.steps_loss == 0:
                # The accuracy is only ever printed, so compute it (and pay
                # for the device synchronisation) only on reporting steps.
                predicted = outputs.data.max(1)[1].unsqueeze(1)
                acc = (predicted == labels).float().mean().item()
                average = sum(epoch_loss) / len(epoch_loss)
                print(f'loss: {average} (epoch: {epoch}, step: {step}, acc: {acc})')

        if epoch % 2 == 0:
            save_filename = f"../{args.save_folder}/model_{epoch}.pth"
            torch.save(model.state_dict(), save_filename)
Ejemplo n.º 4
0
def train(args, model):
    """Train ``model`` on the ``MA`` dataset for 50 epochs with SGD.

    The dataset location, loader settings (one worker, batch size 1) and the
    epoch count are hard-coded. The running mean loss of the current epoch is
    printed after every step.

    Args:
        args: Unused; kept so the signature matches the caller.
        model: Network to optimise. It is switched to training mode.
    """
    model.train()

    loader = DataLoader(MA(
        '/Users/zhangweidong03/Code/dl/pytorch/github/piwise/MAdata',
        input_transform, target_transform),
                        num_workers=1,
                        batch_size=1,
                        shuffle=True)

    # Flip to True to run on the GPU.
    use_cuda = False

    # The loss is unweighted. Previously a class-weighted criterion was built
    # and then immediately overwritten by this unweighted one.
    criterion = CrossEntropyLoss2d()
    if use_cuda:
        criterion = criterion.cuda()

    # NOTE(review): the fourth positional argument of SGD is `dampening`, so
    # that is what 2e-5 has always been bound to. It looks like a weight
    # decay value -- confirm the intent before changing it.
    optimizer = SGD(model.parameters(), lr=1e-4, momentum=.9, dampening=2e-5)

    for epoch in range(1, 51):
        epoch_loss = []

        for step, (images, labels) in enumerate(loader):
            if use_cuda:
                images = images.cuda()
                labels = labels.cuda()

            inputs = Variable(images)
            targets = Variable(labels)
            outputs = model(inputs)

            optimizer.zero_grad()
            # Index 0 of dimension 1 of the target batch is the label map.
            loss = criterion(outputs, targets[:, 0])
            loss.backward()
            optimizer.step()

            # `.item()` yields a Python float; indexing a 0-dim loss tensor
            # with `[0]` is an error in current PyTorch.
            epoch_loss.append(loss.item())

            average = sum(epoch_loss) / len(epoch_loss)
            print(f'loss: {average} (epoch: {epoch}, step: {step})')
Ejemplo n.º 5
0
def train(args, model):
    """Fine-tune ``model`` on ``Cityscapes`` with a linearly decaying SGD rate.

    Weights are first loaded from ``model-001-0000.pth``. For every epoch a
    fresh SGD optimizer is created whose learning rate falls linearly from
    0.05 towards 0 over ``args.num_epochs`` epochs. After every step the
    running mean loss of the epoch is printed, and the weights are saved
    every ``args.steps_save`` steps.

    Batches and the criterion are always moved to CUDA.

    Args:
        args: Options namespace. Reads ``datadir``, ``num_workers``,
            ``batch_size``, ``num_epochs``, ``steps_save`` and ``model``
            (used as the checkpoint filename prefix).
        model: Network to optimise. It is switched to training mode; it is
            not moved to the GPU by this function.
    """
    loader = DataLoader(Cityscapes(args.datadir, input_transform, target_transform, scale=0),
                        num_workers=args.num_workers,
                        batch_size=args.batch_size,
                        shuffle=True)
    model.train()

    # Every class weighs 1 in the loss except index 19, which weighs 0.
    weight = torch.ones(20)
    weight[19] = 0
    criterion = CrossEntropyLoss2d(weight.cuda())

    model.load_state_dict(torch.load('model-001-0000.pth'), strict=True)

    lr_start = 0.05
    for epoch in range(0, args.num_epochs):
        lr_init = lr_start * ((args.num_epochs - epoch) / args.num_epochs)
        # NOTE(review): rebuilding the optimizer each epoch also discards the
        # momentum buffers; kept as-is to preserve training behaviour.
        optimizer = SGD(model.parameters(),
                        lr=lr_init,
                        momentum=0.9,
                        weight_decay=0.0005)
        epoch_loss = []
        for step, (images, label) in enumerate(loader):
            inputs = Variable(images).cuda()
            targets = Variable(label).cuda()
            output = model(inputs)
            loss = criterion(output, targets)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Store plain floats rather than loss tensors so the history does
            # not hold on to device memory and the average prints as a number.
            epoch_loss.append(loss.item())
            average = sum(epoch_loss) / len(epoch_loss)
            print(f'step:{step},loss:{average}')

            if args.steps_save > 0 and step % args.steps_save == 0:
                filename = f'{args.model}-{epoch:03}-{step:04}.pth'
                torch.save(model.state_dict(), filename)
Ejemplo n.º 6
0
def train(args, model):
    """Train ``model`` on the ``dt_ma`` dataset and report progress.

    The optimizer is chosen from the prefix of ``args.model``: SGD with
    model-specific settings for ``FCN*``, ``PSP*`` and ``Seg*``, and Adam
    with default settings otherwise. Depending on the ``args.steps_*``
    settings the loop plots samples to a ``Dashboard``, prints the running
    mean loss of the current epoch and saves the model weights.

    Args:
        args: Options namespace. Reads ``datadir``, ``num_workers``,
            ``batch_size``, ``cuda``, ``model``, ``num_epochs``, ``port``,
            ``steps_plot``, ``steps_loss`` and ``steps_save``; a non-positive
            ``steps_*`` value disables the corresponding action.
        model: Network to optimise. It is switched to training mode; it is
            not moved to the GPU by this function.
    """
    model.train()

    loader = DataLoader(dt_ma(args.datadir, input_transform, target_transform),
                        num_workers=args.num_workers,
                        batch_size=args.batch_size,
                        shuffle=True)

    # The loss is unweighted. Previously a class-weighted criterion was built
    # and then immediately overwritten by this unweighted one.
    criterion = CrossEntropyLoss2d()

    # NOTE(review): the fourth positional argument of SGD is `dampening`, so
    # that is what 2e-5 / 1e-4 have always been bound to. They look like
    # weight decay values -- confirm the intent before changing them.
    if args.model.startswith('FCN'):
        optimizer = SGD(model.parameters(), lr=1e-4, momentum=.9, dampening=2e-5)
    elif args.model.startswith('PSP'):
        optimizer = SGD(model.parameters(), lr=1e-2, momentum=.9, dampening=1e-4)
    elif args.model.startswith('Seg'):
        optimizer = SGD(model.parameters(), lr=1e-3, momentum=.9)
    else:
        optimizer = Adam(model.parameters())

    board = Dashboard(args.port) if args.steps_plot > 0 else None

    for epoch in range(1, args.num_epochs + 1):
        epoch_loss = []

        for step, (images, labels) in enumerate(loader):
            if args.cuda:
                images = images.cuda()
                labels = labels.cuda()

            inputs = Variable(images)
            targets = Variable(labels)
            outputs = model(inputs)

            optimizer.zero_grad()
            # Index 0 of dimension 1 of the target batch is the label map.
            loss = criterion(outputs, targets[:, 0])
            loss.backward()
            optimizer.step()

            # `.item()` yields a Python float; indexing a 0-dim loss tensor
            # with `[0]` is an error in current PyTorch.
            epoch_loss.append(loss.item())

            if args.steps_plot > 0 and step % args.steps_plot == 0:
                image = inputs[0].cpu().data
                # Undo the per-channel normalisation (x * std + mean) so the
                # image is displayable.
                for channel, (std, mean) in enumerate(
                        ((.229, .485), (.224, .456), (.225, .406))):
                    image[channel] = image[channel] * std + mean
                board.image(image, f'input (epoch: {epoch}, step: {step})')
                board.image(color_transform(outputs[0].cpu().max(0)[1].data),
                            f'output (epoch: {epoch}, step: {step})')
                board.image(color_transform(targets[0].cpu().data),
                            f'target (epoch: {epoch}, step: {step})')
            if args.steps_loss > 0 and step % args.steps_loss == 0:
                average = sum(epoch_loss) / len(epoch_loss)
                print(f'loss: {average} (epoch: {epoch}, step: {step})')
            if args.steps_save > 0 and step % args.steps_save == 0:
                filename = f'{args.model}-{epoch:03}-{step:04}.pth'
                torch.save(model.state_dict(), filename)
                print(f'save: {filename} (epoch: {epoch}, step: {step})')
Ejemplo n.º 7
0
def train(args, model):
    """Optimise ``model`` on the ``VOCTrain`` 'train' split using Adam.

    Class 0 contributes to the loss with weight 0.1, every other class with
    weight 1. Depending on the ``args.steps_*`` settings the loop plots
    samples to a ``Dashboard``, prints the running mean loss of the current
    epoch and saves the model weights.

    Args:
        args: Options namespace. Reads ``datadir``, ``num_workers``,
            ``batch_size``, ``cuda``, ``num_epochs``, ``port``, ``model``,
            ``steps_plot``, ``steps_loss`` and ``steps_save``.
        model: Network to optimise; put into training mode here.
    """
    model.train()

    class_weight = torch.ones(NUM_CLASSES)
    class_weight[0] = 0.1

    dataset = VOCTrain(args.datadir, 'train', input_transform,
                       target_transform)
    loader = DataLoader(dataset,
                        num_workers=args.num_workers,
                        batch_size=args.batch_size,
                        shuffle=True)

    criterion = CrossEntropyLoss2d(
        class_weight.cuda() if args.cuda else class_weight)

    optimizer = Adam(model.parameters(), lr=1e-5)

    plotting = args.steps_plot > 0
    board = Dashboard(args.port) if plotting else None

    last_epoch = args.num_epochs
    epoch = 1
    while epoch <= last_epoch:
        losses = []

        for step, batch in enumerate(loader):
            images, labels = batch
            if args.cuda:
                images, labels = images.cuda(), labels.cuda()

            inputs, targets = Variable(images), Variable(labels)
            outputs = model(inputs)

            optimizer.zero_grad()
            loss = criterion(outputs, targets[:, 0])
            loss.backward()
            optimizer.step()

            losses.append(loss.item())
            where = f'(epoch: {epoch}, step: {step})'

            if plotting and step % args.steps_plot == 0:
                sample = inputs[0].cpu().data
                # Map each of the three channels back through x * .5 + .5.
                for channel in range(3):
                    sample[channel] = sample[channel] * .5 + .5
                predicted = outputs[0].cpu().max(0, keepdim=True)[1].data
                board.image(sample, f'input {where}')
                board.image(color_transform(predicted), f'output {where}')
                board.image(color_transform(targets[0].cpu().data),
                            f'target {where}')
            if args.steps_loss > 0 and step % args.steps_loss == 0:
                mean_loss = sum(losses) / len(losses)
                print(f'loss: {mean_loss} {where}')
            if args.steps_save > 0 and step % args.steps_save == 0:
                checkpoint_name = f'{args.model}-{epoch:03}-{step:04}.pth'
                torch.save(model.state_dict(), checkpoint_name)
                print(f'save: {checkpoint_name} {where}')

        epoch += 1
Ejemplo n.º 8
0
def _merge_outputs_on_cpu(outputs):
    """Return ``outputs`` as one CPU tensor, concatenating a list along dim 0."""
    if isinstance(outputs, list):
        return torch.cat([part.cpu() for part in outputs], 0)
    return outputs.cpu()


def _add_batch_to_confusion_matrix(outputs, labels, confMatrix, perImageStats):
    """Feed every prediction/label pair of a batch to ``evalIoU``.

    Returns the sum of the values returned by ``evalIoU.evaluatePairPytorch``.
    """
    outputs_cpu = _merge_outputs_on_cpu(outputs)
    total = 0
    for i in range(outputs_cpu.size(0)):
        # The predicted class map is the arg-max over dimension 0.
        prediction = ToPILImage()(
            outputs_cpu[i].max(0)[1].data.unsqueeze(0).byte())
        groundtruth = ToPILImage()(labels[i].cpu().byte())
        total += evalIoU.evaluatePairPytorch(prediction, groundtruth,
                                             confMatrix, perImageStats,
                                             evalIoU.args)
    return total


def _average_iou(confMatrix):
    """Return ``(average IoU as float, coloured display string)`` for a matrix."""
    classScoreList = {}
    for label in evalIoU.args.evalLabels:
        labelName = evalIoU.trainId2label[label].name
        classScoreList[labelName] = evalIoU.getIouScoreForTrainLabel(
            label, confMatrix, evalIoU.args)
    average = evalIoU.getScoreAverage(classScoreList, evalIoU.args)
    text = (evalIoU.getColorEntry(average, evalIoU.args) +
            "{avg:5.3f}".format(avg=average) + evalIoU.args.nocol)
    return float(average), text


def _plot_batch(board, inputs, outputs, targets, prefix, epoch, step):
    """Send the first input, prediction and target of a batch to ``board``."""
    start_time_plot = time.time()
    where = f'(epoch: {epoch}, step: {step})'
    board.image(inputs[0].cpu().data, f'{prefix}input {where}')
    # With a list of outputs, plot the first sample of the first entry.
    first_output = outputs[0][0] if isinstance(outputs, list) else outputs[0]
    board.image(color_transform(first_output.cpu().max(0)[1].data.unsqueeze(0)),
                f'{prefix}output {where}')
    board.image(color_transform(targets[0].cpu().data),
                f'{prefix}target {where}')
    print("Time to paint images: ", time.time() - start_time_plot)


def train(args, model, enc=False):
    """Train and validate ``model`` on Cityscapes, checkpointing every epoch.

    Each epoch runs one pass over the 'train' split followed by one pass over
    the 'val' split. It then writes a resumable checkpoint through
    ``save_checkpoint``, optionally saves the plain weights, records the best
    model so far and appends one row to a tab-separated log file under
    ``../save/<args.savedir>``. The module-level ``best_acc`` is read and
    updated.

    Args:
        args: Options namespace. Reads ``datadir``, ``height``,
            ``num_workers``, ``batch_size``, ``cuda``, ``savedir``,
            ``resume``, ``num_epochs``, ``visualize``, ``steps_plot``,
            ``port``, ``iouTrain``, ``iouVal``, ``steps_loss`` and
            ``epochs_save``.
        model: Network to optimise; called as
            ``model(inputs, only_encode=enc)``.
        enc: Passed to the model as ``only_encode`` and to ``MyCoTransform``.
            It also selects the class-weight table and the ``*_encoder`` /
            ``*_enc`` output filenames.

    Returns:
        The same ``model`` object, for convenience when chaining
        encoder and decoder training.

    Raises:
        AssertionError: If ``args.datadir`` does not exist, or if
            ``args.resume`` is set and the checkpoint file is missing.
    """
    global best_acc

    # TODO: compute the class weights from the dataset's label histogram
    # instead of hard-coding them.
    if enc:
        class_weights = (
            2.3653597831726, 4.4237880706787, 2.9691488742828,
            5.3442072868347, 5.2983593940735, 5.2275490760803,
            5.4394111633301, 5.3659925460815, 3.4170460700989,
            5.2414722442627, 4.7376127243042, 5.2286224365234,
            5.455126285553, 4.3019247055054, 5.4264230728149,
            5.4331531524658, 5.433765411377, 5.4631009101868,
            5.3947434425354,
        )
    else:
        class_weights = (
            2.8149201869965, 6.9850029945374, 3.7890393733978,
            9.9428062438965, 9.7702074050903, 9.5110931396484,
            10.311357498169, 10.026463508606, 4.6323022842407,
            9.5608062744141, 7.8698215484619, 9.5168733596802,
            10.373730659485, 6.6616044044495, 10.260489463806,
            10.287888526917, 10.289801597595, 10.405355453491,
            10.138095855713,
        )
    weight = torch.ones(NUM_CLASSES)
    for class_index, class_weight in enumerate(class_weights):
        weight[class_index] = class_weight
    # Index 19 does not contribute to the loss.
    weight[19] = 0

    assert os.path.exists(
        args.datadir), "Error: datadir (dataset directory) could not be loaded"

    co_transform = MyCoTransform(enc, augment=True, height=args.height)
    co_transform_val = MyCoTransform(enc, augment=False, height=args.height)
    dataset_train = cityscapes(args.datadir, co_transform, 'train')
    dataset_val = cityscapes(args.datadir, co_transform_val, 'val')

    loader = DataLoader(dataset_train,
                        num_workers=args.num_workers,
                        batch_size=args.batch_size,
                        shuffle=True)
    loader_val = DataLoader(dataset_val,
                            num_workers=args.num_workers,
                            batch_size=args.batch_size,
                            shuffle=False)

    criterion = CrossEntropyLoss2d(weight.cuda() if args.cuda else weight)
    print(type(criterion))

    savedir = f'../save/{args.savedir}'

    if enc:
        automated_log_path = savedir + "/automated_log_encoder.txt"
        modeltxtpath = savedir + "/model_encoder.txt"
    else:
        automated_log_path = savedir + "/automated_log.txt"
        modeltxtpath = savedir + "/model.txt"

    # Write the column header only when the log file does not exist yet.
    if not os.path.exists(automated_log_path):
        with open(automated_log_path, "a") as myfile:
            myfile.write(
                "Epoch\t\tTrain-loss\t\tTest-loss\t\tTrain-IoU\t\tTest-IoU\t\tlearningRate"
            )

    with open(modeltxtpath, "w") as myfile:
        myfile.write(str(model))

    optimizer = Adam(model.parameters(),
                     5e-4, (0.9, 0.999),
                     eps=1e-08,
                     weight_decay=1e-4)

    start_epoch = 1
    if args.resume:
        # Restore weights, optimizer state, epoch counter and best score.
        if enc:
            filenameCheckpoint = savedir + '/checkpoint_enc.pth.tar'
        else:
            filenameCheckpoint = savedir + '/checkpoint.pth.tar'

        assert os.path.exists(
            filenameCheckpoint
        ), "Error: resume option was used but checkpoint was not found in folder"
        checkpoint = torch.load(filenameCheckpoint)
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        best_acc = checkpoint['best_acc']
        print("=> Loaded checkpoint at epoch {})".format(checkpoint['epoch']))

    def poly_decay(epoch):
        """Learning-rate factor (1 - (epoch - 1) / num_epochs) ** 0.9."""
        return pow((1 - ((epoch - 1) / args.num_epochs)), 0.9)

    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=poly_decay)

    plotting = args.visualize and args.steps_plot > 0
    if plotting:
        board = Dashboard(args.port)

    for epoch in range(start_epoch, args.num_epochs + 1):
        print("----- TRAINING - EPOCH", epoch, "-----")

        # The epoch is passed explicitly so a resumed run picks up the
        # learning rate that belongs to its starting epoch.
        scheduler.step(epoch)

        epoch_loss = []
        time_train = []

        doIouTrain = args.iouTrain
        doIouVal = args.iouVal

        # TODO: rework evalIoU so it does not depend on "evalIoU.args".
        confMatrix = evalIoU.generateMatrixTrainId(evalIoU.args)
        perImageStats = {}
        nbPixels = 0

        usedLr = 0
        for param_group in optimizer.param_groups:
            print("LEARNING RATE: ", param_group['lr'])
            usedLr = float(param_group['lr'])

        model.train()
        for step, (images, labels) in enumerate(loader):
            start_time = time.time()
            if args.cuda:
                images = images.cuda()
                labels = labels.cuda()

            inputs = Variable(images)
            targets = Variable(labels)
            outputs = model(inputs, only_encode=enc)

            optimizer.zero_grad()
            # Index 0 of dimension 1 of the target batch is the label map.
            loss = criterion(outputs, targets[:, 0])
            loss.backward()
            optimizer.step()

            # `.item()` yields a Python float; indexing a 0-dim loss tensor
            # with `[0]` is an error in current PyTorch.
            epoch_loss.append(loss.item())
            time_train.append(time.time() - start_time)

            if doIouTrain:
                nbPixels += _add_batch_to_confusion_matrix(
                    outputs, labels, confMatrix, perImageStats)

            if plotting and step % args.steps_plot == 0:
                _plot_batch(board, inputs, outputs, targets, '', epoch, step)
            if args.steps_loss > 0 and step % args.steps_loss == 0:
                average = sum(epoch_loss) / len(epoch_loss)
                print(
                    f'loss: {average:0.4} (epoch: {epoch}, step: {step})',
                    "// Avg time/img: %.4f s" %
                    (sum(time_train) / len(time_train) / args.batch_size))

        average_epoch_loss_train = sum(epoch_loss) / len(epoch_loss)

        iouTrain = 0
        if doIouTrain:
            iouTrain, iouAvgStr = _average_iou(confMatrix)
            print("EPOCH IoU on TRAIN set: ", iouAvgStr)

        # Validate on the 'val' split after each epoch of training.
        print("----- VALIDATING - EPOCH", epoch, "-----")
        model.eval()
        epoch_loss_val = []
        time_val = []

        # Fresh confusion-matrix state for the validation pass.
        confMatrix = evalIoU.generateMatrixTrainId(evalIoU.args)
        perImageStats = {}
        nbPixels = 0

        for step, (images, labels) in enumerate(loader_val):
            start_time = time.time()
            if args.cuda:
                images = images.cuda()
                labels = labels.cuda()

            # `torch.no_grad()` replaces the removed `volatile=True` flag, so
            # no autograd graph is built for the validation forward pass.
            with torch.no_grad():
                inputs = Variable(images)
                targets = Variable(labels)
                outputs = model(inputs, only_encode=enc)
                loss = criterion(outputs, targets[:, 0])

            epoch_loss_val.append(loss.item())
            time_val.append(time.time() - start_time)

            if doIouVal:
                nbPixels += _add_batch_to_confusion_matrix(
                    outputs, labels, confMatrix, perImageStats)

            if plotting and step % args.steps_plot == 0:
                _plot_batch(board, inputs, outputs, targets, 'VAL ', epoch,
                            step)
            if args.steps_loss > 0 and step % args.steps_loss == 0:
                average = sum(epoch_loss_val) / len(epoch_loss_val)
                print(
                    f'VAL loss: {average:0.4} (epoch: {epoch}, step: {step})',
                    "// Avg time/img: %.4f s" %
                    (sum(time_val) / len(time_val) / args.batch_size))

        average_epoch_loss_val = sum(epoch_loss_val) / len(epoch_loss_val)

        iouVal = 0
        if doIouVal:
            iouVal, iouAvgStr = _average_iou(confMatrix)
            print("EPOCH IoU on VAL set: ", iouAvgStr)

        # Remember the best validation score and save a resumable checkpoint.
        # NOTE(review): when IoU is unavailable the validation *loss* is used
        # as the score and is still maximised, so a higher loss counts as
        # "better". Probably should be negated -- confirm together with the
        # initial value of `best_acc`.
        if iouVal == 0:
            current_acc = average_epoch_loss_val
        else:
            current_acc = iouVal
        is_best = current_acc > best_acc
        best_acc = max(current_acc, best_acc)
        if enc:
            filenameCheckpoint = savedir + '/checkpoint_enc.pth.tar'
            filenameBest = savedir + '/model_best_enc.pth.tar'
        else:
            filenameCheckpoint = savedir + '/checkpoint.pth.tar'
            filenameBest = savedir + '/model_best.pth.tar'
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'arch': str(model),
                'state_dict': model.state_dict(),
                'best_acc': best_acc,
                'optimizer': optimizer.state_dict(),
            }, is_best, filenameCheckpoint, filenameBest)

        # Save the plain model weights after the epoch.
        if enc:
            filename = f'{savedir}/model_encoder-{epoch:03}.pth'
            filenamebest = f'{savedir}/model_encoder_best.pth'
        else:
            filename = f'{savedir}/model-{epoch:03}.pth'
            filenamebest = f'{savedir}/model_best.pth'
        # Gate on the epoch counter. This used to test `step`, the last batch
        # index left over from the validation loop, so the save never
        # depended on which epoch had just finished.
        if args.epochs_save > 0 and epoch % args.epochs_save == 0:
            torch.save(model.state_dict(), filename)
            print(f'save: {filename} (epoch: {epoch})')
        if is_best:
            torch.save(model.state_dict(), filenamebest)
            print(f'save: {filenamebest} (epoch: {epoch})')
            best_txt = "/best_encoder.txt" if enc else "/best.txt"
            with open(savedir + best_txt, "w") as myfile:
                myfile.write("Best epoch is %d, with Val-IoU= %.4f" %
                             (epoch, iouVal))

        # Append one row per epoch: epoch, train loss, val loss, train IoU,
        # val IoU and learning rate.
        with open(automated_log_path, "a") as myfile:
            myfile.write("\n%d\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.8f" %
                         (epoch, average_epoch_loss_train,
                          average_epoch_loss_val, iouTrain, iouVal, usedLr))

    return (model)
Ejemplo n.º 9
0
def train(args, model):
    """Fine-tune ``model`` on ``VOC12`` with a linearly decaying SGD rate.

    Weights are first loaded from ``args.model_para``. For every epoch a
    fresh SGD optimizer is created whose learning rate falls linearly from
    ``args.learning_rate`` towards 0 over ``args.num_epochs`` epochs.

    Args:
        args: Options namespace. Reads ``cuda``, ``model_para``,
            ``num_epochs``, ``datadir``, ``num_workers``, ``batch_size``,
            ``learning_rate``, ``weight_decay``, ``steps_loss``,
            ``steps_save`` and ``model`` (checkpoint filename prefix).
        model: Network to optimise. It is switched to training mode; it is
            not moved to the GPU by this function.
    """
    model.train()

    # Every class weighs 1 in the loss except index 0, which weighs 0.
    weight = torch.ones(22)
    weight[0] = 0
    criterion = CrossEntropyLoss2d(weight.cuda() if args.cuda else weight)

    model.load_state_dict(torch.load(args.model_para), strict=True)

    # The loader does not depend on the epoch, so build it once;
    # shuffle=True still gives a new order on every pass.
    loader = DataLoader(VOC12(args.datadir, input_transform, target_transform),
                        num_workers=args.num_workers,
                        batch_size=args.batch_size,
                        shuffle=True)

    for epoch in range(0, args.num_epochs):
        lr = args.learning_rate * ((args.num_epochs - epoch) / args.num_epochs)
        # NOTE(review): rebuilding the optimizer each epoch also discards the
        # momentum buffers; kept as-is to preserve training behaviour.
        optimizer = SGD(model.parameters(),
                        lr=lr,
                        momentum=0.9,
                        weight_decay=args.weight_decay)
        epoch_loss = []

        # The dataset yields a third item per sample, which is not needed.
        for step, (images, labels, _) in enumerate(loader):
            # Follow args.cuda here as well so the batch lives on the same
            # device as the criterion weights.
            if args.cuda:
                images = images.cuda()
                labels = labels.cuda()
            inputs = Variable(images)
            targets = Variable(labels)

            outputs = model(inputs)
            # Squeeze dimension 1 only. A bare squeeze() would also drop the
            # batch dimension for a batch holding a single sample.
            # Assumes targets have a size-1 dimension 1 -- TODO confirm.
            loss = criterion(outputs, targets.squeeze(1))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # `.item()` yields a Python float; indexing a 0-dim loss tensor
            # with `[0]` is an error in current PyTorch.
            epoch_loss.append(loss.item())

            report = args.steps_loss > 0 and step % args.steps_loss == 0
            if report:
                average = sum(epoch_loss) / len(epoch_loss)
                print(f'loss: {average} (epoch: {epoch}, step: {step})')

            if args.steps_save > 0 and step % args.steps_save == 0:
                filename = f'{args.model}-{epoch:03}-{step:04}.pth'
                torch.save(model.state_dict(), filename)
                print(f'save: {filename} (epoch: {epoch}, step: {step})')

            if report:
                print('-----------------------------------------')