Example #1
def validate(val_loader, net, criterion, optim, curr_epoch, writer):
    """
    Runs the validation loop after each training epoch
    val_loader: Data loader for validation
    net: the network
    criterion: loss fn
    optim: optimizer
    curr_epoch: current epoch
    writer: tensorboard writer
    return: val_avg for step function if required
    """

    net.eval()
    val_loss = AverageMeter()
    iou_acc = 0
    dump_images = []

    for val_idx, data in enumerate(val_loader):
        inputs, gt_image, img_names = data
        assert len(inputs.size()) == 4 and len(gt_image.size()) == 3
        assert inputs.size()[2:] == gt_image.size()[1:]

        batch_pixel_size = inputs.size(0) * inputs.size(2) * inputs.size(3)
        inputs, gt_cuda = inputs.cuda(), gt_image.cuda()

        with torch.no_grad():
            output = net(inputs)  # output = (1, 19, 713, 713)

        assert output.size()[2:] == gt_image.size()[1:]
        assert output.size()[1] == args.dataset_cls.num_classes

        val_loss.update(criterion(output, gt_cuda).item(), batch_pixel_size)
        predictions = output.data.max(1)[1].cpu()

        # Logging
        if val_idx % 20 == 0:
            if args.local_rank == 0:
                logging.info("validating: %d / %d", val_idx + 1, len(val_loader))
        if val_idx > 10 and args.test_mode:
            break

        # Image Dumps
        if val_idx < 10:
            dump_images.append([gt_image, predictions, img_names])

        iou_acc += fast_hist(predictions.numpy().flatten(), gt_image.numpy().flatten(),
                             args.dataset_cls.num_classes)
        del output, val_idx, data

    if args.apex:
        iou_acc_tensor = torch.cuda.FloatTensor(iou_acc)
        torch.distributed.all_reduce(iou_acc_tensor, op=torch.distributed.ReduceOp.SUM)
        iou_acc = iou_acc_tensor.cpu().numpy()

    if args.local_rank == 0:
        evaluate_eval(args, net, optim, val_loss, iou_acc, dump_images,
                      writer, curr_epoch, args.dataset_cls)

    return val_loss.avg
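
Every example here calls two helpers that are not shown: AverageMeter (a weighted running average) and fast_hist (a per-batch confusion matrix). A minimal sketch of the usual implementations, inferred from the call sites rather than copied from any of these repos:

import numpy as np

class AverageMeter:
    """Running average of a scalar, weighted by a count (here, pixels per batch)."""
    def __init__(self):
        self.sum = 0.0
        self.count = 0
        self.avg = 0.0

    def update(self, val, n=1):
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

def fast_hist(pred, gtruth, num_classes):
    """num_classes x num_classes confusion matrix: rows are ground truth,
    columns are predictions. Labels outside [0, num_classes), such as an
    ignore label of 255, are masked out."""
    mask = (gtruth >= 0) & (gtruth < num_classes)
    return np.bincount(
        num_classes * gtruth[mask].astype(int) + pred[mask],
        minlength=num_classes ** 2,
    ).reshape(num_classes, num_classes)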
Example #2
def validate(val_loader, net, criterion, optim, scheduler, curr_epoch, curr_iter):
    """
    Runs the validation loop after each training epoch
    val_loader: Data loader for validation
    net: the network
    criterion: loss fn
    optim: optimizer
    scheduler: LR scheduler
    curr_epoch: current epoch
    curr_iter: current iteration
    return: val_avg for step function if required
    """

    net.eval()
    val_loss = AverageMeter()
    iou_acc = 0
    error_acc = 0

    for val_idx, data in enumerate(val_loader):
        inputs, gts = data
        assert len(inputs.size()) == 4 and len(gts.size()) == 3
        assert inputs.size()[2:] == gts.size()[1:]

        batch_pixel_size = inputs.size(0) * inputs.size(2) * inputs.size(3)
        inputs, gts = inputs.cuda(), gts.cuda()

        with torch.no_grad():
            output = net(inputs)
        del inputs
        assert output.size()[2:] == gts.size()[1:]
        assert output.size()[1] == args.num_classes
        val_loss.update(criterion(output, gts).item(), batch_pixel_size)

        predictions = output.data.max(1)[1].cpu()
        # Logging
        if val_idx % 20 == 0:
            logging.info("validating: %d / %d", val_idx + 1, len(val_loader))
        iou_acc += fast_hist(predictions.numpy().flatten(), gts.cpu().numpy().flatten(),
                             args.num_classes)
        del gts, output, val_idx, data

    per_cls_iou = evaluate_eval(args, net, optim, scheduler, val_loss, iou_acc, curr_epoch, args.dataset, curr_iter)
    return val_loss.avg, per_cls_iou
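
evaluate_eval itself is not shown; the per_cls_iou it returns is conventionally derived from the accumulated confusion matrix. A hedged sketch of that computation (standard IoU algebra, not necessarily the repo's exact code):

import numpy as np

def per_class_iou(hist):
    # IoU = TP / (TP + FP + FN); from the confusion matrix,
    # diag(hist) = TP, row sums = TP + FN, column sums = TP + FP.
    tp = np.diag(hist)
    denom = hist.sum(axis=1) + hist.sum(axis=0) - tp
    with np.errstate(divide="ignore", invalid="ignore"):
        return tp / denom  # NaN for classes absent from both gt and predictions

# e.g. mean_iu = np.nanmean(per_class_iou(iou_acc))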
Example #3
def validate(val_loader, dataset, net, criterion, optim, scheduler, curr_epoch, writer, curr_iter, save_pth=True):
    """
    Runs the validation loop after each training epoch
    val_loader: Data loader for validation
    dataset: dataset name (str)
    net: the network
    criterion: loss fn
    optim: optimizer
    scheduler: LR scheduler
    curr_epoch: current epoch
    writer: tensorboard writer
    curr_iter: current iteration
    save_pth: whether to save a checkpoint
    return: val_avg for step function if required
    """

    net.eval()
    val_loss = AverageMeter()
    iou_acc = 0
    error_acc = 0
    dump_images = []

    for val_idx, data in enumerate(val_loader):
        # inputs   = torch.Size([1, 3, 713, 713])
        # gt_image = torch.Size([1, 713, 713])
        inputs, gt_image, img_names, _ = data

        if len(inputs.shape) == 5:
            B, D, C, H, W = inputs.shape
            inputs = inputs.view(-1, C, H, W)
            gt_image = gt_image.view(-1, H, W)  # keep labels 3-D to satisfy the asserts below

        assert len(inputs.size()) == 4 and len(gt_image.size()) == 3
        assert inputs.size()[2:] == gt_image.size()[1:]

        batch_pixel_size = inputs.size(0) * inputs.size(2) * inputs.size(3)
        inputs, gt_cuda = inputs.cuda(), gt_image.cuda()

        with torch.no_grad():
            if args.use_wtloss:
                output, f_cor_arr = net(inputs, visualize=True)
            else:
                output = net(inputs)

        del inputs

        assert output.size()[2:] == gt_image.size()[1:]
        assert output.size()[1] == datasets.num_classes

        val_loss.update(criterion(output, gt_cuda).item(), batch_pixel_size)

        del gt_cuda

        # Collect data from the different GPUs onto a single GPU, since
        # encoding.parallel.criterionparallel computes the loss in a
        # distributed fashion
        predictions = output.data.max(1)[1].cpu()

        # Logging
        if val_idx % 20 == 0:
            if args.local_rank == 0:
                logging.info("validating: %d / %d", val_idx + 1, len(val_loader))
        if val_idx > 10 and args.test_mode:
            break

        # Image Dumps
        if val_idx < 10:
            dump_images.append([gt_image, predictions, img_names])

        iou_acc += fast_hist(predictions.numpy().flatten(), gt_image.numpy().flatten(),
                             datasets.num_classes)
        del output, val_idx, data

    iou_acc_tensor = torch.cuda.FloatTensor(iou_acc)
    torch.distributed.all_reduce(iou_acc_tensor, op=torch.distributed.ReduceOp.SUM)
    iou_acc = iou_acc_tensor.cpu().numpy()

    if args.local_rank == 0:
        evaluate_eval(args, net, optim, scheduler, val_loss, iou_acc, dump_images,
                    writer, curr_epoch, dataset, None, curr_iter, save_pth=save_pth)

    return val_loss.avg
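
Note that, unlike Example #1, this version all-reduces the histogram unconditionally, so it assumes torch.distributed is already initialized. A minimal sketch of the startup such a script needs (assuming a torch.distributed.launch-style launcher that provides args.local_rank and the env:// variables):

import torch

# one process per GPU; the launcher sets args.local_rank and MASTER_ADDR/PORT
torch.cuda.set_device(args.local_rank)
torch.distributed.init_process_group(backend="nccl", init_method="env://")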
Example #4
def validate(val_loader, net, criterion, optimizer, curr_epoch, writer):
    '''
    Runs the validation loop after each training epoch
    val_loader: Data loader for validation
    net: the network
    criterion: loss fn
    optimizer: optimizer
    curr_epoch: current epoch
    writer: tensorboard writer
    return: val_avg for step function if required
    '''
    net.eval()
    val_loss = AverageMeter()
    mf_score = AverageMeter()
    IOU_acc = 0
    dump_images = []
    heatmap_images = []
    for vi, data in enumerate(val_loader):
        input, mask, edge, img_names = data
        assert len(input.size()) == 4 and len(mask.size()) == 3
        assert input.size()[2:] == mask.size()[1:]
        h, w = mask.size()[1:]

        batch_pixel_size = input.size(0) * input.size(2) * input.size(3)
        input, mask_cuda, edge_cuda = input.cuda(), mask.cuda(), edge.cuda()

        with torch.no_grad():
            seg_out, edge_out = net(input)  # output = (1, 19, 713, 713)

        if args.joint_edgeseg_loss:
            loss_dict = criterion((seg_out, edge_out), (mask_cuda, edge_cuda))
            val_loss.update(sum(loss_dict.values()).item(), batch_pixel_size)
        else:
            val_loss.update(
                criterion(seg_out, mask_cuda).item(), batch_pixel_size)

        # Collect data from the different GPUs onto a single GPU, since
        # encoding.parallel.criterionparallel computes the loss in a
        # distributed fashion

        seg_predictions = seg_out.data.max(1)[1].cpu()
        edge_predictions = edge_out.max(1)[0].cpu()

        # Logging
        if vi % 20 == 0:
            if args.local_rank == 0:
                logging.info('validating: %d / %d' % (vi + 1, len(val_loader)))
        if vi > 10 and args.test_mode:
            break
        _edge = edge.max(1)[0]

        # Image Dumps
        if vi < 10:
            dump_images.append([mask, seg_predictions, img_names])
            heatmap_images.append([_edge, edge_predictions, img_names])

        IOU_acc += fast_hist(seg_predictions.numpy().flatten(),
                             mask.numpy().flatten(),
                             args.dataset_cls.num_classes)

        del seg_out, edge_out, vi, data

    if args.local_rank == 0:
        evaluate_eval(args, net, optimizer, val_loss, mf_score, IOU_acc,
                      dump_images, heatmap_images, writer, curr_epoch,
                      args.dataset_cls)

    return val_loss.avg
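
In the joint_edgeseg_loss branch, criterion is expected to return a dict of named loss terms whose values are summed. A stand-in sketch of such a criterion (shapes, weighting, and the class name are assumptions; this is not GSCNN's actual JointEdgeSegLoss):

import torch.nn as nn

class JointEdgeSegLossSketch(nn.Module):
    def __init__(self, ignore_index=255, edge_weight=1.0):
        super().__init__()
        self.seg_loss = nn.CrossEntropyLoss(ignore_index=ignore_index)
        self.edge_loss = nn.BCEWithLogitsLoss()
        self.edge_weight = edge_weight

    def forward(self, outputs, targets):
        seg_out, edge_out = outputs   # (B, C, H, W) logits, (B, 1, H, W) edge logits
        mask, edge = targets          # (B, H, W) long labels, (B, 1, H, W) edge map
        return {
            'seg_loss': self.seg_loss(seg_out, mask),
            'edge_loss': self.edge_weight * self.edge_loss(edge_out, edge.float()),
        }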
Example #5
def main():
    """
    Main Function
    """
    # Set up the Arguments, Tensorboard Writer, Dataloader, Loss Fn, Optimizer
    assert_and_infer_cfg(args)
    writer = prep_experiment(args, parser)

    train_loader, val_loaders, train_obj, extra_val_loaders, covstat_val_loaders = datasets.setup_loaders(
        args)

    criterion, criterion_val = loss.get_loss(args)
    criterion_aux = loss.get_loss_aux(args)
    net = network.get_net(args, criterion, criterion_aux)

    optim, scheduler = optimizer.get_optimizer(args, net)

    net = torch.nn.SyncBatchNorm.convert_sync_batchnorm(net)
    net = network.warp_network_in_dataparallel(net, args.local_rank)
    epoch = 0
    i = 0

    if args.snapshot:
        epoch, mean_iu = optimizer.load_weights(net, optim, scheduler,
                                                args.snapshot,
                                                args.restore_optimizer)
        if args.restore_optimizer is True:
            iter_per_epoch = len(train_loader)
            i = iter_per_epoch * epoch
        else:
            epoch = 0

    print("#### iteration", i)
    torch.cuda.empty_cache()
    # Main Loop
    # for epoch in range(args.start_epoch, args.max_epoch):

    while i < args.max_iter:
        # Update EPOCH CTR
        cfg.immutable(False)
        cfg.ITER = i
        cfg.immutable(True)

        i = train(train_loader, net, optim, epoch, writer, scheduler,
                  args.max_iter)
        train_loader.sampler.set_epoch(epoch + 1)

        if (args.dynamic and args.use_isw and epoch % (args.cov_stat_epoch + 1) == args.cov_stat_epoch) \
           or (args.dynamic is False and args.use_isw and epoch == args.cov_stat_epoch):
            net.module.reset_mask_matrix()
            for trial in range(args.trials):
                for dataset, val_loader in covstat_val_loaders.items():  # gather covariance statistics
                    validate_for_cov_stat(val_loader,
                                          dataset,
                                          net,
                                          criterion_val,
                                          optim,
                                          scheduler,
                                          epoch,
                                          writer,
                                          i,
                                          save_pth=False)
                    net.module.set_mask_matrix()

        if args.local_rank == 0:
            print("Saving pth file...")
            evaluate_eval(args,
                          net,
                          optim,
                          scheduler,
                          None,
                          None, [],
                          writer,
                          epoch,
                          "None",
                          None,
                          i,
                          save_pth=True)

        if args.class_uniform_pct:
            if epoch >= args.max_cu_epoch:
                train_obj.build_epoch(cut=True)
                train_loader.sampler.set_num_samples()
            else:
                train_obj.build_epoch()

        epoch += 1

    # Validation after epochs
    if len(val_loaders) == 1:
        # Run validation only once, to save models
        for dataset, val_loader in val_loaders.items():
            validate(val_loader, dataset, net, criterion_val, optim, scheduler,
                     epoch, writer, i)
    else:
        if args.local_rank == 0:
            print("Saving pth file...")
            evaluate_eval(args,
                          net,
                          optim,
                          scheduler,
                          None,
                          None, [],
                          writer,
                          epoch,
                          "None",
                          None,
                          i,
                          save_pth=True)

    for dataset, val_loader in extra_val_loaders.items():
        print("Extra validating... This won't save pth file")
        validate(val_loader,
                 dataset,
                 net,
                 criterion_val,
                 optim,
                 scheduler,
                 epoch,
                 writer,
                 i,
                 save_pth=False)
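
network.warp_network_in_dataparallel above is repo-specific; in launcher-driven codebases like this one it typically reduces to a DistributedDataParallel wrap (a hedged sketch, not the repo's code):

import torch

net = torch.nn.parallel.DistributedDataParallel(
    net, device_ids=[args.local_rank], output_device=args.local_rank)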


def validate(val_loader, net, criterion1, criterion2, optim, curr_epoch,
             writer):
    """
    Runs the validation loop after each training epoch
    val_loader: Data loader for validation
    net: the network
    criterion1, criterion2: loss fns for the two tasks
    optim: optimizer
    curr_epoch: current epoch
    writer: tensorboard writer
    return: val_avg for step function if required
    """

    net.eval()
    val_loss1 = AverageMeter()
    val_loss2 = AverageMeter()
    iou_acc1 = 0
    iou_acc2 = 0
    dump_images = []

    for val_idx, data in enumerate(val_loader):
        inputs1, gt_image1, img_names1, inputs2, gt_image2, img_names2 = data
        assert len(inputs1.size()) == 4 and len(gt_image1.size()) == 3
        assert inputs1.size()[2:] == gt_image1.size()[1:]
        assert len(inputs2.size()) == 4 and len(gt_image2.size()) == 3
        assert inputs2.size()[2:] == gt_image2.size()[1:]

        batch_pixel_size1 = inputs1.size(0) * inputs1.size(2) * inputs1.size(3)
        batch_pixel_size2 = inputs2.size(0) * inputs2.size(2) * inputs2.size(3)
        inputs1, gt_cuda1 = inputs1.cuda(), gt_image1.cuda()
        inputs2, gt_cuda2 = inputs2.cuda(), gt_image2.cuda()

        with torch.no_grad():
            output1 = net(inputs1,
                          task='semantic')  # output = (1, 19, 713, 713)
            output2 = net(inputs2,
                          task='traversability')  # output = (1, 19, 713, 713)

        assert output1.size()[2:] == gt_image1.size()[1:]
        assert output1.size()[1] == args.dataset_cls.num_classes1
        assert output2.size()[2:] == gt_image2.size()[1:]
        assert output2.size()[1] == args.dataset_cls.num_classes2

        val_loss1.update(
            criterion1(output1, gt_cuda1).item(), batch_pixel_size1)
        val_loss2.update(
            criterion2(output2, gt_cuda2).item(), batch_pixel_size2)
        predictions1 = output1.data.max(1)[1].cpu()
        predictions2 = output2.data.max(1)[1].cpu()

        # Logging
        if val_idx % 20 == 0:
            if args.local_rank == 0:
                logging.info("validating: %d / %d", val_idx + 1,
                             len(val_loader))
        if val_idx > 10 and args.test_mode:
            break

        # Image Dumps
        # if val_idx < 30:
        #     dump_images.append([gt_image, predictions1, predictions2, img_names])

        iou_acc1 += fast_hist(predictions1.numpy().flatten(),
                              gt_image1.numpy().flatten(),
                              args.dataset_cls.num_classes1)
        iou_acc2 += fast_hist(predictions2.numpy().flatten(),
                              gt_image2.numpy().flatten(),
                              args.dataset_cls.num_classes2)
        del output1, output2, val_idx, data

    if args.apex:
        iou_acc_tensor1 = torch.cuda.FloatTensor(iou_acc1)
        torch.distributed.all_reduce(iou_acc_tensor1,
                                     op=torch.distributed.ReduceOp.SUM)
        iou_acc1 = iou_acc_tensor1.cpu().numpy()
        iou_acc_tensor2 = torch.cuda.FloatTensor(iou_acc2)
        torch.distributed.all_reduce(iou_acc_tensor2,
                                     op=torch.distributed.ReduceOp.SUM)
        iou_acc2 = iou_acc_tensor2.cpu().numpy()

    if args.local_rank == 0:
        evaluate_eval(args, net, optim, val_loss1, val_loss2, iou_acc1,
                      iou_acc2, dump_images, writer, curr_epoch,
                      args.dataset_cls)

    return val_loss1.avg
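
Per the docstring, the returned average feeds a metric-driven LR step. A hedged usage sketch, assuming a torch.optim.lr_scheduler.ReduceLROnPlateau scheduler that none of these examples construct:

val_avg = validate(val_loader, net, criterion1, criterion2, optim, epoch, writer)
scheduler.step(val_avg)  # ReduceLROnPlateau steps on the validation metric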