Exemple #1
0
def validate(val_loader, model, criterion):
    # torch.backends.cudnn.enabled = False  # for cudnn bug at https://github.com/pytorch/pytorch/issues/4107
    if main_process():
        logger.info('>>>>>>>>>>>>>>>>> Start Evaluation >>>>>>>>>>>>>>>>>')
    # batch_time = AverageMeter()
    # data_time = AverageMeter()
    loss_meter = AverageMeter()
    intersection_meter = AverageMeter()
    union_meter = AverageMeter()
    target_meter = AverageMeter()

    model.eval()
    # end = time.time()
    with torch.no_grad():
        for i, (input, target) in enumerate(val_loader):
            # data_time.update(time.time() - end)
            input = input.cuda(non_blocking=True)
            # input = [input[0].cuda(non_blocking=True), input[1].cuda(non_blocking=True)]
            target = target.cuda(non_blocking=True)
            output = model(input)
            # _, H, W = target.shape
            # output = F.interpolate(output, size=(H, W), mode='bilinear', align_corners=True)
            loss = criterion(output, target)

            n = target.size(0)
            if args.multiprocessing_distributed:
                loss = loss * n  # not considering ignore pixels
                count = target.new_tensor([n], dtype=torch.long)
                dist.all_reduce(loss), dist.all_reduce(count)
                n = count.item()
                loss = loss / n
            else:
                loss = torch.mean(loss)

            output = output.detach().max(1)[1]
            intersection, union, target = intersectionAndUnionGPU(
                output, target, args.classes, args.ignore_label)
            if args.multiprocessing_distributed:
                dist.all_reduce(intersection), dist.all_reduce(
                    union), dist.all_reduce(target)
            intersection, union, target = intersection.cpu().numpy(
            ), union.cpu().numpy(), target.cpu().numpy()
            intersection_meter.update(intersection), union_meter.update(
                union), target_meter.update(target)

            accuracy = sum(
                intersection_meter.val) / (sum(target_meter.val) + 1e-10)
            loss_meter.update(loss.item(), n)
            # batch_time.update(time.time() - end)
            # end = time.time()
            # if ((i + 1) % args.print_freq == 0) and main_process():
            #     logger.info('Test: [{}/{}] '
            #                 'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
            #                 'Batch {batch_time.val:.3f} ({batch_time.avg:.3f}) '
            #                 'Loss {loss_meter.val:.4f} ({loss_meter.avg:.4f}) '
            #                 'Accuracy {accuracy:.4f}.'.format(i + 1, len(val_loader),
            #                                                 data_time=data_time,
            #                                                 batch_time=batch_time,
            #                                                 loss_meter=loss_meter,
            #                                                 accuracy=accuracy))

    iou_class = intersection_meter.sum / (union_meter.sum + 1e-10)
    accuracy_class = intersection_meter.sum / (target_meter.sum + 1e-10)
    mIoU = np.mean(iou_class)
    mAcc = np.mean(accuracy_class)
    allAcc = sum(intersection_meter.sum) / (sum(target_meter.sum) + 1e-10)
    if main_process():
        logger.info(
            'Val result: mIoU/mAcc/allAcc {:.4f}/{:.4f}/{:.4f}.'.format(
                mIoU, mAcc, allAcc))
        for i in range(args.classes):
            logger.info('Class_{} Result: iou/accuracy {:.4f}/{:.4f}.'.format(
                i, iou_class[i], accuracy_class[i]))
        logger.info('<<<<<<<<<<<<<<<<<< End Evaluation <<<<<<<<<<<<<<<<<<<<<')
    return loss_meter.avg, mIoU, mAcc, allAcc
Exemple #2
0
def train(gpu, ngpus_per_node, argss):
    global args
    args = argss
    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size,
                                rank=args.rank)

    teacher_model = None
    if args.teacher_model_path:
        teacher_model = PSPNet(layers=args.teacher_layers,
                               classes=args.classes,
                               zoom_factor=args.zoom_factor)
        kd_path = 'alpha_' + str(args.alpha) + '_Temp_' + str(args.temperature)
        args.save_path = os.path.join(args.save_path, kd_path)
        if not os.path.exists(args.save_path):
            os.mkdir(args.save_path)
    if args.arch == 'dct':
        model = DCTNet(layers=args.layers, classes=args.classes, vec_dim=300)
        # modules_ori = [model.layer0, model.layer1, model.layer2, model.layer3, model.layer4]
        # modules_new = [model.cls, model.aux]  # DCT4
        modules_ori = [model.cp, model.sp, model.head]
        modules_new = []
        args.index_split = len(
            modules_ori
        )  # the module after index_split need multiply 10 at learning rate

    params_list = []
    for module in modules_ori:
        params_list.append(dict(params=module.parameters(), lr=args.base_lr))
    for module in modules_new:
        params_list.append(
            dict(params=module.parameters(), lr=args.base_lr * 10))
    # args.index_split = 5
    optimizer = torch.optim.SGD(params_list,
                                lr=args.base_lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    if args.sync_bn:
        model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
        if teacher_model is not None:
            teacher_model = nn.SyncBatchNorm.convert_sync_batchnorm(
                teacher_model)

    if main_process():
        global logger, writer
        logger = get_logger()
        writer = SummaryWriter(args.save_path)  # tensorboardX
        logger.info(args)
        logger.info("=> creating model ...")
        logger.info("Classes: {}".format(args.classes))
        logger.info(model)
        if teacher_model is not None:
            logger.info(teacher_model)
    if args.distributed:
        torch.cuda.set_device(gpu)
        args.batch_size = int(args.batch_size / ngpus_per_node)
        args.batch_size_val = int(args.batch_size_val / ngpus_per_node)
        args.workers = int(
            (args.workers + ngpus_per_node - 1) / ngpus_per_node)
        model = torch.nn.parallel.DistributedDataParallel(model.cuda(),
                                                          device_ids=[gpu])
        if teacher_model is not None:
            teacher_model = torch.nn.parallel.DistributedDataParallel(
                teacher_model.cuda(), device_ids=[gpu])

    else:
        model = torch.nn.DataParallel(model.cuda())
        if teacher_model is not None:
            teacher_model = torch.nn.DataParallel(teacher_model.cuda())

    if teacher_model is not None:
        checkpoint = torch.load(
            args.teacher_model_path,
            map_location=lambda storage, loc: storage.cuda())
        teacher_model.load_state_dict(checkpoint['state_dict'], strict=False)
        print("=> loading teacher checkpoint '{}'".format(
            args.teacher_model_path))

    if args.use_ohem:
        criterion = OhemCELoss(thresh=0.7,
                               ignore_index=args.ignore_label).cuda(gpu)
    else:
        criterion = nn.CrossEntropyLoss(
            ignore_index=args.ignore_label).cuda(gpu)

    kd_criterion = None
    if teacher_model is not None:
        kd_criterion = KDLoss(ignore_index=args.ignore_label).cuda(gpu)

    if args.weight:
        if os.path.isfile(args.weight):
            if main_process():
                logger.info("=> loading weight: '{}'".format(args.weight))
            checkpoint = torch.load(args.weight)
            model.load_state_dict(checkpoint['state_dict'])
            if main_process():
                logger.info("=> loaded weight '{}'".format(args.weight))
        else:
            if main_process():
                logger.info("=> mp weight found at '{}'".format(args.weight))

    best_mIoU_val = 0.0
    if args.resume:
        if os.path.isfile(args.resume):
            if main_process():
                logger.info("=> loading checkpoint '{}'".format(args.resume))
            # Load all tensors onto GPU
            checkpoint = torch.load(
                args.resume, map_location=lambda storage, loc: storage.cuda())
            args.start_iter = checkpoint['iteration']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            best_mIoU_val = checkpoint['best_mIoU_val']
            if main_process():
                logger.info("=> loaded checkpoint '{}' (iteration {})".format(
                    args.resume, checkpoint['iteration']))
        else:
            if main_process():
                logger.info("=> no checkpoint found at '{}'".format(
                    args.resume))

    value_scale = 255
    ## RGB mean & std
    rgb_mean = [0.485, 0.456, 0.406]
    rgb_mean = [item * value_scale for item in rgb_mean]
    rgb_std = [0.229, 0.224, 0.225]
    rgb_std = [item * value_scale for item in rgb_std]

    # DCT mean & std
    dct_mean = dct_mean_std.train_upscaled_static_mean
    dct_mean = [item * value_scale for item in dct_mean]
    dct_std = dct_mean_std.train_upscaled_static_std
    dct_std = [item * value_scale for item in dct_std]

    train_transform = transform.Compose([
        transform.RandScale([args.scale_min, args.scale_max]),
        transform.RandRotate([args.rotate_min, args.rotate_max],
                             padding=rgb_mean,
                             ignore_label=args.ignore_label),
        transform.RandomGaussianBlur(),
        transform.RandomHorizontalFlip(),
        transform.Crop([args.train_h, args.train_w],
                       crop_type='rand',
                       padding=rgb_mean,
                       ignore_label=args.ignore_label),
        # transform.GetDctCoefficient(),
        transform.ToTensor(),
        transform.Normalize(mean=rgb_mean, std=rgb_std)
    ])
    train_data = dataset.SemData(split='train',
                                 img_type='rgb',
                                 data_root=args.data_root,
                                 data_list=args.train_list,
                                 transform=train_transform)
    # train_transform = transform_rgbdct.Compose([
    #     transform_rgbdct.RandScale([args.scale_min, args.scale_max]),
    #     transform_rgbdct.RandRotate([args.rotate_min, args.rotate_max], padding=rgb_mean, ignore_label=args.ignore_label),
    #     transform_rgbdct.RandomGaussianBlur(),
    #     transform_rgbdct.RandomHorizontalFlip(),
    #     transform_rgbdct.Crop([args.train_h, args.train_w], crop_type='rand', padding=rgb_mean, ignore_label=args.ignore_label),
    #     transform_rgbdct.GetDctCoefficient(),
    #     transform_rgbdct.ToTensor(),
    #     transform_rgbdct.Normalize(mean_rgb=rgb_mean, mean_dct=dct_mean, std_rgb=rgb_std,  std_dct=dct_std)])
    # train_data = dataset.SemData(split='train', img_type='rgb&dct', data_root=args.data_root, data_list=args.train_list, transform=train_transform)
    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_data)
    else:
        train_sampler = None
    train_loader = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size, \
        shuffle=(train_sampler is None), num_workers=args.workers, pin_memory=True, \
        sampler=train_sampler, drop_last=True)
    if args.evaluate:
        # val_h = int(args.base_h * args.scale)
        # val_w = int(args.base_w * args.scale)
        val_transform = transform.Compose([
            transform.Crop([args.train_h, args.train_w],
                           crop_type='center',
                           padding=rgb_mean,
                           ignore_label=args.ignore_label),
            # transform.Resize(size=(val_h, val_w)),
            # transform.GetDctCoefficient(),
            transform.ToTensor(),
            transform.Normalize(mean=rgb_mean, std=rgb_std)
        ])
        val_data = dataset.SemData(split='val',
                                   img_type='rgb',
                                   data_root=args.data_root,
                                   data_list=args.val_list,
                                   transform=val_transform)
        # val_transform = transform_rgbdct.Compose([
        #     transform_rgbdct.Crop([args.train_h, args.train_w], crop_type='center', padding=rgb_mean, ignore_label=args.ignore_label),
        #     # transform.Resize(size=(val_h, val_w)),
        #     transform_rgbdct.GetDctCoefficient(),
        #     transform_rgbdct.ToTensor(),
        #     transform_rgbdct.Normalize(mean_rgb=rgb_mean, mean_dct=dct_mean, std_rgb=rgb_std,  std_dct=dct_std)])
        # val_data = dataset.SemData(split='val', img_type='rgb&dct', data_root=args.data_root, data_list=args.val_list, transform=val_transform)
        if args.distributed:
            val_sampler = torch.utils.data.distributed.DistributedSampler(
                val_data)
        else:
            val_sampler = None
        val_loader = torch.utils.data.DataLoader(val_data, batch_size=args.batch_size_val, \
            shuffle=False, num_workers=args.workers, pin_memory=True, sampler=val_sampler)

    # Training Loop
    batch_time = AverageMeter()
    data_time = AverageMeter()
    main_loss_meter = AverageMeter()
    # aux_loss_meter = AverageMeter()
    loss_meter = AverageMeter()
    intersection_meter = AverageMeter()
    union_meter = AverageMeter()
    target_meter = AverageMeter()

    # switch to train mode
    model.train()
    if teacher_model is not None:
        teacher_model.eval()
    end = time.time()
    max_iter = args.max_iter

    data_iter = iter(train_loader)
    epoch = 0
    for current_iter in range(args.start_iter, args.max_iter):
        try:
            input, target = next(data_iter)
            if not target.size(0) == args.batch_size:
                raise StopIteration
        except StopIteration:
            epoch += 1
            if args.distributed:
                train_sampler.set_epoch(epoch)
                if main_process():
                    logger.info('train_sampler.set_epoch({})'.format(epoch))
            data_iter = iter(train_loader)
            input, target = next(data_iter)
            # need to update the AverageMeter for new epoch
            main_loss_meter = AverageMeter()
            # aux_loss_meter = AverageMeter()
            loss_meter = AverageMeter()
            intersection_meter = AverageMeter()
            union_meter = AverageMeter()
            target_meter = AverageMeter()
        # measure data loading time
        data_time.update(time.time() - end)
        input = input.cuda(non_blocking=True)
        # input = [input[0].cuda(non_blocking=True), input[1].cuda(non_blocking=True)]
        target = target.cuda(non_blocking=True)

        # compute output
        # main_out, aux_out = model(input)
        main_out = model(input)
        # _, H, W = target.shape
        # main_out = F.interpolate(main_out, size=(H, W), mode='bilinear', align_corners=True)
        main_loss = criterion(main_out, target)
        # aux_loss = criterion(aux_out, target)

        if not args.multiprocessing_distributed:
            # main_loss, aux_loss = torch.mean(main_loss), torch.mean(aux_loss)
            main_loss = torch.mean(main_loss)
        # loss = main_loss + args.aux_weight * aux_loss
        loss = main_loss

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        n = target.size(0)
        # if args.multiprocessing_distributed:
        #     main_loss, aux_loss, loss = main_loss.detach() * n, aux_loss * n, loss * n  # not considering ignore pixels
        #     count = target.new_tensor([n], dtype=torch.long)
        #     dist.all_reduce(main_loss), dist.all_reduce(aux_loss), dist.all_reduce(loss), dist.all_reduce(count)
        #     n = count.item()
        #     main_loss, aux_loss, loss = main_loss / n, aux_loss / n, loss / n
        if args.multiprocessing_distributed:
            main_loss, loss = main_loss.detach(
            ) * n, loss * n  # not considering ignore pixels
            count = target.new_tensor([n], dtype=torch.long)
            dist.all_reduce(main_loss), dist.all_reduce(loss), dist.all_reduce(
                count)
            n = count.item()
            main_loss, loss = main_loss / n, loss / n

        main_out = main_out.detach().max(1)[1]
        intersection, union, target = intersectionAndUnionGPU(
            main_out, target, args.classes, args.ignore_label)
        if args.multiprocessing_distributed:
            dist.all_reduce(intersection), dist.all_reduce(
                union), dist.all_reduce(target)
        intersection, union, target = intersection.cpu().numpy(), union.cpu(
        ).numpy(), target.cpu().numpy()
        intersection_meter.update(intersection), union_meter.update(
            union), target_meter.update(target)

        accuracy = sum(
            intersection_meter.val) / (sum(target_meter.val) + 1e-10)
        main_loss_meter.update(main_loss.item(), n)
        # aux_loss_meter.update(aux_loss.item(), n)
        loss_meter.update(loss.item(), n)
        batch_time.update(time.time() - end)
        end = time.time()

        # Using Poly strategy to change the learning rate
        current_lr = poly_learning_rate(args.base_lr,
                                        current_iter,
                                        max_iter,
                                        power=args.power)
        for index in range(0, args.index_split
                           ):  # args.index_split = 5 -> ResNet has 5 stages
            optimizer.param_groups[index]['lr'] = current_lr
        for index in range(args.index_split, len(optimizer.param_groups)):
            optimizer.param_groups[index]['lr'] = current_lr * 10

        remain_iter = max_iter - current_iter
        remain_time = remain_iter * batch_time.avg
        t_m, t_s = divmod(remain_time, 60)
        t_h, t_m = divmod(t_m, 60)
        remain_time = '{:02d}:{:02d}:{:02d}'.format(int(t_h), int(t_m),
                                                    int(t_s))

        iter_log = current_iter + 1
        if iter_log % args.print_freq == 0 and main_process():
            logger.info('Iter [{}/{}] '
                        'LR: {lr:.3e}, '
                        'ETA: {remain_time}, '
                        'Data: {data_time.val:.3f} ({data_time.avg:.3f}), '
                        'Batch: {batch_time.val:.3f} ({batch_time.avg:.3f}), '
                        'MainLoss: {main_loss_meter.val:.4f}, '
                        # 'AuxLoss: {aux_loss_meter.val:.4f}, '
                        'Loss: {loss_meter.val:.4f}, '
                        'Accuracy: {accuracy:.4f}.'.format(
                            iter_log,
                            args.max_iter,
                            lr=current_lr,
                            remain_time=remain_time,
                            data_time=data_time,
                            batch_time=batch_time,
                            main_loss_meter=main_loss_meter,
                            # aux_loss_meter=aux_loss_meter,
                            loss_meter=loss_meter,
                            accuracy=accuracy))
        if main_process():
            writer.add_scalar('loss_train_batch', main_loss_meter.val,
                              iter_log)
            writer.add_scalar('mIoU_train_batch',
                              np.mean(intersection / (union + 1e-10)),
                              iter_log)
            writer.add_scalar('mAcc_train_batch',
                              np.mean(intersection / (target + 1e-10)),
                              iter_log)
            writer.add_scalar('allAcc_train_batch', accuracy, iter_log)

        if iter_log % len(
                train_loader
        ) == 0 or iter_log == max_iter:  # for each epoch or the max interation
            iou_class = intersection_meter.sum / (union_meter.sum + 1e-10)
            accuracy_class = intersection_meter.sum / (target_meter.sum +
                                                       1e-10)
            mIoU_train = np.mean(iou_class)
            mAcc_train = np.mean(accuracy_class)
            allAcc_train = sum(
                intersection_meter.sum) / (sum(target_meter.sum) + 1e-10)
            loss_train = main_loss_meter.avg
            if main_process():
                logger.info('Train result at iteration [{}/{}]: mIoU/mAcc/allAcc {:.4f}/{:.4f}/{:.4f}.'\
                    .format(iter_log, max_iter, mIoU_train, mAcc_train, allAcc_train))
                writer.add_scalar('loss_train', loss_train, iter_log)
                writer.add_scalar('mIoU_train', mIoU_train, iter_log)
                writer.add_scalar('mAcc_train', mAcc_train, iter_log)
                writer.add_scalar('allAcc_train', allAcc_train, iter_log)

        # if iter_log % args.save_freq == 0:
            is_best = False
            if args.evaluate:
                loss_val, mIoU_val, mAcc_val, allAcc_val = validate(
                    val_loader, model, criterion)
                model.train()  # the mode change from eval() to train()
                if main_process():
                    writer.add_scalar('loss_val', loss_val, iter_log)
                    writer.add_scalar('mIoU_val', mIoU_val, iter_log)
                    writer.add_scalar('mAcc_val', mAcc_val, iter_log)
                    writer.add_scalar('allAcc_val', allAcc_val, iter_log)

                    if best_mIoU_val < mIoU_val:
                        is_best = True
                        best_mIoU_val = mIoU_val
                        logger.info('==>The best val mIoU: %.3f' %
                                    (best_mIoU_val))

            if main_process():
                save_checkpoint(
                    {
                        'iteration': iter_log,
                        'state_dict': model.state_dict(),
                        'optimizer': optimizer.state_dict(),
                        'best_mIoU_val': best_mIoU_val
                    }, is_best, args.save_path)
                logger.info('Saving checkpoint to:{}/iter_{}.pth or last.pth with mIoU:{:.3f}'\
                    .format(args.save_path, iter_log, mIoU_val))
                if is_best:
                    logger.info('Saving checkpoint to:{}/best.pth with mIoU:{:.3f}'\
                        .format(args.save_path, best_mIoU_val))

    if main_process():
        writer.close(
        )  # it must close the writer, otherwise it will appear the EOFError!
        logger.info(
            '==>Training done! The best val mIoU during training: %.3f' %
            (best_mIoU_val))
def evaluate():
    args = get_parser()
    logger = get_logger()
    os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(
        str(x) for x in args.test_gpu)
    logger.info(args)
    logger.info("=> creating model ...")
    logger.info("Classes: {}".format(args.classes))

    if args.arch == 'psp':
        model = PSPNet(layers=args.layers,
                       classes=args.classes,
                       zoom_factor=args.zoom_factor)
    elif args.arch == 'nonlocal':
        model = Nonlocal(layers=args.layers,
                         classes=args.classes,
                         zoom_factor=args.zoom_factor)
    elif args.arch == 'danet':
        model = DANet(layers=args.layers,
                      classes=args.classes,
                      zoom_factor=args.zoom_factor)
    elif args.arch == 'sanet':
        model = SANet(layers=args.layers,
                      classes=args.classes,
                      zoom_factor=args.zoom_factor)
    elif args.arch == 'fanet':
        model = FANet(layers=args.layers, classes=args.classes)
    elif args.arch == 'fftnet':
        model = FFTNet(layers=args.layers, classes=args.classes)
    elif args.arch == 'fftnet_23':
        model = FFTNet23(layers=args.layers, classes=args.classes)
    elif args.arch == 'bise_v1':
        model = BiseNet(layers=args.layers,
                        classes=args.classes,
                        with_sp=args.with_sp)
    elif args.arch == 'dct':
        model = DCTNet(layers=args.layers, classes=args.classes, vec_dim=300)
    elif args.arch == 'triple':
        model = TriSeNet(layers=args.layers, classes=args.classes)
    elif args.arch == 'triple_1':
        model = TriSeNet1(layers=args.layers, classes=args.classes)
    elif args.arch == 'ppm':
        model = PPM_Net(backbone=args.backbone,
                        layers=args.layers,
                        classes=args.classes)
    elif args.arch == 'fc':
        model = FC_Net(backbone=args.backbone,
                       layers=args.layers,
                       classes=args.classes)

    logger.info(model)
    model = torch.nn.DataParallel(model).cuda()
    cudnn.benchmark = True
    if os.path.isfile(args.model_path):
        logger.info("=> loading checkpoint '{}'".format(args.model_path))
        # checkpoint = torch.load(args.model_path, map_location=torch.device('cpu'))
        checkpoint = torch.load(args.model_path)
        model.load_state_dict(checkpoint['state_dict'], strict=True)
        logger.info("=> loaded checkpoint '{}'".format(args.model_path))
    else:
        raise RuntimeError("=> no checkpoint found at '{}'".format(
            args.model_path))

    value_scale = 255
    ## RGB mean & std
    rgb_mean = [0.485, 0.456, 0.406]
    rgb_mean = [item * value_scale for item in rgb_mean]
    rgb_std = [0.229, 0.224, 0.225]
    rgb_std = [item * value_scale for item in rgb_std]

    # DCT mean & std
    dct_mean = dct_mean_std.train_upscaled_static_mean
    dct_mean = [item * value_scale for item in dct_mean]
    dct_std = dct_mean_std.train_upscaled_static_std
    dct_std = [item * value_scale for item in dct_std]

    val_h = int(args.base_h * args.scale)
    val_w = int(args.base_w * args.scale)

    val_transform = transform.Compose([
        transform.Resize(size=(val_h, val_w)),
        # transform.GetDctCoefficient(),
        transform.ToTensor(),
        transform.Normalize(mean=rgb_mean, std=rgb_std)
    ])
    val_data = dataset.SemData(split='val',
                               img_type='rgb',
                               data_root=args.data_root,
                               data_list=args.val_list,
                               transform=val_transform)
    val_loader = torch.utils.data.DataLoader(val_data,
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=4,
                                             pin_memory=True)

    # val_transform = transform.Compose([
    #     # transform.Resize(size=(val_h, val_w)),
    #     transform.GetDctCoefficient(),
    #     transform.ToTensor(),
    #     transform.Normalize(mean_rgb=rgb_mean, mean_dct=dct_mean, std_rgb=rgb_std, std_dct=dct_std)])
    # val_data = dataset.SemData(split='val', img_type='rgb&dct', data_root=args.data_root, data_list=args.val_list, transform=val_transform)
    # val_loader = torch.utils.data.DataLoader(val_data, batch_size=1, shuffle=False, num_workers=4, pin_memory=True)

    # test_transform = transform.Compose([
    #     transform.ToTensor(),
    #     transform.Normalize(mean=mean, std=std)])
    # # test_transform = transform.Compose([transform.ToTensor()])
    # test_data = dataset.SemData(split='test', data_root=args.data_root, data_list=args.test_list, transform=test_transform)
    # val_loader = torch.utils.data.DataLoader(test_data, batch_size=1, shuffle=False, num_workers=4, pin_memory=True)

    logger.info('>>>>>>>>>>>>>>>>> Start Evaluation >>>>>>>>>>>>>>>>>')
    batch_time = AverageMeter()
    data_time = AverageMeter()
    intersection_meter = AverageMeter()
    union_meter = AverageMeter()
    target_meter = AverageMeter()

    model.eval()
    end = time.time()
    results = []

    with torch.no_grad():
        for i, (input, target) in enumerate(val_loader):
            data_time.update(time.time() - end)
            # _, _, H, W = input.shape
            input = input.cuda(non_blocking=True)
            # input = [input[0].cuda(non_blocking=True), input[1].cuda(non_blocking=True)]
            target = target.cuda(non_blocking=True)
            # if args.scale != 1.0:
            # input = F.interpolate(input, size=(val_h, val_w), mode='bilinear', align_corners=True)
            if args.teacher_model_path != None and args.arch == 'sanet':
                output, _, _ = model(input)
            else:
                output = model(input)
                # if args.scale != 1.0:
                # output = F.interpolate(output, size=(H, W), mode='bilinear', align_corners=True)
            _, H, W = target.shape
            # output = F.interpolate(output, size=(H, W), mode='bilinear', align_corners=True)
            output = output.detach().max(1)[1]
            results.append(output.cpu().numpy().reshape(H, W))
            intersection, union, target = intersectionAndUnionGPU(
                output, target, args.classes, args.ignore_label)
            intersection, union, target = intersection.cpu().numpy(
            ), union.cpu().numpy(), target.cpu().numpy()
            intersection_meter.update(intersection), union_meter.update(
                union), target_meter.update(target)

            accuracy = sum(
                intersection_meter.val) / (sum(target_meter.val) + 1e-10)
            batch_time.update(time.time() - end)
            end = time.time()
            if ((i + 1) % 10 == 0) or (i + 1 == len(val_loader)):
                logger.info(
                    'Val: [{}/{}] '
                    'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                    'Batch {batch_time.val:.3f} ({batch_time.avg:.3f}) '
                    'Accuracy {accuracy:.4f}.'.format(i + 1,
                                                      len(val_loader),
                                                      data_time=data_time,
                                                      batch_time=batch_time,
                                                      accuracy=accuracy))

    iou_class = intersection_meter.sum / (union_meter.sum + 1e-10)
    accuracy_class = intersection_meter.sum / (target_meter.sum + 1e-10)
    mIoU = np.mean(iou_class)
    mAcc = np.mean(accuracy_class)
    allAcc = sum(intersection_meter.sum) / (sum(target_meter.sum) + 1e-10)

    logger.info('Val result: mIoU/mAcc/allAcc {:.4f}/{:.4f}/{:.4f}.'.format(
        mIoU, mAcc, allAcc))
    for i in range(args.classes):
        logger.info('Class_{} Result: iou/accuracy {:.4f}/{:.4f}.'.format(
            i, iou_class[i], accuracy_class[i]))
    logger.info('<<<<<<<<<<<<<<<<<< End Evaluation <<<<<<<<<<<<<<<<<<<<<')
def train(local_rank, train_loader, model, teacher_model, criterion, kd_criterion, optimizer, epoch):
    # torch.backends.cudnn.enabled = True
    batch_time = AverageMeter()
    data_time = AverageMeter()
    main_loss_meter = AverageMeter()
    aux_loss_meter = AverageMeter()
    loss_meter = AverageMeter()
    intersection_meter = AverageMeter()
    union_meter = AverageMeter()
    target_meter = AverageMeter()

    # switch to train mode
    model.train()
    if teacher_model is not None:
        teacher_model.eval()
    end = time.time()
    max_iter = args.epochs * len(train_loader) # initialize for poly learning rate 
    for i, (input, target) in enumerate(train_loader):
        data_time.update(time.time() - end)
        if args.zoom_factor != 8:
            h = int((target.size()[1] - 1) / 8 * args.zoom_factor + 1)
            w = int((target.size()[2] - 1) / 8 * args.zoom_factor + 1) 
            # 'nearest' mode doesn't support align_corners mode and 'bilinear' mode is fine for downsampling
            target = F.interpolate(target.unsqueeze(1).float(), size=(h, w), mode='bilinear', align_corners=True).squeeze(1).long()
        input = input.cuda(local_rank, non_blocking=True)
        target = target.cuda(local_rank, non_blocking=True)
        main_out, aux_out = model(input)
        if teacher_model is not None:
            with torch.no_grad():
                teacher_out = teacher_model(input)
            main_loss = kd_criterion(main_out, target, teacher_out, alpha=args.alpha, temperature=args.temperature)
            del teacher_out # delete the teacher_out for releasing the gpu memory.
        else:
            main_loss = criterion(main_out, target)
        aux_loss = criterion(aux_out, target)

        if not args.multiprocessing_distributed:
            main_loss, aux_loss = torch.mean(main_loss), torch.mean(aux_loss)
        loss = main_loss + args.aux_weight * aux_loss

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        n = input.size(0)
        if args.multiprocessing_distributed:
            main_loss, aux_loss, loss = main_loss.detach() * n, aux_loss * n, loss * n  # not considering ignore pixels
            count = target.new_tensor([n], dtype=torch.long)
            dist.all_reduce(main_loss), dist.all_reduce(aux_loss), dist.all_reduce(loss), dist.all_reduce(count)
            n = count.item()
            main_loss, aux_loss, loss = main_loss / n, aux_loss / n, loss / n
        
        main_out = main_out.detach().max(1)[1]
        intersection, union, target = intersectionAndUnionGPU(main_out, target, args.classes, args.ignore_label)
        if args.multiprocessing_distributed:
            dist.all_reduce(intersection), dist.all_reduce(union), dist.all_reduce(target)
        intersection, union, target = intersection.cpu().numpy(), union.cpu().numpy(), target.cpu().numpy()
        intersection_meter.update(intersection), union_meter.update(union), target_meter.update(target)

        accuracy = sum(intersection_meter.val) / (sum(target_meter.val) + 1e-10)
        main_loss_meter.update(main_loss.item(), n)
        aux_loss_meter.update(aux_loss.item(), n)
        loss_meter.update(loss.item(), n)
        batch_time.update(time.time() - end)
        end = time.time()

        current_iter = epoch * len(train_loader) + i + 1
        current_lr = poly_learning_rate(args.base_lr, current_iter, max_iter, power=args.power)
        for index in range(0, args.index_split): # args.index_split = 5 -> ResNet has 5 stages
            optimizer.param_groups[index]['lr'] = current_lr
        for index in range(args.index_split, len(optimizer.param_groups)):
            optimizer.param_groups[index]['lr'] = current_lr * 10
        remain_iter = max_iter - current_iter
        remain_time = remain_iter * batch_time.avg
        t_m, t_s = divmod(remain_time, 60)
        t_h, t_m = divmod(t_m, 60)
        remain_time = '{:02d}:{:02d}:{:02d}'.format(int(t_h), int(t_m), int(t_s))

        if (i + 1) % args.print_freq == 0 and main_process():
            logger.info('Epoch: [{}/{}][{}/{}] '
                        'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                        'Batch {batch_time.val:.3f} ({batch_time.avg:.3f}) '
                        'Remain {remain_time} '
                        'MainLoss {main_loss_meter.val:.4f} '
                        'AuxLoss {aux_loss_meter.val:.4f} '
                        'Loss {loss_meter.val:.4f} '
                        'Accuracy {accuracy:.4f}.'.format(epoch + 1, args.epochs, i + 1, len(train_loader),
                                                                                data_time=data_time,
                                                                                batch_time=batch_time,
                                                                                remain_time=remain_time,
                                                                                main_loss_meter=main_loss_meter,
                                                                                aux_loss_meter=aux_loss_meter,
                                                                                loss_meter=loss_meter,
                                                                                accuracy=accuracy))
        if main_process():
            writer.add_scalar('loss_train_batch', main_loss_meter.val, current_iter)
            writer.add_scalar('mIoU_train_batch', np.mean(intersection / (union + 1e-10)), current_iter)
            writer.add_scalar('mAcc_train_batch', np.mean(intersection / (target + 1e-10)), current_iter)
            writer.add_scalar('allAcc_train_batch', accuracy, current_iter)
    iou_class = intersection_meter.sum / (union_meter.sum + 1e-10)  
    accuracy_class = intersection_meter.sum / (target_meter.sum + 1e-10)
    mIoU = np.mean(iou_class)
    mAcc = np.mean(accuracy_class)
    allAcc = sum(intersection_meter.sum) / (sum(target_meter.sum) + 1e-10)
    if main_process():
        logger.info('Train result at epoch [{}/{}]: mIoU/mAcc/allAcc {:.4f}/{:.4f}/{:.4f}.'.format(epoch+1, args.epochs, mIoU, mAcc, allAcc))
    return main_loss_meter.avg, mIoU, mAcc, allAcc
Exemple #5
0
def evaluate():
    args = get_parser()
    os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(
        str(x) for x in args.test_gpu)
    if args.arch == 'triple':
        model = TriSeNet(layers=args.layers, classes=args.classes)
    model = torch.nn.DataParallel(model).cuda()
    cudnn.benchmark = True
    if os.path.isfile(args.model_path):
        print("=> loading checkpoint '{}'".format(args.model_path))
        # checkpoint = torch.load(args.model_path, map_location=torch.device('cpu'))
        checkpoint = torch.load(args.model_path)
        model.load_state_dict(checkpoint['state_dict'], strict=True)
        print("=> loaded checkpoint '{}'".format(args.model_path))
    else:
        raise RuntimeError("=> no checkpoint found at '{}'".format(
            args.model_path))

    value_scale = 255
    ## RGB mean & std
    rgb_mean = [0.485, 0.456, 0.406]
    rgb_mean = [item * value_scale for item in rgb_mean]
    rgb_std = [0.229, 0.224, 0.225]
    rgb_std = [item * value_scale for item in rgb_std]

    val_transform = transform.Compose([
        transform.ToTensor(),
        transform.Normalize(mean=rgb_mean, std=rgb_std)
    ])
    train_data = SemData(split='train',
                         data_root=args.data_root,
                         data_list=args.train_list,
                         transform=val_transform)
    val_loader = torch.utils.data.DataLoader(train_data, batch_size=1, \
        shuffle=False, num_workers=args.workers, pin_memory=True)

    print('>>>>>>>>>>>>>>>>> Start Evaluation >>>>>>>>>>>>>>>>>')

    batch_time = AverageMeter()
    data_time = AverageMeter()
    intersection_meter = AverageMeter()
    union_meter = AverageMeter()
    target_meter = AverageMeter()

    model.eval()
    end = time.time()
    results = []

    with torch.no_grad():
        for i, (input, target) in enumerate(val_loader):
            data_time.update(time.time() - end)
            input = input.cuda(non_blocking=True)
            target = target.cuda(non_blocking=True)
            output = model(input)
            _, H, W = target.shape
            output = output.detach().max(1)[1]
            results.append(output.cpu().numpy().reshape(H, W))
            intersection, union, target = intersectionAndUnionGPU(
                output, target, args.classes, args.ignore_label)
            intersection, union, target = intersection.cpu().numpy(
            ), union.cpu().numpy(), target.cpu().numpy()
            intersection_meter.update(intersection), union_meter.update(
                union), target_meter.update(target)

            accuracy = sum(
                intersection_meter.val) / (sum(target_meter.val) + 1e-10)
            batch_time.update(time.time() - end)
            end = time.time()
            print('Val: [{}/{}] '
                  'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                  'Batch {batch_time.val:.3f} ({batch_time.avg:.3f}) '
                  'Accuracy {accuracy:.4f}.'.format(i + 1,
                                                    len(val_loader),
                                                    data_time=data_time,
                                                    batch_time=batch_time,
                                                    accuracy=accuracy))

    iou_class = intersection_meter.sum / (union_meter.sum + 1e-10)
    accuracy_class = intersection_meter.sum / (target_meter.sum + 1e-10)
    mIoU = np.mean(iou_class)
    mAcc = np.mean(accuracy_class)
    allAcc = sum(intersection_meter.sum) / (sum(target_meter.sum) + 1e-10)

    print('Val result: mIoU/mAcc/allAcc {:.4f}/{:.4f}/{:.4f}.'.format(
        mIoU, mAcc, allAcc))
    for i in range(args.classes):
        print('Class_{} Result: iou/accuracy {:.4f}/{:.4f}.'.format(
            i, iou_class[i], accuracy_class[i]))
    print('<<<<<<<<<<<<<<<<<< End Evaluation <<<<<<<<<<<<<<<<<<<<<')
    print('Convert to Label ID')
    result_files = results2img(results=results,
                               data_root=args.data_root,
                               data_list=args.train_list,
                               save_dir='./visualization/train_result',
                               to_label_id=True)
    print('Convert to Label ID Finished')
Exemple #6
0
def train(gpu, ngpus_per_node, argss):
    global args
    args = argss

    if args.arch == 'triple':
        model = TriSeNet(layers=args.layers, classes=args.classes)
        modules_ori = [
            model.layer0, model.layer1, model.layer2, model.layer3,
            model.layer4
        ]
        # modules_new = [model.down_8_32, model.sa_8_32, model.seg_head]
        modules_new = []
        for key, value in model._modules.items():
            if "layer" not in key:
                modules_new.append(value)
    args.index_split = len(
        modules_ori
    )  # the module after index_split need multiply 10 at learning rate
    params_list = []
    for module in modules_ori:
        params_list.append(dict(params=module.parameters(), lr=args.base_lr))
    for module in modules_new:
        params_list.append(
            dict(params=module.parameters(), lr=args.base_lr * 10))
    optimizer = torch.optim.SGD(params_list,
                                lr=args.base_lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    print("=> creating model ...")
    print("Classes: {}".format(args.classes))
    print(model)
    model = torch.nn.DataParallel(model.cuda())
    cudnn.benchmark = True

    criterion = nn.CrossEntropyLoss(ignore_index=args.ignore_label).cuda(gpu)

    value_scale = 255
    ## RGB mean & std
    mean = [0.485, 0.456, 0.406]
    mean = [item * value_scale for item in mean]
    std = [0.229, 0.224, 0.225]
    std = [item * value_scale for item in std]

    train_transform = transform.Compose([
        transform.RandScale([args.scale_min, args.scale_max]),
        transform.RandRotate([args.rotate_min, args.rotate_max],
                             padding=mean,
                             ignore_label=args.ignore_label),
        transform.RandomGaussianBlur(),
        transform.RandomHorizontalFlip(),
        transform.Crop([args.train_h, args.train_w],
                       crop_type='rand',
                       padding=mean,
                       ignore_label=args.ignore_label),
        transform.ToTensor(),
        transform.Normalize(mean=mean, std=std)
    ])

    train_data = SemData(split='train',
                         data_root=args.data_root,
                         data_list=args.train_list,
                         transform=train_transform)
    train_loader = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size, \
        shuffle=True, num_workers=args.workers, pin_memory=True, \
        sampler=None, drop_last=True)

    # Training Loop
    batch_time = AverageMeter()
    data_time = AverageMeter()
    main_loss_meter = AverageMeter()
    aux_loss_meter = AverageMeter()
    loss_meter = AverageMeter()
    intersection_meter = AverageMeter()
    union_meter = AverageMeter()
    target_meter = AverageMeter()

    model.train()
    end = time.time()
    max_iter = args.max_iter

    data_iter = iter(train_loader)
    epoch = 0
    for current_iter in range(args.start_iter, args.max_iter):
        try:
            input, target = next(data_iter)
            if not input.size(0) == args.batch_size:
                raise StopIteration
        except StopIteration:
            epoch += 1
            data_iter = iter(train_loader)
            input, target = next(data_iter)
            # need to update the AverageMeter for new epoch
            main_loss_meter = AverageMeter()
            aux_loss_meter = AverageMeter()
            loss_meter = AverageMeter()
            intersection_meter = AverageMeter()
            union_meter = AverageMeter()
            target_meter = AverageMeter()
        # measure data loading time
        data_time.update(time.time() - end)
        input = input.cuda(non_blocking=True)
        target = target.cuda(non_blocking=True)

        main_out = model(input)
        main_loss = criterion(main_out, target)
        aux_loss = torch.tensor(0).cuda()
        loss = main_loss + args.aux_weight * aux_loss

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        n = input.size(0)
        main_out = main_out.detach().max(1)[1]
        intersection, union, target = intersectionAndUnionGPU(
            main_out, target, args.classes, args.ignore_label)
        intersection, union, target = intersection.cpu().numpy(), union.cpu(
        ).numpy(), target.cpu().numpy()
        intersection_meter.update(intersection), union_meter.update(
            union), target_meter.update(target)

        accuracy = sum(
            intersection_meter.val) / (sum(target_meter.val) + 1e-10)
        main_loss_meter.update(main_loss.item(), n)
        aux_loss_meter.update(aux_loss.item(), n)
        loss_meter.update(loss.item(), n)
        batch_time.update(time.time() - end)
        end = time.time()

        # Using Poly strategy to change the learning rate
        current_lr = poly_learning_rate(args.base_lr,
                                        current_iter,
                                        max_iter,
                                        power=args.power)
        for index in range(0, args.index_split
                           ):  # args.index_split = 5 -> ResNet has 5 stages
            optimizer.param_groups[index]['lr'] = current_lr
        for index in range(args.index_split, len(optimizer.param_groups)):
            optimizer.param_groups[index]['lr'] = current_lr * 10

        remain_iter = max_iter - current_iter
        remain_time = remain_iter * batch_time.avg
        t_m, t_s = divmod(remain_time, 60)
        t_h, t_m = divmod(t_m, 60)
        remain_time = '{:02d}:{:02d}:{:02d}'.format(int(t_h), int(t_m),
                                                    int(t_s))

        iter_log = current_iter + 1

        if iter_log % args.print_freq == 0:
            print('Iteration: [{}/{}] '
                  'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                  'Batch {batch_time.val:.3f} ({batch_time.avg:.3f}) '
                  'ETA {remain_time} '
                  'MainLoss {main_loss_meter.val:.4f} '
                  'AuxLoss {aux_loss_meter.val:.4f} '
                  'Loss {loss_meter.val:.4f} '
                  'Accuracy {accuracy:.4f}.'.format(
                      iter_log,
                      args.max_iter,
                      data_time=data_time,
                      batch_time=batch_time,
                      remain_time=remain_time,
                      main_loss_meter=main_loss_meter,
                      aux_loss_meter=aux_loss_meter,
                      loss_meter=loss_meter,
                      accuracy=accuracy))
    save_checkpoint(
        {
            'iteration': iter_log,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
        }, False, args.save_path)