Beispiel #1
0
def main():
    # cudnn related setting
    cudnn.benchmark = config.CUDNN.BENCHMARK
    cudnn.deterministic = config.CUDNN.DETERMINISTIC
    cudnn.enabled = config.CUDNN.ENABLED
    gpus = list(config.TEST.GPUS)
    if len(gpus) > 1:
        raise ValueError('Test only supports single core.')
    device = torch.device('cuda:{}'.format(gpus[0]))
    # build model
    model = build_segmentation_model_from_cfg(config)
    logger.info("Model:\n{}".format(model))
    model = model.to(device)

    # build data_loader
    # TODO: still need it for thing_list
    data_loader = build_test_loader_from_cfg(config)

    # load model
    if config.TEST.MODEL_FILE:
        model_state_file = config.TEST.MODEL_FILE
    else:
        model_state_file = os.path.join(config.OUTPUT_DIR, 'final_state.pth')

    if os.path.isfile(model_state_file):
        model_weights = torch.load(model_state_file)
        if 'state_dict' in model_weights.keys():
            model_weights = model_weights['state_dict']
            logger.info('Evaluating a intermediate checkpoint.')
        model.load_state_dict(model_weights, strict=True)
        logger.info('Test model loaded from {}'.format(model_state_file))
    else:
        if not config.DEBUG.DEBUG:
            raise ValueError('Cannot find test model.')

    # dir to save intermediate raw outputs
    raw_out_dir = os.path.join(args.output_dir, 'raw')
    PathManager.mkdirs(raw_out_dir)

    # dir to save semantic outputs
    semantic_out_dir = os.path.join(args.output_dir, 'semantic')
    PathManager.mkdirs(semantic_out_dir)

    # dir to save instance outputs
    instance_out_dir = os.path.join(args.output_dir, 'instance')
    PathManager.mkdirs(instance_out_dir)

    # dir to save panoptic outputs
    panoptic_out_dir = os.path.join(args.output_dir, 'panoptic')
    PathManager.mkdirs(panoptic_out_dir)

    # Test loop
    model.eval()

    # build image demo transform

    net_time = AverageMeter()
    post_time = AverageMeter()

    # dataset
    source = "/home/muyun99/Desktop/MyGithub/cnsoftbei-video/inference/input/video-clip_2-4.mp4"
    imgsz = 1024
    dataset = LoadImages(source, img_size=imgsz)

    try:
        with torch.no_grad():
            for i, (path, image, im0s, vid_cap) in enumerate(dataset):
                (_, raw_h, raw_w) = image.shape
                image = torch.from_numpy(image).to(device)
                image = image.float()
                image /= 255.0  # 0 - 255 to 0.0 - 1.0
                if image.ndimension() == 3:
                    image = image.unsqueeze(0)

                # network
                start_time = time.time()
                out_dict = model(image)
                torch.cuda.synchronize(device)
                net_time.update(time.time() - start_time)

                # post-processing
                start_time = time.time()
                semantic_pred = get_semantic_segmentation(out_dict['semantic'])
                panoptic_pred, center_pred = get_panoptic_segmentation(
                    semantic_pred,
                    out_dict['center'],
                    out_dict['offset'],
                    thing_list=data_loader.dataset.thing_list,
                    label_divisor=data_loader.dataset.label_divisor,
                    stuff_area=config.POST_PROCESSING.STUFF_AREA,
                    void_label=(data_loader.dataset.label_divisor *
                                data_loader.dataset.ignore_label),
                    threshold=config.POST_PROCESSING.CENTER_THRESHOLD,
                    nms_kernel=config.POST_PROCESSING.NMS_KERNEL,
                    top_k=config.POST_PROCESSING.TOP_K_INSTANCE,
                    foreground_mask=None)
                torch.cuda.synchronize(device)
                post_time.update(time.time() - start_time)

                logger.info(
                    'Network Time: {net_time.val:.3f}s ({net_time.avg:.3f}s)\t'
                    'Post-processing Time: {post_time.val:.3f}s ({post_time.avg:.3f}s)\t'
                    .format(net_time=net_time, post_time=post_time))

                # save predictions
                semantic_pred = semantic_pred.squeeze(0).cpu().numpy()
                panoptic_pred = panoptic_pred.squeeze(0).cpu().numpy()

                # crop predictions
                semantic_pred = semantic_pred[:raw_h, :raw_w]
                panoptic_pred = panoptic_pred[:raw_h, :raw_w]

                # Raw outputs
                save_debug_images(
                    dataset=data_loader.dataset,
                    batch_images=image,
                    batch_targets={},
                    batch_outputs=out_dict,
                    out_dir=raw_out_dir,
                    iteration=i,
                    target_keys=[],
                    output_keys=['semantic', 'center', 'offset'],
                    is_train=False,
                )

                save_annotation(
                    semantic_pred,
                    semantic_out_dir,
                    'semantic_pred_%d' % i,
                    add_colormap=True,
                    colormap=data_loader.dataset.create_label_colormap())
                pan_to_sem = panoptic_pred // data_loader.dataset.label_divisor
                save_annotation(
                    pan_to_sem,
                    semantic_out_dir,
                    'pan_to_sem_pred_%d' % i,
                    add_colormap=True,
                    colormap=data_loader.dataset.create_label_colormap())
                ins_id = panoptic_pred % data_loader.dataset.label_divisor
                pan_to_ins = panoptic_pred.copy()
                pan_to_ins[ins_id == 0] = 0
                save_instance_annotation(pan_to_ins, instance_out_dir,
                                         'pan_to_ins_pred_%d' % i)
                save_panoptic_annotation(
                    panoptic_pred,
                    panoptic_out_dir,
                    'panoptic_pred_%d' % i,
                    label_divisor=data_loader.dataset.label_divisor,
                    colormap=data_loader.dataset.create_label_colormap())
    except Exception:
        logger.exception("Exception during demo:")
        raise
    finally:
        logger.info("Demo finished.")

    vid_path = args.output_dir
    pic2video(filelist=glob.glob(semantic_out_dir + "pan_to_sem_pred_*"),
              vid_path=os.path.join(vid_path, "semantic.mp4"),
              size=(1024, 576))
    pic2video(filelist=glob.glob(instance_out_dir + "pan_to_ins_pred_*"),
              vid_path=os.path.join(vid_path, "instance.mp4"),
              size=(1024, 576))
    pic2video(filelist=glob.glob(panoptic_out_dir + "panoptic_pred_*"),
              vid_path=os.path.join(vid_path, "panoptic.mp4"),
              size=(1024, 576))
    logger.info("Video saved")
Beispiel #2
0
def main():
    args = parse_args()

    logger = logging.getLogger('demo')
    if not logger.isEnabledFor(logging.INFO):  # setup_logger is not called
        setup_logger(output=args.output_dir, name='demo')

    logger.info(pprint.pformat(args))
    logger.info(config)

    # cudnn related setting
    cudnn.benchmark = config.CUDNN.BENCHMARK
    cudnn.deterministic = config.CUDNN.DETERMINISTIC
    cudnn.enabled = config.CUDNN.ENABLED
    gpus = list(config.TEST.GPUS)
    if len(gpus) > 1:
        raise ValueError('Test only supports single core.')
    device = torch.device('cuda:{}'.format(gpus[0]))

    # build model
    model = build_segmentation_model_from_cfg(config)

    # Change ASPP image pooling
    # output_stride = 2 ** (5 - sum(config.MODEL.BACKBONE.DILATION))
    # train_crop_h, train_crop_w = config.TEST.CROP_SIZE
    # scale = 1. / output_stride
    # pool_h = int((float(train_crop_h) - 1.0) * scale + 1.0)
    # pool_w = int((float(train_crop_w) - 1.0) * scale + 1.0)

    # model.set_image_pooling((pool_h, pool_w))

    logger.info("Model:\n{}".format(model))
    model = model.to(device)

    try:
        # build data_loader
        data_loader = build_test_loader_from_cfg(config)
        meta_dataset = data_loader.dataset
        save_intermediate_outputs = True
    except:
        logger.warning(
            "Cannot build data loader, using default meta data. This will disable visualizing intermediate outputs"
        )
        if 'cityscapes' in config.DATASET.DATASET:
            meta_dataset = CityscapesMeta()
        else:
            raise ValueError("Unsupported dataset: {}".format(
                config.DATASET.DATASET))
        save_intermediate_outputs = False

    # load model
    if config.TEST.MODEL_FILE:
        model_state_file = config.TEST.MODEL_FILE
    else:
        model_state_file = os.path.join(config.OUTPUT_DIR, 'final_state.pth')

    if os.path.isfile(model_state_file):
        model_weights = torch.load(model_state_file)
        if 'state_dict' in model_weights.keys():
            model_weights = model_weights['state_dict']
            logger.info('Evaluating a intermediate checkpoint.')
        model.load_state_dict(model_weights, strict=False)
        logger.info('Test model loaded from {}'.format(model_state_file))
    else:
        if not config.DEBUG.DEBUG:
            raise ValueError('Cannot find test model.')

    # load images
    input_list = []
    if os.path.exists(args.input_files):
        if os.path.isfile(args.input_files):
            # inference on a single file, extract extension
            ext = os.path.splitext(os.path.basename(args.input_files))[1]
            if ext in ['.png', '.jpg', '.jpeg']:
                # image file
                input_list.append(args.input_files)
            elif ext in ['.mpeg']:
                # video file
                # TODO: decode video and convert to image list
                raise NotImplementedError(
                    "Inference on video is not supported yet.")
            else:
                raise ValueError("Unsupported extension: {}.".format(ext))
        else:
            # inference on a directory
            for fname in glob.glob(
                    os.path.join(args.input_files, '*' + args.extension)):
                input_list.append(fname)
    else:
        raise ValueError('Input file or directory does not exists: {}'.format(
            args.input_files))

    if isinstance(input_list[0], str):
        logger.info("Inference on images")
        logger.info(input_list)
    else:
        logger.info("Inference on video")

    # dir to save intermediate raw outputs
    raw_out_dir = os.path.join(args.output_dir, 'raw')
    PathManager.mkdirs(raw_out_dir)

    # dir to save semantic outputs
    semantic_out_dir = os.path.join(args.output_dir, 'semantic')
    PathManager.mkdirs(semantic_out_dir)

    # dir to save instance outputs
    instance_out_dir = os.path.join(args.output_dir, 'instance')
    PathManager.mkdirs(instance_out_dir)

    # dir to save panoptic outputs
    panoptic_out_dir = os.path.join(args.output_dir, 'panoptic')
    PathManager.mkdirs(panoptic_out_dir)

    # Test loop
    model.eval()

    # build image demo transform
    transforms = T.Compose(
        [T.ToTensor(),
         T.Normalize(config.DATASET.MEAN, config.DATASET.STD)])

    net_time = AverageMeter()
    post_time = AverageMeter()
    try:
        with torch.no_grad():
            for i, fname in enumerate(input_list):
                if isinstance(fname, str):
                    # load image
                    raw_image = read_image(fname, 'RGB')
                else:
                    NotImplementedError(
                        "Inference on video is not supported yet.")

                # pad image
                raw_shape = raw_image.shape[:2]
                raw_h = raw_shape[0]
                raw_w = raw_shape[1]
                new_h = (raw_h + 31) // 32 * 32 + 1
                new_w = (raw_w + 31) // 32 * 32 + 1
                input_image = np.zeros((new_h, new_w, 3), dtype=np.uint8)
                input_image[:, :] = config.DATASET.MEAN
                input_image[:raw_h, :raw_w, :] = raw_image

                image, _ = transforms(input_image, None)
                image = image.unsqueeze(0).to(device)

                # network
                start_time = time.time()
                out_dict = model(image)
                torch.cuda.synchronize(device)
                net_time.update(time.time() - start_time)

                # post-processing
                start_time = time.time()
                semantic_pred = get_semantic_segmentation(out_dict['semantic'])

                panoptic_pred, center_pred = get_panoptic_segmentation(
                    semantic_pred,
                    out_dict['center'],
                    out_dict['offset'],
                    thing_list=meta_dataset.thing_list,
                    label_divisor=meta_dataset.label_divisor,
                    stuff_area=config.POST_PROCESSING.STUFF_AREA,
                    void_label=(meta_dataset.label_divisor *
                                meta_dataset.ignore_label),
                    threshold=config.POST_PROCESSING.CENTER_THRESHOLD,
                    nms_kernel=config.POST_PROCESSING.NMS_KERNEL,
                    top_k=config.POST_PROCESSING.TOP_K_INSTANCE,
                    foreground_mask=None)
                torch.cuda.synchronize(device)
                post_time.update(time.time() - start_time)

                logger.info(
                    '[{}/{}]\t'
                    'Network Time: {net_time.val:.3f}s ({net_time.avg:.3f}s)\t'
                    'Post-processing Time: {post_time.val:.3f}s ({post_time.avg:.3f}s)\t'
                    .format(i,
                            len(input_list),
                            net_time=net_time,
                            post_time=post_time))

                # save predictions
                semantic_pred = semantic_pred.squeeze(0).cpu().numpy()
                panoptic_pred = panoptic_pred.squeeze(0).cpu().numpy()

                # crop predictions
                semantic_pred = semantic_pred[:raw_h, :raw_w]
                panoptic_pred = panoptic_pred[:raw_h, :raw_w]

                if save_intermediate_outputs:
                    # Raw outputs
                    save_debug_images(
                        dataset=meta_dataset,
                        batch_images=image,
                        batch_targets={},
                        batch_outputs=out_dict,
                        out_dir=raw_out_dir,
                        iteration=i,
                        target_keys=[],
                        output_keys=['semantic', 'center', 'offset'],
                        is_train=False,
                    )

                save_annotation(semantic_pred,
                                semantic_out_dir,
                                'semantic_pred_%d' % i,
                                add_colormap=True,
                                colormap=meta_dataset.create_label_colormap(),
                                image=raw_image if args.merge_image else None)
                pan_to_sem = panoptic_pred // meta_dataset.label_divisor
                save_annotation(pan_to_sem,
                                semantic_out_dir,
                                'panoptic_to_semantic_pred_%d' % i,
                                add_colormap=True,
                                colormap=meta_dataset.create_label_colormap(),
                                image=raw_image if args.merge_image else None)
                ins_id = panoptic_pred % meta_dataset.label_divisor
                pan_to_ins = panoptic_pred.copy()
                pan_to_ins[ins_id == 0] = 0
                save_instance_annotation(
                    pan_to_ins,
                    instance_out_dir,
                    'panoptic_to_instance_pred_%d' % i,
                    image=raw_image if args.merge_image else None)
                save_panoptic_annotation(
                    panoptic_pred,
                    panoptic_out_dir,
                    'panoptic_pred_%d' % i,
                    label_divisor=meta_dataset.label_divisor,
                    colormap=meta_dataset.create_label_colormap(),
                    image=raw_image if args.merge_image else None)
    except Exception:
        logger.exception("Exception during demo:")
        raise
    finally:
        logger.info("Demo finished.")
        if save_intermediate_outputs:
            logger.info("Intermediate outputs saved to {}".format(raw_out_dir))
        logger.info(
            "Semantic predictions saved to {}".format(semantic_out_dir))
        logger.info(
            "Instance predictions saved to {}".format(instance_out_dir))
        logger.info(
            "Panoptic predictions saved to {}".format(panoptic_out_dir))
def main():
    args = parse_args()

    logger = logging.getLogger('segmentation_test')
    if not logger.isEnabledFor(logging.INFO):  # setup_logger is not called
        setup_logger(output=config.OUTPUT_DIR, name='segmentation_test')

    logger.info(pprint.pformat(args))
    logger.info(config)

    # cudnn related setting
    cudnn.benchmark = config.CUDNN.BENCHMARK
    cudnn.deterministic = config.CUDNN.DETERMINISTIC
    cudnn.enabled = config.CUDNN.ENABLED
    gpus = list(config.TEST.GPUS)
    if len(gpus) > 1:
        raise ValueError('Test only supports single core.')
    device = torch.device('cuda:{}'.format(gpus[0]))

    # build model
    model = build_segmentation_model_from_cfg(config)

    # Change ASPP image pooling
    output_stride = 2 ** (5 - sum(config.MODEL.BACKBONE.DILATION))
    train_crop_h, train_crop_w = config.TEST.CROP_SIZE
    scale = 1. / output_stride
    pool_h = int((float(train_crop_h) - 1.0) * scale + 1.0)
    pool_w = int((float(train_crop_w) - 1.0) * scale + 1.0)

    model.set_image_pooling((pool_h, pool_w))

    logger.info("Model:\n{}".format(model))
    model = model.to(device)

    # build data_loader
    data_loader = build_test_loader_from_cfg(config)

    # load model
    if config.TEST.MODEL_FILE:
        model_state_file = config.TEST.MODEL_FILE
    else:
        model_state_file = os.path.join(config.OUTPUT_DIR, 'final_state.pth')

    if os.path.isfile(model_state_file):
        model_weights = torch.load(model_state_file)
        if 'state_dict' in model_weights.keys():
            model_weights = model_weights['state_dict']
            logger.info('Evaluating a intermediate checkpoint.')
        model.load_state_dict(model_weights, strict=True)
        logger.info('Test model loaded from {}'.format(model_state_file))
    else:
        if not config.DEBUG.DEBUG:
            raise ValueError('Cannot find test model.')

    data_time = AverageMeter()
    net_time = AverageMeter()
    post_time = AverageMeter()
    timing_warmup_iter = 10

    semantic_metric = SemanticEvaluator(
        num_classes=data_loader.dataset.num_classes,
        ignore_label=data_loader.dataset.ignore_label,
        output_dir=os.path.join(config.OUTPUT_DIR, config.TEST.SEMANTIC_FOLDER),
        train_id_to_eval_id=data_loader.dataset.train_id_to_eval_id()
    )

    instance_metric = None
    panoptic_metric = None

    if config.TEST.EVAL_INSTANCE:
        if 'cityscapes' in config.DATASET.DATASET:
            instance_metric = CityscapesInstanceEvaluator(
                output_dir=os.path.join(config.OUTPUT_DIR, config.TEST.INSTANCE_FOLDER),
                train_id_to_eval_id=data_loader.dataset.train_id_to_eval_id(),
                gt_dir=os.path.join(config.DATASET.ROOT, 'gtFine', config.DATASET.TEST_SPLIT)
            )
        elif 'coco' in config.DATASET.DATASET:
            instance_metric = COCOInstanceEvaluator(
                output_dir=os.path.join(config.OUTPUT_DIR, config.TEST.INSTANCE_FOLDER),
                train_id_to_eval_id=data_loader.dataset.train_id_to_eval_id(),
                gt_dir=os.path.join(config.DATASET.ROOT, 'annotations',
                                    'instances_{}.json'.format(config.DATASET.TEST_SPLIT))
            )
        else:
            raise ValueError('Undefined evaluator for dataset {}'.format(config.DATASET.DATASET))

    if config.TEST.EVAL_PANOPTIC:
        if 'cityscapes' in config.DATASET.DATASET:
            panoptic_metric = CityscapesPanopticEvaluator(
                output_dir=os.path.join(config.OUTPUT_DIR, config.TEST.PANOPTIC_FOLDER),
                train_id_to_eval_id=data_loader.dataset.train_id_to_eval_id(),
                label_divisor=data_loader.dataset.label_divisor,
                void_label=data_loader.dataset.label_divisor * data_loader.dataset.ignore_label,
                gt_dir=config.DATASET.ROOT,
                split=config.DATASET.TEST_SPLIT,
                num_classes=data_loader.dataset.num_classes
            )
        elif 'coco' in config.DATASET.DATASET:
            panoptic_metric = COCOPanopticEvaluator(
                output_dir=os.path.join(config.OUTPUT_DIR, config.TEST.PANOPTIC_FOLDER),
                train_id_to_eval_id=data_loader.dataset.train_id_to_eval_id(),
                label_divisor=data_loader.dataset.label_divisor,
                void_label=data_loader.dataset.label_divisor * data_loader.dataset.ignore_label,
                gt_dir=config.DATASET.ROOT,
                split=config.DATASET.TEST_SPLIT,
                num_classes=data_loader.dataset.num_classes
            )
        else:
            raise ValueError('Undefined evaluator for dataset {}'.format(config.DATASET.DATASET))

    foreground_metric = None
    if config.TEST.EVAL_FOREGROUND:
        foreground_metric = SemanticEvaluator(
            num_classes=2,
            ignore_label=data_loader.dataset.ignore_label,
            output_dir=os.path.join(config.OUTPUT_DIR, config.TEST.FOREGROUND_FOLDER)
        )

    image_filename_list = [
        os.path.splitext(os.path.basename(ann))[0] for ann in data_loader.dataset.ann_list]

    # Debug output.
    if config.TEST.DEBUG:
        debug_out_dir = os.path.join(config.OUTPUT_DIR, 'debug_test')
        PathManager.mkdirs(debug_out_dir)

    # Train loop.
    try:
        model.eval()
        with torch.no_grad():
            for i, data in enumerate(data_loader):
                if i == timing_warmup_iter:
                    data_time.reset()
                    net_time.reset()
                    post_time.reset()

                # data
                start_time = time.time()
                for key in data.keys():
                    try:
                        data[key] = data[key].to(device)
                    except:
                        pass

                image = data.pop('image')
                torch.cuda.synchronize(device)
                data_time.update(time.time() - start_time)

                start_time = time.time()
                # out_dict = model(image, data)
                out_dict = model(image)
                torch.cuda.synchronize(device)
                net_time.update(time.time() - start_time)

                start_time = time.time()
                semantic_pred = get_semantic_segmentation(out_dict['semantic'])
                if 'foreground' in out_dict:
                    foreground_pred = get_semantic_segmentation(out_dict['foreground'])
                else:
                    foreground_pred = None

                # Oracle experiment
                if config.TEST.ORACLE_SEMANTIC:
                    # Use predicted semantic for foreground
                    foreground_pred = torch.zeros_like(semantic_pred)
                    for thing_class in data_loader.dataset.thing_list:
                        foreground_pred[semantic_pred == thing_class] = 1
                    # Use gt semantic
                    semantic_pred = data['semantic']
                    # Set it to a stuff label
                    stuff_label = 0
                    while stuff_label in data_loader.dataset.thing_list:
                        stuff_label += 1
                    semantic_pred[semantic_pred == data_loader.dataset.ignore_label] = stuff_label
                if config.TEST.ORACLE_FOREGROUND:
                    foreground_pred = data['foreground']
                if config.TEST.ORACLE_CENTER:
                    out_dict['center'] = data['center']
                if config.TEST.ORACLE_OFFSET:
                    out_dict['offset'] = data['offset']

                if config.TEST.EVAL_INSTANCE or config.TEST.EVAL_PANOPTIC:
                    panoptic_pred, center_pred = get_panoptic_segmentation(
                        semantic_pred,
                        out_dict['center'],
                        out_dict['offset'],
                        thing_list=data_loader.dataset.thing_list,
                        label_divisor=data_loader.dataset.label_divisor,
                        stuff_area=config.POST_PROCESSING.STUFF_AREA,
                        void_label=(
                                data_loader.dataset.label_divisor *
                                data_loader.dataset.ignore_label),
                        threshold=config.POST_PROCESSING.CENTER_THRESHOLD,
                        nms_kernel=config.POST_PROCESSING.NMS_KERNEL,
                        top_k=config.POST_PROCESSING.TOP_K_INSTANCE,
                        foreground_mask=foreground_pred)
                else:
                    panoptic_pred = None
                torch.cuda.synchronize(device)
                post_time.update(time.time() - start_time)
                logger.info('[{}/{}]\t'
                            'Data Time: {data_time.val:.3f}s ({data_time.avg:.3f}s)\t'
                            'Network Time: {net_time.val:.3f}s ({net_time.avg:.3f}s)\t'
                            'Post-processing Time: {post_time.val:.3f}s ({post_time.avg:.3f}s)\t'.format(
                             i, len(data_loader), data_time=data_time, net_time=net_time, post_time=post_time))

                semantic_pred = semantic_pred.squeeze(0).cpu().numpy()
                if panoptic_pred is not None:
                    panoptic_pred = panoptic_pred.squeeze(0).cpu().numpy()
                if foreground_pred is not None:
                    foreground_pred = foreground_pred.squeeze(0).cpu().numpy()

                # Crop padded regions.
                image_size = data['size'].squeeze(0).cpu().numpy()
                semantic_pred = semantic_pred[:image_size[0], :image_size[1]]
                if panoptic_pred is not None:
                    panoptic_pred = panoptic_pred[:image_size[0], :image_size[1]]
                if foreground_pred is not None:
                    foreground_pred = foreground_pred[:image_size[0], :image_size[1]]

                # Resize back to the raw image size.
                raw_image_size = data['raw_size'].squeeze(0).cpu().numpy()
                if raw_image_size[0] != image_size[0] or raw_image_size[1] != image_size[1]:
                    semantic_pred = cv2.resize(semantic_pred.astype(np.float), (raw_image_size[1], raw_image_size[0]),
                                               interpolation=cv2.INTER_NEAREST).astype(np.int32)
                    if panoptic_pred is not None:
                        panoptic_pred = cv2.resize(panoptic_pred.astype(np.float),
                                                   (raw_image_size[1], raw_image_size[0]),
                                                   interpolation=cv2.INTER_NEAREST).astype(np.int32)
                    if foreground_pred is not None:
                        foreground_pred = cv2.resize(foreground_pred.astype(np.float),
                                                     (raw_image_size[1], raw_image_size[0]),
                                                     interpolation=cv2.INTER_NEAREST).astype(np.int32)

                # Evaluates semantic segmentation.
                semantic_metric.update(semantic_pred,
                                       data['raw_label'].squeeze(0).cpu().numpy(),
                                       image_filename_list[i])

                # Optional: evaluates instance segmentation.
                if instance_metric is not None:
                    raw_semantic = F.softmax(out_dict['semantic'][:, :, :image_size[0], :image_size[1]], dim=1)
                    center_hmp = out_dict['center'][:, :, :image_size[0], :image_size[1]]
                    if raw_image_size[0] != image_size[0] or raw_image_size[1] != image_size[1]:
                        raw_semantic = F.interpolate(raw_semantic,
                                                     size=(raw_image_size[0], raw_image_size[1]),
                                                     mode='bilinear',
                                                     align_corners=False)  # Consistent with OpenCV.
                        center_hmp = F.interpolate(center_hmp,
                                                   size=(raw_image_size[0], raw_image_size[1]),
                                                   mode='bilinear',
                                                   align_corners=False)  # Consistent with OpenCV.

                    raw_semantic = raw_semantic.squeeze(0).cpu().numpy()
                    center_hmp = center_hmp.squeeze(1).squeeze(0).cpu().numpy()

                    instances = get_cityscapes_instance_format(panoptic_pred,
                                                               raw_semantic,
                                                               center_hmp,
                                                               label_divisor=data_loader.dataset.label_divisor,
                                                               score_type=config.TEST.INSTANCE_SCORE_TYPE)
                    instance_metric.update(instances, image_filename_list[i])

                # Optional: evaluates panoptic segmentation.
                if panoptic_metric is not None:
                    image_id = '_'.join(image_filename_list[i].split('_')[:3])
                    panoptic_metric.update(panoptic_pred,
                                           image_filename=image_filename_list[i],
                                           image_id=image_id)

                # Optional: evaluates foreground segmentation.
                if foreground_metric is not None:
                    semantic_label = data['raw_label'].squeeze(0).cpu().numpy()
                    foreground_label = np.zeros_like(semantic_label)
                    for sem_lab in np.unique(semantic_label):
                        # Both `stuff` and `ignore` are background.
                        if sem_lab in data_loader.dataset.thing_list:
                            foreground_label[semantic_label == sem_lab] = 1

                    # Use semantic segmentation as foreground segmentation.
                    if foreground_pred is None:
                        foreground_pred = np.zeros_like(semantic_pred)
                        for sem_lab in np.unique(semantic_pred):
                            if sem_lab in data_loader.dataset.thing_list:
                                foreground_pred[semantic_pred == sem_lab] = 1

                    foreground_metric.update(foreground_pred,
                                             foreground_label,
                                             image_filename_list[i])

                if config.TEST.DEBUG:
                    # Raw outputs
                    save_debug_images(
                        dataset=data_loader.dataset,
                        batch_images=image,
                        batch_targets=data,
                        batch_outputs=out_dict,
                        out_dir=debug_out_dir,
                        iteration=i,
                        target_keys=config.DEBUG.TARGET_KEYS,
                        output_keys=config.DEBUG.OUTPUT_KEYS,
                        is_train=False,
                    )
                    if panoptic_pred is not None:
                        # Processed outputs
                        save_annotation(semantic_pred, debug_out_dir, 'semantic_pred_%d' % i,
                                        add_colormap=True, colormap=data_loader.dataset.create_label_colormap())
                        pan_to_sem = panoptic_pred // data_loader.dataset.label_divisor
                        save_annotation(pan_to_sem, debug_out_dir, 'pan_to_sem_pred_%d' % i,
                                        add_colormap=True, colormap=data_loader.dataset.create_label_colormap())
                        ins_id = panoptic_pred % data_loader.dataset.label_divisor
                        pan_to_ins = panoptic_pred.copy()
                        pan_to_ins[ins_id == 0] = 0
                        save_instance_annotation(pan_to_ins, debug_out_dir, 'pan_to_ins_pred_%d' % i)

                        save_panoptic_annotation(panoptic_pred, debug_out_dir, 'panoptic_pred_%d' % i,
                                                 label_divisor=data_loader.dataset.label_divisor,
                                                 colormap=data_loader.dataset.create_label_colormap())
    except Exception:
        logger.exception("Exception during testing:")
        raise
    finally:
        logger.info("Inference finished.")
        semantic_results = semantic_metric.evaluate()
        logger.info(semantic_results)
        if instance_metric is not None:
            instance_results = instance_metric.evaluate()
            logger.info(instance_results)
        if panoptic_metric is not None:
            panoptic_results = panoptic_metric.evaluate()
            logger.info(panoptic_results)
        if foreground_metric is not None:
            foreground_results = foreground_metric.evaluate()
            logger.info(foreground_results)