Example #1
def main():
    args = parse_args()

    logger = logging.getLogger('demo')
    if not logger.isEnabledFor(logging.INFO):  # setup_logger has not been called yet
        setup_logger(output=args.output_dir, name='demo')

    logger.info(pprint.pformat(args))
    logger.info(config)

    # cudnn related setting
    cudnn.benchmark = config.CUDNN.BENCHMARK
    cudnn.deterministic = config.CUDNN.DETERMINISTIC
    cudnn.enabled = config.CUDNN.ENABLED
    gpus = list(config.TEST.GPUS)
    if len(gpus) > 1:
        raise ValueError('Test only supports single core.')
    device = torch.device('cuda:{}'.format(gpus[0]))

    # build model
    model = build_segmentation_model_from_cfg(config)

    # Change ASPP image pooling
    # output_stride = 2 ** (5 - sum(config.MODEL.BACKBONE.DILATION))
    # train_crop_h, train_crop_w = config.TEST.CROP_SIZE
    # scale = 1. / output_stride
    # pool_h = int((float(train_crop_h) - 1.0) * scale + 1.0)
    # pool_w = int((float(train_crop_w) - 1.0) * scale + 1.0)

    # model.set_image_pooling((pool_h, pool_w))

    logger.info("Model:\n{}".format(model))
    model = model.to(device)

    try:
        # build data_loader
        data_loader = build_test_loader_from_cfg(config)
        meta_dataset = data_loader.dataset
        save_intermediate_outputs = True
    except Exception:
        logger.warning(
            "Cannot build data loader; using default metadata. "
            "Visualization of intermediate outputs will be disabled.")
        if 'cityscapes' in config.DATASET.DATASET:
            meta_dataset = CityscapesMeta()
        else:
            raise ValueError("Unsupported dataset: {}".format(
                config.DATASET.DATASET))
        save_intermediate_outputs = False

    # load model
    if config.TEST.MODEL_FILE:
        model_state_file = config.TEST.MODEL_FILE
    else:
        model_state_file = os.path.join(config.OUTPUT_DIR, 'final_state.pth')

    if os.path.isfile(model_state_file):
        model_weights = torch.load(model_state_file)
        if 'state_dict' in model_weights:
            model_weights = model_weights['state_dict']
            logger.info('Evaluating an intermediate checkpoint.')
        model.load_state_dict(model_weights, strict=True)
        logger.info('Test model loaded from {}'.format(model_state_file))
    else:
        if not config.DEBUG.DEBUG:
            raise ValueError('Cannot find test model.')

    # load images
    input_list = []
    if os.path.exists(args.input_files):
        if os.path.isfile(args.input_files):
            # inference on a single file, extract extension
            ext = os.path.splitext(os.path.basename(args.input_files))[1]
            if ext in ['.png', '.jpg', '.jpeg']:
                # image file
                input_list.append(args.input_files)
            elif ext in ['.mpeg']:
                # video file
                # TODO: decode video and convert to image list
                raise NotImplementedError(
                    "Inference on video is not supported yet.")
            else:
                raise ValueError("Unsupported extension: {}.".format(ext))
        else:
            # inference on a directory
            for fname in glob.glob(
                    os.path.join(args.input_files, '*' + args.extension)):
                input_list.append(fname)
    else:
        raise ValueError('Input file or directory does not exist: {}'.format(
            args.input_files))

    if isinstance(input_list[0], str):
        logger.info("Inference on images")
        logger.info(input_list)
    else:
        logger.info("Inference on video")

    # dir to save intermediate raw outputs
    raw_out_dir = os.path.join(args.output_dir, 'raw')
    PathManager.mkdirs(raw_out_dir)

    # dir to save semantic outputs
    semantic_out_dir = os.path.join(args.output_dir, 'semantic')
    PathManager.mkdirs(semantic_out_dir)

    # dir to save instance outputs
    instance_out_dir = os.path.join(args.output_dir, 'instance')
    PathManager.mkdirs(instance_out_dir)

    # dir to save panoptic outputs
    panoptic_out_dir = os.path.join(args.output_dir, 'panoptic')
    PathManager.mkdirs(panoptic_out_dir)

    # Test loop
    model.eval()

    # build image demo transform
    transforms = T.Compose(
        [T.ToTensor(),
         T.Normalize(config.DATASET.MEAN, config.DATASET.STD)])

    net_time = AverageMeter()
    post_time = AverageMeter()
    try:
        with torch.no_grad():
            for i, fname in enumerate(input_list):
                if isinstance(fname, str):
                    # load image
                    raw_image = read_image(fname, 'RGB')
                else:
                    raise NotImplementedError(
                        "Inference on video is not supported yet.")

                # pad image
                raw_shape = raw_image.shape[:2]
                raw_h = raw_shape[0]
                raw_w = raw_shape[1]
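                # Pad up to the nearest multiple of 32, plus one pixel; this
                # presumably matches the network's output stride of 32 and
                # keeps corners aligned for upsampling.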
                new_h = (raw_h + 31) // 32 * 32 + 1
                new_w = (raw_w + 31) // 32 * 32 + 1
                input_image = np.zeros((new_h, new_w, 3), dtype=np.uint8)
                input_image[:, :] = config.DATASET.MEAN
                input_image[:raw_h, :raw_w, :] = raw_image

                image, _ = transforms(input_image, None)
                image = image.unsqueeze(0).to(device)

                # network
                start_time = time.time()
                out_dict = model(image)
                torch.cuda.synchronize(device)
                net_time.update(time.time() - start_time)

                # post-processing
                start_time = time.time()
                semantic_pred = get_semantic_segmentation(out_dict['semantic'])

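                # Panoptic fusion: centers above CENTER_THRESHOLD survive NMS
                # within an NMS_KERNEL window, pixels are grouped to their
                # nearest surviving center via the offset map, and stuff
                # regions smaller than STUFF_AREA are voided.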
                panoptic_pred, center_pred = get_panoptic_segmentation(
                    semantic_pred,
                    out_dict['center'],
                    out_dict['offset'],
                    thing_list=meta_dataset.thing_list,
                    label_divisor=meta_dataset.label_divisor,
                    stuff_area=config.POST_PROCESSING.STUFF_AREA,
                    void_label=(meta_dataset.label_divisor *
                                meta_dataset.ignore_label),
                    threshold=config.POST_PROCESSING.CENTER_THRESHOLD,
                    nms_kernel=config.POST_PROCESSING.NMS_KERNEL,
                    top_k=config.POST_PROCESSING.TOP_K_INSTANCE,
                    foreground_mask=None)
                torch.cuda.synchronize(device)
                post_time.update(time.time() - start_time)

                logger.info(
                    '[{}/{}]\t'
                    'Network Time: {net_time.val:.3f}s ({net_time.avg:.3f}s)\t'
                    'Post-processing Time: {post_time.val:.3f}s ({post_time.avg:.3f}s)\t'
                    .format(i,
                            len(input_list),
                            net_time=net_time,
                            post_time=post_time))

                # save predictions
                semantic_pred = semantic_pred.squeeze(0).cpu().numpy()
                panoptic_pred = panoptic_pred.squeeze(0).cpu().numpy()

                # crop predictions
                semantic_pred = semantic_pred[:raw_h, :raw_w]
                panoptic_pred = panoptic_pred[:raw_h, :raw_w]

                if save_intermediate_outputs:
                    # Raw outputs
                    save_debug_images(
                        dataset=meta_dataset,
                        batch_images=image,
                        batch_targets={},
                        batch_outputs=out_dict,
                        out_dir=raw_out_dir,
                        iteration=i,
                        target_keys=[],
                        output_keys=['semantic', 'center', 'offset'],
                        is_train=False,
                    )

                save_annotation(semantic_pred,
                                semantic_out_dir,
                                'semantic_pred_%d' % i,
                                add_colormap=True,
                                colormap=meta_dataset.create_label_colormap(),
                                image=raw_image if args.merge_image else None)
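                # Panoptic ids encode semantic_class * label_divisor +
                # instance_id: integer division recovers the semantic map,
                # and the remainder isolates per-instance ids.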
                pan_to_sem = panoptic_pred // meta_dataset.label_divisor
                save_annotation(pan_to_sem,
                                semantic_out_dir,
                                'panoptic_to_semantic_pred_%d' % i,
                                add_colormap=True,
                                colormap=meta_dataset.create_label_colormap(),
                                image=raw_image if args.merge_image else None)
                ins_id = panoptic_pred % meta_dataset.label_divisor
                pan_to_ins = panoptic_pred.copy()
                pan_to_ins[ins_id == 0] = 0
                save_instance_annotation(
                    pan_to_ins,
                    instance_out_dir,
                    'panoptic_to_instance_pred_%d' % i,
                    image=raw_image if args.merge_image else None)
                save_panoptic_annotation(
                    panoptic_pred,
                    panoptic_out_dir,
                    'panoptic_pred_%d' % i,
                    label_divisor=meta_dataset.label_divisor,
                    colormap=meta_dataset.create_label_colormap(),
                    image=raw_image if args.merge_image else None)
    except Exception:
        logger.exception("Exception during demo:")
        raise
    finally:
        logger.info("Demo finished.")
        if save_intermediate_outputs:
            logger.info("Intermediate outputs saved to {}".format(raw_out_dir))
        logger.info(
            "Semantic predictions saved to {}".format(semantic_out_dir))
        logger.info(
            "Instance predictions saved to {}".format(instance_out_dir))
        logger.info(
            "Panoptic predictions saved to {}".format(panoptic_out_dir))
Example #2
    def main(self, frame, index, total):
        self.model.eval()

        # build image demo transform
        transforms = T.Compose([
            T.ToTensor(),
            T.Normalize(config.DATASET.MEAN, config.DATASET.STD)
        ])

        net_time = AverageMeter()
        post_time = AverageMeter()
        try:
            with torch.no_grad():
                raw_image = frame
                # pad image
                raw_shape = raw_image.shape[:2]
                raw_h = raw_shape[0]
                raw_w = raw_shape[1]
                new_h = (raw_h + 31) // 32 * 32 + 1
                new_w = (raw_w + 31) // 32 * 32 + 1
                input_image = np.zeros((new_h, new_w, 3), dtype=np.uint8)
                input_image[:, :] = config.DATASET.MEAN
                input_image[:raw_h, :raw_w, :] = raw_image

                image, _ = transforms(input_image, None)
                image = image.unsqueeze(0).to(self.device)

                # network
                start_time = time.time()
                out_dict = self.model(image)
                torch.cuda.synchronize(self.device)
                net_time.update(time.time() - start_time)

                # post-processing
                start_time = time.time()
                semantic_pred = get_semantic_segmentation(out_dict['semantic'])

                panoptic_pred, center_pred = get_panoptic_segmentation(
                    semantic_pred,
                    out_dict['center'],
                    out_dict['offset'],
                    thing_list=self.meta_dataset.thing_list,
                    label_divisor=self.meta_dataset.label_divisor,
                    stuff_area=config.POST_PROCESSING.STUFF_AREA,
                    void_label=(self.meta_dataset.label_divisor *
                                self.meta_dataset.ignore_label),
                    threshold=config.POST_PROCESSING.CENTER_THRESHOLD,
                    nms_kernel=config.POST_PROCESSING.NMS_KERNEL,
                    top_k=config.POST_PROCESSING.TOP_K_INSTANCE,
                    foreground_mask=None)
                torch.cuda.synchronize(self.device)
                post_time.update(time.time() - start_time)

                self.logger.info(
                    '[{}/{}]\t'
                    'Network Time: {net_time.val:.3f}s ({net_time.avg:.3f}s)\t'
                    'Post-processing Time: {post_time.val:.3f}s ({post_time.avg:.3f}s)\t'
                    .format(index,
                            total,
                            net_time=net_time,
                            post_time=post_time))

                # save predictions
                #semantic_pred = semantic_pred.squeeze(0).cpu().numpy()
                panoptic_pred = panoptic_pred.squeeze(0).cpu().numpy()

                # crop predictions
                #semantic_pred = semantic_pred[:raw_h, :raw_w]
                panoptic_pred = panoptic_pred[:raw_h, :raw_w]

                frame = creat_panoptic_annotation(
                    panoptic_pred,
                    label_divisor=self.meta_dataset.label_divisor,
                    colormap=self.meta_dataset.create_label_colormap(),
                    image=raw_image)
        except Exception:
            self.logger.exception("Exception during demo:")
            raise
        finally:
            self.logger.info("Demo finished.")
        # Returning inside `finally` would swallow the re-raise above.
        return frame
def main():
    args = parse_args()

    logger = logging.getLogger('segmentation_test')
    if not logger.isEnabledFor(logging.INFO):  # setup_logger has not been called yet
        setup_logger(output=config.OUTPUT_DIR, name='segmentation_test')

    logger.info(pprint.pformat(args))
    logger.info(config)

    # cudnn related setting
    cudnn.benchmark = config.CUDNN.BENCHMARK
    cudnn.deterministic = config.CUDNN.DETERMINISTIC
    cudnn.enabled = config.CUDNN.ENABLED
    gpus = list(config.TEST.GPUS)
    if len(gpus) > 1:
        raise ValueError('Test only supports single core.')
    device = torch.device('cuda:{}'.format(gpus[0]))

    # build model
    model = build_segmentation_model_from_cfg(config)

    # Change ASPP image pooling
    output_stride = 2**(5 - sum(config.MODEL.BACKBONE.DILATION))
    train_crop_h, train_crop_w = config.TEST.CROP_SIZE
    scale = 1. / output_stride
    pool_h = int((float(train_crop_h) - 1.0) * scale + 1.0)
    pool_w = int((float(train_crop_w) - 1.0) * scale + 1.0)
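    # The pooling window mirrors the test-crop feature-map size at the
    # backbone's output stride, using the same floor((x - 1) * scale) + 1
    # rule that strided convolutions follow.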

    model.set_image_pooling((pool_h, pool_w))

    logger.info("Model:\n{}".format(model))
    model = model.to(device)

    # build data_loader
    data_loader = build_test_loader_from_cfg(config)

    # load model
    if config.TEST.MODEL_FILE:
        model_state_file = config.TEST.MODEL_FILE
    else:
        model_state_file = os.path.join(config.OUTPUT_DIR, 'final_state.pth')

    if os.path.isfile(model_state_file):
        model_weights = torch.load(model_state_file)
        if 'state_dict' in model_weights:
            model_weights = model_weights['state_dict']
            logger.info('Evaluating an intermediate checkpoint.')
        model.load_state_dict(model_weights, strict=True)
        logger.info('Test model loaded from {}'.format(model_state_file))
    else:
        if not config.DEBUG.DEBUG:
            raise ValueError('Cannot find test model.')

    data_time = AverageMeter()
    net_time = AverageMeter()
    post_time = AverageMeter()
    timing_warmup_iter = 10
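    # Early iterations are excluded from the timing averages; they include
    # CUDA context setup and cudnn autotuning when CUDNN.BENCHMARK is on.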

    semantic_metric = SemanticEvaluator(
        num_classes=data_loader.dataset.num_classes,
        ignore_label=data_loader.dataset.ignore_label,
        output_dir=os.path.join(config.OUTPUT_DIR,
                                config.TEST.SEMANTIC_FOLDER),
        train_id_to_eval_id=data_loader.dataset.train_id_to_eval_id())

    instance_metric = None
    panoptic_metric = None

    if config.TEST.EVAL_INSTANCE:
        if 'cityscapes' in config.DATASET.DATASET:
            instance_metric = CityscapesInstanceEvaluator(
                output_dir=os.path.join(config.OUTPUT_DIR,
                                        config.TEST.INSTANCE_FOLDER),
                train_id_to_eval_id=data_loader.dataset.train_id_to_eval_id(),
                gt_dir=os.path.join(config.DATASET.ROOT, 'gtFine',
                                    config.DATASET.TEST_SPLIT))
        elif 'coco' in config.DATASET.DATASET:
            instance_metric = COCOInstanceEvaluator(
                output_dir=os.path.join(config.OUTPUT_DIR,
                                        config.TEST.INSTANCE_FOLDER),
                train_id_to_eval_id=data_loader.dataset.train_id_to_eval_id(),
                gt_dir=os.path.join(
                    config.DATASET.ROOT, 'annotations',
                    'instances_{}.json'.format(config.DATASET.TEST_SPLIT)))
        elif 'ade' in config.DATASET.DATASET:
            instance_metric = COCOInstanceEvaluator(
                output_dir=os.path.join(config.OUTPUT_DIR,
                                        config.TEST.INSTANCE_FOLDER),
                train_id_to_eval_id=data_loader.dataset.train_id_to_eval_id(),
                gt_dir=os.path.join(
                    config.DATASET.ROOT, 'annotations',
                    'instances_{}.json'.format(config.DATASET.TEST_SPLIT)))
        else:
            raise ValueError('Undefined evaluator for dataset {}'.format(
                config.DATASET.DATASET))

    if config.TEST.EVAL_PANOPTIC:
        if 'cityscapes' in config.DATASET.DATASET:
            panoptic_metric = CityscapesPanopticEvaluator(
                output_dir=os.path.join(config.OUTPUT_DIR,
                                        config.TEST.PANOPTIC_FOLDER),
                train_id_to_eval_id=data_loader.dataset.train_id_to_eval_id(),
                label_divisor=data_loader.dataset.label_divisor,
                void_label=data_loader.dataset.label_divisor *
                data_loader.dataset.ignore_label,
                gt_dir=config.DATASET.ROOT,
                split=config.DATASET.TEST_SPLIT,
                num_classes=data_loader.dataset.num_classes)
        elif 'coco' in config.DATASET.DATASET:
            panoptic_metric = COCOPanopticEvaluator(
                output_dir=os.path.join(config.OUTPUT_DIR,
                                        config.TEST.PANOPTIC_FOLDER),
                train_id_to_eval_id=data_loader.dataset.train_id_to_eval_id(),
                label_divisor=data_loader.dataset.label_divisor,
                void_label=data_loader.dataset.label_divisor *
                data_loader.dataset.ignore_label,
                gt_dir=config.DATASET.ROOT,
                split=config.DATASET.TEST_SPLIT,
                num_classes=data_loader.dataset.num_classes)
        elif 'ade' in config.DATASET.DATASET:
            panoptic_metric = COCOPanopticEvaluator(
                output_dir=os.path.join(config.OUTPUT_DIR,
                                        config.TEST.PANOPTIC_FOLDER),
                train_id_to_eval_id=data_loader.dataset.train_id_to_eval_id(),
                label_divisor=data_loader.dataset.label_divisor,
                void_label=data_loader.dataset.label_divisor *
                data_loader.dataset.ignore_label,
                gt_dir=config.DATASET.ROOT,
                split=config.DATASET.TEST_SPLIT,
                num_classes=data_loader.dataset.num_classes)
        else:
            raise ValueError('Undefined evaluator for dataset {}'.format(
                config.DATASET.DATASET))

    foreground_metric = None
    if config.TEST.EVAL_FOREGROUND:
        foreground_metric = SemanticEvaluator(
            num_classes=2,
            ignore_label=data_loader.dataset.ignore_label,
            output_dir=os.path.join(config.OUTPUT_DIR,
                                    config.TEST.FOREGROUND_FOLDER))

    image_filename_list = [
        os.path.splitext(os.path.basename(ann))[0]
        for ann in data_loader.dataset.ann_list
    ]
    image_id_list = list(range(len(image_filename_list)))

    # Debug output.
    if config.TEST.DEBUG:
        debug_out_dir = os.path.join(config.OUTPUT_DIR, 'debug_test')
        PathManager.mkdirs(debug_out_dir)

    if not config.TEST.TEST_TIME_AUGMENTATION:
        if config.TEST.FLIP_TEST or len(config.TEST.SCALE_LIST) > 1:
            config.TEST.TEST_TIME_AUGMENTATION = True
            logger.warning(
                "Overriding TEST.TEST_TIME_AUGMENTATION to True because "
                "test-time augmentation was detected. Please check your "
                "config file if you think this is a mistake.")

    # Test loop.
    try:
        model.eval()
        with torch.no_grad():
            for i, data in enumerate(data_loader):
                if i == timing_warmup_iter:
                    data_time.reset()
                    net_time.reset()
                    post_time.reset()

                # data
                start_time = time.time()
                for key in data.keys():
                    try:
                        data[key] = data[key].to(device)
                    except AttributeError:
                        # non-tensor entries (e.g. filenames) stay on the host
                        pass

                image = data.pop('image')
                torch.cuda.synchronize(device)
                data_time.update(time.time() - start_time)

                start_time = time.time()
                if config.TEST.TEST_TIME_AUGMENTATION:
                    raw_image = data['raw_image'][0].cpu().numpy()
                    out_dict = multi_scale_inference(config, model, raw_image,
                                                     device)
                else:
                    out_dict = model(image)

                torch.cuda.synchronize(device)
                net_time.update(time.time() - start_time)

                start_time = time.time()
                semantic_pred = get_semantic_segmentation(out_dict['semantic'])
                if 'foreground' in out_dict:
                    foreground_pred = get_semantic_segmentation(
                        out_dict['foreground'])
                else:
                    foreground_pred = None

                # Oracle experiment
                if config.TEST.ORACLE_SEMANTIC:
                    # Use predicted semantic for foreground
                    foreground_pred = torch.zeros_like(semantic_pred)
                    for thing_class in data_loader.dataset.thing_list:
                        foreground_pred[semantic_pred == thing_class] = 1
                    # Use gt semantic
                    semantic_pred = data['semantic']
                    # Set it to a stuff label
                    stuff_label = 0
                    while stuff_label in data_loader.dataset.thing_list:
                        stuff_label += 1
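                    # Remap ignored pixels to the chosen stuff label so the
                    # grouping step treats them as background.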
                    semantic_pred[semantic_pred ==
                                  data_loader.dataset.ignore_label] = stuff_label
                if config.TEST.ORACLE_FOREGROUND:
                    foreground_pred = data['foreground']
                if config.TEST.ORACLE_CENTER:
                    out_dict['center'] = data['center']
                if config.TEST.ORACLE_OFFSET:
                    out_dict['offset'] = data['offset']

                if config.TEST.EVAL_INSTANCE or config.TEST.EVAL_PANOPTIC:
                    panoptic_pred, center_pred = get_panoptic_segmentation(
                        semantic_pred,
                        out_dict['center'],
                        out_dict['offset'],
                        thing_list=data_loader.dataset.thing_list,
                        label_divisor=data_loader.dataset.label_divisor,
                        stuff_area=config.POST_PROCESSING.STUFF_AREA,
                        void_label=(data_loader.dataset.label_divisor *
                                    data_loader.dataset.ignore_label),
                        threshold=config.POST_PROCESSING.CENTER_THRESHOLD,
                        nms_kernel=config.POST_PROCESSING.NMS_KERNEL,
                        top_k=config.POST_PROCESSING.TOP_K_INSTANCE,
                        foreground_mask=foreground_pred)
                else:
                    panoptic_pred = None
                torch.cuda.synchronize(device)
                post_time.update(time.time() - start_time)
                logger.info(
                    '[{}/{}]\t'
                    'Data Time: {data_time.val:.3f}s ({data_time.avg:.3f}s)\t'
                    'Network Time: {net_time.val:.3f}s ({net_time.avg:.3f}s)\t'
                    'Post-processing Time: {post_time.val:.3f}s ({post_time.avg:.3f}s)\t'
                    .format(i,
                            len(data_loader),
                            data_time=data_time,
                            net_time=net_time,
                            post_time=post_time))

                semantic_pred = semantic_pred.squeeze(0).cpu().numpy()
                if panoptic_pred is not None:
                    panoptic_pred = panoptic_pred.squeeze(0).cpu().numpy()
                if foreground_pred is not None:
                    foreground_pred = foreground_pred.squeeze(0).cpu().numpy()

                # Crop padded regions.
                image_size = data['size'].squeeze(0).cpu().numpy()
                semantic_pred = semantic_pred[:image_size[0], :image_size[1]]
                if panoptic_pred is not None:
                    panoptic_pred = panoptic_pred[:image_size[0], :image_size[1]]
                if foreground_pred is not None:
                    foreground_pred = foreground_pred[:image_size[0], :image_size[1]]

                # Resize back to the raw image size.
                raw_image_size = data['raw_size'].squeeze(0).cpu().numpy()
                if (raw_image_size[0] != image_size[0]
                        or raw_image_size[1] != image_size[1]):
                    # np.float was removed in NumPy 1.20+; cast via float32.
                    semantic_pred = cv2.resize(
                        semantic_pred.astype(np.float32),
                        (raw_image_size[1], raw_image_size[0]),
                        interpolation=cv2.INTER_NEAREST).astype(np.int32)
                    if panoptic_pred is not None:
                        panoptic_pred = cv2.resize(
                            panoptic_pred.astype(np.float32),
                            (raw_image_size[1], raw_image_size[0]),
                            interpolation=cv2.INTER_NEAREST).astype(np.int32)
                    if foreground_pred is not None:
                        foreground_pred = cv2.resize(
                            foreground_pred.astype(np.float32),
                            (raw_image_size[1], raw_image_size[0]),
                            interpolation=cv2.INTER_NEAREST).astype(np.int32)

                # Evaluates semantic segmentation.
                semantic_metric.update(
                    semantic_pred, data['raw_label'].squeeze(0).cpu().numpy(),
                    image_filename_list[i])

                # Optional: evaluates instance segmentation.
                if instance_metric is not None:
                    raw_semantic = F.softmax(
                        out_dict['semantic'][:, :, :image_size[0],
                                             :image_size[1]],
                        dim=1)
                    center_hmp = out_dict['center'][:, :, :image_size[0],
                                                    :image_size[1]]
                    if (raw_image_size[0] != image_size[0]
                            or raw_image_size[1] != image_size[1]):
                        raw_semantic = F.interpolate(
                            raw_semantic,
                            size=(raw_image_size[0], raw_image_size[1]),
                            mode='bilinear',
                            align_corners=False)  # Consistent with OpenCV.
                        center_hmp = F.interpolate(
                            center_hmp,
                            size=(raw_image_size[0], raw_image_size[1]),
                            mode='bilinear',
                            align_corners=False)  # Consistent with OpenCV.

                    raw_semantic = raw_semantic.squeeze(0).cpu().numpy()
                    center_hmp = center_hmp.squeeze(1).squeeze(0).cpu().numpy()

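                    # Per-instance scores are derived from the semantic
                    # probabilities and/or the center heatmap, as selected by
                    # TEST.INSTANCE_SCORE_TYPE.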
                    instances = get_cityscapes_instance_format(
                        panoptic_pred,
                        raw_semantic,
                        center_hmp,
                        label_divisor=data_loader.dataset.label_divisor,
                        score_type=config.TEST.INSTANCE_SCORE_TYPE)
                    instance_metric.update(instances, image_id_list[i])

                # Optional: evaluates panoptic segmentation.
                if panoptic_metric is not None:
                    panoptic_metric.update(
                        panoptic_pred,
                        image_filename=image_filename_list[i],
                        image_id=image_id_list[i])

                # Optional: evaluates foreground segmentation.
                if foreground_metric is not None:
                    semantic_label = data['raw_label'].squeeze(0).cpu().numpy()
                    foreground_label = np.zeros_like(semantic_label)
                    for sem_lab in np.unique(semantic_label):
                        # Both `stuff` and `ignore` are background.
                        if sem_lab in data_loader.dataset.thing_list:
                            foreground_label[semantic_label == sem_lab] = 1

                    # Use semantic segmentation as foreground segmentation.
                    if foreground_pred is None:
                        foreground_pred = np.zeros_like(semantic_pred)
                        for sem_lab in np.unique(semantic_pred):
                            if sem_lab in data_loader.dataset.thing_list:
                                foreground_pred[semantic_pred == sem_lab] = 1

                    foreground_metric.update(foreground_pred, foreground_label,
                                             image_filename_list[i])

                if config.TEST.DEBUG:
                    # Raw outputs
                    save_debug_images(
                        dataset=data_loader.dataset,
                        batch_images=image,
                        batch_targets=data,
                        batch_outputs=out_dict,
                        out_dir=debug_out_dir,
                        iteration=i,
                        target_keys=config.DEBUG.TARGET_KEYS,
                        output_keys=config.DEBUG.OUTPUT_KEYS,
                        is_train=False,
                    )
                    if panoptic_pred is not None:
                        # Processed outputs
                        save_annotation(
                            semantic_pred,
                            debug_out_dir,
                            'semantic_pred_%d' % i,
                            add_colormap=True,
                            colormap=data_loader.dataset.create_label_colormap())
                        pan_to_sem = panoptic_pred // data_loader.dataset.label_divisor
                        save_annotation(
                            pan_to_sem,
                            debug_out_dir,
                            'pan_to_sem_pred_%d' % i,
                            add_colormap=True,
                            colormap=data_loader.dataset.create_label_colormap())
                        ins_id = panoptic_pred % data_loader.dataset.label_divisor
                        pan_to_ins = panoptic_pred.copy()
                        pan_to_ins[ins_id == 0] = 0
                        save_instance_annotation(pan_to_ins, debug_out_dir,
                                                 'pan_to_ins_pred_%d' % i)

                        save_panoptic_annotation(
                            panoptic_pred,
                            debug_out_dir,
                            'panoptic_pred_%d' % i,
                            label_divisor=data_loader.dataset.label_divisor,
                            colormap=data_loader.dataset.create_label_colormap())
    except Exception:
        logger.exception("Exception during testing:")
        raise
    finally:
        logger.info("Inference finished.")
        semantic_results = semantic_metric.evaluate()
        logger.info(semantic_results)
        if instance_metric is not None:
            instance_results = instance_metric.evaluate()
            logger.info(instance_results)
        if panoptic_metric is not None:
            panoptic_results = panoptic_metric.evaluate()
            logger.info(panoptic_results)
        if foreground_metric is not None:
            foreground_results = foreground_metric.evaluate()
            logger.info(foreground_results)
Example #4
def main():
    args = parse_args()

    logger = logging.getLogger('segment_video.py')
    if not logger.isEnabledFor(logging.INFO):  # setup_logger has not been called yet
        setup_logger(output=args.output_dir, name='segment_video.py')

    logger.info(pprint.pformat(args))
    logger.info(config)

    # cudnn related setting
    cudnn.benchmark = config.CUDNN.BENCHMARK
    cudnn.deterministic = config.CUDNN.DETERMINISTIC
    cudnn.enabled = config.CUDNN.ENABLED
    gpus = list(config.TEST.GPUS)
    if len(gpus) > 1:
        raise ValueError('Test only supports single core.')
    device = torch.device('cuda:{}'.format(gpus[0]))

    # build model
    model = build_segmentation_model_from_cfg(config)

    logger.info("Model:\n{}".format(model))
    model = model.to(device)
    meta_dataset = CityscapesMeta()

    # load model
    if config.TEST.MODEL_FILE:
        model_state_file = config.TEST.MODEL_FILE
    else:
        model_state_file = os.path.join(config.OUTPUT_DIR, 'final_state.pth')

    if os.path.isfile(model_state_file):
        model_weights = torch.load(model_state_file)
        if 'state_dict' in model_weights:
            model_weights = model_weights['state_dict']
            logger.info('Evaluating an intermediate checkpoint.')
        model.load_state_dict(model_weights, strict=True)
        logger.info('Test model loaded from {}'.format(model_state_file))
    else:
        if not config.DEBUG.DEBUG:
            raise ValueError('Cannot find test model.')

    model.eval()

    # load images
    cap = None
    if os.path.exists(args.input):
        if os.path.isfile(args.input):
            # extract extension
            ext = os.path.splitext(os.path.basename(args.input))[1]
            if ext in ['.mpeg', '.mp4']:
                cap = cv2.VideoCapture(args.input)
            else:
                raise ValueError("Unsupported extension: {}.".format(ext))
        else:
            raise ValueError(
                "Input must be a file, not a directory: {}".format(args.input))
    else:
        raise ValueError('Input file does not exist: {}'.format(args.input))

    # dir to save panoptic outputs
    panoptic_out_dir = os.path.join(args.output_dir, 'panoptic')
    PathManager.mkdirs(panoptic_out_dir)

    # build image demo transform
    transforms = T.Compose(
        [T.ToTensor(),
         T.Normalize(config.DATASET.MEAN, config.DATASET.STD)])

    # Get video information
    length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)
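    # Note: OpenCV can report 0 for frame count or FPS on some containers;
    # the values are used as-is here.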

    # Define the codec and create VideoWriter object
    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    out = cv2.VideoWriter(os.path.join(args.output_dir, 'output.avi'), fourcc,
                          fps, (width, height))

    try:
        with torch.no_grad():
            pbar = tqdm(total=length)
            ii = 0
            while cap.isOpened():
                ret, raw_image = cap.read()
                if ret:

                    # pad image
                    raw_shape = raw_image.shape[:2]
                    raw_h = raw_shape[0]
                    raw_w = raw_shape[1]
                    new_h = (raw_h + 31) // 32 * 32 + 1
                    new_w = (raw_w + 31) // 32 * 32 + 1
                    input_image = np.zeros((new_h, new_w, 3), dtype=np.uint8)
                    input_image[:, :] = config.DATASET.MEAN
                    input_image[:raw_h, :raw_w, :] = raw_image

                    image, _ = transforms(input_image, None)
                    image = image.unsqueeze(0).to(device)

                    # network
                    out_dict = model(image)
                    torch.cuda.synchronize(device)

                    # post-processing
                    semantic_pred = get_semantic_segmentation(
                        out_dict['semantic'])

                    panoptic_pred, center_pred = get_panoptic_segmentation(
                        semantic_pred,
                        out_dict['center'],
                        out_dict['offset'],
                        thing_list=meta_dataset.thing_list,
                        label_divisor=meta_dataset.label_divisor,
                        stuff_area=config.POST_PROCESSING.STUFF_AREA,
                        void_label=(meta_dataset.label_divisor *
                                    meta_dataset.ignore_label),
                        threshold=config.POST_PROCESSING.CENTER_THRESHOLD,
                        nms_kernel=config.POST_PROCESSING.NMS_KERNEL,
                        top_k=config.POST_PROCESSING.TOP_K_INSTANCE,
                        foreground_mask=None)
                    torch.cuda.synchronize(device)

                    # Send predictions to cpu
                    center_pred = center_pred.squeeze(0).cpu().numpy()
                    semantic_pred = semantic_pred.squeeze(0).cpu().numpy()
                    panoptic_pred = panoptic_pred.squeeze(0).cpu().numpy()

                    # Crop predictions
                    semantic_pred = semantic_pred[:raw_h, :raw_w]
                    panoptic_pred = panoptic_pred[:raw_h, :raw_w]

                    # Save predictions
                    pil_image = save_panoptic_annotation(
                        panoptic_pred,
                        panoptic_out_dir,
                        'panoptic_pred_%d' % ii,
                        label_divisor=meta_dataset.label_divisor,
                        center_pred=center_pred,
                        colormap=meta_dataset.create_label_colormap(),
                        labelmap=meta_dataset.create_label_stringmap()
                        if args.text_labels else None,
                        image=raw_image)

                    ii += 1

                    # Write frame to the video file. OpenCV expects BGR, so
                    # flip the RGB channels and make the view contiguous.
                    np_image = np.asarray(pil_image)
                    np_image = np.ascontiguousarray(np_image[:, :, ::-1])
                    out.write(np_image)

                    # Update progress bar
                    pbar.update(1)
                else:
                    break

        pbar.close()
        # Release everything if job is finished
        cap.release()
        out.release()
        cv2.destroyAllWindows()

    except Exception:
        logger.exception("Exception during segment_video.py:")
        raise
    finally:
        logger.info("Segmenting video finished.")
        logger.info(
            "Panoptic predictions saved to {}".format(panoptic_out_dir))