Example #1
import numpy as np
from PIL import Image
from pycocotools.mask import encode as mask_encode  # COCO RLE encoder


def process_panoptic_prediction(panoptic_pred, num_stuff, idx, img_size, original_size):
    # Unpack the panoptic prediction: per-pixel instance ids, per-instance
    # categories, confidence scores and is-crowd flags
    msk_pred, cat_pred, obj_pred, iscrowd_pred = panoptic_pred

    # One bounding box per predicted instance (extract_boxes is a helper from
    # the surrounding codebase)
    bbx_pred = extract_boxes(msk_pred, cat_pred.numel())

    # Rescale boxes from the network input size to the original image size and
    # clamp them to the image bounds; boxes are stored as (y0, x0, y1, x1)
    bbx_pred[:, [0, 2]] = (bbx_pred[:, [0, 2]] / img_size[0] * original_size[0]).clamp(min=0, max=original_size[0])
    bbx_pred[:, [1, 3]] = (bbx_pred[:, [1, 3]] / img_size[1] * original_size[1]).clamp(min=0, max=original_size[1])
    bbx_pred_size = bbx_pred[:, 2:] - bbx_pred[:, :2]

    outs = []
    for i, (obj_i, cat_i, bbx_i, iscrowd_i, bbx_size_i) in enumerate(zip(
            obj_pred, cat_pred, bbx_pred, iscrowd_pred, bbx_pred_size)):
        # Report only "thing" instances: skip crowd regions, stuff classes
        # (categories below num_stuff) and the void label (255)
        if iscrowd_i.item() == 1 or cat_i.item() < num_stuff or cat_i.item() == 255:
            continue
        out = dict(image_id=idx, category_id=int(cat_i.item()), score=float(obj_i.item()))

        out["bbox"] = [
            float(bbx_i[1].item()),
            float(bbx_i[0].item()),
            float(bbx_size_i[1].item()),
            float(bbx_size_i[0].item()),
        ]

        # Binary mask of the i-th instance, resized to the original image size
        segmentation = msk_pred == i
        segmentation = Image.fromarray(segmentation.numpy()).resize(original_size[::-1], Image.NEAREST)
        # pycocotools expects a Fortran-ordered uint8 array; decode the RLE
        # counts to str so the output is JSON-serializable
        out["segmentation"] = mask_encode(np.asfortranarray(np.array(segmentation, dtype=np.uint8)))
        out["segmentation"]["counts"] = str(out["segmentation"]["counts"], "utf-8")

        outs.append(out)

    return outs
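
A minimal usage sketch for this function, assuming extract_boxes is available from the surrounding codebase; the tensor shapes, category ids and sizes below are purely illustrative:

import torch

msk = torch.zeros(512, 1024, dtype=torch.long)   # per-pixel instance ids
msk[100:200, 300:450] = 1                        # one fake "thing" instance
cat = torch.tensor([0, 12])                      # id 0 is a stuff class here
obj = torch.tensor([1.0, 0.87])
iscrowd = torch.tensor([0, 0])

coco_anns = process_panoptic_prediction(
    (msk, cat, obj, iscrowd), num_stuff=11, idx=42,
    img_size=(512, 1024), original_size=(1024, 2048))
# -> a list with one COCO-style dict, for the cat == 12 instance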
Example #2
import cv2
import numpy as np
from pycocotools.mask import encode as mask_encode  # COCO RLE encoder

# `config` and `expand_boxes` are assumed to come from the surrounding codebase


def im_post(boxes_all, masks_all, scores, pred_boxes, pred_masks, cls_inds,
            num_classes, im_info):
    cls_segms = [[] for _ in range(num_classes)]

    M = config.network.mask_size

    # Pad each MxM mask by one pixel on every side before resizing to avoid
    # boundary artifacts; the reference boxes are expanded by the same ratio
    scale = (M + 2.0) / M

    ref_boxes = expand_boxes(pred_boxes, scale)
    ref_boxes = ref_boxes.astype(np.int32)
    padded_mask = np.zeros((M + 2, M + 2), dtype=np.float32)

    # Class 0 is the background, hence the loop starts at 1
    for idx in range(1, num_classes):
        segms = []
        keep = cls_inds == idx
        cls_boxes = np.hstack([pred_boxes[keep, :], scores.reshape(-1, 1)[keep]])
        cls_pred_masks = pred_masks[keep]
        cls_ref_boxes = ref_boxes[keep]
        for i in range(cls_boxes.shape[0]):
            # Paste the MxM prediction into the padded buffer, selecting the
            # channel for this class when per-class masks are predicted
            if pred_masks.shape[1] > 1:
                padded_mask[1:-1, 1:-1] = cls_pred_masks[i, idx, :, :]
            else:
                padded_mask[1:-1, 1:-1] = cls_pred_masks[i, 0, :, :]
            ref_box = cls_ref_boxes[i, :]

            # Resize the padded mask to the (expanded) box size and binarize
            w = np.maximum(ref_box[2] - ref_box[0] + 1, 1)
            h = np.maximum(ref_box[3] - ref_box[1] + 1, 1)

            mask = cv2.resize(padded_mask, (int(w), int(h)))
            mask = np.array(mask > 0.5, dtype=np.uint8)
            # Paste the box-sized mask into a full-image canvas, clipping the
            # box to the image bounds
            im_mask = np.zeros((im_info[0], im_info[1]), dtype=np.uint8)

            x_0 = max(ref_box[0], 0)
            x_1 = min(ref_box[2] + 1, im_info[1])
            y_0 = max(ref_box[1], 0)
            y_1 = min(ref_box[3] + 1, im_info[0])

            im_mask[y_0:y_1, x_0:x_1] = mask[(y_0 - ref_box[1]):(y_1 - ref_box[1]),
                                             (x_0 - ref_box[0]):(x_1 - ref_box[0])]

            # Get the RLE encoding used by the COCO evaluation API; decode the
            # counts bytes so the result is JSON-serializable
            rle = mask_encode(np.array(im_mask[:, :, np.newaxis], order='F'))[0]
            rle['counts'] = rle['counts'].decode()
            segms.append(rle)

        cls_segms[idx] = segms
        boxes_all[idx].append(cls_boxes)
        masks_all[idx].append(segms)

    return cls_segms
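
im_post relies on an expand_boxes helper that is not shown above. A minimal sketch of the usual Detectron-style implementation, under the assumption that boxes are (x0, y0, x1, y1) NumPy arrays:

import numpy as np

def expand_boxes(boxes, scale):
    # Expand each (x0, y0, x1, y1) box about its own center by `scale`
    w_half = (boxes[:, 2] - boxes[:, 0]) * 0.5 * scale
    h_half = (boxes[:, 3] - boxes[:, 1]) * 0.5 * scale
    x_c = (boxes[:, 2] + boxes[:, 0]) * 0.5
    y_c = (boxes[:, 3] + boxes[:, 1]) * 0.5

    boxes_exp = np.zeros(boxes.shape)
    boxes_exp[:, 0] = x_c - w_half
    boxes_exp[:, 1] = y_c - h_half
    boxes_exp[:, 2] = x_c + w_half
    boxes_exp[:, 3] = y_c + h_half
    return boxes_exp

Expanding by (M + 2) / M makes the expanded box line up exactly with the padded (M + 2) x (M + 2) mask, so the one-pixel border falls just outside the original box.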
Example #3
import numpy as np
import torch
from PIL import Image
from pycocotools.mask import encode as mask_encode  # COCO RLE encoder

# PackedSequence, invert_roi_bbx and roi_sampling are assumed to come from
# the surrounding codebase


def process_prediction(bbx_pred, cls_pred, obj_pred, msk_pred, img_size, idx, original_size):
    # Move everything to the CPU
    bbx_pred, cls_pred, obj_pred = (t.cpu() for t in (bbx_pred, cls_pred, obj_pred))
    msk_pred = msk_pred.cpu() if msk_pred is not None else None

    if msk_pred is not None:
        if isinstance(msk_pred, torch.Tensor):
            # ROI-style prediction: paste the fixed-size ROI masks back into
            # image coordinates
            bbx_inv = invert_roi_bbx(bbx_pred, list(msk_pred.shape[-2:]), list(img_size))
            bbx_idx = torch.arange(0, msk_pred.size(0), dtype=torch.long)
            msk_pred = roi_sampling(msk_pred.unsqueeze(1).sigmoid(), bbx_inv, bbx_idx, list(img_size), padding="zero")
            msk_pred = msk_pred.squeeze(1) > 0.5
        elif isinstance(msk_pred, PackedSequence):
            # Seeds-style prediction: each mask is a crop anchored at the
            # top-left corner of its box, pasted into a full-size canvas
            msk_pred.data = msk_pred.data > 0.5
            msk_pred_exp = msk_pred.data.new_zeros(len(msk_pred), img_size[0], img_size[1])

            for it, (msk_pred_i, bbx_pred_i) in enumerate(zip(msk_pred, bbx_pred)):
                i, j = int(bbx_pred_i[0].item()), int(bbx_pred_i[1].item())
                msk_pred_exp[it, i:i + msk_pred_i.size(0), j:j + msk_pred_i.size(1)] = msk_pred_i

            msk_pred = msk_pred_exp

    # Rescale boxes from the network input size to the original image size and
    # clamp them to the image bounds; boxes are stored as (y0, x0, y1, x1)
    bbx_pred[:, [0, 2]] = (bbx_pred[:, [0, 2]] / img_size[0] * original_size[0]).clamp(min=0, max=original_size[0])
    bbx_pred[:, [1, 3]] = (bbx_pred[:, [1, 3]] / img_size[1] * original_size[1]).clamp(min=0, max=original_size[1])
    bbx_pred_size = bbx_pred[:, 2:] - bbx_pred[:, :2]

    outs = []
    for i, (bbx_pred_i, bbx_pred_size_i, cls_pred_i, obj_pred_i) in \
            enumerate(zip(bbx_pred, bbx_pred_size, cls_pred, obj_pred)):
        out = dict(image_id=idx, category_id=int(cls_pred_i.item()), score=float(obj_pred_i.item()))

        out["bbox"] = [
            float(bbx_pred_i[1].item()),
            float(bbx_pred_i[0].item()),
            float(bbx_pred_size_i[1].item()),
            float(bbx_pred_size_i[0].item()),
        ]

        # Expand and convert the mask if present
        if msk_pred is not None:
            segmentation = Image.fromarray(msk_pred[i].numpy()).resize(original_size[::-1], Image.NEAREST)

            # pycocotools expects a Fortran-ordered uint8 array; decode the
            # RLE counts to str so the output is JSON-serializable
            out["segmentation"] = mask_encode(np.asfortranarray(np.array(segmentation, dtype=np.uint8)))
            out["segmentation"]["counts"] = str(out["segmentation"]["counts"], "utf-8")

        outs.append(out)

    return outs
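
A minimal usage sketch: passing msk_pred=None exercises only the box path, so none of the mask helpers are needed. All shapes and values below are illustrative only:

import torch

bbx = torch.tensor([[10.0, 20.0, 200.0, 300.0]])  # (y0, x0, y1, x1), input coords
cls = torch.tensor([3])
obj = torch.tensor([0.91])

coco_anns = process_prediction(
    bbx, cls, obj, None, img_size=(512, 1024), idx=7, original_size=(1024, 2048))
# -> one COCO-style dict with bbox [40.0, 20.0, 560.0, 380.0] (boxes doubled,
#    since original_size is twice img_size in each dimension)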