Example #1
def segm_results(cls_boxes, masks, ref_boxes, im_h, im_w,
                 num_classes=81,
                 M=14, #  cfg.MRCNN.RESOLUTION
                 cls_specific_mask=True,
                 thresh_binarize=0.5):
    cls_segms = [[] for _ in range(num_classes)]
    mask_ind = 0
    # To work around an issue with cv2.resize (it seems to automatically pad
    # with repeated border values), we manually zero-pad the masks by 1 pixel
    # prior to resizing back to the original image resolution. This prevents
    # "top hat" artifacts. We therefore need to expand the reference boxes by an
    # appropriate factor.
    scale = (M + 2.0) / M
    ref_boxes = box_utils.expand_boxes(ref_boxes, scale)
    ref_boxes = ref_boxes.astype(np.int32)
    padded_mask = np.zeros((M + 2, M + 2), dtype=np.float32)

    # skip j = 0, because it's the background class
    for j in range(1, num_classes):
        segms = []
        for _ in range(cls_boxes[j].shape[0]):
            if cls_specific_mask:
                padded_mask[1:-1, 1:-1] = masks[mask_ind, j, :, :]
            else:
                padded_mask[1:-1, 1:-1] = masks[mask_ind, 0, :, :]

            ref_box = ref_boxes[mask_ind, :]
            w = ref_box[2] - ref_box[0] + 1
            h = ref_box[3] - ref_box[1] + 1
            w = np.maximum(w, 1)
            h = np.maximum(h, 1)

            mask = cv2.resize(padded_mask, (w, h))
            mask = np.array(mask > thresh_binarize, dtype=np.uint8)
            im_mask = np.zeros((im_h, im_w), dtype=np.uint8)

            x_0 = max(ref_box[0], 0)
            x_1 = min(ref_box[2] + 1, im_w)
            y_0 = max(ref_box[1], 0)
            y_1 = min(ref_box[3] + 1, im_h)

            im_mask[y_0:y_1, x_0:x_1] = mask[
                (y_0 - ref_box[1]):(y_1 - ref_box[1]),
                (x_0 - ref_box[0]):(x_1 - ref_box[0])
            ]

            # Get RLE encoding used by the COCO evaluation API
            rle = mask_util.encode(
                np.array(im_mask[:, :, np.newaxis], order='F')
            )[0]
            rle['counts'] = rle['counts'].decode()  # decode bytes to str so the RLE can later be dumped to JSON
            segms.append(rle)

            mask_ind += 1

        cls_segms[j] = segms

    assert mask_ind == masks.shape[0]
    return cls_segms
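
Every example below relies on box_utils.expand_boxes to grow the reference boxes by the same (M + 2) / M factor that the 1-pixel zero padding introduces. For reference, a minimal sketch of what that helper is assumed to do (Detectron-style scaling of each box about its center):

import numpy as np

def expand_boxes(boxes, scale):
    # Expand an (N, 4) array of [x1, y1, x2, y2] boxes about their centers.
    w_half = (boxes[:, 2] - boxes[:, 0]) * 0.5 * scale
    h_half = (boxes[:, 3] - boxes[:, 1]) * 0.5 * scale
    x_c = (boxes[:, 2] + boxes[:, 0]) * 0.5
    y_c = (boxes[:, 3] + boxes[:, 1]) * 0.5

    boxes_exp = np.zeros(boxes.shape)
    boxes_exp[:, 0] = x_c - w_half
    boxes_exp[:, 1] = y_c - h_half
    boxes_exp[:, 2] = x_c + w_half
    boxes_exp[:, 3] = y_c + h_half
    return boxes_exp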
Example #2
def segm_results(cls_boxes, masks, ref_boxes, im_h, im_w):
    num_classes = cfg.MODEL.NUM_CLASSES
    cls_segms = [[] for _ in range(num_classes)]
    mask_ind = 0
    # To work around an issue with cv2.resize (it seems to automatically pad
    # with repeated border values), we manually zero-pad the masks by 1 pixel
    # prior to resizing back to the original image resolution. This prevents
    # "top hat" artifacts. We therefore need to expand the reference boxes by an
    # appropriate factor.
    M = cfg.MRCNN.RESOLUTION
    scale = (M + 2.0) / M
    ref_boxes = box_utils.expand_boxes(ref_boxes, scale)
    ref_boxes = ref_boxes.astype(np.int32)
    padded_mask = np.zeros((M + 2, M + 2), dtype=np.float32)

    # skip j = 0, because it's the background class
    for j in range(1, num_classes):
        segms = []
        for _ in range(cls_boxes[j].shape[0]):
            if cfg.MRCNN.CLS_SPECIFIC_MASK:
                padded_mask[1:-1, 1:-1] = masks[mask_ind, j, :, :]
            else:
                padded_mask[1:-1, 1:-1] = masks[mask_ind, 0, :, :]

            ref_box = ref_boxes[mask_ind, :]
            w = (ref_box[2] - ref_box[0] + 1)
            h = (ref_box[3] - ref_box[1] + 1)
            w = np.maximum(w, 1)
            h = np.maximum(h, 1)

            mask = cv2.resize(padded_mask, (w, h))
            mask = np.array(mask > cfg.MRCNN.THRESH_BINARIZE, dtype=np.uint8)
            im_mask = np.zeros((im_h, im_w), dtype=np.uint8)

            x_0 = max(ref_box[0], 0)
            x_1 = min(ref_box[2] + 1, im_w)
            y_0 = max(ref_box[1], 0)
            y_1 = min(ref_box[3] + 1, im_h)

            im_mask[y_0:y_1, x_0:x_1] = mask[
                (y_0 - ref_box[1]):(y_1 - ref_box[1]), (x_0 - ref_box[0]):(x_1 - ref_box[0])]

            # Get RLE encoding used by the COCO evaluation API
            rle = mask_util.encode(np.array(im_mask[:, :, np.newaxis], order='F'))[0]
            # For dumping to json, need to decode the byte string.
            # https://github.com/cocodataset/cocoapi/issues/70
            rle['counts'] = rle['counts'].decode('ascii')
            segms.append(rle)

            mask_ind += 1

        cls_segms[j] = segms

    assert mask_ind == masks.shape[0]
    return cls_segms
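
Because the RLE counts are decoded to str here, the returned cls_segms can be serialized directly into a COCO-style results file. A small illustrative sketch (the image_id/score bookkeeping and the function name are assumptions, not part of the function above):

import json

def dump_coco_segm_results(cls_segms, cls_boxes, image_id, out_path):
    # Flatten per-class RLEs into the COCO segmentation-results format.
    results = []
    for j in range(1, len(cls_segms)):
        for rle, box in zip(cls_segms[j], cls_boxes[j]):
            results.append({
                'image_id': image_id,
                'category_id': j,         # assumes contiguous category ids; remap if needed
                'segmentation': rle,
                'score': float(box[-1]),  # last column of cls_boxes[j] is the detection score
            })
    with open(out_path, 'w') as f:
        json.dump(results, f)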
Example #3
def segm_results(cls_boxes, masks, ref_boxes, im_h, im_w):
    num_classes = cfg.MODEL.NUM_CLASSES
    cls_segms = [[] for _ in range(num_classes)]
    mask_ind = 0
    # To work around an issue with cv2.resize (it seems to automatically pad
    # with repeated border values), we manually zero-pad the masks by 1 pixel
    # prior to resizing back to the original image resolution. This prevents
    # "top hat" artifacts. We therefore need to expand the reference boxes by an
    # appropriate factor.
    M = cfg.MRCNN.RESOLUTION
    scale = (M + 2.0) / M
    ref_boxes = box_utils.expand_boxes(ref_boxes, scale)
    ref_boxes = ref_boxes.astype(np.int32)
    padded_mask = np.zeros((M + 2, M + 2), dtype=np.float32)

    # skip j = 0, because it's the background class
    for j in range(1, num_classes):
        segms = []
        for _ in range(cls_boxes[j].shape[0]):
            if cfg.MRCNN.CLS_SPECIFIC_MASK:
                padded_mask[1:-1, 1:-1] = masks[mask_ind, j, :, :]
            else:
                padded_mask[1:-1, 1:-1] = masks[mask_ind, 0, :, :]

            ref_box = ref_boxes[mask_ind, :]
            w = ref_box[2] - ref_box[0] + 1
            h = ref_box[3] - ref_box[1] + 1
            w = np.maximum(w, 1)
            h = np.maximum(h, 1)

            mask = cv2.resize(padded_mask, (w, h))
            mask = np.array(mask > cfg.MRCNN.THRESH_BINARIZE, dtype=np.uint8)
            im_mask = np.zeros((im_h, im_w), dtype=np.uint8)

            x_0 = max(ref_box[0], 0)
            x_1 = min(ref_box[2] + 1, im_w)
            y_0 = max(ref_box[1], 0)
            y_1 = min(ref_box[3] + 1, im_h)

            im_mask[y_0:y_1,
                    x_0:x_1] = mask[(y_0 - ref_box[1]):(y_1 - ref_box[1]),
                                    (x_0 - ref_box[0]):(x_1 - ref_box[0])]

            # Get RLE encoding used by the COCO evaluation API
            rle = mask_util.encode(
                np.array(im_mask[:, :, np.newaxis], order='F'))[0]
            segms.append(rle)

            mask_ind += 1

        cls_segms[j] = segms

    assert mask_ind == masks.shape[0]
    return cls_segms
Example #4
    def forward(self, inputs, outputs):
        data = inputs[0].data
        rois = inputs[1].data
        up_scale = self.up_scale
        height, width = data.shape[2], data.shape[3]

        bboxes = rois[:, 1:5]
        batch_ids = rois[:, [0]]
        # up-scale (pad) the bboxes, then clip them to the image boundary
        # so that they stay within the feature map extent
        pad_bboxes = box_utils.expand_boxes(bboxes, up_scale)
        pad_bboxes = box_utils.clip_boxes_to_image(pad_bboxes, height, width)

        # add the batch_ids to the rois
        pad_rois = np.hstack((batch_ids, pad_bboxes))

        outputs[0].reshape(pad_rois.shape)
        outputs[0].data[...] = pad_rois
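
This op pairs expand_boxes with box_utils.clip_boxes_to_image; a minimal sketch of the clipping helper, assuming Detectron-style behavior (clamp coordinates into [0, width - 1] x [0, height - 1]):

import numpy as np

def clip_boxes_to_image(boxes, height, width):
    # Clamp [x1, y1, x2, y2] coordinates so the boxes lie inside the image.
    boxes[:, [0, 2]] = np.minimum(width - 1.0, np.maximum(0.0, boxes[:, [0, 2]]))
    boxes[:, [1, 3]] = np.minimum(height - 1.0, np.maximum(0.0, boxes[:, [1, 3]]))
    return boxes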
Example #5
    def forward(self, inputs, outputs):
        data = inputs[0].data
        keypoint_probs = inputs[1].data
        keypoint_rois = inputs[2].data

        # output indicator resolution
        M = self.resolution
        up_scale = self.up_scale
        num_rois = keypoint_rois.shape[0]
        num_keypoints = keypoint_probs.shape[1]

        # first expand the keypoint rois
        height, width = data.shape[2], data.shape[3]
        pad_rois = box_utils.expand_boxes(keypoint_rois[:, 1:5], up_scale)
        pad_rois = box_utils.clip_boxes_to_image(pad_rois, height, width)

        # get keypoint predictions and their probs
        # output shape is (#rois, 3, #keypoints) and 3 means (x, y, prob)
        pred_rois = keypoint_utils.probs_to_keypoints(keypoint_probs, keypoint_rois)
        
        # map keypoint position to the pad_rois
        # output shape is (#rois, #keypoints), with locations flattened out
        locations_on_pad_rois, _ = keypoint_utils.keypoints_to_heatmap_labels(
            pred_rois, pad_rois, M
        )
        locations_on_pad_rois = locations_on_pad_rois.astype(np.int32)

        # and now generate keypoint indicators
        keypoint_indicators = blob_utils.zeros((num_rois, num_keypoints, M**2))
        for i in range(num_rois):
            locations = locations_on_pad_rois[i] # shape (#keypoints, )
            for k in range(num_keypoints):
                keypoint_indicators[i, k, locations[k]] = pred_rois[i, 2, k]

        # and reshape to 4 dimension
        keypoint_indicators = keypoint_indicators.reshape(
            (num_rois, num_keypoints, M, M)
        )

        outputs[0].reshape(keypoint_indicators.shape)
        outputs[0].data[...] = keypoint_indicators
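
The indicator blob above is allocated with blob_utils.zeros; assuming the Detectron-style blob helpers, these are thin wrappers that just pick the dtype Caffe2 expects:

import numpy as np

def zeros(shape, int32=False):
    # Zero-filled blob, float32 by default or int32 on request.
    return np.zeros(shape, dtype=np.int32 if int32 else np.float32)

def ones(shape, int32=False):
    # One-filled blob, float32 by default or int32 on request.
    return np.ones(shape, dtype=np.int32 if int32 else np.float32)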
Example #6
def segm_results(args, cls_boxes, masks, ref_boxes, im_h, im_w, dataset):
    num_classes = cfg.MODEL.NUM_CLASSES
    cls_segms = [[] for _ in range(num_classes)]
    mask_ind = 0
    # To work around an issue with cv2.resize (it seems to automatically pad
    # with repeated border values), we manually zero-pad the masks by 1 pixel
    # prior to resizing back to the original image resolution. This prevents
    # "top hat" artifacts. We therefore need to expand the reference boxes by an
    # appropriate factor.
    M = cfg.MRCNN.RESOLUTION
    scale = (M + 2.0) / M
    ref_boxes = box_utils.expand_boxes(ref_boxes, scale)
    ref_boxes = ref_boxes.astype(np.int32)
    padded_mask = np.zeros((M + 2, M + 2), dtype=np.float32)

    prediction_row = []
    # skip j = 0, because it's the background class
    for j in range(1, num_classes):
        segms = []
        class_instance_count = 0
        # we also require the classes in WAD
        for b in range(cls_boxes[j].shape[0]):
            if cfg.MRCNN.CLS_SPECIFIC_MASK:
                padded_mask[1:-1, 1:-1] = masks[mask_ind, j, :, :]
            else:
                padded_mask[1:-1, 1:-1] = masks[mask_ind, 0, :, :]

            ref_box = ref_boxes[mask_ind, :]
            w = (ref_box[2] - ref_box[0] + 1)
            h = (ref_box[3] - ref_box[1] + 1)
            w = np.maximum(w, 1)
            h = np.maximum(h, 1)

            mask = cv2.resize(padded_mask, (w, h))
            mask = np.array(mask > cfg.MRCNN.THRESH_BINARIZE, dtype=np.uint8)
            im_mask = np.zeros((im_h, im_w), dtype=np.uint8)

            x_0 = max(ref_box[0], 0)
            x_1 = min(ref_box[2] + 1, im_w)
            y_0 = max(ref_box[1], 0)
            y_1 = min(ref_box[3] + 1, im_h)

            im_mask[y_0:y_1,
                    x_0:x_1] = mask[(y_0 - ref_box[1]):(y_1 - ref_box[1]),
                                    (x_0 - ref_box[0]):(x_1 - ref_box[0])]

            # Get RLE encoding used by the COCO evaluation API
            rle = mask_util.encode(
                np.array(im_mask[:, :, np.newaxis], order='F'))[0]
            # For dumping to json, need to decode the byte string.
            # https://github.com/cocodataset/cocoapi/issues/70
            rle['counts'] = rle['counts'].decode('ascii')
            segms.append(rle)

            mask_ind += 1

            # The following is historical code (not the retrained one)
            if cls_boxes[j][b, -1] > args.cls_boxes_confident_threshold:
                LabelId = dataset.contiguous_category_id_to_json_id[j]
                Confidence = cls_boxes[j][b, -1]
                # save image to file
                img_name = '{}/{}_{}_{}.png'.format(
                    args.output_img_dir, args.current_im_name, LabelId,
                    class_instance_count)
                im = Image.fromarray(im_mask * 255)
                im.save(img_name)
                prediction_row.append(" ".join(
                    (img_name, str(LabelId), str(Confidence))))
                class_instance_count += 1

        cls_segms[j] = segms

    assert mask_ind == masks.shape[0]
    return cls_segms, prediction_row
Example #7
def add_refine_local_mask_blobs(blobs, sampled_boxes, roidb, im_scale,
                                batch_idx, data):
    """Add RefineNet Mask specific blobs to the input blob dictionary."""
    # Prepare the mask targets by associating one gt mask to each training roi
    # that has a fg (non-bg) class label.
    M = cfg.REFINENET.RESOLUTION
    up_scale = cfg.REFINENET.UP_SCALE
    polys_gt_inds = np.where((roidb['gt_classes'] > 0)
                             & (roidb['is_crowd'] == 0))[0]
    gt_classes = roidb['gt_classes'][polys_gt_inds]
    polys_gt = [roidb['segms'][i] for i in polys_gt_inds]
    boxes_from_polys = segm_utils.polys_to_boxes(polys_gt)
    fg_inds = np.where(blobs['labels_int32'] > 0)[0]
    roi_has_mask = blobs['labels_int32'].copy()
    roi_has_mask[roi_has_mask > 0] = 1

    # Define size variables
    inp_h, inp_w = data.shape[2], data.shape[3]
    pad_img_h, pad_img_w = inp_h / im_scale, inp_w / im_scale

    if fg_inds.shape[0] > 0:
        # Class labels for the foreground rois
        mask_class_labels = blobs['labels_int32'][fg_inds]
        masks = blob_utils.zeros((fg_inds.shape[0], M**2), int32=True)

        # Find overlap between all foreground rois and the bounding boxes
        # enclosing each segmentation
        rois_fg = sampled_boxes[fg_inds]
        overlaps_bbfg_bbpolys = box_utils.bbox_overlaps(
            rois_fg.astype(np.float32, copy=False),
            boxes_from_polys.astype(np.float32, copy=False))
        # Map from each fg rois to the index of the mask with highest overlap
        # (measured by bbox overlap)
        fg_polys_inds = np.argmax(overlaps_bbfg_bbpolys, axis=1)

        # Expand the foreground rois by a factor of up_scale and
        # clip by the padded image boundary
        pad_rois_fg = box_utils.expand_boxes(rois_fg, up_scale)
        pad_rois_fg = box_utils.clip_boxes_to_image(pad_rois_fg, pad_img_h,
                                                    pad_img_w)

        if cfg.REFINENET.ONLY_USE_CROWDED_SAMPLES:
            # Only use crowded samples to train the RefineNet
            THRES = cfg.REFINENET.OVERLAP_THRESHOLD
            for i in range(rois_fg.shape[0]):
                overlap = overlaps_bbfg_bbpolys[i]
                if np.sum(overlap > THRES) > 1:
                    # if multiple instances overlap this roi, use it for training
                    fg_polys_ind = fg_polys_inds[i]
                    poly_gt = polys_gt[fg_polys_ind]
                    pad_roi_fg = pad_rois_fg[i]
                    # Rasterize the portion of the polygon mask within the given fg roi
                    # to an M x M binary image
                    mask = segm_utils.polys_to_mask_wrt_box(
                        poly_gt, pad_roi_fg, M)
                    mask = np.array(mask > 0,
                                    dtype=np.int32)  # Ensure it's binary
                    masks[i, :] = np.reshape(mask, M**2)

                else:  # Only one instance; set the mask target to -1 (ignore label)
                    masks[i, :] = -1
                    mask_class_labels[i] = 0
        elif cfg.REFINENET.ASSIGN_LARGER_WEIGHT_FOR_CROWDED_SAMPLES:
            loss_weights = blob_utils.ones((rois_fg.shape[0], ))
            for i in range(rois_fg.shape[0]):
                fg_polys_ind = fg_polys_inds[i]
                poly_gt = polys_gt[fg_polys_ind]
                pad_roi_fg = pad_rois_fg[i]
                class_label = mask_class_labels[i]

                # Rasterize the portion of the polygon mask within the given
                # fg roi to an M x M binary image
                mask = segm_utils.polys_to_mask_wrt_box(poly_gt, pad_roi_fg, M)
                mask = np.array(mask > 0, dtype=np.int32)  # Ensure it's binary
                masks[i, :] = np.reshape(mask, M**2)

                # Now determine the weight for each RoI: if any other instance
                # of the same class overlaps this RoI, we expect it to be a
                # hard sample and assign it a larger loss weight
                for j in range(len(polys_gt)):
                    if j == fg_polys_ind:
                        continue
                    if gt_classes[
                            j] == class_label:  # only same class is valid
                        mask = segm_utils.polys_to_mask_wrt_box(
                            polys_gt[j], pad_roi_fg, M)
                        # and check if any part falls inside the bbox
                        is_inside_bbox = (np.sum(mask) > 0)
                        if is_inside_bbox:
                            loss_weights[i] = cfg.REFINENET.WEIGHT_LOSS_CROWDED
                            break  # early stop

        else:
            # add fg targets
            for i in range(rois_fg.shape[0]):
                fg_polys_ind = fg_polys_inds[i]
                poly_gt = polys_gt[fg_polys_ind]
                pad_roi_fg = pad_rois_fg[i]
                # Rasterize the portion of the polygon mask within the given fg roi
                # to an M x M binary image
                mask = segm_utils.polys_to_mask_wrt_box(poly_gt, pad_roi_fg, M)
                mask = np.array(mask > 0, dtype=np.int32)  # Ensure it's binary
                masks[i, :] = np.reshape(mask, M**2)

    else:  # If there are no fg masks (it does happen)
        # The network cannot handle empty blobs, so we must provide a mask.
        # We simply take the first bg roi, give it an all -1's mask (ignore
        # label), and label it with class zero (bg).
        bg_inds = np.where(blobs['labels_int32'] == 0)[0]
        # pad_rois_fg is actually one background roi, but that's ok because ...
        pad_rois_fg = sampled_boxes[bg_inds[0]].reshape((1, -1))
        # We give it an -1's blob (ignore label)
        masks = -blob_utils.ones((1, M**2), int32=True)
        # We label it with class = 0 (background)
        mask_class_labels = blob_utils.zeros((1, ))
        # Mark that the first roi has a mask
        roi_has_mask[0] = 1

    if cfg.MRCNN.CLS_SPECIFIC_MASK:
        masks = _expand_to_class_specific_mask_targets(masks,
                                                       mask_class_labels)

    # Scale rois_fg and format as (batch_idx, x1, y1, x2, y2)
    pad_rois_fg = (pad_rois_fg.astype(np.float32)) * im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones((pad_rois_fg.shape[0], 1))
    pad_rois_fg = np.hstack((repeated_batch_idx, pad_rois_fg)).astype(np.int32)

    # Update blobs dict with Refine-Net blobs
    blobs['refined_mask_rois'] = pad_rois_fg
    blobs['roi_has_refined_mask_int32'] = roi_has_mask
    blobs['refined_masks_int32'] = masks

    if cfg.REFINENET.ASSIGN_LARGER_WEIGHT_FOR_CROWDED_SAMPLES:
        blobs['loss_weights'] = loss_weights
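
When cfg.MRCNN.CLS_SPECIFIC_MASK is on, the (N, M**2) targets are expanded into per-class targets. A sketch of what _expand_to_class_specific_mask_targets is expected to do, modeled on Detectron's Mask R-CNN helper (using the RefineNet resolution here is an assumption; cfg and blob_utils are the same modules used throughout these examples):

def _expand_to_class_specific_mask_targets(masks, mask_class_labels):
    # Expand (#masks, M**2) targets to (#masks, #classes * M**2); slots for
    # all other classes stay -1 (ignore label).
    assert masks.shape[0] == mask_class_labels.shape[0]
    M = cfg.REFINENET.RESOLUTION  # assumption: RefineNet, not MRCNN, resolution
    mask_targets = -blob_utils.ones(
        (masks.shape[0], cfg.MODEL.NUM_CLASSES * M**2), int32=True)
    for i in range(masks.shape[0]):
        cls = int(mask_class_labels[i])
        start = M**2 * cls
        end = start + M**2
        if cls > 0:  # class 0 (background) keeps the all -1 ignore target
            mask_targets[i, start:end] = masks[i, :]
    return mask_targets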
Example #8
def add_refine_keypoints_blobs_softmax(blobs, roidb, fg_rois_per_image,
                                       fg_inds, im_scale, batch_idx, data):
    """Add Mask R-CNN keypoint specific blobs to the given blobs dictionary."""
    # Note: gt_inds must match how they're computed in
    # datasets.json_dataset._merge_proposal_boxes_into_roidb
    gt_inds = np.where(roidb['gt_classes'] > 0)[0]
    gt_keypoints = roidb['gt_keypoints']
    # Load the kp_fg_inds generated by keypoint_rcnn.py, so that we avoid a
    # mismatch between keypoint_rois and refined_keypoint_rois, which would be
    # a serious problem for training.
    kp_fg_inds = blobs['keypoint_fg_inds']
    if kp_fg_inds.shape[0] > 0:
        sampled_fg_rois = roidb['boxes'][kp_fg_inds]
        box_to_gt_ind_map = roidb['box_to_gt_ind_map'][kp_fg_inds]

        # Let's expand the rois
        up_scale = cfg.REFINENET.UP_SCALE
        inp_h, inp_w = data.shape[2], data.shape[3]
        pad_img_h, pad_img_w = inp_h / im_scale, inp_w / im_scale

        pad_fg_rois = box_utils.expand_boxes(sampled_fg_rois, up_scale)
        pad_fg_rois = box_utils.clip_boxes_to_image(pad_fg_rois, pad_img_h,
                                                    pad_img_w)

        num_keypoints = gt_keypoints.shape[2]
        sampled_keypoints = -np.ones(
            (len(pad_fg_rois), gt_keypoints.shape[1], num_keypoints),
            dtype=gt_keypoints.dtype)
        for ii in range(len(pad_fg_rois)):
            ind = box_to_gt_ind_map[ii]
            if ind >= 0:
                sampled_keypoints[ii, :, :] = gt_keypoints[gt_inds[ind], :, :]
                assert np.sum(sampled_keypoints[ii, 2, :]) > 0

        heats, weights = keypoint_utils.keypoints_to_heatmap_labels(
            sampled_keypoints, pad_fg_rois, M=cfg.REFINENET.KRCNN.HEATMAP_SIZE)

        shape = (pad_fg_rois.shape[0] * cfg.KRCNN.NUM_KEYPOINTS, 1)
        heats = heats.reshape(shape)
        weights = weights.reshape(shape)

    else:  # If there are no fg keypoint rois (it does happen)
        # The network cannot handle empty blobs, so we must provide a heatmap.
        # We simply take the first bg roi, give it an all-zero heatmap, and
        # set its weights to zero (ignore label).
        roi_inds = np.where(roidb['gt_classes'] == 0)[0]
        # sampled_fg_rois is actually one random roi, but that's ok because ...
        pad_fg_rois = roidb['boxes'][roi_inds[0]].reshape((1, -1))
        # We give it an all-zeros blob
        heats = blob_utils.zeros((1 * cfg.KRCNN.NUM_KEYPOINTS, 1))
        # We set weights to 0 (ignore label)
        weights = blob_utils.zeros((1 * cfg.KRCNN.NUM_KEYPOINTS, 1))

    pad_fg_rois *= im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones((pad_fg_rois.shape[0], 1))
    pad_fg_rois = np.hstack((repeated_batch_idx, pad_fg_rois))

    blobs['refined_keypoint_rois'] = pad_fg_rois
    blobs['refined_keypoint_locations_int32'] = heats.astype(np.int32,
                                                             copy=False)
    blobs['refined_keypoint_weights'] = weights
Example #9
def segm_results_inmodal(cls_boxes, masks_amodal, masks_inmodal, ref_boxes,
                         im_h, im_w):
    num_classes = cfg.MODEL.NUM_CLASSES
    cls_segms_amodal_1 = [[] for _ in range(num_classes)]
    cls_segms_inmodal_1 = [[] for _ in range(num_classes)]
    mask_ind = 0
    # To work around an issue with cv2.resize (it seems to automatically pad
    # with repeated border values), we manually zero-pad the masks by 1 pixel
    # prior to resizing back to the original image resolution. This prevents
    # "top hat" artifacts. We therefore need to expand the reference boxes by an
    # appropriate factor.
    M = cfg.MRCNN.RESOLUTION
    scale = (M + 2.0) / M
    ref_boxes = box_utils.expand_boxes(ref_boxes, scale)
    ref_boxes = ref_boxes.astype(np.int32)
    padded_amodal = np.zeros((M + 2, M + 2), dtype=np.float32)
    padded_inmodal = np.zeros((M + 2, M + 2), dtype=np.float32)
    # skip j = 0, because it's the background class
    for j in range(1, num_classes):
        segms_amodal = []
        segms_inmodal = []
        for _ in range(cls_boxes[j].shape[0]):
            if cfg.MRCNN.CLS_SPECIFIC_MASK:
                padded_amodal[1:-1, 1:-1] = masks_amodal[mask_ind, j, :, :]
                padded_inmodal[1:-1, 1:-1] = masks_inmodal[mask_ind, j, :, :]
            else:
                padded_amodal[1:-1, 1:-1] = masks_amodal[mask_ind, 0, :, :]
                padded_inmodal[1:-1, 1:-1] = masks_inmodal[mask_ind, 0, :, :]

            ref_box = ref_boxes[mask_ind, :]
            w = (ref_box[2] - ref_box[0] + 1)
            h = (ref_box[3] - ref_box[1] + 1)
            w = np.maximum(w, 1)
            h = np.maximum(h, 1)

            mask_amodal = cv2.resize(padded_amodal, (w, h))
            mask_amodal = np.array(mask_amodal > cfg.MRCNN.THRESH_BINARIZE,
                                   dtype=np.uint8)
            im_mask_amodal = np.zeros((im_h, im_w), dtype=np.uint8)

            mask_inmodal = cv2.resize(padded_inmodal, (w, h))
            mask_inmodal = np.array(mask_inmodal > cfg.MRCNN.THRESH_BINARIZE,
                                    dtype=np.uint8)
            im_mask_inmodal = np.zeros((im_h, im_w), dtype=np.uint8)

            x_0 = max(ref_box[0], 0)
            x_1 = min(ref_box[2] + 1, im_w)
            y_0 = max(ref_box[1], 0)
            y_1 = min(ref_box[3] + 1, im_h)

            im_mask_amodal[y_0:y_1, x_0:x_1] = mask_amodal[
                (y_0 - ref_box[1]):(y_1 - ref_box[1]),
                (x_0 - ref_box[0]):(x_1 - ref_box[0])
            ]
            im_mask_inmodal[y_0:y_1, x_0:x_1] = mask_inmodal[
                (y_0 - ref_box[1]):(y_1 - ref_box[1]),
                (x_0 - ref_box[0]):(x_1 - ref_box[0])
            ]

            # Get RLE encoding used by the COCO evaluation API
            rle_amodal = mask_util.encode(
                np.array(im_mask_amodal[:, :, np.newaxis], order='F'))[0]
            rle_inmodal = mask_util.encode(
                np.array(im_mask_inmodal[:, :, np.newaxis], order='F'))[0]
            # For dumping to json, need to decode the byte string.
            # https://github.com/cocodataset/cocoapi/issues/70
            rle_amodal['counts'] = rle_amodal['counts'].decode('ascii')
            rle_inmodal['counts'] = rle_inmodal['counts'].decode('ascii')
            segms_amodal.append(rle_amodal)
            segms_inmodal.append(rle_inmodal)
            mask_ind += 1

        cls_segms_amodal_1[j] = segms_amodal
        cls_segms_inmodal_1[j] = segms_inmodal

    assert mask_ind == masks_amodal.shape[0]
    assert mask_ind == masks_inmodal.shape[0]
    return cls_segms_amodal_1, cls_segms_inmodal_1
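
A small, hypothetical post-processing sketch: with the amodal and inmodal RLEs returned above, pycocotools' area helper can estimate how much of each instance is visible (the function name and structure are illustrative only):

import pycocotools.mask as mask_util

def visible_fractions(cls_segms_amodal, cls_segms_inmodal):
    # Ratio of visible (inmodal) area to full (amodal) area, per instance.
    fractions = []
    for segms_a, segms_i in zip(cls_segms_amodal, cls_segms_inmodal):
        for rle_a, rle_i in zip(segms_a, segms_i):
            area_a = float(mask_util.area(rle_a))
            area_i = float(mask_util.area(rle_i))
            fractions.append(area_i / area_a if area_a > 0 else 0.0)
    return fractions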